#these are helper function used by HiC_matrix internally but with possible general usefulness
#these function "do the work" on basic classes.
#HiC_matrix class functions feed appropriate slots to these helpers.
#' Subset data.table by i,j indexes
#'
#'efficiently subset data.table as one would a subrange of a matrix
#'
#'
#' @param dt a data.table containing i, j, and val
#' @param i_range integer, range of i indexes to retrieve
#' @param j_range integer, range of j indexes to retrieve
#' @param fill_square logical, should region spanning below diagonal be filled in?
#'
#' @return subset of dt within ranges
#' @export
#'
#' @examples
subset_dt = function(dt, i_range, j_range, fill_square = FALSE){
if(!any(class(dt) == "data.table")){
stop("stop: dt must have class data.table")
}
if(!all(key(dt) == c("i", "j"))){
stop("stop: data.table must be keyed by i and j")
}
qi = rep(i_range, length(j_range))
qj = as.vector(sapply(j_range, function(x){
rep(x, length(i_range))
}))
if(fill_square){
out = unique(rbind(dt[.(qi, qj), .(i = j, j = i, val)][!is.na(val)],
dt[.(qj, qi), .(i = j, j = i, val)][!is.na(val)]))
out = out[i %in% i_range & j %in% j_range]
}else{
out = dt[.(qi, qj)]
}
out
}
#' fetch_hic_byGRanges
#'
#' Retrieves rectangular region of a HiC_matrix.
#'
#' @param hic_mat a valid HiC_matrix object
#' @param gr1 GRanges, specifies range of i indexes
#' @param gr2 Granges, specifies range of j indexes, default is same as gr1
#' @param ext integer, extends gr1 and gr2 by set amount, default is 0
#'
#' @return data.table correspoding to gr1xgr2 region of hic_mat
#' @export
#'
#' @examples
fetch_hic_byGRanges = function(hic_mat, gr1, gr2 = gr1, ext = 0){
stopifnot(class(hic_mat) %in% c("HiC_matrix", "HiC_matrix_wInsulation"))
stopifnot(class(gr1) == "GRanges")
stopifnot(class(gr2) == "GRanges")
idx1 = get_chrRange_indexes(hic_mat@hic_1d,
chr = as.character(seqnames(gr1)),
start(gr1) - ext,
end(gr1) + ext)
idx2 = get_chrRange_indexes(hic_mat@hic_1d,
chr = as.character(seqnames(gr2)),
start(gr2) - ext,
end(gr2) + ext)
res_dt = subset_dt(dt = hic_mat@hic_2d, j_range = idx1, i_range = idx2, fill_square = TRUE)
res_dt[, i_start := hic_mat@hic_1d[.(i)]$start]
res_dt[, i_end := hic_mat@hic_1d[.(i)]$end]
res_dt[, j_start := hic_mat@hic_1d[.(j)]$start]
res_dt[, j_end := hic_mat@hic_1d[.(j)]$end]
res_dt
}
#Found via Dave Shirley's hic package, HiC_functions.R
#Adapted from geotheory package, https://gist.github.com/geotheory/5748388 author: Robin Edwards
#Helper function so not imported
hex_coord_df <- function(x, y, width, height, size = 1) {
# like hex_coord but returns a dataframe of vertices grouped by an id variable
dx <- size * width / 6
dy <- size * height
hex_x <- rbind(x - 2 * dx, x - dx, x + dx, x + 2 * dx, x + dx, x - dx)
hex_y <- rbind(y, y + dy, y + dy, y, y - dy, y - dy)
id <- rep(1:length(x), each=6)
data.frame(cbind(x=as.vector(hex_x), y=as.vector(hex_y), id))
}
#adapted from hex_coord_df
diamond_coord_df <- function(x, y, width, height, size = 1) {
# like hex_coord but returns a dataframe of vertices grouped by an id variable
dx <- size * width / 4
dy <- size * height
#for diamonds, delete 3rd and
hex_x <- rbind(x - 2 * dx, x, x + 2 * dx, x)
hex_y <- rbind(y, y + dy, y, y - dy)
id <- rep(1:length(x), each=4)
data.frame(cbind(x=as.vector(hex_x), y=as.vector(hex_y), id))
}
#plot min calls
ggplot_tad_scores = function(tad_calls, regions, chr, start, end, bin_dist_range = c(-Inf, Inf), ins_delta_range = c(-Inf, Inf)){
pos_indexes = get_chrRange_indexes(regions, chr, start, end)
ggplot(tad_calls[min_index %in% pos_indexes &
bin_dist >= bin_dist_range[1] &
bin_dist <= bin_dist_range[2] &
ins_delta_mednorm >= ins_delta_range[1] &
ins_delta_mednorm <= ins_delta_range[2]], aes(x = mid, y = grp)) + geom_point()
}
#convert a list of data.table to a single tall data.table with added column of list names
list_dt2tall_dt = function(list_dt, new_col = NULL, show_progress = T){
if(is.null(names(list_dt))) names(list_dt) = paste("group", 1:length(list_dt))
if(exists("tmp_dt")) remove("tmp_dt")
for(i in 1:length(list_dt)){
dt = list_dt[[i]]
if(!is.null(names(list_dt)) & !is.null(new_col)){
dt[[new_col]] = names(list_dt)[i]
}
if(!exists("tmp_dt")){
tmp_dt = dt
}else{
tmp_dt = rbind(tmp_dt, dt)
}
if(show_progress){
cat('\r',round(i / length(list_dt) * 100, digits = 2), "%", rep(" ", 20))
flush.console()
}
}
return(tmp_dt)
}
#' apply_diagonal_zscore
#'
#' calculates z-score of log2 transformed data
#' z-scores are segmented by chromosome and by distance from diagonal
#' returns recalculated HiC_matrix_wInsulation
#' @param hic_mat
#'
#' @return
#' @export
#'
#' @examples
apply_diagonal_zscore = function(hic_mat, include_hetero_chr = FALSE){
test_dt = hic_mat@hic_2d
test_reg = hic_mat@hic_1d
#merge to include chromosome information
print("assembling chromosome info...")
test_dt = merge(merge(test_dt, test_reg, by.x = "i", by.y = "index"), test_reg, by.x = "j", by.y = "index")
test_dt[, val := log2(val)]
MAX = test_dt[seqnames.x == seqnames.y, max(j-i)]
all_chrms = unique(test_reg$seqnames)
test_dt[, index_diff := abs(i - j)]
setkey(test_dt, index_diff, seqnames.x, seqnames.y)
print("calculating z-scores...")
if(include_hetero_chr){
het_dt = test_dt[seqnames.y != seqnames.x, .(i, j, val)]
het_dt = het_dt[, .(i, j, val = (val - mean(val, na.rm = T)) / sd(val, na.rm = T))]
}
test_dt = test_dt[seqnames.y == seqnames.x, ]
test_dt = test_dt[, .(i, j, val, seqnames = seqnames.x, index_diff)]
#ninja data.table segmented z-score calculation - kachow
test_dt = test_dt[, .(i, j, val = (val - mean(val, na.rm = T)) / sd(val, na.rm = T)), by = c("index_diff", "seqnames")]
test_dt$index_diff = NULL
test_dt$seqnames = NULL
if(include_hetero_chr){
test_dt = rbind(test_dt, het_dt)
}
setkey(test_dt, i, j)
test_dt = test_dt[!is.na(val),]
hic_mat@hic_2d = test_dt
hic_mat@hic_1d = hic_mat@hic_1d[,1:4]
hic_mat@parameters@log2_over_mean_normalization = F
return(HiC_matrix_wInsulation(hic_mat))
}
#' decrease_resolution
#'
#' @param hic_mat
#' @param pool_factor
#' @param calc_insulation
#'
#' @return
#' @export
#'
#' @examples
decrease_resolution = function(hic_mat, pool_factor, calc_insulation = F){
bin_size = hic_mat@parameters@bin_size
newb_size = bin_size * pool_factor
chr_sizes = hic_mat@hic_1d[, .(size = max(end)),by = seqnames]
setkey(chr_sizes, seqnames)
grs = lapply(chr_sizes$seqnames, function(chr){
ends = 1:ceiling(chr_sizes[chr]$size / newb_size) * newb_size
starts = ends - newb_size
ends = ifelse(ends > chr_sizes[chr]$size, chr_sizes[chr]$size, ends)
GRanges(chr, IRanges(starts, ends))
})
grs = unlist(GRangesList(grs))
grs$pooled_index = 1:length(grs)
odt = as.data.table(findOverlaps(grs, GRanges(hic_mat@hic_1d), minoverlap = 5, ignore.strand = TRUE))
setkey(odt, queryHits)
print("reduce 1d...")
new_1d = as.data.table(grs)
new_1d = new_1d[, .(seqnames, start, end, index = pooled_index)]
new_1d$seqnames = as.character(new_1d$seqnames)
print("reduce 2d...")
nbins = pool_factor^2
setkey(odt, subjectHits)
new_2d = hic_mat@hic_2d
new_2d[ , pooled_i := odt[.(i), queryHits]]
new_2d[ , pooled_j := odt[.(j), queryHits]]
agg_func = function(x){
sum(x) / nbins
}
new_2d = new_2d[, .(val = agg_func(val)), by = c("pooled_i", "pooled_j") ]
new_2d = new_2d[, .(i = pooled_i, j = pooled_j, val = val)]
setkey(new_2d, i, j)
print("assemble HiC_matrix...")
new_hic = hic_mat
new_hic@hic_2d = new_2d
new_hic@hic_1d = new_1d
new_hic@parameters = HiC_parameters(bin_size = hic_mat@parameters@bin_size * pool_factor)
if(calc_insulation){
print("calc insulation...")
new_hic = HiC_matrix_wInsulation(new_hic)
}
return(new_hic)
}
#
#' mat2dt
#'
#' dense matrix to sparse data.table
#'
#' @param mat
#'
#' @return
#' @export
#'
#' @examples
mat2dt = function(mat){
dt2 = as.data.table(mat)
colnames(dt2) = as.character(1:ncol(dt2))
dt2$i = 1:nrow(dt2)
dt2m = melt(dt2, id.vars = "i", variable.name = "j")
dt2m$j = as.integer(dt2m$j)
return(dt2m)
}
#' dt2mat
#'
#' sparse data.table to dense matrix conversions
#'
#' @param dt
#'
#' @return
#' @export
#'
#' @examples
dt2mat = function(dt){
adj_i = dt[, min(i)] - 1
adj_j = dt[, min(j)] - 1
as.matrix(sparseMatrix(dt$i - adj_i, dt$j - adj_j, x = dt$val))
}
gaussian_cluster_chrm = function(hic_mat, chrm, sigma, nclust, output_prefix = "gaussian_blur", sample_description = paste0("matrix_", hic_mat@parameters@bin_size)){
idxs = hic_mat@hic_1d[seqnames == chrm]$index
chrm_dt = hic_mat@hic_2d[i %in% idxs & j %in% idxs]
chrm_dt[, index_diff := abs(i - j)]
chrm_dt[, seqnames := chrm]
chrm_dt = rbind(chrm_dt, chrm_dt[, .(index_diff, seqnames, i = j, j = i, val)])
picture = dt2mat(chrm_dt)
picture = ifelse(picture < 0, 0, picture)
picture2 <- as.matrix(blur(as.im(picture), sigma=sigma))
png(paste0(output_prefix, "_", sample_description, "_sigma", sigma, "_blur_comparison.png"), width = 1000, height = 1000)
layout(matrix(c(1:4), nrow=2))
image.plot(picture, col=gray.colors(50), main="original image", asp=1)
image.plot(picture2, col=gray.colors(50), main=paste0("blurred with sigma = ", sigma), asp=1)
drape.plot(1:nrow(picture), 1:ncol(picture), picture, border=NA, theta=0, phi=45, main="original spectrogram")
drape.plot(1:nrow(picture), 1:ncol(picture), picture2, border=NA, theta=0, phi=45, main=paste0("blurred with sigma = ", sigma))
dev.off()
# kpic = kmeans(pic, centers = 6)
hdist = dist(picture2)^2
hpic = hclust(d = hdist, method = "average")
hclusters = cutree(hpic, k = nclust)
side_cols = rainbow(n = nclust)[hclusters]
png(paste0(output_prefix, "_", sample_description, "_sigma", sigma, "_nclust", nclust, "_clustering.png"), width = 1000, height = 2000)
layout(1:2)
par(xpd = NA)
image.plot(picture2, col=gray.colors(50))
sc2 = lapply(sort(unique(side_cols)), function(x){
mtch = which(x == side_cols)
mis = which(!mtch[-1] - 1 == mtch[-length(mtch)])
if(length(mis) == 0)return(data.frame(starts = min(mtch), ends = max(mtch), color = x, stringsAsFactors = F))
starts = c(min(mtch), mtch[mis+1])
ends = c(mtch[mis], max(mtch))
data.frame(starts, ends, color = rep(x, length(starts)), stringsAsFactors = F)
})
tmp = sc2[[1]]
for(i in 2:length(sc2)){
tmp = rbind(tmp, sc2[[i]])
}
tmp$starts = tmp$starts - 1
tmp$starts = tmp$starts / max(tmp$ends)
tmp$ends = tmp$ends / max(tmp$ends)
par(xpd = NA)
for(i in 1:nrow(tmp)){
# print(tmp[i,])
rect(xleft = tmp[i,1], xright = tmp[i,2], ybottom = .99, ytop = 1.03, col = tmp[i,3])
rect(ybottom = tmp[i,1], ytop = tmp[i,2], xleft = .99, xright = 1.03, col = tmp[i,3])
}
image.plot(picture2[hpic$order, hpic$order], col=gray.colors(50))
# sc = side_cols[hpic$order]
sc = sapply(sort(unique(side_cols[hpic$order])), function(x){
range(which(x == side_cols[hpic$order]))
})
sc[1,] = sc[1,] - 1
sc = sc / max(sc)
par(xpd = NA)
for(i in 1:ncol(sc)){
rect(xleft = sc[1,i], xright = sc[2,i], ybottom = .99, ytop = 1.03, col = colnames(sc)[i])
rect(ybottom = sc[1,i], ytop = sc[2,i], xleft = .99, xright = 1.03, col = colnames(sc)[i])
}
# side_cols = cutree(hpic, k = k)
# side_cols = rainbow(n = k)[hclusters]
# sc = side_cols
dev.off()
}
fetch_bigwig_as_dt = function(bigwig_file, chr, start, end, n_bins = 200, bin_method = c("mean", "max")[2], show_progress_bar = T){
qgr = GRanges(chr, IRanges(start, end))
runx_gr = import(con = bigwig_file,
format = "BigWig", which = qgr)
runx_dt = as.data.table(runx_gr)
tiles_gr = tile(qgr, n = n_bins)[[1]]
tiles_dt = as.data.table(tiles_gr)
olaps_dt = as.data.table(findOverlaps(runx_gr, tiles_gr, ignore.strand = TRUE))
setkey(olaps_dt, "subjectHits")
if(show_progress_bar){
app_fun = pbapply::pbsapply
}else{
app_fun = sapply
}
tiles_mat = t(app_fun(unique(olaps_dt$subjectHits), function(x){
hit_dt = runx_dt[olaps_dt[.(x)]$queryHits]
# hit_gr = runx_gr[olaps_dt[.(x)]$queryHits]
s = tiles_dt[x]$start
e = tiles_dt[x]$end
hit_dt[, start := ifelse(start < s, s, start)]
hit_dt[, end := ifelse(end > e, e, end)]
# start(hit_gr)[start(hit_gr) < s] = s
# end(hit_gr)[end(hit_gr) > e] = e
mean_score = sum(hit_dt[, (end - start + 1) * score]) / tiles_dt[x, end - start + 1]
# mean_score = sum(width(hit_gr) * hit_gr$score) / width(tiles_gr[x])
max_score = max(hit_dt$score)
min_score = min(hit_dt$score)
return(c(s, e+1, mean_score, min_score, max_score))
}))
colnames(tiles_mat) = c("start", "end", "mean", "min", "max")
tiles_dt = as.data.table(tiles_mat)
tiles_dt[, xmin := start]
tiles_dt[, xmax := end]
tiles_dt[, x := (xmin + xmax) / 2]
tiles_dt[, y := get(bin_method)]
tiles_dt[, c("xmin", "xmax", "x", "y")]
}
add_bigwig_plot.tiles = function(bigwig_files, chr, start, end, n_bins = 200,
bin_method = c("mean", "max")[2], bigwig_title = "FE",
bigwig_colors = NULL, p = NULL, fe_max = NULL, fe_min = NULL,
alpha = NULL, sample_desc = NULL, just_data = F, show_progress_bar = F){
tiles_dtlist = lapply(bigwig_files, function(x){
fetch_bigwig_as_dt(x, chr, start, end, n_bins = n_bins, bin_method = bin_method, show_progress_bar = show_progress_bar)
})
tiles_dt = list_dt2tall_dt(tiles_dtlist, new_col = "group")
# tiles_dt = fetch_bigwig_as_dt(bigwig_file, chr, start, end, n_bins = n_bins, bin_method = bin_method)
if(is.null(fe_max)) fe_max = max(tiles_dt$y)
if(is.null(fe_min)) fe_min = min(tiles_dt$y)
if(is.null(alpha)){
if(length(bigwig_files) > 1){
alpha = .4
}else{
alpha = 1
}
}
# p = p + geom_rect(data = tiles_dt, aes(xmin = xmin, xmax = xmax, ymin= 0, ymax = y, fill = group))
# p = p + geom_line(data = tiles_dt, aes(x = x, y = y, col = group))
todo = unique(tiles_dt$group)
names(todo) = todo
plot_list = lapply(todo, function(grp){
grp_dt = tiles_dt[group == grp]
y1 = grp_dt[nrow(grp_dt),y]
y2 = 0
y3 = 0
y4 = grp_dt[1,y]
x1 = grp_dt[, max(xmax)]
x2 = x1
x3 = grp_dt[, min(xmin)]
x4 = x3
main_dt = rbind(grp_dt[, .(x = xmax, y, group)],
grp_dt[, .(x = xmin, y, group)]
)
main_dt = main_dt[order(x)]
ending_dt = data.table(x = c(x1,x2,x3,x4), y = c(y1,y2,y3,y4), group = grp)
rbind(main_dt, ending_dt)
})
plot_dt = list_dt2tall_dt(plot_list, new_col = NULL)
if(is.null(p)){
p = ggplot() +
labs(x = "", y = bigwig_title) +
coord_cartesian(xlim = c(start, end), ylim = c(fe_min, fe_max))
}
alpha = substr(rgb(.1, .1, .1, alpha), start = 8, stop = 9)
if(!is.null(bigwig_colors)){
group_colors = bigwig_colors
if(substr(group_colors, 0, 1)[1] != "#"){
group_colors = col2hex(group_colors)
}
}else{
len = length(unique(tiles_dt$group))
group_colors = RColorBrewer::brewer.pal(max(len, 3), "Dark2")[1:len]
}
group_colors = paste0(group_colors, alpha)
names(group_colors) = unique(tiles_dt$group)
if(just_data) return(plot_dt)
p = p + geom_polygon(data = plot_dt, aes(x = x, y = y, col = NULL, fill = group)) +
labs(fill = sample_desc) +
scale_fill_manual(values = group_colors) +
scale_color_manual(values = group_colors)
return(p)
}
fetch_region_by_gene_name = function(gene_names, ext = 0){
# if(!exists("gene_ref")){
# load("../HiC_histone_genes/hg38ref.save")
# ref_dt = as.data.table(ref)
# ref_dt = ref_dt[!duplicated(gene_name)]
# ref_dt = ref_dt[, .(seqnames = chrm, start, end, strand, symbol = gene_name, ensembl_id = gene_id)]
# setkey(ref_dt, symbol)
# genesymbol = GRanges(ref_dt)
# names(genesymbol) = genesymbol$symbol
# gene_ref <<- genesymbol
# }
# rng = range(gene_ref[gene_names], ignore.strand = TRUE)
rng = range(gene_gr[gene_names], ignore.strand = TRUE)
start(rng) = start(rng) - ext
end(rng) = end(rng) + ext
return(rng)
}
fetch_genes_in_region = function(qgr = NULL, chr = NULL, start = NULL, end = NULL, target_gr = NULL){
if(is.null(qgr)){
if(is.null(chr) | is.null(start) | is.null(end)){
stop("need qgr GRanges or chr, start, and end")
}
qgr = GRanges(chr, IRanges(start, end))
}
if(is.null(target_gr)){
target_gr = exon_gr
}
target_gr[queryHits(findOverlaps(query = target_gr, qgr, ignore.strand = TRUE))]
}
# plot_combined_hic = function(hic_list, hic_names, target_gene, extension_size_bp, file_prefix = "combined_plots"){
# tp_hic_list = my_p_hicsdz_capped
#
# qgr = fetch_region_by_gene_name("ZEB1", ext = 2*10^6)
# start(qgr) = start(qgr)
# chr = as.character(seqnames(qgr))
#
# start = start(qgr)
# end = end(qgr)
# bin_size = tp_hic_list[[1]]@parameters@bin_size
# start = floor(start / bin_size) * bin_size
# end = ceiling(end / bin_size) * bin_size
#
# p.annot = ggplot(txdb_gn) +
# geom_alignment(which = qgr,
# group.selfish = T, range.geom = "arrowrect", names.expr = "gene_id") +
# coord_cartesian(xlim = c(start, end))
# hg38_ideogram = biovizBase::getIdeogram(genome = "hg38")
# p.ideo <- Ideogram(obj = hg38_ideogram, subchr = chr, zoom.region = c(start, end), color = "green", fill = "darkgreen")
#
# cells = c("MCF10A", "MCF10A-AT1", "MCF10A-CA1a")
# for(i in 1:3){
# main = cells[i]
# p.list = plot_upperMatrix_with_insulation(tp_hic_list[[i]], chr, start - 1*10^6, end + 1*10^6, show_plot = F)
# # p.list = plot_upperMatrix_with_insulation(tp_hic_list[[i]], chr, start, end, show_plot = F)
# p.runx = add_bigwig_plot(runx_bw[i], chr, start, end, bigwig_title = "RUNX1 FE", fe_max = 30)
# p.ctcf = add_bigwig_plot(ctcf_bw[i], chr, start, end, bigwig_title = "CTCF FE", fe_max = 30)
# #manual assignment
# p.list = list(ggplotGrob(p.ideo),
# p.list[[1]],
# ggplotGrob(p.annot),
# p.list[[2]],
# ggplotGrob(p.runx),
# ggplotGrob(p.ctcf))
# #get max widths to assign to all
# maxWidth = as.list(unit.pmax(p.list[[1]]$widths, p.list[[2]]$widths,
# p.list[[3]]$widths, p.list[[4]]$widths,
# p.list[[5]]$widths, p.list[[6]]$widths))
# for(j in 1:length(p.list)){
# p.list[[j]]$widths = maxWidth
# }
# pdf(paste0("plots_ZEB1_tss_combined_", main, "_", bin_size, ".pdf"), height = 12)
# grid.arrange(arrangeGrob(grobs = p.list, ncol=1,heights=c(.2,.6,.6,.3, .3, .3)), top = main)
# dev.off()
# }
# }
#combines insulation profiles and performs quantile normalization
quant_norm_insulation = function(my_hicsd){
my_hicsd_tad_calls = assemble_tad_calls(my_hicsd)
ggplot_tad_scores(my_hicsd_tad_calls, my_hicsd[[1]]@hic_1d, "chr2", 40*10^6, 55*10^6, bin_dist_range = c(0,20), ins_delta_range = c(2,Inf))
comb_ins_dt = list_dt2tall_dt(lapply(my_hicsd, function(x)x@hic_1d))
comb_ins_dt[, mid := (start + end) / 2]
dc_comb_ins_dt = dcast(comb_ins_dt[, c("index", "value", "group", "mid")],
formula = index + mid ~ group,
value.var = "value")
val_cn = colnames(dc_comb_ins_dt)[-1:-2]
raw_mat = as.matrix(dc_comb_ins_dt[,val_cn, with = F])
normq_mat = preprocessCore::normalize.quantiles(raw_mat)
for(i in 1:ncol(normq_mat)){
dc_comb_ins_dt[,val_cn[i]] = normq_mat[,i]
}
return(list(normq_mat, dc_comb_ins_dt))
}
#creates data.frame containing min and max info for insulation profiles
make_hic_minmax_df = function(dt_1d, qgr){
chr = as.character(seqnames(qgr))
start = start(qgr) + 1
end = end(qgr)
gr = GRanges(dt_1d)
start(gr) = start(gr) + 1
q_gr = GRanges(chr, IRanges(start, end))
q_indexes = dt_1d[subjectHits(findOverlaps(query = q_gr, subject = gr, ignore.strand = TRUE))]$index
dt_1d = dt_1d[q_indexes]
dt_1d[, xs := (start + end) / 2]
max_k = dt_1d$minmax == 1
max_k[is.na(max_k)] = F
min_k = dt_1d$minmax == -1
min_k[is.na(min_k)] = F
insulation_df = data.frame(x = dt_1d$xs, y = dt_1d$value, id = 0, type = "insulation")
if(any(max_k)){
insulation_df = rbind(insulation_df,
data.frame(x = dt_1d$xs[max_k], y = dt_1d$value[max_k], id = 1, type = "insulation"))
}
if(any(min_k)){
insulation_df = rbind(insulation_df,
data.frame(x = dt_1d$xs[min_k], y = dt_1d$value[min_k], id = -1, type = "insulation"))
}
return(insulation_df)
}
#windowed view of bigwig file, filtered by qgr if supplied
fetch_windowed_bw = function(bw_file, win_size = 50, qgr = NULL){
if(is.null(qgr)){
bw_gr = rtracklayer::import.bw(bw_file)
}else{
bw_gr = rtracklayer::import.bw(bw_file, which = qgr)
}
rng = range(bw_gr)
start(rng) = start(rng) - start(rng) %% win_size + 1
end(rng) = end(rng) - end(rng) %% win_size + win_size
# end(rng) = ceiling(end(rng)/win_size) * win_size
win = slidingWindows(rng, width = win_size, step = win_size)
print(object.size(win), units = "GB")
print(object.size(bw_gr), units = "GB")
win = unlist(win)
sn = names(seqlengths(win))
names(sn) = sn
suppressWarnings({
new_seqlengths = pbapply::pbsapply(sn, function(x){
max(end(subset(win, seqnames == x)))
})
seqlengths(win) = new_seqlengths
seqlengths(bw_gr) = seqlengths(win)
})
mid_gr = function(gr){
start(gr) + floor((width(gr) - 1)/2)
}
mids = mid_gr(win)
start(win) = mids
end(win) = mids
olaps = findOverlaps(win, bw_gr, ignore.strand = TRUE)
win = win[queryHits(olaps)]
win$FE = bw_gr[subjectHits(olaps)]$score
return(win)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.