##Finds transcription factor dense DNA zones for different accumulation
##threshold values. The function works only if the "accumulation" in input is
##calculated for a single chromosome.
##input:
## accumulation: a list of four elements, as the output of the accumulation
## function.
## threshold_step: an integer, the step used to calculate the threshold
## values.
## chr: optional argument, a string with a cromosome name. It is needed to
## apply the function only to a single cromosome present in the accumulation
## input found with chr = "all". If chr = "all" (default value) the function
## operates for all the chromosome present in the input.
## writeBed: When set to TRUE, for each threshold value (and for each
## chromosome) a ".bed" file with the chromosome and genomic coordinates of
## the dense zones found is created.
##output: a list of eight elements:
## zones: a list with "GRanges" objects with the dense zones found for each
## chromosome and threshold value considered.
## zones_count: a list with a data frame, for each chromosome considered,
## containing the considered threshold values and the number of dense zones
## obtained with each of the threshold values.
## bases_count: a list with a data frame, for each chromosome considered,
## containing the considered threshold values and the total number of bases
## belonging to the dense zones obtained with each of the threshold values.
## lengths: a list with a data frame, for each chromosome considered,
## containing the considered threshold values and min, max, mean, median and
## standard deviation of the dense zone lengths obtained with each of the
## considered threshold values.
## distances: a list with a data frame, for each chromosome considered,
## containing the considered threshold values and min, max, mean, median and
## standard deviation of the distances between adjacent dense zones obtained
## with each of the threshold values.
## acctype: a string with the accumulation type used.
## chr: a string with the chromosome name associated with the accumulation
## vector used.
## w: an integer with half-width of the window used to calculate the
## accumulation vector.
## When writeBed is set to TRUE, for each threshold value (and for each
## chromosome) a ".bed" file with the chromosome and genomic coordinates of
## the dense zones found is created.
dense_zones <- function(accumulation, threshold_step, chr = NULL, writeBed =
FALSE) {
if (!is.list(accumulation))
stop("'accumulation' must be an object of type 'list'.")
if (class(accumulation$accvector) != "Rle" & class(accumulation$accvector)
!= "SimpleRleList")
stop("'accvector' element of the list 'accumulation' must be of class
'Rle' or 'SimpleRleList'.")
if (!is.numeric(threshold_step))
stop("'threshold_step' must be an object of type 'numeric'.")
if (threshold_step < 1)
stop("'threshold_step' must be > 0.")
## initializing vectors
d_zones <- list()
zones_count <- list()
bases_count <- list()
lengths <- list()
distances <- list()
acc_tf <- accumulation$accvector
if(accumulation$chr != "all") {
acc_regions <- GRanges(seqnames = rep(accumulation$chr,
length(ranges(acc_tf))), ranges = ranges(acc_tf),
score = runValue(acc_tf))
c <- accumulation$chr
} else {
acc_regions <- as(acc_tf,"GRanges")
if(!is.null(chr)) {
c <- chr
acc_regions <- acc_regions[seqnames(acc_regions) == chr]
} else {
c <- "all"
}
}
acc_regions <- acc_regions[score(acc_regions) > 0]
u=unique(seqnames(acc_regions))
for (k in seq_along(u)) {
acc_regions_chr <- acc_regions[seqnames(acc_regions) == u[k]]
threshold <- seq(1, max(score(acc_regions_chr)), threshold_step)
n_zones <- vector()
n_bases <- vector()
length_zone_min <- vector()
length_zone_max <- vector()
length_zone_mean <- vector()
length_zone_median <- vector()
length_zone_sd <- vector()
dist_zone_min <- vector()
dist_zone_max <- vector()
dist_zone_mean <- vector()
dist_zone_median <- vector()
dist_zone_sd <- vector()
z=list()
for (i in seq_along(threshold)) {
zones <- reduce(acc_regions_chr[score(acc_regions_chr) >=
threshold[i]])
z[[i]]=zones
## counting the number of dense zones that are formed by contiguos bases
## given a threshold, and the number of bases belonging to the zones.
n_bases[i] <- sum(width(zones))
n_zones[i] <- length(zones)
if (writeBed == TRUE) {
## writing on files chromosome and positions of starting and ending
## points
write.table(data.frame(rep(u[k], length(zones)),
start(zones) - 1, end(zones)),
file = paste(accumulation$acctype, "_acc_w_",
accumulation$w, "_", u[k], "_dense_zones_th_",
threshold[i], ".bed", sep = ""), row.names = FALSE,
col.names = FALSE, quote = FALSE, sep = "\t")
## finding elements of length dataframe
}
length_zone_min[i] <- min(width(zones))
length_zone_max[i] <- max(width(zones))
length_zone_mean[i] <- mean(width(zones))
length_zone_median[i] <- median(width(zones))
length_zone_sd[i] <- sd(width(zones))
## calculating distances of dense zones
if (length(zones) > 1) {
dist_zone <- start(zones)[-1] - end(zones[-length(end(zones))])
dist_zone_min[i] <- min(dist_zone)
dist_zone_max[i] <- max(dist_zone)
dist_zone_mean[i] <- mean(dist_zone)
dist_zone_median[i] <- median(dist_zone)
dist_zone_sd[i] <- sd(dist_zone)
} else {
dist_zone <- NA
dist_zone_min[i] <- NA
dist_zone_max[i] <- NA
dist_zone_mean[i] <- NA
dist_zone_median[i] <- NA
dist_zone_sd[i] <- NA
}
}
zones_count[[k]] <- data.frame(threshold, n_zones)
bases_count[[k]] <- data.frame(threshold, n_bases)
lengths[[k]] <- data.frame(threshold, n_zones, length_zone_min,
length_zone_max, length_zone_mean,
length_zone_median, length_zone_sd)
distances[[k]] <- data.frame(threshold, n_zones, dist_zone_min,
dist_zone_max, dist_zone_mean,
dist_zone_median, dist_zone_sd)
names(z) <- paste("th_", threshold, sep="")
d_zones[[k]] <- z
}
## result dataframes creation
names(zones_count) <- u
names(bases_count) <- u
names(lengths) <- u
names(distances) <- u
names(d_zones) <- u
return(list(zones = d_zones, zones_count = zones_count, bases_count =
bases_count, lengths = lengths, distances = distances,
chr = c, w = accumulation$w, acctype =
accumulation$acctype))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.