Nothing
## Track management functions
## ----- ---------- ---------
build.track <- function(datatab, bycol="Chrom", poscol=c("Mid", "Start"),
start.col="Start", end.col="End", category.col=NULL, binsize=1e6,
no.histogram=FALSE, max.segments=200, check.unit=FALSE,
category.col2=NULL, plot.range=NULL, orig.range=NULL, chroms=NULL) {
if(is.null(datatab))
return(NULL)
if(nrow(datatab)==0L)
return(NULL)
# Filter chromosomes
if(!is.null(chroms)) {
if(any(!chroms %in% datatab[,bycol]))
warning("Chromosomes not in the dataset: ", paste(setdiff(chroms,
datatab[,bycol]), collapse=", "))
datatab <- datatab[datatab[,bycol] %in% chroms,]
}
groupby <- NULL
Group <- match(category.col, colnames(datatab))
Group <- Group[!is.na(Group)][1]#
# Find position column
poscoli <- match(poscol, colnames(datatab))
if(sum(is.na(poscoli))==length(poscol)) {
stop("'", paste(poscol[is.na(poscoli)], collapse=", "),
"' is not a column of datatab.")
} else {
poscol <- poscol[!is.na(poscoli)][1L]
}
# Remove rows with missing values for postions columns
datatab <- datatab[!is.na(datatab[,bycol]) & !is.na(datatab[,poscol]),]
if(check.unit)
datatab <- checkunit(datatab)
maxseglen <- max(datatab[,end.col] - datatab[,start.col] - 1)
if(!is.null(category.col)) {
if(category.col %in% colnames(datatab)) {
col.idx <- match(category.col, colnames(datatab))
colnames(datatab)[col.idx] <- "Category"
}
}
if(!is.null(category.col2)) {
if(category.col2 %in% colnames(datatab)) {
col.idx2 <- match(category.col2, colnames(datatab))
colnames(datatab)[col.idx2] <- "Category2"
}
}
if("gieStain" %in% names(datatab)) {
datatab <- cytoBands(datatab)
}
if((maxseglen < binsize | nrow(datatab) >= max.segments) & !no.histogram){
# build track histogram
if("Category" %in% colnames(datatab)) {
groupby=split(datatab$Category, datatab[,bycol])
}
track_ls <- list()
for(chr in sort(unique(datatab[,bycol]))) {
rows <- datatab[,bycol] %in% chr
track_ls[[chr]] <- split.bins(datatab[rows,poscol], step=binsize,
fun=length, group.by=groupby[[chr]])
}
track_ls <- track_ls[!sapply(track_ls, is.null)]
track_ls <- sort.chrom(track_ls)
# Adjust values to preset scale if provided
if(!is.null(plot.range)){
if(length(plot.range)!=2L) {
stop("plot.range must ve a vector with 2 values, min and max.")
}
if(is.null(orig.range)) {
if(is.null(dim(track_ls[[1]]$counts))){
cmin <- min(sapply(track_ls, function(x) min(x$counts)))
cmax <- max(sapply(track_ls, function(x) max(x$counts)))
} else {
cmin <- min(sapply(track_ls,
function(x) min(rowSums(x$counts))))
cmax <- max(sapply(track_ls,
function(x) max(rowSums(x$counts))))
}
} else {
cmin <- orig.range[1]
cmax <- orig.range[2]
}
# forces min count to 0 for histograms
track_ls <- lapply(track_ls,
function(x){x$counts=scale.vec(x$counts, 0, cmax,
plot.range[1], plot.range[2]);x})
attr(track_ls, "count_range") <- c(cmin, cmax)
}
attr(track_ls, "is_int") <- FALSE
}else{
track_ls <- split(datatab, datatab[,bycol])
track_ls <- sort.chrom(track_ls)
attr(track_ls, "is_int") <- TRUE
}
return(track_ls)
}
build.stat <- function(stat=NULL, statcol=NULL, maxgenecount=5, stat.typ="p",
scex=1, spty=20, binsize=1e6, margin=margin, statthreshold=NULL,
col.stat="blue", col.stat2="gray57", statSumm="none", bycol="Chrom",
poscol=c("Mid", "Start"), start.col="Start", end.col="End", id.col="ID",
colors.col="Colors", chroms=NULL) {
# Sanity checks
if(!statcol %in% colnames(stat) & !id.col %in% colnames(stat)) {
stop(paste(statcol, " not a column of stat object."))
}
if(id.col %in% colnames(stat) & statSumm != "none") {
warning("the statSumm argument should be 'none' if an 'ID'",
" columns is present. statSum will be ignored for this track.")
}
# Filter chromosomes
if(!is.null(chroms)) {
if(any(!chroms %in% stat[,bycol]))
warning("Chromosomes not in the dataset: ", paste(setdiff(chroms,
stat[,bycol]), collapse=", "))
stat <- stat[stat[,bycol] %in% chroms,]
}
# Find position column
poscoli <- match(poscol, colnames(stat))
if(sum(is.na(poscoli))==length(poscol)) {
stop("'", paste(poscol[is.na(poscoli)], collapse=", "),
"' is not a column of stat.")
} else {
poscol <- poscol[!is.na(poscoli)][1L]
}
# Declare variables
maxstat <- NULL
minstat <- NULL
Realmaxstat <-NULL
# Always order stat by position
stat <- stat[order(stat[,bycol], stat[,poscol], decreasing=FALSE ) , ]
if(id.col %in% colnames(stat)) {
stat <- stat[!is.na(stat[,poscol]), ]
stat <- stat[!is.na(stat[,bycol]), ]
stat[, id.col]<-as.character(stat[,id.col])
NaID <- is.na(stat[,id.col])
NULLID <- is.null(stat[,id.col])
stat[NaID, id.col] <- ""
if(!statcol %in% colnames(stat)) {
stat <- cbind(stat, margin)
colnames(stat)[ncol(stat)] <- statcol
}
if( colors.col %in% colnames(stat)) {
stat <-stat[,c(bycol, poscol, end.col, id.col, statcol, colors.col)]
} else {
stat[,colors.col] <-"black"
stat <- stat[,c(bycol, poscol,end.col, id.col,statcol, colors.col)]
}
}
if(statSumm== "none" | (id.col %in% colnames(stat))){
stat_vals<-stat
colnames(stat_vals)[match(c(poscol,statcol),
colnames(stat_vals))]<-c("mids", "statSumm")
trackStat <- split (stat_vals, as.factor(stat_vals[,bycol]))
maxstat <- max(stat[,statcol])
minstat <- min(stat[,statcol])
Realminstat <- minstat
Realmaxstat <- maxstat
} else{
stat_vals <- data.frame(as.numeric(stat[,poscol]),
as.numeric(stat[,statcol]))
chromstat <- split(stat_vals, as.factor(stat[,bycol]))
trackStat <- lapply(chromstat, split.bins, step=binsize, fun=statSumm)
trackStat <- trackStat[!sapply(trackStat, is.null)]
maxstat <- max(unlist(sapply(trackStat, function(x) max(x$statSumm))))
minstat <- min(unlist(sapply(trackStat, function(x) min(x$statSumm))))
Realminstat <- minstat
Realmaxstat <- maxstat
} #else
if(!is.null(statthreshold)){
trackStat <- lapply(trackStat, function(x){
x$Colors=ifelse(x$statSumm >= statthreshold,
col.stat, col.stat2);x} )
} else {
trackStat <- lapply(trackStat, function(x){
x$Colors=col.stat; x} )
}
sdstat <- sd(unlist(sapply(trackStat, function(x) x$statSumm)))
if(sdstat ==0) { # Value was a constant
trackStat <- lapply(trackStat, function(x){x$statSumm=margin;x})
} else { # Value varied, must scale
#Use this code to increase observable differences by removing the minimum value.
trackStat <- lapply(trackStat,
function(x){x$statSumm=(x$statSumm-minstat)/
(maxstat-minstat)*
(maxgenecount-margin)+margin;x})
}
maxstat <- max(unlist(sapply(trackStat, function(x) max(x$statSumm))))
minstat <- min(unlist(sapply(trackStat, function(x) min(x$statSumm))))
attr(trackStat, "colScaleStat") <- col.stat
attr(trackStat, "is_int" ) <- FALSE
attr(trackStat, "stats.typ" ) <- stat.typ
attr(trackStat, "scex" ) <- scex #
attr(trackStat, "spty" ) <- spty #
attr(trackStat, "maxstat" ) <- maxstat #
attr(trackStat, "Realminstat" ) <- Realminstat #
attr(trackStat, "Realmaxstat" ) <- Realmaxstat #
attr(trackStat, "minstat" ) <- minstat
attr(trackStat, "sdstat") <- sdstat
return(trackStat)
}
## Plotting functions
## -------- ---------
plot.track <- function(track_ls, which.track, margin, side=1, lwd=3,
sort_segs=TRUE, col_segs=NULL, ylims=NULL, stack=FALSE, form=NULL, bin=1e6,
cex=NULL, trckmar=0, legchr=NULL, title=NULL, overlap.ok=FALSE) {
if(is.null(track_ls))
return(NULL)
track <- track_ls[[which.track]]
if("ID" %in% colnames(track_ls[[which.track]]) ){
if(any(track$ID != "")) {
linelength <- strwidth("xx", units="user", cex=cex)
labtrack <- track[track$ID != "",]
unstacked <- unstacked(x= 1, y=labtrack$mids, trackId= labtrack,
cex=cex)
if("Colors" %in% colnames(track)){
lab_colors <- as.character(track$Colors)
} else {
lab_colors <- col_segs
}
segments (side*labtrack$statSumm, labtrack$mids,
side*(labtrack$statSumm+linelength),
unstacked$y, col="black", lwd=0.7, lend=1)
text(side*(labtrack$statSumm+linelength), unstacked$y,
unstacked$ID, col=unstacked$Colors, adj=0.5-(side*0.5),
lwd=lwd, cex=cex)
}
}
typ <- attr(track_ls, "stats.typ")
scex <- attr(track_ls, "scex")
spty <- attr(track_ls, "spty")
flag <- 0
if(attr(track_ls, "is_int")) {
if("Colors" %in% colnames(track_ls[[1]])){
col_segs <- sort(unique(unlist(sapply(track_ls, "[", "Colors"))))
}
if("Category" %in% colnames(track_ls[[1]])) {
all_categories1 <- unlist(sapply(track_ls, "[", "Category"))
slevs1 <- sort(unique(all_categories1))
nlevs1 <- length(slevs1)
if(length(col_segs)<nlevs1) {
stop("Not enough colors provided. There are ", nlevs1,
" categories of elements and ", length(col_segs),
" colors.")
}
}
if("Category2" %in% colnames(track_ls[[1]])){
seg_form <- c(15, 16, 17, 18, 25, 20:30)
all_categories2 <- unlist(sapply(track_ls, "[", "Category2"))
slevs2 <- sort(unique(all_categories2))
nlevs2 <- length(slevs2)
if(length(seg_form)<nlevs2) {
stop("Not enough forms provided. There are ", nlevs2,
" categories of elements and ", length(seg_form),
" forms.")
}
}
}
if(!is.null(track) ) {
if(attr(track_ls, "is_int")) {
if("Colors" %in% colnames(track)){
seg_colors <- as.character(track$Colors) }
if("Category" %in% colnames(track)) {
seg_colors <- col_segs[match(track$Category, slevs1)]
} else if(!"Colors" %in% colnames(track)) {
seg_colors <- rep(col_segs[1L], nrow(track))
}
if("Category2" %in% colnames(track)){
form <- seg_form[match(track$Category2, slevs2)]
}
}
if(!is.null(typ)) {
if("Colors" %in% names(track)) {
seg_colors <- track$Colors
} else {
seg_colors <- rep(col_segs[1L], length(track$mids))
}
flag=1
switch(typ,
l=lines(side * (track$statSumm), track$mids,
col=seg_colors, type="l"),
p=lines(side * (track$statSumm), track$mids,
col=seg_colors, type="p", n=NULL, pch=spty, cex=scex),
hst=attr(track_ls, "is_int")<-FALSE,
seg=attr(track_ls, "is_int")<-FALSE)
}
if(attr(track_ls, "is_int")) {
if(overlap.ok) {
segments(margin*side, track[,"Start"],margin*side,track[,"End"],
col=seg_colors, lwd=lwd, lend=1)
} else {
if(!stack){
plot.lodclust(as.matrix(track[,c("Start", "End")]),
col=seg_colors, lwd=lwd, ymax=margin*side,
stack_ints=FALSE, form=form)
} else {
plot.segments(track, seg_colors, startfrom=side*margin,
lwd=lwd, sort.segs=sort_segs, form=form)
}
}
} else{
if(flag==0){
cat("Small segments, I will plot histograms for this track:\n")
if(trckmar>margin) {
track$counts <- scale.vec(track$counts, margin, 1,trckmar,1)
margin <- trckmar
}
if(!is.null(dim(track$counts))) {
plot.stackedHist(track, margin, ylims=NULL, col=col_segs[1],
stepsize=bin, side=side)
} else {
chromhist(side*margin, side*(track$counts),
track$mids, ylims=NULL, col=col_segs[1], stepsize=bin)#bin
}
}
}
}
# Plot a legend for is_int with Category
if(!is.null(legchr)) if(!is.na(legchr)) {
if("Category"%in%colnames(track_ls[[1]])&attr(track_ls, "is_int") &
!is.null(legchr)){
if(which.track == legchr) {
lcols <- col_segs[1:nlevs1]
legend("bottom",legend=slevs1,ncol=max(1,floor(sqrt(nlevs1))-1),
lwd=3, col=lcols, lty=1, xpd=NA,
cex=cex, bg="white", title=title)
}
}
if("Category2"%in%colnames(track_ls[[1]])&attr(track_ls, "is_int") &
!is.null(legchr)){
if(which.track == legchr) {
lform <- seg_form[1:nlevs2]
legend("right",
legend=slevs2, ncol=max(1,floor(sqrt(nlevs2))-1),
lwd=3, col="black", lty=1, xpd=NA, pch=lform,
cex=cex, bg="white", title=paste(title, "2nd Category"))
}
}
}
} # plot.track
plot.stackedHist <- function(track=NULL, margin=NULL, ylims=NULL, col=NULL,
stepsize=1e6, side=1) {
data <- t(track[[2]]) # assuming that first elements is always mids
for(i in 1:nrow(data)){
if(i==1){
x1 <- margin
x2 <- as.numeric(data[i,])
} else{
x1 <- x2
x2 <- x1 + as.numeric(data[i,]) - margin
}
chromhist(side*x1, side*x2, track$mids, col=col[i], stepsize=stepsize,
ylims=ylims)
}
} # plot.stackedHist
plot.int <- function(intervals, intclusters, col, lwd=3, ymax, stack_ints=TRUE,
form) {
intervals <- as.matrix(intervals)
if(ncol(intervals)==1)
intervals <- t(intervals)
for(i in 1:length(intclusters)){
incluster=intclusters[[i]]
plot.lodclust(intervals[incluster,], col=col[incluster], lwd, ymax,
stack_ints=stack_ints, form=form[incluster])
}
}
chromhist <- function(xleft, xright, y, ylims=NULL, col="red", stepsize=1) {
ytop <- y - stepsize/2
ybot <- y + stepsize/2
if(!is.null(ylims)) {
ytop <- unlist(sapply(ytop, max, ylims[1]))
ybot <- unlist(sapply(ybot, min, ylims[2]))
}
rect(xleft, ybot, xright, ytop, col=col, border=NA)
}
draw.scale <-function(y, minval, maxval, lwd, col, minlab=minlab, maxlab=maxval,
title="Counts", cex, ...) {
lht <- sign(diff(par("usr")[3:4])) * strheight(title)
tickpos <- pretty_ticks(minval, maxval, 2, ...)
ticklab <- pretty_ticks(minlab, maxlab, 2, ...)
ticklab <- format.bignum(ticklab, unit="", sep="")
tickpos <- tickpos - tickpos[length(tickpos)] *0.5 # move it 50% to the left
lines(c(tickpos[1], tickpos[length(tickpos)]), rep(y+lht*2,2), col=col,
lwd=lwd, lend=1)
segments(tickpos, y+lht*1, tickpos, y+lht*3, col="black", lwd=1)
text(tickpos[1] + (tickpos[length(tickpos)]-tickpos[1])*.7, y+lht*0,
title, adj=.5, font=1, cex=cex)
text(tickpos, y+lht*4, ticklab, cex=cex) # Bottom side of scale
}
plot.segments <- function(segobj, colvec, startfrom, lwd=3, sort.segs=TRUE,
form=NULL) {
intsmat <- segobj[, c("Start", "End")]
intslen <- intsmat[,2]-intsmat[,1]
clusts <- find.intclusters(intsmat, sort.by.size=sort.segs)
plot.int(intsmat, clusts, colvec, lwd=lwd, startfrom, stack_ints=TRUE,
form=form)
}
plot.lodclust<-function(intmat, col, lwd=3, ymax, stack_ints=TRUE, form=NULL){
intmat <- matrix(intmat, ncol=2)
lwdusr <- pt2usr(lwd, axisunit="x", lpi=96)
onestep <- lwdusr * 1.2 * sign(ymax)
y <- ymax
idx <- 1L:nrow(intmat)
assigned_levs <- idx
for(i in 1L:nrow(intmat)) {
if(i > 1L) {
if(stack_ints) {
used_lev<-assigned_levs[sapply(idx,function(x) overlap(intmat[i,],
intmat[x,]) & x!=i)]
clear <- min(setdiff(idx, used_lev))
assigned_levs[i] <- clear
} else {
clear <- i
}
y <- ymax + onestep * (clear-1L)
}
plot.lodint(intmat[i,], col=col[i], lwd=lwd, y=y,form=form[i])
}
}
plot.lodint<-function(lodint, lty=2, col=1, lwd=3, y=NULL, form=1) {
lowint<-numeric()
highlim<-numeric()
if(length(dim(lodint))>1) {
lowlim<-lodint[1,2]
highlim<-lodint[nrow(lodint),2]
}else{
lowlim<-lodint[1]
highlim<-lodint[2]
}
if(!is.null(form)) {
line_col <- "darkgray"
} else {
line_col <- col
}
lines(rep(y, 2), c(lowlim, highlim), col=line_col, lwd=lwd, lend=1)
if(!is.null(form)){
points(y, mean(c(lowlim, highlim)), col=col, lwd=lwd, lend=1,
pch=form, cex=1)
}
}
## Auxiliary functions
## --------- ---------
split.bins <-function(dataframe, column=NULL, step, fun=length, group.by=NULL){
## splits data by bins of 'step' size and apply 'fun' to each cell resulting
## from the combination of levels bin and the group.by column in dataframe
## (if provided)
##
## dataframe object to split n bins
## column column to use as variable to create bins. Can
## be a integer or a string with colname
## step bin size. values > 1 are used as absolute
## size or a fraction of the range in column
if(is.null(fun))
return(dataframe)
# if(is.character(fun))
# fun <- get(fun)
if(is.null(column))
x<-dataframe
else
x<-dataframe[,column]
if(is.data.frame(x) | is.matrix(x)) {
if(ncol(x)>1) {
y=x[,2]
x=x[,1]
} else {
y=x
}
}else{
y=x
}
y<-y[!is.na(y)]
x<-x[!is.na(x)]
if(identical(length(x), 0L))
return(NULL)
span<-diff(range(x, na.rm=TRUE))
if(step<=1)
step<-span*step
bin<-ceiling(x/step)
mids<-round((bin*step+(bin-1)*step)/2)
if(is.null(group.by)) {
byfactor <- list(mids)
} else {
byfactor <- list(mids, group.by)
}
out<-tapply(y, byfactor, fun)
if(!is.null(group.by) & identical(fun, length)) {
out[is.na(out)] <- 0
}
outname<-deparse(substitute(fun))
if(grepl('length', outname)) outname<-'counts'
if(is.null(group.by))
out <- as.numeric(out)
output<-list(mids=sort(unique(mids)), out)
names(output)[length(output)]=outname
return(output)
} # split.bins
chrplotMar <- function(mar, sides, tracks, lwd, lpi=96, stacked=FALSE) {
## Calculate appropriate margin for each track according to what other tracks
## are plotted on the same chromosomal side. If a histogram and segments are
## plotted on the same side, the histogram start where segments end.
if(length(sides)!=length(tracks))
stop("Arguments sides and tracks must be of same length.")
if(any(!sides %in% c(-1,1)))
stop("values of sides must be in c(-1, 1)")
out <- rep(mar, length(sides))
os <- pt2usr(lwd, axisunit="x", lpi=lpi)
are_drawn<- !sapply(tracks, is.null)
are_ints <- sapply(tracks, attr, "is_int")
are_ints <- unlist(sapply(are_ints, function(x) if(is.null(x)) FALSE else x))
off_set <- which(are_drawn & !are_ints)
side_height <- list(`-1`=NULL, `1`=NULL)
## If plotting int segments and stacking, calculate max cluster size to
## adjust margin
track_height <- rep(1, length(tracks))
if(sum(are_ints)>0 & stacked) {
track_height[are_ints] <- sapply(tracks[are_ints], max.cluster.height)
}
# Max height per chrom side
side_height_tmp <- tapply(track_height, sides, max)
side_height[names(side_height_tmp)] <- side_height_tmp
# Add the hight of one line each segment in the same side as a hist
if(length(off_set)>0L) {
for (i in 1:length(off_set)) {
if(any(sides[-i] %in% sides[i] & are_ints[-i]))
out[i] <- mar + os * side_height[[sides[i]*.5+1.5]]
}
}
return(out)
}
format.bignum <- function(number, unit="b", sep=" ") {
if(length(number)>1)
return(unlist(sapply(number, format.bignum, unit, sep)))
gigas <- number/1e9
megas <- number/1e6
kilos <- number/1e3
gig_u <- paste(sep="", "G", unit)
meg_u <- paste(sep="", "M", unit)
kil_u <- paste(sep="", "K", unit)
if(gigas[length(gigas)] == 0)
return(0)
if(gigas[length(gigas)] == as.integer(gigas[length(gigas)]))
return(paste(gigas, gig_u, sep=sep))
else if(megas[length(megas)] == as.integer(megas[length(megas)]))
return(paste(megas, meg_u, sep=sep))
else if(kilos[length(kilos)] == as.integer(kilos[length(kilos)]))
return(paste(kilos, kil_u, sep=sep))
else
return(paste(format(number, big.mark=",", trim=TRUE), unit, sep=sep))
}
max.cluster.size <- function(segslist) {
ints_by_chrom <- lapply(segslist, function(x) x[,c("Start", "End")])
clusts_by_chrom <- lapply(ints_by_chrom, find.intclusters,
sort.by.size=FALSE)
output <- max(sapply(clusts_by_chrom[[1]], length))
return(output)
}
max.cluster.height <- function(segslist) {
ints_by_chrom <- lapply(segslist, function(x) x[,c("Start", "End")])
clusts_by_chrom <- lapply(ints_by_chrom, find.intclusters,
sort.by.size=FALSE)
ints_by_clust <- list()
for (chr in 1:length(ints_by_chrom)) {
ints_by_clust[[chr]] <- lapply(clusts_by_chrom[[chr]],
function(x) ints_by_chrom[[chr]][x,])
}
output <- max(unlist(sapply(ints_by_clust,
function(x) sapply(x, maxclustsize))))
return(output)
}
## this is just an approximation to the number of levels needed for plotting
## a clusters
maxclustsize <- function(ints) {
tot_size <- sum(apply(ints, 1, diff))
min_possible_size <- max(ints[,2])-min(ints[,1])
max_possible_size <- mean(ints[,2])-mean(ints[,1])
estimate1 <- tot_size/min_possible_size
estimate2 <- tot_size/max_possible_size
estimated_size <- ceiling((estimate1+estimate2)/2)
return(estimated_size)
}
scale.track <- function(track, stat.col="statSumm", mintarget, maxtarget) {
## Converts scale of values in a track to a new range of values
## (mintarget,maxtarget)
maxstat <- max(unlist(sapply(track, function(x) max(x$statSumm))))
minstat <- min(unlist(sapply(track, function(x) min(x$statSumm))))
for(i in length(track)) {
track[[i]] <- scale.vec(track[[i]][,stat.col], minstat, maxstat,
mintarget, maxtarget)
}
return(track)
}
scale.vec <- function(vec, minstat, maxstat, mintarget, maxtarget) {
(vec-minstat)/(maxstat-minstat)*(maxtarget-mintarget)+mintarget
}
checkunit <- function(featable, column=c("Start", "End"), minlen=10e6) {
if(max(featable[,column], na.rm=TRUE)<minlen) { # it is not Mb
featable[,column] <- featable[,column] * 1e6
}
return(featable)
}
sort.chrom <- function(chroms) {
if(is.list(chroms))
return(sort.chrom.list(chroms))
chroms <- as.character(chroms)
dchroms <-sub("chr", "", chroms)
arenum <- grep("^[[:blank:]]*[[:digit:]]+[[:blank:]]*$", dchroms)
aresex <- setdiff((1:length(chroms)), arenum)
outchroms <- chroms[arenum][order(as.numeric(dchroms[arenum]))]
outchroms <- c(outchroms, sort(chroms[aresex]))
return(outchroms)
}
sort.chrom.list <- function(chromList) {
chroms <- names(chromList)
return(chromList[sort.chrom(chroms)])
}
pretty.choose <- function(number, num.options) {
if(missing(num.options))
num.options <- c(0, 5, 10, 20, 50, seq(100, 1000, by=100),
seq(2e3, 1e4, by=1e3), seq(2e4, 1e5, by=1e4), seq(2e5, 1e6, by=1e5),
seq(1e-01, 1, by=1e-01))
if(length(number)>1)
return(unique(unlist(sapply(number, pretty.choose, num.options))))
output <- number
num.options <- sort(num.options, decreasing=TRUE)
for(i in 1:length(num.options)) {
if(number > num.options[i] | i==length(num.options)) {
output <- num.options[i]
break
} else if(number > num.options[i+1]) {
if((num.options[i] - number) <= (number - num.options[i+1]))
output <- num.options[i]
else
output <- num.options[i+1]
break
} else
next
}
return(output)
}
pretty_ticks <- function(minval, maxval, howmany=3, as.is=FALSE, digits=2, ...){
outticks <- signif(seq(minval, maxval, length=howmany), digits=digits)
if(!as.is) {
outticks <- pretty.choose(outticks)
}
return(outticks)
}
pt2usr <- function(lwd, axisunit="x", lpi=96) {
if(!axisunit %in% c("x", "y"))
stop("Argument 'axisunit' must be in c('x', 'y').")
pusr <- par("usr")
pinch <- par("pin")
lwdin <- lwd * 1/lpi
if(axisunit=="x")
usr.in <- (pusr[2]-pusr[1])/pinch[1]
else
usr.in <- (pusr[4]-pusr[3])/pinch[1]
lwdusr <- lwdin * usr.in
return(lwdusr)
}
usr2pt <- function(lwdusr, axisunit="x", lpi=96) {
# lwdusr line width in user coordinates units
# lpi lines per inch (device dependent)
if(!axisunit %in% c("x", "y"))
stop("Argument 'axisunit' must be in c('x', 'y').")
pusr <- par("usr")
pinch <- par("pin")
if(axisunit == "x")
usr.in <- (pusr[2]-pusr[1])/pinch[1]
else
usr.in <- (pusr[4]-pusr[3])/pinch[1]
lwdin <- lwdusr / usr.in
lwdpt <- lwdin * lpi
return(lwdpt)
}
find.intclusters <- function(intmat, sort.by.size=TRUE){
indices <- 1:nrow(intmat)
clusters <- list()
i=0
while(length(indices)>0L) {
i=i+1
lrgst <- which.max(intmat[indices,2]-intmat[indices,1])
members <- indices[lrgst]
secindx <- setdiff(indices, members)
if(length(secindx)>0L) {
member_mem <- 0
while(!all(members %in% member_mem)) {
member_mem <- members
for(j in secindx) {
if(overlap(intmat[members,], intmat[j,])) {
members <- unique(append(members, j))
secindx <- setdiff(indices, members)
}
}
}
}
if(sort.by.size)
members <- members[order(intmat[members,2]-intmat[members,1],
decreasing=TRUE)]
clusters[[i]] <- members
indices=setdiff(indices, members)
}
cat(" ", length(clusters), "segment clusters.\n")
return(clusters)
}
overlap<-function(int1, int2) {
if(length(int1)==0L | length(int2)==0L)
return(FALSE)
if(!is.null(dim(int1)))
int1 <- c(min(int1[,1]), max(int1[,2]))
if(!is.null(dim(int2)))
int2 <- c(min(int2[,1]), max(int2[,2]))
int1 <- as.numeric(sort(int1))
int2 <- as.numeric(sort(int2))
len1 <- diff(int1)
len2 <- diff(int2)
total_len <- len1 + len2
span <- max(int1, int2) - min(int1, int2)
if(length(total_len)!=0L & length(span)!=0L)
if(total_len >= span) {
return(TRUE)
}
return(FALSE)
}
checkStructure <- function(data = NULL, rmrnd=FALSE,
columnNames=c("Chrom", "Start", "End"),
optional.columns=NULL) {
if(is.null(data)) return(NULL)
# GenomicRanges
if("GRanges" %in% class(data)) {
data <- data.frame(seqnames(data), start(data), end(data), mcols(data))
colnames(data)[1:3] <- columnNames
}
if(is.character(data)) {
# I will assume that this is a file name
if(!file.exists(data) & !grepl("http", data))
stop("File ", data, "does not exist.")
Lines<-read.delim(data, sep="\n", header=FALSE, fill=FALSE,
comment.char="#")
if(grepl("[A|C|G|T]{1,}", Lines[2,]) ){
cat ("Reading AXT file", "\n")
data <- transformAlign(Lines)
} else {
cat("Reading file ", data, " assuming bed format...", "\n")
data <- read.delim(data, sep="\t", header=TRUE, fill=FALSE,
comment.char="#")
}
}
name <- deparse(substitute(data))
if(!is.data.frame(data)){
# Everything failed....
stop(name, " could not be read.")
}
datacols <- colnames(data)
if(!all(columnNames %in% datacols)){
stop(name, " is missing some column(s): ",
paste(setdiff(columnNames, datacols), collapse=", "))
}
areNA <- apply(is.na(data[,columnNames]), 1, any)
if(sum(areNA)>0L) {
warning(sum(areNA), " rows have missing values for columns ",
paste(columnNames, collapse=", "),
"and will be removed.")
data <- data[!areNA,]
}
# Assuming first column name is Chromosome
data[,columnNames[1]] <- as.character(sub("^chr", "",
as.character(data[,columnNames[1]])))
if(rmrnd) {
#Remove bad chrom
bad_chroms <-grepl("_", data[,columnNames[1]])
if( length ( bad_chroms )>0L )
data <- data[!bad_chroms,]
}
if(!is.null(optional.columns)) {
for(i in 1: length(optional.columns)) {
if(optional.columns[i] %in% datacols)
data[,optional.columns[i]] <- as.character(data[,optional.columns[i]])
}
}
return(data)
}
selectchr <- function(chrom, data) {
if( any (!chrom %in% names(data)))
stop("Some chromsomes in 'chr' cannot be drwan.")
data <- data[names (data) %in% chrom]
return (data)
}
transformAlign <- function(filename = NULL){
y <- nrow(filename) # size file
row <- seq(1, y, by=3) # vector with the number of rows that
## contains chromosomal position
data <- filename[row, ]
data<-as.character(data)
data <- strsplit(data, " ", fixed=TRUE)
matrix <- t(sapply(data, unlist)) #list to matrix
aux <-matrix[, c(2:5)]
dataBed <- as.data.frame(aux)
colnames(dataBed) <- c("Chrom", "Start", "End", "Group")
dataBed$Start <-as.numeric(as.character(dataBed$Start))
dataBed$End <-as.numeric(as.character(dataBed$End))
dataBed$Group <-as.character(dataBed$Group)
dataBed$Chrom <-sub("^chr([^:digit:]|[M|X|Y|].+)", "\\1", dataBed$Chrom)
return(dataBed)
}
getAnnotBiomaRt <-function(org=NULL, annotBiomaRt=NULL ) {
if(!is.null(org) ){
if(annotBiomaRt){
dataset <-paste(org, "_gene_ensembl", sep="")
ensembl <- useMart("ENSEMBL_MART_ENSEMBL",dataset=dataset,
host="www.ensembl.org")
annot<-getBM(attributes=c("chromosome_name","start_position",
"end_position","ensembl_gene_id", "strand"),
filters="chromosome_name",
values=as.list(as.character(c(1:40, paste(1:40, "L", sep=""),
paste(1:40, "R", sep=""), "X", "Y")))
, mart = ensembl)
colnames(annot)[1]<-"Chrom"
colnames(annot)[2]<-"Start"
colnames(annot)[3]<-"End"
colnames(annot)[4]<-"Name"
colnames(annot)[5]<-"Strand"
return(annot)
}
}
}
#For plotting ID from tables
unstacked<-function(x=NULL, y=NULL, trackId=NULL, cex=NULL){
rest <- NULL
trackId$x <- x
trackId$y <- y
for(i in 1:nrow(trackId)){
rest <- 0
if(i<=nrow(trackId)-1){
rest<-trackId[i+1,"mids"] - trackId[i,"mids"]
offset <- trackId[i,"mids"] + abs(strheight(trackId[i,"ID"], cex=cex)*1.05)- trackId[i+1,"mids"]
if(offset>0){
trackId[i+1, "y"]<- trackId[i+1, "y"]+offset
}
}
}
return(trackId)
}
cytoBands<- function(data=NULL){
if(!"Colors" %in% names(data)) {
if("gieStain" %in% names(data)) {
data <- data[!is.na(data[ ,"gieStain"]), ]
type.bands <- match(data$gieStain, c("acen", "gneg", "gpos",
"gvar", "stalk", "gpos25", "gpos50","gpos66", "gpos75", "gpos100"))
bandcol<- c("#CCCCCC", "grey96", "#000000", "grey96", "#CCCCCC",
"grey90", "grey70","#666666", "grey40", "grey0")[type.bands]
data$Colors<-bandcol
} else{
stop("The parameter bands does not contain a color column or",
" Giemsa stain results (UCSC format).")
}
} else{
if("Group" %in% names(data)) {
data<-NULL
}
}
return(data)
}
makeCache <- function() {
cache <- new.env(parent=emptyenv())
list(get = function(key) cache[[key]],
set = function(key, value) cache[[key]] <- value,
load = function(rdaFile) load(rdaFile, cache),
ls = function() ls(cache))
}
# Save an object in a chache
saveObj <- function(object, oname, cname="chromPlot-cache") {
chrpltchache <- new.env(parent=emptyenv())
assign(oname, object, envir=chrpltchache)
attach(chrpltchache, name=cname)
} # saveObj
# Get object from cache. If not there, returns NULL silently
getObj <- function(oname, cname="chromPlot-cache") {
if(exists(oname)) {
out <- get(oname)
return(out)
}
return(NULL)
} # getObj
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.