Nothing
# functions for plotting metaProfiles:
# default color scheme for heatmaps
#.jets <- colorRampPalette(c("#334b8e", "#456ca7", "#71c6cd", "#8fc56c",
# "#f3e92b","#e6762b","#d9272a"))
.jets<-colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan",
"#7FFF7F", "yellow", "#FF7F00", "red", "#7F0000"))
# winsorize a matrix using percentile ranges
.winsorize<-function(mat,rng){
hi.th=quantile(mat,rng[2]/100,na.rm=TRUE)
lo.th=quantile(mat,rng[1]/100,na.rm=TRUE)
mat[mat>hi.th]=hi.th
mat[mat<lo.th]=lo.th
mat
}
# ---------------------------------------------------------------------------- #
#' Heatmap for meta-region profiles
#'
#' Function calculates meta-profile(s) from a ScoreMatrix or a ScoreMatrixList, then
#' produces a heatmap or a set of stacked heatmaps for meta-region profiles
#'
#' @param mat \code{ScoreMatrix} or \code{ScoreMatrixList} to be plotted
#' @param centralTend a character that determines central tendency of meta-profile(s).
#' It takes "mean" (default) or "median".
#' @param profile.names a character vector for names of profiles. If NULL,
#' the names
#' will be taken from names(mat) if mat is a
#' \code{ScoreMatrixList} object.
#' @param xcoords a vector of numbers showing relative positions of the bases or
#' windows. It must match the number of columns in the \code{ScoreMatrix}
#' For example: if there are 2001 elements in the matrices which
#' are base-pair resolution
#' and they are centered around an anchor point like TSS, the xcoords
#' argument should be -1000:1000. This argument is used to plot
#' accurate x-axis labels for the plots.If NULL it will be equal
#' to 1:ncol(mat).
#' @param meta.rescale if TRUE meta-region profiles are scaled to 0 to 1 range by
#' subracting the min from profiles and dividing them by
#' max-min.
#' @param winsorize Numeric vector of two, defaults to c(0,100). This vector
#' determines the upper and lower percentile values to limit the
#' extreme values. For example, c(0,99) will limit the values to
#' only 99th percentile, everything above the 99 percentile will
#' be equalized to the value of 99th percentile. This is useful
#' for visualization of matrices that have outliers.
#' @param col a vector of color pallete.
#' color scheme to be used. If NULL, a version of jet colors will be
#' used.
#' @param legend.name a character label plotted next to the legend
#' @param cex.legend A numerical value giving the amount by which
#' legend axis marks should be magnified relative to the default
#' @param xlab label a character string for x-axis
#' @param main a character string for the plot title
#' @param cex.lab A numerical value giving the amount by which
#' axis labels (including 'legend.name')
#' should be magnified relative to the default.
#' @param cex.axis A numerical value giving the amount by which
#' axis marks should be magnified relative to the default
#'
#' @return returns meta-profile matrix invisibly.
#'
#'
#' @examples
#' data(cage)
#' data(promoters)
#' scores1=ScoreMatrix(target=cage,windows=promoters,strand.aware=TRUE)
#' data(cpgi)
#' scores2=ScoreMatrix(target=cpgi,windows=promoters,strand.aware=TRUE)
#'
#' x=new("ScoreMatrixList",list(scores1,scores2))
#' \donttest{
#' heatMeta(mat=x,legend.name="fg",cex.legend=0.8,main="fdf",cex.lab=6,
#' cex.axis=0.9)
#' }
#' @export
heatMeta<-function(mat, centralTend="mean",
profile.names=NULL,xcoords=NULL,col=NULL,
meta.rescale=FALSE, winsorize=c(0,100),
legend.name=NULL,cex.legend=1,xlab=NULL,
main="",cex.lab=1,cex.axis=1){
# check class
if(! class(mat) %in% c("ScoreMatrix","ScoreMatrixList"))
stop("mat is not ScoreMatrix or ScoreMatrixList\n")
# check centralTend
if(! centralTend %in% c("median","mean"))
stop("centralTend is not mean or median\n")
# get meta profiles by taking the mean
if( class(mat)=="ScoreMatrix" ){
if(centralTend=="mean"){
metas=list(colMeans(mat,na.rm=TRUE))
}else{
metas=list(apply(mat, 2, function(x) median(x,na.rm=TRUE)))
}
}else if( class(mat)=="ScoreMatrixList" ){
if(centralTend=="mean"){
metas=lapply(mat,function(a) colMeans(a,na.rm=TRUE) )
}else{
metas=lapply(mat,function(a) apply(a, 2, function(x) median(x,na.rm=TRUE)) )
}
}
# if the ncols of matrices do not match do not plot anything
if(length(unique(sapply(metas,ncol))) != 1){
stop("ScoreMatrix number of columns do not match\n",
"Try using binMatrix to make matrices with large number of columns",
"equal to the smaller ones\n")
}
# get the default xcoordinates to plot
if(!is.null(xcoords)){
# if it is a two element vector of start and end coordinates
# of the first and the last column of the matrix
if(length(xcoords)==2 & xcoords[1]<xcoords[2]){
xcoords=seq(xcoords[1],xcoords[2],length.out=length(metas[[1]]) )
}
if(length(xcoords) != length(metas[[1]]))
stop("xcoords has wrong length: ",length(xcoords)," \n",
"it should be equal to the number of columns of ScoreMatrices\n",
"which is: ",length(metas[[1]]),"\n")
}else{
xcoords=1:length(metas[[1]])
}
# try to get profile names from names of ScoreMatrixList
if(is.null(profile.names) & !is.null(names(mat)) & class(mat)=="ScoreMatrixList" )
{
profile.names=names(mat)
}
# if user wants scaling
if(meta.rescale){
metas=lapply(metas,function(x) (x-min(x))/(max(x)-min(x) ) )
}
img=do.call("rbind",metas)
if(is.null(col)){
col=.jets(100)
}
marOrg=par()$mar # get original parMar to be used later
marNew=marOrg
marNew[4]=6.1
par(mar=marNew)
image(x=xcoords,y=1:nrow(img),z=as.matrix(t(img[nrow(img):1,,drop=FALSE])),useRaster=TRUE,
col=col,yaxt="n",ylab="",xlab=xlab,main=main,
cex.lab=cex.lab,cex.axis=cex.axis)
if(!is.null(profile.names)){
axis(side=4,at=1:nrow(img),labels=rev(profile.names),
las=2,cex.axis=cex.axis)
}
#plot.new()
vps <- baseViewports()
pushViewport(vps$figure) # get the plot viewport from base graphics
# showViewport(current.viewport());current.vpTree()
#grid.text(c("one"),
# x=unit(1, "native"), y=unit(2, "native"),
# just="right", rot=60)
if(par()$mar[2]<4.1){
warning("left margin of the plot (set by mar in par()) should not be less then 4.1 lines")
}
# make view port for the legend
legendVp <- viewport(width=unit(1, "lines"), height=unit(0.4, "npc"),
x = unit(3, "lines"), y = unit(0.5, "npc"),just="left")
pushViewport(legendVp) # push the legend VP
#current.viewport()
current.vpTree()
rng=range(img)
.heatLegendY(min=rng[1],max=rng[2],cols=col,
legend.name=legend.name,main=TRUE,cex.legend=cex.legend,
cex.lab=cex.lab)
popViewport(2) # remove the legend VP
current.vpTree()
par(mar=marOrg)
metas=do.call("rbind",metas)
invisible(metas)
}
# function based on plotrix::dispersion,
# and additionally it takes into account NA values in data
# the problem is that when scoreMatrix is calculated then
# some of the columns have only one numerical value and rest of them is NA
# and then mean of such column is a numerical value, but variation e.g. sd is NA.
# border = NA omits borders.
# Args:
# ulim and llim: the extent of the dispersion measures (upper and lower band)
# intervals: whether the limits are intervals (TRUE) or absolute values (FALSE)
# border: line type for drawing a border on the confidence band, e.g. 2
.dispersion2 <- function(x, y, ulim, llim=ulim, intervals=TRUE,
border = NA, ...){
if (intervals) {
llim <- y - llim
ulim <- y + ulim
}
ulim.na <- is.na(ulim)
if(any(ulim.na)){
#selecting 'segments' of numerical values that are located between NA's
w <- which(ulim.na)
previous.v <- 0 #location of NA to the left of segment
for(i in 1:length(w)){ #for each segment
if(w[i]==previous.v+1){
#if there are some neighboring NA
previous.v <- w[i]
next
}
from <- previous.v+1
to <- w[i]-1
#running polygon separately for each segment
polygon(c(x[from:to], rev(x[from:to])),
c(llim[from:to], rev(ulim[from:to])),
border = border, ...)
previous.v <- w[i]
}
from <- previous.v+1
to <- length(x)
polygon(c(x[from:to], rev(x[from:to])),
c(llim[from:to], rev(ulim[from:to])),
border = border, ...)
}else{
polygon(c(x, rev(x)), c(llim, rev(ulim)),
border = border, ...)
}
}
# ---------------------------------------------------------------------------- #
#' Line plot(s) for meta-region profiles
#'
#' Function calculates meta-profile(s) from a ScoreMatrix or a ScoreMatrixList, then
#' produces a line plot or a set of line plots for meta-region profiles
#'
#' @param mat \code{ScoreMatrix} or \code{ScoreMatrixList} object. If it is a
#' \code{ScoreMatrixList} object, all matrices in the ScoreMatrixList should have
#' the same number of
#' columns.
#' @param centralTend a character that determines central tendency of meta-profile(s).
#' It takes "mean" (default) or "median".
#' @param overlay If TRUE multiple profiles will be overlayed in the same plot
#' (Default:TRUE). If FALSE, and mat is a ScoreMatrixList, consider
#' using par(mfrow=c(1,length(mat))) to see the plots from all
#' matrices at once.
#' @param winsorize Numeric vector of two, defaults to c(0,100). This vector
#' determines the upper and lower percentile values to limit the
#' extreme values. For example, c(0,99) will limit the values to
#' only 99th percentile, everything above the 99 percentile will
#' be equalized to the value of 99th percentile.This is useful
#' for visualization of matrices that have outliers.
#' @param profile.names a character vector for names of the profiles. The order
#' should be same as the as the order of ScoreMatrixList.
#' @param xcoords a numeric vector which designates
#' relative base positions of the meta-region profiles.
#' For example, for a 2001 column ScoreMatrix, xcoord=-1000:1000 indicates
#' relative positions of each column in the score matrix. If NULL (Default),
#' xcoords equals to 1:ncol(mat)
#' @param meta.rescale if TRUE meta-region profiles are scaled to 0 to 1 range by
#' subtracting the min from profiles and dividing them by max-min.
#' If dispersion is not NULL, then dispersion will be scaled as well.
#' @param smoothfun a function to smooth central tendency and dispersion bands (Default: NULL), e.g.
#' stats::lowess.
#' @param line.col color of lines for \code{centralTend} of meta-region profiles. Defaults to colors from
#' \code{rainbow()} function.
#' @param ylim same as \code{ylim} at \code{\link{plot}} function.
#' if NULL ylim is estimated from all meta-region profiles.
#' @param ylab same as \code{ylab} at \code{\link{plot}} function.
#' Default: "average score"
#' @param xlab same as \code{xlab} at \code{\link{plot}} function.
#' Default: "bases"
#' @param dispersion shows dispersion interval bands around \code{centralTend} (default:NULL). It takes
#' one of the character:
#' \itemize{
#' \item{"se"}{shows standard error of the mean and 95 percent confidence interval for the mean}
#' \item{"sd"}{shows standard deviation and 2*(standard deviation)}
#' \item{"IQR"}{shows 1st and 3rd quartile and
#' confidence interval around the median based on the median +/- 1.57 * IQR/sqrt(n) (notches)}
#' }
#' @param dispersion.col color of bands of \code{dispersion}.
#' Defaults to colors from \code{rainbow()} and transparency is set to 0.5
#' (rainbow(length(mat), alpha = 0.5)).
#' @param ... other options to \code{\link{plot}}
#'
#' @return returns the meta-region profiles invisibly as a matrix.
#'
#' @note
#' Score matrices are plotted according to ScoreMatrixList order.
#' If ScoreMatrixList contains more than one matrix then they will
#' overlap each other on a plot, i.e.
#' the first one is plotted first and every next one overlays previous one(s) and
#' the last one is the topmost.
#'
#' Missing values in data slow down plotting dispersion around central tendency.
#' The reason is that dispersion is plotted only for non-missing values,
#' for each segment that
#' contains numerical values \code{graphics::polygon} function is used to plot dispersion bands.
#' There might be a situation, when in a column of ScoreMatrix is only one
#' numeric number and the rest are NAs, then at corresponding position
#' only central tendency will be plotted.
#'
#' Notches show the 95 percent confidence interval for the median
#' according to an approximation based on the normal distribution.
#' They are used to compare groups - if notches corresponding to adjacent base pairs
#' on the plot do not overlap, this is strong evidence that medians differ.
#' Small sample sizes (5-10) can cause notches to extend beyond the interquartile range (IQR)
#' (Martin Krzywinski \emph{et al}. \emph{Nature Methods 11}, 119-120 (2014))
#' @examples
#' data(cage)
#' data(promoters)
#' scores1=ScoreMatrix(target=cage,windows=promoters,strand.aware=TRUE)
#'
#' data(cpgi)
#' scores2=ScoreMatrix(target=cpgi,windows=promoters,strand.aware=TRUE)
#'
#' # create a new ScoreMatrixList
#' x=new("ScoreMatrixList",list(scores1,scores2))
#' \donttest{
#' plotMeta(mat=x,overlay=TRUE,main="my plotowski")
#'
#' # plot dispersion nd smooth central tendency and variation interval bands
#' plotMeta(mat=x, centralTend="mean", dispersion="se", winsorize=c(0,99),
#' main="Dispersion as interquartile band", lwd=4,
#' smoothfun=function(x) stats::lowess(x, f = 1/5))
#' }
#' @export
#' @docType methods
#' @rdname plotMeta
#'
plotMeta<-function(mat, centralTend="mean",
overlay=TRUE,winsorize=c(0,100),
profile.names=NULL,xcoords=NULL,
meta.rescale=FALSE,
smoothfun=NULL,
line.col=NULL,
dispersion=NULL,dispersion.col=NULL,
ylim=NULL,ylab="average score",xlab="bases", ...){
# check class
if(! class(mat) %in% c("ScoreMatrix","ScoreMatrixList"))
stop("mat is not ScoreMatrix or ScoreMatrixList\n")
# check centralTend args
if(! centralTend %in% c("median","mean"))
stop("centralTend is not mean or median\n")
# check dispersion args
disp.args <- c("se","sd","IQR") #dispersion arguments
if(!is.null(dispersion) && !dispersion %in% c(disp.args)){
stop("dispersion is not FALSE, 'se', 'sd' or 'IQR'\n")
}
# check smoothfun
if(!is.null(smoothfun) & !is.function(smoothfun)){stop("'smoothfun' has to be a function or NULL\n")}
if(is.null(line.col) & is.null(dispersion))
line.col=ifelse(is.list(mat),
list(rainbow(length(mat))),
"black")[[1]]
if(is.null(line.col) & is.null(dispersion.col)){
dispersion.col=ifelse(is.list(mat),
list(rainbow(length(mat), alpha = 0.4)),
rainbow(1, alpha=0.4))[[1]]
line.col=ifelse(is.list(mat),
list(rainbow(length(mat))),
rainbow(1))[[1]]
}
#mat is always a list/ScoreMatrixList
if( class(mat)=="ScoreMatrix" ){
mat <- list(mat)
}
# if the ncols of matrices do not match do not plot anything
if(length(unique(sapply(mat,ncol))) != 1){
stop("ScoreMatrix number of columns do not match\n",
"Try using binMatrix to make matrices with high number of columns",
"equal\n")
}
#init of some variables before for loop
if(!is.null(dispersion) && dispersion %in% disp.args){
bound1<-list()
if(dispersion=="IQR"){q1<-list(); q3<-list();
}else{bound2 <- list()}
}
metas<-list()
for(i in 1:length(mat)){ #for every score matrix
# this can set extreme values to given percentile
if(winsorize[2]<100 | winsorize[1]>0){
mat[[i]]=.winsorize(mat[[i]]@.Data, winsorize)
}
# get meta profiles by taking the mean/median
if(centralTend=="mean"){
if(!is.null(dispersion) && dispersion=="IQR"){
warning("dispersion is set to show 1st and 3rd quartile and
confidence interval around the median,
but centralTend is 'mean'. Setting centralTend to 'median'..\n")
metas[[i]]=colMedians(mat[[i]], na.rm=TRUE)
}else{
metas[[i]]=colMeans(mat[[i]], na.rm=TRUE)
}
}else if(centralTend=="median"){
if(!is.null(dispersion) && dispersion=="se"){
warning("dispersion is set to standard error of the mean and 95% confidence interval for the mean, but
centralTend is 'median'. Setting centralTend to 'mean'..\n")
metas[[i]]=colMeans(mat[[i]],na.rm=TRUE)
}else{
metas[[i]]=colMedians(mat[[i]], na.rm=TRUE)
}
}
# calculate dispersion around the mean/median
if(!is.null(dispersion) && dispersion %in% disp.args){
if(dispersion=="se"){
bound1[[i]] <- std.error(mat[[i]], na.rm = TRUE)
bound2[[i]] <- bound1[[i]] * 1.96
}else if(dispersion=="sd"){
bound1[[i]] <- colSds(mat[[i]], na.rm=TRUE)
bound2[[i]] <- bound1[[i]] * 2
}else if(dispersion=="IQR"){
q <- colQuantiles(mat[[i]], probs=c(0.25, 0.75), na.rm=TRUE)
q1[[i]] <- q[,1] #1st quartile
q3[[i]] <- q[,2] #3rd quartile
n<-nrow(mat[[i]])
bound1[[i]] <- (1.57*(q3[[i]] - q1[[i]])) / sqrt(n) #notch
}
}
if(meta.rescale){
val2unit <- function(x){(x-min(x, na.rm = TRUE))/(max(x, na.rm = TRUE)-min(x, na.rm = TRUE))}
metas[[i]]=val2unit(metas[[i]])
if(!is.null(dispersion) && dispersion %in% disp.args){
bound1[[i]]=val2unit(bound1[[i]])
if(dispersion=="IQR"){
q1[[i]]=val2unit(q1[[i]])
q3[[i]]=val2unit(q3[[i]])
}else{
bound2[[i]]=val2unit(bound2[[i]])
}
}
}
#smoothing
if(!is.null(smoothfun)){
metas[[i]] <- smoothfun(metas[[i]])$y
if(!is.null(dispersion) && dispersion %in% disp.args){
bound1[[i]] <- smoothfun(bound1[[i]])$y
if(dispersion=="IQR"){
q1[[i]] <- smoothfun(q1[[i]])$y
q3[[i]] <- smoothfun(q3[[i]])$y
}else{
bound2[[i]] <- smoothfun(bound2[[i]])$y
}
}
}
}#end of for loop
# get the default xcoordinates to plot
if(!is.null(xcoords)){
# if it is a two element vector of start and end coordinates
# of the first and the last column of the matrix
if(length(xcoords)==2 & xcoords[1]<xcoords[2]){
xcoords=seq(xcoords[1],xcoords[2],length.out=length(metas[[1]]) )
}
if(length(xcoords) != length(metas[[1]]))
stop("xcoords has wrong length: ",length(xcoords)," \n",
"it should be equal to the number of columns of ScoreMatrices\n",
"which is: ",length(metas[[1]]),"\n")
}else{
xcoords=1:length(metas[[1]])
}
# if ylim is not NULL, change the ranges to plot to ylim
if(!is.null(ylim)){
myrange=ylim
}else{
myrange=range(unlist(metas), na.rm = TRUE)
if(!is.null(dispersion) && dispersion %in% disp.args){
if(dispersion!="IQR"){
myrange[2] <- max(unlist(metas) + unlist(bound2), na.rm = TRUE)
myrange[1] <- min(unlist(metas) - unlist(bound2), na.rm = TRUE)
}else{
# can be a situation when q1 or q2 are equal to median
max.q3 <- max(unlist(metas) + unlist(q3), na.rm = TRUE)
min.q1 <- min(unlist(metas) - unlist(q1), na.rm = TRUE)
max.notch.upper <- max(unlist(metas) + unlist(bound1), na.rm = TRUE)
min.notch.lower <- min(unlist(metas) - unlist(bound1), na.rm = TRUE)
if(min.notch.lower < min.q1){
myrange[1] <- min.notch.lower
}else{
myrange[1] <- min.q1
}
if(max.notch.upper > max.q3){
myrange[2] <- max.notch.upper
}else{
myrange[2] <- max.q3
}
}
}
}
marOrg=par()$mar # get original parMar to be used later
marNew=marOrg
marNew[4]=8
par(mar=marNew) # extend right margin for the legend
par(xpd=TRUE) # do this so that you can plot legend out of the plotting box
if(overlay & length(metas)>1){
# plot overlayed lines
if(!is.null(dispersion) && dispersion %in% disp.args){
plot(xcoords,metas[[1]],type="l",col=dispersion.col[1],
ylim=myrange,ylab=ylab,xlab=xlab, ...)
for(i in 1:length(metas)){
# "IQR"
if(dispersion=="IQR"){
# plotting notches
.dispersion2(xcoords, metas[[i]], bound1[[i]],
col=dispersion.col[i], ...)
# plotting 1st and 3rd quartile
.dispersion2(xcoords, metas[[i]],
llim=q1[[i]], ulim=q3[[i]], intervals=FALSE,
col=dispersion.col[i], ...)
# if 1st or 3rd quartile is equal to median, then
# colour notch(es) again
if(all(metas[[i]]==q1[[i]])){
# then colour 'lower' notch again
.dispersion2(xcoords, metas[[i]],
llim=bound1[[i]], ulim=metas[[i]],
col=dispersion.col[i], ...)
}
if(all(metas[[i]]==q3[[i]])){
# then colour 'upper' notch again
.dispersion2(xcoords, metas[[i]],
ulim=bound1[[i]], llim=metas[[i]],
col=dispersion.col[i], ...)
}
# "sd" or "se"
}else{
.dispersion2(xcoords, metas[[i]], bound2[[i]],
col=dispersion.col[i], ...)
.dispersion2(xcoords, metas[[i]], bound1[[i]],
col=dispersion.col[i], ...)
}
}
for(j in 1:length(metas) ){ #drawing central tendency line(s) on top
lines(xcoords,metas[[j]],col=line.col[j],...)
}
}else{ #without plotting dispersion
plot(xcoords,metas[[1]],type="l",col=line.col[1],
ylim=myrange,ylab=ylab,xlab=xlab,...)
for(i in 2:length(metas) ){ #drawing central tendency line(s) on top
lines(xcoords,metas[[i]],col=line.col[i],...)
}
}
# if profile names are given, plot them as legend
if(!is.null(profile.names))
if(!is.null(dispersion) && dispersion %in% disp.args){
legend(max(xcoords)+0.05*max(xcoords),myrange[2],legend=profile.names
,fill=dispersion.col,bty="n", border=line.col)
}else{
legend(max(xcoords)+0.05*max(xcoords),myrange[2],legend=profile.names
,fill=line.col,bty="n")
}
}else{ # plot things one by one, in this case user must use par
for(j in 1:length(metas)){
plot(xcoords,metas[[j]],type="l",col=line.col[j],
ylim=myrange,ylab=ylab,xlab=xlab,...)
if(!is.null(dispersion) && dispersion %in% disp.args){
if(dispersion=="IQR"){
.dispersion2(xcoords, metas[[j]], bound1[[j]],
col=dispersion.col[j], ...)
.dispersion2(xcoords, metas[[j]],
llim=metas[[j]]-q1[[j]], ulim=q3[[j]]-metas[[j]],
col=dispersion.col[j], ...)
# if 1st or 3rd quartile is equal to median, then
# colour notch(es) again
if(all(metas[[i]]==q1[[i]])){
# then colour 'lower' notch again
.dispersion2(xcoords, metas[[i]],
llim=bound1[[i]], ulim=metas[[i]],
col=dispersion.col[i], ...)
}
if(all(metas[[i]]==q3[[i]])){
# then colour 'upper' notch again
.dispersion2(xcoords, metas[[i]],
ulim=bound1[[i]], llim=metas[[i]],
col=dispersion.col[i], ...)
}
}else{
.dispersion2(xcoords, metas[[j]], bound1[[j]],
col=dispersion.col[j], ...)
.dispersion2(xcoords, metas[[j]], bound2[[j]],
col=dispersion.col[j], ...)
}
}
lines(xcoords,metas[[j]],col=line.col[j],...)
}
}
# revert par shit to its original state
par(xpd=FALSE)
par(mar=marOrg)
invisible(do.call("rbind",metas))
}
# put a y axis legend
.heatLegendY<-function(min,max,cols,legend.name,main=TRUE,cex.legend=1,
cex.lab=1){
# get value range as a vector of 100
vals=seq(min,max,length.out=100)
rng <- range(vals, na.rm = TRUE) # get min/mqx
m <- (vals - min)/(max-min) # get normalized range
rasta= rgb(colorRamp(cols)(m), maxColorValue= 255) # get color for each element of range
grid.raster( rev(rasta), interpolate=FALSE,height = unit(1, "npc"),
width=unit(1, "npc")) # make the legend
# make legend ticks
at = seq(0,1,length.out=5); label = seq(min,max,length.out=5)
#make the axis of the legend
grid.yaxis(at=at,label=formatC(label,digits=2,format="g"),main=main,
edits=gEdit("labels", rot=90,hjust=0.5),
gp=gpar(cex=cex.legend)) # make axis for legend
my.x = -2
if(main==FALSE)
my.x=3.4
grid.text(legend.name,rot=90,x=unit(my.x, "npc"),gp=gpar(cex=cex.lab))
}
.heatLegendX<-function(min,max,cols,legend.name,main=TRUE,cex.legend=1,
cex.lab=1,hjust=0,vjust=0){
# get value range as a vector of 100
vals=seq(min,max,length.out=100)
rng <- range(vals, na.rm = TRUE) # get min/mqx
m <- (vals - min)/(max-min) # get normalized range
rasta= rgb(colorRamp(cols)(m), maxColorValue = 255) # get color for each element of range
grid.raster( matrix((rasta),nrow=1), interpolate=FALSE,height = unit(1, "npc"),
width=unit(1, "npc")) # make the legend
# make legend ticks
label = pretty(c(min,max),n=5);at = seq(0,1,length.out=length(label));
#make the axis of the legend
grid.xaxis(at=at,label=label,main=main,
edits=gEdit("labels",hjust=hjust,vjust=vjust),
gp=gpar(cex=cex.legend)) # make axis for legend
my.y = -3
grid.text(legend.name,y=unit(my.y, "npc"),gp=gpar(cex=cex.lab))
}
# convert a matrix or vector to color matrix
# to be used in grid.raster()
.convertToColors <- function(mat,cols,rng=NULL) {
if(is.null(rng)){
# Produce 'normalized' version of matrix, with values ranging from 0 to 1
rng <- range(mat, na.rm = TRUE)
}
# get the color number
nc = length(cols)
# arrange min and max of the matrix in different situations
if (diff(rng) == 0)
rng <- if (rng[1L] == 0){
c(-1, 1)
}else{rng[1L] + c(-0.4, 0.4) * abs(rng[1L])}
# normalize matrix
mat <- (mat - rng[1L])/diff(rng)
# assign color numbers to normalized matrix for each cell
zi <- floor((nc - 1e-05) * mat + 1e-07)
zi[zi < 0 | zi >= nc] <- NA
# construct the final matrix
zc <- cols[zi + 1L]
dim(zc) <- dim(mat)
return(zc)
}
# make a heatmap from a given matrix using grid.raster()
.gridHeat<-function(mat,col,xcoords,xlab,cex.lab,cex.axis,angle=0,
hjust=0,vjust=0,rng=NULL){
mat2=.convertToColors(mat,col,rng)
ras=grid.raster(mat2,interpolate = FALSE, width= unit(1, "npc"),
height=unit(1, "npc"))
if(any(is.character(xcoords))) {
# take legend labels
at = seq(0,1,length.out=length(xcoords)); label = xcoords
} else {
# make legend ticks
at = seq(0,1,length.out=5); label = seq(min(xcoords),max(xcoords)
,length.out=5)
}
ax=grid.xaxis(at=at,label=formatC(label,digits=4,format="g"),
edits=gEdit("labels", rot=angle,hjust=hjust,vjust=vjust),
gp=gpar(cex=cex.axis)) # make axis for legend
grid.text(xlab,y=unit(-2.5, "lines"),gp=gpar(cex=cex.lab))
#grid.draw(ax)
}
.rowSideCol<-function(group.vector,group.names=NULL,group.col=NULL,cex.lab=1){
if( is.null(group.col) ){
cols=rainbow(length(unique(group.vector)))
img=cols[factor(group.vector,levels=unique(group.vector))]
}else{
img=group.col[factor(group.vector,levels=unique(group.vector))]
}
grid.raster(img,interpolate = FALSE, width= unit(1, "npc"),
height=unit(1, "npc"))
# segment heights calculated from group.vector
# will be used to put group names in the middle of the segment
segh=as.vector(table(factor(group.vector,levels=unique(group.vector))))
name.coord=1-((cumsum(segh)-(segh/2))/sum(segh)) # NPC coord
if( is.null(group.names)){
grid.text(unique(group.vector), y=unit(name.coord,"npc"),
x = unit(-0.5, "lines"),
gp=gpar(cex=cex.lab),just="right")
}else{
grid.text(group.names, y=unit(name.coord,"npc"),
x = unit(-0.5, "lines"),
gp=gpar(cex=cex.lab),just="right")
}
}
# ---------------------------------------------------------------------------- #
#' Draw a heatmap of a given ScoreMatrix object
#'
#'
#' The function makes a heatmap out of given \code{ScoreMatrix} object. If desired
#' it can use clustering using given clustering function
#' (e.g. k-means) and plot cluster color codes as a sidebar.
#' In addition, user can define groups of rows using 'group' argument.
#'
#' @param mat a \code{ScoreMatrix} object
#' @param grid if TRUE, grid graphics will be used. if FALSE, base graphics
#' will be used on the top level, so users can use par(mfrow)
#' or par(mfcol) prior to calling the function. Default:FALSE
#' @param col a vector of colors, such as the ones created by heat.colors(10).
#' If NULL (which is default), jet color scheme (common in matlab
#' plots) will be used.
#' @param xcoords a vector of numbers showing relative positions of the bases or
#' windows. It must match the number of columns in the \code{ScoreMatrix}.
#' Alternatively, it could be a numeric vector of two elements. Such
#' as c(0,100) showing the relative start and end coordinates of the first
#' and last column of the \code{ScoreMatrix} object.
#'
#' @param group a list of vectors of row numbers or a factor. This grouping is
#' used for rowside colors of the heatmap. If it is a list,
#' each element of the list must be a vector of row numbers. Names
#' of the elements of the list will be used as names of groups.
#' If \code{group} is a factor
#' , it's length must match the number of rows of the matrix, and
#' factor levels will be used as the names of the groups in the plot.
#
#' @param group.col a vector of color names to be used at the rowside colors if
#' \code{group} argument is given or \code{clustfun} function is given.
#' @param order Logical indicating if the rows should be ordered or not
#' (Default:FALSE). If \code{order=TRUE} the matrix will be ordered
#' with rowSums(mat) values in descending order.
#' If \code{group} argument is provided, first the groups
#' will be ordered in descending order of sums of rows then, everything
#' within the clusters will be ordered by sums of rows.
#' If \code{clustfun} is given then rows within clusters
#' will be order in descending order of sums of rows.
#'
#' @param user.order a numerical vector indicating the order of groups/clusters (it works only
#' when \code{group} or \code{clustfun} argument is given).
#'
#' @param winsorize Numeric vector of two, defaults to c(0,100). This vector
#' determines the upper and lower percentile values to limit the
#' extreme values. For example, c(0,99) will limit the values to
#' only 99th percentile, everything above the 99 percentile will
#' be equalized to the value of 99th percentile.This is useful
#' for visualization of matrices that have outliers.
#' @param clustfun a function for clustering
#' rows of \code{mat} that returns
#' a vector of integers indicating the cluster to which
#' each point is allocated (a vector of cluster membership),
#' e.g. k-means algorithm with 3 centers:
#' function(x) kmeans(x, centers=3)$cluster.
#' By default FALSE.
#' @param main a character string for the plot title
#' @param legend.name a character label plotted next to the legend
#' @param cex.legend A numerical value giving the amount by which
#' legend axis marks should be magnified relative to the default
#' @param xlab label a character string for x-axis of the heatmap
#' @param cex.lab A numerical value giving the amount by which
#' axis labels (including 'legend.name')
#' should be magnified relative to the default.
#' @param cex.main A numerical value giving the amount by which
#' plot title should be magnified
#' @param cex.axis A numerical value giving the amount by which
#' axis marks should be magnified relative to the default
#' @param newpage logical indicating if \code{grid.newpage()} function should be
#' invoked if \code{grid=TRUE}.
#'
#' @return
#' returns clustering result invisibly, if clustfun is definied
#'
#' @examples
#'
#' data(cage)
#' data(promoters)
#' scores1=ScoreMatrix(target=cage,windows=promoters,strand.aware=TRUE,
#' weight.col="tpm")
#'
#' set.seed(1000)
#' \donttest{
#' heatMatrix(mat=scores1,legend.name="tpm",winsorize=c(0,99),xlab="region around TSS",
#' xcoords=-1000:1000,
#' cex.legend=0.8,main="CAGE clusters on promoters",cex.lab=1,
#' cex.axis=0.9,grid=FALSE)
#'
#' ## examples using clustering functions
#' ## k-means
#' cl1 <- function(x) kmeans(x, centers=3)$cluster
#' set.seed(1000)
#' heatMatrix(mat=scores1,legend.name="tpm",winsorize=c(0,99),xlab="region around TSS",
#' xcoords=-1000:1000,clustfun=cl1,
#' cex.legend=0.8,main="CAGE clusters on promoters",cex.lab=1,
#' cex.axis=0.9,grid=FALSE,
#' user.order=c(1,3,2))
#'
#' ## hierarchical clustering
#' cl2 <- function(x) cutree(hclust(dist(x), method="complete"), k=3)
#' set.seed(1000)
#' heatMatrix(mat=scores1,legend.name="tpm",winsorize=c(0,99),xlab="region around TSS",
#' xcoords=-1000:1000,clustfun=cl2,
#' cex.legend=0.8,main="CAGE clusters on promoters",cex.lab=1,
#' cex.axis=0.9,grid=FALSE)
#' }
#'
#'
#' @export
#' @rdname heatMatrix
heatMatrix<-function(mat,grid=FALSE,col=NULL,xcoords=NULL,
group=NULL,group.col=NULL,
order=FALSE,user.order=FALSE,
winsorize=c(0,100),
clustfun=NULL,
main="",legend.name=NULL,cex.legend=1,xlab=NULL,cex.main=1,
cex.lab=1,cex.axis=1,newpage=TRUE
){
if( class(mat) !="ScoreMatrix" ){stop("'mat' is not a ScoreMatrix object\n")}
if(!is.null(clustfun) & !is.function(clustfun)){stop("'clustfun' has to be a function or NULL\n")}
mat2=mat@.Data # get the matrix class, some operations are not transitive
# if this is changed, a rowSide color map will be drawn
# setting clustering function or group.list will populate this vector
# and this function will check on its value later on
# to decide to plot rowside colors or not
group.vector=NULL
group.names=NULL # will be plotted as annotation if filled in later
# this can set extreme values to given percentile
# better for visualization
# alternative is to take log or sth
# but that should be done prior to heatMatrix
if(winsorize[2]<100 | winsorize[1]>0){
mat2=.winsorize(mat2, winsorize)
}
# do clustfun if requested
if(!is.null(clustfun)){
# impute values if there are NAs
if(any(is.na(mat2)) ) {
mat3=impute.knn(mat2, k = 10,
rowmax = 0.5, colmax = 0.8,
maxp = 1500)$data
clu=clustfun(mat3)
}
else{
clu=clustfun(mat2)
}
group.vector=clu
# order things by clusters only
mat2=mat2[order(group.vector),]
group.vector=group.vector[order(group.vector)]
# if user wants to order
if(order){
# ordering within clusters
my.order=order(group.vector,-rowSums(mat2,na.rm=TRUE))
# commence the new order: Novus Ordo Seclorum
mat2=mat2[my.order,]
group.vector=group.vector[my.order]
}
group.names=unique(group.vector) # to be used for the rowSide colors
}
# check conditions of group.list
# group must not have duplicated numbers
# warn if total number group elements is below nrow(mat)
if(!is.null(group) & is.null(clustfun)){ #if grouping then without clustering
# if group is a list of rowids, row numbers for original windows argument
if(is.list(group)){
# group must not have duplicated numbers
win.numbs=(lapply(group, function(x) unique(x) ))
win.vec=unlist(win.numbs)
if(any(table(unlist(win.numbs))>1)){
stop("'group' containing a list must not have duplicated numbers\n")
}
row.ids=rownames(mat2) # get row ids from the matrix
group.vector=rep(0,length(row.ids)) # make a starting vector of zeros
for(i in 1:length(win.numbs)){
group.vector[row.ids %in% win.numbs[[i]] ]=i # make group vector
}
# if list has names, take it as group.names
# group.names will be plotted with rowSide colors
if(!is.null(names(group))){
group.names=names(group)
}
# stop if total number group elements is below nrow(mat)
if(all(group.vector==0)){
stop("None of the elements in 'group' are a part of rownames(mat) \n")
}
# warn if total number group elements is below nrow(mat)
if(any(group.vector==0)){
warning("Number of elements in 'group' argument is less then nrow(mat) \n",
"Dropping rows from 'mat' that are not contained in 'group'\n")
mat2=mat2[group.vector>0,]
group.vector=group.vector[group.vector>0]
}
}
else if(is.factor(group)){
# check if the length is same as nrow(mat)
if( length(group) != nrow(mat)){
stop("'group' is a factor, and its length should be equal to nrow(mat)\n")
}
# arrange factor levels by the order of appeareance
group=factor(as.character(group),levels=as.character(unique(group)))
group.names=levels(group)
levels(group)=1:length(levels(group))
group.vector=as.numeric(group)
}else{
stop("'group' must be a factor or a list\n")
}
# order things by clusters only
mat2=mat2[order(group.vector),]
group.vector=group.vector[order(group.vector)]
if(order){
my.order=order(group.vector,-rowSums(mat2,na.rm=TRUE))
# commence the new order: Novus Ordo Seclorum
mat2=mat2[my.order,]
group.vector=group.vector[my.order]
names(group.vector)=rownames(mat2)
}
}else if(order & is.null(clustfun) ){ # if only ordering is needed, no grouping or clustering
#czy napewno clustfun FALSE? chyba tak?
order.vector =rep(1,nrow(mat2))
names(order.vector)=rownames(mat2)
order.vector = order.vector[order(-rowSums(mat2,na.rm=TRUE))]
mat2 =mat2[order(-rowSums(mat2,na.rm=TRUE)),]
}
#if user.order is provided
if(!identical(user.order, FALSE) & !is.null(group.names)){
if(length(user.order)!=length(group.names)){
warning(paste0("length of 'user.order' vector (", length(user.order) ,") should be the the same as number of clusters (",length(group.names),"). Skipping it..\n"))
}else{
gv.fac.ord <- sort(factor(group.vector, levels = user.order))
group.vector <- group.vector[names(gv.fac.ord)] #convert factor to vector
#group.names <- user.order
}
}else if(!identical(user.order, FALSE) & is.null(group.names)){
warning("There are no groups or clusters to order. Skipping it..")
}
# THE PLOTTING STARTS HERE with ordered mat2
if(!grid){
plot.new()
vps <- baseViewports()
pushViewport(vps$figure) # get the plot viewport from base graphics
}else{
if(newpage)
grid.newpage()
}
# get the default/given xcoordinates to plot
if(!is.null(xcoords) & is.vector(xcoords)){
## if it is a numeric vector
if(all(is.numeric(xcoords))) {
# if it is a two element vector of start and end coordinates
# of the first and the last column of the matrix
if(length(xcoords)==2 & xcoords[1]<xcoords[2]){
xcoords=seq(xcoords[1],xcoords[2],length.out=ncol(mat2) )
}
}
if( all(is.numeric(xcoords)) & ( length(xcoords) != ncol(mat2) ) )
stop("xcoords has wrong length: ",length(xcoords)," \n",
" it should be equal to the number of columns of ScoreMatrix\n",
" which is",ncol(mat2),"\n")
}else{
xcoords=1:ncol(mat2)
}
# get heatcolor scale
if(is.null(col)){
col=.jets(100)
}
# make legend viewport
legendVp <- viewport(width=unit(0.7, "lines"), height=unit(0.4, "npc"),
x = unit(0.71, "npc"), y = unit(0.5, "npc"),just="left")
pushViewport(legendVp)
#grid.rect()
rng=range(mat2,na.rm=TRUE)
# make the legend
.heatLegendY(min=rng[1],max=rng[2],
col,legend.name,main=FALSE,cex.legend,
cex.lab)
popViewport()
# make heatmap viewport
heatHeightNPC=0.7
heatVp <- viewport(width=unit(0.5, "npc"), height=unit(heatHeightNPC, "npc"),
x = unit(0.2, "npc"), y = unit(0.5, "npc"),just="left")
pushViewport(heatVp) # push the heatmap VP
.gridHeat(mat2,col,xcoords,xlab,cex.lab,cex.axis,hjust=0.5) # make the heat
#upViewport(1) # up one level current.vpTree()
popViewport()
# make side colors
if(!is.null(group.vector)){
sideVp <- viewport(width=unit(0.05, "npc"), height=unit(heatHeightNPC, "npc"),
x = unit(0.145, "npc"), y = unit(0.5, "npc"),just="left")
pushViewport(sideVp) # push the viewport
grid.rect()
# make the rowside colors and labels
.rowSideCol(group.vector,group.names=group.names,
group.col=group.col,cex.lab=cex.lab)
popViewport()
}
# make the title
#title.y=convertY(unit(0.85, "npc"), "lines")
title.y=unit(0.9, "npc")
grid.text(main, y=title.y,
x = unit(0.45, "npc"),
gp=gpar(cex=cex.main))
#return groups if clustfun is given
if(!grid)popViewport()
if(identical(clustfun, FALSE) | !is.null(group.vector)){
return(invisible(group.vector))
}else if(order & is.null(group.vector) ){
return(invisible(order.vector))
}
}
# ---------------------------------------------------------------------------- #
#' Draw multiple heatmaps from a ScoreMatrixList object
#'
#' The function plots multiple heatmaps for a ScoreMatrixList object side by side.
#' Each matrix can have different color schemes but it is essential that each matrix
#' is obtained from same regions or neighbouring regions.
#'
#' @param sml a \code{ScoreMatrixList} object
#' @param grid if TRUE, grid graphics will be used. if FALSE, base graphics
#' will be used on the top level, so users can use par(mfrow)
#' or par(mfcol) prior to calling the function. Default:FALSE
#' @param col a color palette or list of color palettes, such as
#' list(heat.colors(10),topo.colors(10)). If it is a list,
#' it is length must match the number of matrices to be plotted.
#' If it is a single palette
#' every heatmap will have the same colors.
#' @param xcoords a vector of numbers showing relative positions of the bases or
#' windows or a list of vectors.
#' The elements of the list must match the number of columns in the
#' corresponding \code{ScoreMatrix}.
#' Alternatively, the elements could be a numeric vector of two elements. Such
#' as c(0,100) showing the relative start and end coordinates of the first
#' and last column of the \code{ScoreMatrix} object. The remaining
#' coordinates will be automatically matched in this case. If the
#' argument is not a list but a single vector, then all heatmaps
#' will have the same coordinate on their x-axis.
#'
#' @param group a list of vectors of row numbers or a factor. The rows will be
#' reordered to match their grouping. The grouping is
#' used for rowside colors of the heatmap. If it is a list,
#' each element of the list must be a vector of row numbers. Names
#' of the elements of the list will be used as names of groups.
#' If \code{group} is a factor
#' , it's length must match the number of rows of the matrix, and
#' factor levels will be used as the names of the groups in the plot.
#
#'
#' @param group.col a vector of color names to be used at the rowside colors if
#' \code{group} and \code{clustfun} arguments are given
#' @param order Logical indicating if the rows should be ordered or not
#' (Default:FALSE). If \code{order=TRUE} the matrix will be ordered
#' with rowSums(mat) values in descending order.
#' If \code{group} argument is provided, first the groups
#' will be ordered in descending order of sums of rows then, everything
#' within the clusters will be ordered by sums of rows.
#' If \code{clustfun} is given then rows within clusters
#' will be order in descending order by sums of rows.
#'
#' @param user.order a numerical vector indicating the order of groups/clusters (it works only
#' when \code{group} or \code{clustfun} argument is given).
#'
#' @param winsorize Numeric vector of two, defaults to c(0,100). This vector
#' determines the upper and lower percentile values to limit the
#' extreme values. For example, c(0,99) will limit the values to
#' only 99th percentile for a matrix,
#' everything above the 99 percentile will
#' be equalized to the value of 99th percentile.This is useful
#' for visualization of matrices that have outliers.
#' @param clustfun a function for clustering
#' rows of \code{mat} that returns
#' a vector of integers indicating the cluster to which
#' each point is allocated (a vector of cluster membership),
#' e.g. k-means algorithm with 3 centers:
#' function(x) kmeans(x, centers=3)$cluster.
#' By default FALSE.
#' @param clust.matrix a numerical vector of indexes or a character vector of names
#' of the \code{ScoreMatrix} objects in 'sml'
#' to be used in clustering (if \code{clustfun} argument is provided).
#' By default all matrices are clustered. Matrices that are
#' not indicated in clust.matrix are ordered according to
#' result of clustering algorithm.
#' @param column.scale Logical indicating if matrices should be scaled or not,
#' prior to clustering or ordering. Setting this
#' to TRUE scales the columns of the
#' matrices using,
#' \code{scale()} function. scaled columns are only used for
#' clustering or ordering. Original scores are displayed for heatmaps.
#' @param matrix.main a vector of strings for the titles of the heatmaps. If NULL
#' titles will be obtained from names of the \code{ScoreMatrix}
#' objects in the \code{ScoreMatrixList} objects.
#' @param common.scale if TRUE (Default:FALSE) all the heatmap colors will be
#' coming from the same score
#' scale, although each heatmap color scale can be different.
#' The color intensities will be coming from the same scale.
#' The scale will be
#' determined by minimum of all matrices and maximum of all
#' matrices. This is useful when all matrices are on the same
#' score scale. If FALSE, the color scale will be determined
#' by minimum and maximum of each matrix individually.
#' @param legend if TRUE and color legend for the heatmap is drawn.
#' @param legend.name a vector of legend labels to be plotted with legends
#' of each heatmap. If it is a length 1 vector, all heatmaps
#' will have the same legend label.
#' @param cex.legend A numerical value giving the amount by which
#' legend axis marks should be magnified relative to the default
#'
#' @param xlab a vector of character strings for x-axis labels of the heatmaps.
#' if it is length 1, all heatmaps will have the same label.
#' @param cex.lab A numerical value giving the amount by which
#' axis labels (including 'legend.name')
#' should be magnified relative to the default.
#' @param cex.main A numerical value giving the amount by which
#' plot title should be magnified
#' @param cex.axis A numerical value giving the amount by which
#' axis marks should be magnified relative to the default
#' @param newpage logical indicating if \code{grid.newpage()} function should be
#' invoked if \code{grid=TRUE}.
#'
#' @return
#' invisibly returns the order of rows, if clustfun is provided and/or order=TRUE
#'
#' @examples
#' data(cage)
#' data(promoters)
#' scores1=ScoreMatrix(target=cage,windows=promoters,strand.aware=TRUE)
#'
#' data(cpgi)
#' scores2=ScoreMatrix(target=cpgi,windows=promoters,strand.aware=TRUE)
#'
#' sml=new("ScoreMatrixList",list(a=scores1,b=scores2))
#'
#' # use with k-means
#' \donttest{multiHeatMatrix(sml,
#' clustfun=function(x) kmeans(x, centers=2)$cluster,
#' cex.axis=0.8,xcoords=c(-1000,1000),
#' winsorize=c(0,99),
#' legend.name=c("tpm","coverage"),xlab="region around TSS")
#'
#' # use with hierarchical clustering
#' cl2 <- function(x) cutree(hclust(dist(x), method="complete"), k=2)
#' multiHeatMatrix(sml,legend.name="tpm",winsorize=c(0,99),xlab="region around TSS",
#' xcoords=-1000:1000,clustfun=cl2,
#' cex.legend=0.8,cex.lab=1,
#' cex.axis=0.9,grid=FALSE)
#'
#' # use different colors
#' require(RColorBrewer)
#' col.cage= brewer.pal(9,"Blues")
#' col.cpgi= brewer.pal(9,"YlGn")
#' multiHeatMatrix(sml,
#' clustfun=function(x) kmeans(x, centers=2)$cluster,
#' cex.axis=0.8,xcoords=c(-1000,1000),
#' winsorize=c(0,99),col=list(col.cage,col.cpgi),
#' legend.name=c("tpm","coverage"),xlab="region around TSS")
#' }
#'
#'
#'
#' @export
multiHeatMatrix<-function(sml,grid=TRUE,col=NULL,xcoords=NULL,
group=NULL,group.col=NULL,
order=FALSE,user.order=FALSE,
winsorize=c(0,100),
clustfun=FALSE,
clust.matrix=NULL,
column.scale=TRUE,
matrix.main=NULL,
common.scale=FALSE,legend=TRUE,
legend.name=NULL,cex.legend=0.8,
xlab=NULL,
cex.lab=1, cex.main=1,cex.axis=0.8,newpage=TRUE)
{
if( class(sml) !="ScoreMatrixList" ){stop("'sml' is not a ScoreMatrix object\n")}
# check if col and xcoords are lists
# if so their number should be equal to the number of matrices
if(is.list(col) & length(col) != length(sml)){
stop("'col' is a list and its length does not match the length of ScoreMatrixList\n")
col=NULL
}
if( is.list(xcoords) & length(xcoords) != length(sml) ){
stop("'xcoords' is a list and its length does not match the length of ScoreMatrixList\n")
xcoords=NULL
}
# check legend.name argument if , it is a vector
if(!is.null(legend.name) & length(legend.name)>1 & length(legend.name) != length(sml)){
stop("'legend.name' should match the length of the 'sml' ",
"if it is not a length 1 vector\n")
}else if (length(legend.name)==1){
legend.name=rep(legend.name,length(sml))
}
# check xlab argument and set it
if(!is.null(xlab) & length(xlab)>1 & length(xlab) != length(sml)){
stop("'xlab' should match the length of the 'sml' ",
"if it is not a length 1 vector\n")
}else if (length(xlab)==1){
xlab=rep(xlab,length(sml))
}
#check number of rows if they match or not
# if not, warn and run unionScoreMatrixList
if( length(unique(sapply(sml,nrow)))>1 ){
warning("\nThe row numbers are different\n",
"attempting to get common rows and to reorder rows\n",
"using 'intersectScoreMatrixList()'\n")
sml=intersectScoreMatrixList(sml,reorder=TRUE)
}
# check clust.matrix arg
if(!is.null(clust.matrix)){
clust.matrix = unique(clust.matrix)
if(is.numeric(clust.matrix)){
if(length(clust.matrix)>length(sml) | max(clust.matrix) > length(sml) | 0 %in% clust.matrix){
warning("\n'clust.matrix' vector shouldn't be longer than 'sml'\n",
"and shouldn't have greater values that length of 'sml'\n",
"clustering all matrices\n")
clust.matrix = NULL
}
}else{
if(length(clust.matrix)>length(sml) | sum(clust.matrix %in% names(sml)) == length(clust.matrix)){
warning("\n'clust.matrix' vector shouldn't be longer than 'sml'\n",
"and should contain names of matrices of 'sml'\n",
"clustering all matrices\n")
clust.matrix = NULL
}
}
}
mat.list=lapply(sml,function(x) x@.Data) # get the matrix class, some operations are not transitive
# if this is changed, a rowSide color map will be drawn
# setting clustfun or group.list will populate this vector
# and this function will check on its value later on
# to decide to plot rowside colors or not
group.vector=NULL
group.names=NULL # will be plotted as annotation if filled in later
# this can set extreme values to given percentile
# better for visualization
# alternative is to take log or sth
# but that should be done prior to heatMatrix
if(winsorize[2]<100 | winsorize[1]>0){
mat.list=lapply(mat.list,function(x) .winsorize(x@.Data, winsorize) )
}
# if order is true | clustfun is provided
# make a one large matrix by cbind
if(!identical(clustfun, FALSE) | order){
if(!is.null(clust.matrix)){
mat2=do.call("cbind",mat.list[clust.matrix])
}else{
mat2=do.call("cbind",mat.list)
}
if(column.scale){
mat2=scale(mat2)
mat2[is.nan(mat2)]=0
}
}
# do clustfun if requested
if(!identical(clustfun, FALSE)){
# impute values if there are NAs
if(any(is.na(mat2)) ) {
mat3=impute.knn(mat2 ,k = 10,
rowmax = 0.5, colmax = 0.8,
maxp = 1500)$data
clu=clustfun(mat3)
}
else{
# cluster
clu=clustfun(mat2)
}
# get group.vector centers, will be used at ordering later
group.vector=clu
# order things by clusters only
mat.list=lapply(mat.list,function(x) x[order(group.vector),])
group.vector=group.vector[order(group.vector)]
# if user wants to order
if(order){
# replicate center value for each cluster
g.factor=factor(group.vector,levels=unique(group.vector))
# do ordering within clusters
my.order=order(group.vector,-rowSums(mat2,na.rm=TRUE))
# commence the new order: Novus Ordo Seclorum
mat.list=lapply(mat.list,function(x) x[my.order,])
group.vector=group.vector[my.order]
}
group.names=unique(group.vector) # to be used for the rowSide colors
}
# check conditions of group.list
# group must not have duplicated numbers
# warn if total number group elements is below nrow(mat)
if(!is.null(group) & identical(clustfun, FALSE)){
# if group is a list of rowids, row numbers for original windows argument
if(is.list(group)){
# group must not have duplicated numbers
win.numbs=(lapply(group, function(x) unique(x) ))
win.vec=unlist(win.numbs)
if(any(table(unlist(win.numbs))>1)){
stop("'group' containing a list must not have duplicated numbers\n")
}
row.ids=rownames(mat.list[[1]]) # get row ids from the matrix
group.vector=rep(0,length(row.ids)) # make a starting vector of zeros
for(i in 1:length(win.numbs)){
group.vector[row.ids %in% win.numbs[[i]] ]=i # make group vector
}
# if list has names, take it as group.names
# group.names will be plotted with rowSide colors
if(!is.null(names(group))){
group.names=names(group)
}
# stop if total number group elements is below nrow(mat)
if(all(group.vector==0)){
stop("None of the elements in 'group' are a part of rownames(mat) \n")
}
# warn if total number group elements is below nrow(mat)
if(any(group.vector==0)){
warning("Number of elements in 'group' argument is less then nrow(mat) \n",
"Dropping rows from 'mat' that are not contained in 'group'\n")
mat.list=lapply(mat.list,function(x) x[group.vector>0,])
group.vector=group.vector[group.vector>0]
}
}
else if(is.factor(group)){
# check if the length is same as nrow(mat)
if(length(group) != nrow(mat.list[[1]])){
stop("'group' is a factor, and its length should be equal to nrow(mat)\n")
}
# arrange factor levels by the order of appeareance
group=factor(as.character(group),levels=as.character(unique(group)))
group.names=levels(group)
levels(group)=1:length(levels(group))
group.vector=as.numeric(group)
}else{
stop("'group' must be a factor or a list\n")
}
# order things by clusters only
mat.list=lapply(mat.list,function(x) x[order(group.vector),])
group.vector=group.vector[order(group.vector)]
if(order){
# get cbound matrix for ordering
mat2=do.call("cbind",mat.list)
if(column.scale){
mat2=scale(mat2)
mat2[is.nan(mat2)]=0
}
my.order=order(group.vector,-rowSums(mat2,na.rm=TRUE))
# commence the new order: Novus Ordo Seclorum
mat.list=lapply(mat.list,function(x) x[my.order,])
group.vector=group.vector[my.order]
}
}else if(order & identical(clustfun, FALSE) ){ # if only ordering is needed no group or clustering
# get cbound matrix for ordering
mat2=do.call("cbind",mat.list)
if(column.scale){
mat2=scale(mat2)
mat2[is.nan(mat2)]=0
}
order.vector =rep(1,nrow(mat2))
names(order.vector)=rownames(mat2)
my.order=order(-rowSums(mat2,na.rm=TRUE))
mat.list=lapply(mat.list,function(x) x[my.order,])
order.vector = order.vector[my.order]
}
#if user.order is provided
if(!identical(user.order, FALSE) & !is.null(group.names)){
if(length(user.order)!=length(group.names)){
warning(paste0("length of 'user.order' vector (", length(user.order) ,") should be the the same as number of clusters (",length(group.names),"). Skipping it..\n"))
}else{
gv.fac.ord <- sort(factor(group.vector, levels = user.order))
group.vector <- group.vector[names(gv.fac.ord)] #convert factor to vector
group.names <- user.order
}
}else if(!identical(user.order, FALSE) & is.null(group.names)){
warning("There are no groups or clusters to order. Skipping it..")
}
# THE PLOTTING STARTS HERE with ordered mat.list
if(!grid){
plot.new()
vps <- baseViewports()
pushViewport(vps$figure) # get the plot viewport from base graphics
}else{
if(newpage)
grid.newpage()
}
# try to get matrix names from names of ScoreMatrixList
if(is.null(matrix.main) & !is.null(names(sml)) & class(sml)=="ScoreMatrixList" )
{
matrix.main= names(sml)
}else if(!is.null(matrix.main) & length(matrix.main) != length(sml)){
warning("'matrix.main' length does not match to the 'sml' length\n",
"setting it to NULL")
matrix.main=NULL
}
# calculate the width of heatmaps and everything else relative to that
l.sml=length(mat.list) # get length of matrix
# get the npc width of the heatmap
# this will be our rudder when plotting
# every coordinate and dimension will be relative to this value
hw=40*(1-(convertX( unit(4,"lines"),"npc",valueOnly=TRUE)) )/(l.sml*44+7)
hh=0.7 # this heatmap height in NPC
hy=0.6 # heatmap y coordinate on the plot
# if group.vector is not NULL, plot the rowside colors
# make side colors
if(!is.null(group.vector)){
sideVp <- viewport(width=unit(hw/8, "npc"), height=unit(hh, "npc"),
x = convertX( unit(4,"lines"),"npc"),
y = unit(hy, "npc"),just="left")
pushViewport(sideVp) # push the viewport
grid.rect()
# make the rowside colors and labels
.rowSideCol(group.vector,group.names=group.names,
group.col=group.col,cex.lab=cex.lab)
popViewport()
}
# heatmap start coordinates
heat.startCoord=convertX( unit(4,"lines"),"npc",valueOnly=TRUE) + (hw/8) + (hw/20)
# if the same scale to be used for all plots
common.range=NULL
if(common.scale){
common.range=range(mat.list,na.rm=TRUE)
}
for(i in 1:length(mat.list)){
# cxcoords stands for current xcoords
if(is.list(xcoords)){
cxcoords=xcoords[[i]]
}else if( is.vector(xcoords) | is.null(xcoords) ){
cxcoords=xcoords
}
else if(!is.null(xcoords)){
warning("xcoords should be a vector or a list or NULL,nothing else!!\n",
"setting xcoords to NULL")
cxcoords=NULL
}
## if it is a numeric vector
if(all(is.numeric(cxcoords))) {
# if it is a two element vector of start and end coordinates
# of the first and the last column of the matrix
if(length(cxcoords)==2){
cxcoords=seq(cxcoords[1],cxcoords[2],length.out=ncol(mat.list[[i]]))
}
}
# ccol: stands for current.color
if(is.list(col)){
ccol=col[[i]]
}else if(is.vector(col) | is.null(col) ){
ccol=col
}else if(!is.null(col)){
warning("col should be a vector or a list or NULL,nothing else!!\n",
"setting colors to default")
ccol=NULL
}
# get the default/given xcoordinates to plot
if(!is.null(cxcoords)){
if( all(is.numeric(cxcoords)) & ( length(cxcoords) != ncol( mat.list[[i]] ) ) )
stop("xcoords has wrong length: ",length(cxcoords)," \n",
" it should be equal to the number of columns of ScoreMatrix\n",
" which is",ncol(mat.list[[i]]),"\n")
}else{
cxcoords=1:ncol( mat.list[[i]] )
}
# get heatcolor scale
if(is.null(ccol))
ccol=.jets(100)
# make heatmap viewport
heatVp <- viewport(width=unit(hw, "npc"), height=unit(hh, "npc"),
x =unit(heat.startCoord,"npc"),
y = unit(hy, "npc"),just="left")
pushViewport(heatVp) # push the heatmap VP
grid.rect()
.gridHeat(mat.list[[i]],ccol,cxcoords,xlab[i],cex.lab,cex.axis,
angle=60,hjust=0.6,vjust=-0.5,rng=common.range) # make the heat
#upViewport(1) # up one level current.vpTree()
popViewport()
if(legend){
# make the legend viewport
legendVp <- viewport(width=unit(hw*0.7, "npc"),
height=unit(0.5, "lines"),
x =unit(heat.startCoord+hw*0.15,"npc"),
y = unit(0.1, "npc"),
just="left")
pushViewport(legendVp)
grid.rect()
if(common.scale){
rng=common.range
}else{
rng=range(mat.list[[i]],na.rm=TRUE)
}
# make the legend
.heatLegendX(min=rng[1],max=rng[2],
ccol,legend.name[i],main=TRUE,cex.legend,
cex.lab,vjust=-0.5,hjust=0.5)
popViewport()
}
if(!is.null(matrix.main)){
title.y=unit(0.96, "npc")
grid.text(matrix.main[i], y=title.y,
x = unit(heat.startCoord+hw*0.5,"npc"),
gp=gpar(cex=cex.main),just="bottom")
}
# update start coord for next one
heat.startCoord=heat.startCoord+ hw+ (hw/20)
}
if(!grid){
popViewport()
}
if(!identical(clustfun, FALSE) | !is.null(group.vector)){
return(invisible(group.vector))
}else if(order & is.null(group.vector) ){
return(invisible(order.vector))
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.