#' Diagnostic plots for the metaseqr package
#'
#' This is the main function for producing sructured quality control and informative
#' graphs base on the results of the various steps of the metaseqR package. The
#' graphs produced span a variety of issues like good sample reproducibility
#' (Multi-Dimensional Scaling plot, biotype detection, heatmaps. diagplot.metaseqr,
#' apart from implementing certain package-specific plots, is a wrapper around
#' several diagnostic plots present in other RNA-Seq analysis packages such as
#' EDASeq and NOISeq.
#'
#' @param object a matrix or a data frame containing count data derived before or
#' after the normalization procedure, filtered or not by the metaseqR's filters
#' and/or p-value. The object can be fed to any of the \code{diagplot.metaseqr}
#' plotting systems but not every plot is meaningful. For example, it's meaningless
#' to create a \code{"biodist"} plot for a count matrix before normalization or
#' statistical testing.
#' @param sample.list the list containing condition names and the samples under
#' each condition.
#' @param annotation a data frame containing annotation elements for each row in
#' object. Usually, a subset of the annotation obtained by \code{\link{get.annotation}}
#' or a subset of possibly embedded annotation with the input counts table. This
#' parameter is optional and required only when diagplot.type is any of
#' \code{"biodetection"}, \code{"countsbio"}, \code{"saturation"}, \code{"rnacomp"},
#' \code{"readnoise"}, \code{"biodist"}, \code{"gcbias"}, \code{"lengthbias"} or
#' \code{"filtered"}.
#' @param contrast.list a named structured list of contrasts as returned by
#' \code{\link{make.contrast.list}} or just the vector of contrasts as defined in
#' the main help page of \code{\link{metaseqr}}. This parameter is optional and
#' required only when \code{diagplot.type} is any of \code{"deheatmap"},
#' \code{"volcano"} or \code{"biodist"}.
#' @param p.list a list of p-values for each contrast as obtained from any of the
#' \code{stat.*} methods of the metaseqr package. This parameter is optional and
#' required only when \code{diagplot.type} is any of \code{"deheatmap"},
#' \code{"volcano"} or \code{"biodist"}.
#' @param thresholds a list with the elements \code{"p"} and \code{"f"} which are
#' the p-value and the fold change cutoff when \code{diagplot.type="volcano"}.
#' @param diagplot.type one or more of the diagnostic plots supported in metaseqR
#' package. Many of these plots require the presence of additional package,
#' something that is checked while running the main metaseqr function. The supported
#' plots are \code{"mds"}, \code{"biodetection"}, \code{"countsbio"},
#' \code{"saturation"}, \code{"rnacomp"}, \code{"boxplot"}, \code{"gcbias"},
#' \code{"lengthbias"}, \code{"meandiff"}, \code{"meanvar"}, \code{"deheatmap"},
#' \code{"volcano"}, \code{"biodist"}, \code{"filtered"}, \code{"readnoise"},
#' \code{"venn"}, \code{"correl"}, \code{"pairwise"}. For a brief description of
#' these plots please see the main \code{\link{metaseqr}} help page.
#' @param is.norm a logical indicating whether object contains raw or normalized
#' data. It is not essential and it serves only plot annotation purposes.
#' @param output one or more R plotting device to direct the plot result to.
#' Supported mechanisms: \code{"png"}, \code{"jpg"}, \code{"bmp"}, \code{"pdf"},
#' \code{"ps"} or \code{"json"}. The latter is currently available for the creation
#' of interactive volcano plots only when reporting the output, through the
#' highcharts javascript library. The default plotting (\code{"x11"}) is not
#' supported due to instability in certain devices.
#' @param path the path to create output files.
#' @param ... further arguments to be passed to plot devices, such as parameter
#' from \code{\link{par}}.
#' @return A named list containing the file names of the produced plots. Each list
#' member is names according to the selected plotting device and is also a named
#' list, whose names are the plot types. The final contents are the file names in
#' case the plots are written to a physical location (not meaningful for \code{"x11"}).
#' @note In order to make the best out of this function, you should generally
#' provide the annotation argument as most and also the most informative plots
#' depend on this. If you don't know what is inside your counts table or how many
#' annotation elements you can provide by embedding it, it's always best to set
#' the annotation parameter of the main metaseqr function to \code{"download"} to
#' use predefined annotations that work better with the functions of the whole
#' package.
#' @author Panagiotis Moulos
#' @export
#' @examples
#' \dontrun{
#' require(DESeq)
#' data.matrix <- counts(makeExampleCountDataSet())
#' sample.list <- list(A=c("A1","A2"),B=c("B1","B2","B3"))
#' contrast <- "A_vs_B"
#' diagplot.metaseqr(data.matrix,sample.list,diagplot.type=c("mds","boxplot"))
#'
#' norm.args <- get.defaults("normalization","deseq")
#' object <- normalize.deseq(data.matrix,sample.list,norm.args)
#' diagplot.metaseqr(object,sample.list,diagplot.type="boxplot")
#'
#' p <- stat.deseq(object)
#' diagplot.metaseqr(object,sample.list,contrast.list=contrast,p.list=p,
#' diagplot.type="volcano")
#'}
diagplot.metaseqr <- function(object,sample.list,annotation=NULL,contrast.list=NULL,
p.list=NULL,thresholds=list(p=0.05,f=1),diagplot.type=c("mds","biodetection",
"countsbio","saturation","readnoise","rnacomp","correl","pairs","boxplot",
"gcbias","lengthbias","meandiff","meanvar","deheatmap","volcano","biodist",
"filtered","venn"),is.norm=FALSE,output="x11",path=NULL,...) {
# annotation should have the format internally created here... This function
# can be used outside so it must be checked at some point...
if (!is.matrix(object) && !is.data.frame(object))
stopwrap("object argument must be a matrix or data frame!")
if (is.null(annotation) && any(diagplot.type %in% c("biodetection",
"countsbio","saturation","rnacomp","readnoise","biodist","gcbias",
"lengthbias","filtered")))
stopwrap("annotation argument is needed when diagplot.type is ",
"\"biodetection\", \"countsbio\",\"saturation\",\"rnacomp\", ",
"\"readnoise\", \"biodist\", \"gcbias\", \"lengthbias\", ",
"\"filtered\" or \"venn\"!")
if (any(diagplot.type %in% c("deheatmap","volcano","biodist","venn"))) {
if (is.null(contrast.list))
stopwrap("contrast.list argument is needed when diagplot.type is ",
"\"deheatmap\",\"volcano\", \"biodist\" or \"venn\"!")
if (is.null(p.list))
stopwrap("The p argument which is a list of p-values for each ",
"contrast is needed when diagplot.type is \"deheatmap\", ",
"\"volcano\", \"biodist\" or \"venn\"!")
if (is.na(thresholds$p) || is.null(thresholds$p) || thresholds$p==1) {
warnwrap(paste("The p-value threshold when diagplot.type is ",
"\"deheatmap\", \"volcano\", \"biodist\" or \"venn\" must allow ",
"the normal plotting of DEG diagnostic plots! Setting to 0.05..."))
thresholds$p <- 0.05
}
}
if (is.null(path)) path <- getwd()
if (is.data.frame(object) && !("filtered" %in% diagplot.type))
object <- as.matrix(object)
if (any(diagplot.type %in% c("biodetection","countsbio","saturation",
"rnacomp","biodist","readnoise")))
covars <- list(
data=object,
length=annotation$end - annotation$start,
gc=annotation$gc_content,
chromosome=annotation[,1:3],
factors=data.frame(class=as.class.vector(sample.list)),
biotype=annotation$biotype,
gene_name=as.character(annotation$gene_name)
)
raw.plots <- c("mds","biodetection","countsbio","saturation","readnoise",
"correl","pairwise")
norm.plots <- c("boxplot","gcbias","lengthbias","meandiff","meanvar",
"rnacomp")
stat.plots <- c("deheatmap","volcano","biodist")
other.plots <- c("filtered")
venn.plots <- c("venn")
files <- list()
for (p in diagplot.type) {
disp(" Plotting ",p,"...")
if (p %in% raw.plots && !is.norm) {
switch(p,
mds = {
files$mds <- diagplot.mds(object,sample.list,output=output,
path=path)
},
biodetection = {
files$biodetection <- diagplot.noiseq(object,sample.list,
covars,which.plot=p,output=output,path=path,...)
},
countsbio = {
files$countsbio <- diagplot.noiseq(object,sample.list,
covars,which.plot=p,output=output,path=path,...)
},
saturation = {
fil <- diagplot.noiseq(object,sample.list,covars,
which.plot=p,output=output,path=path,...)
files$saturation$biotype <- fil[["biotype"]]
files$saturation$sample <- fil[["sample"]]
},
readnoise = {
files$readnoise <- diagplot.noiseq(object,sample.list,
covars,which.plot=p,output=output,path=path,...)
},
correl = {
files$correl$heatmap <- diagplot.cor(object,type="heatmap",
output=output,path=path,...)
files$correl$correlogram <- diagplot.cor(object,
type="correlogram",output=output,path=path,...)
},
pairwise = {
files$pairwise <- diagplot.pairs(object,output=output,
path=path)
}
)
}
if (p %in% norm.plots) {
switch(p,
boxplot = {
files$boxplot <- diagplot.boxplot(object,name=sample.list,
is.norm=is.norm,output=output,path=path,...)
},
gcbias = {
files$gcbias <- diagplot.edaseq(object,sample.list,
covar=annotation$gc_content,is.norm=is.norm,
which.plot=p,output=output,path=path,...)
},
lengthbias = {
files$lengthbias <- diagplot.edaseq(object,sample.list,
covar=annotation$end-annotation$start,is.norm=is.norm,
which.plot=p,output=output,path=path,...)
},
meandiff = {
fil <- diagplot.edaseq(object,sample.list,is.norm=is.norm,
which.plot=p,output=output,path=path,...)
for (n in names(fil)) {
if (!is.null(fil[[n]]))
files$meandiff[[n]] <- unlist(fil[[n]])
}
},
meanvar = {
fil <- diagplot.edaseq(object,sample.list,is.norm=is.norm,
which.plot=p,output=output,path=path,...)
for (n in names(fil)) {
if (!is.null(fil[[n]]))
files$meanvar[[n]] <- unlist(fil[[n]])
}
},
rnacomp = {
files$rnacomp <- diagplot.noiseq(object,sample.list,covars,
which.plot=p,output=output,is.norm=is.norm,path=path,
...)
}
)
}
if (p %in% stat.plots && is.norm) {
for (cnt in names(contrast.list)) {
disp(" Contrast: ",cnt)
samples <- names(unlist(contrast.list[[cnt]]))
mat <- as.matrix(object[,match(samples,colnames(object))])
switch(p,
deheatmap = {
files$deheatmap[[cnt]] <- diagplot.de.heatmap(mat,cnt,
output=output,path=path)
},
volcano = {
fc <- log2(make.fold.change(cnt,sample.list,object,1))
for (contrast in colnames(fc)) {
files$volcano[[contrast]] <- diagplot.volcano(
fc[,contrast],p.list[[cnt]],contrast,
fcut=thresholds$f,pcut=thresholds$p,
output=output,path=path)
}
},
biodist = {
files$biodist[[cnt]] <- diagplot.noiseq(object,
sample.list,covars,which.plot=p,output=output,
biodist.opts=list(p=p.list[[cnt]],
pcut=thresholds$p,name=cnt),path=path,...)
}
)
}
}
if (p %in% other.plots) {
switch(p,
filtered = {
files$filtered <- diagplot.filtered(object,annotation,
output=output,path=path)
}
)
}
if (p %in% venn.plots) {
switch(p,
venn = {
for (cnt in names(contrast.list)) {
disp(" Contrast: ",cnt)
if (!is.null(annotation)) {
alt.names <- as.character(annotation$gene_name)
names(alt.names) <- rownames(annotation)
}
else
alt.names <- NULL
files$venn[[cnt]] <- diagplot.venn(p.list[[cnt]],
pcut=thresholds$p,nam=cnt,output=output,path=path,
alt.names=alt.names)
}
}
)
}
}
return(files)
}
#' Boxplots wrapper for the metaseqR package
#'
#' A wrapper over the general boxplot function, suitable for matrices produced
#' and processed with the metaseqr package. Intended for internal use but can be
#' easily used as stand-alone. It can colors boxes based on group depending on
#' the name argument.
#'
#' @param mat the count data matrix.
#' @param name the names of the samples plotted on the boxplot. If \code{NULL},
#' the function check the column names of mat. If they are also \code{NULL}, sample
#' names are autogenerated. If \code{name="none"}, no sample names are plotted.
#' If name is a list, it should be the sample.list argument provided to the manin
#' metaseqr function. In that case, the boxes are colored per group.
#' @param log.it whether to log transform the values of mat or not. It can be
#' \code{TRUE}, \code{FALSE} or \code{"auto"} for auto-detection. Auto-detection
#' log transforms by default so that the boxplots are smooth and visible.
#' @param y.lim custom y-axis limits. Leave the string \code{"default"} for default
#' behavior.
#' @param is.norm a logical indicating whether object contains raw or normalized
#' data. It is not essential and it serves only plot annotation purposes.
#' @param output one or more R plotting device to direct the plot result to.
#' Supported mechanisms: \code{"x11"} (default), \code{"png"}, \code{"jpg"},
#' \code{"bmp"}, \code{"pdf"}, \code{"ps"} or \code{"json"}. The latter is
#' currently available for the creation of interactive volcano plots only when
#' reporting the output, through the highcharts javascript library (JSON for
#' boxplots not yet available).
#' @param path the path to create output files.
#' @param alt.names an optional vector of names, e.g. HUGO gene symbols, alternative
#' or complementary to the unique names of \code{f} or \code{p} (one of them must
#' be named!). It is used only in JSON output.
#' @param ... further arguments to be passed to plot devices, such as parameter
#' from \code{\link{par}}.
#' @return The filename of the boxplot produced if it's a file.
#' @export
#' @author Panagiotis Moulos
#' @examples
#' \dontrun{
#' require(DESeq)
#' data.matrix <- counts(makeExampleCountDataSet())
#' sample.list <- list(A=c("A1","A2"),B=c("B1","B2","B3"))
#' diagplot.boxplot(data.matrix,sample.list)
#'
#' norm.args <- get.defaults("normalization","deseq")
#' object <- normalize.deseq(data.matrix,sample.list,norm.args)
#' diagplot.boxplot(object,sample.list)
#'}
diagplot.boxplot <- function(mat,name=NULL,log.it="auto",y.lim="default",
is.norm=FALSE,output="x11",path=NULL,alt.names=NULL,...) {
if (is.null(path)) path <- getwd()
if (is.norm)
status<- "normalized"
else
status<- "raw"
# Need to log?
if (log.it=="auto") {
if (diff(range(mat,na.rm=TRUE))>1000)
mat <- log2disp(mat)
}
else if (log.it=="yes")
mat <- log2disp(mat)
# Define the axis limits based on user input
if (!is.numeric(y.lim) && y.lim=="default") {
min.y <- floor(min(mat))
max.y <- ceiling(max(mat))
}
else if (is.numeric(y.lim)) {
min.y <- y.lim[1]
max.y <- y.lim[2]
}
grouped <- FALSE
if (is.null(name)) {
if (is.null(colnames(mat)))
nams <- paste("Sample",1:ncol(mat),sep=" ")
else
nams <- colnames(mat)
}
else if (length(name)==1 && name=="none")
nams <- rep("",ncol(mat))
else if (is.list(name)) { # Is sample.list
nams <- unlist(name)
grouped <- TRUE
}
cols <- c("red3","green3","blue2","gold","skyblue","orange3","burlywood",
"red","blue","green","orange","darkgrey","green4","black","pink",
"brown","magenta","yellowgreen","pink4","seagreen4","darkcyan")
if (grouped) {
tmp <- as.numeric(factor(as.class.vector(name)))
b.cols <- cols[tmp]
}
else b.cols <- cols
mat.list <- list()
for (i in 1:ncol(mat))
mat.list[[i]] <- mat[,i]
names(mat.list) <- nams
if (output != "json") {
fil <- file.path(path,paste("boxplot_",status,".",output,sep=""))
graphics.open(output,fil)
if (!is.numeric(y.lim) && y.lim=="default")
b <- boxplot(mat.list,names=nams,col=b.cols,las=2,main=paste(
"Boxplot ",status,sep=""),...)
else
b <- boxplot(mat.list,names=nams,col=b.cols,ylim=c(min.y,max.y),
las=2,main=paste("Boxplot ",status,sep=""),...)
graphics.close(output)
}
else {
# Create boxplot object
b <- boxplot(mat.list,plot=FALSE)
colnames(b$stat) <- nams
# Locate the outliers
o.list <- lapply(names(mat.list),function(x,M,b) {
v <- b[,x]
o <- which(M[[x]]<v[1] | M[[x]]>v[5])
if (length(o)>0)
return(M[[x]][o])
else
return(NULL)
},mat.list,b$stat)
# Create output object
obj <- list(
x=NULL,
y=NULL,
plot=b,
samples=name,
ylims=c(min.y,max.y),
xlims=NULL,
status=status,
pcut=NULL,
fcut=NULL,
altnames=alt.names,
user=o.list
)
json <- boxplotToJSON(obj)
fil <- file.path(path,paste("boxplot_",status,".json",sep=""))
disp("Writing ",fil)
write(json,fil)
}
return(fil)
}
#' Multi-Dimensinal Scale plots or RNA-Seq samples
#'
#' Creates a Multi-Dimensional Scale plot for the given samples based on the count
#' data matrix. MDS plots are very useful for quality control as you can easily
#' see of samples of the same groups are clustered together based on the whole
#' dataset.
#'
#' @param x the count data matrix.
#' @param sample.list the list containing condition names and the samples under
#' each condition.
#' @param method which correlation method to use. Same as the method parameter in
#' \code{\link{cor}} function.
#' @param log.it whether to log transform the values of x or not.
#' @param output one or more R plotting device to direct the plot result to.
#' Supported mechanisms: \code{"x11"} (default), \code{"png"}, \code{"jpg"},
#' \code{"bmp"}, \code{"pdf"}, \code{"ps"} or \code{"json"}. The latter is
#' currently available for the creation of interactive volcano plots only when
#' reporting the output, through the highcharts javascript library.
#' @param path the path to create output files.
#' @param ... further arguments to be passed to plot devices, such as parameter
#' from \code{\link{par}}.
#' @return The filename of the MDS plot produced if it's a file.
#' @export
#' @author Panagiotis Moulos
#' @examples
#' \dontrun{
#' require(DESeq)
#' data.matrix <- counts(makeExampleCountDataSet())
#' sample.list <- list(A=c("A1","A2"),B=c("B1","B2","B3"))
#' diagplot.mds(data.matrix,sample.list)
#'}
diagplot.mds <- function(x,sample.list,method="spearman",log.it=TRUE,
output="x11",path=NULL,...) {
if (is.null(path)) path <- getwd()
classes <- as.factor(as.class.vector(sample.list))
design <- as.numeric(classes)
colspace <- c("red","blue","yellowgreen","orange","aquamarine2",
"pink2","seagreen4","brown","purple","chocolate")
pchspace <- c(20,17,15,16,8,3,2,0,1,4)
if (ncol(x)<3) {
warnwrap("MDS plot cannot be created with less than 3 samples! ",
"Skipping...")
return(NULL)
}
if (log.it)
y <- nat2log(x,base=2)
else
y <- x
d <- as.dist(0.5*(1-cor(y,method=method)))
mds.obj <- cmdscale(d,eig=TRUE,k=2)
xr <- diff(range(min(mds.obj$points[,1]),max(mds.obj$points[,1])))
yr <- diff(range(min(mds.obj$points[,2]),max(mds.obj$points[,2])))
xlim <- c(min(mds.obj$points[,1])-xr/10,max(mds.obj$points[,1])+xr/10)
ylim <- c(min(mds.obj$points[,2])-yr/10,max(mds.obj$points[,2])+yr/10)
if (output!="json") {
fil <- file.path(path,paste("mds.",output,sep=""))
if (output %in% c("pdf","ps","x11"))
graphics.open(output,fil,width=9,height=7)
else
graphics.open(output,fil,width=1024,height=768)
plot(mds.obj$points[,1],mds.obj$points[,2],
col=colspace[1:length(levels(classes))][design],
pch=pchspace[1:length(levels(classes))][design],
xlim=xlim,ylim=ylim,
main="MDS plot",xlab="MDS 1",ylab="MDS 2",
cex=0.9,cex.lab=0.9,cex.axis=0.9,cex.main=0.9)
text(mds.obj$points[,1],mds.obj$points[,2],labels=colnames(x),pos=3,
cex=0.7)
grid()
graphics.close(output)
}
else {
# Create output object
xx <- mds.obj$points[,1]
yy <- mds.obj$points[,2]
names(xx) <- names(yy) <- unlist(sample.list)
obj <- list(
x=xx,
y=yy,
plot=NULL,
samples=sample.list,
ylim=ylim,
xlim=xlim,
status=NULL,
pcut=NULL,
fcut=NULL,
altnames=NULL,
user=NULL
)
json <- mdsToJSON(obj)
fil <- file.path(path,"mds.json")
disp("Writing ",fil)
write(json,fil)
}
return(fil)
}
#' Massive X-Y, M-D correlation plots
#'
#' This function uses the read counts matrix to create pairwise correlation plots.
#' The upper diagonal of the final image contains simple scatterplots of each
#' sample against each other (log2 scale) while the lower diagonal contains
#' mean-difference plots for the same samples (log2 scale). This type of diagnostic
#' plot may not be interpretable for more than 10 samples.
#'
#' @param x the read counts matrix or data frame.
#' @param output one or more R plotting device to direct the plot result to.
#' Supported mechanisms: \code{"x11"} (default), \code{"png"}, \code{"jpg"},
#' \code{"bmp"}, \code{"pdf"} or \code{"ps"}.
#' @param path the path to create output files.
#' @param ... further arguments to be passed to plot devices, such as parameter
#' from \code{\link{par}}.
#' @return The filename of the pairwise comparisons plot produced if it's a file.
#' @export
#' @author Panagiotis Moulos
#' @examples
#' \dontrun{
#' require(DESeq)
#' data.matrix <- counts(makeExampleCountDataSet())
#' diagplot.pairs(data.matrix)
#'}
diagplot.pairs <- function(x,output="x11",path=NULL,...) {
x <- as.matrix(x)
x <- nat2log(x)
n <- ncol(x)
if (!is.null(colnames(x)))
nams <- colnames(x)
else
nams <- paste("Sample_",1:ncol(x),sep="")
if (!is.null(path))
fil <- file.path(path,paste("correlation_pairs",output,sep="."))
else
fil <- paste("correlation_pairs",output,sep=".")
if (output %in% c("pdf","ps","x11"))
graphics.open(output,fil,width=12,height=12)
else {
if (ncol(x)<=5)
graphics.open(output,fil,width=800,height=800,res=100)
else
graphics.open(output,fil,width=1024,height=1024,res=150)
}
# Setup the grid
par(mfrow=c(n,n),mar=c(1,1,1,1),oma=c(1,1,0,0),mgp=c(2,0.5,0),cex.axis=0.6,
cex.lab=0.6)
# Plot
for (i in 1:n)
{
for (j in 1:n)
{
if (i==j)
{
plot(0:10,0:10,type="n",xaxt="n",yaxt="n",xlab="",ylab="") # Diagonal
text(c(3,5,3),c(9.5,5,1),c("X-Y plots",nams[i],"M-D plots"),
cex=c(0.8,1,0.8))
arrows(6,9.5,9.5,9.5,angle=20,length=0.1,lwd=0.8,cex=0.8)
arrows(0.2,3.2,0.2,0.2,angle=20,length=0.1,lwd=0.8,cex=0.8)
}
else if (i<j) # XY plot
{
plot(x[,i],x[,j],pch=20,col="blue",cex=0.4,xlab=nams[i],
ylab=nams[j],...)
lines(lowess(x[,i],x[,j]),col="red")
cc <- paste("cor:",formatC(cor(x[,i],x[,j]),digits=3))
text(3,max(x[,j]-1),labels=cc,cex=0.7,)
#grid()
}
else if (i>j) # MD plot
{
plot((x[,i]+x[,j])/2,x[,j]-x[,i],pch=20,col="blue",cex=0.4,...)
lines(lowess((x[,i]+x[,j])/2,x[,j]-x[,i]),col="red")
#grid()
}
}
}
graphics.close(output)
return(fil)
}
#' Summarized correlation plots
#'
#' This function uses the read counts matrix to create heatmap or correlogram
#' correlation plots.
#'
#' @param mat the read counts matrix or data frame.
#' @param type create heatmap of correlogram plots.
#' @param output one or more R plotting device to direct the plot result to.
#' Supported mechanisms: \code{"x11"} (default), \code{"png"}, \code{"jpg"},
#' \code{"bmp"}, \code{"pdf"} or \code{"ps"}.
#' @param path the path to create output files.
#' @param ... further arguments to be passed to plot devices, such as parameter
#' from \code{\link{par}}.
#' @return The filename of the pairwise comparisons plot produced if it's a file.
#' @export
#' @author Panagiotis Moulos
#' @examples
#' \dontrun{
#' require(DESeq)
#' data.matrix <- counts(makeExampleCountDataSet())
#' diagplot.cor(data.matrix,type="heatmap")
#' diagplot.cor(data.matrix,type="correlogram")
#'}
diagplot.cor <- function(mat,type=c("heatmap","correlogram"),output="x11",
path=NULL,...) {
x <- as.matrix(mat)
type <- tolower(type[1])
check.text.args("type",type,c("heatmap","correlogram"))
#if (!require(corrplot) && type=="correlogram")
# stop("R package corrplot is required!")
cor.mat <- cor(mat)
if (!is.null(colnames(mat)))
colnames(cor.mat) <- colnames(mat)
if (!is.null(path))
fil <- file.path(path,paste("correlation_",type,".",output,sep=""))
else
fil <- paste("correlation_",type,".",output,sep="")
if (output %in% c("pdf","ps","x11"))
graphics.open(output,fil,width=7,height=7)
else
graphics.open(output,fil,width=640,height=640,res=100)
if (type=="correlogram")
corrplot(cor.mat,method="ellipse",order="hclust",...)
else if (type=="heatmap") {
n <- dim(cor.mat)[1]
labs <- matrix(NA,n,n)
for (i in 1:n)
for (j in 1:n)
labs[i,j] <- sprintf("%.2f",cor.mat[i,j])
if (n <= 5)
notecex <- 1.2
else if (n > 5 & n < 10)
notecex <- 0.9
else
notecex <- 0.7
heatmap.2(cor.mat,col=colorRampPalette(c("yellow","grey","blue")),
revC=TRUE,trace="none",symm=TRUE,Colv=TRUE,cellnote=labs,
keysize=1,density.info="density",notecex=notecex,cexCol=0.9,
cexRow=0.9,font.lab=2)
}
graphics.close(output)
return(fil)
}
#' Diagnostic plots based on the EDASeq package
#'
#' A wrapper around the plotting functions availale in the EDASeq normalization
#' Bioconductor package. For analytical explanation of each plot please see the
#' vignette of the EDASeq package. It is best to use this function through the
#' main plotting function \code{\link{diagplot.metaseqr}}.
#'
#' @param x the count data matrix.
#' @param sample.list the list containing condition names and the samples under
#' each condition.
#' @param covar The covariate to plot counts against. Usually \code{"gc"} or
#' \code{"length"}.
#' @param is.norm a logical indicating whether object contains raw or normalized
#' data. It is not essential and it serves only plot annotation purposes.
#' @param which.plot the EDASeq package plot to generate. It can be one or more
#' of \code{"meanvar"}, \code{"meandiff"}, \code{"gcbias"} or \code{"lengthbias"}.
#' Please refer to the documentation of the EDASeq package for details on the use
#' of these plots. The \code{which.plot="lengthbias"} case is not covered by
#' EDASeq documentation, however it is similar to the GC-bias plot when the
#' covariate is the gene length instead of the GC content.
#' @param output one or more R plotting device to direct the plot result to.
#' Supported mechanisms: \code{"x11"} (default), \code{"png"}, \code{"jpg"},
#' \code{"bmp"}, \code{"pdf"} or \code{"ps"}.
#' @param path the path to create output files.
#' @param ... further arguments to be passed to plot devices, such as parameter
#' from \code{\link{par}}.
#' @return The filenames of the plot produced in a named list with names the
#' which.plot argument. If \code{output="x11"}, no output filenames are produced.
#' @export
#' @author Panagiotis Moulos
#' @examples
#' \dontrun{
#' require(DESeq)
#' data.matrix <- counts(makeExampleCountDataSet())
#' sample.list <- list(A=c("A1","A2"),B=c("B1","B2","B3"))
#' gc <- runif(nrow(data.matrix))
#' diagplot.edaseq(data.matrix,sample.list,which.plot="meandiff")
#'}
diagplot.edaseq <- function(x,sample.list,covar=NULL,is.norm=FALSE,
which.plot=c("meanvar","meandiff","gcbias","lengthbias"),output="x11",
path=NULL,...) {
if (is.null(path)) path <- getwd()
check.text.args("which.plot",which.plot,c("meanvar","meandiff","gcbias",
"lengthbias"),multiarg=TRUE)
if (is.null(covar) && which.plot %in% c("gcbias","lengthbias"))
stopwrap("\"covar\" argument is required when \"which.plot\" is ",
"\"gcbias\ or \"lengthbias\"!")
if (is.norm)
status <- "normalized"
else
status <- "raw"
if (is.null(covar)) covar <- rep(NA,nrow(x))
s <- newSeqExpressionSet(x,phenoData=AnnotatedDataFrame(
data.frame(conditions=factor(as.class.vector(sample.list)),
row.names=colnames(x))),featureData=AnnotatedDataFrame(data.frame(
gc=covar,length=covar,row.names=rownames(x))))
switch(which.plot,
meandiff = {
fil <- vector("list",length(sample.list))
names(fil) <- names(sample.list)
for (n in names(sample.list)) {
if (length(sample.list[[n]])==1) {
warnwrap("Cannot create a mean-difference plot with one ",
"sample per condition! Skipping...")
next
}
pair.matrix <- combn(1:length(sample.list[[n]]),2)
fil[[n]] <- vector("list",ncol(pair.matrix))
for (i in 1:ncol(pair.matrix)) {
s1 <- sample.list[[n]][pair.matrix[1,i]]
s2 <- sample.list[[n]][pair.matrix[2,i]]
fil[[n]][[i]] <- file.path(path,paste(which.plot,"_",
status,"_",n,"_",s1,"_",s2,".",output,sep=""))
names(fil[[n]][i]) <- paste(s1,"vs",s2,sep="_")
graphics.open(output,fil[[n]][[i]])
MDPlot(s,y=pair.matrix[,i],main=paste("MD plot for ",n," ",
status," samples ",s1," and ",s2,sep=""),cex.main=0.9)
graphics.close(output)
}
}
},
meanvar = {
fil <- vector("list",length(sample.list))
names(fil) <- names(sample.list)
for (n in names(sample.list)) {
if (length(sample.list[[n]])==1) {
warnwrap("Cannot create a mean-variance plot with one ",
"sample per condition! Skipping...")
next
}
pair.matrix <- combn(1:length(sample.list[[n]]),2)
fil[[n]] <- vector("list",ncol(pair.matrix))
for (i in 1:ncol(pair.matrix)) {
s1 <- sample.list[[n]][pair.matrix[1,i]]
s2 <- sample.list[[n]][pair.matrix[2,i]]
fil[[n]][[i]] <- file.path(path,paste(which.plot,"_",status,
"_",n,"_",s1,"_",s2,".",output,sep=""))
names(fil[[n]][i]) <- paste(s1,"vs",s2,sep="_")
graphics.open(output,fil[[n]][[i]])
suppressWarnings(meanVarPlot(s,main=paste("MV plot for ",n,
" ",status," samples ",s1," and ",s2,sep=""),
cex.main=0.9))
graphics.close(output)
}
}
},
gcbias = {
if (!output=="json") {
fil <- file.path(path,paste(which.plot,"_",status,".",output,
sep=""))
graphics.open(output,fil)
biasPlot(s,"gc",xlim=c(0.1,0.9),log=TRUE,ylim=c(0,15),
main=paste("Expression - GC content ",status,sep=""))
grid()
graphics.close(output)
}
else {
obj <- list(
x=NULL,
y=NULL,
plot=NULL,
samples=sample.list,
ylim=NULL,
xlim=NULL,
status=status,
pcut=NULL,
fcut=NULL,
altnames=NULL,
user=list(counts=x,covar=covar,covarname="GC content")
)
json <- biasPlotToJSON(obj)
fil <- file.path(path,paste(which.plot,"_",status,".json",
sep=""))
disp("Writing ",fil)
write(json,fil)
}
},
lengthbias = {
if (output!="json") {
fil <- file.path(path,paste(which.plot,"_",status,".",output,
sep=""))
graphics.open(output,fil)
biasPlot(s,"length",log=TRUE,ylim=c(0,10),
main=paste("Expression - Gene length ",status,sep=""))
grid()
graphics.close(output)
}
else {
obj <- list(
x=NULL,
y=NULL,
plot=NULL,
samples=sample.list,
ylim=NULL,
xlim=NULL,
status=status,
pcut=NULL,
fcut=NULL,
altnames=NULL,
user=list(counts=x,covar=covar,
covarname="Gene/transcript length")
)
json <- biasPlotToJSON(obj)
fil <- file.path(path,paste(which.plot,"_",status,".json",
sep=""))
disp("Writing ",fil)
write(json,fil)
}
}
)
return(fil)
}
#' Diagnostic plots based on the NOISeq package
#'
#' A wrapper around the plotting functions availale in the NOISeq Bioconductor
#' package. For analytical explanation of each plot please see the vignette of
#' the NOISeq package. It is best to use this function through the main plotting
#' function \code{\link{diagplot.metaseqr}}.
#'
#' @param x the count data matrix.
#' @param sample.list the list containing condition names and the samples under
#' each condition.
#' @param covars a list (whose annotation elements are ideally a subset of an
#' annotation data frame produced by \code{\link{get.annotation}})
#' with the following members: data (the data matrix), length (gene length), gc
#' (the gene gc_content), chromosome (a data frame with chromosome name and
#' co-ordinates), factors (a factor with the experimental condition names
#' replicated by the number of samples in each experimental condition) and biotype
#' (each gene's biotype as depicted in Ensembl-like annotations).
#' @param which.plot the NOISeq package plot to generate. It can be one or more
#' of \code{"biodetection"}, \code{"countsbio"}, \code{"saturation"},
#' \code{"rnacomp"}, \code{"readnoise"} or \code{"biodist"}. Please refer to the
#' documentation of the EDASeq package for details on the use of these plots. The
#' \code{which.plot="saturation"} case is modified to be more informative by
#' producing two kinds of plots. See \code{\link{diagplot.noiseq.saturation}}.
#' @param biodist.opts a list with the following members: p (a vector of p-values,
#' e.g. the p-values of a contrast), pcut (a unique number depicting a p-value
#' cutoff, required for the \code{"biodist"} case), name (a name for the
#' \code{"biodist"} plot, e.g. the name of the contrast.
#' @param output one or more R plotting device to direct the plot result to.
#' Supported mechanisms: \code{"x11"} (default), \code{"png"}, \code{"jpg"},
#' \code{"bmp"}, \code{"pdf"} or \code{"ps"}.
#' @param path the path to create output files.
#' @param is.norm a logical indicating whether object contains raw or normalized
#' data. It is not essential and it serves only plot annotation purposes.
#' @param ... further arguments to be passed to plot devices, such as parameter
#' from \code{\link{par}}.
#' @return The filenames of the plots produced in a named list with names the
#' \code{which.plot} argument. If \code{output="x11"}, no output filenames are
#' produced.
#' @note Please note that in case of \code{"biodist"} plots, the behavior of the
#' function is unstable, mostly due to the very specific inputs this plotting
#' function accepts in the NOISeq package. We have tried to predict unstable
#' behavior and avoid exceptions through the use of tryCatch but it's still
#' possible that you might run onto an error.
#' @export
#' @author Panagiotis Moulos
#' @examples
#' \dontrun{
#' require(DESeq)
#' data.matrix <- counts(makeExampleCountDataSet())
#' sample.list <- list(A=c("A1","A2"),B=c("B1","B2","B3"))
#' lengths <- round(1000*runif(nrow(data.matrix)))
#' starts <- round(1000*runif(nrow(data.matrix)))
#' ends <- starts + lengths
#' covars <- list(
#' data=data.matrix,
#' length=lengths,
#' gc=runif(nrow(data.matrix)),
#' chromosome=data.frame(
#' chromosome=c(rep("chr1",nrow(data.matrix)/2),rep("chr2",nrow(data.matrix)/2)),
#' start=starts,
#' end=ends
#' ),
#' factors=data.frame(class=as.class.vector(sample.list)),
#' biotype=c(rep("protein_coding",nrow(data.matrix)/2),rep("ncRNA",
#' nrow(data.matrix)/2))
#' )
#' p <- runif(nrow(data.matrix))
#' diagplot.noiseq(data.matrix,sample.list,covars=covars,
#' biodist.opts=list(p=p,pcut=0.1,name="A_vs_B"))
#'}
diagplot.noiseq <- function(x,sample.list,covars,which.plot=c("biodetection",
"countsbio","saturation","rnacomp","readnoise","biodist"),output="x11",
biodist.opts=list(p=NULL,pcut=NULL,name=NULL),path=NULL,is.norm=FALSE,
...) {
if (is.null(path)) path <- getwd()
# covars is a list of gc-content, factors, length, biotype, chromosomes,
# factors, basically copy of the noiseq object
which.plot <- tolower(which.plot[1])
check.text.args("which.plot",which.plot,c("biodetection","countsbio",
"saturation","readnoise","rnacomp","biodist"),multiarg=FALSE)
if (missing(covars))
stopwrap("\"covars\" argument is required with NOISeq specific plots!")
else {
covars$biotype <- as.character(covars$biotype)
names(covars$length) <- names(covars$gc) <-
rownames(covars$chromosome) <- names(covars$biotype) <-
rownames(x)
}
if (which.plot=="biodist") {
if (is.null(biodist.opts$p))
stopwrap("A p-value must be provided for the \"biodist\" plot!")
if (is.null(biodist.opts$pcut) || is.na(biodist.opts$pcut))
biodist.opts$pcut=0.05
}
if (is.norm)
status<- "normalized"
else
status<- "raw"
# All of these plots are NOISeq specific so we need a local NOISeq object
if (any(is.na(unique(covars$biotype))))
covars$biotype=NULL # Otherwise, it will probably crash
local.obj <- NOISeq::readData(
data=x,
length=covars$gene.length,
gc=covars$gc.content,
chromosome=covars$chromosome,
#factors=data.frame(class=covars$factors),
factors=covars$factors,
biotype=covars$biotype
)
switch(which.plot,
biodetection = {
diagplot.data <- NOISeq::dat(local.obj,type=which.plot)
samples <- unlist(sample.list)
if (output!="json") {
fil <- character(length(samples))
names(fil) <- samples
for (i in 1:length(samples)) {
fil[samples[i]] <- file.path(path,paste(which.plot,"_",
samples[i],".",output,sep=""))
if (output %in% c("pdf","ps","x11"))
graphics.open(output,fil[samples[i]],width=9,height=7)
else
graphics.open(output,fil[samples[i]],width=1024,height=768)
explo.plot(diagplot.data,samples=i)
graphics.close(output)
}
}
else {
diagplot.data.save = NOISeq::dat2save(diagplot.data)
obj <- list(
x=NULL,
y=NULL,
plot=NULL,
samples=sample.list,
ylims=NULL,
xlims=NULL,
status=status,
pcut=NULL,
fcut=NULL,
altnames=covars$gene_name,
user=list(plotdata=diagplot.data.save,covars=covars)
)
json <- bioDetectionToJSON(obj)
fil <- character(length(samples))
names(fil) <- samples
for (i in 1:length(samples)) {
fil[samples[i]] <- file.path(path,
paste(which.plot,"_",samples[i],".json",sep=""))
disp("Writing ",fil[samples[i]])
write(json[[i]],fil[samples[i]])
}
}
},
countsbio = {
samples <- unlist(sample.list)
if (output!="json") {
diagplot.data <- NOISeq::dat(local.obj,type=which.plot,
factor=NULL)
fil <- character(length(samples))
names(fil) <- samples
for (i in 1:length(samples)) {
fil[samples[i]] <- file.path(path,paste(which.plot,"_",
samples[i],".",output,sep=""))
if (output %in% c("pdf","ps","x11"))
graphics.open(output,fil[samples[i]],width=9,height=7)
else
graphics.open(output,fil[samples[i]],width=1024,
height=768)
explo.plot(diagplot.data,samples=i,plottype="boxplot")
graphics.close(output)
}
}
else {
colnames(x) <- unlist(sample.list)
obj <- list(
x=NULL,
y=NULL,
plot=NULL,
samples=sample.list,
ylims=NULL,
xlims=NULL,
status=status,
pcut=NULL,
fcut=NULL,
altnames=covars$gene_name,
user=list(counts=nat2log(x),covars=covars)
)
# Write JSON by sample
fil <- vector("list",2)
names(fil) <- c("sample","biotype")
fil[["sample"]] <- character(length(samples))
names(fil[["sample"]]) <- samples
bts <- unique(as.character(obj$user$covars$biotype))
fil[["biotype"]] <- character(length(bts))
names(fil[["biotype"]]) <- bts
json <- countsBioToJSON(obj,by="sample")
for (i in 1:length(samples)) {
fil[["sample"]][samples[i]] <- file.path(path,
paste(which.plot,"_",samples[i],".json",sep=""))
disp("Writing ",fil[["sample"]][samples[i]])
write(json[[i]],fil[["sample"]][samples[i]])
}
json <- countsBioToJSON(obj,by="biotype")
for (i in 1:length(bts)) {
fil[["biotype"]][bts[i]] <- file.path(path,
paste(which.plot,"_",bts[i],".json",sep=""))
disp("Writing ",fil[["biotype"]][bts[i]])
write(json[[i]],fil[["biotype"]][bts[i]])
}
}
},
saturation = {
# For 10 saturation points
diagplot.data <- NOISeq::dat(local.obj,k=0,ndepth=9,type=which.plot)
d2s <- NOISeq::dat2save(diagplot.data)
if (output != "json")
fil <- diagplot.noiseq.saturation(d2s,output,covars$biotype,
path=path)
else {
samples <- unlist(sample.list)
obj <- list(
x=NULL,
y=NULL,
plot=NULL,
samples=sample.list,
ylims=NULL,
xlims=NULL,
status=status,
pcut=NULL,
fcut=NULL,
altnames=covars$gene_name,
user=list(plotdata=d2s)
)
# Write JSON by sample
fil <- vector("list",2)
names(fil) <- c("sample","biotype")
fil[["sample"]] <- character(length(samples))
names(fil[["sample"]]) <- samples
json <- bioSaturationToJSON(obj,by="sample")
for (i in 1:length(samples)) {
fil[["sample"]][samples[i]] <- file.path(path,
paste(which.plot,"_",samples[i],".json",sep=""))
disp("Writing ",fil[["sample"]][samples[i]])
write(json[[i]],fil[["sample"]][samples[i]])
}
json <- bioSaturationToJSON(obj,by="biotype")
fil[["biotype"]] <- character(length(json))
names(fil[["biotype"]]) <- names(json)
for (n in names(json)) {
fil[["biotype"]][n] <- file.path(path,
paste(which.plot,"_",n,".json",sep=""))
disp("Writing ",fil[["biotype"]][n])
write(json[[n]],fil[["biotype"]][n])
}
}
},
rnacomp = {
if (ncol(local.obj)<3) {
warnwrap("RNA composition plot cannot be created with less ",
"than 3 samples! Skipping...")
return(NULL)
}
if (ncol(local.obj)>12) {
warnwrap("RNA composition plot cannot be created with more ",
"than 12 samples! Skipping...")
return(NULL)
}
diagplot.data <- NOISeq::dat(local.obj,type="cd")
fil <- file.path(path,paste(which.plot,"_",status,".",output,
sep=""))
graphics.open(output,fil)
explo.plot(diagplot.data)
grid()
graphics.close(output)
},
readnoise = {
D <- cddat(local.obj)
if (output!="json") {
fil <- file.path(path,paste(which.plot,".",output,sep=""))
graphics.open(output,fil)
cdplot(D,main="RNA-Seq reads noise")
grid()
graphics.close(output)
}
else {
colnames(D$data2plot)[2:ncol(D$data2plot)] <-
unlist(sample.list)
obj <- list(
x=NULL,
y=NULL,
plot=NULL,
samples=sample.list,
xlim=NULL,
ylim=NULL,
status=NULL,
pcut=NULL,
fcut=NULL,
altnames=NULL,
user=D$data2plot
)
json <- readNoiseToJSON(obj)
fil <- file.path(path,paste(which.plot,".json",sep=""))
disp("Writing ",fil)
write(json,fil)
}
},
biodist = { # We have to fake a noiseq object
p <- biodist.opts$p
if (is.matrix(p)) p <- p[,1]
dummy <- new("Output",
comparison=c("Dummy.1","Dummy.2"),
factor=c("class"),
k=1,
lc=1,
method="n",
replicates="biological",
results=list(
data.frame(
Dummy.1=rep(1,length(p)),
Dummy.2=rep(1,length(p)),
M=rep(1,length(p)),
D=rep(1,length(p)),
prob=as.numeric(p),
ranking=rep(1,length(p)),
Length=rep(1,length(p)),
GC=rep(1,length(p)),
Chrom=as.character(covars$chromosome[,1]),
GeneStart=covars$chromosome[,2],
GeneEnd=covars$chromosome[,3],
Biotype=covars$biotype
)
),
nss=5,
pnr=0.2,
v=0.02
)
if (!is.null(biodist.opts$name))
fil <- file.path(path,paste(which.plot,"_",biodist.opts$name,
".",output,sep=""))
else
fil <- file.path(path,paste(which.plot,".",output,sep=""))
if (output %in% c("pdf","ps","x11"))
graphics.open(output,fil,width=10,height=6)
else
graphics.open(output,fil,width=1024,height=640)
tryCatch( # A lot of times, there is a problem with this function
DE.plot(dummy,chromosomes=NULL,q=biodist.opts$pcut,
graphic="distr"),
error=function(e) {
disp(" Known problem with NOISeq and external ",
"p-values detected! Trying to make a plot with ",
"alternative p-values (median of p-value ",
"distribution)...")
fil="error"
tryCatch(
DE.plot(dummy,chromosomes=NULL,
q=quantile(biodist.opts$p,0.5),
graphic="distr"),
error=function(e) {
disp(" Cannot create DEG biotype plot! This ",
"is not related to a problem with the ",
"results. Excluding...")
fil="error"
},
finally=""
)
},
finally=""
)
graphics.close(output)
}
)
return(fil)
}
#' Simpler implementation of saturation plots inspired from NOISeq package
#'
#' Helper function for \code{\link{diagplot.noiseq}} to plot feature detection
#' saturation as presented in the NOISeq package vignette. It has two main outputs:
#' a set of figures, one for each input sample depicting the saturation for each
#' biotype and one single multiplot which depicts the saturation of all samples
#' for each biotype. It expands the saturation plots of NOISeq by allowing more
#' samples to be examined in a simpler way. Don't use this function directly. Use
#' either \code{\link{diagplot.metaseqr}} or \code{\link{diagplot.noiseq}}.
#'
#' @param x the count data matrix.
#' @param o one or more R plotting device to direct the plot result to. Supported
#' mechanisms: \code{"x11"} (default), \code{"png"}, \code{"jpg"}, \code{"bmp"},
#' \code{"pdf"} or \code{"ps"}.
#' @param tb the vector of biotypes, one for each row of x.
#' @param path the path to create output files.
#' @return The filenames of the plots produced in a named list with names the
#' \code{which.plot} argument. If \code{output="x11"}, no output filenames are
#' produced.
#' @author Panagiotis Moulos
#' @examples
#' \dontrun{
#' require(DESeq)
#' data.matrix <- counts(makeExampleCountDataSet())
#' biotype=c(rep("protein_coding",nrow(data.matrix)/2),rep("ncRNA",
#' nrow(data.matrix)/2))
#' diagplot.noiseq.saturation(data.matrix,"x11",biotype)
#'}
diagplot.noiseq.saturation <- function(x,o,tb,path=NULL) {
if (is.null(path)) path <- getwd()
if (length(unique(tb))==1) {
warnwrap("Saturation plot cannot be created with only one biotype! ",
"Skipping...")
return(NULL)
}
total.biotypes <- table(tb)
the.biotypes <- names(tb)
biotypes <- colnames(x[[1]][,2:ncol(x[[1]])])
colspace <- c("red3","green4","blue2","orange3","burlywood",
"lightpink4","gold","skyblue","red2","green2","firebrick3",
"orange4","yellow4","skyblue3","tan4","gray40",
"brown2","darkgoldenrod","cyan3","coral2","cadetblue",
"bisque3","blueviolet","chocolate3","darkkhaki","dodgerblue")
pchspace <- c(rep(c(15,16,17,18),6),15)
# Plot all biotypes per sample
f.sample <- character(length(names(x)))
names(f.sample) <- names(x)
for (n in names(x)) {
f.sample[n] <- file.path(path,paste("saturation_",n,".",o,sep=""))
if (o %in% c("pdf","ps","x11"))
graphics.open(o,f.sample[n],width=10,height=7)
else
graphics.open(o,f.sample[n],width=1024,height=800)
y <- x[[n]]
sep <- match(c("global","protein_coding"),colnames(y))
yab <- cbind(y[,"depth"],y[,sep])
ynab <- y[,-sep]
colnames(yab)[1] <- colnames(ynab)[1] <- "depth"
xlim <- range(y[,"depth"])
ylim.ab <- range(yab[,2:ncol(yab)])
ylim.nab <- range(ynab[,2:ncol(ynab)])
par(cex.axis=0.9,cex.main=1,cex.lab=0.9,font.lab=2,font.axis=2,pty="m",
lty=2,lwd=1.5,mfrow=c(1,2))
plot.new()
plot.window(xlim,ylim.nab)
axis(1,at=pretty(xlim,10),labels=as.character(pretty(xlim,10)/1e+6))
axis(2,at=pretty(ylim.nab,10))
title(main="Non abundant biotype detection saturation",
xlab="Depth in millions of reads",ylab="Detected features")
co <- 0
for (b in biotypes) {
co <- co + 1
if (b=="global" || b=="protein_coding") {
# Silently do nothing
}
else {
lines(ynab[,"depth"],ynab[,b],col=colspace[co])
points(ynab[,"depth"],ynab[,b],pch=pchspace[co],
col=colspace[co],cex=1)
}
}
grid()
graphics::legend(
x="topleft",legend=colnames(ynab)[2:ncol(ynab)],xjust=1,yjust=0,
box.lty=0,x.intersp=0.5,cex=0.6,text.font=2,
col=colspace[1:(ncol(ynab)-1)],pch=pchspace[1:(ncol(ynab)-1)]
)
plot.new()
plot.window(xlim,ylim.ab)
axis(1,at=pretty(xlim,10),labels=as.character(pretty(xlim,10)/1e+6))
axis(2,at=pretty(ylim.ab,10))
title(main="Abundant biotype detection saturation",
xlab="Depth in millions of reads",ylab="Detected features")
co <- 0
for (b in c("global","protein_coding")) {
co <- co + 1
lines(yab[,"depth"],yab[,b],col=colspace[co])
points(yab[,"depth"],yab[,b],pch=16,col=colspace[co],cex=1.2)
}
grid()
graphics::legend(
x="topleft",legend=c("global","protein_coding"),xjust=1,yjust=0,
box.lty=0,lty=2,x.intersp=0.5,cex=0.7,text.font=2,
col=colspace[1:2],pch=pchspace[1:2]
)
mtext(n,side=3,line=-1.5,outer=TRUE,font=2,cex=1.3)
graphics.close(o)
}
# Plot all samples per biotype
g <- make.grid(length(biotypes))
f.all <- file.path(path,paste("biotype_saturation.",o,sep=""))
if (o %in% c("pdf","ps"))
graphics.open(o,f.all,width=14,height=14)
else
graphics.open(o,f.all,width=1600,height=1600,res=150)
par(cex.axis=0.8,cex.main=0.9,cex.lab=0.8,pty="m",lty=2,lwd=1.5,mfrow=g,
mar=c(3,3,1,1),oma=c(1,1,0,0),mgp=c(2,0.5,0))
for (b in biotypes) {
y <- depth <- vector("list",length(x))
names(y) <- names(depth) <- names(x)
for (n in names(x)) {
y[[n]] <- x[[n]][,b]
depth[[n]] <- x[[n]][,"depth"]
}
y <- do.call("cbind",y)
xlim <- range(do.call("c",depth))
ylim <- range(y)
plot.new()
plot.window(xlim,ylim)
axis(1,at=pretty(xlim,5),labels=as.character(pretty(xlim,5)/1e+6),
line=0.5)
axis(2,at=pretty(ylim,5),line=0.5)
title(main=b,xlab="Depth in millions of reads",
ylab="Detected features")
co <- 0
for (n in colnames(y)) {
co <- co + 1
lines(depth[[n]],y[,n],col=colspace[co])
points(depth[[n]],y[,n],pch=pchspace[co],col=colspace[co])
}
grid()
graphics::legend(
x="bottomright",legend=colnames(y),xjust=1,yjust=0,
box.lty=0,x.intersp=0.5,
col=colspace[1:length(colnames(y))],
pch=pchspace[1:length(colnames(y))]
)
}
graphics.close(o)
return(list(sample=f.sample,biotype=f.all))
}
#' (Interactive) volcano plots of differentially expressed genes
#'
#' This function plots a volcano plot or returns a JSON string which is used to
#' render aninteractive in case of HTML reporting.
#'
#' @param f the fold changes which are to be plotted on the x-axis.
#' @param p the p-values whose -log10 transformation is going to be plotted on
#' the y-axis.
#' @param con an optional string depicting a name (e.g. the contrast name) to
#' appear in the title of the volcano diagplot.
#' @param fcut a fold change cutoff so as to draw two vertical lines indicating
#' the cutoff threshold for biological significance.
#' @param pcut a p-value cutoff so as to draw a horizontal line indicating the
#' cutoff threshold for statistical significance.
#' @param alt.names an optional vector of names, e.g. HUGO gene symbols, alternative
#' or complementary to the unique names of \code{f} or \code{p} (one of them must
#' be named!). It is used only in JSON output.
#' @param output one or more R plotting device to direct the plot result to.
#' Supported mechanisms: \code{"x11"} (default), \code{"png"}, \code{"jpg"},
#' \code{"bmp"}, \code{"pdf"}, \code{"ps"} or \code{"json"}. The latter is currently
#' available for the creation of interactive volcano plots only when reporting the
#' output, through the highcharts javascript library.
#' @param path the path to create output files.
#' @param ... further arguments to be passed to plot devices, such as parameter
#' from \code{\link{par}}.
#' @return The filenames of the plots produced in a named list with names the
#' \code{which.plot} argument. If \code{output="x11"}, no output filenames are
#' produced.
#' @export
#' @author Panagiotis Moulos
#' @examples
#' \dontrun{
#' require(DESeq)
#' data.matrix <- counts(makeExampleCountDataSet())
#' sample.list <- list(A=c("A1","A2"),B=c("B1","B2","B3"))
#' contrast <- "A_vs_B"
#' M <- norm.edger(data.matrix,sample.list)
#' p <- stat.edger(M,sample.list,contrast)
#' ma <- apply(M[,sample.list$A],1,mean)
#' mb <- apply(M[,sample.list$B],1,mean)
#' f <- log2(ifelse(mb==0,1,mb)/ifelse(ma==0,1,ma))
#' diagplot.volcano(f,p,con=contrast)
#' j <- diagplot.volcano(f,p,con=contrast,output="json")
#'}
diagplot.volcano <- function(f,p,con=NULL,fcut=1,pcut=0.05,alt.names=NULL,
output="x11",path=NULL,...) { # output can be json here...
## Check rjson
#if ("json" %in% output && !require(rjson))
# stopwrap("R package rjson is required to create interactive volcano plot!")
if (is.null(path)) path <- getwd()
if (is.null(con))
con <- conn <- ""
else {
conn <- con
con <- paste("for ",con)
}
fil <- file.path(path,paste("volcano_plot_",conn,".",output,sep=""))
if (output!="json") {
if (output %in% c("pdf","ps","x11"))
graphics.open(output,fil,width=8,height=10)
else
graphics.open(output,fil,width=768,height=1024,res=100)
}
rem <- which(is.na(p))
if (length(rem)>0) {
p <- p[-rem]
f <- f[-rem]
if (!is.null(alt.names))
alt.names <- alt.names[-rem]
}
# Fix problem with extremely low p-values, only for display purposes though
p.zero <- which(p==0)
if (length(p.zero)>0)
p[p.zero] <- runif(length(p.zero),0,1e-256)
xlim <- c(-max(abs(f)),max(abs(f)))
ylim <- c(0,ceiling(-log10(min(p))))
up <- which(f>=fcut & p<pcut)
down <- which(f<=-fcut & p<pcut)
u <- union(up,down)
alt.names.neutral <- NULL
if (length(u)>0) {
ff <- f[-u]
pp <- p[-u]
if (!is.null(alt.names))
alt.names.neutral <- alt.names[-u]
}
else {
ff <- f
pp <- p
if (!is.null(alt.names))
alt.names.neutral <- alt.names
}
if (output!="json") {
par(cex.main=1.1,cex.lab=1.1,cex.axis=1.1,font.lab=2,font.axis=2,
pty="m",lwd=1.5)
plot.new()
plot.window(xlim,ylim)
axis(1,at=pretty(xlim,10),labels=as.character(pretty(xlim,10)))
axis(2,at=pretty(ylim,10))
title(paste(main="Volcano plot",con),
xlab="Fold change",ylab="-log10(p-value)")
points(ff,-log10(pp),pch=20,col="blue2",cex=0.9)
points(f[down],-log10(p[down]),pch=20,col="green3",cex=0.9)
points(f[up],-log10(p[up]),pch=20,col="red2",cex=0.9)
abline(h=-log10(pcut),lty=4)
abline(v=-fcut,lty=2)
abline(v=fcut,lty=2)
grid()
graphics::legend(
x="topleft",
legend=c("up-regulated","down-regulated","unregulated",
"p-value threshold","fold change threshold"),
col=c("red2","green3","blue1","black","black"),
pch=c(20,20,20,NA,NA),lty=c(NA,NA,NA,4,2),
xjust=1,yjust=0,box.lty=0,x.intersp=0.5,cex=0.8,text.font=2
)
graphics.close(output)
}
else {
obj <- list(
x=f,
y=p,
plot=NULL,
samples=NULL,
xlim=xlim,
ylim=ylim,
status=NULL,
pcut=pcut,
fcut=fcut,
altnames=alt.names,
user=list(up=up,down=down,unf=ff,unp=pp,ualt=alt.names.neutral,
con=con)
)
#json <- volcanoToJSON(obj)
#fil <- file.path(path,paste("volcano_",con,".json",sep=""))
#write(json,fil)
fil <- volcanoToJSON(obj)
}
return(fil)
}
#' Diagnostic heatmap of differentially expressed genes
#'
#' This function plots a heatmap of the differentially expressed genes produced
#' by the metaseqr workflow, useful for quality control, e.g. whether samples
#' belonging to the same group cluster together.
#'
#' @param x the data matrix to create a heatmap for.
#' @param con an optional string depicting a name (e.g. the contrast name) to
#' appear in the title of the volcano plot.
#' @param output one or more R plotting device to direct the plot result to.
#' Supported mechanisms: \code{"x11"} (default), \code{"png"}, \code{"jpg"},
#' \code{"bmp"}, \code{"pdf"}, \code{"ps"}.
#' @param path the path to create output files.
#' @param ... further arguments to be passed to plot devices, such as parameter
#' from \code{\link{par}}.
#' @return The filenames of the plots produced in a named list with names the
#' \code{which.plot} argument. If \code{output="x11"}, no output filenames are
#' produced.
#' @export
#' @author Panagiotis Moulos
#' @examples
#' \dontrun{
#' require(DESeq)
#' data.matrix <- counts(makeExampleCountDataSet())
#' sample.list <- list(A=c("A1","A2"),B=c("B1","B2","B3"))
#' contrast <- "A_vs_B"
#' M <- norm.edger(data.matrix,sample.list)
#' p <- stat.edger(M,sample.list,contrast)
#' diagplot.de.heatmap(data.matrix[p[[1]]<0.05])
#'}
diagplot.de.heatmap <- function(x,con=NULL,output="x11",path=NULL,...) {
if (is.null(path)) path <- getwd()
if (is.null(con))
con <- conn <- ""
else {
conn <- con
con <- paste("for ",con)
}
y <- nat2log(x,2,1)
# First plot the normal image
fil <- file.path(path,paste("de_heatmap_",conn,".",output,sep=""))
if (output %in% c("pdf","ps","x11"))
graphics.open(output,fil,width=10,height=10)
else
graphics.open(output,fil,width=800,height=800)
heatmap.2(y,trace="none",col=bluered(16),labRow="",cexCol=0.9,keysize=1,
font.lab=2,main=paste("DEG heatmap",con),cex.main=0.9)
graphics.close(output)
## Then the "interactive" using sendplot
#xy.labels <- list(normalized_counts=x,log2_normalized_counts=y)
#x.labels <- data.frame(
# label=colnames(x),
# description=paste("Sample",colnames(x))
#)
#y.labels <- data.frame(
# label=rownames(x),
# description=paste("Gene ID:",rownames(x))
#)
#suppressWarnings(heatmap.send(
# y,
# distfun=dist,
# hclustfun=hclust,
# MainColor=bluered(16),
# labRow="",
# labCol=NULL,
# keep.dendro=TRUE,
# x.labels=x.labels,
# y.labels=y.labels,
# xy.labels=xy.labels,
# image.size="2048x4096",
# fname.root=paste("iframe_de_heatmap_",conn,sep=""),
# dir=paste(path,.Platform$file.sep,sep=""),
# header="v3",
# window.size="2048x4192"
#))
return(fil)
}
#' Diagnostic plot for filtered genes
#'
#' This function plots a grid of four graphs depicting: in the first row, the
#' numbers of filtered genes per chromosome in the first column and per biotype
#' in the second column. In the second row, the percentages of filtered genes
#' per chromosome related to the whole genome in the first columns and per biotype
#' in the second column.
#'
#' @param x an annotation data frame like the ones produced by
#' \code{\link{get.annotation}}. \code{x} should be the filtered annotation
#' according to metaseqR's filters.
#' @param y an annotation data frame like the ones produced by
#' \code{\link{get.annotation}}. \code{y} should contain the total annotation
#' without the application of any metaseqr filter.
#' @param output one or more R plotting device to direct the plot result to.
#' Supported mechanisms: \code{"x11"} (default), \code{"png"}, \code{"jpg"},
#' \code{"bmp"}, \code{"pdf"} or \code{"ps"}.
#' @param path the path to create output files.
#' @param ... further arguments to be passed to plot devices, such as parameter
#' from \code{\link{par}}.
#' @return The filenames of the plots produced in a named list with names the
#' \code{which.plot} argument. If output=\code{"x11"}, no output filenames are
#' produced.
#' @export
#' @author Panagiotis Moulos
#' @examples
#' \dontrun{
#' y <- get.annotation("mm9","gene")
#' x <- y[-sample(1:nrow(y),10000),]
#' diagplot.filtered(x,y)
#'}
diagplot.filtered <- function(x,y,output="x11",path=NULL,...) {
if (output !="json") {
if (is.null(path)) path <- getwd()
fil <- file.path(path,paste("filtered_genes.",output,sep=""))
if (output %in% c("pdf","ps","x11"))
graphics.open(output,fil,width=12,height=8)
else
graphics.open(output,fil,width=1200,height=800,res=100)
chr <- table(as.character(x$chromosome))
bt <- table(as.character(x$biotype))
chr.all <- table(as.character(y$chromosome))
bt.all <- table(as.character(y$biotype))
barlab.chr <- as.character(chr)
barlab.bt <- as.character(bt)
per.chr <- chr/chr.all[names(chr)]
per.bt <- bt/bt.all[names(bt)]
# Some bug...
per.chr[per.chr>1] <- 1
per.bt[per.bt>1] <- 1
#
suppressWarnings(per.chr.lab <- paste(formatC(100*per.chr,digits=1,
format="f"),"%",sep=""))
suppressWarnings(per.bt.lab <- paste(formatC(100*per.bt,digits=1,
format="f"),"%",sep=""))
par(mfrow=c(2,2),mar=c(1,4,2,1),oma=c(1,1,1,1))
# Chromosomes
barx.chr <- barplot(chr,space=0.5,
ylim=c(0,max(chr)+ceiling(max(chr)/10)),yaxt="n",xaxt="n",
plot=FALSE)
plot.new()
plot.window(xlim=c(0,ceiling(max(barx.chr))),
ylim=c(0,max(chr)+ceiling(max(chr)/10)),mar=c(1,4,1,1))
axis(2,at=pretty(0:(max(chr)+ceiling(max(chr)/10))),cex.axis=0.9,padj=1,
font=2)
text(x=barx.chr,y=chr,label=barlab.chr,cex=0.7,font=2,col="green3",
adj=c(0.5,-1.3))
title(main="Filtered genes per chromosome",cex.main=1.1)
mtext(side=2,text="Number of genes",line=2,cex=0.9,font=2)
grid()
barplot(chr,space=0.5,ylim=c(0,max(chr)+ceiling(max(chr)/10)),
col="blue3",border="yellow3",yaxt="n",xaxt="n",font=2,add=TRUE)
# Biotypes
barx.bt <- barplot(bt,space=0.5,ylim=c(0,max(bt)+ceiling(max(bt)/10)),
yaxt="n",xaxt="n",plot=FALSE)
plot.new()
plot.window(xlim=c(0,ceiling(max(barx.bt))),
ylim=c(0,max(bt)+ceiling(max(bt)/10)),mar=c(1,4,1,1))
axis(2,at=pretty(0:(max(bt)+ceiling(max(bt)/10))),cex.axis=0.9,padj=1,
font=2)
text(x=barx.bt,y=bt,label=barlab.bt,cex=0.7,font=2,col="blue",
adj=c(0.5,-1.3),xpd=TRUE)
title(main="Filtered genes per biotype",cex.main=1.1)
mtext(side=2,text="Number of genes",line=2,cex=0.9,font=2)
grid()
barplot(bt,space=0.5,ylim=c(0,max(bt)+ceiling(max(bt)/10)),col="red3",
border="yellow3",yaxt="n",xaxt="n",font=2,add=TRUE)
# Chromosome percentage
barx.per.chr <- barplot(per.chr,space=0.5,ylim=c(0,max(per.chr)),
yaxt="n",xaxt="n",plot=FALSE)
plot.new()
par(mar=c(9,4,1,1))
plot.window(xlim=c(0,max(barx.per.chr)),ylim=c(0,max(per.chr)))
#axis(1,at=barx.per.chr,labels=names(per.chr),cex.axis=0.9,font=2,
# tcl=-0.3,col="lightgrey",las=2)
axis(1,at=barx.per.chr,labels=FALSE,tcl=-0.3,col="lightgrey")
axis(2,at=seq(0,max(per.chr),length.out=5),labels=formatC(seq(0,
max(per.chr),length.out=5),digits=2,format="f"),cex.axis=0.9,padj=1,
font=2)
text(barx.per.chr,par("usr")[3]-max(per.chr)/17,labels=names(per.chr),
srt=45,adj=c(1,1.1),xpd=TRUE,cex=0.9,font=2)
text(x=barx.per.chr,y=per.chr,label=per.chr.lab,cex=0.7,font=2,
col="green3",adj=c(0.5,-1.3),xpd=TRUE)
mtext(side=2,text="fraction of total genes",line=2,cex=0.9,font=2)
grid()
barplot(per.chr,space=0.5,ylim=c(0,max(per.chr)),col="blue3",
border="yellow3",yaxt="n",xaxt="n",font=2,add=TRUE)
# Biotype percentage
barx.per.bt <- barplot(per.bt,space=0.5,ylim=c(0,max(per.bt)),yaxt="n",
xaxt="n",plot=FALSE)
plot.new()
par(mar=c(9,4,1,1))
plot.window(xlim=c(0,max(barx.per.bt)),ylim=c(0,max(per.bt)))
#axis(1,at=barx.per.bt,labels=names(per.bt),cex.axis=0.9,font=2,
# tcl=-0.3,col="lightgrey",las=2)
axis(1,at=barx.per.bt,labels=FALSE,tcl=-0.3,col="lightgrey")
axis(2,at=seq(0,max(per.bt),length.out=5),
labels=formatC(seq(0,max(per.bt),length.out=5),digits=2,format="f"),
cex.axis=0.9,padj=1,font=2)
text(barx.per.bt,par("usr")[3]-max(per.bt)/17,
labels=gsub("prime","'",names(per.bt)),srt=45,adj=c(1,1.1),
xpd=TRUE,cex=0.9,font=2)
text(x=barx.per.bt,y=per.bt,label=per.bt.lab,cex=0.7,font=2,col="blue",
adj=c(0.5,-1.3),xpd=TRUE)
mtext(side=2,text="fraction of total genes",line=2,cex=0.9,font=2)
grid()
barplot(per.bt,space=0.5,ylim=c(0,max(per.bt)),col="red3",
border="yellow3",yaxt="n",xaxt="n",font=2,add=TRUE)
graphics.close(output)
}
else {
obj <- list(
x=NULL,
y=NULL,
plot=NULL,
samples=NULL,
xlim=NULL,
ylim=NULL,
status=NULL,
pcut=NULL,
fcut=NULL,
altnames=NULL,
user=list(filtered=x,total=y)
)
fil <- list(chromosome=NULL,biotype=NULL)
json <- filteredToJSON(obj,by="chromosome")
fil$chromosome <- file.path(path,"filtered_genes_chromosome.json")
write(json,fil$chromosome)
json <- filteredToJSON(obj,by="biotype")
fil$biotype <- file.path(path,"filtered_genes_biotype.json")
write(json,fil$biotype)
}
return(fil)
}
#' Venn diagrams when performing meta-analysis
#'
#' This function uses the R package VennDiagram and plots an up to 5-way Venn
#' diagram depicting the common and specific to each statistical algorithm genes,
#' for each contrast. Mostly for internal use because of its main argument which
#' is difficult to construct, but can be used independently if the user grasps
#' the logic.
#'
#' @param pmat a matrix with p-values corresponding to the application of each
#' statistical algorithm. The p-value matrix must have the colnames attribute and
#' the colnames should correspond to the name of the algorithm used to fill the
#' specific column (e.g. if \code{"statistics"=c("deseq","edger","nbpseq")} then
#' \code{colnames(pmat) <-} \code{c("deseq","edger","nbpseq")}.
#' @param fcmat an optional matrix with fold changes corresponding to the application
#' of each statistical algorithm. The fold change matrix must have the colnames
#' attribute and the colnames should correspond to the name of the algorithm used
#' to fill the specific column (see the parameter \code{pmat}).
#' @param pcut a p-value cutoff for statistical significance. Defaults to
#' \code{0.05}.
#' @param fcut if \code{fcmat} is supplied, an absolute fold change cutoff to be
#' applied to \code{fcmat} to determine the differentially expressed genes for
#' each algorithm.
#' @param direction if \code{fcmat} is supplied, a keyword to denote which genes
#' to draw in the Venn diagrams with respect to their direction of regulation. It
#' can be one of \code{"dereg"} for the total of regulated genes, where
#' \code{abs(fcmat[,n])>=fcut} (default), \code{"up"} for the up-regulated genes
#' where \code{fcmat[,n]>=fcut} or \code{"down"} for the up-regulated genes where
#' \code{fcmat[,n]<=-fcut}.
#' @param nam a name to be appended to the output graphics file (if \code{"output"}
#' is not \code{"x11"}).
#' @param output one or more R plotting device to direct the plot result to.
#' Supported mechanisms: \code{"x11"} (default), \code{"png"}, \code{"jpg"},
#' \code{"bmp"}, \code{"pdf"} or \code{"ps"}.
#' @param path the path to create output files.
#' @param alt.names an optional named vector of names, e.g. HUGO gene symbols,
#' alternative or complementary to the unique gene names which are the rownames
#' of \code{pmat}. The names of the vector must be the rownames of \code{pmat}.
#' @param ... further arguments to be passed to plot devices, such as parameter
#' from \code{\link{par}}.
#' @return The filenames of the plots produced in a named list with names the
#' \code{which.plot} argument. If output=\code{"x11"}, no output filenames are
#' produced. If \code{"path"} is not \code{NULL}, a file with the intersections
#' in the Venn diagrams will be produced and written in \code{"path"}.
#' @export
#' @author Panagiotis Moulos
#' @examples
#' \dontrun{
#' p1 <- 0.001*matrix(runif(300),100,3)
#' p2 <- matrix(runif(300),100,3)
#' p <- rbind(p1,p2)
#' rownames(p) <- paste("gene",1:200,sep="_")
#' colnames(p) <- paste("method",1:3,sep="_")
#' venn.contents <- diagplot.venn(p)
#'}
diagplot.venn <- function(pmat,fcmat=NULL,pcut=0.05,fcut=0.5,
direction=c("dereg","up","down"),nam=as.character(round(1000*runif(1))),
output="x11",path=NULL,alt.names=NULL,...) {
check.text.args("direction",direction,c("dereg","up","down"))
if (is.na(pcut) || is.null(pcut) || pcut==1)
warnwrap("Illegal pcut argument! Using the default (0.05)")
algs <- colnames(pmat)
if (is.null(algs))
stopwrap("The p-value matrices must have the colnames attribute ",
"(names of statistical algorithms)!")
if (!is.null(fcmat) && (is.null(colnames(fcmat)) ||
length(intersect(colnames(pmat),colnames(fcmat)))!=length(algs)))
stopwrap("The fold change matrices must have the colnames attribute ",
"(names of statistical algorithms) and must be the same as in the ",
"p-value matrices!")
nalg <- length(algs)
if(nalg>5) {
warnwrap(paste("Cannot create a Venn diagram for more than 5 result ",
"sets! ",nalg,"found, only the first 5 will be used..."))
algs <- algs[1:5]
nalg <- 5
}
lenalias <- c("two","three","four","five")
aliases <- toupper(letters[1:nalg])
names(algs) <- aliases
genes <- rownames(pmat)
pairs <- make.venn.pairs(algs)
areas <- make.venn.areas(length(algs))
counts <- make.venn.counts(length(algs))
# Initially populate the results and counts lists so they can be used to create
# the rest of the intersections
results <- vector("list",nalg+length(pairs))
names(results)[1:nalg] <- aliases
names(results)[(nalg+1):length(results)] <- names(pairs)
if (is.null(fcmat)) {
for (a in aliases) {
results[[a]] <- genes[which(pmat[,algs[a]]<pcut)]
counts[[areas[[a]]]] <- length(results[[a]])
}
}
else {
switch(direction,
dereg = {
for (a in aliases) {
results[[a]] <-
genes[which(pmat[,algs[a]]<pcut & abs(
fcmat[,algs[a]])>=fcut)]
counts[[areas[[a]]]] <- length(results[[a]])
}
},
up = {
for (a in aliases) {
results[[a]] <-
genes[which(pmat[,algs[a]]<pcut &
fcmat[,algs[a]]>=fcut)]
counts[[areas[[a]]]] <- length(results[[a]])
}
},
down = {
for (a in aliases) {
results[[a]] <-
genes[which(pmat[,algs[a]]<pcut &
fcmat[,algs[a]]<=-fcut)]
counts[[areas[[a]]]] <- length(results[[a]])
}
}
)
}
# Now, perform the intersections
for (p in names(pairs)) {
a = pairs[[p]][1]
b = pairs[[p]][2]
results[[p]] <- intersect(results[[a]],results[[b]])
counts[[areas[[p]]]] <- length(results[[p]])
}
# And now, the Venn diagrams must be constructed
color.scheme <- make.venn.colorscheme(length(algs))
fil <- file.path(path,paste("venn_plot_",nam,".",output,sep=""))
if (output %in% c("pdf","ps","x11"))
graphics.open(output,fil,width=8,height=8)
else
graphics.open(output,fil,width=800,height=800,res=100)
switch(lenalias[length(algs)-1],
two = {
v <- draw.pairwise.venn(
area1=counts$area1,
area2=counts$area2,
cross.area=counts$cross.area,
category=paste(algs," (",aliases,")",sep=""),
lty="blank",
fill=color.scheme$fill,
cex=1.5,
cat.cex=1.3,
cat.pos=c(0,0),
cat.col=color.scheme$font,
#cat.dist=0.07,
cat.fontfamily=rep("Bookman",2)
)
},
three = {
#overrideTriple <<- TRUE
v <- draw.triple.venn(
area1=counts$area1,
area2=counts$area2,
area3=counts$area3,
n12=counts$n12,
n13=counts$n13,
n23=counts$n23,
n123=counts$n123,
category=paste(algs," (",aliases,")",sep=""),
lty="blank",
fill=color.scheme$fill,
cex=1.5,
cat.cex=1.3,
#cat.pos=c(0,0,180),
cat.col=color.scheme$font,
#cat.dist=0.07,
cat.fontfamily=rep("Bookman",3)
)
},
four = {
v <- draw.quad.venn(
area1=counts$area1,
area2=counts$area2,
area3=counts$area3,
area4=counts$area4,
n12=counts$n12,
n13=counts$n13,
n14=counts$n14,
n23=counts$n23,
n24=counts$n24,
n34=counts$n34,
n123=counts$n123,
n124=counts$n124,
n134=counts$n134,
n234=counts$n234,
n1234=counts$n1234,
category=paste(algs," (",aliases,")",sep=""),
lty="blank",
fill=color.scheme$fill,
cex=1.5,
cat.cex=1.3,
c(0.1,0.1,0.05,0.05),
cat.col=color.scheme$font,
cat.fontfamily=rep("Bookman",4)
)
},
five = {
v <- draw.quintuple.venn(
area1=counts$area1,
area2=counts$area2,
area3=counts$area3,
area4=counts$area4,
area5=counts$area5,
n12=counts$n12,
n13=counts$n13,
n14=counts$n14,
n15=counts$n15,
n23=counts$n23,
n24=counts$n24,
n25=counts$n25,
n34=counts$n34,
n35=counts$n35,
n45=counts$n45,
n123=counts$n123,
n124=counts$n124,
n125=counts$n125,
n134=counts$n134,
n135=counts$n135,
n145=counts$n145,
n234=counts$n234,
n235=counts$n235,
n245=counts$n245,
n345=counts$n345,
n1234=counts$n1234,
n1235=counts$n1235,
n1245=counts$n1245,
n1345=counts$n1345,
n2345=counts$n2345,
n12345=counts$n12345,
category=paste(algs," (",aliases,")",sep=""),
lty="blank",
fill=color.scheme$fill,
cex=1.5,
cat.cex=1.3,
cat.dist=0.1,
cat.col=color.scheme$font,
cat.fontfamily=rep("Bookman",5)
)
}
)
graphics.close(output)
# Now do something with the results
if (!is.null(path)) {
results.ex <- vector("list",length(results))
names(results.ex) <- names(results)
if (!is.null(alt.names)) {
for (n in names(results))
results.ex[[n]] <- alt.names[results[[n]]]
}
else {
for (n in names(results))
results.ex[[n]] <- results[[n]]
}
max.len <- max(sapply(results.ex,length))
for (n in names(results.ex)) {
if (length(results.ex[[n]])<max.len) {
dif <- max.len - length(results.ex[[n]])
results.ex[[n]] <- c(results.ex[[n]],rep(NA,dif))
}
}
results.ex <- do.call("cbind",results.ex)
write.table(results.ex,file=file.path(path,"..","..","lists",
paste0("venn_categories_",nam,".txt")),sep="\t",
row.names=FALSE,quote=FALSE,na="")
}
return(fil)
}
#' Helper for Venn diagrams
#'
#' This function creates a list of pairwise comparisons to be performed in order
#' to create an up to 5-way Venn diagram using the R package VennDiagram. Internal
#' use mostly.
#'
#' @param algs a vector with the names of the sets (up to length 5, if larger, it
#' will be truncated with a warning).
#' @return A list with as many pairs as the comparisons to be made for the
#' construction of the Venn diagram. The pairs are encoded with the uppercase
#' letters A through E, each one corresponding to order of the input sets.
#' @export
#' @author Panagiotis Moulos
#' @examples
#' \dontrun{
#' sets <- c("apple","pear","banana")
#' pairs <- make.venn.pairs(sets)
#'}
make.venn.pairs <- function(algs)
{
lenalias <- c("two","three","four","five")
switch(lenalias[length(algs)-1],
two = {
return(list(
AB=c("A","B")
))
},
three = {
return(list(
AB=c("A","B"),
AC=c("A","C"),
BC=c("B","C"),
ABC=c("AB","C")
))
},
four = {
return(list(
AB=c("A","B"),
AC=c("A","C"),
AD=c("A","D"),
BC=c("B","C"),
BD=c("B","D"),
CD=c("C","D"),
ABC=c("AB","C"),
ABD=c("AB","D"),
ACD=c("AC","D"),
BCD=c("BC","D"),
ABCD=c("ABC","D")
))
},
five = {
return(list(
AB=c("A","B"),
AC=c("A","C"),
AD=c("A","D"),
AE=c("A","E"),
BC=c("B","C"),
BD=c("B","D"),
BE=c("B","E"),
CD=c("C","D"),
CE=c("C","E"),
DE=c("D","E"),
ABC=c("AB","C"),
ABD=c("AB","D"),
ABE=c("AB","E"),
ACD=c("AC","D"),
ACE=c("AC","E"),
ADE=c("AD","E"),
BCD=c("BC","D"),
BCE=c("BC","E"),
BDE=c("BD","E"),
CDE=c("CD","E"),
ABCD=c("ABC","D"),
ABCE=c("ABC","E"),
ABDE=c("ABD","E"),
ACDE=c("ACD","E"),
BCDE=c("BCD","E"),
ABCDE=c("ABCD","E")
))
}
)
}
#' Helper for Venn diagrams
#'
#' This function creates a list with names the arguments of the Venn diagram
#' construction functions of the R package VennDiagram and list members the
#' internal encoding (uppercase letters A to E and combinations among then) used
#' to encode the pairwise comparisons to create the intersections needed for the
#' Venn diagrams. Internal use mostly.
#'
#' @param n the number of the sets used for the Venn diagram.
#' @return A named list, see descritpion.
#' @export
#' @author Panagiotis Moulos
#' @examples
#' \dontrun{
#' sets <- c("apple","pear","banana")
#' pairs <- make.venn.pairs(sets)
#' areas <- make.venn.areas(length(sets))
#'}
make.venn.areas <- function(n)
{
lenalias <- c("two","three","four","five")
switch(lenalias[n-1],
two = {
return(list(
A="area1",
B="area2",
AB="cross.area"
))
},
three = {
return(list(
A="area1",
B="area2",
C="area3",
AB="n12",
AC="n13",
BC="n23",
ABC="n123"
))
},
four = {
return(list(
A="area1",
B="area2",
C="area3",
D="area4",
AB="n12",
AC="n13",
AD="n14",
BC="n23",
BD="n24",
CD="n34",
ABC="n123",
ABD="n124",
ACD="n134",
BCD="n234",
ABCD="n1234"
))
},
five = {
return(list(
A="area1",
B="area2",
C="area3",
D="area4",
E="area5",
AB="n12",
AC="n13",
AD="n14",
AE="n15",
BC="n23",
BD="n24",
BE="n25",
CD="n34",
CE="n35",
DE="n45",
ABC="n123",
ABD="n124",
ABE="n125",
ACD="n134",
ACE="n135",
ADE="n145",
BCD="n234",
BCE="n235",
BDE="n245",
CDE="n345",
ABCD="n1234",
ABCE="n1235",
ABDE="n1245",
ACDE="n1345",
BCDE="n2345",
ABCDE="n12345"
))
}
)
}
#' Helper for Venn diagrams
#'
#' This function creates a list with names the arguments of the Venn diagram
#' construction functions of the R package VennDiagram and list members are
#' initially \code{NULL}. They are filled by the \code{\link{diagplot.venn}}
#' function. Internal use mostly.
#'
#' @param n the number of the sets used for the Venn diagram.
#' @return A named list, see descritpion.
#' @export
#' @author Panagiotis Moulos
#' @examples
#' \dontrun{
#' sets <- c("apple","pear","banana")
#' counts <- make.venn.counts(length(sets))
#'}
make.venn.counts <- function(n)
{
lenalias <- c("two","three","four","five")
switch(lenalias[n-1],
two = {
return(list(
area1=NULL,
area2=NULL,
cross.area=NULL
))
},
three = {
return(list(
area1=NULL,
area2=NULL,
area3=NULL,
n12=NULL,
n13=NULL,
n23=NULL,
n123=NULL
))
},
four = {
return(list(
area1=NULL,
area2=NULL,
area3=NULL,
area4=NULL,
n12=NULL,
n13=NULL,
n14=NULL,
n23=NULL,
n24=NULL,
n34=NULL,
n123=NULL,
n124=NULL,
n134=NULL,
n234=NULL,
n1234=NULL
))
},
five = {
return(list(
area1=NULL,
area2=NULL,
area3=NULL,
area4=NULL,
area5=NULL,
n12=NULL,
n13=NULL,
n14=NULL,
n15=NULL,
n23=NULL,
n24=NULL,
n25=NULL,
n34=NULL,
n35=NULL,
n45=NULL,
n123=NULL,
n124=NULL,
n125=NULL,
n134=NULL,
n135=NULL,
n145=NULL,
n234=NULL,
n235=NULL,
n245=NULL,
n345=NULL,
n1234=NULL,
n1235=NULL,
n1245=NULL,
n1345=NULL,
n2345=NULL,
n12345=NULL
))
}
)
}
#' Helper for Venn diagrams
#'
#' This function returns a list of colorschemes accroding to the number of sets.
#' Internal use.
#'
#' @param n the number of the sets used for the Venn diagram.
#' @return A list with colors for fill and font.
#' @export
#' @author Panagiotis Moulos
#' @examples
#' \dontrun{
#' sets <- c("apple","pear","banana")
#' cs <- make.venn.colorscheme(length(sets))
#'}
make.venn.colorscheme <- function(n) {
lenalias <- c("two","three","four","five")
switch(lenalias[n-1],
two = {
return(list(
fill=c("blue","orange2"),
font=c("darkblue","orange4")
))
},
three = {
return(list(
fill=c("red","green","mediumpurple"),
font=c("darkred","darkgreen","mediumpurple4")
))
},
four = {
return(list(
fill=c("red","green","mediumpurple","orange2"),
font=c("darkred","darkgreen","mediumpurple4","orange4")
))
},
five = {
return(list(
fill=c("red","green","blue","mediumpurple","orange2"),
font=c("darkred","darkgreen","darkblue","mediumpurple4",
"orange4")
))
}
)
}
#' Create basic ROC curves
#'
#' This function creates basic ROC curves using a matrix of p-values (such a matrix
#' can be derived for example from the result table of \code{\link{metaseqr}} by
#' subsetting the table to get the p-values from several algorithms) given a ground
#' truth vector for differential expression and a significance level.
#'
#' @param truth the ground truth differential expression vector. It should contain
#' only zero and non-zero elements, with zero denoting non-differentially expressed
#' genes and non-zero, differentially expressed genes. Such a vector can be obtained
#' for example by using the \code{\link{make.sim.data.sd}} function, which creates
#' simulated RNA-Seq read counts based on real data.
#' @param p a p-value matrix whose rows correspond to each element in the
#' \code{truth} vector. If the matrix has a \code{colnames} attribute, a legend
#' will be added to the plot using these names, else a set of column names will
#' be auto-generated. \code{p} can also be a list or a data frame.
#' @param sig a significance level (0 < \code{sig} <=1).
#' @param x what to plot on x-axis, can be one of \code{"fpr"}, \code{"fnr"},
#' \code{"tpr"}, \code{"tnr"} for False Positive Rate, False Negative Rate, True
#' Positive Rate and True Negative Rate respectively.
#' @param y what to plot on y-axis, same as \code{x} above.
#' @param output one or more R plotting device to direct the plot result to.
#' Supported mechanisms: \code{"x11"} (default), \code{"png"}, \code{"jpg"},
#' \code{"bmp"}, \code{"pdf"} or \code{"ps"}.
#' @param path the path to create output files.
#' @param draw boolean to determine whether to plot the curves or just return the
#' calculated values (in cases where the user wants the output for later averaging
#' for example). Defaults to \code{TRUE} (make plots).
#' @param ... further arguments to be passed to plot devices, such as parameter
#' from \code{\link{par}}.
#' @return A named list with two members. The first member is a list containing
#' the ROC statistics: \code{TP} (True Postives), \code{FP} (False Positives),
#' \code{FN} (False Negatives), \code{TN} (True Negatives), \code{FPR} (False
#' Positive Rate), \code{FNR} (False Negative Rate), \code{TPR} (True Positive
#' Rate), \code{TNR} (True Negative Rate), \code{AUC} (Area Under the Curve). The
#' second is the path to the created figure graphic.
#' @export
#' @author Panagiotis Moulos
#' @examples
#' \dontrun{
#' p1 <- 0.001*matrix(runif(300),100,3)
#' p2 <- matrix(runif(300),100,3)
#' p <- rbind(p1,p2)
#' rownames(p) <- paste("gene",1:200,sep="_")
#' colnames(p) <- paste("method",1:3,sep="_")
#' truth <- c(rep(1,40),rep(-1,40),rep(0,20),rep(1,10),rep(2,10),rep(0,80))
#' names(truth) <- rownames(p)
#' roc.obj <- diagplot.roc(truth,p)
#'}
diagplot.roc <- function(truth,p,sig=0.05,x="fpr",y="tpr",output="x11",
path=NULL,draw=TRUE,...) {
check.text.args("x",x,c("fpr","fnr","tpr","tnr","scrx","sens","spec"),
multiarg=FALSE)
check.text.args("y",y,c("fpr","fnr","tpr","tnr","scry","sens","spec"),
multiarg=FALSE)
if (is.list(p))
pmat <- do.call("cbind",p)
else if (is.data.frame(p))
pmat <- as.matrix(p)
else if (is.matrix(p))
pmat <- p
if (is.null(colnames(pmat)))
colnames(pmat) <- paste("p",1:ncol(pmat),sep="_")
ax.name <- list(
tpr="True Positive Rate",
tnr="True Negative Rate",
fpr="False Positive Rate",
fnr="False Negative Rate",
scrx="Ratio of selected",
scry="Normalized TP/(FP+FN)",
sens="Sensitivity",
spec="1 - Specificity"
)
ROC <- vector("list",ncol(pmat))
names(ROC) <- colnames(pmat)
colspace.universe <- c("red","blue","green","orange","darkgrey","green4",
"black","pink","brown","magenta","yellowgreen","pink4","seagreen4",
"darkcyan")
colspace <- colspace.universe[1:ncol(pmat)]
names(colspace) <- colnames(pmat)
eps <- min(pmat[!is.na(pmat) & pmat>0])
for (n in colnames(pmat)) {
disp("Processing ",n)
gg <- which(pmat[,n]<=sig)
psample <- -log10(pmax(pmat[gg,n],eps))
#psample <- pmat[gg,n]
size <- seq(1,length(gg))
cuts <- seq(-log10(sig),max(psample),length.out=length(gg))
#cuts <- seq(min(psample),sig,length.out=length(gg))
local.truth <- truth[gg]
S <- length(size)
TP <- FP <- FN <- TN <- FPR <- FNR <- TPR <- TNR <- SENS <- SPEC <-
SCRX <- SCRY <- numeric(S)
for (i in 1:S) {
TP[i] <- length(which(psample>cuts[i] & local.truth!=0))
FP[i] <- length(which(psample>cuts[i] & local.truth==0))
FN[i] <- length(which(psample<cuts[i] & local.truth!=0))
TN[i] <- length(which(psample<cuts[i] & local.truth==0))
## Alternatives which I keep in the code
#TP[i] <- length(intersect(names(which(psample>cuts[i])),
# names(which(local.truth!=0))))
#FP[i] <- length(intersect(names(which(psample>cuts[i])),
# names(which(local.truth==0))))
#FN[i] <- length(intersect(names(which(psample<cuts[i])),
# names(which(local.truth!=0))))
#TN[i] <- length(intersect(names(which(psample<cuts[i])),
# names(which(local.truth==0))))
#bad <- which(psample<cuts[i])
#good <- which(psample>cuts[i])
#TP[i] <- length(which(local.truth[good]!=0))
#FP[i] <- length(which(local.truth[good]==0))
#TN[i] <- length(which(local.truth[bad]==0))
#FN[i] <- length(which(local.truth[bad]!=0))
SCRX[i] <- i/S
SCRY[i] <- TP[i]/(FN[i]+FP[i])
if (FP[i]+TN[i] == 0)
FPR[i] <- 0
else
FPR[i] <- FP[i]/(FP[i]+TN[i])
FNR[i] <- FN[i]/(TP[i]+FN[i])
TPR[i] <- TP[i]/(TP[i]+FN[i])
if (TN[i]+FP[i] == 0)
TNR[i] <- 0
else
TNR[i] <- TN[i]/(TN[i]+FP[i])
SENS[i] <- TPR[i]
SPEC[i] <- 1 - TNR[i]
}
#if (all(FPR==0))
# FPR[length(FPR)] <- 1
#if (all(TNR==0)) {
# TNR[1] <- 1
# SPEC[i] <- 0
#}
ROC[[n]] <- list(TP=TP,FP=FP,FN=FN,TN=TN,
FPR=FPR,FNR=FNR,TPR=TPR,TNR=TNR,SCRX=SCRX,SCRY=SCRY/max(SCRY),
SENS=SENS,SPEC=SPEC,AUC=NULL)
}
for (n in colnames(pmat)) {
disp("Calculating AUC for ",n)
auc <- 0
for (i in 2:length(ROC[[n]][[toupper(y)]])) {
auc <- auc +
0.5*(ROC[[n]][[toupper(x)]][i]-ROC[[n]][[toupper(x)]][i-1])*
(ROC[[n]][[toupper(y)]][i]+ROC[[n]][[toupper(y)]][i-1])
}
ROC[[n]]$AUC <- abs(auc)
# There are some extreme cases, with the Intersection case for the paper
# where there are no FPs or TNs for a p-value cutoff of 0.2 (which is
# imposed in order to avoid the saturation of the ROC curves). In these
# cases, performance is virtually perfect, and the actual AUC should be
# 1. For these cases, we set it to a value between 0.95 and 0.99 to
# represent a more plausible truth.
if (ROC[[n]]$AUC==0) ROC[[n]]$AUC <- sample(seq(0.95,0.99,by=0.001),1)
}
disp("")
if (draw) {
fil <- file.path(path,paste("ROC",output,sep="."))
if (output %in% c("pdf","ps","x11"))
graphics.open(output,fil,width=8,height=8)
else
graphics.open(output,fil,width=1024,height=1024,res=100)
xlim <- c(0,1)
ylim <- c(0,1)
par(cex.axis=0.9,cex.main=1,cex.lab=0.9,font.lab=2,font.axis=2,pty="m",
lwd=1.5,lty=1)
plot.new()
plot.window(xlim,ylim)
axis(1,at=pretty(xlim,10))
axis(2,at=pretty(ylim,10))
for (n in names(ROC))
lines(ROC[[n]][[toupper(x)]],ROC[[n]][[toupper(y)]],
col=colspace[n],...)
grid()
title(xlab=ax.name[[x]],ylab=ax.name[[y]])
auc.text <- as.character(sapply(ROC,function(x)
round(x$AUC,digits=3)))
graphics::legend(x="bottomright",col=colspace,lty=1,cex=0.9,
legend=paste(names(ROC)," (AUC = ",auc.text,")",sep=""))
graphics.close(output)
}
else
fil <- NULL
return(list(ROC=ROC,truth=truth,sig.level=sig,x.axis=x,y.axis=y,path=fil))
}
## Create averaged basic ROC curves
##
## This function creates averaged basic ROC curves using a list of objects returned
## from the \code{\link{diagplot.roc}} function.
##
## @param roc.obj a list containing several lists returned from the application
## of \code{\link{diagplot.roc}} function.
## @param output one or more R plotting device to direct the plot result to.
## Supported mechanisms: \code{"x11"} (default), \code{"png"}, \code{"jpg"},
## \code{"bmp"}, \code{"pdf"} or \code{"ps"}.
## @param path the path to create output files.
## @param ... further arguments to be passed to plot devices, such as parameter
## from \code{\link{par}}.
## @return A named list with two members. The first member is a list containing
## the mean and standard deviation of ROC statistics. The second is the path to
## the created figure graphic.
## @export
## @author Panagiotis Moulos
## @examples
## \dontrun{
## # Not yet available
##}
#diagplot.avg.roc <- function(roc.obj,output="x11",path=NULL,...) {
# ax.name <- list(
# tpr="True Positive Rate",
# tnr="True Negative Rate",
# fpr="False Positive Rate",
# fnr="False Negative Rate"
# )
# stats <- names(roc.obj[[1]]$ROC)
# x <- toupper(roc.obj[[1]]$x.axis)
# y <- toupper(roc.obj[[1]]$y.axis)
# avg.ROC <- vector("list",length(stats))
# avg.ROC <- lapply(avg.ROC,function(x) {
# return(list(TP=NULL,FP=NULL,FN=NULL,TN=NULL,
# FPR=NULL,FNR=NULL,TPR=NULL,TNR=NULL,AUC=NULL))
# })
# names(avg.ROC) <- stats
# colspace.universe <- c("red","blue","green","orange","darkgrey","green4",
# "black","pink","brown","yellowgreen","magenta","pink2","seagreen4",
# "darkcyan")
# colspace <- colspace.universe[1:length(stats)]
# names(colspace) <- stats
# for (s in stats) {
# disp("Retrieving ",s)
# for (r in names(avg.ROC[[s]])) {
# if (r != "AUC") {
# #avg.ROC[[s]][[r]] <- do.call("cbind",lapply(roc.obj,
# # function(x,s,r) x$ROC[[s]][[r]],s,r))
# lapply(roc.obj,function(x,s,r) print(length(x$ROC[[s]][[r]])),s,r)
# mn <- apply(avg.ROC[[s]][[r]],1,mean)
# st <- apply(avg.ROC[[s]][[r]],1,sd)
# avg.ROC[[s]][[r]] <- list(mean=mn,sd=st)
# }
# }
# }
# disp("")
# means <- do.call("cbind",lapply(avg.ROC,function(x) x$mean))
# stds <- do.call("cbind",lapply(avg.ROC,function(x) x$sd))
# fil <- file.path(path,paste("ROC",output,sep="."))
# if (output %in% c("pdf","ps","x11"))
# graphics.open(output,fil,width=8,height=8)
# else
# graphics.open(output,fil,width=1024,height=1024,res=100)
# xlim <- c(0,1)
# ylim <- c(0,1)
# par(cex.axis=0.9,cex.main=1,cex.lab=0.9,font.lab=2,font.axis=2,pty="m",
# lwd=1.5,lty=1)
# plot.new()
# plot.window(xlim,ylim)
# axis(1,at=pretty(xlim,10))
# axis(2,at=pretty(ylim,10))
# for (n in names(ROC)) {
# lines(ROC[[n]][[x]],ROC[[n]][[y]],col=colspace[n],...)
# }
# grid()
# title(xlab=ax.name[[x]],ylab=ax.name[[y]])
# legend(x="bottomright",legend=names(ROC),col=colspace,lty=1)
# graphics.close(output)
# return(list(ROC=ROC,path=fil))
#}
#' Create False (or True) Positive (or Negative) curves
#'
#' This function creates false (or true) discovery curves using a matrix of
#' p-values (such a matrix can be derived for example from the result table of
#' \code{\link{metaseqr}} by subsetting the table to get the p-values from several
#' algorithms) given a ground truth vector for differential expression.
#'
#' @param truth the ground truth differential expression vector. It should contain
#' only zero and non-zero elements, with zero denoting non-differentially expressed
#' genes and non-zero, differentially expressed genes. Such a vector can be obtained
#' for example by using the \code{\link{make.sim.data.sd}} function, which creates
#' simulated RNA-Seq read counts based on real data. The elements of \code{truth}
#' MUST be named (e.g. each gene's name).
#' @param p a p-value matrix whose rows correspond to each element in the
#' \code{truth} vector. If the matrix has a \code{colnames} attribute, a legend
#' will be added to the plot using these names, else a set of column names will
#' be auto-generated. \code{p} can also be a list or a data frame. The p-values
#' MUST be named (e.g. each gene's name).
#' @param type what to plot, can be \code{"fpc"} for False Positive Curves
#' (default), \code{"tpc"} for True Positive Curves, \code{"fnc"} for False
#' Negative Curves or \code{"tnc"} for True Negative Curves.
#' @param N create the curves based on the top (or bottom) \code{N} ranked genes
#' (default is 2000) to be used with \code{type="fpc"} or \code{type="tpc"}.
#' @param output one or more R plotting device to direct the plot result to.
#' Supported mechanisms: \code{"x11"} (default), \code{"png"}, \code{"jpg"},
#' \code{"bmp"}, \code{"pdf"} or \code{"ps"}.
#' @param path the path to create output files.
#' @param draw boolean to determine whether to plot the curves or just return the
#' calculated values (in cases where the user wants the output for later averaging
#' for example). Defaults to \code{TRUE} (make plots).
#' @param ... further arguments to be passed to plot devices, such as parameter
#' from \code{\link{par}}.
#' @return A named list with two members: the first member (\code{ftdr}) contains
#' the values used to create the plot. The second member (\code{path}) contains
#' the path to the created figure graphic.
#' @export
#' @author Panagiotis Moulos
#' @examples
#' \dontrun{
#' p1 <- 0.001*matrix(runif(300),100,3)
#' p2 <- matrix(runif(300),100,3)
#' p <- rbind(p1,p2)
#' rownames(p) <- paste("gene",1:200,sep="_")
#' colnames(p) <- paste("method",1:3,sep="_")
#' truth <- c(rep(1,40),rep(-1,40),rep(0,10),rep(1,10),rep(2,10),rep(0,80))
#' names(truth) <- rownames(p)
#' ftd.obj <- diagplot.ftd(truth,p,N=100)
#'}
diagplot.ftd <- function(truth,p,type="fpc",N=2000,output="x11",path=NULL,
draw=TRUE,...) {
check.text.args("type",type,c("fpc","tpc","fnc","tnc"),multiarg=FALSE)
if (is.list(p))
pmat <- do.call("cbind",p)
else if (is.data.frame(p))
pmat <- as.matrix(p)
else if (is.matrix(p))
pmat <- p
else if (is.numeric(p))
pmat <- as.matrix(p)
if (is.null(colnames(pmat)))
colnames(pmat) <- paste("p",1:ncol(pmat),sep="_")
y.name <- list(
tpc="Number of True Positives",
fpc="Number of False Positives",
tnc="Number of True Negatives",
fnc="Number of False Negatives"
)
ftdr.list <- vector("list",ncol(pmat))
names(ftdr.list) <- colnames(pmat)
colspace.universe <- c("red","blue","green","orange","darkgrey","green4",
"black","pink","brown","magenta","yellowgreen","pink4","seagreen4",
"darkcyan")
colspace <- colspace.universe[1:ncol(pmat)]
names(colspace) <- colnames(pmat)
switch(type,
fpc = {
for (n in colnames(pmat)) {
disp("Processing ",n)
z <- sort(pmat[,n])
for (i in 1:N) {
nn <- length(intersect(names(z[1:i]),
names(which(truth==0))))
if (nn==0)
ftdr.list[[n]][i] <- 1
else
ftdr.list[[n]][i] <- nn
}
}
},
tpc = {
for (n in colnames(pmat)) {
disp("Processing ",n)
z <- sort(pmat[,n])
for (i in 1:N)
ftdr.list[[n]][i] <- length(intersect(names(z[1:i]),
names(which(truth!=0))))
}
},
fnc = {
for (n in colnames(pmat)) {
disp("Processing ",n)
z <- sort(pmat[,n],decreasing=TRUE)
for (i in 1:N) {
nn <- length(intersect(names(z[1:i]),
names(which(truth!=0))))
if (nn==0)
ftdr.list[[n]][i] <- 1
else
ftdr.list[[n]][i] <- nn
}
}
},
tnc = {
for (n in colnames(pmat)) {
disp("Processing ",n)
z <- sort(pmat[,n],decreasing=TRUE)
for (i in 1:N)
ftdr.list[[n]][i] <- length(intersect(names(z[1:i]),
names(which(truth==0))))
}
}
)
disp("")
if (draw) {
fil <- file.path(path,paste("FTDR_",type,".",output,sep=""))
if (output %in% c("pdf","ps","x11"))
graphics.open(output,fil,width=8,height=8)
else
graphics.open(output,fil,width=1024,height=1024,res=100)
xlim <- ylim <- c(1,N)
#ylim <- c(1,length(which(truth!=0)))
par(cex.axis=0.9,cex.main=1,cex.lab=0.9,font.lab=2,font.axis=2,pty="m",
lwd=1.5,lty=1)
plot.new()
switch(type,
fpc = {
plot.window(xlim,ylim,log="y")
axis(1,at=pretty(xlim,10))
axis(2)
for (n in names(ftdr.list)) {
lines(ftdr.list[[n]],col=colspace[n],...)
}
grid()
title(main="Selected genes vs False Positives",
xlab="Number of selected genes",ylab=y.name[[type]])
graphics::legend(x="topleft",legend=names(ftdr.list),
col=colspace,lty=1)
},
tpc = {
plot.window(xlim,ylim)
axis(1,at=pretty(xlim,10))
axis(2,at=pretty(ylim,10))
for (n in names(ftdr.list)) {
lines(ftdr.list[[n]],col=colspace[n],...)
}
grid()
title(main="Selected genes vs True Positives",
xlab="Number of selected genes",ylab=y.name[[type]])
graphics::legend(x="bottomright",legend=names(ftdr.list),
col=colspace,lty=1)
},
fnc = {
plot.window(xlim,ylim,log="y")
axis(1,at=pretty(xlim,10))
axis(2)
for (n in names(ftdr.list)) {
lines(ftdr.list[[n]],col=colspace[n],...)
}
grid()
title(main="Selected genes vs False Negatives",
xlab="Number of selected genes",ylab=y.name[[type]])
graphics::legend(x="topleft",legend=names(ftdr.list),
col=colspace,lty=1)
},
tnc = {
plot.window(xlim,ylim)
axis(1,at=pretty(xlim,10))
axis(2,at=pretty(ylim,10))
for (n in names(ftdr.list)) {
lines(ftdr.list[[n]],col=colspace[n],...)
}
grid()
title(main="Selected genes vs True Negatives",
xlab="Number of selected genes",ylab=y.name[[type]])
graphics::legend(x="bottomright",legend=names(ftdr.list),
col=colspace,lty=1)
}
)
graphics.close(output)
}
else
fil <- NULL
return(list(ftdr=ftdr.list,truth=truth,type=type,N=N,path=fil))
}
#' Create average False (or True) Discovery curves
#'
#' This function creates false (or true) discovery curves using a list containing
#' several outputs from \code{\link{diagplot.ftd}}.
#'
#' @param ftdr.obj a list with outputs from \code{\link{diagplot.ftd}}.
#' @param output one or more R plotting device to direct the plot result to.
#' Supported mechanisms: \code{"x11"} (default), \code{"png"}, \code{"jpg"},
#' \code{"bmp"}, \code{"pdf"} or \code{"ps"}.
#' @param path the path to create output files.
#' @param draw boolean to determine whether to plot the curves or just return the
#' calculated values (in cases where the user wants the output for later averaging
#' for example). Defaults to \code{TRUE} (make plots).
#' @param ... further arguments to be passed to plot devices, such as parameter
#' from \code{\link{par}}.
#' @return A named list with two members: the first member (\code{avg.ftdr})
#' contains a list with the means and the standard deviations of the averaged
#' \code{ftdr.obj} and are used to create the plot. The second member (\code{path})
#' contains the path to the created figure graphic.
#' @export
#' @author Panagiotis Moulos
#' @examples
#' \dontrun{
#' p11 <- 0.001*matrix(runif(300),100,3)
#' p12 <- matrix(runif(300),100,3)
#' p21 <- 0.001*matrix(runif(300),100,3)
#' p22 <- matrix(runif(300),100,3)
#' p31 <- 0.001*matrix(runif(300),100,3)
#' p32 <- matrix(runif(300),100,3)
#' p1 <- rbind(p11,p21)
#' p2 <- rbind(p12,p22)
#' p3 <- rbind(p31,p32)
#' rownames(p1) <- rownames(p2) <- rownames(p3) <-
#' paste("gene",1:200,sep="_")
#' colnames(p1) <- colnames(p2) <- colnames(p3) <-
#' paste("method",1:3,sep="_")
#' truth <- c(rep(1,40),rep(-1,40),rep(0,20),
#' rep(1,10),rep(2,10),rep(0,80))
#' names(truth) <- rownames(p1)
#' ftd.obj.1 <- diagplot.ftd(truth,p1,N=100,draw=FALSE)
#' ftd.obj.2 <- diagplot.ftd(truth,p2,N=100,draw=FALSE)
#' ftd.obj.3 <- diagplot.ftd(truth,p3,N=100,draw=FALSE)
#' ftd.obj <- list(ftd.obj.1,ftd.obj.2,ftd.obj.3)
#' avg.ftd.obj <- diagplot.avg.ftd(ftd.obj)
#'}
diagplot.avg.ftd <- function(ftdr.obj,output="x11",path=NULL,draw=TRUE,...) {
y.name <- list(
tpc="Number of True Positives",
fpc="Number of False Positives",
tnc="Number of True Negatives",
fnc="Number of False Negatives"
)
stats <- names(ftdr.obj[[1]]$ftdr)
type <- ftdr.obj[[1]]$type
truth <- ftdr.obj[[1]]$truth
N <- ftdr.obj[[1]]$N
avg.ftdr.obj <- vector("list",length(stats))
names(avg.ftdr.obj) <- stats
colspace.universe <- c("red","blue","green","orange","darkgrey","green4",
"black","pink","brown","magenta","yellowgreen","pink4","seagreen4",
"darkcyan")
colspace <- colspace.universe[1:length(stats)]
names(colspace) <- stats
for (s in stats) {
disp("Retrieving ",s)
avg.ftdr.obj[[s]] <- do.call("cbind",lapply(ftdr.obj,
function(x) x$ftdr[[s]]))
}
disp("")
avg.ftdr.obj <- lapply(avg.ftdr.obj,function(x) {
mn <- apply(x,1,mean)
st <- apply(x,1,sd)
return(list(mean=mn,sd=st))
})
means <- do.call("cbind",lapply(avg.ftdr.obj,function(x) x$mean))
stds <- do.call("cbind",lapply(avg.ftdr.obj,function(x) x$sd))
if (draw) {
fil <- file.path(path,paste("AVG_FTDR_",type,".",output,sep=""))
if (output %in% c("pdf","ps","x11"))
graphics.open(output,fil,width=8,height=8)
else
graphics.open(output,fil,width=1024,height=1024,res=100)
xlim <- ylim <- c(1,N)
par(cex.axis=0.9,cex.main=1,cex.lab=0.9,font.lab=2,font.axis=2,pty="m",
lwd=1.5,lty=1)
plot.new()
switch(type,
fpc = {
plot.window(xlim,ylim,log="y")
axis(1,at=pretty(xlim,10))
axis(2)
for (n in colnames(means)) {
lines(means[,n],col=colspace[n],...)
}
grid()
title(main="Selected genes vs False Positives",
xlab="Number of selected genes",ylab=y.name[[type]])
graphics::legend(x="topleft",legend=colnames(means),
col=colspace,lty=1)
},
tpc = {
plot.window(xlim,ylim)
axis(1,at=pretty(xlim,10))
axis(2,at=pretty(ylim,10))
for (n in colnames(means)) {
lines(means[,n],col=colspace[n],...)
}
grid()
title(main="Selected genes vs True Positives",
xlab="Number of selected genes",ylab=y.name[[type]])
graphics::legend(x="bottomright",legend=colnames(means),
col=colspace,lty=1)
},
fnc = {
plot.window(xlim,ylim,log="y")
axis(1,at=pretty(xlim,10))
axis(2)
for (n in colnames(means)) {
lines(means[,n],col=colspace[n],...)
}
grid()
title(main="Selected genes vs False Negatives",
xlab="Number of selected genes",ylab=y.name[[type]])
graphics::legend(x="topleft",legend=colnames(means),
col=colspace,lty=1)
},
tnc = {
plot.window(xlim,ylim)
axis(1,at=pretty(xlim,10))
axis(2,at=pretty(ylim,10))
for (n in colnames(means)) {
lines(means[,n],col=colspace[n],...)
}
grid()
title(main="Selected genes vs True Negatives",
xlab="Number of selected genes",ylab=y.name[[type]])
graphics::legend(x="bottomright",legend=colnames(means),
col=colspace,lty=1)
}
)
graphics.close(output)
}
else
fil <- NULL
return(list(avg.ftdr=list(means=means,stds=stds),path=fil))
}
#' Open plotting device
#'
#' Wrapper function to open a plotting device. Internal use only.
#'
#' @param o the plotting device, see main metaseqr function
#' @param f a filename, if the plotting device requires it (e.g. \code{"pdf"})
#' @param ... further arguments to be passed to plot devices, such as parameter
#' from \code{\link{par}}.
#' @author Panagiotis Moulos
#' @examples
#' \dontrun{
#' graphics.open("pdf","test.pdf",width=12,height=12)
#'}
graphics.open <- function(o,f,...) {
if(!check.graphics.type(o))
stopwrap("Invalid graphics output type!")
if(check.graphics.file(o) && is.null(f))
stopwrap("Please specify an output file name for your plot")
switch(o,
x11 = { dev.new(...) },
pdf = { pdf(file=f,pointsize=10,...) },
ps = { postscript(file=f,pointsize=10,...) },
png = { png(filename=f,pointsize=12,...) },
jpg = { jpeg(filename=f,pointsize=12,quality=100,...) },
bmp = { bmp(filename=f,pointsize=12,...) },
tiff = { tiff(filename=f,pointsize=12,...) }
)
}
#' Close plotting device
#'
#' Wrapper function to close a plotting device. Internal use only.
#'
#' @param o the plotting device, see main metaseqr function
#' @author Panagiotis Moulos
#' @examples
#' \dontrun{
#' graphics.close("pdf")
#'}
graphics.close <- function(o) {
if (!is.element(o,c("x11","png","jpg","tiff","bmp","pdf","ps")))
return(FALSE)
if (o!="x11")
dev.off()
}
#' Check plotting device
#'
#' Plotting device checker. Internal use only.
#'
#' @param o the plotting device, see main metaseqr function
#' @author Panagiotis Moulos
check.graphics.type <- function(o) {
if (!is.element(o,c("x11","png","jpg","tiff","bmp","pdf","ps")))
return(FALSE)
else
return(TRUE)
}
#' Check graphics file
#'
#' Graphics file checker. Internal use only.
#'
#' @param o the plotting device, see main metaseqr function
#' @author Panagiotis Moulos
check.graphics.file <- function(o) {
if (is.element(o,c("png","jpg","tiff","bmp","pdf","ps")))
return(TRUE)
else
return(FALSE)
}
#' Display value transformation
#'
#' Logarithmic transformation for display purposes. Internal use only.
#'
#' @param mat input data matrix
#' @param base logarithmic base, 2 or 10
#' @author Panagiotis Moulos
log2disp <- function(mat,base=2) {
mat[mat==0] <- 1
if (base==10)
return(log10(mat))
else
return(log2(mat))
}
#' General value transformation
#'
#' Logarithmic transformation. Internal use only.
#'
#' @param x input data matrix
#' @param base logarithmic base, 2 or 10
#' @param off offset to avoid Infinity
#' @author Panagiotis Moulos
nat2log <- function(x,base=2,off=1) {
#x[x==0] <- off
x <- x + off
if (base==2)
return(log2(x))
else
return(log10(x))
}
#' Old functions from NOISeq
#'
#' Old functions from NOISeq to create the \code{"readnoise"} plots. Internal use
#' only.
#'
#' @param input input to cddat.
#' @return a list with data to plot.
#' @note Adopted from an older version of NOISeq package (author: Sonia Tarazona).
#' @author Panagiotis Moulos
cddat <- function (input) {
if (inherits(input,"eSet") == FALSE)
stopwrap("The input data must be an eSet object.\n")
if (!is.null(assayData(input)$exprs)) {
if (ncol(assayData(input)$exprs) < 2)
stopwrap("The input object should have at least two samples.\n")
datos <- assayData(input)$exprs
}
else {
if (ncol(assayData(input)$counts) < 2)
stopwrap("The input object should have at least two samples.\n")
datos <- assayData(input)$counts
}
datos <- datos[which(rowSums(datos) > 0),]
nu <- nrow(datos) # number of detected features
qq <- 1:nu
data2plot = data.frame("%features" = 100*qq/nu)
for (i in 1:ncol(datos)) {
acumu <- 100*cumsum(sort(datos[,i],decreasing=TRUE))/sum(datos[,i])
data2plot = cbind(data2plot, acumu)
}
colnames(data2plot)[-1] = colnames(datos)
# Diagnostic test
KSpval = mostres = NULL
for (i in 1:(ncol(datos)-1)) {
for (j in (i+1):ncol(datos)) {
mostres = c(mostres, paste(colnames(datos)[c(i,j)], collapse="_"))
KSpval = c(KSpval, suppressWarnings(ks.test(datos[,i], datos[,j],
alternative = "two.sided"))$"p.value")
}
}
KSpval = p.adjust(KSpval, method = "fdr")
return(list(
"data2plot"=data2plot,
"DiagnosticTest"=data.frame("ComparedSamples"=mostres,"KSpvalue"=KSpval)
))
}
#' Old functions from NOISeq
#'
#' Old functions from NOISeq to create the \code{"readnoise"} plots. Internal use
#' only.
#' @param dat the returned list from \code{\link{cddat}}.
#' @param samples the samples to plot.
#' @param ... further arguments passed to e.g. \code{\link{par}}.
#' @return Nothing, it created the old RNA composition plot.
#' @note Adopted from an older version of NOISeq package (author: Sonia Tarazona)
#' @author Panagiotis Moulos
cdplot <- function (dat,samples=NULL,...) {
dat = dat$data2plot
if (is.null(samples)) samples <- 1:(ncol(dat)-1)
if (is.numeric(samples)) samples = colnames(dat)[samples+1]
colspace <- c("red","blue","yellowgreen","orange","aquamarine2","pink2",
"seagreen4","brown","purple","chocolate","gray10","gray30","darkblue",
"darkgreen","firebrick2","darkorange4","darkorchid","darkcyan","gold4",
"deeppink3")
if (length(samples)>length(colspace))
miscolores <- sample(colspace,length(samples),replace=TRUE)
else
miscolores <- sample(colspace,length(samples))
plot(dat[,1],dat[,samples[1]],xlab="% features",ylab="% reads",type="l",
col=miscolores[1],...)
for (i in 2:length(samples))
lines(dat[,1],dat[,samples[i]],col=miscolores[i])
graphics::legend("bottom",legend=samples,
text.col=miscolores[1:length(samples)],bty="n",lty=1,lwd=2,
col=miscolores[1:length(samples)])
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.