##perl -e ' foreach (@ARGV) { print "library($_)\nrequire($_)\n" }' 'reshape2' 'gplots' 'stringr' 'RSvgDevice' 'rgl' 'ggplot2'
library(reshape2)
#require(reshape2)
require(gplots)
require(stringr)
require(RSvgDevice)
require(rgl)
require(ggplot2)
#' @name StefansExpressionSet
#' @title StefansExpressionSet
#' @description An S4 class to visualize Expression data.
#' @slot data a data.frame containing the expression values for each gene x sample (gene = row)
#' @slot samples a data.frame describing the columnanmes in the data column
#' @slot annotation a data.frame describing the rownames in the data rows
#' @slot outpath the default outpath for the plots and tables from this package
#' @slot name the name for this package (all filesnames contain that)
#' @slot zscored genes are normalized?
#' @slot snorm samples normalized?
#' @slot rownamescol the column name in the annotation table that represents the rownames of the data table
#' @slot sampleNamesCol the column name in the samples table that represents the colnames of the data table
#' @slot stats the stats list for all stats created in the object
#' @slot usedObj here a set of used and probably lateron important objects can be saved. Be very carful using any of them!
#' @exportClass StefansExpressionSet
setClass(
Class='StefansExpressionSet',
representation = representation (
data='data.frame',
samples='data.frame',
ranks='numeric',
raw="data.frame",
annotation='data.frame',
snorm='logical',
outpath='character',
name='character',
rownamescol='character',
sampleNamesCol='character',
stats = 'list',
sig_genes = 'list',
zscored = 'logical',
simple = 'character',
usedObj = 'list'
),
prototype(outpath ='', name = 'StefansExpressionSet',
sampleNamesCol=NA_character_,
stats=list(),
snorm=F,
zscored=F,
sig_genes=list(),
usedObj= list(),
simple= c( 'outpath', 'rownamescol', 'sampleNamesCol', 'simple', 'snorm', 'zscored') )
)
#' @name PMID25158935exp
#' @title Read counts for the expression data described in PMID25158935
#' @description The data was re-mapped against mouse mm10 using HISAT
#' @description and quantified using the R subreads package.
#' @docType data
#' @usage PMID25158935exp
#' @format data.frame
'PMID25158935exp'
#' @name PMID25158935samples
#' @title Read counts for the sample data for the expression information described in PMID25158935
#' @description The data was collected from the NCBI SRA archive
#' @docType data
#' @usage PMID25158935samples
#' @format data.frame
'PMID25158935samples'
#' @name red
#' @title reduced PMID25158935 dataset to a 100x15 StefansExpressionSet
#' @description Reduced StefansExpressionSet from the PMID25158935exp + PMID25158935samples dataset
#' @docType data
#' @usage red
#' @format StefansExpressionSet
'red'
#' @name StefansExpressionSet
#' @aliases StefansExpressionSet,data.frame-method
#' @rdname StefansExpressionSet-methods
#' @docType methods
#' @description this file contains all generic fnction for data export and ploting Create an StefansExpressionSet
#' @description object (S3) This object is mainly used for subsetting of the data and plotting @export
#' @param dat data frame or matrix containing all expression data
#' @param Samples A sample description table
#' @param Analysis If the samples table contains a Analysis column you can subset the data already here to the Analysis here (String). This table has to contain a column filenames that is expected to connect the sample table to the dat column names.
#' @param name The name of this object is going to be used in the output of all plots and tables - make it specific
#' @param namecol The samples table column that contains the (to be set) names of the samples in the data matrix
#' @param namerow This is the name of the gene level annotation column in the dat file to use as ID
#' @param outpath Where to store the output from the analysis
#' @param annotation The annotation table from e.g. affymetrix csv data
#' @param newOrder The samples column name for the new order (default 'Order')
#' @title description of function StefansExpressionSet
#' @exportMethod StefansExpressionSet
setGeneric("StefansExpressionSet", ## Name
function( dat, Samples, class='StefansExpressionSet', Analysis = NULL, name='WorkingSet', namecol='GroupName', namerow= 'GeneID', usecol='Use' , outpath = ''){ ## Argumente der generischen Funktion
standardGeneric("StefansExpressionSet") ## der Aufruf von standardGeneric sorgt für das Dispatching
})
setMethod("StefansExpressionSet", signature = c ('data.frame'),
definition = function ( dat, Samples, class='StefansExpressionSet', Analysis = NULL, name='WorkingSet', namecol='GroupName', namerow= 'GeneID', usecol='Use' , outpath = '' ) {
S <- Samples
if ( ! is.null(Analysis) ){
S <- Samples[which ( Samples$Analysis == Analysis ), ]
}
if ( ! is.null(usecol) ) {
S <- Samples[which ( Samples[, usecol] == 1 ),]
}
if ( exists('filename',S) ) {
n <- make.names(as.vector(S$filename))
ret <- dat[, n ]
annotation <- dat[, is.na(match( colnames(dat), n ))==T ]
}else{
n <- make.names(as.vector(S[,namecol]))
ret <- dat[, n ]
annotation <- dat[, is.na(match( colnames(dat), n ))==T ]
}
if ( class(annotation) == 'factor'){
annotation <- data.frame( annotation )
colnames(annotation) <- namerow
}
if ( exists( 'Order', S)){
ret <- ret[, order(S$Order) ]
S <- S[order(S$Order), ]
}
if ( outpath == '' ){
outpath = pwd()
}
if ( ! file.exists(outpath)){
dir.create( outpath )
}
ret <- data.frame(ret)
if ( is.null(dim(annotation))){
## this xcould be a problem... hope we at least have a vector
if ( length(annotation) == nrow(ret)) {
rownames(ret) <- annotation
}
else {
stop ( "Sorry, please cbind the rownames to the expression values before creating this object!")
}
}else{
rownames(ret) <- annotation[,namerow]
}
data <- list ( 'data' = ret, samples = S, name= name, annotation = annotation, rownamescol = namerow )
data$outpath <- outpath
colnames(ret) <- make.names(forceAbsoluteUniqueSample ( as.vector(S[, namecol]) ))
S$SampleName <- colnames(ret)
write.table (cbind(rownames(ret), ret ), file=paste(name, '_DataValues',".xls", sep=''), sep='\t', row.names=F,quote=F )
write.table (S, file=paste(name,'_Sample_Description', ".xls", sep=''), sep='\t', row.names=F,quote=F )
data$sampleNamesCol <- namecol
class(data) <- 'StefansExpressionSet'
data$batchRemoved=0
r <- new ( class, data = data$data, samples = data$samples, name = data$name, annotation = data$annotation, rownamescol= data$rownamescol,sampleNamesCol = data$sampleNamesCol , outpath= data$outpath )
force.numeric(r)
})
#' @name coexprees4gene
#' @aliases coexprees4gene,StefansExpressionSet-method
#' @rdname coexprees4gene-methods
#' @docType methods
#' @description Calculate the coexpression for any given gene
#' @param x the StefansExpressionSet varibale
#' @param method any method supported by \link[=cor.test]{cor.test}
#' @param geneNameCol the name of the gene column (gene_name)
#' @param padjMethod the method to calucate the FDR with \link[=p.adjust]{p.adjust}
#' @return a data.frame with the columns 'GeneSymbol', 'pval', 'cor', 'adj.p'
#' @title description of function coexprees4gene
#' @export
setGeneric('coexprees4gene', ## Name
function ( x, gene=NULL, method='spearman', geneNameCol='gene_name', padjMethod='BH' ) { ## Argumente der generischen Funktion
standardGeneric('coexprees4gene') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('coexprees4gene', signature = c ( 'StefansExpressionSet') ,
definition = function ( x, gene=NULL, method='spearman', geneNameCol='gene_name', padjMethod='BH' ) {
ret <- NULL
if ( ! is.null(gene) ){
z <- as.vector( t(x@data[ gene[1] ,]) )
pval <- vector( 'numeric', nrow(x@data))
cor <- vector( 'numeric', nrow(x@data))
for ( i in 1:nrow(x@data) ) {
try( { res <- cor.test( z, as.vector(t(x@data[i,]), 'numeric') ,method=method)
pval[i] <- res$p.value
cor[i] <- res$estimate }, silent=T
)
}
adj.p <- p.adjust(pval , method = padjMethod) #BH
ret <- data.frame('GeneSymbol' = x@annotation[,geneNameCol], pval = pval, cor = cor, adj.p = adj.p )
rownames(ret) <- rownames(x@data)
}
ret
})
#' @name pcorMat
#' @aliases pcorMat,StefansExpressionSet-method
#' @rdname pcorMat-methods
#' @docType methods
#' @description calculate a p_value matrix for the data object THIS FUNCTION IS NOT DOING THE RIGTH
#' @description THING BROKEN
#' @param x the StefansExpressionSet varibale
#' @param sd_cut the cut off value for the sd check
#' @param method any method supported by \code{\link[stats]{cor.test}}
#' @param geneNameCol the name of the gene column (gene_name)
#' @param groupCol the column name of the grouping variable in the samples table
#' @param name the name of the analysis
#' @title description of function corMat.Pvalues
#' @export
setGeneric('pcorMat', ## Name
function ( x, sd_cut=1, method='spearman', geneNameCol='gene_name', groupCol=NULL, name='tmp' ) { ## Argumente der generischen Funktion
standardGeneric('pcorMat') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('pcorMat', signature = c ( 'StefansExpressionSet') ,
definition = function ( x, sd_cut=1, method='spearman', geneNameCol='gene_name', groupCol=NULL, name='tmp' ) {
# TODO: implement the p value calculation!
d <- reduce.Obj( x, rownames(x@data)[which( apply(x@data,1,sd) > sd_cut)], name =name )
if ( ! is.null(groupCol) ){
ret <- list()
names <- unique(d$samples[,groupCol])
for ( i in 1:length(names)) {
a <- subset( d, column=groupCol, value=names[i], name= paste(d$name,names[i],sep='_'), mode='equals' )
ret[[i]] = corMat( a, sd_cut= sd_cut,method=method, geneNameCol=geneNameCol )
}
names(ret) <- names
ret
}
else {
n = nrow(d$data)
print ( paste(d$name,": I create a",n,'x', n,'matrix') )
ret <- cor(t(d$data), method=method )
colnames(ret) <- rownames(ret) <- forceAbsoluteUniqueSample( as.vector(d$annotation[,geneNameCol]) )
ret
}
})
#' @name corMat
#' @aliases corMat,StefansExpressionSet-method
#' @rdname corMat-methods
#' @docType methods
#' @description calculate a correlation matrix for the data object
#' @param x the StefansExpressionSet varibale
#' @param sd_cut the cut off value for the sd check
#' @param method any method supported by \code{\link[stats]{cor.test}}
#' @param geneNameCol the name of the gene column (gene_name)
#' @param groupCol the column name of the grouping variable in the samples table
#' @param name the name of the analysis
#' @return the correlation matrix
#' @title description of function corMat
#' @export
setGeneric('corMat', ## Name
function ( x, sd_cut=1, method='spearman', geneNameCol='gene_name', groupCol=NULL, name='tmp' ) { ## Argumente der generischen Funktion
standardGeneric('corMat') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('corMat', signature = c ( 'StefansExpressionSet') ,
definition = function ( x, sd_cut=1, method='spearman', geneNameCol='gene_name', groupCol=NULL, name='tmp' ) {
d <- reduce.Obj( x, rownames(x@data)[which( apply(x@data,1,sd) > sd_cut)], name = name )
if ( ! is.null(groupCol) ){
ret <- list()
names <- unique(d$samples[,groupCol])
for ( i in 1:length(names)) {
a <- subset( d, column=groupCol, value=names[i], name= paste(d$name,names[i],sep='_'), mode='equals' )
ret[[i]] = corMat( a, sd_cut= sd_cut,method=method, geneNameCol=geneNameCol )
}
names(ret) <- namestime.col
ret
}
else {
n = nrow(d$data)
print ( paste(d$name,": I create a",n,'x', n,'matrix') )
ret <- cor(t(d$data), method=method )
colnames(ret) <- rownames(ret) <- forceAbsoluteUniqueSample( as.vector(d$annotation[,geneNameCol]) )
ret
}
})
#' @name cor2cytoscape
#' @aliases cor2cytoscape,StefansExpressionSet-method
#' @rdname cor2cytoscape-methods
#' @docType methods
#' @description exports the correlation matrix in the format \url{http://www.cytoscape.org/} does
#' @description support as network file.
#' @param M the correlation matrix obtained by a run of \code{\link{corMat}}
#' @title description of function cor2cytoscape
#' @export
setGeneric('cor2cytoscape', ## Name
function (M, file, cut=0.9 ) { ## Argumente der generischen Funktion
standardGeneric('cor2cytoscape') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
###last
setMethod('cor2cytoscape', signature = c ( 'StefansExpressionSet') ,
definition = function (M, file, cut=0.9 ) {
edges <- NULL
if ( class(M) == 'list' ) {
edges <- list()
for( i in 1:length(M) ) {
edges[[i]] <- cor2cytoscape( M[[i]], file= paste(file,names(M)[i],'.txt',sep='_'), cut=cut)
}
names(edges) <- names(M)
}
else {
edges <- NULL
n <- as.vector(rownames(M))
diag(M) <- 0
for (i in 1:nrow(M)) {
for (j in which(M[i,i:ncol(M)] >cut | M[i,i:ncol(M)] < -cut ) ) {
edges <- rbind(edges, c(n[i], n[j], M[i,j] ))
}
}
#edges <- matrix( edges, ncol=3 )
edges <- as.data.frame(edges)
colnames(edges) <- c('Start', 'End', 'rho')
edges$Type <- as.vector(rep( 1, nrow(edges) ))
edges$Type[which(as.numeric(as.vector(edges$rho)) < 0)] <- -1
write.table( edges, file , sep=" ",quote=F, row.names=F )
}
invisible(edges)
})
#
#' @name melt
#' @aliases melt,StefansExpressionSet-method
#' @rdname melt-methods
#' @docType methods
#' @description met an StefansExpressionSet to be plotted using ggplot2 functions
#' @param data the StefansExpressionSet object
#' @param groupcol the column in the samples table to group the expression on
#' @param colCol samples column names(s) to color the grouping data
#' @param rowCol annotation column name(s) describing the gene groups
#' @param probeNames which probenames to use (ProbeSetID or Gene.Symbol ...)
#' @title description of function melt
#' @export
setGeneric('melt.StefansExpressionSet', ## Name
package = 'StefansExpressionSet',
function ( dat, groupcol='GroupName', colCol=NULL, rowCol=NULL, probeNames=NULL, na.rm = FALSE, value.name = "value") { ## Argumente der generischen Funktion
standardGeneric('melt.StefansExpressionSet') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('melt.StefansExpressionSet',
signature= ('StefansExpressionSet' ),
function ( dat, groupcol='GroupName', colCol=NULL, rowCol=NULL, probeNames=NULL, na.rm = FALSE, value.name = "value" ) {
if ( is.null(probeNames)){
probeNames <- dat@rownamescol
}
ma <- dat@data[,order(dat@samples[,groupcol] )]
#rownames(ma) <- forceAbsoluteUniqueSample(as.vector(dat@annotation[, probeNames]) )
melted <- reshape2::melt( cbind(rownames(ma),ma) )
dat@samples <- dat@samples[order(dat@samples[,groupcol]),]
if ( length( which ( melted[,2] == '') ) > 0 ){
melted <- melted[ - which ( melted[,2] == ''),]
}
melted[,3] <- as.numeric(as.character(melted[,3]))
grps <- NULL
for ( i in as.vector(dat@samples[,groupcol]) ){
grps <- c( grps, rep( i, nrow(dat@data)))
}
cgrps <- NULL
for ( i in as.vector(dat@samples[,colCol]) ){
cgrps <- c( cgrps, rep( i, nrow(dat@data)))
}
colnames(melted) <- c('ProbeName', 'SampleName', 'Expression')
melted$Group <- grps
melted <- addSampleColGroup( dat, melted, colName= colCol)
melted <- addGeneColGroup( dat, melted, colName= rowCol)
melted
})
#' @name addSampleColGroup
#' @aliases addSampleColGroup,StefansExpressionSet-method
#' @rdname addSampleColGroup-methods
#' @docType methods
#' @description an internal method adding new color columns to the melted dataset
#' @param x the StefansExpressionSet object
#' @param melted the already melted matrix
#' @param colName the colnames to add to the matrix (defult=NULL)
#' @title description of function addSampleColGroup
setGeneric('addSampleColGroup', ## Name
function ( x, melted, colName=NULL ) { ## Argumente der generischen Funktion
standardGeneric('addSampleColGroup') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('addSampleColGroup', signature = c ('StefansExpressionSet'),
definition = function ( x, melted, colName=NULL ) {
if ( ! is.null(colName) ) {
genes = nrow(x@data)
samples = ncol(x@data)
grps <- NULL
for ( GNid in 1:length(colName)){
le <- genes + GNid -1
melted_new <- matrix(nrow=(nrow(melted)+GNid*samples), ncol=ncol(melted) )
for ( sid in 0:(samples-1)) {
for ( i in (1+(sid*genes)):((sid+1)*genes) ) {
melted_new[ (i+GNid * sid),] <- as.vector(t(melted[ i,]))
}
l <- melted[((sid+1)*genes),]
l[c(1,3)] <- c(colName[GNid], as.character(x@samples[sid+1,colName[GNid]]) )
melted_new[ (genes+GNid)*(sid+1), ] <- as.vector(t(l))
}
melted <- data.frame(melted_new,row.names= 1:nrow(melted_new))
}
}
melted
} )
#' @name addGeneColGroup
#' @aliases addGeneColGroup,StefansExpressionSet-method
#' @rdname addGeneColGroup-methods
#' @docType methods
#' @description an internal function to add row level clusters to a merged data table
#' @param x the StefansExpressionSet
#' @param melted the melted table
#' @param colName the annotation column names to use for the coloring default=NULL
#' @title description of function addGeneColGroup
setGeneric('addGeneColGroup', ## Name
function ( x, melted, colName=NULL ) { ## Argumente der generischen Funktion
standardGeneric('addGeneColGroup') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('addGeneColGroup', signature = c ('StefansExpressionSet'),
definition = function ( x, melted, colName=NULL ) {
if ( ! is.null(colName)) {
grps <- NULL
datarows = nrow(x@data)
ret <- NULL
for ( GNid in 1:length(n)){
add <- as.matrix(melted[1:datarows,])
add[,3] <- as.vector(x@annotation[,colName[GNid]])
add[,4] <-'GeneGroup'
ret <- rbind( ret, add )
}
}else {
ret <- melted
}
ret
} )
#' @name plot.stats.as.heatmap
#' @aliases plot.stats.as.heatmap,StefansExpressionSet-method
#' @rdname plot-methods
#' @docType methods
#' @description create heatmap for each statistics table and each pvalue cutoff This function uses
#' @description internally the plot.heatmaps() function for each selected probeset list
#' @param x the StefansExpressionSet
#' @param pvalue a vector of pvalues to set as cut off
#' @param Subset no idear at the moment....
#' @param comp a list of comparisons to restric the plotting to (NULL = all)
#' @param Subset.name no idear what that should do here....
#' @param gene_centered collapse all genes with the same gene symbol into one value
#' @param collaps how to collapse the data if gene_centered
#' @param geneNameCol the name of the gene.symbol column in the annotation
#' @title description of function plot
#' @export
setGeneric('plot.stats.as.heatmap', ## Name
function ( x, pvalue=c( 0.1,1e-2 ,1e-3,1e-4,1e-5, 1e-6, 1e-7, 1e-8 ), Subset=NULL , Subset.name= NULL, comp=NULL, gene_centered=F, collaps=NULL,geneNameCol= "mgi_symbol") { ## Argumente der generischen Funktion
standardGeneric('plot.stats.as.heatmap') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('plot.stats.as.heatmap', signature = c ('StefansExpressionSet'),
definition = function ( x, pvalue=c( 0.1,1e-2 ,1e-3,1e-4,1e-5, 1e-6, 1e-7, 1e-8 ), Subset=NULL , Subset.name= NULL, comp=NULL, gene_centered=F, collaps=NULL,geneNameCol= "mgi_symbol") {
if ( !is.null(comp) ){
print ("At the moment it is not possible to reduce the plotting to one comparison only" )
return (x)
}
add = ''
orig.name = x@name
if ( gene_centered ) {
add = 'GenesOnly'
}
if ( ! is.null(collaps)){
if ( nchar(add) > 1 ){
add = paste( 'mean', add, sep='_')
}else{
add = 'mean'
}
}
if ( ! is.null(Subset) ){
if ( is.null(Subset.name)){
Subset.name = 'subset_name_unset'
}
if ( nchar(add) > 1 ){
add = paste( add, Subset.name,sep='_')
}else{
add = Subset.name
}
}
if ( nchar(add) > 0 ){
x@name = paste( add,x@name, sep='_')
}
print ( x@name )
for (i in match( names(x@sig_genes), pvalue ) ){
try( plot.heatmaps(
x,
x@sig_genes[[i]],
names(x@sig_genes)[i],
analysis_name = paste (x@name, version),
gene_centered = gene_centered,
Subset = Subset,
collaps = collaps,
geneNameCol= geneNameCol
), silent=F)
}
x@name = orig.name
})
#' @name plot.probeset
#' @aliases plot.probeset,StefansExpressionSet-method
#' @rdname plot.probeset-methods
#' @docType methods
#' @description This function plots the expression data grouped by the GroupName using ggplot2.
#' @description Works only for one probeset.
#' @param x the StefansExpressionSet object
#' @param groupcol the grouping column in the samples data
#' @param colCol the coloring column in the sample data
#' @param probeNames the column in the annotation datacontaining the gene symbol
#' @param x the StefansExpressionSet object
#' @param probeset the probeset name (rownames(x@data))
#' @param boxplot (F or T) create a a dots- or box-plot
#' @param pdf save the file as pdf (default svg)
#' @param geneNameCol the column name for the gene symbol to use in the plots
#' @param sampleGroup the sample table column containing the grouping information
#' @title description of function plot.probeset
#' @examples
#' p <- plot.probeset( red, rownames(red@data)[20], geneNameCol= 'GeneID', boxplot=T)
#' #produces the file '../../tmp/reducedSet_boxplot_Mybl1_expression.svg'
#' p <- plot.probeset( red, rownames(red@data)[20], geneNameCol= 'GeneID', boxplot=F)
#' #produces the file '../../tmp/reducedSet_points_Mybl1_expression.svg'
#' @export
setGeneric('plot.probeset', ## Name
function ( x, probeset, boxplot=F, pdf=F, geneNameCol= "mgi_symbol", sampleGroup='GroupName' ) { ## Argumente der generischen Funktion
standardGeneric('plot.probeset') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
####last
setMethod('plot.probeset', signature = c ( 'StefansExpressionSet') ,
definition = function ( x, probeset, boxplot=F, pdf=F, geneNameCol= "mgi_symbol", sampleGroup='GroupName' ) {
if ( sum(is.na(match(probeset, rownames(x@data)))==F) == 0 ){
probeset <- rownames(x@data)[match( probeset, x@annotation[,geneNameCol] ) ]
}
if ( length(probeset) == 0 ) {
stop( "I could not find the probeset in the data object!")
}
print ( probeset )
x <- reduce.Obj(x, probeset )
if ( nrow(x@data) == 1 ) {
melted <- melt( x, probeNames=geneNameCol, groupcol = sampleGroup , colCol= sampleGroup )
colnames(melted) <- c( 'Gene.Symbol', 'Sample', 'Expression', 'Group', 'ColorGroup' )
collaps <- '_points_'
if (boxplot){
collaps <-'_boxplot_'
}
if ( pdf ) {
pdf( file=paste(x@outpath,x@name,collaps,x@annotation[,geneNameCol],"_expression.pdf",sep='') ,width=5,height=4)
}else {
devSVG( file=paste(x@outpath,x@name,collaps,x@annotation[,geneNameCol],"_expression.svg",sep='') ,width=5,height=4)
}
if ( boxplot ){
p <- ggplot(melted ,aes(x=Group, y=Expression,color=Group)) + geom_boxplot(shape=19)+theme_bw()
}else{
#TODO: adjust jitter width based on sample group numbers
p <-ggplot(melted ,aes(x=Group, y=Expression,color=Group)) +geom_jitter(shape=19, width=0.2, height=0)+theme_bw()
}
print ( p )
dev.off()
invisible(p)
}
else {
stop("You should rather use a heatmap to display more than one probeset")
}
})
#' @name drop.samples
#' @aliases drop.samples,StefansExpressionSet-method
#' @rdname drop.samples-methods
#' @docType methods
#' @description drops samples from the StefansExpressionSet
#' @param x the StefansExpressionSet object
#' @param samplenames which samples to drop (samples like colnames(x@data))
#' @param name the name of the new StefansExpressionSet object
#' @title description of function drop.samples
#' @export
setGeneric('drop.samples', ## Name
function ( x, samplenames=NULL, name='dropped_samples' ) { ## Argumente der generischen Funktion
standardGeneric('drop.samples') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('drop.samples', signature = c ( 'StefansExpressionSet') ,
definition = function ( x, samplenames=NULL, name='dropped_samples' ) {
if ( ! is.null(samplenames)){
red <- new(class(x)[1], name=name )
for (n in c( slot(x,'simple'), 'annotation') ){
slot(red,n) <- slot(x,n)
}
red@samples <- x@samples[ is.na(match(x@samples[,x@sampleNamesCol], samplenames ) ) == T ,]
#print ( paste( "Dropping", length(samplenames), "samples (", paste( samplenames, collapse=", "),")") )
red@data <- x@data[, make.names(as.vector(red@samples[,red@sampleNamesCol]))]
# colnames(red@data) <- forceAbsoluteUniqueSample ( as.vector(red@samples[, red@sampleNamesCol ]) )
# red@samples[,red@sampleNamesCol] <- colnames(red@data)
}
red
})
#' @name restrictSamples
#' @aliases restrictSamples,StefansExpressionSet-method
#' @rdname restrictSamples-methods
#' @docType methods
#' @description Drop the samples, that have been selected!
#' @param x the StefansExpressionSet object
#' @param name the name of the new StefansExpressionSet
#' @param column which column to analyze in the samples table
#' @param value which value to take as 'cut off'
#' @param mode one of 'less', 'more', 'onlyless', 'equals'
#' @title description of function restrictSamples
#' @export
setGeneric('restrictSamples', ## Name
function ( x, column='Analysis', value=NULL, name='newSet', mode= 'equals' ) { ## Argumente der generischen Funktion
standardGeneric('restrictSamples') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('restrictSamples', signature = c ( 'StefansExpressionSet') ,
definition = function ( x, column='Analysis', value=NULL, name='newSet', mode= 'equals' ) {
S <- NULL
if ( is.null(value)) {
stop( "the value must not be NULL!")
}
switch( mode,
'less' = S <- x@samples[which ( x@samples[,column] <= value), ],
'more' = S <- x@samples[which ( x@samples[,column] > value ), ],
'onlyless' = S <- x@samples[which ( x@samples[,column] < value ), ],
'equals' = S <- x@samples[which ( x@samples[,column] == value), ]
)
if ( nrow(S) < nrow(x@samples)){
x <- drop.samples( x, S[,x@sampleNamesCol], name=name)
}
write.table (S, file=paste(name,'_Sample_Description', ".xls", sep=''), sep='\t', row.names=F,quote=F )
x
})
####last
#' @name pwd
#' @aliases pwd
#' @rdname pwd-methods
#' @docType methods
#' @description uses the linux pwd command to determin the working directory
#' @return A string containing the working directory
#' @title description of function pwd
#' @export
setGeneric('pwd', ## Name
function ( a ) { ## Argumente der generischen Funktion
standardGeneric('pwd') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('pwd', signature = c () ,
definition = function ( a ) {
rm(a)
system( 'pwd > __pwd' )
t <- read.delim( file = '__pwd', header=F)
t <- as.vector(t[1,1])
t <- paste(t,"/",sep='')
unlink( '__pwd')
t
})
#' @name forceAbsoluteUniqueSample
#' @aliases forceAbsoluteUniqueSample,character-method
#' @rdname forceAbsoluteUniqueSample-methods
#' @docType methods
#' @description force absolute unique names in a vector by adding _<amount of repeats> to each value
#' @description if there is more than one repeat opf the value @exportMethod
#' @param x the StefansExpressionSet object
#' @param separator '_' or anything you want to set the separator to
#' @title description of function forceAbsoluteUniqueSample
setGeneric('forceAbsoluteUniqueSample', ## Name
function ( x ,separator='_') { ## Argumente der generischen Funktion
standardGeneric('forceAbsoluteUniqueSample') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('forceAbsoluteUniqueSample',
, signature = c ( 'character') ,
definition = function ( x ,separator='_') {
last = ''
ret <- vector(length=length(x))
for ( i in 1:length(x) ){
if ( is.null(ret) ){
last = x[i]
ret[i] <- last
}
else{
last = x[i]
if ( ! is.na(match( last, ret )) ){
last <- paste(last,separator,sum( ! is.na(match( x[1:i], last )))-1, sep = '')
}
ret[i] <- last
}
}
ret
})
setMethod('forceAbsoluteUniqueSample',
, signature = c ( 'factor') ,
definition = function ( x ,separator='_') {
last = ''
ret <- vector(length=length(x))
for ( i in 1:length(x) ){
if ( is.null(ret) ){
last = x[i]
ret[i] <- last
}
else{
last = x[i]
if ( ! is.na(match( last, ret )) ){
last <- paste(last,separator,sum( ! is.na(match( x[1:i], last )))-1, sep = '')
}
ret[i] <- last
}
}
ret
})
#' @name addAnnotation
#' @aliases addAnnotation,StefansExpressionSet-method
#' @rdname addAnnotation-methods
#' @docType methods
#' @description add aditional annotation to the StefansExpressionSet
#' @param x the StefansExpressionSet object
#' @param mart the annotation table (data.frame or mart object)
#' @param mart.col which column corresponds to the rownames(x@data)
#' @title description of function addAnnotation
#' @export
setGeneric('addAnnotation', ## Name
function (x ,mart, mart.col='refseq_mrna') { ## Argumente der generischen Funktion
standardGeneric('addAnnotation') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('addAnnotation', signature = c ( 'StefansExpressionSet') ,
definition = function (x ,mart, mart.col='refseq_mrna') {
if ( ! class(mart) == 'data.frame' ){
x@annotation <- cbind(x@annotation, mart[match(rownames(x@data),mart[,mart.col] ), ] )
}
else {
x@annotation <- mart[is.na(match(mart[,mart.col], rownames(x@data)))==F,]
rownames(x@annotation) <- rownames(x@data)
}
x
})
#' @name getAnnotation4probesets
#' @aliases getAnnotation4probesets,StefansExpressionSet-method
#' @rdname getAnnotation4probesets-methods
#' @docType methods
#' @description subsets the annotation table for a set of probesets
#' @param x the StefansExpressionSet object
#' @param probesets the vector of probesets to annotate (rownames(x@data))
#' @param colname a vector of colnames to annotate
#' @return a vector of annotation values
#' @title description of function getAnnotation4probesets
setGeneric('getAnnotation4probesets', ## Name
function (x, probesets=c(), colname='Gene.Symbol' ) { ## Argumente der generischen Funktion
standardGeneric('getAnnotation4probesets') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('getAnnotation4probesets', signature = c ( 'StefansExpressionSet') ,
definition = function (x, probesets=c(), colname='Gene.Symbol' ) {
as.vector(x@annotation[match( probesets, rownames(x@data) ), colname ])
})
#' @name ranks
#' @aliases ranks,StefansExpressionSet-method
#' @rdname ranks-methods
#' @docType methods
#' @description creates a new ranks vector in the StefansExpressionSet
#' @param x the StefansExpressionSet object
#' @title description of function ranks
#' @export
setGeneric('ranks', ## Name
function (x ) { ## Argumente der generischen Funktion
standardGeneric('ranks') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('ranks', signature = c ('StefansExpressionSet') ,
definition = function (x ) {
if ( ! exists ( 'ranks', where =x ) ){
x@ranks <- apply( x@data,2,order)
colnames( x@ranks ) <- colnames(x@data)
rownames( x@ranks ) <- rownames(x@data)
}
x
})
#' @name reduce.Obj
#' @aliases reduce.Obj,StefansExpressionSet-method
#' @rdname reduce.Obj-methods
#' @docType methods
#' @description reduces the dataset based on genes e.g. dropps genes from the StefansExpressionSet
#' @param x the StefansExpressionSet object
#' @param probeSets a list of probesets to reduce the data to
#' @param name the new StefansExpressionSet name
#' @title description of function reduce.Obj
#' @export
setGeneric('reduce.Obj', ## Name
function ( x, probeSets=c(), name="reducedSet" ) { ## Argumente der generischen Funktion
standardGeneric('reduce.Obj') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('reduce.Obj', signature = c ( 'StefansExpressionSet') ,
definition = function ( x, probeSets=c(), name="reducedSet" ) {
retObj <- new(class(x)[1], name = name)
useOnly <- match(probeSets, rownames(x@data))
not.matched <- probeSets[is.na(useOnly)]
if ( length(not.matched) > 0 ){
print (paste('Problematic genes:', paste(not.matched,sep=', ')))
probeSets <- probeSets[ ! is.na(useOnly)]
useOnly <- useOnly[ ! is.na(useOnly) ]
}
for (n in slot(x,'simple')){
slot(retObj,n) <- slot(x,n)
}
retObj@samples <- x@samples
retObj@data <- data.frame( x@data[ useOnly ,] )
rownames(retObj@data) <- probeSets
colnames(retObj@data) <- colnames(x@data)
retObj@annotation <- data.frame(x@annotation[useOnly,]) ## if I only have one column here
colnames(retObj@annotation) <- colnames(x@annotation)
if ( length( names(x@stats)) > 0){
for ( i in 1:length(names(x@stats))){
retObj@stats[[i]]= x@stats[[i]][ match(probeSets ,x@stats[[i]][,1] ),]
}
names(retObj@stats) <- names(x@stats)
}
if ( length(x@ranks) > 0 ){
retObj@ranks = x@ranks[useOnly,]
}
if ( ncol( x@raw ) > 0 ) {
retObj@raw = x@raw[useOnly,]
}
retObj
})
#' @name ggplot.gene
#' @aliases ggplot.gene,StefansExpressionSet-method
#' @rdname ggplot.gene-methods
#' @docType methods
#' @description Plot one gene in the StefansExpressionSet as boxplot or points plot (using ggplot2)
#' @param dat the StefansExpressionSet object
#' @param gene the gene of interest
#' @param colrs the grouping colors for the x axis (samples)
#' @param groupCol the samples clumn that contains the grouping information
#' @param colCol the sample column that contains the color information
#' @param log2 log2 transform data before plotting (default = F)
#' @title description of function ggplot.gene
#' @export
setGeneric('ggplot.gene', ## Name
function (dat,gene, colrs=NULL, groupCol='GroupID', colCol=NULL, boxplot=F, log2=F) { ## Argumente der generischen Funktion
standardGeneric('ggplot.gene') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('ggplot.gene', signature = c ( 'StefansExpressionSet') ,
definition = function (dat,gene, colrs=NULL, groupCol='GroupID', colCol=NULL, boxplot=F, log2=F) {
not.in = 'NUKL'
if ( is.null(colrs)){
if ( is.null(colCol) ) {
colrs = rainbow( length(unique(dat@samples[,groupCol])))
}
else {
colrs = rainbow( length(unique(dat@samples[,colCol])))
}
}
dat <- reduce.Obj ( dat, gene, name=gene )
if ( log2 ) {
dat@data <- log2(dat@data+1)
}
g1 <- melt.StefansExpressionSet( dat, probeNames=dat@rownamescol, groupcol=groupCol,colCol=colCol)
#g1 <- subset(dat, Gene.Symbol == gene)
colnames(g1) <- c( 'Gene.Symbol', 'Sample', 'Expression', 'Group', 'ColorGroup' ) [1:ncol(g1)]
if ( log2 ) {
exprsname <- 'Expression (log2)'
}
else{
exprsname <- 'Expression'
}
g1$Group <- factor( g1$Group, levels=unique( as.character(g1$Group)))
if ( length(g1) == 0){
not.in = c( gene )
}
if ( boxplot ){
list ( plot=ggplot2::ggplot(g1 ,ggplot2::aes(x=Group, y=Expression,color=Group))
+ ggplot2::geom_boxplot(shape=19)+ggplot2::theme_bw()
+ ggplot2::scale_colour_manual( values= colrs, guide=FALSE )
+ ggplot2::theme( axis.text.x= ggplot2::element_text(angle=90) )
+ ggplot2::labs(title=gene, x='', y=exprsname),
not.in = not.in
)
}else{
list ( plot=ggplot2::ggplot(g1,ggplot2::aes(x=Group, y=Expression,color=Group))
+ ggplot2::geom_jitter(shape=19)+ggplot2::theme_bw()
+ ggplot2::scale_colour_manual( values= colrs, guide=FALSE )
+ ggplot2::theme( axis.text.x= ggplot2::element_text(angle=90) )
+ ggplot2::labs(title=gene, x='', y=exprsname),
not.in = not.in
)
}
})
#' @name gg.heatmap.list
#' @aliases gg.heatmap.list,StefansExpressionSet-method
#' @rdname gg.heatmap.list-methods
#' @docType methods
#' @description uses ggplot2 to plot heatmaps
#' @param dat the StefansExpressionSet object
#' @param glist a list of probesets to plot (or all)
#' @param colrs a list of colors for the sample level boxes (or rainbow colors)
#' @param groupCol the column group in the samples table that contains the grouping strings
#' @param colCol the column group in the samples table that contains the color groups
#' @title description of function gg.heatmap.list
#' @export
setGeneric('gg.heatmap.list', ## Name
function (dat,glist=NULL, colrs=NULL, groupCol='GroupID', colCol=NULL) { ## Argumente der generischen Funktion
standardGeneric('gg.heatmap.list') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('gg.heatmap.list', signature = c ( 'StefansExpressionSet') ,
definition = function (dat,glist=NULL, colrs=NULL, groupCol='GroupID', colCol=NULL) {
if ( ! is.null(glist) ) {
isect <- reduce.Obj ( dat, glist)
}else {
isect <- dat
}
if ( is.null(colCol)){
colCol <- groupCol
}
if ( is.null(colrs) ){
colrs = rainbow( length(unique(isect@samples[,colCol])))
}
if ( ! isect@zscored ) {isect <- z.score(isect)}
dat.ss <- melt.StefansExpressionSet ( isect, probeNames=isect@rownamescol, groupcol=groupCol,colCol=colCol)
#dat.ss <- dat[which(is.na(match(dat$Gene.Symbol,isect))==F),]
colnames(dat.ss) <- c( 'Gene.Symbol', 'Sample', 'Expression', 'Group', 'ColorGroup' )[1:ncol(dat.ss)]
#dat.ss$z[which(dat.ss$z < -5)] <- -5
#dat.ss$z[which(dat.ss$z > 5)] <- 5
samp.cast <- reshape2::dcast(dat.ss,Gene.Symbol~Sample,mean,value.var="z")
samp.mat <- as.matrix(samp.cast[,2:ncol(samp.cast)])
ord.genes <-
as.vector(samp.cast[hclust(dist(samp.mat),method="ward.D")$order,1])
dat.ss$Gene.Symbol <- with(dat.ss,factor(Gene.Symbol,levels =
unique(as.character(ord.genes))))
dat.ss$Sample <- with(dat.ss,factor(Sample,levels =
unique(as.character(Sample))))
dat.ss$Group <- with(dat.ss,factor(Group,levels =
unique(as.character(Group))))
dat.ss$colrss <- colrs[dat.ss$Group]
ss <-dat.ss[which(dat.ss$Gene.Symbol==dat.ss$Gene.Symbol[1]),]
brks= c( -20.1, as.vector(quantile(dat.ss$z[which(dat.ss$z != min(dat.ss$z))],seq(0,1,by=0.1)) ))
brks = unique(brks)
print ( brks )
brks[length(brks)] = brks[length(brks)] + 0.1
dat.ss$z <- cut( dat.ss$z, breaks= brks)
p = ( ggplot2::ggplot(dat.ss, ggplot2::aes(x=SampleName,y=ProbeName))
+ ggplot2::geom_tile(ggplot2::aes(fill=z))
+ ggplot2::scale_fill_manual( values = c( 'gray', gplots::bluered(length(brks) -2 )) )
+ ggplot2::theme(
legend.position= 'bottom',
axis.text.x=ggplot2::element_blank(),
#axis.ticks.x=element_line(color=ss$colrss),
axis.ticks.length=ggplot2::unit(0.00,"cm")
)+ labs( y='') )
if ( ncol(dat.ss) == 5 ){
p <- p + ggplot2::facet_grid( ColorGroup ~ Group,scales="free", space='free')
}else if ( ncol(dat.ss) == 4 ) {
p <- p + ggplot2::facet_grid( . ~ Group,scales="free", space='free')
}
list ( plot = p, not.in = setdiff( glist, rownames(isect@data)) )
# list ( plot = ggplot2::ggplot(dat.ss, ggplot2::aes(x=Sample,y=Gene.Symbol))
# + ggplot2::geom_tile(ggplot2::aes(fill=z))
# + ggplot2::scale_fill_manual( values = c( 'gray', gplots::bluered(length(brks) -2 )) )
# + ggplot2::theme(
# legend.position= 'bottom',
# axis.text.x=ggplot2::element_blank(),
# axis.ticks.x=ggplot2::element_line(color=ss$colrss),
# axis.ticks.length=ggplot2::unit(0.6,"cm")
# )
# + ggplot2::labs( y=''),
# not.in = setdiff( glist, rownames(isect@data)) )
})
#' @name z.score
#' @aliases z.score.matrix,StefansExpressionSet-method
#' @rdname z.score.matrix-methods
#' @docType methods
#' @description z score the matrix
#' @param m the matrix of column = samples and rows = genes or an StefansExpressionSet
#' @return the z scored matrix
#' @title description of function z.score
#' @export
setGeneric('z.score', ## Name
function (m) { ## Argumente der generischen Funktion
standardGeneric('z.score') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('z.score', signature = c ('matrix'),
definition = function (m ) {
rn <- rownames( m )
me <- apply( m, 1, mean )
sd <- apply( m, 1, sd )
sd[which(sd==0)] <- 1e-8
m <- (m - me) /sd
rownames(m) <- rn
m
})
setMethod('z.score',signature = c ('StefansExpressionSet'),
definition = function (m) {
if (! m@zscored ){
#m$data <- z.score( as.matrix( m$data ))
rn <- rownames( m@data )
me <- apply( m@data, 1, mean )
sd <- apply( m@data, 1, sd )
sd[which(sd==0)] <- 1e-8
m@data <- (m@data - me) /sd
rownames(m@data) <- rn
m@zscored = TRUE
}
m
})
#' @name plot.heatmaps
#' @aliases plot.heatmaps,StefansExpressionSet-method
#' @rdname plot.heatmaps-methods
#' @docType methods
#' @description A flexible heatmapping function that depends on heatmap.2 and allows for many data
#' @description selection/conversion options
#' @param dataOBJ the StefansExpressionSet object
#' @param gene.names the rownames(dataObj@data) level gene names
#' @param pvalue an optional cut off value to select genes from the statistical results tables
#' @param analysis_name the name for the outfiles
#' @param gene_centered in case there are multiple probesets for each gene - sum the data or display each probset
#' @param Subset an optional list of strings matching to the gene symbols used to select genes of interest
#' @param collaps collaps the sample groups into a single column of the heatmap using one of collaps=c('median', 'mean')
#' @title description of function plot.heatmaps
setGeneric('plot.heatmaps', ## Name
function ( dataOBJ, gene.names=NULL , pvalue=1, analysis_name =NULL, gene_centered = F, Subset=NULL, collaps=NULL,geneNameCol= "mgi_symbol", pdf=F,... ) { ## Argumente der generischen Funktion
standardGeneric('plot.heatmaps') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('plot.heatmaps', signature = c ( 'StefansExpressionSet') ,
definition = function ( dataOBJ, gene.names=NULL , pvalue=1, analysis_name =NULL, gene_centered = F, Subset=NULL, collaps=NULL,geneNameCol= "mgi_symbol", pdf=F,... ) {
dataOBJ <- normalize(dataOBJ)
dataOBJ <- z.score( dataOBJ )
if ( is.null(analysis_name)){
analysis_name = dataOBJ@name
}
exprs = dataOBJ@data
drop <- which( (apply(exprs,1,sd) == 0) == T)
if ( length(drop) > 0 ){
print ( paste("I need to drop",length(drop),"genes as the varianze is 0") )
exprs = exprs[-drop,]
}
if ( ! is.null(gene.names)){
geneNamesBased <- exprs[is.na(match(rownames(exprs),gene.names$all$id ))==F,]
geneNamesBased <- data.frame ( 'GeneSymbol' = as.vector (dataOBJ@annotation[is.na(match ( dataOBJ@annotation$GeneID, gene.names$all$id ) ) == F, geneNameCol]), geneNamesBased)
}
else {
geneNamesBased <- data.frame( 'GeneSymbol' = dataOBJ@annotation[rownames(exprs),geneNameCol], exprs )
}
if ( ! is.null(Subset) ) { ##
useful <- NULL
for ( i in 1:length(Subset) ){
useful <- c( useful, grep(Subset[i], geneNamesBased[,1] ) )
}
if ( length(useful) > 0 ){
geneNamesBased <- geneNamesBased[ unique(useful), ]
}
}
if ( gene_centered ){
ok <- as.vector(which ( is.na(geneNamesBased[,1] )))
grepped <- grep('_drop',forceAbsoluteUniqueSample(as.vector(geneNamesBased[,1]), '_drop_' ))
if ( length(ok) > 1 ){
geneNamesBased <- geneNamesBased[- grepped [ - match ( ok[-1] , grepped ) ], ]
}
else if ( length(grepped) > 0 ) {
geneNamesBased <- geneNamesBased[-grepped, ]
}
}
fname = paste(dataOBJ@outpath, 'GenesOnly_',dataOBJ@name,sep='')
rn <- rownames(geneNamesBased)
if ( ! is.null(collaps)){
u <- unique(as.vector(dataOBJ@samples$GroupName))
m <- length(u)
mm <- matrix ( rep(0,m * nrow(geneNamesBased)), ncol=m)
colnames(mm) <- u
rownames(mm) <- rownames(geneNamesBased)
if ( collaps=="median") {
for ( i in u ){
## calculate the mean expression for each gene over all cells of the group
mm[,i] <- apply( geneNamesBased[ , which(as.vector(dataOBJ@samples$GroupName) == i )+1],1,median)
}
}
else {
for ( i in u ){
## calculate the mean expression for each gene over all cells of the group
mm[,i] <- apply( geneNamesBased[ , which(as.vector(dataOBJ@samples$GroupName) == i )+1],1,mean)
}
}
geneNamesBased <- data.frame( GeneSymbol = as.vector(geneNamesBased[,1]), mm )
}else {
collaps= ''
}
write.table( data.frame ( 'GeneSymbol' = rownames(geneNamesBased),geneNamesBased[,-1]),file= paste(fname,collaps,'_HeatmapID_',pvalue,'_data4Genesis.txt', sep=''),sep='\t', row.names=F,quote=F )
write.table( geneNamesBased, file= paste(fname,collaps,'_Heatmap_GeneSymbols_',pvalue,'_data4Genesis.txt', sep=''),sep='\t', row.names=F,quote=F )
write.table( dataOBJ@samples, file= paste(fname,collaps,'_Sample_Description.xls', sep=''),sep='\t', row.names=F,quote=F )
if ( nrow(geneNamesBased) > 1 ){
geneNamesBased <- data.frame(read.delim( file= paste(fname,collaps,'_Heatmap_GeneSymbols_',pvalue,'_data4Genesis.txt', sep=''), header=T ))
rownames(geneNamesBased) <- rn
s <- ceiling(20 * nrow(geneNamesBased)/230 )
if ( s < 5 ) {s <- 5}
print (paste ("Height =",s))
if ( pdf ) {
pdf( file=paste(dataOBJ@outpath,dataOBJ@name,collaps,"_Heatmap_IDs_",pvalue,".pdf",sep='') ,width=10,height=s)
}else {
devSVG( file=paste(dataOBJ@outpath,dataOBJ@name,collaps,"_Heatmap_IDs_",pvalue,".svg",sep='') ,width=10,height=s)
}
print ( paste ("I create the figure file ",dataOBJ@outpath,dataOBJ@name,collaps,"_Heatmap_IDs_",pvalue,".svg",sep=''))
heatmap.2(as.matrix(geneNamesBased[,-1]), lwid = c( 1,6), lhei=c(1,5), cexRow=0.4,cexCol=0.7,col = greenred(30), trace="none", margin=c(10, 10),dendrogram="row",scale="row",
'distfun'= function (x) as.dist( 1- cor(t(x), method='pearson')),Colv=F, main=paste( analysis_name, pvalue ),...)
dev.off()
rownames(geneNamesBased) <- paste(geneNamesBased[,1] , rownames(geneNamesBased) )
if ( pdf ) {
pdf( file=paste(dataOBJ@outpath,dataOBJ@name,collaps,"_Heatmap_GeneSymbols_",pvalue,".pdf",sep='') ,width=10,height=s)
}else{
devSVG( file=paste(dataOBJ@outpath,dataOBJ@name,collaps,"_Heatmap_GeneSymbols_",pvalue,".svg",sep='') ,width=10,height=s)
}
print ( paste ("I create the figure file ",dataOBJ@outpath,dataOBJ@name,collaps,"_Heatmap_GeneSymbols_",pvalue,".svg",sep=''))
heatmap.2( as.matrix(geneNamesBased[,-1]), lwid = c( 1,6), lhei=c(1,5), cexRow=0.4,cexCol=0.7,col = greenred(30), trace="none",
distfun = function (x) {as.dist( 1- cor(t(x), method='pearson') ) } ,...)
dev.off()
}
else {
print ( "Problems - P value cutoff to stringent - no selected genes!" );
}
})
#' @name groups.boxplot
#' @aliases groups.boxplot,StefansExpressionSet-method
#' @rdname groups.boxplot-methods
#' @docType methods
#' @description This function can be used to get an overview of the different gene level groups in
#' @description an StefansExpressionSet It uses the linux montage command to merge all different boxplots
#' @description into one figure
#' @param x the ExpresionSet object
#' @param SampleCol the column in the samples table that contains the sample grouping information
#' @param clusters the gene level clusters. The function plots one boxplot for each cluster.
#' @param svg create the single boxplots as svg or (default png) files
#' @param fname the filename extension files are created by paste(x@outpath,fname,<GroupID>,"_boxplot")
#' @param Collapse if set will lead to a collapse of the sample groups into one value per gene. Supports all \code{\link{collaps}} by options.
#' @title description of function groups.boxplot
setGeneric('groups.boxplot', ## Name
function ( x, SampleCol='GroupName', clusters, svg=F, fname='group_', width=800, height=800,mar=NULL, Collapse=NULL, ...) { ## Argumente der generischen Funktion
standardGeneric('groups.boxplot') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('groups.boxplot', signature = c ( 'StefansExpressionSet') ,
definition = function ( x, SampleCol='GroupName', clusters, svg=F, fname='group_', width=800, height=800,mar=NULL, Collapse=NULL, ...) {
maxG <- max( clusters )
ret <- list()
r=1
gnames <- unique(as.vector(x@samples[,SampleCol]))
if ( ! is.null( Collapse) ){
x <- collaps( x, what = Collapse )
fname = paste( fname, Collapse,"_",sep='')
}
x <- z.score(x)
fnames <- vector('character', maxG )
ma = -100
mi = +100
for ( i in 1:maxG ){
if ( svg ) {
fnames[i] = paste(x@outpath,fname,i,"_boxplot.C.svg",sep='')
devSVG ( file= fnames[i], width=width/130, height=height/130 )
}else{
fnames[i] =paste(x@outpath,fname,i,"_boxplot.png",sep='')
png( file=fnames[i],width=width,height=height )
}
robj <- reduce.Obj( x, names(clusters)[which(clusters==i)], name=paste("group_",i,sep='') )
ret[[r]] <- rownames(robj$data)
r = r+1
names(ret)[length(ret)] = robj$name
a= 1;
d <- list()
for ( n in as.vector(gnames) ){
d[[a]] = as.vector(unlist(robj$data[,which(robj$samples[,SampleCol] == n)]))
a = a+1
}
names(d) <- gnames
A <- NULL
if ( ! is.null(mar) ){
A <- boxplot(d, main=paste('Cluster', i, ' - ',nrow(robj$data)," genes", sep='' ),outline=FALSE, par(mar=mar), ... )
}else {
A <- boxplot(d, main=paste('Cluster', i, ' - ',nrow(robj$data)," genes", sep='' ),outline=FALSE, ... )
}
mi <- min(c(mi,A$stats[1,]))
ma <- max(c(ma, A$stats[5,]))
dev.off()
}
print ( paste( "min",mi, 'max', ma) )
#print (paste('montage', paste(fnames, collapse= " "), "-geometry", paste("'", width, "x", height,"+0+0'",sep=''),paste(x@outpath,fname,"montage.png",sep=''), sep=' ' ))
try( file.remove( paste(x@outpath,fname,"montage.png",sep='') ) , silent=T )
system ( paste('montage', fnames, "-geometry", paste("'", width, "x", height,"+0+0'",sep=''),paste(x@outpath,fname,"montage.png",sep='')," 2>/dev/null", collapse=' ' ) )
ret
})
#' @name collaps
#' @aliases collaps,StefansExpressionSet-method
#' @rdname collaps-methods
#' @docType methods
#' @description This function will collpase the data in the StefansExpressionSet to only contain one value
#' @description per sample group.
#' @param dataObj the StefansExpressionSet
#' @param by collapsing method c('median','mean','sd','sum', or own function )
#' @param groupCol the sample names you want to group on
#' @title description of function collaps
#' @export
setGeneric('collaps', ## Name
function (dataObj, by=c('median','mean','sd','sum' ), groupCol='GroupID' ) { ## Argumente der generischen Funktion
standardGeneric('collaps') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('collaps', signature = c ('StefansExpressionSet'),
definition = function (dataObj, by=c('median','mean','sd','sum', 'var' ), groupCol='GroupID' ) {
u <- unique(as.vector(dataObj@samples[,groupCol]))
m <- length(u)
mm <- matrix ( rep(0,m * nrow(dataObj@data)), ncol=m)
colnames(mm) <- u
rownames(mm) <- rownames(dataObj@data)
f <- NULL
if ( is.function(by)){
f <- by
}else {
switch( by,
median = f<- function (x ) { median(x) },
mean = f <- function(x) { mean(x) },
sd = f <- function(x) { sd(x) },
sum = f <-function(x) { sum(x)},
var = f <- function(x) { var(x) }
);
}
if ( is.null(f) ) {
stop("Please set what to one of 'median','mean','sd','sum'" )
}
new_samples <- NULL
for ( i in u ){
all <- which(as.vector(dataObj@samples[, groupCol]) == i )
new_samples <- rbind ( new_samples, dataObj@samples[all[1],] )
mm[,i] <- apply( dataObj@data[ , all],1,f)
}
name = paste( unlist(strsplit( paste( dataObj@name, groupCol, by, sep='_') , '\\s')) , collapse='_')
try ( { ret <- drop.samples ( dataObj,
samplenames= setdiff(
dataObj@samples[,dataObj@sampleNamesCol] ,
new_samples[,dataObj@sampleNamesCol]
),
name= name
) }, silent=TRUE )
colnames(mm) <- as.vector(new_samples[, dataObj@sampleNamesCol])
ret@data <- data.frame(mm)
ret
})
#' @name simpleAnova
#' @aliases simpleAnova,StefansExpressionSet-method
#' @rdname simpleAnova-methods
#' @docType methods
#' @description This function calculates an annova to identify significant changes in the StefansExpressionSet
#' @description has a higher sensitivity for multi group analyses to identify group specific changes
#' @description or general trends in the dataset. This function adds the results into the stats slot
#' @description of the StefansExpressionSet object.
#' @param x the StefansExpressionSet object
#' @param groupCol the samples table column that contains the grouping information
#' @param padjMethod the p value correction method as described in \code{\link[stats]{p.adjust}}
#' @title description of function simpleAnova
#' @export
setGeneric('simpleAnova', ## Name
function ( x, groupCol='GroupName', padjMethod='BH' ) { ## Argumente der generischen Funktion
standardGeneric('simpleAnova') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('simpleAnova', signature = c ( 'StefansExpressionSet') ,
definition = function ( x, groupCol='GroupName', padjMethod='BH' ) {
x <- normalize(x)
significants <- apply ( x@data ,1, function(a) { anova( lm (a ~ a@Samples[, samples.col]))$"Pr(>F)"[1] } )
adj.p <- p.adjust( significants, method = padjMethod)
res <- cbind(significants,adj.p )
res <- data.frame(cbind( rownames(res), res ))
colnames(res) <- c('genes', 'pvalue', paste('padj',padjMethod) )
if ( length (x@stats) == 0 ){
x@stats <- list ( 'simpleAnova' = res )
}
else {
x@stats[[length(x@stats)+1]] <- res
names(x@stats)[length(x@stats)] = 'simpleAnova'
}
x
})
#' @name createStats
#' @aliases createStats,StefansExpressionSet-method
#' @rdname createStats-methods
#' @docType methods
#' @description constructor that has to be implemented in the data specific classes
#' @param x the StefansExpressionSet object
#' @param condition the samples column containing the condition of interest
#' @param files whether or not (default) export the stat files using \code{\link{writeStatTables}}
#' @param A an optional condition A to compare to a condition B
#' @param B the condition B
#' @title description of function createStats
setGeneric('createStats', ## Name
function ( x, condition, files=F, A=NULL, B=NULL ) { ## Argumente der generischen Funktion
standardGeneric('createStats') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('createStats', signature = c ( 'StefansExpressionSet') ,
definition = function ( x, condition, files=F, A=NULL, B=NULL ) {
stop( 'Not implemented' )
})
#' @name normalize
#' @aliases normalize,StefansExpressionSet-method
#' @rdname normalize-methods
#' @docType methods
#' @description constructor that has to be implemented in the data specific classes
#' @param x the StefansExpressionSet object
#' @title description of function normalize
setGeneric('normalize', ## Name
function ( x ) { ## Argumente der generischen Funktion
standardGeneric('normalize') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('normalize', signature = c ('StefansExpressionSet') ,
definition = function ( x ) {
x
})
#' @name force.numeric
#' @aliases force.numeric,StefansExpressionSet-method
#' @rdname force.numeric-methods
#' @docType methods
#' @description The moethod forces the values in the data matrix to be numbers.
#' @param dataObj the StefansExpressionSet object
#' @title description of function force.numeric
#' @export
setGeneric('force.numeric', ## Name
function (dataObj ) { ## Argumente der generischen Funktion
standardGeneric('force.numeric') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('force.numeric', signature = c ('StefansExpressionSet') ,
definition = function ( dataObj ) {
for ( i in 1: ncol(dataObj@data) ) {
if ( ! paste(as.vector(dataObj@data[,i]), collapse=" ") == paste(as.vector(as.numeric(dataObj@data[,i])), collapse=" ") ) {
dataObj@data[,i] <- as.vector(as.numeric(dataObj@data[,i]))
}
}
dataObj
})
#' @name show
#' @aliases show,StefansExpressionSet-method
#' @rdname show-methods
#' @docType methods
#' @description print the StefansExpressionSet
#' @param x the StefansExpressionSet object
#' @return nothing
#' @title description of function show
#' @export
setMethod('show', signature(object='StefansExpressionSet') ,
definition = function (object) {
cat (paste("An object of class", class(object)),"\n" )
cat("named ",object@name,"\n")
cat (paste( 'with',nrow(object@data),'genes and', ncol(object@data),' samples.'),"\n")
cat (paste("Annotation datasets (",paste(dim(object@annotation),collapse=','),"): '",paste( colnames(object@annotation ), collapse="', '"),"' ",sep='' ),"\n")
cat (paste("Sample annotation (",paste(dim(object@samples),collapse=','),"): '",paste( colnames(object@samples ), collapse="', '"),"' ",sep='' ),"\n")
if ( length(names(object@stats)) > 0 ){
cat ( "P values were calculated for ", length(names(object@stats)) -1, " condition(s)\n")
}
})
#' @name export.data
#' @aliases export.data,StefansExpressionSet-method
#' @rdname export.data-methods
#' @docType methods
#' @description write the StefansExpressionSet data, annotation and samples tables to disk
#' @param x the StefansExpressionSet object
#' @title description of function write.data
#' @export
setGeneric('export.data', ## Name
function ( x ) { ## Argumente der generischen Funktion
standardGeneric('export.data') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('export.data', signature = c ( 'StefansExpressionSet') ,
definition = function ( x ) {
write.table(cbind ( x@annotation, x@data ), file=paste(x@outpath,x@name,"_expressionValues.xls",sep=''),sep='\t', row.names=F,quote=F )
write.table( x@samples, file=paste( x@outpath, x@name, "_sampleInformation.xls",sep='' ), sep='\t', row.names=F,quote=F )
})
#' @name write.data
#' @aliases write.data,StefansExpressionSet-method
#' @rdname write.data-methods
#' @docType methods
#' @description write the StefansExpressionSet data table to disk
#' @param x the StefansExpressionSet object
#' @param annotation a vector of annotation data column names to include in the written table (default=none)
#' @title description of function write.data
#' @export
setGeneric('write.data', ## Name
function ( x, annotation=NULL ) { ## Argumente der generischen Funktion
standardGeneric('write.data') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('write.data', signature = c ( 'StefansExpressionSet') ,
definition = function ( x, annotation=NULL ) {
if ( !is.null(annotation) ) {
write.table( cbind( x@annotation[,annotation], x@data), file= paste( x@outpath,x@name,"_expressionValues.xls",sep=''), row.names=F, sep="\t",quote=F )
}
else {
write.table( cbind( rownames(x@data), x@data), file= paste( x@outpath,x@name,"_expressionValues.xls",sep=''), row.names=F, sep="\t",quote=F )
}
})
#' @name writeStatTables
#' @aliases writeStatTables,StefansExpressionSet-method
#' @rdname writeStatTables-methods
#' @docType methods
#' @description export the statistic files
#' @param x the StefansExpressionSet
#' @title description of function writeStatTables
#' @export
setGeneric('writeStatTables', ## Name
function ( x, annotation=NULL, wData=F ) { ## Argumente der generischen Funktion
standardGeneric('writeStatTables') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('writeStatTables', signature = c ( 'StefansExpressionSet') ,
definition = function ( x, annotation=NULL, wData=F ) {
opath = paste(x@outpath,'stats_Pval/', sep='')
if ( wData ) {
opath = paste( x@outpath,'stats_wData/',sep='')
}
write.table( x@samples, file= paste(opath,'stats_Sample_Description.xls', sep=''),sep='\t', row.names=F,quote=F )
Comparisons <- make.names(names(x@stats))
system ( paste('mkdir ',opath, sep='') )
for ( i in 1:length(Comparisons )){
fname <- paste(opath ,Comparisons[i], '.xls',sep='')
if ( ! is.null(annotation) ) {
if ( wData==F ) {
write.table( cbind( x@annotaion[, annotation], x@stats[[i]] ) , file= fname, row.names=F, sep="\t",quote=F )
}else {
write.table( cbind(x@annotation[,annotation], x@stats[[i]], x@data ), file= fname, row.names=F, sep="\t",quote=F )
}
}
else {
if ( wData==F ) {
write.table( x@stats[[i]], file= fname, row.names=F, sep="\t",quote=F )
}else {
write.table( cbind(x@stats[[i]], x@data ), file= fname, row.names=F, sep="\t",quote=F )
}
}
print ( paste ( "table ",fname," for cmp ",Comparisons[i],"written" ) )
}
})
#' @name export4GEDI
#' @aliases export4GEDI,StefansExpressionSet-method
#' @rdname export4GEDI-methods
#' @docType methods
#' @description Convert the values in the StefansExpressionSet to the format GEDI program can import.
#' @param x the StefansExpressionSet object
#' @param fname the filename to export the data to
#' @param tag.col the sample name column in the samples table
#' @param time.col the time column in the samples table
#' @seealso \url{"http://apps.childrenshospital.org/clinical/research/ingber/GEDI/gedihome.htm"}
#' @title description of function export4GEDI
setGeneric('export4GEDI', ## Name
function ( x, fname="GEDI_output.txt", tag.col = NULL, time.col=NULL, minSample_PerTime=1 ) { ## Argumente der generischen Funktion
standardGeneric('export4GEDI') ## der Aufruf von standardGeneric sorgt für das Dispatching
}
)
setMethod('export4GEDI', signature = c ( 'StefansExpressionSet') ,
definition = function ( x, fname="GEDI_output.txt", tag.col = NULL, time.col=NULL, minSample_PerTime=1 ) {
if ( is.null(time.col)){
stop ( paste( "choose time.col from:", paste( colnames(x@samples), collapse=", ") ) )
}
if ( is.null(tag.col)){
stop ( paste( "choose tag.col from:", paste( colnames(x@samples), collapse=", ") ) )
}
groupnames <- vector('numeric', nrow(x@samples))
for (i in 1:nrow(x@samples)) {
groupnames[i] = paste( x@samples[i, tag.col], x@samples[i, time.col] , sep="_" )
}
if ( length(which(table(groupnames) > 1)) > 0 ){
stop ( "Sorry, please calculate the mean expression for the data first ('collaps()')")
}
treatments <- unique( as.vector(x@samples[ , tag.col] ))
## now I need to find all possible days
possible <- NULL
for ( t in treatments ){
possible <- c( possible, x@samples[ grep( t, x@samples[ , tag.col]) , time.col] )
}
required = names(which(table(possible) > minSample_PerTime) )
passed <- NULL
for ( t in treatments ){
l <- rep ( -1, nrow(x@samples) )
p <- grep( t, x@samples[ , tag.col])
if ( length(p) >= length(required) ){
l[p] <- x@samples[ p, time.col]
l[ is.na(match(l, required))== T ] <- -1
passed <- c(passed, paste( c(paste("}",t,sep=""), l), collapse="\t") )
}
}
fileConn<-file(fname)
open( fileConn, "w" )
writeLines( paste( "}Dynamic", length(passed), nrow(x@data), x@name, sep="\t"), con=fileConn)
writeLines( paste(passed , collapse="\n") , con=fileConn )
for ( i in 1:nrow(x@data) ){
writeLines( paste( c(rownames(x@data)[i], x@data[i,]), collapse="\t" ), con=fileConn )
}
close(fileConn)
print( paste ("created file", fname))
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.