# Gene level measures
#
geneVisualization <- function ( DEsubs.out,
measures.topological='all',
measures.functional='all',
measures.barplot=TRUE,
topGenes=10,
colors.topological=colorRampPalette(c("white", "red"))(100),
colors.functional=colorRampPalette(c("white", "red"))(100),
colors.barplot='#C7EDFCFF',
size.topological=c(5,4),
size.functional=c(7,4),
size.barplot=c(5,5),
outfile.topological='',
outfile.functional='',
outfile.barplot='',
export='pdf',
verbose=TRUE)
{
if ( verbose )
{ message('Generating gene-level view...', appendLF = FALSE) }
edgeList <- DEsubs.out[['edgeList']]
DEgenes <- DEsubs.out[['DEgenes']]
org <- DEsubs.out[['org']]
nomen <- DEsubs.out[['mRNAnomenclature']]
if ( length(DEgenes) < topGenes ) { topGenes <- length(DEgenes) }
# Keep top DEgenes
topGenes <- sort(DEgenes)[1:topGenes]
output <- list()
# Topological analysis
supportedTopologicalMeasures <- c( 'degree',
'betweenness',
'closeness',
'hub_score',
'eccentricity',
'page_rank')
# Ontological analysis
supportedFunctionalMeasures <- .getExternalMeasures()
if ( !is.null(measures.topological) )
{
if ( 'all' %in% measures.topological )
{ measures.topological <- supportedTopologicalMeasures }
failTopo <- measures.topological[!measures.topological %in%
supportedTopologicalMeasures]
if ( length(failTopo) > 0 )
{ message('Topological option(s) ', failTopo, ' not supported.') }
}
if ( !is.null(measures.functional) )
{
if ( 'all' %in% measures.functional )
{ measures.functional <- supportedFunctionalMeasures }
failFunc <- measures.functional[!measures.functional %in%
supportedFunctionalMeasures]
if ( length(failFunc) > 0 )
{ message('Ontological option(s) ', failFunc, ' not supported.') }
}
# Analysis #1
topology <- .topologicalMeasures(measures=measures.topological,
edgeList=edgeList,
topGenes=topGenes,
org=org,
visualize=TRUE,
colors=colors.topological,
width=size.topological[1],
height=size.topological[2],
outfile=outfile.topological,
export=export)
# Analysis #2
ontology <- .ontologicalMeasures(measures=measures.functional,
topGenes=topGenes,
org=org,
visualize=TRUE,
colors=colors.functional,
width=size.functional[1],
height=size.functional[2],
outfile=outfile.functional,
export=export)
# Analysis #3
if ( measures.barplot )
{
names(topGenes) <- .changeAnnotation(annData=names(topGenes),
org='hsa', choice='entrezToHGNC')
.matrixVisualization( -log10(topGenes), type='barplot', title='',
colors=colors.barplot,
width=size.barplot[1],
height=size.barplot[2],
outfile=outfile.barplot,
export=export)
}
output <- c(output, list('measures.topological'=topology) )
output <- c(output, list('measures.functional'=ontology) )
if ( verbose )
{ message('done', appendLF = TRUE) }
return( output )
}
.topologicalMeasures <- function( edgeList, measures, topGenes, org,
visualize, colors, width, height, outfile, export)
{
if ( is.null(measures) )
{ return(NULL) }
supportedMeasures <- c( 'degree',
'betweenness',
'closeness',
'hub_score',
'eccentricity',
'page_rank')
if ( missing (measures) ) { measures <- supportedMeasures }
gi <- graph_from_edgelist(as.matrix(edgeList[, 1:2]))
topo <- list()
topologicalHandlers <- .getTopologicalHandlers()
for ( i in seq_len(length(measures)) )
{
res <- sort(topologicalHandlers[[measures[i]]](gi), decreasing=TRUE)
topo <- c(topo, list(res))
}
names(topo) <- measures
uGenes <- unique(as.vector(as.matrix(edgeList[, 1:2])))
ranking <- matrix(, nrow=length(uGenes), ncol=length(topo))
for ( i in seq_len(length(topo)) )
{
ranking[, i] <- topo[[i]][as.character(uGenes)]
ranking[, i] <- floor(ranking[, i]/max(ranking[, i])*100)/100
}
rownames(ranking) <- uGenes
colnames(ranking) <- names(topo)
# Return info about top genes
if ( !missing(topGenes) && !is.null(topGenes) )
{
idx <- which( rownames(ranking) %in% names(topGenes) )
ranking <- ranking[idx, , drop=FALSE]
}
if ( visualize )
{
rownames(ranking) <- .changeAnnotation( annData=rownames(ranking),
org='hsa',
choice='entrezToHGNC')
.matrixVisualization( dataVis=as.matrix(ranking),
type='heatmap', title='topological',
colors=colors,
width=width,
height=height,
outfile=outfile,
export=export )
}
return(ranking)
}
.ontologicalMeasures <- function( measures, topGenes, org, visualize, colors,
width, height, outfile, export )
{
if ( org != 'hsa' )
{ return(NULL) } # Homo sapiens only
if ( is.null(measures) )
{ return(NULL) }
supportedTerms <- .getExternalMeasures()
if ( missing (measures) ) { measures <- supportedTerms }
# Change gene annotation to HGNC for hsa
names(topGenes) <- .changeAnnotation(annData=names(topGenes), org='hsa',
choice='entrezToHGNC')
ranking <- matrix(, nrow=length(topGenes), ncol=length(measures))
invalidIdx <- c()
for ( j in seq_len(length(measures)) )
{
targets <- .loadTermData( type=measures[j] )
if ( is.null(targets) )
{
invalidIdx <- c(invalidIdx, j)
next()
}
for ( i in seq_len(length(topGenes)) )
{
idx <- targets %in% names(topGenes[i])
idx <- matrix( idx, nrow=nrow(targets) )*1
ranking[i, j] <- length(which(rowSums( idx ) > 0))
}
if ( max(ranking[, j]) > 0 )
{
ranking[, j] <- floor(ranking[, j]/max(ranking[, j])*100)/100
}
}
rownames(ranking) <- names(topGenes)
colnames(ranking) <- measures
if ( length(invalidIdx) > 0 ) { ranking <- ranking[, -invalidIdx]}
if ( visualize )
{
.matrixVisualization( dataVis=(as.matrix(ranking) ),
type='heatmap', title='external.references',
colors=colors,
width=width,
height=height,
outfile=outfile,
export=export )
}
return( ranking )
}
#
# Subpathway level measures
#
.subLevelMeasures <- function( subpathway, type, topTerms=20 )
{
supportedTerms <- .getExternalMeasures()
if ( missing(type) )
{
type <- supportedTerms
}
failType <- type[!type %in% supportedTerms]
if ( length(failType) > 0 )
{
message('Option(s) ', failType, ' not supported.')
}
# Access local Rdata diles
targets <- .loadTermData( type )
if (is.null(targets)) { return(NULL) }
# Calculate p-values for each class
N <- nrow(targets)
pValues <- vector( mode='numeric', length=N )
uTargets <- unique( as.vector( targets ) )
res <- matrix(, nrow=N, ncol=5)
for ( i in 1:N )
{
termTargetsAll <- which(targets[i,] != '0')
termTargetsOfSub <- which(!is.na(match(targets[i,],
subpathway)))
# Hypergeometric test compares sample to background
# Success.sample, Success.bgd, Failure.bgd, Sample.size
A <- length( termTargetsOfSub )
B <- length( uTargets )
C <- length( termTargetsAll )
D <- length( sub )
pValues[i] <- 1 - phyper(A, C, B-C, D)
pValues[i] <- floor(pValues[i] * 10000)/1000
}
names(pValues) <- rownames(targets)
# Keep significant terms
idx <- which(pValues < 0.05)
if ( length(idx) == 0 )
{
return(NULL)
}
pValues <- pValues[idx]
targets <- targets[idx, , drop=FALSE]
# Keep top terms
idx <- order(pValues)
pValues <- pValues[idx]
targets <- targets[idx, , drop=FALSE]
# Choose most significant classes
if ( length(targets) == 0 ) { return(NULL) }
if ( length(pValues) < topTerms )
{ topTerms <- length(pValues) }
pValues <- pValues[1:topTerms]
targets <- targets[1:topTerms, , drop=FALSE]
# Reduce target matrix with subpathway genes by removing rows and
# and columns not containing any subpathway genes
idx <- targets %in% subpathway
idx <- matrix( idx, nrow=nrow(targets) )*1
targets[!idx] <- '0'
# Remove rows with no subpathway genes
idx <- targets == '0'
if ( length(idx) > 0 )
{
idx <- rowSums(idx) != ncol(targets)
targets <- targets[idx, , drop=FALSE]
}
# Remove columns with no subpathway genes
idx <- targets == '0'
if ( length(idx) > 0 )
{
idx <- colSums(idx) != nrow(targets)
targets <- targets[, idx, drop=FALSE]
}
# Check if any data remains
if ( length(targets) == 0 )
{ return(NULL) }
# Create edgelist for topmost results (class, subpathway genes)
edgeList <- matrix(, nrow=length(targets), ncol=3)
for ( i in seq_len(nrow(targets)) )
{
start <- 1 + (i-1)*ncol(targets)
end <- i*ncol(targets)
genesPerTerm <- expand.grid(rownames(targets)[i], targets[i, ] )
edgeList[start:end, 1:2] <- as.matrix(genesPerTerm)
edgeList[start:end, 3] <- pValues[rownames(targets)[i]]
}
idx <- which(edgeList[, 2] != '0')
edgeList <- edgeList[idx, , drop=FALSE]
return ( edgeList )
}
subpathwayVisualization <- function( DEsubs.out,
references='',
submethod,
subname,
colors=c('#FF0000FF', '#FF9900FF', '#CCFF00FF',
'#33FF00FF', '#00FF66FF', '#0066FFFF',
'#3300FFFF', '#CC00FFFF', '#FF0099FF',
'#EE82EEFF'),
scale=1,
shuffleColors=FALSE,
outfiles='',
export='pdf',
verbose=TRUE )
{
if ( 'all' %in% references )
{
references <- .getExternalMeasures()
references <- c(references, 'GO', 'Disease', 'Regulator')
}
if ( length(scale) == 1 )
{
scale <- rep(scale, length(references))
}
if ( length(references) != length(scale) )
{
message('Number of references has to be equal to different
scaling optios. Setting scale to 1. ')
scale <- rep(1, length(references))
}
if ( (!'' %in% outfiles) && (length(outfiles) != length(references) ) )
{
message('Number of outfiles should be equal to number of references.')
return(NULL)
}
if ( '' %in% outfiles ) { outfiles <- rep('', length(references) ) }
for ( i in seq_len(length(references)) )
{
.subpathwayVisualization(DEsubs.out=DEsubs.out,
reference=references[i],
submethod=submethod,
subname=subname,
colors=colors,
scale=scale[i],
shuffleColors=shuffleColors,
outfile=outfiles[i],
export=export,
verbose=verbose
)
}
}
.subpathwayVisualization <- function( DEsubs.out, reference, submethod,
subname, colors, scale, shuffleColors,
outfile, export, verbose)
{
org <- DEsubs.out[['org']]
edgeList <- DEsubs.out[['edgeList']]
if ( org != 'hsa' )
{ return(NULL) } # Homo sapiens only
if ( is.null(submethod) )
{ return(NULL) }
supportedMethods <- subpathwayTypes()
supportedReferences <- .getExternalMeasures()
supportedReferences <- c(supportedReferences, 'GO', 'Disease', 'Regulator')
if (verbose)
{
message('Generating subpathway-level view ( ', reference,' )...',
appendLF = FALSE)
}
unsupportedOptions <- submethod[!submethod %in% supportedMethods]
if ( length(unsupportedOptions) > 0 )
{
message('Option(s) ', unsupportedOptions, ' not supported.')
return(NULL)
}
unsupportedReferences <- reference[!reference %in% supportedReferences]
if ( length(unsupportedReferences) > 0 )
{
message('Reference(s) ', unsupportedReferences, ' not supported.')
return(NULL)
}
# Subpathway information extraction
submethodName <- paste0('subAnalysis.', submethod)
subpathway.nodelist <- DEsubs.out[[submethodName]][[subname]]
# Subpathway (nodelist) to edgelist
idx1 <- which(edgeList[, 1] %in% subpathway.nodelist)
idx2 <- which(edgeList[, 2] %in% subpathway.nodelist)
idx <- intersect(idx1, idx2)
subpathway.edgelist <- edgeList[idx, ]
# Change annotation from entrez to HGNC
subpathway.nodelist <- .changeAnnotation(annData=subpathway.nodelist,
org='hsa', choice='entrezToHGNC')
subpathway.nodelist <- unname(subpathway.nodelist)
# Special care for double and triple references
references <- reference
top <- 30
agap <- 0; bgap <- 0; cgap <- 50
if ( reference %in% 'GO' )
{
references <- c('GO_bp',
'GO_cc',
'GO_mf')
agap <- 20; bgap <- 20; cgap <- 50
}
if ( reference %in% 'Disease' )
{
references <- c('Disease_OMIM', 'Disease_GAD')
agap <- 20; bgap <- 0; cgap <- 50
}
if ( reference %in% 'Regulator' )
{
references <- c('miRNA', 'TF')
agap <- 20; bgap <- 0; cgap <- 50
}
top <- floor(top/length(references))
edgeLists <- list()
for ( i in seq_len(length(references)) )
{
subpathway.edgelist <- .subLevelMeasures(
subpathway=subpathway.nodelist,
type=references[i],
topTerms=top)
if ( !is.null(subpathway.edgelist) )
{
if ( top > nrow(subpathway.edgelist) )
{ top <- nrow(subpathway.edgelist) }
subpathway.edgelist <- subpathway.edgelist[1:top, , drop=FALSE]
R <- list(subpathway.edgelist)
names(R) <- references[i]
edgeLists <- c(edgeLists, R)
}
}
if ( length(edgeLists) == 0 )
{
if (verbose) { message('done.') }
return(NULL)
}
# Merge rows and columns indepedently for each edgelist.
rowTerms <- c(); colTerms <- c()
for ( i in seq_len(length(edgeLists)) )
{
if ( is.null(edgeLists[[i]]) ) { next() }
rowTerms <- unique(c(rowTerms, edgeLists[[i]][, 1]))
colTerms <- unique(c(colTerms, edgeLists[[i]][, 2]))
}
edgeList <- do.call(rbind, edgeLists)
adjmat <- matrix(0, nrow=length(rowTerms), ncol=length(colTerms))
rownames(adjmat) <- rowTerms
colnames(adjmat) <- colTerms
for ( i in seq_len(nrow(edgeList)) )
{
adjmat[ edgeList[i,1], edgeList[i,2] ] <- 1
}
if ( shuffleColors )
{ colors <- sample(colors, nrow(adjmat), replace=TRUE) }
if ( !shuffleColors )
{ colors <- rep(colors, length.out=nrow(adjmat)) }
# The user has supplied a custom directory
if ( (outfile != '') && ('pdf' %in% export) )
{
.dir.create.rec(outfile)
}
# No custom directory has been supplied
if ( (outfile == '') && ('pdf' %in% export) )
{
dir.create(cache[['outDir']], showWarnings=FALSE, recursive=TRUE)
outfile <- paste0(cache[['outDir']] , '//circos_', reference ,'.pdf')
}
.doCirclize( mat=adjmat,
colors=colors,
agap=agap,
bgap=bgap,
cgap=cgap,
degree=250,
a=round(10/scale)/10,
b=round(10/scale)/10,
outfile=outfile,
export=export)
if (verbose) { message('done.') }
}
#
# Organism level measures
#
organismVisualization <- function( DEsubs.out,
references='',
topSubs=10,
topTerms=20,
colors=colorRampPalette(c("white", "red"))(100),
export='pdf',
width=7,
height=6,
outfiles='',
verbose=TRUE )
{
# Homo sapiens only
org <- DEsubs.out[['org']]
if (org != 'hsa') { return(NULL) }
if ( 'all' %in% references )
{
references <- .getExternalMeasures()
}
if ( (!'' %in% outfiles) && (length(outfiles) != length(references) ) )
{
message('Number of outfiles should be equal to number of references.')
return(NULL)
}
if ( '' %in% outfiles ) { outfiles <- rep('', length(references) ) }
for ( i in seq_len(length(references)) )
{
res <- .organismVisualization(DEsubs.out=DEsubs.out,
references=references[i],
topSubs=topSubs,
topTerms=topTerms,
colors=colors,
export=export,
width=width,
height=height,
outfile=outfiles[i],
verbose=verbose)
}
return(invisible())
}
.organismVisualization <- function( DEsubs.out, references, topSubs, topTerms,
colors, export, width, height,
outfile, verbose )
{
org <- DEsubs.out[['org']]
type <- references
supportedTerms <- .getExternalMeasures()
if (verbose)
{
message(paste0('Generating organism-level view ( ', type , ' )...'),
appendLF = FALSE)
}
failType <- type[!type %in% supportedTerms]
if ( length(failType) > 0 )
{
message('Option(s) ', failType, ' not supported.')
return(invisible())
}
subpathways <- DEsubs.out[[5]]
termsPerSub <- list()
pValuesPerTermPerSub <- list()
pathNames <- c()
top <- c(topSubs, topTerms)
names(top) <- c('subpathways', 'terms')
if ( length(subpathways) < top['subpathways'] )
{ top['subpathways'] <- length(subpathways) }
subpathways <- subpathways[1:top['subpathways']]
subpathways <- subpathways[!is.na(subpathways)]
offset <- 0.001
# Find most significant terms for each subpathway
for ( i in seq_len(length(subpathways)) )
{
sub <- unname(.changeAnnotation(annData=subpathways[[i]], org=org,
choice='entrezToHGNC'))
edgeList <- .subLevelMeasures(subpathway=sub, type=type,
topTerms=top['subpathways'])
if ( !is.null(edgeList) )
{
termsPerSub[[i]] <- matrix( edgeList[, 1], nrow=1 )
pval <- matrix( as.numeric(edgeList[, 3]) + offset, nrow=1 )
pValuesPerTermPerSub[[i]] <- pval
pathNames <- c(pathNames, names(subpathways)[i] )
}
}
if ( length(termsPerSub) == 0)
{
message('done', appendLF = TRUE)
return(NULL)
}
# Convert lists to adjacency matrices
names(termsPerSub) <- pathNames
termsPerSub <- .unlistToMatrix(.fillMatrixList( termsPerSub ) )
names(pValuesPerTermPerSub) <- pathNames
pValuesPerTermPerSub <- .fillMatrixList( pValuesPerTermPerSub )
pValuesPerTermPerSub <- .unlistToMatrix( pValuesPerTermPerSub )
# Find topmost terms amongst the terms for all subpathways
uniqueTerms <- unique(as.vector(termsPerSub))
uniqueTerms <- uniqueTerms[uniqueTerms != '0']
counts <- vector(mode='numeric', length=length(uniqueTerms))
for ( i in seq_len(length(uniqueTerms)) )
{
counts[i] <- length(which(as.vector(termsPerSub) == uniqueTerms[i]))
}
names(counts) <- uniqueTerms
topTerms <- names(head(sort(counts, decreasing=TRUE), top['terms']))
# Keep only topmost terms for each subpathway
idx <- termsPerSub %in% topTerms
if ( length(idx) > 0 ) { termsPerSub[!idx] <- NA }
# Create an edgelist of topmost terms for all subpathways
termsPerSub.edgeList <- matrix(, nrow=length(termsPerSub), ncol=3)
for ( i in seq_len(nrow(termsPerSub)) )
{
if ( is.na(rownames(termsPerSub)[i]) ) { next() }
start <- 1 + (i-1)*ncol(termsPerSub)
end <- i*ncol(termsPerSub)
data1 <- expand.grid( rownames(termsPerSub)[i], termsPerSub[i, ] )
data2 <- expand.grid( rownames(termsPerSub)[i],
pValuesPerTermPerSub[i, ] - offset )
data.all <- cbind(data1, data2[, 2])
termsPerSub.edgeList[start:end, ] <- as.matrix(data.all)
}
# Remove NA's
idx <- which(is.na(termsPerSub.edgeList[, 2]))
if ( length(idx) > 0 )
{
termsPerSub.edgeList <- termsPerSub.edgeList[-idx, , drop=FALSE]
}
termsPerSub.df <- as.data.frame(termsPerSub.edgeList,
stringsAsFactors=FALSE)
colnames(termsPerSub.df) <- c('Subpathway', 'Term', 'pValue')
termsPerSub.df[, 'pValue'] <- as.numeric(termsPerSub.df[, 'pValue'])
.matrixVisualization(dataVis=termsPerSub.df, type='dotplot',
title=c('Subpathway', type, 'Q-values'),
colors=colors,
width=width, height=height,
outfile=outfile,
export=export)
if (verbose)
{ message('done', appendLF = TRUE) }
return(termsPerSub.edgeList)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.