R/Allclasses.R
In SeqGSEA: Gene Set Enrichment Analysis (GSEA) of RNA-Seq Data: integrating differential expression and splicing

Documented in exonID geneID geneList geneSetDescs geneSetNames geneSetSize label loadExonCountData newGeneSets newReadCountSet size subsetByGenes

# copyright: Xi Wang (xi.wang@newcastle.edu.au)

######################
## Class SeqGeneSet ##
######################

SeqGeneSet <- setClass("SeqGeneSet", 
                       representation = representation(
                         name = "character", 
                         sourceFile = "character", 
                         geneList = "character", 
                         GS = "list",  
                         GSNames = "character", 
                         GSDescs = "character", 
                         GSSize = "numeric", 
                         GSSizeMin = "numeric",
                         GSSizeMax = "numeric", 
                         GS.Excluded = "list",
                         GSNames.Excluded = "character",
                         GSDescs.Excluded = "character", 
                         GSEA.ES = "numeric",
                         GSEA.ES.pos = "numeric",
                         GSEA.ES.perm = "matrix",
                         GSEA.score.cumsum = "matrix", 
                         GSEA.normFlag = "logical", 
                         GSEA.pval = "numeric",
                         GSEA.FWER = "numeric", 
                         GSEA.FDR = "numeric", 
                         sc.ES = "matrix", 
                         sc.ES.perm = "matrix", 
                         sc.normFlag = "logical", 
                         scGSEA = "logical", 
                         sc.pval = "matrix", 
                         sc.FWER = "matrix", 
                         sc.FDR = "matrix", 
                         version = "Versions"), 
                       prototype = prototype(
                         name = new("ScalarCharacter", NA),
                         version = new("Versions", "0.0.2"))
)

newGeneSets <- function(GS, GSNames, GSDescs, geneList, scGSEA = FALSE, 
                        name = NA_character_, sourceFile = NA_character_, 
                        GSSizeMin = 5, GSSizeMax = 1000) {
  stopifnot( length(GS) == length(GSNames) & length(GS) == length(GSDescs)) 
  stopifnot( max(unlist(GS)) <= length(geneList) )
  GSSize = unlist(lapply(GS, length))
  GSUse = GSSize >= GSSizeMin & GSSize <= GSSizeMax
  
  GS <- new("SeqGeneSet", name = name, sourceFile = sourceFile, geneList = geneList, 
            GS = GS[GSUse], GSNames = GSNames[GSUse], GSDescs = GSDescs[GSUse], 
            GSSizeMin = GSSizeMin, GSSizeMax = GSSizeMax, GSSize = GSSize[GSUse], 
            GS.Excluded = GS[!GSUse], GSNames.Excluded = GSNames[!GSUse], GSDescs.Excluded = GSDescs[!GSUse], 
            GSEA.ES = numeric(0), GSEA.ES.pos = numeric(0), 
            GSEA.ES.perm = matrix(0, 0 ,0),  GSEA.score.cumsum = matrix(0, 0 ,0), 
            GSEA.normFlag = FALSE,  GSEA.pval = numeric(0), 
            GSEA.FWER = numeric(0), GSEA.FDR = numeric(0),
            sc.ES = matrix(0, 0 ,0), sc.ES.perm = matrix(0, 0 ,0),
            sc.normFlag = FALSE, scGSEA = scGSEA, 
            sc.pval = matrix(0, 0 ,0), sc.FWER = matrix(0, 0 ,0), sc.FDR = matrix(0, 0 ,0) )
  GS
}

setMethod("show", 
          signature(object="SeqGeneSet"),
          function(object) {
            if(!object@scGSEA) {
              expr1 <- ifelse(length(object@GSEA.ES) > 0, 
                            paste(selectSome(object@GSEA.ES, maxToShow=4), collapse=", "), 
                            "not computed")
              expr2 <- ifelse(length(object@GSEA.ES.pos) > 0, 
                            paste(selectSome(object@GSEA.ES.pos, maxToShow=4), collapse=", "), 
                            "not computed")
              expr3 <- ifelse(ncol(object@GSEA.ES.perm) > 0, 
                            paste(ncol(object@GSEA.ES.perm), "-time permutation", sep=""),  
                            "not performed")
              expr4 <- ifelse(object@GSEA.normFlag, "Yes", "No")
              expr5 <- ifelse(length(object@GSEA.pval) > 0, 
                            paste(selectSome(object@GSEA.pval, maxToShow=4), collapse=", "), 
                            "not computed")
              expr6 <- ifelse(length(object@GSEA.FWER) > 0, 
                            paste(selectSome(object@GSEA.FWER, maxToShow=4), collapse=", "), 
                            "not computed")
              expr7 <- ifelse(length(object@GSEA.FDR) > 0, 
                            paste(selectSome(object@GSEA.FDR, maxToShow=4), collapse=", "), 
                            "not computed")
              cat("SeqGeneSet object: ", object@name, "\n",
                "GeneSetSourceFile: ", object@sourceFile, "\n", 
                "GeneSets: ", paste(selectSome(object@GSNames, maxToShow=4), 
                                    collapse="\n          "), "\n", 
                "  with the number of genes in respective sets: ", 
                paste(selectSome(object@GSSize, maxToShow=4), collapse=", "), "\n",
                "  brief descriptions: \n          ",
                paste(selectSome(object@GSDescs, maxToShow=4), collapse="\n          "), "\n",
                "  # gene sets passed filter: ", size(object), 
                "  (#genes >= ", object@GSSizeMin, " AND <= ", object@GSSizeMax, ")\n", 
                "  # gene sets excluded: ", sizeOfExcludedGS(object), 
                "  (#genes < ", object@GSSizeMin, " OR > ", object@GSSizeMax, ")\n",
                "ES scores: ", expr1, "\n",
                "ES postions: ", expr2, "\n",
                "Permutated ES scores: ", expr3, "\n", 
                "ES scores normalized: ", expr4, "\n",
                "ES p-value: ", expr5, "\n",
                "ES FWER: ", expr6, "\n",
                "ES FDR: ", expr7, "\n",
                sep="")
            } else {
              expr1 <- ifelse(length(object@sc.ES) > 0, 
                              paste(nrow(object@sc.ES), "genesets by", ncol(object@sc.ES), "cells computed"), 
                              "not computed")
              expr2 <- ifelse(length(object@sc.ES.perm) > 0, 
                              paste(nrow(object@sc.ES), "genesets by", ncol(object@sc.ES), "permutation times computed"), 
                              "not computed")
              expr3 <- ifelse(object@sc.normFlag, "Yes", "No")
              expr4 <- ifelse(length(object@sc.pval) > 0, 
                              paste(nrow(object@sc.pval), "genesets by ", ncol(object@sc.pval), " cells computed"), 
                              "not computed")
              expr5 <- ifelse(length(object@sc.FWER) > 0, 
                              paste(nrow(object@sc.FWER), "genesets by ", ncol(object@sc.FWER), " cells computed"), 
                              "not computed")
              expr6 <- ifelse(length(object@sc.FDR) > 0, 
                              paste(nrow(object@sc.FDR), "genesets by ", ncol(object@sc.FDR), " cells computed"), 
                              "not computed")
              
              cat("SeqGeneSet object: ", object@name, "\n",
                "GeneSetSourceFile: ", object@sourceFile, "\n", 
                "GeneSets: ", paste(selectSome(object@GSNames, maxToShow=4), 
                                    collapse="\n          "), "\n", 
                "  with the number of genes in respective sets: ", 
                paste(selectSome(object@GSSize, maxToShow=4), collapse=", "), "\n",
                "  brief descriptions: \n          ",
                paste(selectSome(object@GSDescs, maxToShow=4), collapse="\n          "), "\n",
                "  # gene sets passed filter: ", size(object), 
                "  (#genes >= ", object@GSSizeMin, " AND <= ", object@GSSizeMax, ")\n", 
                "  # gene sets excluded: ", sizeOfExcludedGS(object), 
                "  (#genes < ", object@GSSizeMin, " OR > ", object@GSSizeMax, ")\n",
                "sc-ES scores: ", expr1, "\n",
                "Permutated sc-ES scores: ", expr2, "\n", 
                "sc-ES scores normalized: ", expr3, "\n",
                "sc-ES p-value: ", expr4, "\n",
                "sc-ES FWER: ", expr5, "\n",
                "sc-ES FDR: ", expr6, "\n",
                sep="")
            }
          })

setMethod("[", 
          signature = signature(
            x="SeqGeneSet", i="numeric"),
          function(x, i, j, ..., drop=TRUE) {
            stopifnot(all(i <= size(x)))
            if (length(i) == 1 && drop) 
              return(as.vector(x@GS[[i]]))
            GS <- x
            GS@GS <- x@GS[i]
            GS@GSNames <- x@GSNames[i]
            GS@GSDescs <- x@GSDescs[i]
            GS@GSSize <- x@GSSize[i]
            GS@GS.Excluded <- list()
            GSNames.Excluded <- character()
            GSDescs.Excluded <- character()
            if(length(x@GSEA.ES) > 0 ) 
              GS@GSEA.ES <- x@GSEA.ES[i]
            if(length(x@GSEA.ES.pos) > 0)
              GS@GSEA.ES.pos <- x@GSEA.ES.pos[i]
            if(nrow(x@GSEA.ES.perm) > 0)
              GS@GSEA.ES.perm <- x@GSEA.ES.perm[i,,drop=FALSE]
            if(nrow(x@GSEA.score.cumsum) > 0)
              GS@GSEA.score.cumsum <- x@GSEA.score.cumsum[i,,drop=FALSE]
            if(length(x@GSEA.pval) > 0)
              GS@GSEA.pval <- x@GSEA.pval[i]
            if(length(x@GSEA.FDR) > 0)
              GS@GSEA.FDR <- x@GSEA.FDR[i]
            if(length(x@GSEA.FWER) > 0)
              GS@GSEA.FWER <- x@GSEA.FWER[i]
            if(nrow(x@sc.ES) > 0)
              GS@sc.ES <- x@sc.ES[i,,drop=FALSE]
            if(nrow(x@sc.ES.perm) > 0)
              GS@sc.ES.perm <- x@sc.ES.perm[i,,drop=FALSE]
            if(nrow(x@sc.pval) > 0)
              GS@sc.pval <- x@sc.pval[i,,drop=FALSE]
            if(nrow(x@sc.FWER) > 0)
              GS@sc.FWER <- x@sc.FWER[i,,drop=FALSE]
            if(nrow(x@sc.FDR) > 0)
              GS@sc.FDR <- x@sc.FDR[i,,drop=FALSE]
            GS
          } )

geneSetNames <- function(GS) {
  stopifnot( is(GS, "SeqGeneSet"))
  gsn <- GS@GSNames
  names(gsn) <- GS@GSNames
  gsn
}
  
geneSetDescs <- function(GS) {
  stopifnot( is(GS, "SeqGeneSet"))
  gsd <- GS@GSDescs
  names(gsd) <- GS@GSNames
  gsd
}

geneSetSize <- function(GS) {
  stopifnot( is(GS, "SeqGeneSet"))
  gss <- GS@GSSize
  names(gss) <- GS@GSNames
  gss
}

size <- function(GS) {
  stopifnot( is(GS, "SeqGeneSet"))
  length(GS@GS)
}

geneList <- function(GS) {
  stopifnot( is(GS, "SeqGeneSet"))
  GS@geneList
}


sizeOfExcludedGS <- function(GS) {
  stopifnot( is (GS, "SeqGeneSet") )
  length(GS@GS.Excluded)
}

########################
## Class ReadCountSet ##
########################

#require(Biobase)
ReadCountSet <- setClass( "ReadCountSet", 
          contains = "eSet",
          representation = representation(
            featureData_gene = "data.frame",
            permute_NBstat_exon = "matrix",
            permute_NBstat_gene = "matrix"
            ),
          prototype = prototype( new( "VersionedBiobase",
                                      versions = c( classVersion("eSet"), 
                                                    ReadCountSet = "1.0.0" ) ) )
)

newReadCountSet <- function( readCounts, exonIDs, geneIDs ) {
  readCounts <- as.matrix( readCounts )
  if( any( round( readCounts ) != readCounts ) ){
    stop( "The count data is not integer." )}
  mode( readCounts ) <- "integer"
  nRow <- nrow(readCounts)
  
  phenoData <- annotatedDataFrameFrom( readCounts, byrow=FALSE )
  featureData <- annotatedDataFrameFrom( readCounts, byrow=TRUE )
  
  samples <- colnames(readCounts)
  phenoData$label <- rep(1, length(samples))
  phenoData$label[regexpr("C", samples)==1] <- 0
  phenoData$label <- as.factor(phenoData$label)
  varMetadata( phenoData )[ "label", "labelDescription" ] <- "label of samples (1's for cases, 0's for controls" 
  
  exonIDs <- as.character( exonIDs )
  if( length(exonIDs) != nRow )
    stop( "exonIDs must be of the same length as the number of columns in readCounts" )
  geneIDs <- as.factor( geneIDs )
  if( length(geneIDs) != nRow )
    stop( "geneIDs must be of the same length as the number of columns in readCounts" )  
  featureData$exonIDs <- exonIDs
  varMetadata( featureData )[ "exonIDs", "labelDescription" ] <- "exon ID (unique only within a gene)" 
  featureData$geneIDs <- geneIDs
  varMetadata( featureData )[ "geneIDs", "labelDescription" ] <- "ID of gene to which the exon belongs"
  
  featureData$testable <- rep( NA_real_, nRow )
  varMetadata( featureData )[ "testable", "labelDescription" ] <- "slot indicating if an exon should be considered in the test"
  featureData$prob_case <- rep( NA_real_, nRow )
  varMetadata( featureData )[ "prob_case", "labelDescription" ] <- "expection of this exon's prob in a gene in the case group"
  featureData$prob_ctrl <- rep( NA_real_, nRow )
  varMetadata( featureData )[ "prob_ctrl", "labelDescription" ] <- "expection of this exon's prob in a gene in the control group"
  featureData$var_case <- rep( NA_real_, nRow )
  varMetadata( featureData )[ "var_case", "labelDescription" ] <- "variance of this exon's pron in the case group"
  featureData$var_ctrl <- rep( NA_real_, nRow )
  varMetadata( featureData )[ "var_ctrl", "labelDescription" ] <- "variance of this exon's pron in the control group"
  featureData$NBstat <- rep( NA_real_, nRow )
  varMetadata( featureData )[ "NBstat", "labelDescription" ] <- "NB stat for DE"
  featureData$pvalue <- rep( NA_real_, nRow )
  varMetadata( featureData )[ "pvalue", "labelDescription" ] <- "p-value from NB permutation"
  featureData$padjust <- rep( NA_real_, nRow )
  varMetadata( featureData )[ "padjust", "labelDescription" ] <- "adjusted p-value from NB permutation"
  
  featureData_gene_NBstat <- rep( NA_real_, length(unique(geneIDs)))
  featureData_gene_pval <- rep( NA_real_, length(unique(geneIDs))) 
  featureData_gene_padj <- rep( NA_real_, length(unique(geneIDs))) 
    
  RCS <- new( "ReadCountSet",
              assayData = assayDataNew( "environment", counts=readCounts ),
              phenoData = phenoData,
              featureData = featureData,
              featureData_gene = data.frame(NBstat = featureData_gene_NBstat,
                                            pval = featureData_gene_pval,
                                            padj = featureData_gene_padj,
                                            row.names = unique(as.character(geneIDs))),
              permute_NBstat_exon = matrix(0,0,0),
              permute_NBstat_gene = matrix(0,0,0)
              )
  RCS
}

setValidity( "ReadCountSet", function( object ) {  
 if( ! "geneIDs" %in% names(fData(object)) )
    return( "featureData does not contain a 'geneIDs' column.")
  if( ! is( fData(object)$geneID, "factor" ) )
    return( "The 'geneID' column in fData is not a factor." )
  if( ! "exonIDs" %in% names(fData(object)) )
    return( "featureData does not contain an 'exonIDs' column.")
  if( ! is( fData(object)$exonIDs, "character" ) )
    return( "The 'exonIDs' column in fData is not a character vector." )
 if( !is(fData(object)$NBstat, "numeric")){
   return( "The 'NBstat' values are not numeric")}
 if( !is(fData(object)$pvalue, "numeric")){
   return( "The 'pvalue' values are not numeric")}
 if( !is(fData(object)$padjust, "numeric")){
   return( "The 'padjust' values are not numeric")} 
 if( !is.integer( assayData(object)[["counts"]] ) )
   return( "The count data is not in integer mode." )
 if( any( assayData(object)[["counts"]] < 0 ) )
   return( "The count data contains negative values." )
 if( !is(object@featureData_gene , "data.frame")){
   return( "The 'featureData_gene' is not data.frame")}
 if( !is(object@permute_NBstat_exon , "matrix")){
   return( "The 'permute_NBstat_exon' is not matrix")}
 if( !is(object@permute_NBstat_gene , "matrix")){
   return( "The 'permute_NBstat_gene' is not matrix")}
 
 TRUE
}) 

loadExonCountData <- function(case.files, control.files) {
  nCase <- length(case.files)
  nCtrl <- length(control.files)
  sample.names <- c(paste('S',1:nCase,sep=""), paste('C',1:nCtrl,sep=""))
  lf <- lapply(c(case.files, control.files), function(x) read.table(x, header = FALSE, 
                                                                    stringsAsFactors = FALSE))
  if (!all(sapply(lf[-1], function(x) all(x$V1 == lf[1]$V1)))) 
    stop("Count files have differing gene ID column.")
  dcounts <- sapply(lf, `[[`, "V2")
  rownames(dcounts) <- lf[[1]][, 1]
  colnames(dcounts) <- sample.names
  
  # dcounts for EXONs, and gcounts for GENEs
  dcounts <- dcounts[substr(rownames(dcounts), 1, 1) != "_", ] 
  geneIDs <- sapply(strsplit(rownames(dcounts), ":"), "[[", 1)
  exonIDs <- paste("E", sapply(strsplit(rownames(dcounts), ":"), function(x) {
                               return(x[2])}), sep = "")
  #gcounts <- do.call(rbind, tapply(1:nrow(dcounts), as.factor(geneIDs), function(rows) {colSums(dcounts[rows, , drop = FALSE]) } ))

  idx <- order(geneIDs) # to make sure gene IDs are sorted in lexicographic order; otherwise will cause error in applying `tapply`
  RCS <- newReadCountSet( dcounts[idx, ], exonIDs[idx], geneIDs[idx] )
  RCS 
}

label <- function( RCS ) {
  stopifnot( is( RCS, "ReadCountSet" ) )
  lb <- pData(RCS)$label
  names( lb ) <- colnames( counts(RCS) )
  lb
}

setMethod("counts", signature(object="ReadCountSet"), 
  function( object ) {
    RCS <- object
    assayData(RCS)[["counts"]]
})

setReplaceMethod("counts", signature(object="ReadCountSet", value="matrix"),
  function( object, value ) {
    RCS <- object
    assayData(RCS)[[ "counts" ]] <- value
    validObject(RCS)
    RCS
})

geneID <- function( RCS ) {
  stopifnot( is( RCS, "ReadCountSet" ) )
  g <- fData(RCS)$geneIDs
  names(g) <- rownames( counts(RCS) )
  g
}

`geneID<-` <- function( RCS, value ) {
  stopifnot( is( RCS, "ReadCountSet" ) )
  fData(RCS)$geneIDs <- as.factor(value)
  validObject(RCS)
  RCS
}

exonID <- function( RCS ) {
  stopifnot( is( RCS, "ReadCountSet" ) )
  g <- fData(RCS)$exonIDs
  names(g) <- rownames( counts(RCS) )
  g
}

`exonID<-` <- function( RCS, value ) {
  stopifnot( is( RCS, "ReadCountSet" ) )
  fData(RCS)$exonIDs <- as.character(value)
  validObject(RCS)
  RCS
}

subsetByGenes <- function( RCS, genes ) {
  stopifnot( is( RCS, "ReadCountSet" ) )
  stopifnot( all( genes %in% levels(geneID(RCS)) ) )
  indxExon <- as.character(geneID(RCS)) %in% genes
  indxGene <- unique(as.character(geneID(RCS))) %in% genes
  RCS2 <- RCS[ indxExon, ]
  fData(RCS2)$geneIDs <- as.factor(as.character(fData(RCS2)$geneIDs))
  RCS2@featureData_gene <- RCS@featureData_gene[indxGene,]
  if( length(RCS@permute_NBstat_exon) ) {
    RCS2@permute_NBstat_exon <- as.matrix(RCS@permute_NBstat_exon[indxExon, ])
    RCS2@permute_NBstat_gene <- as.matrix(RCS@permute_NBstat_gene[indxGene, ])
  } else {
    RCS2@permute_NBstat_exon <- matrix(0,0,0)
    RCS2@permute_NBstat_gene <- matrix(0,0,0)
  }
  RCS2
}

Any scripts or data that you put into this service are public.

SeqGSEA documentation built on Nov. 8, 2020, 5:46 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

SeqGSEA
Gene Set Enrichment Analysis (GSEA) of RNA-Seq Data: integrating differential expression and splicing

R/Allclasses.R
In SeqGSEA: Gene Set Enrichment Analysis (GSEA) of RNA-Seq Data: integrating differential expression and splicing

Defines functions subsetByGenes `exonID<-` exonID `geneID<-` geneID label loadExonCountData newReadCountSet sizeOfExcludedGS geneList size geneSetSize geneSetDescs geneSetNames newGeneSets

Documented in exonID geneID geneList geneSetDescs geneSetNames geneSetSize label loadExonCountData newGeneSets newReadCountSet size subsetByGenes

Try the SeqGSEA package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

SeqGSEA Gene Set Enrichment Analysis (GSEA) of RNA-Seq Data: integrating differential expression and splicing

R/Allclasses.R In SeqGSEA: Gene Set Enrichment Analysis (GSEA) of RNA-Seq Data: integrating differential expression and splicing

Defines functions subsetByGenes `exonID<-` exonID `geneID<-` geneID label loadExonCountData newReadCountSet sizeOfExcludedGS geneList size geneSetSize geneSetDescs geneSetNames newGeneSets

Documented in exonID geneID geneList geneSetDescs geneSetNames geneSetSize label loadExonCountData newGeneSets newReadCountSet size subsetByGenes

Try the SeqGSEA package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

SeqGSEA
Gene Set Enrichment Analysis (GSEA) of RNA-Seq Data: integrating differential expression and splicing

R/Allclasses.R
In SeqGSEA: Gene Set Enrichment Analysis (GSEA) of RNA-Seq Data: integrating differential expression and splicing