knitr::opts_chunk$set(tidy=FALSE, cache=FALSE, dev="png", message=FALSE, error=FALSE, warning=FALSE)
Quantification of data from @alasoo against GENCODE [@gencode] using Salmon [@salmon].
library(macrophage) dir <- system.file("extdata", package="macrophage") coldata <- read.csv(file.path(dir, "coldata.csv")) coldata <- coldata[,c(1,2,3,5)] names(coldata) <- c("names","id","line","condition") coldata$files <- file.path(dir, "quants", coldata$names, "quant.sf.gz") all(file.exists(coldata$files))
suppressPackageStartupMessages(library(SummarizedExperiment))
# This hidden code chunk is only needed for Bioc build machines, # so that 'fishpond' will build regardless of whether # the machine can connect to ftp.ebi.ac.uk. # Using linkedTxomes to point to a GTF that lives in the macrophage pkg. # The chunk can be skipped if you have internet connection, # as tximeta will automatically ID the transcriptome and DL the GTF. library(tximeta) makeLinkedTxome( indexDir=file.path(dir, "gencode.v29_salmon_0.12.0"), source="Gencode", organism="Homo sapiens", release="29", genome="GRCh38", fasta="ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_29/gencode.v29.transcripts.fa.gz", gtf=file.path(dir, "gencode.v29.annotation.gtf.gz"), # local version write=FALSE )
We load in the quantification data with tximeta
:
library(dplyr) coldata <- coldata %>% filter(condition %in% c("naive","IFNg")) se <- tximeta(coldata, countsFromAbundance="scaledTPM", varReduce=TRUE)
se <- se[seqnames(se) == "chr1",] se$condition <- factor(se$condition, c("naive","IFNg"))
library(DRIMSeq) counts <- data.frame(gene_id=sapply(mcols(se)$gene_id, `[`, 1), feature_id=mcols(se)$tx_name, assays(se)[["counts"]]) samples <- as.data.frame(colData(se)) names(samples)[1] <- "sample_id" d <- dmDSdata(counts=counts, samples=samples) n <- 12 n.small <- 6 d <- dmFilter(d, min_samps_feature_expr=n.small, min_feature_expr=10, min_samps_feature_prop=n.small, min_feature_prop=0.1, min_samps_gene_expr=n, min_gene_expr=10) d
library(DEXSeq) sample.data <- DRIMSeq::samples(d) count.data <- round(as.matrix(counts(d)[,-c(1:2)])) dxd <- DEXSeqDataSet(countData=count.data, sampleData=sample.data, design=~sample + exon + condition:exon, featureID=counts(d)$feature_id, groupID=counts(d)$gene_id) # this takes a little over a minute on my laptop system.time({ dxd <- estimateSizeFactors(dxd) dxd <- estimateDispersions(dxd, quiet=TRUE) dxd <- testForDEU(dxd, reducedModel=~sample + exon) }) dxr <- DEXSeqResults(dxd, independentFiltering=FALSE) qval <- perGeneQValue(dxr) dxr.g <- data.frame(gene=names(qval),qval) columns <- c("featureID","groupID","pvalue") dxr2 <- as.data.frame(dxr[,columns])
library(pheatmap) pheatmap(log10(as.matrix(dxr[dxr$groupID == names(which.min(qval)),"countData"])+1), cluster_rows=FALSE, cluster_cols=FALSE, show_rownames=FALSE, show_colnames=FALSE)
stageR for stagewise testing
library(stageR) pConfirmation <- matrix(dxr$pvalue,ncol=1) dimnames(pConfirmation) <- list(dxr$featureID,"transcript") pScreen <- qval names(pScreen) <- names(pScreen) tx2gene <- as.data.frame(dxr[,c("featureID", "groupID")]) stageRObj <- stageRTx(pScreen=pScreen, pConfirmation=pConfirmation, pScreenAdjusted=TRUE, tx2gene=tx2gene) stageRObj <- stageWiseAdjustment(stageRObj, method="dtu", alpha=0.05) dex.padj <- getAdjustedPValues(stageRObj, order=TRUE, onlySignificantGenes=FALSE) head(dex.padj, n=10)
We talked about having the test statistics available within
the RangedSummarizedExperiment
prior to calling iSEE. For now, I will just add
the DEXSeq + stageR adjusted p-values to the rowData.
se <- se[rownames(se) %in% dex.padj$txID,] # filter se based on DRIMSeq filtering? Do we want that as a general rule? rowData(se)[,c("gene_padj", "tx_padj")] <- dex.padj[match(rownames(se),dex.padj$txID),c("gene", "transcript")]
library(iSEE) # Helper to compute proportions (based on DEXSeq's classes.R) .getTotalCount <- function(countData, tx2gene) { geneForEachTx <- as.character(tx2gene$gene_id[match(rownames(countData), tx2gene$tx_name)]) forCycle <- split(seq_len(nrow(countData)), as.character(geneForEachTx)) all <- lapply(forCycle, function(i) { sct <- countData[i, , drop = FALSE] rs <- t(vapply(seq_len(nrow(sct)), function(r) colSums(sct[, , drop = FALSE]), numeric(ncol(countData)))) # adapted, removed "-r" to get gene-level counts rownames(rs) <- rownames(sct) rs }) totalCount <- do.call(rbind, all) totalCount <- totalCount[rownames(countData), ] return(totalCount) } # Main function, relies on iSEE::FeatureAssayPlot ProportionsPlot <- function(se, transcript, XAxisColumnData, PanelWidth=6L){ totalCount <- .getTotalCount(countData = assays(se)$counts, tx2gene = rowData(se)) assays(se)$proportions <- assays(se)$counts/totalCount # directly borrow FeatureAssayPlot from iSEE: init <- iSEE::FeatureAssayPlot(YAxisFeatureName=transcript, Assay="proportions", XAxis= "Column data", XAxisColumnData = XAxisColumnData, PanelWidth=6L) # !proportions assay only available in local function environment app <- iSEE(se, initial=list(init)) return(app) }
app <- ProportionsPlot(se = se, transcript = "ENST00000486652.5", XAxisColumnData = "condition", PanelWidth=6L)
shiny::runApp(app)
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.