differential_RNA | R Documentation |
Perform differential expression analysis of RNA-seq data
differential_RNA(
counts,
group,
groupCol,
method = "limma",
geneLength = NULL,
gccontent = NULL,
filter = TRUE,
edgeRNorm = TRUE,
adjust.method = "BH",
useTopconfects = TRUE,
ucscData = FALSE
)
counts |
a data.frame or numeric matrix of raw count data, or a SummarizedExperiment object |
group |
sample groups |
groupCol |
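name of the column holding the sample groups (used when counts is a SummarizedExperiment object, e.g. groupCol = "group") |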
method |
one of "DESeq2", "edgeR", "limma", "dearseq", "NOISeq", "Wilcoxon", and "auto". |
geneLength |
a vector of gene length. |
gccontent |
a vector of gene GC content. |
filter |
if TRUE, use filterByExpr to filter genes. |
edgeRNorm |
if TRUE, use edgeR to do normalization for dearseq method. |
adjust.method |
character string specifying the method used to adjust p-values for multiple testing. See p.adjust for possible values. |
useTopconfects |
if TRUE, use topconfects to provide a more biologically useful ranked gene list. |
ucscData |
Logical, whether the data comes from UCSC Xena. |
data.frame
# Example: fetch TCGA-ACC RNA-seq data with TCGAbiolinks and build the
# raw count matrix (`dataPrep`) used by the later examples.
library(TCGAbiolinks)
# Query the gene expression quantification files of project TCGA-ACC
# produced by the "STAR - Counts" workflow.
query <- GDCquery(
project = "TCGA-ACC",
data.category = "Transcriptome Profiling",
data.type = "Gene Expression Quantification",
workflow.type = "STAR - Counts"
)
# NOTE(review): `Your_Path` is not defined anywhere in this example;
# presumably it is a placeholder for a user-chosen download directory and
# must be assigned before running -- confirm.
GDCdownload(query,
method = "api", files.per.chunk = 3,
directory = Your_Path
)
# Build `dataRNA` from the same query and the same directory that was
# passed to GDCdownload(); the call also requests saving to "dataRNA.RData".
dataRNA <- GDCprepare(
query = query, directory = Your_Path,
save = TRUE, save.filename = "dataRNA.RData"
)
## get raw count matrix
dataPrep <- TCGAanalyze_Preprocessing(
object = dataRNA,
cor.cut = 0.6,
datatype = "STAR - Counts"
)
# Use `differential_RNA` to do difference analysis.
# We provide the data of human gene length and GC content in `gene_cov`.
# Random two-group labels, one per column (sample) of `dataPrep`.
group <- sample(c("grp1", "grp2"), ncol(dataPrep), replace = TRUE)
library(cqn) # To avoid reporting errors: there is no function "rq"
## get gene length and GC content
library(clusterProfiler) # provides `bitr()`, which is called just below
library(org.Hs.eg.db)
# Map the ENTREZ IDs used as row names of `gene_cov` to ENSEMBL IDs.
genes_bitr <- bitr(
  rownames(gene_cov),
  fromType = "ENTREZID", toType = "ENSEMBL",
  OrgDb = org.Hs.eg.db, drop = TRUE
)
# Keep only the first mapping for each value of the second column, so the
# ENSEMBL IDs can be used as unique row names.
genes_bitr <- genes_bitr[!duplicated(genes_bitr[, 2]), ]
gene_cov2 <- gene_cov[genes_bitr$ENTREZID, ]
rownames(gene_cov2) <- genes_bitr$ENSEMBL
# Restrict the count matrix to genes that have length/GC annotation.
genes <- intersect(rownames(dataPrep), rownames(gene_cov2))
dataPrep <- dataPrep[genes, ]
# `gene_cov2` is a table, not a function: extract the columns with `[`.
geneLength <- gene_cov2[genes, "length"]
gccontent <- gene_cov2[genes, "GC"]
names(geneLength) <- names(gccontent) <- genes
## Difference analysis
DEGAll <- differential_RNA(
  counts = dataPrep, group = group,
  geneLength = geneLength, gccontent = gccontent
)
# Use `clusterProfiler` to do enrichment analysis:
# Build a named vector of log fold changes (names = row names of `DEGAll`),
# sorted in decreasing order, as the ranked input for `gseGO()`.
diffGenes <- DEGAll$logFC
names(diffGenes) <- rownames(DEGAll)
diffGenes <- sort(diffGenes, decreasing = TRUE)
library(clusterProfiler)
library(enrichplot)
library(org.Hs.eg.db)
# Spell out `geneList` instead of relying on partial matching of `gene`.
gsego <- gseGO(
  geneList = diffGenes,
  OrgDb = org.Hs.eg.db,
  keyType = "ENSEMBL"
)
dotplot(gsego)
# use user-defined data
# Simulate a 25-gene x 16-sample table of negative-binomial counts.
df <- matrix(rnbinom(400, mu = 4, size = 10), 25, 16)
df <- as.data.frame(df)
rownames(df) <- paste0("gene", 1:25)
colnames(df) <- paste0("sample", 1:16)
# Random two-group labels, one per sample.
group <- sample(c("group1", "group2"), 16, replace = TRUE)
# `filter` is spelled out in full rather than relying on partial argument
# matching of the abbreviated `filte`.
result <- differential_RNA(
  counts = df, group = group,
  filter = FALSE, method = "Wilcoxon"
)
# use SummarizedExperiment object input
# Simulate a 25-gene x 16-sample matrix of negative-binomial counts.
df <- matrix(rnbinom(400, mu = 4, size = 10), 25, 16)
rownames(df) <- paste0("gene", 1:25)
colnames(df) <- paste0("sample", 1:16)
# Random two-group labels, one per sample.
group <- sample(c("group1", "group2"), 16, replace = TRUE)
# Sample annotation: row names match the column names of `df`, and the
# `group` column is the one referenced by `groupCol` below.
colData <- S4Vectors::DataFrame(
  row.names = paste0("sample", 1:16),
  group = group
)
data <- SummarizedExperiment::SummarizedExperiment(
  assays = S4Vectors::SimpleList(counts = df),
  colData = colData
)
# `filter` is spelled out in full rather than relying on partial argument
# matching of the abbreviated `filte`.
result <- differential_RNA(
  counts = data, groupCol = "group",
  filter = FALSE, method = "Wilcoxon"
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.