shinyInput <- getShinyInput() batch <- shinyInput$batch condition <- shinyInput$condition report_option_vector <- shinyInput$report_option_vector eval_cell_1 = (report_option_vector[1]=="1")
r if (eval_cell_1) 'Summary'
r if (eval_cell_1) '======='
r if (eval_cell_1) '## Confounding'
r if (eval_cell_1) '### Number of samples in each Batch and Condition'
catbatch <- function(x) { paste("Batch", x) } catcondition <- function(x) { paste("Condition", x) } counts <- table(shinyInput$condition, shinyInput$batch) countsmatrix <- as.matrix(counts) colnames(countsmatrix) <- sapply(colnames(countsmatrix), catbatch, simplify = TRUE) rownames(countsmatrix) <- sapply(rownames(countsmatrix), catcondition, simplify = TRUE) panderOptions("table.split.table", 120) ## table split at 100 (default 80) characters in a line pander(countsmatrix)
r if (eval_cell_1) '### Measures of confounding between Batch and Condition'
counts <- table(shinyInput$condition, shinyInput$batch) rowsums <- apply(counts, 1, sum) colsums <- apply(counts, 2, sum) tablesum <- sum(rowsums) expected <- matrix(0, nrow(counts), ncol(counts)) for (i in 1:nrow(counts)) { for (j in 1:ncol(counts)) { expected[i, j] = rowsums[i] * colsums[j]/tablesum } } chi <- sum((counts - expected)^2/expected) mmin <- min(nrow(counts), ncol(counts)) confound1 <- sqrt(chi * mmin/((chi + tablesum) * (mmin - 1))) ## Standardized Pearson Correlation Coefficient confound2 <- sqrt(chi/(tablesum * (mmin - 1))) ## Cramer's V confound <- matrix(c(confound1, confound2), nrow = 1) colnames(confound) <- c("Standardized Pearson Correlation Coefficient", "Cramer's V") rownames(confound) <- c( "Confounding Coefficients (0=no confounding, 1=complete confounding)") panderOptions("table.split.table", 120) ## table split at 100 (default 80) characters in a line pander(confound)
r if (eval_cell_1) '## Variation Analysis'
r if (eval_cell_1) '### Variation explained by Batch and Condition'
batchqc_ev <- tryCatch({ batchqc_explained_variation(shinyInput$lcounts, shinyInput$condition, shinyInput$batch) }, error = function(err) { warning("Error in BatchQC Explained Variation: ",err) return(NULL) }) if (is.null(batchqc_ev)) { eval_cell_1 = FALSE } else { boxplot(batchqc_ev$explained_variation, ylab = "Percent Explained Variation", main = "Percent of Variation Explained by Source") }
panderOptions("table.split.table", 120) ## table split at 100 (default 80) characters in a line pander(apply(batchqc_ev$explained_variation, 2, summary))
r if (eval_cell_1) '## P-value Analysis'
r if (eval_cell_1) '### Distribution of Batch and Condition Effect p-values Across Genes'
cond_ps <- batchqc_ev$cond_test$p batch_ps <- batchqc_ev$batch_test$p pvalue_table <- rbind( `Batch P-values` = c(summary(batch_ps), `Ps<0.05` = mean(batch_ps <= 0.05)), `Condition P-values` = c(summary(cond_ps), `Ps<0.05` = mean(cond_ps <= 0.05))) pander(pvalue_table)
cond_ps <- batchqc_ev$cond_test$p batch_ps <- batchqc_ev$batch_test$p nf <- graphics::layout(mat = matrix(c(1, 2), 2, 1, byrow = TRUE), height = c(1, 3)) # par(mar=c(3.1, 3.1, 1.1, 2.1)) par(mar = c(3, 3, 1, 2)) boxplot(batch_ps, horizontal = TRUE, outline = TRUE, ylim = c(0, 1), frame = F, col = "green1") hist(batch_ps, xlim = c(0, 1), col = "pink", main = "") title("Distribution of Batch Effect p-values Across Genes")
cond_ps <- batchqc_ev$cond_test$p nf <- graphics::layout(mat = matrix(c(1, 2), 2, 1, byrow = TRUE), height = c(1, 3)) # par(mar=c(3.1, 3.1, 1.1, 2.1)) par(mar = c(3, 3, 1, 2)) boxplot(cond_ps, horizontal = TRUE, outline = TRUE, ylim = c(0, 1), frame = F, col = "green1") hist(cond_ps, xlim = c(0, 1), col = "pink", main = "") title("Distribution of Condition Effect p-values Across Genes")
eval_cell_2 = (report_option_vector[2]=="1")
r if (eval_cell_2) 'Differential Expression'
r if (eval_cell_2) '======================='
r if (eval_cell_2) '## Expression Plot'
r if (eval_cell_2) 'Boxplots for all values for each of the samples and are colored by batch membership.'
boxplot(shinyInput$lcounts,col=as.numeric(as.factor(batch)),main="Sample Boxplots (colored by batch)",xlab="Samples")
r if (eval_cell_2) '## LIMMA'
pdata <- data.frame(shinyInput$batch, shinyInput$condition) ncond <- nlevels(as.factor(shinyInput$condition)) nbatch <- nlevels(as.factor(shinyInput$batch)) if (ncond > 1) { if (nbatch <= 1) { mod_full <- model.matrix(~as.factor(shinyInput$condition), data = pdata) } else { mod_full <- model.matrix(~as.factor(shinyInput$condition) + ~as.factor(shinyInput$batch), data = pdata) } tryCatch({ fit <- lmFit(shinyInput$lcounts, mod_full) fit2 <- eBayes(fit) limmaTable <- topTable(fit2, coef = 2:ncond, number=10) for (j in 2:ncond) { colnames(limmaTable)[j-1] <- paste("Condition: ", levels(as.factor(shinyInput$condition))[j], " (logFC)", sep='') } panderOptions("table.split.table", 120) ## table split at 100 (default 80) characters in a line pander(limmaTable) }, error = function(err) { warning("Error in BatchQC Limma Table: ",err) }) }
eval_cell_3 = (report_option_vector[3]=="1")
r if (eval_cell_3) 'Median Correlations'
r if (eval_cell_3) '==================='
r if (eval_cell_3) 'This plot helps identify outlying samples.'
batchqc_corscatter(shinyInput$lcounts, batch=batch, mod=mod)
eval_cell_4 = (report_option_vector[4]=="1")
r if (eval_cell_4) 'Heatmaps'
r if (eval_cell_4) '========'
r if (eval_cell_4) '## Heatmap'
r if (eval_cell_4) 'This is a heatmap of the given data matrix showing the batch effects and variations with different conditions.'
batchqc_heatmap(shinyInput$lcounts, batch=batch, mod=mod)
r if (eval_cell_4) '## Sample Correlations'
r if (eval_cell_4) 'This is a heatmap of the correlation between samples.'
batchqc_correlation(shinyInput$lcounts, batch=batch, mod=mod)
eval_cell_5 = (report_option_vector[5]=="1")
r if (eval_cell_5) 'Circular Dendrogram'
r if (eval_cell_5) '==================='
r if (eval_cell_5) 'This is a Circular Dendrogram of the given data matrix colored by batch to show the batch effects.'
batchqc_circosplot(shinyInput$lcounts, shinyInput$batch, "complete")
eval_cell_6 = (report_option_vector[6]=="1")
r if (eval_cell_6) 'PCA: Principal Component Analysis'
r if (eval_cell_6) '================================='
r if (eval_cell_6) '## PCA'
r if (eval_cell_6) 'This is a plot of the top two principal components colored by batch to show the batch effects.'
pca <- tryCatch({ batchqc_pca(shinyInput$lcounts, batch=batch, mod=mod) }, error = function(err) { warning("Error in BatchQC PCA: ",err) return(NULL) }) if (is.null(pca)) { eval_cell_6 = FALSE } else { shinyInput <- getShinyInput() shinyInput <- c(shinyInput, list("pc"=data.frame(pca$x), "vars"=pca$sdev^2)) setShinyInput(shinyInput) }
r if (eval_cell_6) '## Explained Variation'
pc <- shinyInput$pc pcs <- t(pc) explained_variation <- tryCatch({ explained_variation <- batchqc_pc_explained_variation(pcs, shinyInput$vars, condition, batch) }, error = function(err) { warning("Error in BatchQC PC Explained Variation: ",err) return(NULL) }) if (!is.null(explained_variation)) { panderOptions("table.split.table", 240) ## table split at 240 (default 80) characters in a line pander(explained_variation) }
eval_cell_7 = (report_option_vector[7]=="1")
r if (eval_cell_7) 'Shape'
r if (eval_cell_7) '====='
r if (eval_cell_7) 'This is a heatmap plot showing the variation of gene expression mean, variance, skewness and kurtosis between samples grouped by batch to see the batch effects variation'
lcounts_adj <- tryCatch({ batchQC_condition_adjusted(shinyInput$lcounts, batch, condition) }, error = function(err) { warning("Error in BatchQC Condition adjusted data: ",err) return(shinyInput$lcounts) }) bf <- as.factor(shinyInput$batch) pval <- batchQC_shapeVariation(lcounts_adj, batch, plot = TRUE, groupCol = rainbow(nlevels(bf))[bf]) cat(paste("Note: Sample-wise p-value is calculated for the", "variation across samples on the measure", "across genes. Gene-wise p-value is calculated for the", "variation of each gene between batches", "on the measure across each batch. If the data is", "quantum normalized, then the Sample-wise measure across", "genes is same for all samples and Gene-wise p-value", "is a good measure.", sep=" "))
eval_cell_8 = (report_option_vector[8]=="1")
r if (eval_cell_8) 'Combat Plots'
r if (eval_cell_8) '============'
r if (eval_cell_8) 'This is a plot showing whether parametric or non-parameteric prior is appropriate for this data. It also shows the Kolmogorov-Smirnov test comparing the parametric and non-parameteric prior distribution.'
kstest <- tryCatch({ combatPlot(shinyInput$lcounts, batch=shinyInput$batch, mod=mod) }, error = function(err) { warning("Error in BatchQC ComBat Plot: ",err) return(NULL) }) if (!is.null(kstest)) { shinyInput <- getShinyInput() delta.hat <- shinyInput$delta.hat gamma.hat <- shinyInput$gamma.hat gamma.bar <- shinyInput$gamma.bar a.prior <- shinyInput$a.prior b.prior <- shinyInput$b.prior ksout <- ks.test(gamma.hat[1, ], "pnorm", gamma.bar[1], sqrt(shinyInput$t2[1])) # two-sided, exact summarytext <- "Batch mean distribution across genes: Normal vs Empirical distribution" summarytext <- paste(summarytext, "Two-sided Kolmogorov-Smirnov test", sep = "\n") summarytext <- paste(summarytext, "Selected Batch: ", sep = "\n") summarytext <- paste(summarytext, 1, sep = "") summarytext <- paste(summarytext, "Statistic D = ", sep = "\n") summarytext <- paste(summarytext, signif(ksout$statistic, 4), sep = "") summarytext <- paste(summarytext, "p-value = ", sep = "\n") summarytext <- paste(summarytext, signif(ksout$p.value, 4), sep = "") invgam <- 1/rgamma(ncol(delta.hat), a.prior[1], b.prior[1]) ksvarout <- ks.test(delta.hat[1, ], invgam) # two-sided, exact summarytext <- paste(summarytext, "\n\n\nBatch Variance distribution across genes: ", "Inverse Gamma vs Empirical distribution", sep = "") summarytext <- paste(summarytext, "Two-sided Kolmogorov-Smirnov test", sep = "\n") summarytext <- paste(summarytext, "Selected Batch: ", sep = "\n") summarytext <- paste(summarytext, 1, sep = "") summarytext <- paste(summarytext, "Statistic D = ", sep = "\n") summarytext <- paste(summarytext, signif(ksvarout$statistic, 4), sep = "") summarytext <- paste(summarytext, "p-value = ", sep = "\n") summarytext <- paste(summarytext, signif(ksvarout$p.value, 4), sep = "") cat(summarytext) cat(paste("Note: The non-parametric version of ComBat", "takes much longer time to run and we recommend it", "only when the shape of the non-parametric curve", "widely differs such as a bimodal or highly skewed", "distribution. Otherwise, the difference in batch", "adjustment is very negligible and parametric version", "is recommended even if p-value of KS test above is", "significant.", sep=" ")) }
eval_cell_9 = (report_option_vector[9]=="1")
r if (eval_cell_9) 'SVA'
r if (eval_cell_9) '==='
r if (eval_cell_6) '## Summary'
nsample <- dim(shinyInput$lcounts)[2] sample <- 1:nsample pdata <- data.frame(sample, condition) ncond <- nlevels(as.factor(condition)) if (ncond <= 1) { modmatrix = matrix(rep(1, ncol(shinyInput$lcounts)), ncol = 1) } else { modmatrix = model.matrix(~as.factor(condition), data = pdata) } n.sv <- tryCatch({ batchQC_num.sv(shinyInput$lcounts, modmatrix) }, error = function(err) { warning("Error in BatchQC num.sv surrogate variables compution: ",err) return(NULL) }) if (!is.null(n.sv)) { cat(paste("Number of Surrogate Variables found in the given data:", n.sv)) }
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.