Nothing
require(sva)
require(limma)
require(pander)
require(stats)
require(graphics)
#' Batch and Condition indicator for signature data captured when
#' activating different growth pathway genes in human mammary epithelial cells.
#'
#' This data consists of three batches and ten different conditions
#' corresponding to control and nine different pathways
#'
#' @name example_batchqc_data
#' @format A data frame with 89 rows and 2 variables:
#' \describe{
#' \item{V1}{Batch Indicator}
#' \item{V2}{Condition (Pathway) Indicator}
#' }
#' @source GEO accession: GSE73628
#' @return Batch indicator object
"batch_indicator"
#' Signature data captured when activating different growth pathway genes in
#' human mammary epithelial cells
#'
#' This data consists of three batches and ten different conditions
#' corresponding to control and nine different pathways
#'
#' @name example_batchqc_data
#' @format A data frame with 18052 rows and 89 variables:
#' \describe{
#' \item{Columns1-89}{Control and Pathway activated samples}
#' \item{rows1-18052}{Genes 1-18052}
#' }
#' @source GEO accession: GSE73628
#' @return Signature data
"signature_data"
#' Batch and Condition indicator for protein expression data
#'
#' This data consists of two batches and two conditions
#' corresponding to case and control for the protein expression data
#'
#' @name protein_example_data
#' @format A data frame with 24 rows and 4 variables:
#' \describe{
#' \item{Arrayname}{Array Name}
#' \item{samplename}{Sample Name}
#' \item{Batch}{Batch Indicator}
#' \item{category}{Condition (Case vs Control) Indicator}
#' }
#' @return Protein data sample info
"protein_sample_info"
#' Protein data with 39 protein expression levels
#'
#' This data consists of two batches and two conditions
#' corresponding to case and control
#'
#' @name protein_example_data
#' @format A data frame with 39 rows and 24 variables:
#' \describe{
#' \item{Columns1-24}{Control and Case samples}
#' \item{rows1-39}{Proteins 1-39}
#' }
#' @return Protein data
"protein_data"
#' @importFrom methods setOldClass
setOldClass("prcomp")
################################################################################
#' The BatchQC output class to output BatchQC results
#'
#' Contains all currently-supported BatchQC output data classes:
#'
#' slots:
#' \describe{
#' \item{batchqc_ev}{a single object of class list}
#' \item{pca}{a single object of S3 class prcomp}
#' }
#'
#' @name BatchQCout-class
#' @rdname BatchQCout-class
#' @exportClass BatchQCout
setClass(Class="BatchQCout",
representation=representation(
batchqc_ev="list",
pca="prcomp"
),
prototype=prototype(batchqc_ev=NULL, pca=NULL)
)
###############################################################################
#' Checks for presence of batch effect and reports whether the batch
#' needs to be adjusted
#'
#' @param data.matrix Given data or simulated data from rnaseq_sim()
#' @param batch Batch covariate
#' @param mod Model matrix for outcome of interest and other covariates
#' besides batch
#' @return pca Principal Components Analysis object of the data
#' @export
#' @examples
#' nbatch <- 3
#' ncond <- 2
#' npercond <- 10
#' data.matrix <- rnaseq_sim(ngenes=50, nbatch=nbatch, ncond=ncond, npercond=
#' npercond, basemean=10000, ggstep=50, bbstep=2000, ccstep=800,
#' basedisp=100, bdispstep=-10, swvar=1000, seed=1234)
#' batch <- rep(1:nbatch, each=ncond*npercond)
#' condition <- rep(rep(1:ncond, each=npercond), nbatch)
#' pdata <- data.frame(batch, condition)
#' modmatrix = model.matrix(~as.factor(condition), data=pdata)
#' batchQC_analyze(data.matrix, batch, mod=modmatrix)
batchQC_analyze <- function(data.matrix, batch, mod = NULL) {
batchqc_heatmap(data.matrix, batch = batch, mod = mod)
pca <- batchqc_pca(data.matrix, batch = batch, mod = mod)
batchtest(pca, batch = batch, mod = mod)
return(pca)
}
#' Checks for presence of batch effect and creates a html report
#' with information including whether the batch needs to be adjusted
#'
#' @param dat Given data or simulated data from rnaseq_sim()
#' @param batch Batch covariate
#' @param condition Covariates or conditions of interest besides batch
#' @param report_file Output report file name
#' @param report_dir Output report directory path
#' @param report_option_binary 9 bits Binary String representing
#' the plots to display and hide in the report
#' @param view_report when TRUE, opens the report in a browser
#' @param interactive when TRUE, opens the interactive shinyApp
#' @param batchqc_output when TRUE, creates BatchQCout object in
#' batchqc_output.rda R object file
#' @param log2cpm_transform when TRUE, transforms the data using log2CPM -
#' log2 Counts Per Million transformation function
#' @return outputfile Report file generated by batchQC
#' @import rmarkdown knitr pander ggvis heatmaply reshape2 limma
#' @importFrom grDevices colorRampPalette rainbow
#' @importFrom graphics abline axis hist image layout legend lines
#' mtext par plot plot.new points rect segments text title
#' @importFrom stats IQR as.dendrogram as.dist cor density
#' dist dnorm hclust ks.test lm median model.matrix
#' order.dendrogram pf prcomp qqline qqnorm qqplot quantile
#' reorder rgamma rnbinom rnorm sd var
#' @importFrom utils browseURL
#' @importFrom shiny runApp
#' @importFrom methods new
#' @importFrom Matrix nearPD
#' @export
#' @examples
#' nbatch <- 3
#' ncond <- 2
#' npercond <- 10
#' data.matrix <- rnaseq_sim(ngenes=50, nbatch=nbatch, ncond=ncond, npercond=
#' npercond, basemean=10000, ggstep=50, bbstep=2000, ccstep=800,
#' basedisp=100, bdispstep=-10, swvar=1000, seed=1234)
#' batch <- rep(1:nbatch, each=ncond*npercond)
#' condition <- rep(rep(1:ncond, each=npercond), nbatch)
#' batchQC(data.matrix, batch=batch, condition=condition, view_report=FALSE,
#' interactive=FALSE)
batchQC <- function(dat, batch, condition = NULL,
report_file = "batchqc_report.html", report_dir = ".",
report_option_binary = "111111111", view_report = FALSE,
interactive = TRUE, batchqc_output=FALSE, log2cpm_transform=FALSE) {
if (is.null(condition)) condition <- rep(1,ncol(dat))
if (is.null(batch)) batch <- rep(1,ncol(dat))
pdata <- data.frame(batch, condition)
ncond <- nlevels(as.factor(condition))
if (ncond <= 1) {
mod = matrix(rep(1, ncol(dat)), ncol = 1)
} else {
mod = model.matrix(~as.factor(condition), data = pdata)
}
if (report_dir == ".") {
report_dir = getwd()
}
dat <- as.matrix(dat)
if (is.null(colnames(dat))) {
colnames(dat) <- 1:ncol(dat)
}
dat <- batchQC_filter_genes(dat, batch, condition)
report_option_vector <- unlist(strsplit(as.character(report_option_binary),
""))
if (log2cpm_transform) {
lcounts <- log2CPM(dat)$y
} else {
lcounts <- dat
}
shinyInput <- list(data = dat, batch = batch, condition = condition,
report_dir = report_dir, report_option_vector = report_option_vector,
lcounts = lcounts, log2cpm_transform=log2cpm_transform)
setShinyInput(shinyInput)
batchqc_ev <- NULL
pca <- NULL
rmdfile <- system.file("reports/batchqc_report.Rmd", package = "BatchQC")
batchqc_html <- system.file("reports/batchQC.html", package = "BatchQC")
# rmarkdown::draft('batchqc_report.Rmd', template =
# 'batchqc', package = 'BatchQC')
static_lib_dir <- system.file("reports/libs", package = "BatchQC")
file.copy(static_lib_dir, report_dir, recursive = TRUE)
file.copy(rmdfile, report_dir, overwrite = TRUE)
file.copy(batchqc_html, report_dir, overwrite = TRUE)
rmdfile_copy <- file.path(report_dir, "batchqc_report.Rmd")
outputfile <- rmarkdown::render(rmdfile_copy, output_file = report_file,
output_dir=report_dir, knit_root_dir=report_dir, clean=FALSE)
shinyInput <- getShinyInput()
setShinyInputOrig(shinyInput)
setShinyInputCombat(NULL)
setShinyInputSVAf(NULL)
setShinyInputSVAr(NULL)
setShinyInputSVA(NULL)
if (batchqc_output) {
if (is.null(batchqc_ev)) batchqc_ev <- list()
if (is.null(pca)) pca <- prcomp(0)
batchQCout <- new ("BatchQCout", batchqc_ev=batchqc_ev, pca=pca)
outfile <- file.path(report_dir, "batchqc_output.rda")
save(batchQCout, file=outfile)
}
if (view_report) {
browseURL(outputfile)
}
if (interactive) {
appDir <- system.file("shiny", "BatchQC", package = "BatchQC")
if (appDir == "") {
stop("Could not find shiny directory. Try re-installing BatchQC.",
call. = FALSE)
}
shiny::runApp(appDir, display.mode = "normal")
}
return(outputfile)
}
#' Adjust for batch effects using an empirical Bayes framework
#' ComBat allows users to adjust for batch effects in datasets where the
#' batch covariate is known, using methodology described in Johnson et al. 2007.
#' It uses either parametric or non-parametric empirical Bayes frameworks for
#' adjusting data for batch effects. Users are returned an expression matrix
#' that has been corrected for batch effects. The input
#' data are assumed to be cleaned and normalized before batch effect removal.
#'
#' @param dat Genomic measure matrix (dimensions probe x sample) - for example,
#' expression matrix
#' @param batch {Batch covariate (only one batch allowed)}
#' @param mod Model matrix for outcome of interest and other covariates
#' besides batch
#' @param par.prior (Optional) TRUE indicates parametric adjustments will be
#' used, FALSE indicates non-parametric adjustments will be used
#' @param prior.plots (Optional)TRUE give prior plots with black as a kernel
#' estimate of the empirical batch effect density and red as the parametric
#' @return data A probe x sample genomic measure matrix, adjusted for
#' batch effects.
#' @export
#' @examples
#' nbatch <- 3
#' ncond <- 2
#' npercond <- 10
#' data.matrix <- rnaseq_sim(ngenes=50, nbatch=nbatch, ncond=ncond, npercond=
#' npercond, basemean=10000, ggstep=50, bbstep=2000, ccstep=800,
#' basedisp=100, bdispstep=-10, swvar=1000, seed=1234)
#' batch <- rep(1:nbatch, each=ncond*npercond)
#' condition <- rep(rep(1:ncond, each=npercond), nbatch)
#' pdata <- data.frame(batch, condition)
#' mod = model.matrix(~as.factor(condition), data = pdata)
#' combatPlot(data.matrix, batch, mod=mod)
combatPlot <- function(dat, batch, mod = NULL, par.prior = TRUE,
prior.plots = TRUE) {
# make batch a factor and make a set of indicators for batch
if (length(dim(batch)) > 1)
{
warning("This version of ComBat only allows one batch variable")
return(NULL)
} ## to be updated soon!
batch <- as.factor(batch)
if (nlevels(batch) <= 1) {
warning("There is no batch")
return(NULL)
}
batchmod <- model.matrix(~-1 + batch)
cat("Found", nlevels(batch), "batches\n")
# A few other characteristics on the batches
n.batch <- nlevels(batch)
batches <- list()
for (i in 1:n.batch) {
batches[[i]] <- which(batch == levels(batch)[i])
} # list of samples in each batch
n.batches <- sapply(batches, length)
n.array <- sum(n.batches)
# combine batch variable and covariates
design <- cbind(batchmod, mod)
# check for intercept in covariates, and drop if present
check <- apply(design, 2, function(x) all(x == 1))
design <- as.matrix(design[, !check])
# Number of covariates or covariate levels
cat("Adjusting for", ncol(design) - ncol(batchmod),
"covariate(s) or covariate level(s)\n")
# Check if the design is confounded
if (qr(design)$rank < ncol(design)) {
# if(ncol(design)<=(n.batch)){stop('Batch variables are
# redundant! Remove one or more of the batch variables so
# they are no longer confounded')}
if (ncol(design) == (n.batch + 1)) {
stop(paste("The covariate is confounded with batch! Remove the ",
"covariate and rerun ComBat", sep = ""))
}
if (ncol(design) > (n.batch + 1)) {
if ((qr(design[, -c(1:n.batch)])$rank < ncol(design[,
-c(1:n.batch)]))) {
stop(paste("The covariates are confounded! Please remove one ",
"or more of the covariates so the design is not confounded",
sep = ""))
} else {
stop(paste("At least one covariate is confounded with batch! ",
"Please remove confounded covariates and rerun ComBat",
sep = ""))
}
}
}
## Check for missing values
NAs = any(is.na(dat))
if (NAs) {
cat(c("Found", sum(is.na(dat)), "Missing Data Values\n"),
sep = " ")
}
# print(dat[1:2,]) Standardize Data across genes
cat("Standardizing Data across genes\n")
if (!NAs) {
B.hat <- solve(t(design) %*% design) %*% t(design) %*%
t(as.matrix(dat))
} else {
B.hat = apply(dat, 1, Beta.NA, design)
} #Standarization Model
grand.mean <- t(n.batches/n.array) %*% B.hat[1:n.batch, ]
if (!NAs) {
var.pooled <- ((dat - t(design %*% B.hat))^2) %*% rep(1/n.array,
n.array)
} else {
var.pooled <- apply(dat - t(design %*% B.hat), 1, var,
na.rm = TRUE)
}
stand.mean <- t(grand.mean) %*% t(rep(1, n.array))
if (!is.null(design)) {
tmp <- design
tmp[, c(1:n.batch)] <- 0
stand.mean <- stand.mean + t(tmp %*% B.hat)
}
s.data <- (dat - stand.mean)/(sqrt(var.pooled) %*% t(rep(1,
n.array)))
## Get regression batch effect parameters
cat("Fitting L/S model and finding priors\n")
batch.design <- design[, 1:n.batch]
if (!NAs) {
gamma.hat <- solve(t(batch.design) %*% batch.design) %*%
t(batch.design) %*% t(as.matrix(s.data))
} else {
gamma.hat <- apply(s.data, 1, Beta.NA, batch.design)
}
shinyInput <- getShinyInput()
if (is.null(shinyInput)) {
shinyInput <- list(data = dat, batch = batch)
}
shinyInput <- c(shinyInput, list(gamma.hat = gamma.hat))
delta.hat <- NULL
for (i in batches) {
delta.hat <- rbind(delta.hat, apply(s.data[, i], 1, var,
na.rm = TRUE))
}
shinyInput <- c(shinyInput, list(delta.hat = delta.hat))
## Find Priors
gamma.bar <- apply(gamma.hat, 1, mean)
shinyInput <- c(shinyInput, list(gamma.bar = gamma.bar))
t2 <- apply(gamma.hat, 1, var)
shinyInput <- c(shinyInput, list(t2 = t2))
a.prior <- apply(delta.hat, 1, aprior)
shinyInput <- c(shinyInput, list(a.prior = a.prior))
b.prior <- apply(delta.hat, 1, bprior)
shinyInput <- c(shinyInput, list(b.prior = b.prior))
setShinyInput(shinyInput)
## Plot empirical and parametric priors
if (prior.plots & par.prior) {
# par(mfrow=c(2,2))
tmp <- density(gamma.hat[1, ])
xx <- seq(min(tmp$x), max(tmp$x), length = 100)
tmp1 <- dnorm(xx, gamma.bar[1], sqrt(t2[1]))
plot(tmp, type = "l", main = "Density Plot",
ylim = c(0, max(tmp$y, tmp1)))
lines(xx, tmp1, col = 2)
qqnorm(gamma.hat[1, ])
qqline(gamma.hat[1, ], col = 2)
tmp <- density(delta.hat[1, ])
invgam <- 1/rgamma(ncol(delta.hat), a.prior[1], b.prior[1])
tmp1 <- density(invgam)
plot(tmp, type = "l", main = "Density Plot", ylim = c(0,
max(tmp$y, tmp1$y)))
lines(tmp1, col = 2)
qqplot(delta.hat[1, ], invgam, xlab = "Sample Quantiles",
ylab = "Theoretical Quantiles")
lines(c(0, max(invgam)), c(0, max(invgam)), col = 2)
title("Q-Q Plot")
}
kstest <- ks.test(gamma.hat[1, ], "pnorm", gamma.bar[1],
sqrt(t2[1]))
# two-sided, exact
return(kstest)
}
# Following four find empirical hyper-prior values
aprior <- function(gamma.hat) {
m = mean(gamma.hat)
s2 = var(gamma.hat)
(2 * s2 + m^2)/s2
}
bprior <- function(gamma.hat) {
m = mean(gamma.hat)
s2 = var(gamma.hat)
(m * s2 + m^3)/s2
}
Beta.NA = function(y, X) {
des = X[!is.na(y), ]
y1 = y[!is.na(y)]
B <- solve(t(des) %*% des) %*% t(des) %*% y1
B
}
#' Getter function to get the shinyInput option
#' @return shinyInput option
#' @export
#' @examples
#' getShinyInput()
getShinyInput <- function() {
shinyInput <- getOption("batchqc.shinyInput")
return(shinyInput)
}
#' Setter function to set the shinyInput option
#' @param x shinyInput option
#' @return shinyInput option
#' @export
#' @examples
#' setShinyInput(NULL)
setShinyInput <- function(x) {
options(batchqc.shinyInput = x)
}
#' Getter function to get the shinyInputOrig option
#' @return shinyInputOrig option
#' @export
#' @examples
#' getShinyInputOrig()
getShinyInputOrig <- function() {
shinyInputOrig <- getOption("batchqc.shinyInputOrig")
return(shinyInputOrig)
}
#' Setter function to set the shinyInputOrig option
#' @param x shinyInputOrig option
#' @return shinyInputOrig option
#' @export
#' @examples
#' setShinyInputOrig(NULL)
setShinyInputOrig <- function(x) {
options(batchqc.shinyInputOrig = x)
}
#' Getter function to get the shinyInputCombat option
#' @return shinyInputCombat option
#' @export
#' @examples
#' getShinyInputCombat()
getShinyInputCombat <- function() {
shinyInputCombat <- getOption("batchqc.shinyInputCombat")
return(shinyInputCombat)
}
#' Setter function to set the shinyInputCombat option
#' @param x shinyInputCombat option
#' @return shinyInputCombat option
#' @export
#' @examples
#' setShinyInputCombat(NULL)
setShinyInputCombat <- function(x) {
options(batchqc.shinyInputCombat = x)
}
#' Getter function to get the shinyInputSVA option
#' @return shinyInputSVA option
#' @export
#' @examples
#' getShinyInputSVA()
getShinyInputSVA <- function() {
shinyInputSVA <- getOption("batchqc.shinyInputSVA")
return(shinyInputSVA)
}
#' Setter function to set the shinyInputSVA option
#' @param x shinyInputSVA option
#' @return shinyInputSVA option
#' @export
#' @examples
#' setShinyInputSVA(NULL)
setShinyInputSVA <- function(x) {
options(batchqc.shinyInputSVA = x)
}
#' Getter function to get the shinyInputSVAf option
#' @return shinyInputSVAf option
#' @export
#' @examples
#' getShinyInputSVAf()
getShinyInputSVAf <- function() {
shinyInputSVAf <- getOption("batchqc.shinyInputSVAf")
return(shinyInputSVAf)
}
#' Setter function to set the shinyInputSVAf option
#' @param x shinyInputSVAf option
#' @return shinyInputSVAf option
#' @export
#' @examples
#' setShinyInputSVAf(NULL)
setShinyInputSVAf <- function(x) {
options(batchqc.shinyInputSVAf = x)
}
#' Getter function to get the shinyInputSVAr option
#' @return shinyInputSVAr option
#' @export
#' @examples
#' getShinyInputSVAr()
getShinyInputSVAr <- function() {
shinyInputSVAr <- getOption("batchqc.shinyInputSVAr")
return(shinyInputSVAr)
}
#' Setter function to set the shinyInputSVAr option
#' @param x shinyInputSVAr option
#' @return shinyInputSVAr option
#' @export
#' @examples
#' setShinyInputSVAr(NULL)
setShinyInputSVAr <- function(x) {
options(batchqc.shinyInputSVAr = x)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.