#' @title Simulate true gene expression data for a set of groups
#'
#' @description This function simulates true gene expression data.
#' The gene expression arrays are simulated using a zero inflated
#' log-normal poisson distribution to represent the number or RNA
#' molecules in a sample.
#'
#' @param nGenes number of genes to simulate.
#' @param nGroups number of groups. Default is 2.
#' @param muHyp hyperparameters for mean of log normal. See
#' \code{pickLogNormal} for default values.
#' @param sigHyp hyperparameters for variance of log normal. See
#' \code{pickLogNormal} for default values.
#' @param zeroWeight proportion of zero-inflated counts. Default is 0.10.
#' @param pDiff percent of probes different relative to Group 1. If
#' \code{nGroups == 1}, pDiff should be 0. If \code{nGroups > 1}, the
#' length of pDiff should be equal to \code{nGroups - 1}. Default
#' is \code{pDiff = 0.05}.
#' @param foldDiff fold difference relative to Group 1. If
#' \code{nGroups == 1}, foldDiff is ignored. If \code{nGroups > 1}, the
#' length of foldDiff should be equal to \code{nGroups - 1}. Default is
#' \code{foldDiff = 5}.
#' @param scaleCounts To plot gene expression on log2 scale, pick a
#' small amount to scale the counts by. Default is 1.
#' @param verbose TRUE/FALSE argument specifying if verbose messages
#' should be returned or not. Default is TRUE.
#'
#' @return A list with elements
#' \item{objectType}{A string specifying the type of object.}
#' \item{nGenes}{Number of genes simulated.}
#' \item{zeroWeight}{Proportion of zero-inflated counts used in simulation.}
#' \item{pDiff}{Percent of probes different relative to Group 1.}
#' \item{foldDiff}{fold difference relative to Group 1.}
#' \item{scaleCounts}{An positive real valued number specifying an additive
#' scale for the RNA counts to be able to plot on the log2 scale.}
#' \item{genesDiffInd}{Boolean array referencing which probes are different
#' relative to Group 1.}
#' \item{geneRange}{Data frame containing the simulated RNA molecules
#' for each gene and for each group.}
#'
#' @author Stephanie Hicks
#' @export
#' @seealso \code{\link{simulateMethTruth}}
#' @examples
#' geneTruth <- simulateGExTruth(nGenes = 2e4, nGroups = 2,
#' pDiff = 0.05, foldDiff = 5)
simulateGExTruth <- function(nGenes, nGroups = 2, muHyp = NULL, sigHyp = NULL,
zeroWeight = 0.10, pDiff = 0.05, foldDiff = 5,
scaleCounts = 1, verbose = TRUE)
{
if((nGroups > 1) && (length(pDiff) != (nGroups-1))){
stop("[quantroSim]: pDiff must have length (nGroups - 1) if nGroups >
1")}
if((nGroups == 1) && ((length(pDiff) != 1) || (pDiff != 0) )){
warning("[quantroSim]: Cannot simulate differences between more than
one group if only one group exists. Using pDiff = 0.")
pDiff = 0
}
if((nGroups > 1) && (length(foldDiff) != (nGroups-1))){
stop("[quantroSim]: foldDiff must have length (nGroups - 1) if
nGroups > 1")}
if((nGroups == 1) && (length(foldDiff) != 1)){
stop("[quantroSim]: Cannot simulate differences between more than one
group if only one group exists. Fold diff should be length 1. ")}
if( any(pDiff < 0) || any(pDiff > 1) ){
stop("[quantroSim]: pDiff must be between 0 and 1.")}
if( (zeroWeight < 0) || (zeroWeight > 1) ){
stop("[quantroSim]: zeroWeight must be between 0 and 1.")}
if(!is.null(muHyp) && (nGenes != length(muHyp))){
stop("[quantroSim]: Hyperparameters for muHyp must be equal to number
of genes.")}
if(!is.null(sigHyp) && (nGenes != length(sigHyp))){
stop("[quantroSim]: Hyperparameters for sigHyp must be equal to number
of genes.")}
poisMeanVals <- pickLogNormal(N = nGenes, muHyp = muHyp, sigHyp = sigHyp)
rangeMu <- range(poisMeanVals)
if(verbose){
mes <- "[quantroSim]: Simulating RNA transcript counts using a Poisson
distribution with mean parameters from %.2f to %.2f"
message(sprintf(mes, rangeMu[1], rangeMu[2]))
}
# zero-inflated log-normal Poisson counts
zeros <- sample(c(0,1), nGenes, replace = TRUE,
prob = c(zeroWeight, 1 - zeroWeight))
poisCounts <- sapply(poisMeanVals,
function(x){ rpois(1, lambda = x) })
nRNAmol <- zeros
nRNAmol[which(zeros == 1)] <- poisCounts[which(zeros == 1)]
nR <- length(nRNAmol)
if(nGroups == 1){
output = list("objectType" = "simulateGExTruthObject", "nGenes" = nGenes,
"nGroups" = nGroups, "zeroWeight" = zeroWeight,
"pDiff" = pDiff, "foldDiff" = foldDiff,
"scaleCounts" = scaleCounts, "genesDiffInd" = NA,
"geneRange" = as.matrix(nRNAmol))
}
if(nGroups > 1){
geneRangeMat <- replicate(nGroups, nRNAmol + rpois(nR, 1))
diffInd <- array(NA, dim = c(nR, nGroups - 1))
for(i in seq_len(nGroups-1)){
diffInd[,i] <- sample(c(TRUE, FALSE), nR, replace = TRUE,
prob = c(pDiff[i], 1 - pDiff[i]))
geneRangeMat[diffInd[,i],i+1] <- foldDiff[i] * nRNAmol[diffInd[,i]]
}
colnames(geneRangeMat) <- sapply(1:nGroups, function(x)
paste0("Group_", formatC(x, flag=0, width=2)))
output = list("objectType" = "simulateGExTruthObject", "nGenes" = nGenes,
"nGroups" = nGroups, "zeroWeight" = zeroWeight,
"pDiff" = pDiff, "foldDiff" = foldDiff,
"scaleCounts" = scaleCounts, "genesDiffInd" = diffInd,
"geneRange" = geneRangeMat)
}
return(output)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.