#' Read a list of known inactivated genes
#'
#' Read a list of gene symbols of known inactivated genes
#' to be used as training set in \code{betaBinomXI}.
#'
#' @param xciGenes A \code{character} or code{NULL}. By defaults, return a
#' vector of 177 genes. Other available choices include "cotton" and "intersect".
#' If a file path is given, the genes will be read from the file.
#'
#' @details
#' Both gene lists are extracted from Cotton et al. Genome
#' Biology (2013). doi:10.1186/gb-2013-14-11-r122.
#' By default, the function returns a list that was used as training set in
#' the paper. This training set was generated as the intersection of the
#' silenced genes identified by both expression (Carrel & Willard, 2005) and
#' DNA methylation analysis (Cotton et al, 2011).
#' Setting it to "cotton" will instead return a list of 294 genes that were
#' classified as inactivated by Cotton et al.
#' "intersect" is the most stringent list which returns the intersection of
#' training and predicted set.
#'
#' @return A \code{character} vector of gene names.
#'
#' @examples
#' xcig <- readXCI()
#' xcig <- readXCI("cotton")
#'
#' @seealso \code{betaBinomXI}
#' @export
readXCI <- function(xciGenes = NULL){
if(!is.null(xciGenes)){
if(length(xciGenes) > 1){
# If the input is a character vector, assume it's gene symbols
xci <- xciGenes
return(xciGenes)
} else if(xciGenes == "cotton"){
xci <- system.file("extdata", "xciGenes_cotton.txt", package = "XCIR")
xci <- readLines(xci)
} else if(xciGenes == "intersect"){
xci177 <- readLines(system.file("extdata", "xciGene.txt", package = "XCIR"))
xcic <- readLines(system.file("extdata", "xciGenes_cotton.txt", package = "XCIR"))
xci <- intersect(xci177, xcic)
} else if(file.exists(xciGenes)){
xci <- readLines(xciGenes)
} else{
stop("The file does not exist")
}
} else{
xci <- system.file("extdata", "xciGene.txt", package = "XCIR")
xci <- readLines(xci)
}
}
# Find the most skewed samples within an XCIR data.table
.getSkewedSamples <- function(data, n = 2){
nsamples <- length(unique(data$sample))
if(n < 1 | n > nsamples)
stop(paste("n should be a number between 1 and", nsamples))
skewed_samples <- as.character(data[, median(pmin(AD_hap1, AD_hap2)/(AD_hap1+AD_hap2), na.rm = TRUE),
by = sample][order(V1)][seq_len(n), sample])
return(skewed_samples)
}
# Find the allelic imbalance for each sample
# Based on formula in Cotton et al. Genome Biology 2013
.getAI <- function(calls, labels = FALSE){
ai_dt <- unique(calls[, list(sample, GENE, POS, AD_hap1, AD_hap2, tot, f)])
ai_dt <- unique(ai_dt[, list(sum(pmin(AD_hap1, AD_hap2)), sum(tot), f), by = sample]) # Assume that lowest expressed allele is Xi
ai_dt[, pxa := (V2-V1)/V2]
ai_dt[, pxi := V1/V2]
ai_dt[, num := (pxa * (1-f)) + (pxi * f)]
ai_dt[, denom := f * (pxi + pxa) + (1-f) * (pxi + pxa)]
ai_dt[, ai := abs(num/denom -0.5)]
p <- ggplot(ai_dt[!is.na(f)], aes(ai, pxi)) + geom_point() + geom_smooth(method="loess") +
ggtitle("%Xi expression vs. AI") + theme(plot.title = element_text(hjust = .5))
if(labels)
p <- p + geom_text(aes(label = sample), vjust = 1.2)
print(p)
return(ai_dt)
}
.getAI <- function(calls){
ai_dt <- unique(calls[, list(sample, GENE, AD_hap1, AD_hap2, tot, f, tau, p_value)])
ai_dt[, xiexpr := pmin(AD_hap1, AD_hap2)/pmax(AD_hap1, AD_hap2)] #Assuming xaexpr is always 100%
ai_dt[, num := (1-f) + xiexpr * f]
ai_dt[, denom := ((1-f) * (xiexpr + 1)) + (f * (xiexpr + 1))]
ai_dt[, ai := abs(num/denom - 0.5)]
}
# Samples with ai < cutoffs are subject to XCI
plot_escape_fraction <- function(ai, cutoff = .1){
fai <- ai[, Nsamples := .N, by = "GENE"]
fai <- fai[ai < cutoff, Ninac := .N, ]
}
#' Sample estimates
#'
#' Return sample specific information from XCIR results
#'
#' @param bb_table A \code{data.table}. The table returned by \code{betaBinomXI}.
#'
#' @return A \code{data.table} with one entry per sample and information
#' regarding skewing and model fitting.
#'
#' @example inst/examples/betaBinomXI.R
#'
#' @export
sample_clean <- function(bb_table){
clean_cols <- c("sample", "model", "f", "a_est", "b_est")
ret <- unique(bb_table[, clean_cols, with = FALSE])
return(ret)
}
xcir_clean <- function(bb_table){
clean_cols <- c("sample", "GENE", "AD_hap1", "AD_hap2", "f", "p_value", "pbb")
ret <- unique(bb_table[, clean_cols, with = FALSE])
return(ret)
}
#' Classify X-genes
#'
#' Classify X-linked genes between Escape (E), Variable Escape (VE) and Silenced (S)
#'
#' @param xciObj A \code{data.table}. The table returned by \code{betaBinomXI}
#'
#' @return A \code{data.table} with genes and their XCI-state.
#'
#' @example inst/examples/betaBinomXI.R
#'
#' @export
getXCIstate <- function(xciObj){
if(!"status" %in% names(xciObj))
xciObj[, status := ifelse(p_value < 0.05, "E", "S")]
out <- setkey(xciObj, GENE, status)[, .N, by = c("GENE", "status")][CJ(GENE, status, unique = TRUE), allow.cartesian = TRUE][is.na(N), N := 0L]
out[, Ntot := sum(N), by = "GENE"]
outE <- out[status == "E"]#
outE[, pe := N/Ntot]
ret <- outE[, .(GENE, Ntot, Nesc = N, pe)]
ret[, XCIstate := ifelse(pe <= .25, "S", "VE")]
ret[, XCIstate := ifelse(pe >= .75, "E", XCIstate)]
return(ret)
}
.betaAB <- function(m, theta, mu, sigma2){
if(!is.null(m) & !is.null(theta)){
alpha <- m*(theta-2)+1
beta <- (1-m)*(theta-2)+1
} else if(!is.null(mu) & !is.null(sigma2)){
v <- (mu * (1-mu))/sigma2 - 1
alpha <- mu*v
beta <- (1-mu)*v
} else{
stop("At least one pair of m/theta or mu/sigma2 must be specified")
}
return(c(alpha, beta))
}
.betaMT <- function(alpha, beta, mu, sigma2){
if(!is.null(alpha) & !is.null(beta)){
theta <- alpha + beta
m <- (alpha-1)/(theta-2)
} else if(!is.null(mu) & !is.null(sigma2)){
} else{
stop("At least one pair of alpha/beta or mu/sigma2 must be specified")
}
return(c(m, theta))
}
.betaMV <- function(alpha, beta, m, theta){
if(!is.null(alpha) & !is.null(beta)){
mu <- alpha/(alpha + beta)
sigma2 <- (alpha*beta)/((alpha+beta)^2 * (alpha+beta+1))
} else if(!is.null(m) & !is.null(theta)){
mu <- m*(theta-2)+1/theta
# TODO: Simplify this
sigma2 <- ((m*(theta-2)+1) * ((1-m)*(theta-2)+1)) / # a*b
(( m*(theta-2)+1 + (1-m)*(theta-2)+1)^2 * # (a+b)^2
( m*(theta-2)+1 + (1-m)*(theta-2)+1 + 1)) # (a+b+1)
} else{
stop("At least one pair of alpha/beta or m/theta must be specified")
}
return(c(mu, sigma2))
}
#' Converting beta distribution parameters
#'
#' Convert parameter values between different beta distribution parametrization
#'
#' @param alpha A \code{numeric}. First shape parameter
#' @param beta A \code{numeric}. Second shape parameter
#' @param m A \code{numeric}. Mode
#' @param theta A \code{numeric}. Concentration
#' @param mu A \code{numeric}. Mean
#' @param sigma2 A \code{numeric}. Variance
#'
#' @details
#' This function needs two parameters that caracterise the beta distribution
#' (alpha and beta, mode and concentration or mean and variance) and returns
#' all parametrizations.
#'
#' @return A named \code{numeric} with all equivalent formulations of the distribution.
#'
#' @examples
#'
#' betaParam(alpha = 5, beta = 5)
#' betaParam(m = 0.5, theta = 10)
#' betaParam(mu = 0.5, sigma2 = 0.02272727)
#'
#' @rdname BetaConversion
#' @export
betaParam <- function(alpha = NULL, beta = NULL, m = NULL, theta = NULL, mu = NULL, sigma2 = NULL){
if(is.null(alpha)){
ab <- .betaAB(m, theta, mu, sigma2)
alpha <- ab[1]
beta <- ab[2]
}
if(is.null(m)){
mt <- .betaMT(alpha, beta, mu, sigma2)
m <- mt[1]
theta <- mt[2]
}
if(is.null(mu)){
mv <- .betaMV(alpha, beta, m, theta)
mu <- mv[1]
sigma2 <- mv[2]
}
return(c(alpha = alpha, beta = beta, m = m, theta = theta, mu = mu, sigma2 = sigma2))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.