Nothing
#' Computes p-values
#'
#' Returns p-values for each region reflecting the probability of observing the
#' mean test-statistic
#' of the between group comparisons among the inter-replicate comparisons. The
#' 'lite' version works with the outcome of M3D_Wrapper_lite and the 'lite'
#' parallel version.
#'
#' @param rrbs An rrbs object containing methylation and coverage data as
#' created using the BiSeq pacakge
#' @param CpGs A GRanges object with each row being a testing region
#' @param M3D_stat_lite A matrix containing the M3D test-statistic, as returned
#' from the M3D_Wrapper_lite function. This has the mean of the MMDs for the
#' comparisons between groups in the first column, and the within groups MMDs
#' in the others, for each region in the CpGs object.
#' @param group1 The name of the first group for the comparison. This is stored
#' in colData(rrbs)
#' @param group2 The name of the second group for the comparison. This is stored
#' in colData(rrbs)
#' @param smaller Determines whether the p-value is computed whether the
#' test-statistic is greater or lesser than inter-replicate
#' values. For our purposes, it should be set to FALSE.
#' @param comparison Details which groups we are using to define our empirical
#' testing distribution. The default is to
#' use all of them, however, should the user find one group contains unusually
#' high variability, then that group can be selected.
#' Values are either 'allReps', 'Group1' or 'Group2'.
#' @param method Determines which method is used to calculate p-values.
#' 'empirical' uses the empirical distribution directly, without modelling.
#' This is the default. 'model', fits an exponential distribution to the tail
#' of our null distribution.
#' @param outlier_test Logical, indicating whether to screen for outliers in
#' null distribution. Use only when comparison is to one group. Default is FALSE
#' @param thresh The threshold for cutting off regions as highly variable. This
#' is only to be used if results are being standardised across multiple
#' tests. Default is NA and is calculated using cut_off and sds parameters.
#' @param cut_off In the outlier test, we require any outlier to be in this
#' quartile, as a minimum, set to 0.975 by default
#' @param sds In the outlier test, we require any outlier in the null to be
#' greater than the mean of the null plus this many standard deviations, set to
#' 8 by default
#' @param closePara Sets a threshold for how close the exponential curve should
#' fit the empirical distribution in the 'model' method. If the method produces
#' errors, consider raising this parameter.
#' @return Returns a list P, with 2 entries. 'FDRmean' is the Benjamini-Hochberg
#' adjusted p-values. The unadjusted p-values are stored in 'Pmean'. If
#' we are test for outliers, we also have the highly variable regions as a third
#' entry
#' @author Tom Mayo \email{t.mayo@@ed.ac.uk}
#' @export
#' @examples
#' data(rrbsDemo)
#' data(CpGsDemo)
#' library(GenomicRanges)
#' library(BiSeq)
#' CpGsDemo <- CpGsDemo[1:5]
#' overlaps <- GenomicRanges::findOverlaps(CpGsDemo,rrbsDemo)
#' M3D_list <- M3D_Wrapper_lite(rrbsDemo,overlaps)
#' group1 <- unique(colData(rrbsDemo)$group)[1]
#' group2 <-unique(colData(rrbsDemo)$group)[2]
#' PDemo <- pvals_lite(rrbsDemo, CpGsDemo, M3D_list,
#' group1, group2, smaller=FALSE,comparison='allReps')
#' head(PDemo)
pvals_lite <- function(rrbs, CpGs, M3D_stat_lite, group1, group2,
smaller=FALSE,comparison='allReps',method='empirical',
outlier_test = FALSE, thresh = NA, cut_off = 0.975,
sds = 8, closePara=0.005){
# get the names of the columns to compare from M3D_stat_lite object
samples1 <- rownames(colData(rrbs))[colData(rrbs)[,]==group1]
samples2 <- rownames(colData(rrbs))[colData(rrbs)[,]==group2]
within1 <- determineGroupComps(samples1,type='within')
within2 <- determineGroupComps(samples2,type='within')
# within <- c(within1,within2)
# between <- determineGroupComps(samples1,samples2,type='between')
#
# ids1 <- findComps(M3D_stat_lite,within1)
# ids2 <- findComps(M3D_stat_lite,within2)
# ids3 <- findComps(MMD,between)
#
# # get the MMD in order of between groups, then within groups
# Within <- cbind(MMD[,within1],MMD[,within2])
# colnames(Within) <- c(within1,within2)
# Between <- MMD[,between]
# AvsB <- cbind(Between,Within)
Within <- M3D_stat_lite[,2:length(M3D_stat_lite[1,])]
# find the total counts over each island, per sample
nCpGs <- length(CpGs)
ovlaps <- findOverlaps(CpGs,rowRanges(rrbs))
islandList <- unique(queryHits(ovlaps))
nIslands <- length(islandList)
if (method=='model' | method=='model-con'){
# fit an exponential to the tail of the curve, using the 95th percentile
wndSze <- 0.01
cutoff <- quantile(Within,0.95,na.rm=TRUE)
cutoff <- floor(cutoff/wndSze)*wndSze
# create a sliding window and count the occurences in the bins
top <- ceiling(max(Within,na.rm=TRUE)/wndSze)*wndSze
base <- seq(cutoff, top, wndSze)
tot <- length(Within[!is.na(Within)])
cdf <- unlist(lapply(base,function(i) {
sum(Within<=i,na.rm=TRUE)/tot
}))
inds <- cdf!=1 # remove the log(0) terms
cdf <- cdf[inds]
base <- base[inds]
# lambda method - search over lambdas, choose the
# closest without underestimating the probabilities
# (assume lambda between 1 and 150)
lambdaTests <- seq(1,150,0.1)
fit <- rep(NaN,length(lambdaTests))
pass <- rep(TRUE,length(lambdaTests))
countClose <- rep(TRUE,length(lambdaTests))
for (i in 1:length(lambdaTests)){
# sum squared fit
l<- lambdaTests[i]
fvect <- exp(-l*base)-1+cdf
if (any(fvect<0)){
pass[i] <- FALSE
}
countClose[i] <- sum(abs(fvect)>closePara)
# fit[i] <- sum(fvect*fvect)
fit[i] <- abs(prod(fvect))
}
close <- which(countClose-min(countClose)<=2)
passes <- which(pass==TRUE)
fit <- fit[close]
passes <- passes[close]
lambdaTests <- lambdaTests[close]
ind <- which(fit==min(fit,na.rm=TRUE))
lambda <- lambdaTests[ind]
cat('lambda = ',lambda)
# get the pvalues (under the exponential distribution with lambda)
# use the mean of the inter-group M3D stats
Pmean <- lapply(1:length(CpGs), function(i) {
testStat <- M3D_stat_lite[i,1]
if (testStat <= 0) {
testStat <- 0
}
exp(-lambda* testStat)
})
} else if (method=='empirical'){
# use the empirical distribution of the test-statistic as null distribution
if(comparison == 'Group1'){
null <- M3D_stat_lite[,within1]
} else if (comparison == 'Group2'){
null <- M3D_stat_lite[,within2]
} else {
null <- Within
}
if(outlier_test){
if(is.na(thresh)){
tukey_cut_off <- quantile(null, cut_off, na.rm = TRUE)
thresh <- mean(null, na.rm = TRUE) + sds * sd(null, na.rm = TRUE)
thresh <- max(tukey_cut_off, thresh, na.rm=TRUE)
}
if(is.vector(null)){
high_within <- unique(which(null > thresh,
arr.ind = TRUE))
} else {
high_within <- unique(which(null > thresh,
arr.ind = TRUE)[,1]) # cut out any with some islands that different
}
inds <- setdiff(1:nIslands, high_within)
if(is.vector(null)){
null <- null[inds]
} else {
null <- null[inds,]
}
}
Pmean <- lapply(1:nIslands, function(j) {
if (smaller==TRUE){
length(which(null <= M3D_stat_lite[j,1]))/
length(null)
} else {
length(which(null >= M3D_stat_lite[j,1]))/
length(null)
}
})
if(outlier_test){
Pmean <- unlist(Pmean)
Pmean[high_within] <- NA
FDRmean <- p.adjust(as.vector(Pmean),method='BH')
FDRmean[high_within] <- NA
P <- list(Pmean,FDRmean, high_within)
names(P) <- c('Pmean','FDRmean', 'HighVariation')
return(P)
}
}
Pmean <- unlist(Pmean)
FDRmean <- p.adjust(as.vector(Pmean),method='BH')
P <- list(Pmean,FDRmean)
names(P) <- c('Pmean','FDRmean')
return(P)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.