Nothing
snprma <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE,
eps=0.1, verbose=TRUE, seed=1, cdfName, sns){
if (missing(sns)) sns <- basename(filenames)
if (missing(cdfName))
cdfName <- read.celfile.header(filenames[1])$cdfName
pkgname <- getCrlmmAnnotationName(cdfName)
if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
suggCall <- paste("library(", pkgname,
", lib.loc='/Altern/Lib/Loc')",
sep="")
msg <- paste("If", pkgname,
"is installed on an alternative location,",
"please load it manually by using", suggCall)
message(strwrap(msg))
stop("Package ", pkgname, " could not be found.")
}
if(verbose) message("Loading annotations and mixture model parameters.")
loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
autosomeIndex <- getVarInEnv("autosomeIndex")
pnsa <- getVarInEnv("pnsa")
pnsb <- getVarInEnv("pnsb")
fid <- getVarInEnv("fid")
reference <- getVarInEnv("reference")
aIndex <- getVarInEnv("aIndex")
bIndex <- getVarInEnv("bIndex")
SMEDIAN <- getVarInEnv("SMEDIAN")
theKnots <- getVarInEnv("theKnots")
gns <- getVarInEnv("gns")
##We will read each cel file, summarize, and run EM one by one
##We will save parameters of EM to use later
mixtureParams <- matrix(0, 4, length(filenames))
SNR <- vector("numeric", length(filenames))
SKW <- vector("numeric", length(filenames))
## mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, length(filenames))
## SNR <- initializeBigVector("crlmmSNR-", length(filenames), "numeric")
## SKW <- initializeBigVector("crlmmSKW-", length(filenames), "numeric")
## This is the sample for the fitting of splines
## BC: I like better the idea of the user passing the seed,
## because this might intefere with other analyses
## (like what happened to GCRMA)
set.seed(seed)
idx <- sort(sample(autosomeIndex, mixtureSampleSize))
##S will hold (A+B)/2 and M will hold A-B
##NOTE: We actually dont need to save S. Only for pics etc...
##f is the correction. we save to avoid recomputing
A <- matrix(as.integer(0), length(pnsa), length(filenames))
B <- matrix(as.integer(0), length(pnsb), length(filenames))
if(verbose){
message("Processing ", length(filenames), " files.")
pb <- txtProgressBar(min=0, max=length(filenames), style=3)
}
##for skewness. no need to do everything
idx2 <- sample(length(fid), 10^5)
##We start looping throug cel files
for(i in seq(along=filenames)){
y <- as.matrix(read.celfile(filenames[i], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
x <- log2(y[idx2])
SKW[i] <- mean((x-mean(x))^3)/(sd(x)^3)
rm(x)
y <- normalize.quantiles.use.target(y, target=reference)
A[, i] <- intMedianSummaries(y[aIndex, 1, drop=FALSE], pnsa)
B[, i] <- intMedianSummaries(y[bIndex, 1, drop=FALSE], pnsb)
##Now to fit the EM
if(fitMixture){
S <- (log2(A[idx, i])+log2(B[idx, i]))/2 - SMEDIAN
M <- log2(A[idx, i])-log2(B[idx, i])
##we need to test the choice of eps.. it is not the max diff between funcs
tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)
mixtureParams[, i] <- tmp[["coef"]]
SNR[i] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
}
if (verbose) setTxtProgressBar(pb, i)
}
if (verbose) close(pb)
if (!fitMixture) SNR <- mixtureParams <- NA
## gns comes from preprocStuff.rda
list(A=A, B=B, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName)
}
fitAffySnpMixture56 <- function(S, M, knots, probs=rep(1/3, 3), eps=.01, maxit=10, verbose=FALSE){
##56 stands for 5 and 6 arrays but will also work for Illumina
##Note the unfortunate choice of numbering:
##1 is BB, 2 AB, and 3 AA. Opposite to everything else!
##this is legacy code I decided not to change.
## this why at the end we report -coefs: F1 is the negative f
mus <- append(quantile(M, c(1, 5)/6, names=FALSE), 0, 1)
sigmas <- rep(mad(c(M[M<mus[1]]-mus[1], M[M>mus[3]]-mus[3])), 3)
sigmas[2] <- sigmas[2]/2
weights <- apply(cbind(mus, sigmas), 1, function(p) dnorm(M, p[1], p[2]))
previousF1 <- -Inf
change <- eps+1
it <- 0
if(verbose) message("Max change must be under ", eps, ".")
matS <- stupidSplineBasis(S, knots)
while (change > eps & it < maxit){
it <- it+1
## E
z <- sweep(weights, 2, probs, "*")
LogLik <- rowSums(z)
z <- sweep(z, 1, LogLik, "/")
probs <- colMeans(z)
## M
fit1 <- crossprod(chol2inv(chol(crossprod(sweep(matS, 1, z[, 1], FUN="*"), matS))), crossprod(matS, z[, 1]*M))
fit2 <- sum(z[, 2]*M)/sum(z[, 2])
F1 <- matS%*%fit1
sigmas[c(1, 3)] <- sqrt(sum(z[, 1]*(M-F1)^2)/sum(z[, 1]))
sigmas[2] <- sqrt(sum(z[, 2]*(M-fit2)^2)/sum(z[, 2]))
weights[, 1] <- dnorm(M, F1, sigmas[1])
weights[, 2] <- dnorm(M, fit2, sigmas[2])
weights[, 3] <- dnorm(M, -F1, sigmas[3])
change <- max(abs(F1-previousF1))
previousF1 <- F1
if(verbose) message("Iter ", it, ": ", change, ".")
}
medF1 <- median(-F1)
return(list(coef= -fit1, medF1=medF1, sigma1=sigmas[1], sigma2=sigmas[2]))
}
snprma2 <- function(filenames, mixtureSampleSize=10^5, fitMixture=FALSE,
eps=0.1, verbose=TRUE, seed=1, cdfName, sns){
if (missing(sns)) sns <- basename(filenames)
if (missing(cdfName))
cdfName <- read.celfile.header(filenames[1])[["cdfName"]]
pkgname <- getCrlmmAnnotationName(cdfName)
stopifnot(require(pkgname, character.only=TRUE, quietly=!verbose))
if(verbose) message("Loading annotations and mixture model parameters.")
obj <- loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
pnsa <- getVarInEnv("pnsa")
pnsb <- getVarInEnv("pnsb")
gns <- getVarInEnv("gns")
rm(list=obj, envir=.crlmmPkgEnv)
rm(obj)
##We will read each cel file, summarize, and run EM one by one
##We will save parameters of EM to use later
if(verbose) message("Initializing objects.")
mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, length(filenames), "double")
SNR <- initializeBigVector("crlmmSNR-", length(filenames), "double")
SKW <- initializeBigVector("crlmmSKW-", length(filenames), "double")
## This is the sample for the fitting of splines
## BC: I like better the idea of the user passing the seed,
## because this might intefere with other analyses
## (like what happened to GCRMA)
##S will hold (A+B)/2 and M will hold A-B
##NOTE: We actually dont need to save S. Only for pics etc...
##f is the correction. we save to avoid recomputing
A <- initializeBigMatrix("crlmmA-", length(pnsa), length(filenames), "integer")
B <- initializeBigMatrix("crlmmB-", length(pnsb), length(filenames), "integer")
sampleBatches <- splitIndicesByNode(seq(along=filenames))
if(verbose) message("Processing ", length(filenames), " files.")
ocLapply(sampleBatches, processCEL, filenames=filenames,
fitMixture=fitMixture, A=A, B=B, SKW=SKW, SNR=SNR,
mixtureParams=mixtureParams, eps=eps, seed=seed,
mixtureSampleSize=mixtureSampleSize, pkgname=pkgname,
neededPkgs=c("crlmm", pkgname))
close(mixtureParams)
close(SNR)
close(SKW)
close(A)
close(B)
list(A=A, B=B, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName)
}
processCEL <- function(i, filenames, fitMixture, A, B, SKW, SNR,
mixtureParams, eps, seed, mixtureSampleSize,
pkgname){
obj1 <- loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
obj2 <- loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
obj3 <- loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
autosomeIndex <- getVarInEnv("autosomeIndex")
pnsa <- getVarInEnv("pnsa")
pnsb <- getVarInEnv("pnsb")
fid <- getVarInEnv("fid")
reference <- getVarInEnv("reference")
aIndex <- getVarInEnv("aIndex")
bIndex <- getVarInEnv("bIndex")
SMEDIAN <- getVarInEnv("SMEDIAN")
theKnots <- getVarInEnv("theKnots")
gns <- getVarInEnv("gns")
rm(list=c(obj1, obj2, obj3), envir=.crlmmPkgEnv)
rm(obj1, obj2, obj3)
## for mixture
set.seed(seed)
idx <- sort(sample(autosomeIndex, mixtureSampleSize))
##for skewness. no need to do everything
idx2 <- sample(length(fid), 10^5)
open(A)
open(B)
open(SKW)
open(mixtureParams)
open(SNR)
for (k in i){
y <- as.matrix(read.celfile(filenames[k], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
x <- log2(y[idx2])
SKW[k] <- mean((x-mean(x))^3)/(sd(x)^3)
rm(x)
y <- normalize.quantiles.use.target(y, target=reference)
A[, k] <- intMedianSummaries(y[aIndex, 1, drop=FALSE], pnsa)
B[, k] <- intMedianSummaries(y[bIndex, 1, drop=FALSE], pnsb)
rm(y)
if(fitMixture){
S <- (log2(A[idx,k])+log2(B[idx, k]))/2 - SMEDIAN
M <- log2(A[idx, k])-log2(B[idx, k])
tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)
rm(S, M)
mixtureParams[, k] <- tmp[["coef"]]
SNR[k] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
rm(tmp)
} else {
mixtureParams[, k] <- NA
SNR[k] <- NA
}
}
close(A)
close(B)
close(SKW)
close(mixtureParams)
close(SNR)
rm(list=ls())
gc(verbose=FALSE)
TRUE
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.