liteNormalization2 <- function(celFiles, destDir, batch_size=40000,
verbose=TRUE, check=FALSE){
## Check existence of directory (destination)
## The destDir should not exist (yet)
if (file.exists(destDir))
stop(message(destDir, " already exists. Use the name of a non-existing directoty."))
dir.create(destDir)
if (check){
if (length(chiptype <- unique(readChipTypesFromCels(celFiles))) > 1)
stop("CEL files do not have the same chip type")
}else{
chiptype <- readChipTypesFromCels(celFiles[1])
}
## Determine pkg and number of files to read at once
pkgname <- cleanPlatformName(chiptype)
stopifnot(requireAnnotation(pkgname))
conn <- db(get(pkgname))
nfeat <- dbGetQuery(conn, "SELECT row_count FROM table_info WHERE tbl='pmfeature'")[[1]]
nCels <- length(celFiles)
## how many CELs simultaneously?
nfiles <- max((batch_size * 20 * length(celFiles)) %/% nfeat, 1)
batches <- splitIndicesByLength(1:nCels, nfiles)
normData <- ff(vmode='double', dim=c(nfeat, nCels),
pattern=file.path(destDir, 'normData-'))
open(normData)
## Get probe data
sql <- paste("SELECT fid, man_fsetid, allele, strand",
"FROM pmfeature INNER JOIN featureSet",
"WHERE man_fsetid LIKE 'SNP%'")
snpInfo <- dbGetQuery(conn, sql)
pnVec <- paste(snpInfo[['man_fsetid']],
c('A', 'B')[snpInfo[['allele']]+1],
c('S', 'A')[snpInfo[['strand']]+1])
snpInfo[['man_fsetid']] <- snpInfo[['allele']] <- snpInfo[['strand']] <- NULL
i <- order(pnVec)
snpInfo <- snpInfo[i,]
snpInfo[['pnVec']] <- pnVec[i]
rm(i)
rownames(snpInfo) <- NULL
## load reference
load(system.file("extdata", paste(pkgname, "Ref.rda", sep=""), package=pkgname))
reference <- sort(reference)
if (verbose){
message("Normalization.")
pb <- txtProgressBar(min=1, max=length(batches), style=3, initial=1)
}
n2t <- normalize.quantiles.use.target
for (i in 1:length(batches)){
cels <- batches[[i]]
normData[,cels] <- n2t(readCEL(celFiles[cels], snpInfo[['fid']]),
reference, copy=FALSE)
rm(cels)
if (verbose) setTxtProgressBar(pb, i)
}
if (verbose) close(pb)
nsnps <- dbGetQuery(conn, 'SELECT COUNT(*) FROM featureSet WHERE man_fsetid LIKE "SNP%"')[[1]]
dim1 <- c(nsnps, length(celFiles))
alleleAA <- ff(vmode='double', dim=dim1, pattern=file.path(destDir, 'alleleA-antisense-'))
alleleAS <- ff(vmode='double', dim=dim1, pattern=file.path(destDir, 'alleleA-sense-'))
alleleBA <- ff(vmode='double', dim=dim1, pattern=file.path(destDir, 'alleleB-antisense-'))
alleleBS <- ff(vmode='double', dim=dim1, pattern=file.path(destDir, 'alleleB-sense-'))
## FINISH THIS LATER
## rowsNorm: rows in normData by pnVec
rowsNorm <- split(1:nrow(normData), pnVec)
batches <- splitIndicesByLength(rowsNorm, batch_size)
close(normData)
close(alleleAA)
close(alleleAS)
close(alleleBA)
close(alleleBS)
}
liteNormalization <- function(celFiles, destDir, batch_size=40000, verbose=TRUE){
## Check existence of directory (destination)
## The destDir should not exist (yet)
if (file.exists(destDir)) stop(message(destDir, "exists."))
dir.create(destDir)
## Assumes all files are of the same type.
header <- readCelHeader(celFiles[1])
## Determine pkg and number of files to read at once
pkgname <- cleanPlatformName(header$chiptype)
stopifnot(requireAnnotation(pkgname))
conn <- db(get(pkgname))
nfeat <- dbGetQuery(conn, "SELECT row_count FROM table_info WHERE tbl='pmfeature'")[[1]]
nfiles <- max((batch_size * 20 * length(celFiles)) %/% nfeat, 1)
## Determine batches of CEL
nCels <- length(celFiles)
grps <- rep(1:nCels, each=nfiles, length.out=nCels)
batches <- split(celFiles, grps)
theFiles <- paste("normalized-", basename(celFiles), sep="")
batches.out <- split(file.path(destDir, theFiles), grps)
rm(theFiles, nCels, grps)
if (verbose) message("Preparing environment for normalization.")
suppressWarnings(createCel(batches.out[[1]][1], header=header))
if (length(unlist(batches.out))>1)
sapply(unlist(batches.out)[-1], function(x) file.copy(batches.out[[1]][1], x))
## Get feature IDs and load reference
fid <- dbGetQuery(conn, "SELECT fid FROM pmfeature")[[1]]
fid <- sort(fid)
load(system.file("extdata", paste(pkgname, "Ref.rda", sep=""), package=pkgname))
reference <- sort(reference)
if (verbose){
message("Normalization.")
pb <- txtProgressBar(min=1, max=length(batches), style=3, initial=1)
}
for (i in 1:length(batches)){
new <- normalize.quantiles.use.target(readCelIntensities(batches[[i]], indices=fid), reference, copy=FALSE)
for (j in 1:length(batches[[i]]))
updateCel(batches.out[[i]][j], indices=fid, intensities=new[,j])
rm(new)
if (verbose) setTxtProgressBar(pb, i)
}
if (verbose) close(pb)
}
justCRLMM <- function(filenames, batch_size=40000,
minLLRforCalls=c(5, 1, 5), recalibrate=TRUE,
balance=1.5, phenoData=NULL, verbose=TRUE,
pkgname=NULL, tmpdir=tempdir()){
tmpdir <- tempfile("crlmm.tmp", tmpdir)
if (is.null(phenoData))
stop("phenoData must be provided and must contain a variable called 'gender'.")
if (!is.null(phenoData) & !is(phenoData, "AnnotatedDataFrame"))
stop("phenoData provided, but must be of 'AnnotatedDataFrame' class.")
if (is(phenoData, "AnnotatedDataFrame") & !("gender" %in% names(pData(phenoData))))
stop("phenoData provided, but 'gender' variable not found.")
randomName <- tempfile("crlmm.", tmpdir)
chips <- sapply(filenames, function(x) readCelHeader(x)$chiptype)
if (length(unique(chips)) > 1){
print(table(chips))
stop("All the CEL files must be of the same type.")
}
if (is.null(pkgname))
pkgname <- cleanPlatformName(chips[1])
snpcnv <- pkgname == "pd.genomewidesnp.6"
rm(chips)
requireAnnotation(pkgname)
sql.tmp <- "SELECT man_fsetid FROM featureSet WHERE man_fsetid LIKE 'SNP%' AND chrom = 'X'"
snps.chrX <- dbGetQuery(db(get(pkgname)), sql.tmp)[[1]]
rm(sql.tmp)
sns <- basename(filenames)
liteNormalization(filenames, destDir=tmpdir, batch_size=batch_size, verbose=verbose)
filenames <- paste(tmpdir, "/normalized-", basename(filenames), sep="")
snps <- dbGetQuery(db(get(pkgname)), "SELECT man_fsetid FROM featureSet WHERE man_fsetid LIKE 'SNP%' ORDER BY man_fsetid")[[1]]
snps <- split(snps, rep(1:length(snps), each=batch_size, length.out=length(snps)))
theSNR <- matrix(NA, nrow=length(snps), ncol=length(filenames))
prefix <- "SELECT fid, man_fsetid, pmfeature.allele, pmfeature.strand FROM featureSet, pmfeature WHERE man_fsetid IN ("
suffix <- ") AND pmfeature.fsetid = featureSet.fsetid ORDER BY fid"
allSnps <- dbGetQuery(db(get(pkgname)), "SELECT man_fsetid FROM featureSet WHERE man_fsetid LIKE 'SNP%' ORDER BY man_fsetid")[[1]]
txt <- sprintf("Genotyping: %06.2f percent done.", 0)
if (verbose) cat(txt)
del <- paste(rep("\b", nchar(txt)), collapse="")
for (i in 1:length(snps)){
t0 <- proc.time()
mid <- paste("'", snps[[i]],"'", collapse=", ", sep="")
sql <- paste(prefix, mid, suffix)
tmp <- dbGetQuery(db(get(pkgname)), sql)
if (!snpcnv){
pnVec <- paste(tmp[["man_fsetid"]],
c("A", "B")[tmp[["allele"]]+1],
c("S", "A")[tmp[["strand"]]+1], sep="")
}else{
pnVec <- paste(tmp[["man_fsetid"]],
c("A", "B")[tmp[["allele"]]+1],
sep="")
}
idx <- order(pnVec)
tmp[["man_fsetid"]] <- tmp[["allele"]] <- tmp[["strand"]] <- NULL
pms <- readCelIntensities(filenames, indices=tmp[["fid"]])
pms <- pms[idx, ]
dimnames(pms) <- NULL
theSumm <- basicRMA(pms, pnVec[idx], FALSE, FALSE)
save(theSumm, file=paste(randomName, i, "summ", sep="."))
if (!snpcnv){
sqs <- sqsFrom(theSumm)
}else{
sqs <- sqsFrom.SnpCnv(theSumm)
}
annotation(sqs) <- pkgname
phenoData(sqs) <- phenoData
rm(pms, mid, sql, tmp, pnVec, idx)
### TRYING CRLMM HERE
correction <- fitAffySnpMixture(sqs, verbose=FALSE)
theSNR[i, ] <- correction$snr
load(system.file(paste("extdata/", pkgname, "CrlmmInfo.rda", sep=""), package=pkgname))
myenv <- get(paste(pkgname,"Crlmm",sep="")); rm(list=paste(pkgname,"Crlmm",sep=""))
thePriors <- get("priors", myenv)
snpsIn <- match(featureNames(sqs), allSnps)
myenv$hapmapCallIndex <- myenv$hapmapCallIndex[snpsIn]
if (!snpcnv){
myenv$params$centers <- myenv$params$centers[snpsIn,,]
myenv$params$scales <- myenv$params$scales[snpsIn,,]
}else{
myenv$params$centers <- myenv$params$centers[snpsIn,]
myenv$params$scales <- myenv$params$scales[snpsIn,]
}
myenv$params$N <- myenv$params$N[snpsIn,]
Index <- which(!get("hapmapCallIndex",myenv))
myCalls <- matrix(NA,dim(sqs)[1],dim(sqs)[2])
myCalls[Index,] <- getInitialAffySnpCalls(correction,Index,verbose=FALSE, sqsClass=class(sqs))
rparams <- getAffySnpGenotypeRegionParams(sqs, myCalls, correction$fs,
subset=Index,verbose=FALSE, sqsClass=class(sqs))
if (!snpcnv){
oneStrand <- apply(is.na(getM(sqs[,1])[,1,]), 1,
function(v) ifelse(length(ll <- which(v))==0, 0, ll))
rparams <- updateAffySnpParams(rparams, thePriors, oneStrand, verbose=FALSE)
params <- replaceAffySnpParams(get("params",myenv), rparams, Index)
myDist <- getAffySnpDistance(sqs, params, correction$fs)
myDist[,,-2,] <- balance*myDist[,,-2,]
}else{
rparams <- updateAffySnpParamsSingle(rparams, thePriors, verbose=FALSE)
params <- replaceAffySnpParamsSingle(get("params",myenv), rparams, Index)
myDist <- getAffySnpDistanceSingle(sqs, params, correction$fs)
myDist[,,-2] <- balance*myDist[,,-2]
}
XIndex <- which(snps[i] %in% snps.chrX)
myCalls <- getAffySnpCalls(myDist,XIndex,maleIndex=NULL,verbose=FALSE, sqsClass=class(sqs))
LLR <- getAffySnpConfidence(myDist,myCalls,XIndex,maleIndex=NULL,verbose=FALSE, sqsClass=class(sqs))
if(recalibrate){
for(k in 1:3)
myCalls[myCalls == k & LLR < minLLRforCalls[k]] <- NA
rm(LLR)
myCalls[, theSNR[i,] < 3.675] <- NA
rparams <- getAffySnpGenotypeRegionParams(sqs, myCalls,
correction$fs, verbose=FALSE, sqsClass=class(sqs))
rm(myCalls)
if (!snpcnv){
rparams <- updateAffySnpParams(rparams, thePriors, oneStrand)
myDist <- getAffySnpDistance(sqs, rparams, correction$fs, verbose=FALSE)
myDist[,,-2,] <- balance*myDist[,,-2,]
rm(oneStrand)
}else{
rparams <- updateAffySnpParamsSingle(rparams, thePriors)
myDist <- getAffySnpDistanceSingle(sqs, rparams, correction$fs, verbose=FALSE)
myDist[,,-2] <- balance*myDist[,,-2]
}
myCalls <- getAffySnpCalls(myDist,XIndex, maleIndex=NULL, verbose=FALSE, sqsClass=class(sqs))
LLR <- getAffySnpConfidence(myDist,myCalls,XIndex,maleIndex=NULL,verbose=FALSE, sqsClass=class(sqs))
## pacc <- LLR2conf(myCalls, LLR, theSNR[i,], annotation(sqs))
}
save(myDist, myCalls, LLR, file=paste(randomName, i, sep="."))
rm(myDist, correction, Index, k, LLR, myCalls, myenv, params, rparams, snpsIn, sqs, XIndex)
if (verbose){
cat(del)
cat(sprintf("Genotyping: %06.2f percent done.", i/length(snps)*100))
}
}
finalDist <- finalCalls <- finalConfs <- finalSumm <- NULL
if (verbose){
cat("\n")
txt <- sprintf("Finalizing: %06.2f percent done.", 0)
del <- paste(rep("\b", nchar(txt)), collapse="")
cat(txt)
}
for (i in 1:length(snps)){
load(paste(randomName, i, sep="."))
finalCalls <- rbind(finalCalls, myCalls)
rm(myCalls)
finalConfs <- rbind(finalConfs, LLR)
rm(LLR)
if (i == 1){
finalDist <- myDist
}else{
finalDist <- my.abind(finalDist, myDist)
}
rm(myDist)
load(paste(randomName, i, "summ", sep="."))
finalSumm <- rbind(finalSumm, theSumm)
rm(theSumm)
if (verbose){
cat(del)
cat(sprintf("Finalizing: %06.2f percent done.", i/length(snps)*100))
}
}
save(finalDist, file="finalDist.rda")
if (!snpcnv){
finalSQS <- sqsFrom(finalSumm)
ata <- allele(finalSQS, "A", "antisense")
atb <- allele(finalSQS, "B", "antisense")
sta <- allele(finalSQS, "A", "sense")
stb <- allele(finalSQS, "B", "sense")
}else{
finalSQS <- sqsFrom.SnpCnv(finalSumm)
ta <- allele(finalSQS, "A")
tb <- allele(finalSQS, "B")
}
rm(finalSQS)
if (verbose) cat("\n")
warning("Please check the contents and remove the directory: ", tmpdir)
snr <- exp(colMeans(log(theSNR)))
pacc <- LLR2conf(finalCalls, finalConfs, snr, pkgname)
if (!snpcnv){
out <- new("SnpSuperSet", call=finalCalls, callProbability=pacc, LLR=finalConfs,
antisenseAlleleA=ata, antisenseAlleleB=atb, senseAlleleA=sta, senseAlleleB=stb)
rm(ata, atb, sta, stb)
}else{
out <- new("SnpSuperSet", call=finalCalls, callProbability=pacc, LLR=finalConfs,
alleleA=ta, alleleB=tb)
rm(ta, tb)
}
annotation(out) <- pkgname
featureNames(out) <- allSnps
sampleNames(out) <- sns
phenoData(out) <- phenoData
out$crlmmSNR <- snr
return(out)
## return(list(calls=out, sqs=finalSQS))
}
my.abind <- function(x, y){
dim.x <- dim(x)
dim.y <- dim(y)
stopifnot(identical(dim.x[-1], dim.y[-1]))
nrows.x <- dim.x[1]
nrows.y <- dim.y[1]
dim(x) <- c(nrows.x, prod(dim.x[-1]))
dim(y) <- c(nrows.y, prod(dim.y[-1]))
z <- rbind(x,y)
dim(z) = c(nrows.x+nrows.y, dim.x[-1])
z
}
justCRLMM2 <- function(filenames, batch_size=10000, chr=1,
minLLRforCalls=c(5, 1, 5), recalibrate=TRUE,
balance=1.5, phenoData=NULL, verbose=TRUE,
pkgname=NULL, tmpdir=tempdir()){
## To genotype by Chromosome, must assumed normalized CELs
randomName <- tempfile("crlmm.", tmpdir)
chips <- sapply(filenames, function(x) readCelHeader(x)$chiptype)
if (length(unique(chips)) > 1){
print(table(chips))
stop("All the CEL files must be of the same type.")
}
if (is.null(pkgname))
pkgname <- cleanPlatformName(chips[1])
snpcnv <- pkgname == "pd.genomewidesnp.6"
rm(chips)
requireAnnotation(pkgname)
sql.tmp <- "SELECT man_fsetid FROM featureSet WHERE man_fsetid LIKE 'SNP%' AND chrom = 'X'"
snps.chrX <- dbGetQuery(db(get(pkgname)), sql.tmp)[[1]]
rm(sql.tmp)
sns <- basename(filenames)
## liteNormalization(filenames, destDir=tmpdir, pkgname=pkgname, verbose=verbose)
## liteNormalization2(filenames, destDir=tmpdir, batch_size=batch_size, verbose=verbose)
## filenames <- paste(tmpdir, "/normalized-", basename(filenames), sep="")
if (as.character(chr) == "1"){
snps <- dbGetQuery(db(get(pkgname)),
paste("SELECT man_fsetid FROM featureSet WHERE chrom IN ('1', 'MT') OR chrom IS NULL",
"AND man_fsetid LIKE 'SNP%' ORDER BY man_fsetid"))[[1]]
}else{
snps <- dbGetQuery(db(get(pkgname)),
paste("SELECT man_fsetid FROM featureSet WHERE chrom='", chr,
"' AND man_fsetid LIKE 'SNP%' ORDER BY man_fsetid", sep=""))[[1]]
}
snps <- split(snps, rep(1:length(snps), each=batch_size, length.out=length(snps)))
theSNR <- matrix(NA, nrow=length(snps), ncol=length(filenames))
prefix <- "SELECT fid, man_fsetid, pmfeature.allele, pmfeature.strand FROM featureSet, pmfeature WHERE man_fsetid IN ("
suffix <- ") AND pmfeature.fsetid = featureSet.fsetid ORDER BY fid"
allSnps <- dbGetQuery(db(get(pkgname)), "SELECT man_fsetid FROM featureSet WHERE man_fsetid LIKE 'SNP%' ORDER BY man_fsetid")[[1]]
txt <- sprintf("Genotyping: %06.2f percent done. Timing: %06.2f", 0, 0)
if (verbose) cat(txt)
del <- paste(rep("\b", nchar(txt)), collapse="")
for (i in 1:length(snps)){
t0 <- proc.time()
mid <- paste("'", snps[[i]],"'", collapse=", ", sep="")
sql <- paste(prefix, mid, suffix)
tmp <- dbGetQuery(db(get(pkgname)), sql)
if (!snpcnv){
pnVec <- paste(tmp[["man_fsetid"]],
c("A", "B")[tmp[["allele"]]+1],
c("S", "A")[tmp[["strand"]]+1], sep="")
}else{
pnVec <- paste(tmp[["man_fsetid"]],
c("A", "B")[tmp[["allele"]]+1],
sep="")
}
idx <- order(pnVec)
tmp[["man_fsetid"]] <- tmp[["allele"]] <- tmp[["strand"]] <- NULL
pms <- readCelIntensities(filenames, indices=tmp[["fid"]])
pms <- pms[idx, ]
dimnames(pms) <- NULL
theSumm <- basicRMA(pms, pnVec[idx], FALSE, FALSE)
save(theSumm, file=paste(randomName, i, "summ", sep="."))
if (!snpcnv){
sqs <- sqsFrom(theSumm)
}else{
sqs <- sqsFrom.SnpCnv(theSumm)
}
annotation(sqs) <- pkgname
phenoData(sqs) <- phenoData
rm(pms, mid, sql, tmp, pnVec, idx)
### TRYING CRLMM HERE
correction <- fitAffySnpMixture(sqs, verbose=FALSE)
theSNR[i, ] <- correction$snr
load(system.file(paste("extdata/", pkgname, "CrlmmInfo.rda", sep=""), package=pkgname))
myenv <- get(paste(pkgname,"Crlmm",sep="")); rm(list=paste(pkgname,"Crlmm",sep=""))
thePriors <- get("priors", myenv)
snpsIn <- match(featureNames(sqs), allSnps)
myenv$hapmapCallIndex <- myenv$hapmapCallIndex[snpsIn]
if (!snpcnv){
myenv$params$centers <- myenv$params$centers[snpsIn,,]
myenv$params$scales <- myenv$params$scales[snpsIn,,]
}else{
myenv$params$centers <- myenv$params$centers[snpsIn,]
myenv$params$scales <- myenv$params$scales[snpsIn,]
}
myenv$params$N <- myenv$params$N[snpsIn,]
Index <- which(!get("hapmapCallIndex",myenv))
myCalls <- matrix(NA,dim(sqs)[1],dim(sqs)[2])
myCalls[Index,] <- getInitialAffySnpCalls(correction,Index,verbose=FALSE, sqsClass=class(sqs))
rparams <- getAffySnpGenotypeRegionParams(sqs, myCalls, correction$fs,
subset=Index,verbose=FALSE, sqsClass=class(sqs))
if (!snpcnv){
oneStrand <- apply(is.na(getM(sqs[,1])[,1,]), 1,
function(v) ifelse(length(ll <- which(v))==0, 0, ll))
rparams <- updateAffySnpParams(rparams, thePriors, oneStrand, verbose=FALSE)
params <- replaceAffySnpParams(get("params",myenv), rparams, Index)
myDist <- getAffySnpDistance(sqs, params, correction$fs)
myDist[,,-2,] <- balance*myDist[,,-2,]
}else{
rparams <- updateAffySnpParamsSingle(rparams, thePriors, verbose=FALSE)
params <- replaceAffySnpParamsSingle(get("params",myenv), rparams, Index)
myDist <- getAffySnpDistanceSingle(sqs, params, correction$fs)
myDist[,,-2] <- balance*myDist[,,-2]
}
XIndex <- which(snps[i] %in% snps.chrX)
myCalls <- getAffySnpCalls(myDist,XIndex,maleIndex=NULL,verbose=FALSE, sqsClass=class(sqs))
LLR <- getAffySnpConfidence(myDist,myCalls,XIndex,maleIndex=NULL,verbose=FALSE, sqsClass=class(sqs))
if(recalibrate){
for(k in 1:3)
myCalls[myCalls == k & LLR < minLLRforCalls[k]] <- NA
rm(LLR)
myCalls[, theSNR[i,] < 3.675] <- NA
rparams <- getAffySnpGenotypeRegionParams(sqs, myCalls,
correction$fs, verbose=FALSE, sqsClass=class(sqs))
rm(myCalls)
if (!snpcnv){
rparams <- updateAffySnpParams(rparams, thePriors, oneStrand)
myDist <- getAffySnpDistance(sqs, rparams, correction$fs, verbose=FALSE)
myDist[,,-2,] <- balance*myDist[,,-2,]
rm(oneStrand)
}else{
rparams <- updateAffySnpParamsSingle(rparams, thePriors)
myDist <- getAffySnpDistanceSingle(sqs, rparams, correction$fs, verbose=FALSE)
myDist[,,-2] <- balance*myDist[,,-2]
}
myCalls <- getAffySnpCalls(myDist,XIndex, maleIndex=NULL, verbose=FALSE, sqsClass=class(sqs))
LLR <- getAffySnpConfidence(myDist,myCalls,XIndex,maleIndex=NULL,verbose=FALSE, sqsClass=class(sqs))
}
save(myDist, myCalls, LLR, file=paste(randomName, i, sep="."))
rm(myDist, correction, Index, k, LLR, myCalls, myenv, params, rparams, snpsIn, sqs, XIndex)
if (verbose){
cat(del)
cat(sprintf("Genotyping: %06.2f percent done. Timing: %06.2f", i/length(snps)*100, (proc.time()-t0)[3]))
}
}
finalDist <- finalCalls <- finalConfs <- finalSumm <- NULL
if (verbose){
cat("\n")
txt <- sprintf("Finalizing: %06.2f percent done.", 0)
del <- paste(rep("\b", nchar(txt)), collapse="")
cat(txt)
}
for (i in 1:length(snps)){
load(paste(randomName, i, sep="."))
finalCalls <- rbind(finalCalls, myCalls)
rm(myCalls)
finalConfs <- rbind(finalConfs, LLR)
rm(LLR)
if (i == 1){
finalDist <- myDist
}else{
finalDist <- my.abind(finalDist, myDist)
}
rm(myDist)
load(paste(randomName, i, "summ", sep="."))
finalSumm <- rbind(finalSumm, theSumm)
rm(theSumm)
if (verbose){
cat(del)
cat(sprintf("Finalizing: %06.2f percent done.", i/length(snps)*100))
}
}
rm(finalDist)
## save(finalDist, file="finalDist.rda")
if (!snpcnv){
finalSQS <- sqsFrom(finalSumm)
ata <- allele(finalSQS, "A", "antisense")
atb <- allele(finalSQS, "B", "antisense")
sta <- allele(finalSQS, "A", "sense")
stb <- allele(finalSQS, "B", "sense")
}else{
finalSQS <- sqsFrom.SnpCnv(finalSumm)
ta <- allele(finalSQS, "A")
tb <- allele(finalSQS, "B")
}
rm(finalSQS)
if (verbose) cat("\n")
warning("Please check the contents and remove the directory: ", tmpdir)
snr <- exp(colMeans(log(theSNR)))
pacc <- LLR2conf(finalCalls, finalConfs, snr, pkgname)
if (!snpcnv){
out <- new("SnpSuperSet", call=finalCalls, callProbability=pacc, LLR=finalConfs,
antisenseAlleleA=ata, antisenseAlleleB=atb, senseAlleleA=sta, senseAlleleB=stb)
rm(ata, atb, sta, stb)
}else{
out <- new("SnpSuperSet", call=finalCalls, callProbability=pacc, LLR=finalConfs,
alleleA=ta, alleleB=tb)
rm(ta, tb)
}
annotation(out) <- pkgname
featureNames(out) <- unlist(snps)
sampleNames(out) <- sns
phenoData(out) <- phenoData
out$crlmmSNR <- snr
obj <- paste("chr", chr, sep="")
assign(obj, out)
rm(out)
save(list=obj, file=paste(obj, ".rda", sep=""))
}
genotypeByChromosome <- function(filenames, batch_size=10000,
minLLRforCalls=c(5, 1, 5), recalibrate=TRUE,
balance=1.5, phenoData=NULL, verbose=TRUE,
pkgname=NULL, tmpdir=tempdir()){
if (is.null(pkgname))
pkgname <- cleanPlatformName(readCelHeader(filenames[1])$chiptype)
requireAnnotation(pkgname)
sql <- "SELECT DISTINCT chrom FROM featureSet WHERE man_fsetid LIKE 'SNP%'"
chrs <- dbGetQuery(db(get(pkgname)), sql)[[1]]
chrs <- chrs[!is.na(chrs) & chrs != "MT"]
for(i in chrs){
message("Processing chromosome", i)
justCRLMM2(filenames, batch_size=batch_size, chr=i,
minLLRforCalls=minLLRforCalls, recalibrate=recalibrate,
balance=balance, phenoData=phenoData, verbose=verbose,
pkgname=pkgname, tmpdir=tmpdir)
}
}
justCRLMMv2 <- function(filenames, tmpdir, batch_size=40000,
minLLRforCalls=c(5, 1, 5), recalibrate=TRUE,
balance=1.5, verbose=TRUE, pkgname){
stopifnot(!(missing(tmpdir)|file.exists(tmpdir)))
tmpdir <- gsub("\\/$", "", tmpdir)
## PHENODATA
## gender is not a key thing here
## algorithm behaves robustly...
phenoData <- new("AnnotatedDataFrame",
data=data.frame(gender=rep("female",
length(filenames))))
randomName <- tempfile("crlmm.", tmpdir)
chips <- sapply(filenames, function(x) readCelHeader(x)$chiptype)
if (length(unique(chips)) > 1){
print(table(chips))
stop("All the CEL files must be of the same type.")
}
if (missing(pkgname))
pkgname <- cleanPlatformName(chips[1])
rm(chips)
requireAnnotation(pkgname, verbose=verbose)
sql.tmp <- "SELECT man_fsetid FROM featureSet WHERE man_fsetid LIKE 'SNP%' AND chrom = 'X'"
snps.chrX <- dbGetQuery(db(get(pkgname)), sql.tmp)[[1]]
rm(sql.tmp)
sns <- basename(filenames)
liteNormalization(filenames, destDir=tmpdir, batch_size=batch_size, verbose=verbose)
filenames <- paste(tmpdir, "/normalized-", basename(filenames), sep="")
analysis <- data.frame(v1=c("annotation", "nsamples", "batch_size"),
v2=c(pkgname, length(filenames), batch_size),
stringsAsFactors=FALSE)
write.table(analysis, file.path(tmpdir, "analysis.txt"), row.names=FALSE,
col.names=FALSE, quote=FALSE, sep="\t")
snps <- dbGetQuery(db(get(pkgname)), "SELECT man_fsetid FROM featureSet WHERE man_fsetid LIKE 'SNP%' ORDER BY man_fsetid")[[1]]
snps <- split(snps, rep(1:length(snps), each=batch_size, length.out=length(snps)))
theSNR <- matrix(NA, nrow=length(snps), ncol=length(filenames))
prefix <- "SELECT fid, man_fsetid, pmfeature.allele, pmfeature.strand FROM featureSet, pmfeature WHERE man_fsetid IN ("
suffix <- ") AND pmfeature.fsetid = featureSet.fsetid ORDER BY fid"
allSnps <- dbGetQuery(db(get(pkgname)), "SELECT man_fsetid FROM featureSet WHERE man_fsetid LIKE 'SNP%' ORDER BY man_fsetid")[[1]]
txt <- sprintf("Genotyping: %06.2f percent done.", 0)
if (verbose) cat(txt)
del <- paste(rep("\b", nchar(txt)), collapse="")
for (i in 1:length(snps)){
mid <- paste("'", snps[[i]],"'", collapse=", ", sep="")
sql <- paste(prefix, mid, suffix)
tmp <- dbGetQuery(db(get(pkgname)), sql)
pnVec <- paste(tmp[["man_fsetid"]],
c("A", "B")[tmp[["allele"]]+1],
c("S", "A")[tmp[["strand"]]+1], sep="")
idx <- order(pnVec)
tmp[["man_fsetid"]] <- tmp[["allele"]] <- tmp[["strand"]] <- NULL
pms <- readCelIntensities(filenames, indices=tmp[["fid"]])
pms <- pms[idx, ]
dimnames(pms) <- NULL
theSumm <- basicRMA(pms, pnVec[idx], FALSE, FALSE)
save(theSumm, file=paste(randomName, i, "summ", sep="."))
sqs <- sqsFrom(theSumm)
annotation(sqs) <- pkgname
phenoData(sqs) <- phenoData
rm(pms, mid, sql, tmp, pnVec, idx)
### TRYING CRLMM HERE
correction <- fitAffySnpMixture(sqs, verbose=FALSE)
theSNR[i, ] <- correction$snr
load(system.file(paste("extdata/", pkgname, "CrlmmInfo.rda", sep=""), package=pkgname))
myenv <- get(paste(pkgname,"Crlmm",sep="")); rm(list=paste(pkgname,"Crlmm",sep=""))
thePriors <- get("priors", myenv)
snpsIn <- match(featureNames(sqs), allSnps)
myenv$hapmapCallIndex <- myenv$hapmapCallIndex[snpsIn]
myenv$params$centers <- myenv$params$centers[snpsIn,,]
myenv$params$scales <- myenv$params$scales[snpsIn,,]
myenv$params$N <- myenv$params$N[snpsIn,]
Index <- which(!get("hapmapCallIndex",myenv))
myCalls <- matrix(NA,dim(sqs)[1],dim(sqs)[2])
myCalls[Index,] <- getInitialAffySnpCalls(correction,Index,verbose=FALSE)
rparams <- getAffySnpGenotypeRegionParams(sqs, myCalls, correction$fs,
subset=Index,verbose=FALSE)
oneStrand <- apply(is.na(getM(sqs[,1])[,1,]), 1,
function(v) ifelse(length(ll <- which(v))==0, 0, ll))
rparams <- updateAffySnpParams(rparams, thePriors, oneStrand, verbose=FALSE)
params <- replaceAffySnpParams(get("params",myenv), rparams, Index)
myDist <- getAffySnpDistance(sqs, params, correction$fs)
myDist[,,-2,] <- balance*myDist[,,-2,]
XIndex <- which(snps[i] %in% snps.chrX)
myCalls <- getAffySnpCalls(myDist,XIndex,maleIndex=NULL,verbose=FALSE)
LLR <- getAffySnpConfidence(myDist,myCalls,XIndex,maleIndex=NULL,verbose=FALSE)
if(recalibrate){
for(k in 1:3)
myCalls[myCalls == k & LLR < minLLRforCalls[k]] <- NA
rm(LLR)
myCalls[, theSNR[i,] < 3.675] <- NA
rparams <- getAffySnpGenotypeRegionParams(sqs, myCalls,
correction$fs,
verbose=FALSE)
rm(myCalls)
rparams <- updateAffySnpParams(rparams, thePriors, oneStrand)
myDist <- getAffySnpDistance(sqs, rparams, correction$fs, verbose=FALSE)
save(myDist, file=paste(randomName, "DONT-REMOVEME", i, sep="."))
myDist[,,-2,] <- balance*myDist[,,-2,]
rm(oneStrand)
myCalls <- getAffySnpCalls(myDist,XIndex, maleIndex=NULL, verbose=FALSE)
LLR <- getAffySnpConfidence(myDist,myCalls,XIndex,maleIndex=NULL,verbose=FALSE)
}
save(myDist, myCalls, LLR, file=paste(randomName, i, sep="."))
rm(myDist, correction, Index, k, LLR, myCalls, myenv, params, rparams, snpsIn, sqs, XIndex)
if (verbose){
cat(del)
cat(sprintf("Genotyping: %06.2f percent done.", i/length(snps)*100))
}
}
finalDist <- finalCalls <- finalConfs <- finalSumm <- NULL
if (verbose){
cat("\n")
txt <- sprintf("Finalizing: %06.2f percent done.", 0)
del <- paste(rep("\b", nchar(txt)), collapse="")
cat(txt)
}
for (i in 1:length(snps)){
fname <- paste(randomName, i, sep=".")
load(fname)
finalCalls <- rbind(finalCalls, myCalls)
rm(myCalls)
finalConfs <- rbind(finalConfs, LLR)
rm(LLR)
if (i == 1){
finalDist <- myDist
}else{
finalDist <- my.abind(finalDist, myDist)
}
rm(myDist)
fname2 <- paste(randomName, i, "summ", sep=".")
load(fname2)
finalSumm <- rbind(finalSumm, theSumm)
rm(theSumm)
if (verbose){
cat(del)
cat(sprintf("Finalizing: %06.2f percent done.", i/length(snps)*100))
}
unlink(fname)
rm(fname)
unlink(fname2)
rm(fname2)
}
save(finalDist, file=file.path(tmpdir, "finalDist.rda"))
finalSQS <- sqsFrom(finalSumm)
ata <- allele(finalSQS, "A", "antisense")
atb <- allele(finalSQS, "B", "antisense")
sta <- allele(finalSQS, "A", "sense")
stb <- allele(finalSQS, "B", "sense")
rm(finalSQS)
colnames(ata) <- colnames(atb) <- colnames(sta) <- colnames(stb) <- sns
## remove normalized CEL
sapply(list.celfiles(tmpdir, full.names=T), unlink)
snr <- matrix(exp(colMeans(log(theSNR))), nrow=1)
writeBin(as.numeric(snr), file.path(tmpdir, "snr"))
colnames(snr) <- sns
pacc <- as.matrix(LLR2conf(finalCalls, finalConfs, snr, pkgname))
rownames(pacc) <- allSnps
colnames(pacc) <- sns
if (verbose) cat("\n")
rownames(finalCalls) <- rownames(finalConfs) <- allSnps
colnames(finalCalls) <- colnames(finalConfs) <- sns
write.table(finalCalls, file.path(tmpdir, "crlmm-calls.txt"), quote=FALSE, sep="\t", col.names=TRUE)
rm(finalCalls)
write.table(pacc, file.path(tmpdir, "crlmm-conf.txt"), quote=FALSE, sep="\t", col.names=TRUE)
rm(pacc)
write.table(finalConfs, file.path(tmpdir, "crlmm-llr.txt"), quote=FALSE, sep="\t", col.names=TRUE)
rm(finalConfs)
write.table(ata, file.path(tmpdir, "alleleA-antisense.txt"), quote=FALSE, sep="\t", col.names=TRUE)
rm(ata)
write.table(atb, file.path(tmpdir, "alleleB-antisense.txt"), quote=FALSE, sep="\t", col.names=TRUE)
rm(atb)
write.table(sta, file.path(tmpdir, "alleleA-sense.txt"), quote=FALSE, sep="\t", col.names=TRUE)
rm(sta)
write.table(stb, file.path(tmpdir, "alleleB-sense.txt"), quote=FALSE, sep="\t", col.names=TRUE)
rm(stb)
write.table(snr, file.path(tmpdir, "crlmm-snr.txt"), quote=FALSE, sep="\t", col.names=TRUE, row.names=FALSE)
}
justCRLMMv3 <- function(filenames, tmpdir, batch_size=40000,
minLLRforCalls=c(5, 1, 5), recalibrate=TRUE,
balance=1.5, verbose=TRUE, pkgname){
stopifnot(!(missing(tmpdir)|file.exists(tmpdir)))
tmpdir <- gsub("\\/$", "", tmpdir)
## PHENODATA
## gender is not a key thing here
## algorithm behaves robustly...
phenoData <- new("AnnotatedDataFrame",
data=data.frame(gender=rep("female",
length(filenames))))
randomName <- tempfile("crlmm.", tmpdir)
chips <- sapply(filenames, function(x) readCelHeader(x)$chiptype)
if (length(unique(chips)) > 1){
print(table(chips))
stop("All the CEL files must be of the same type.")
}
if (missing(pkgname))
pkgname <- cleanPlatformName(chips[1])
rm(chips)
requireAnnotation(pkgname, verbose=verbose)
sql.tmp <- "SELECT man_fsetid FROM featureSet WHERE man_fsetid LIKE 'SNP%' AND chrom = 'X'"
snps.chrX <- dbGetQuery(db(get(pkgname)), sql.tmp)[[1]]
rm(sql.tmp)
sns <- basename(filenames)
liteNormalization(filenames, destDir=tmpdir, batch_size=batch_size, verbose=verbose)
filenames <- file.path(tmpdir, paste("normalized-", basename(filenames), sep=""))
analysis <- data.frame(v1=c("annotation", "nsamples", "batch_size"),
v2=c(pkgname, length(filenames), batch_size),
stringsAsFactors=FALSE)
write.table(analysis, file.path(tmpdir, "analysis.txt"), row.names=FALSE,
col.names=FALSE, quote=FALSE, sep="\t")
snps <- dbGetQuery(db(get(pkgname)), "SELECT man_fsetid FROM featureSet WHERE man_fsetid LIKE 'SNP%' ORDER BY man_fsetid")[[1]]
snps <- split(snps, rep(1:length(snps), each=batch_size, length.out=length(snps)))
theSNR <- matrix(NA, nrow=length(snps), ncol=length(filenames))
prefix <- "SELECT fid, man_fsetid, pmfeature.allele, pmfeature.strand FROM featureSet, pmfeature WHERE man_fsetid IN ("
suffix <- ") AND pmfeature.fsetid = featureSet.fsetid ORDER BY fid"
allSnps <- dbGetQuery(db(get(pkgname)), "SELECT man_fsetid FROM featureSet WHERE man_fsetid LIKE 'SNP%' ORDER BY man_fsetid")[[1]]
if (verbose){
message("Genotyping.")
pb <- txtProgressBar(min=1, max=length(snps), style=3, initial=1)
}
for (i in 1:length(snps)){
mid <- paste("'", snps[[i]],"'", collapse=", ", sep="")
sql <- paste(prefix, mid, suffix)
tmp <- dbGetQuery(db(get(pkgname)), sql)
pnVec <- paste(tmp[["man_fsetid"]],
c("A", "B")[tmp[["allele"]]+1],
c("S", "A")[tmp[["strand"]]+1], sep="")
idx <- order(pnVec)
tmp[["man_fsetid"]] <- tmp[["allele"]] <- tmp[["strand"]] <- NULL
pms <- readCelIntensities(filenames, indices=tmp[["fid"]])
pms <- pms[idx,, drop=FALSE]
dimnames(pms) <- NULL
colnames(pms) <- sns
theSumm <- basicRMA(pms, pnVec[idx], normalize=FALSE, background=FALSE, verbose=FALSE)
colnames(theSumm) <- sns
sqs <- sqsFrom(theSumm)
saveAppendMatrix(allele(sqs, "A", "antisense"),
file.path(tmpdir, "alleleA-antisense.txt"), i)
saveAppendMatrix(allele(sqs, "B", "antisense"),
file.path(tmpdir, "alleleB-antisense.txt"), i)
saveAppendMatrix(allele(sqs, "A", "sense"),
file.path(tmpdir, "alleleA-sense.txt"), i)
saveAppendMatrix(allele(sqs, "B", "sense"),
file.path(tmpdir, "alleleB-sense.txt"), i)
annotation(sqs) <- pkgname
phenoData(sqs) <- phenoData
rm(pms, mid, sql, tmp, pnVec, idx)
### TRYING CRLMM HERE
correction <- fitAffySnpMixture(sqs, verbose=FALSE)
theF <- correction$fs
rownames(theF) <- featureNames(sqs)
saveAppendMatrix(theF[,,1],
file.path(tmpdir, "antisense-f.txt"), i)
saveAppendMatrix(theF[,,2],
file.path(tmpdir, "sense-f.txt"), i)
rm(theF)
theSNR[i, ] <- correction$snr
load(system.file(paste("extdata/", pkgname, "CrlmmInfo.rda", sep=""), package=pkgname))
myenv <- get(paste(pkgname,"Crlmm",sep="")); rm(list=paste(pkgname,"Crlmm",sep=""))
thePriors <- get("priors", myenv)
snpsIn <- match(featureNames(sqs), allSnps)
myenv$hapmapCallIndex <- myenv$hapmapCallIndex[snpsIn]
myenv$params$centers <- myenv$params$centers[snpsIn,,]
myenv$params$scales <- myenv$params$scales[snpsIn,,]
myenv$params$N <- myenv$params$N[snpsIn,]
Index <- which(!get("hapmapCallIndex",myenv))
myCalls <- matrix(NA,dim(sqs)[1],dim(sqs)[2])
myCalls[Index,] <- getInitialAffySnpCalls(correction,Index,verbose=FALSE)
rparams <- getAffySnpGenotypeRegionParams(sqs, myCalls, correction$fs,
subset=Index,verbose=FALSE)
oneStrand <- apply(is.na(getM(sqs[,1])[,1,]), 1,
function(v) ifelse(length(ll <- which(v))==0, 0, ll))
rparams <- updateAffySnpParams(rparams, thePriors, oneStrand, verbose=FALSE)
params <- replaceAffySnpParams(get("params",myenv), rparams, Index)
myDist <- getAffySnpDistance(sqs, params, correction$fs)
myDist[,,-2,] <- balance*myDist[,,-2,]
XIndex <- which(snps[i] %in% snps.chrX)
myCalls <- getAffySnpCalls(myDist,XIndex,maleIndex=NULL,verbose=FALSE)
LLR <- getAffySnpConfidence(myDist,myCalls,XIndex,maleIndex=NULL,verbose=FALSE)
if(recalibrate){
for(k in 1:3)
myCalls[myCalls == k & LLR < minLLRforCalls[k]] <- NA
rm(LLR)
myCalls[, theSNR[i,] < 3.675] <- NA
rparams <- getAffySnpGenotypeRegionParams(sqs, myCalls,
correction$fs,
verbose=FALSE)
rm(myCalls)
rparams <- updateAffySnpParams(rparams, thePriors, oneStrand)
myDist <- getAffySnpDistance(sqs, rparams, correction$fs, verbose=FALSE)
## save(myDist, file=paste(randomName, "DONT-REMOVEME", i, sep="."))
myDist[,,-2,] <- balance*myDist[,,-2,]
rm(oneStrand)
myCalls <- getAffySnpCalls(myDist,XIndex, maleIndex=NULL, verbose=FALSE)
LLR <- getAffySnpConfidence(myDist,myCalls,XIndex,maleIndex=NULL,verbose=FALSE)
pacc <- as.matrix(LLR2conf(myCalls, LLR, matrix(theSNR[i,], nrow=1), pkgname))
dimnames(myCalls) <- dimnames(LLR) <- dimnames(pacc) <- list(snps[[i]], sns)
saveAppendMatrix(myCalls, file.path(tmpdir, "crlmm-calls.txt"), i)
saveAppendMatrix(LLR, file.path(tmpdir, "crlmm-llr.txt"), i)
saveAppendMatrix(pacc, file.path(tmpdir, "crlmm-conf.txt"), i)
}
rm(myDist, correction, Index, k, LLR, myCalls, myenv, params, rparams, snpsIn, sqs, XIndex, pacc)
if (verbose) setTxtProgressBar(pb, i)
}
if (verbose) close(pb)
## remove normalized CEL
cat("Removing temporary files ... ")
sapply(list.celfiles(tmpdir, full.names=T), unlink)
cat("OK.\n")
snr <- matrix(exp(colMeans(log(theSNR))), nrow=1)
writeBin(as.numeric(snr), file.path(tmpdir, "snr"))
colnames(snr) <- sns
write.table(snr, file.path(tmpdir, "crlmm-snr.txt"), quote=FALSE, sep="\t", col.names=TRUE, row.names=FALSE)
}
saveAppendMatrix <- function(x, fname, i){
stopifnot(!missing(x), !missing(fname), !missing(i))
suppressWarnings(write.table(x, fname, append=TRUE,
quote=FALSE, sep="\t",
col.names=(i==1)))
}
justCRLMMv4 <- function(filenames, tmpdir, batch_size=40000,
minLLRforCalls=c(5, 1, 5), recalibrate=TRUE,
balance=1.5, verbose=TRUE, pkgname){
stopifnot(!(missing(tmpdir)|file.exists(tmpdir)))
tmpdir <- gsub("\\/$", "", tmpdir)
## PHENODATA
## gender is not a key thing here
## algorithm behaves robustly...
phenoData <- new("AnnotatedDataFrame",
data=data.frame(gender=rep("female",
length(filenames))))
if (length(chiptype <- unique(readChipTypesFromCels(filenames))) > 1){
print(table(chiptype))
stop("CEL files do not have the same chip type")
}
if (missing(pkgname))
pkgname <- cleanPlatformName(chiptype)
requireAnnotation(pkgname, verbose=verbose)
sql.tmp <- "SELECT man_fsetid FROM featureSet WHERE man_fsetid LIKE 'SNP%' AND chrom = 'X'"
snps.chrX <- dbGetQuery(db(get(pkgname)), sql.tmp)[[1]]
rm(sql.tmp)
sns <- basename(filenames)
liteNormalization(filenames, destDir=tmpdir, batch_size=batch_size, verbose=verbose)
filenames <- file.path(tmpdir, paste("normalized-", basename(filenames), sep=""))
analysis <- data.frame(v1=c("annotation", "nsamples", "batch_size"),
v2=c(pkgname, length(filenames), batch_size),
stringsAsFactors=FALSE)
write.table(analysis, file.path(tmpdir, "analysis.txt"), row.names=FALSE,
col.names=FALSE, quote=FALSE, sep="\t")
snps <- dbGetQuery(db(get(pkgname)), "SELECT man_fsetid FROM featureSet WHERE man_fsetid LIKE 'SNP%' ORDER BY man_fsetid")[[1]]
snps <- split(snps, rep(1:length(snps), each=batch_size, length.out=length(snps)))
theSNR <- matrix(NA, nrow=length(snps), ncol=length(filenames))
prefix <- "SELECT fid, man_fsetid, pmfeature.allele, pmfeature.strand FROM featureSet, pmfeature WHERE man_fsetid IN ("
suffix <- ") AND pmfeature.fsetid = featureSet.fsetid ORDER BY fid"
allSnps <- dbGetQuery(db(get(pkgname)), "SELECT man_fsetid FROM featureSet WHERE man_fsetid LIKE 'SNP%' ORDER BY man_fsetid")[[1]]
if (verbose){
message("Genotyping.")
pb <- txtProgressBar(min=1, max=length(snps), style=3, initial=1)
}
for (i in 1:length(snps)){
mid <- paste("'", snps[[i]],"'", collapse=", ", sep="")
sql <- paste(prefix, mid, suffix)
tmp <- dbGetQuery(db(get(pkgname)), sql)
pnVec <- paste(tmp[["man_fsetid"]],
c("A", "B")[tmp[["allele"]]+1],
c("S", "A")[tmp[["strand"]]+1], sep="")
idx <- order(pnVec)
tmp[["man_fsetid"]] <- tmp[["allele"]] <- tmp[["strand"]] <- NULL
pms <- readCelIntensities(filenames, indices=tmp[["fid"]])
pms <- pms[idx,, drop=FALSE]
dimnames(pms) <- NULL
colnames(pms) <- sns
theSumm <- basicRMA(pms, pnVec[idx], normalize=FALSE, background=FALSE, verbose=FALSE)
colnames(theSumm) <- sns
sqs <- sqsFrom(theSumm)
saveAppendMatrix(allele(sqs, "A", "antisense"),
file.path(tmpdir, "alleleA-antisense.txt"), i)
saveAppendMatrix(allele(sqs, "B", "antisense"),
file.path(tmpdir, "alleleB-antisense.txt"), i)
saveAppendMatrix(allele(sqs, "A", "sense"),
file.path(tmpdir, "alleleA-sense.txt"), i)
saveAppendMatrix(allele(sqs, "B", "sense"),
file.path(tmpdir, "alleleB-sense.txt"), i)
annotation(sqs) <- pkgname
phenoData(sqs) <- phenoData
rm(pms, mid, sql, tmp, pnVec, idx)
### TRYING CRLMM HERE
correction <- fitAffySnpMixture(sqs, verbose=FALSE)
theF <- correction$fs
rownames(theF) <- featureNames(sqs)
saveAppendMatrix(theF[,,1],
file.path(tmpdir, "antisense-f.txt"), i)
saveAppendMatrix(theF[,,2],
file.path(tmpdir, "sense-f.txt"), i)
rm(theF)
theSNR[i, ] <- correction$snr
load(system.file(paste("extdata/", pkgname, "CrlmmInfo.rda", sep=""), package=pkgname))
myenv <- get(paste(pkgname,"Crlmm",sep="")); rm(list=paste(pkgname,"Crlmm",sep=""))
thePriors <- get("priors", myenv)
snpsIn <- match(featureNames(sqs), allSnps)
myenv$hapmapCallIndex <- myenv$hapmapCallIndex[snpsIn]
myenv$params$centers <- myenv$params$centers[snpsIn,,]
myenv$params$scales <- myenv$params$scales[snpsIn,,]
myenv$params$N <- myenv$params$N[snpsIn,]
Index <- which(!get("hapmapCallIndex",myenv))
myCalls <- matrix(NA,dim(sqs)[1],dim(sqs)[2])
myCalls[Index,] <- getInitialAffySnpCalls(correction,Index,verbose=FALSE)
rparams <- getAffySnpGenotypeRegionParams(sqs, myCalls, correction$fs,
subset=Index,verbose=FALSE)
oneStrand <- apply(is.na(getM(sqs[,1])[,1,]), 1,
function(v) ifelse(length(ll <- which(v))==0, 0, ll))
rparams <- updateAffySnpParams(rparams, thePriors, oneStrand, verbose=FALSE)
params <- replaceAffySnpParams(get("params",myenv), rparams, Index)
myDist <- getAffySnpDistance(sqs, params, correction$fs)
myDist[,,-2,] <- balance*myDist[,,-2,]
XIndex <- which(snps[i] %in% snps.chrX)
myCalls <- getAffySnpCalls(myDist,XIndex,maleIndex=NULL,verbose=FALSE)
LLR <- getAffySnpConfidence(myDist,myCalls,XIndex,maleIndex=NULL,verbose=FALSE)
if(recalibrate){
for(k in 1:3)
myCalls[myCalls == k & LLR < minLLRforCalls[k]] <- NA
rm(LLR)
myCalls[, theSNR[i,] < 3.675] <- NA
rparams <- getAffySnpGenotypeRegionParams(sqs, myCalls,
correction$fs,
verbose=FALSE)
rm(myCalls)
rparams <- updateAffySnpParams(rparams, thePriors, oneStrand)
myDist <- getAffySnpDistance(sqs, rparams, correction$fs, verbose=FALSE)
## save(myDist, file=paste(randomName, "DONT-REMOVEME", i, sep="."))
myDist[,,-2,] <- balance*myDist[,,-2,]
rm(oneStrand)
myCalls <- getAffySnpCalls(myDist,XIndex, maleIndex=NULL, verbose=FALSE)
LLR <- getAffySnpConfidence(myDist,myCalls,XIndex,maleIndex=NULL,verbose=FALSE)
pacc <- as.matrix(LLR2conf(myCalls, LLR, matrix(theSNR[i,], nrow=1), pkgname))
dimnames(myCalls) <- dimnames(LLR) <- dimnames(pacc) <- list(snps[[i]], sns)
saveAppendMatrix(myCalls, file.path(tmpdir, "crlmm-calls.txt"), i)
saveAppendMatrix(LLR, file.path(tmpdir, "crlmm-llr.txt"), i)
saveAppendMatrix(pacc, file.path(tmpdir, "crlmm-conf.txt"), i)
}
rm(myDist, correction, Index, k, LLR, myCalls, myenv, params, rparams, snpsIn, sqs, XIndex, pacc)
if (verbose) setTxtProgressBar(pb, i)
}
if (verbose) close(pb)
## remove normalized CEL
cat("Removing temporary files ... ")
sapply(list.celfiles(tmpdir, full.names=T), unlink)
cat("OK.\n")
snr <- matrix(exp(colMeans(log(theSNR))), nrow=1)
writeBin(as.numeric(snr), file.path(tmpdir, "snr"))
colnames(snr) <- sns
write.table(snr, file.path(tmpdir, "crlmm-snr.txt"), quote=FALSE, sep="\t", col.names=TRUE, row.names=FALSE)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.