R/cnrma-functions.R

Defines functions calculateRTheta ABpanel dbvn genotypes posteriorMean.snp genotypeCorrelation .posterior numberGenotypes sum.mymatrix .close .open imputeAcrossBatch imputeBB imputeAA imputeAB.BB imputeAA.AB estimateCnParameters crlmmCopynumber2 crlmmCopynumber isCall summarizeSnps summarizeNps whichMarkers genotypeSummary shrinkSummary ellipseCenters constructIlluminaCNSet constructIlluminaAssayData constructIlluminaFeatureData imputeCenterX imputeCentersForMonomorphicSnps indexComplete imputeCenter cnrma2 whichPlatform fit.lm4 fit.lm3 summarizeXGenotypes summarizeMaleXNps fit.lm2 fit.lm1 shrinkGenotypeSummaries fit.wls dqrlsWrapper rowCors shrink rowMAD rowCovs genotype2 cnrmaAffy genotypeAffy genotype snprmaAffy constructAffyCNSet construct getFeatureData getProtocolData.Affy

Documented in ABpanel cnrmaAffy constructAffyCNSet crlmmCopynumber crlmmCopynumber2 genotype genotype2 genotypeAffy genotypes snprmaAffy

##---------------------------------------------------------------------------
##---------------------------------------------------------------------------
getProtocolData.Affy <- function(filenames){
	scanDates <- data.frame(ScanDate=sapply(filenames, celfileDate))
	rownames(scanDates) <- basename(rownames(scanDates))
	protocoldata <- new("AnnotatedDataFrame",
			    data=scanDates,
			    varMetadata=data.frame(labelDescription=colnames(scanDates),
			                           row.names=colnames(scanDates)))
	return(protocoldata)
}

##setMethod("snpNames", signature(object="character"),
##	  function(object){
##		  nm <- grep("Crlmm", object)
##		  if(length(nm)==0){
##			  pkgname <- paste(object, "Crlmm", sep="")
##		  } else pkgname <- object
##		  loader("preprocStuff.rda", .crlmmPkgEnv, object)
##		  gns <- getVarInEnv("gns")
##		  return(gns)
##	  })


getFeatureData <- function(cdfName, copynumber=FALSE, genome){
	pkgname <- getCrlmmAnnotationName(cdfName)
	if(!require(pkgname, character.only=TRUE)){
		suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
		msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
		message(strwrap(msg))
		stop("Package ", pkgname, " could not be found.")
		rm(suggCall, msg)
	}
	loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
	loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
	loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
	gns <- getVarInEnv("gns")
	nm <- grep("Crlmm", pkgname)
	if(length(nm)==0){
		pkgname <- paste(pkgname, "Crlmm", sep="")
	}
	path <- system.file("extdata", package=pkgname)
	##multiple.builds <- length(grep("hg19", list.files(path)) > 0)
	snp.file <- list.files(path, pattern="snpProbes_hg")
	if(length(snp.file)==0){
		no.build <- TRUE
		snp.file <- list.files(path, pattern="snpProbes.rda")
	} else {
		no.build <- FALSE
		if(missing(genome)){
			snp.file <- list.files(path, pattern="snpProbes_hg")
			if(length(snp.file) > 1){
				## use hg19
				message("genome build not specified. Using build hg19 for annotation.")
				snp.file <- snp.file[1]
			}
			genome <- gsub(".rda", "", strsplit(snp.file, "snpProbes_")[[1]][[2]])
		} else snp.file <- paste("snpProbes_", genome, ".rda", sep="")
	}
##	if(!multiple.builds){
##		load(file.path(path, "snpProbes.rda"))
##	} else load(file.path(path, paste("snpProbes_", genome, ".rda", sep="")))
	load(file.path(path, snp.file))
	snpProbes <- get("snpProbes")
	## if we use a different build we may throw out a number of snps...
	snpProbes <- snpProbes[rownames(snpProbes) %in% gns, ]
	if(copynumber){
		if(!no.build){
			cn.file <- paste("cnProbes_", genome, ".rda", sep="")
		} else cn.file <- "cnProbes.rda"
		load(file.path(path, cn.file))
		##		if(!multiple.builds){
		##			load(file.path(path, "cnProbes.rda"))
		##		} else load(file.path(path, paste("cnProbes_", genome, ".rda", sep="")))
		cnProbes <- get("cnProbes")
		snpIndex <- seq(along=gns)
		npIndex <- seq(along=rownames(cnProbes)) + max(snpIndex)
		featurenames <- c(gns, rownames(cnProbes))
	} else featurenames <- gns
	fvarlabels=c("chromosome", "position", "isSnp")
	M <- matrix(NA, length(featurenames), 3, dimnames=list(featurenames, fvarlabels))
	index <- match(rownames(snpProbes), rownames(M)) #only snp probes in M get assigned position
	M[index, "position"] <- snpProbes[, grep("pos", colnames(snpProbes))]
	M[index, "chromosome"] <- chromosome2integer(snpProbes[, grep("chr", colnames(snpProbes))])
	##M[index, "isSnp"] <- 1L
	index <- which(is.na(M[, "isSnp"]))
	M[index, "isSnp"] <- 1L
	if(copynumber){
		index <- match(rownames(cnProbes), rownames(M)) #only snp probes in M get assigned position
		M[index, "position"] <- cnProbes[, grep("pos", colnames(cnProbes))]
		M[index, "chromosome"] <- chromosome2integer(cnProbes[, grep("chr", colnames(cnProbes))])
		M[index, "isSnp"] <- 0L
	}
	##A few of the snpProbes do not match -- I think it is chromosome Y.
	if(any(is.na(M[, "chromosome"])))
		M[is.na(M[, "chromosome"]), "isSnp"] <- 1L
	##return(new("AnnotatedDataFrame", data=M))
	new("GenomeAnnotatedDataFrame",
	    position=as.integer(M[, "position"]),
	    chromosome=as.integer(M[, "chromosome"]),
	    isSnp=as.logical(M[, "isSnp"]),
	    row.names=featurenames)
}

getFeatureData.Affy <- getFeatureData

construct <- function(filenames,
		      cdfName,
		      copynumber=TRUE,
		      sns, verbose=TRUE, batch, fns){
	if(!missing(batch)){
		stopifnot(length(batch) == length(sns))
	}
	if(any(is.na(batch))){
		stop("NA's in batch variable")
	}
	if(missing(sns) & missing(filenames)) stop("one of filenames or samplenames (sns) must be provided")
	if(verbose) message("Initializing container for copy number estimation")
	featureData <- getFeatureData(cdfName, copynumber=copynumber)
	if(!missing(fns)){
		warning("subsetting the featureData can cause the snprma object and CNSet object to be misaligned. This problem is fixed in devel as we match the names prior to assigning values from snprma")
		index <- match(fns, featureNames(featureData))
		if(all(is.na(index))) stop("fns not in featureNames")
		featureData <- featureData[index, ]
	}
	nr <- nrow(featureData); nc <- length(sns)
	cnSet <- new("CNSet",
		     alleleA=initializeBigMatrix(name="A", nr, nc),
		     alleleB=initializeBigMatrix(name="B", nr, nc),
		     call=initializeBigMatrix(name="call", nr, nc),
		     callProbability=initializeBigMatrix(name="callPr", nr,nc),
		     annotation=cdfName,
		     batch=batch)
	if(!missing(sns)){
		sampleNames(cnSet) <- sns
		protocolData <- annotatedDataFrameFrom(A(cnSet), byrow=FALSE)
	} else {
		sampleNames(cnSet) <- basename(filenames)
		protocolData <- getProtocolData.Affy(filenames)
	}
	rownames(pData(protocolData)) <- sns
	protocolData(cnSet) <- protocolData
	featureData(cnSet) <- featureData
	featureNames(cnSet) <- featureNames(featureData)
	pd <- data.frame(matrix(NA, nc, 3), row.names=sns)
	colnames(pd)=c("SKW", "SNR", "gender")
	phenoData(cnSet) <- new("AnnotatedDataFrame", data=pd)
	gns <- getVarInEnv("gns")
	stopifnot(identical(gns, featureNames(cnSet)[1:length(gns)]))
	return(cnSet)
}

constructAffyCNSet <- function(filenames, sns, cdfName, batch, verbose=TRUE, genome){
	##stopifnot(!missing(filenames))
	if(missing(sns)) sns <- basename(filenames)
	stopifnot(!missing(batch))
	##---------------------------------------------------------------------------
	##
	## from snprma2.  Goal is to initialize A and B with
	## appropriate dimension for snps+nps
	##
	##---------------------------------------------------------------------------
	if (missing(cdfName))
		cdfName <- read.celfile.header(filenames[1])[["cdfName"]]
	pkgname <- getCrlmmAnnotationName(cdfName)
	stopifnot(require(pkgname, character.only=TRUE, quietly=!verbose))
	if(verbose) message("Loading annotations and mixture model parameters.")
	obj <- loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
	pnsa <- getVarInEnv("pnsa")
	pnsb <- getVarInEnv("pnsb")
	gns <- getVarInEnv("gns")
	rm(list=obj, envir=.crlmmPkgEnv)
	rm(obj)
	if(verbose) message("Initializing ff objects.")
	##mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, length(filenames), "double")
	SNR <- initializeBigVector("crlmmSNR-", length(filenames), "double")
	SKW <- initializeBigVector("crlmmSKW-", length(filenames), "double")
	genderff <- initializeBigVector("gender", length(filenames), "integer")
	featureData <- getFeatureData(cdfName, copynumber=TRUE, genome=genome)
	nr <- nrow(featureData); nc <- length(sns)
	A <- initializeBigMatrix("crlmmA-", nr, length(filenames), "integer")
	B <- initializeBigMatrix("crlmmB-", nr, length(filenames), "integer")
	call <- initializeBigMatrix("call-", nr, length(filenames), "integer")
	callPr <- initializeBigMatrix("callPr-", nr, length(filenames), "integer")
	mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, length(filenames), "double")
	rownames(A) <- rownames(B) <- featureNames(featureData)
	rownames(call) <- rownames(callPr) <- featureNames(featureData)
	dirNames <- dirname(filenames)
	unames <- unique(dirNames)
	dirNames <- factor(dirNames, levels=unames)
	dd <- split(seq_len(length(filenames)), dirNames)
	datadir <- list(dirname=names(dd), n=as.integer(sapply(dd, length)))
	if(verbose) message("Instantiating CNSet container")
	cnSet <- new("CNSet",
		     alleleA=A,
		     alleleB=B,
		     call=call,
		     callProbability=callPr,
		     featureData=featureData,
		     annotation=cdfName,
		     batch=as.character(batch),
		     genome=genome,
		     datadir=datadir)
	cnSet@mixtureParams <- mixtureParams
	sampleNames(cnSet) <- sns
	protocolData <- getProtocolData.Affy(filenames)
	protocolData$filename <- basename(filenames)
	rownames(pData(protocolData)) <- sns
	protocolData(cnSet) <- protocolData
	cnSet$SKW <- SKW
	cnSet$SNR <- SNR
	cnSet$gender <- genderff
	stopifnot(validObject(cnSet))
	return(cnSet)
}

snprmaAffy <- function(cnSet,
		       mixtureSampleSize=10^5,
		       eps=0.1,
		       seed=1,
		       verbose=TRUE){
	filenames <- celfileName(cnSet)
	if(verbose) message("Preprocessing ", length(filenames), " files.")
	pkgname <- getCrlmmAnnotationName(annotation(cnSet))
	stopifnot(require(pkgname, character.only=TRUE, quietly=!verbose))
	sampleBatches <- splitIndicesByNode(seq_along(filenames))
	mixtureParams <- cnSet@mixtureParams
	obj1 <- loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
	obj2 <- loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
	obj3 <- loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
	autosomeIndex <- getVarInEnv("autosomeIndex")
	pnsa <- getVarInEnv("pnsa")
	pnsb <- getVarInEnv("pnsb")
	fid <- getVarInEnv("fid")
	reference <- getVarInEnv("reference")
	aIndex <- getVarInEnv("aIndex")
	bIndex <- getVarInEnv("bIndex")
	SMEDIAN <- getVarInEnv("SMEDIAN")
	theKnots <- getVarInEnv("theKnots")
	gns <- getVarInEnv("gns")
	rm(list=c(obj1, obj2, obj3), envir=.crlmmPkgEnv)
	rm(obj1, obj2, obj3)
	## for mixture
	set.seed(seed)
	idx <- sort(sample(autosomeIndex, mixtureSampleSize))
	##for skewness. no need to do everything
	idx2 <- sample(length(fid), 10^5)
	A <- A(cnSet)
	B <- B(cnSet)
	SKW <- cnSet$SKW; SNR <- cnSet$SNR
	open(A)
	open(B)
	open(SKW)
	open(mixtureParams)
	open(SNR)
	## RS ADDED
	index <- match(gns, rownames(A))
	rsprocessCEL <- function(i){
		for (k in i){
			y <- as.matrix(read.celfile(filenames[k], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
			x <- log2(y[idx2])
			SKW[k] <- mean((x-mean(x))^3)/(sd(x)^3)
			rm(x)
			y <- normalize.quantiles.use.target(y, target=reference)
			## RS: add index for row assignment
			ya <- intMedianSummaries(y[aIndex, 1, drop=FALSE], pnsa)
			yb <- intMedianSummaries(y[bIndex, 1, drop=FALSE], pnsb)
			A[index, k] <- ya
			B[index, k] <- yb
			rm(y)
			S <- (log2(A[idx,k])+log2(B[idx, k]))/2 - SMEDIAN
			M <- log2(A[idx, k])-log2(B[idx, k])
			tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)
			rm(S, M)
			mixtureParams[, k] <- tmp[["coef"]]
			SNR[k] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
			rm(tmp)
		}
		TRUE
	}
	ocLapply(sampleBatches, rsprocessCEL, neededPkgs="crlmm")
	close(A)
	close(B)
	close(SKW)
	close(mixtureParams)
	close(SNR)
	return()
}

genotype <- function(filenames,
		     cdfName,
		     batch,
		     mixtureSampleSize=10^5,
		     eps=0.1,
		     verbose=TRUE,
		     seed=1,
		     sns,
		     probs=rep(1/3, 3),
		     DF=6,
		     SNRMin=5,
		     recallMin=10,
		     recallRegMin=1000,
		     gender=NULL,
		     returnParams=TRUE,
		     badSNP=0.7,
		     genome=c("hg19", "hg18")){
	if(!isPackageLoaded("ff")) stop("ff package must be loaded")
	if (missing(sns)) sns <- basename(filenames)
	if (any(duplicated(sns))) stop("sample identifiers must be unique")
	genome <- match.arg(genome)
	cnSet <- constructAffyCNSet(filenames=filenames,
				    sns=sns,
				    cdfName=cdfName,
				    batch=batch, verbose=verbose,
				    genome=genome)
	if(!(is(A(cnSet), "ff") || is(A(cnSet), "ffdf"))){
		stop("The ff package is required for this function.")
	}
	ok <- snprmaAffy(cnSet,
			 mixtureSampleSize=mixtureSampleSize,
			 eps=eps,
			 seed=seed,
			 verbose=verbose)
	ok <- cnrmaAffy(cnSet=cnSet,
			seed=seed,
			verbose=verbose)
	stopifnot(ok)
	ok <- genotypeAffy(cnSet=cnSet,
			   SNRMin=SNRMin,
			   recallMin=recallMin,
			   recallRegMin=recallRegMin,
			   gender=gender,
			   badSNP=badSNP,
			   returnParams=returnParams,
			   verbose=verbose)
	return(cnSet)
}

genotypeAffy <- function(cnSet, SNRMin=5, recallMin=10,
			 recallRegMin=1000,
			 gender=NULL, badSNP=0.7, returnParams=TRUE,
			 verbose=TRUE){
	## The passed arguments A and B currently contain the intensities
	## (not calls and call probabilities)
	tmp <- crlmmGT2(A=A(cnSet),
			B=B(cnSet),
			SNR=cnSet$SNR,
			mixtureParams=cnSet@mixtureParams,
			cdfName=annotation(cnSet),
			row.names=NULL,
			col.names=sampleNames(cnSet),
			SNRMin=SNRMin,
			recallMin=recallMin,
			recallRegMin=recallRegMin,
			gender=gender,
			verbose=verbose,
			returnParams=returnParams,
			badSNP=badSNP,
			callsGt=calls(cnSet),
			callsPr=snpCallProbability(cnSet))
	cnSet$gender[] <- tmp$gender
	if(verbose) message("Genotyping finished.")
	return(TRUE)
}

cnrmaAffy <- function(cnSet, seed=1, verbose=TRUE){
	filenames <- celfileName(cnSet)
	np.index <- which(!isSnp(cnSet))
	A <- A(cnSet)
	cdfName <- annotation(cnSet)
	if(verbose) message("Quantile normalizing nonpolymorphic markers")
	##if(missing(cdfName)) stop("must specify cdfName")
	pkgname <- getCrlmmAnnotationName(cdfName)
	require(pkgname, character.only=TRUE) || stop("Package ", pkgname, " not available")
	sampleBatches <- splitIndicesByNode(seq_along(filenames))
	if(verbose) message("Processing nonpolymorphic probes for ", length(filenames), " files.")
	if(pkgname=="genomewidesnp6Crlmm"){
		loader("1m_reference_cn.rda", .crlmmPkgEnv, pkgname)
	}
	if(pkgname=="genomewidesnp5Crlmm"){
		loader("5.0_reference_cn.rda", .crlmmPkgEnv, pkgname)
	}
	reference <- getVarInEnv("reference")
        loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
	fid <- getVarInEnv("npProbesFid")
	row.names <- featureNames(cnSet)[np.index]
	row.names <- row.names[row.names %in% names(fid)] ##removes chromosome Y
	fid <- fid[match(row.names, names(fid))]
	np.index <- match(row.names, rownames(A))
	gns <- names(fid)
	set.seed(seed)
	## updates A
	open(A)
	processCEL2 <- function(i){
		for (k in i){
			y <- as.matrix(read.celfile(filenames[k], intensity.means.only=TRUE)[["INTENSITY"]][["MEAN"]][fid])
			A[np.index, k] <- as.integer(normalize.quantiles.use.target(y, target=reference))
		}
		return(TRUE)
	}
	ocLapply(sampleBatches, processCEL2, neededPkgs="crlmm")
	close(A)
	TRUE
}



genotype2 <- function(){
	.Defunct(msg="The genotype2 function has been deprecated. The function genotype should be used instead.  genotype will support large data using ff provided that the ff package is loaded.")
}
genotypeLD <- genotype2

rowCovs <- function(x, y, ...){
	notna <- !is.na(x)
	N <- rowSums(notna)
	x <- suppressWarnings(log2(x))
	if(!missing(y)) y <- suppressWarnings(log2(y))
	return(rowSums((x - rowMeans(x, ...)) * (y - rowMeans(y, ...)), ...)/(N-1))
}

rowMAD <- function(x, y, ...){
	##notna <- !is.na(x)
	mad <- 1.4826*rowMedians(abs(x-rowMedians(x, ...)), ...)
	return(mad)
}

shrink <- function(x, Ns, DF.PRIOR){
	DF <- Ns-1
	DF[DF < 1] <- 1
	x.0 <- apply(x, 2, median, na.rm=TRUE)
	x <- (x*DF + x.0*DF.PRIOR)/(DF.PRIOR + DF)
	for(j in 1:ncol(x)) x[is.na(x[, j]), j] <- x.0[j]
	return(x)
}

rowCors <- function(x, y, ...){
	N <- rowSums(!is.na(x))
	x <- suppressWarnings(log2(x))
	y <- suppressWarnings(log2(y))
	sd.x <- rowSds(x, ...)
	sd.y <- rowSds(y, ...)
	covar <- rowSums((x - rowMeans(x, ...)) * (y - rowMeans(y, ...)), ...)/(N-1)
	return(covar/(sd.x*sd.y))
}

## dqrlsWrapper <- function(x, y, wts, tol=1e-7){
## 	n <- NROW(y)
## 	p <- ncol(x)
## 	ny <- NCOL(y)
## 	.Fortran("dqrls", qr=x*wts, n=n, p=p, y=y * wts, ny=ny,
## 		 tol=as.double(tol), coefficients=mat.or.vec(p, ny),
## 		 residuals=y, effects=mat.or.vec(n, ny),
## 		 rank=integer(1L), pivot=1L:p, qraux=double(p),
## 		 work=double(2 * p), PACKAGE="base")[["coefficients"]]
## }


dqrlsWrapper <- function(x, y, wts, ...)
    fastLmPure(X=x*wts, y=y*wts, method=3)[['coefficients']]

fit.wls <- function(NN, sigma, allele, Y, autosome, X){
	Np <- NN
	Np[Np < 1] <- 1
	W <- (sigma/sqrt(Np))^-1
	Ystar <- Y*W
	complete <- which(rowSums(is.na(W)) == 0 & rowSums(is.na(Ystar)) == 0)
	if(missing(allele)) stop("must specify allele")
	if(autosome & missing(X)){
		if(allele == "A") X <- cbind(1, 2:0)
		if(allele == "B") X <- cbind(1, 0:2)
	}
	if(!autosome & missing(X)){
		if(allele == "A") X <- cbind(1, c(1, 0, 2, 1, 0), c(0, 1, 0, 1, 2))
		if(allele == "B") X <- cbind(1, c(0, 1, 0, 1, 2), c(1, 0, 2, 1, 0))
	}
	betahat <- matrix(NA, ncol(X), nrow(Ystar))
	ww <- rep(1, ncol(Ystar))
	for(i in complete){
		betahat[, i] <- dqrlsWrapper(W[i, ] * X, Ystar[i, ], ww)
		##ssr <- sum((Ystar[i, ] - matrix(Xstar[, i], nrow(X), ncol(X)) %*% matrix(betahat[, i], ncol(X), 1))^2)
	}
	return(betahat)
}

shrinkGenotypeSummaries <- function(strata, index.list, object, MIN.OBS, MIN.SAMPLES, DF.PRIOR,
				    verbose, is.lds=TRUE){
	##if(is.lds) {physical <- get("physical"); open(object)}
	open(object)
	if(verbose) message("      Probe stratum ", strata, " of ", length(index.list))
	marker.index <- index.list[[strata]]
	batches <- split(seq_along(batch(object)), as.character(batch(object)))
	batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
	batch.names <- names(batches)
	batch.index <- which(batchNames(object) %in% batch.names)
	N.AA <- as.matrix(N.AA(object)[marker.index, batch.index])
	N.AB <- as.matrix(N.AB(object)[marker.index, batch.index])
	N.BB <- as.matrix(N.BB(object)[marker.index, batch.index])
	medianA.AA <- as.matrix(medianA.AA(object)[marker.index, batch.index])
	medianA.AB <- as.matrix(medianA.AB(object)[marker.index, batch.index])
	medianA.BB <- as.matrix(medianA.BB(object)[marker.index, batch.index])
	medianB.AA <- as.matrix(medianB.AA(object)[marker.index, batch.index])
	medianB.AB <- as.matrix(medianB.AB(object)[marker.index, batch.index])
	medianB.BB <- as.matrix(medianB.BB(object)[marker.index, batch.index])
	madA.AA <- as.matrix(madA.AA(object)[marker.index, batch.index])
	madA.AB <- as.matrix(madA.AB(object)[marker.index, batch.index])
	madA.BB <- as.matrix(madA.BB(object)[marker.index, batch.index])
	madB.AA <- as.matrix(madB.AA(object)[marker.index, batch.index])
	madB.AB <- as.matrix(madB.AB(object)[marker.index, batch.index])
	madB.BB <- as.matrix(madB.BB(object)[marker.index, batch.index])
	medianA <- medianB <- shrink.madB <- shrink.madA <- vector("list", length(batch.names))
	shrink.tau2A.AA <- tau2A.AA <- as.matrix(tau2A.AA(object)[marker.index, batch.index])
	shrink.tau2B.BB <- tau2B.BB <- as.matrix(tau2B.BB(object)[marker.index, batch.index])
	shrink.tau2A.BB <- tau2A.BB <- as.matrix(tau2A.BB(object)[marker.index, batch.index])
	shrink.tau2B.AA <- tau2B.AA <- as.matrix(tau2B.AA(object)[marker.index, batch.index])
	shrink.corrAA <- corrAA <- as.matrix(corrAA(object)[marker.index, batch.index])
	shrink.corrAB <- corrAB <- as.matrix(corrAB(object)[marker.index, batch.index])
	shrink.corrBB <- corrBB <- as.matrix(corrBB(object)[marker.index, batch.index])
	flags <- as.matrix(flags(object)[marker.index, batch.index])
	for(k in seq(along=batches)){
		sample.index <- batches[[k]]
		this.batch <- unique(as.character(batch(object)[sample.index]))
		medianA[[k]] <- cbind(medianA.AA[, k], medianA.AB[, k], medianA.BB[, k])
		medianB[[k]] <- cbind(medianB.AA[, k], medianB.AB[, k], medianB.BB[, k])
		madA <- cbind(madA.AA[, k], madA.AB[, k], madA.BB[, k])
		madB <- cbind(madB.AA[, k], madB.AB[, k], madB.BB[, k])
		NN <- cbind(N.AA[, k], N.AB[, k], N.BB[, k])
		##RS: estimate DF.PRIOR
		shrink.madA[[k]] <- shrink(madA, NN, DF.PRIOR)
		shrink.madB[[k]] <- shrink(madB, NN, DF.PRIOR)
		## an estimate of the background variance is the MAD
		## of the log2(allele A) intensities among subjects with
		## genotypes BB
		shrink.tau2A.BB[, k] <- shrink(tau2A.BB[, k, drop=FALSE], NN[, 3], DF.PRIOR)[, drop=FALSE]
		shrink.tau2B.AA[, k] <- shrink(tau2B.AA[, k, drop=FALSE], NN[, 1], DF.PRIOR)[, drop=FALSE]
		## an estimate of the signal variance is the MAD
		## of the log2(allele A) intensities among subjects with
		## genotypes AA
		shrink.tau2A.AA[, k] <- shrink(tau2A.AA[, k, drop=FALSE], NN[, 1], DF.PRIOR)[, drop=FALSE]
		shrink.tau2B.BB[, k] <- shrink(tau2B.BB[, k, drop=FALSE], NN[, 3], DF.PRIOR)[, drop=FALSE]
		cor.AA <- corrAA[, k, drop=FALSE]
		cor.AB <- corrAB[, k, drop=FALSE]
		cor.BB <- corrBB[, k, drop=FALSE]
		shrink.corrAA[, k] <- shrink(cor.AA, NN[, 1], DF.PRIOR)
		shrink.corrAB[, k] <- shrink(cor.AB, NN[, 2], DF.PRIOR)
		shrink.corrBB[, k] <- shrink(cor.BB, NN[, 3], DF.PRIOR)
		##
		##---------------------------------------------------------------------------
		## SNPs that we'll use for imputing location/scale of unobserved genotypes
		##---------------------------------------------------------------------------
		index.complete <- indexComplete(NN, medianA[[k]], medianB[[k]], MIN.OBS)
		##
		##---------------------------------------------------------------------------
		## Impute sufficient statistics for unobserved genotypes (plate-specific)
		##---------------------------------------------------------------------------
		unobservedAA <- NN[, 1] < MIN.OBS
		unobservedAB <- NN[, 2] < MIN.OBS
		unobservedBB <- NN[, 3] < MIN.OBS
		unobserved.index <- vector("list", 3)
		## AB, BB observed
		unobserved.index[[1]] <- which(unobservedAA & (NN[, 2] >= MIN.OBS & NN[, 3] >= MIN.OBS))
		## AA and BB observed (strange)
		unobserved.index[[2]] <- which(unobservedAB & (NN[, 1] >= MIN.OBS & NN[, 3] >= MIN.OBS))
		## AB and AA observed
		unobserved.index[[3]] <- which(unobservedBB & (NN[, 2] >= MIN.OBS & NN[, 1] >= MIN.OBS))
		res <- imputeCenter(medianA[[k]], medianB[[k]], index.complete, unobserved.index)
		medianA[[k]] <- res[[1]]
		medianB[[k]] <- res[[2]]
		rm(res)
		##the NA's in 'medianA' and 'medianB' are monomorphic if MIN.OBS = 1
		##
		## RS: For Monomorphic SNPs a mixture model may be better
		## RS: Further, we can improve estimation by borrowing strength across batch
		unobserved.index[[1]] <- which(unobservedAA & unobservedAB)
		unobserved.index[[2]] <- which(unobservedBB & unobservedAB)
		unobserved.index[[3]] <- which(unobservedAA & unobservedBB) ## strange
		res <- imputeCentersForMonomorphicSnps(medianA[[k]], medianB[[k]],
						       index.complete,
						       unobserved.index)
		medianA[[k]] <- res[[1]]; medianB[[k]] <- res[[2]]
		rm(res)
		negA <- rowSums(medianA[[k]] < 0) > 0
		negB <- rowSums(medianB[[k]] < 0) > 0
		flags[, k] <- as.integer(rowSums(NN == 0) > 0 | negA | negB)
	}
	flags(object)[marker.index, batch.index] <- flags
	medianA.AA(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianA, function(x) x[, 1]))
	medianA.AB(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianA, function(x) x[, 2]))
	medianA.BB(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianA, function(x) x[, 3]))
	medianB.AA(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianB, function(x) x[, 1]))
	medianB.AB(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianB, function(x) x[, 2]))
	medianB.BB(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianB, function(x) x[, 3]))
	##
	madA.AA(object)[marker.index, batch.index] <- do.call("cbind", lapply(shrink.madA, function(x) x[, 1]))
	madA.AB(object)[marker.index, batch.index] <- do.call("cbind", lapply(shrink.madA, function(x) x[, 2]))
	madA.BB(object)[marker.index, batch.index] <- do.call("cbind", lapply(shrink.madA, function(x) x[, 3]))
	madB.AA(object)[marker.index, batch.index] <- do.call("cbind", lapply(shrink.madB, function(x) x[, 1]))
	madB.AB(object)[marker.index, batch.index] <- do.call("cbind", lapply(shrink.madB, function(x) x[, 2]))
	madB.BB(object)[marker.index, batch.index] <- do.call("cbind", lapply(shrink.madB, function(x) x[, 3]))
	##
	corrAA(object)[marker.index, batch.index] <- shrink.corrAA
	corrAB(object)[marker.index, batch.index] <- shrink.corrAB
	corrBB(object)[marker.index, batch.index] <- shrink.corrBB
	tau2A.AA(object)[marker.index, batch.index] <- shrink.tau2A.AA
	tau2A.BB(object)[marker.index, batch.index] <- shrink.tau2A.BB
	tau2B.AA(object)[marker.index, batch.index] <- shrink.tau2B.AA
	tau2B.BB(object)[marker.index, batch.index] <- shrink.tau2B.BB
	if(is.lds) return(TRUE) else return(object)
}


fit.lm1 <- function(strata,
		    index.list,
		    object,
		    MIN.SAMPLES,
		    THR.NU.PHI,
		    MIN.NU,
		    MIN.PHI,
		    verbose, is.lds,
		    CHR.X, ...){
	open(object$gender)
	if(is.lds) {physical <- get("physical"); open(object)}
	if(verbose) message("      Probe stratum ", strata, " of ", length(index.list))
	snps <- index.list[[strata]]
	batches <- split(seq_along(batch(object)), as.character(batch(object)))
	batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
	##batchnames <- batchNames(object)
	batch.names <- names(batches)
	batch.index <- which(batchNames(object) %in% batch.names)
	if(length(batches) > 1 && "grandMean" %in% batchNames(object))
		batch.index <- c(batch.index, match("grandMean", batchNames(object)))
	N.AA <- as.matrix(N.AA(object)[snps, batch.index])
	N.AB <- as.matrix(N.AB(object)[snps, batch.index])
	N.BB <- as.matrix(N.BB(object)[snps, batch.index])
	medianA.AA <- as.matrix(medianA.AA(object)[snps, batch.index])
	medianA.AB <- as.matrix(medianA.AB(object)[snps, batch.index])
	medianA.BB <- as.matrix(medianA.BB(object)[snps, batch.index])
	medianB.AA <- as.matrix(medianB.AA(object)[snps, batch.index])
	medianB.AB <- as.matrix(medianB.AB(object)[snps, batch.index])
	medianB.BB <- as.matrix(medianB.BB(object)[snps, batch.index])
	madA.AA <- as.matrix(madA.AA(object)[snps, batch.index])
	madA.AB <- as.matrix(madA.AB(object)[snps, batch.index])
	madA.BB <- as.matrix(madA.BB(object)[snps, batch.index])
	madB.AA <- as.matrix(madB.AA(object)[snps, batch.index])
	madB.AB <- as.matrix(madB.AB(object)[snps, batch.index])
	madB.BB <- as.matrix(madB.BB(object)[snps, batch.index])
	tau2A.AA <- as.matrix(tau2A.AA(object)[snps, batch.index])
	tau2B.BB <- as.matrix(tau2B.BB(object)[snps, batch.index])
	tau2A.BB <- as.matrix(tau2A.BB(object)[snps, batch.index])
	tau2B.AA <- as.matrix(tau2B.AA(object)[snps, batch.index])
	corrAA <- as.matrix(corrAA(object)[snps, batch.index])
	corrAB <- as.matrix(corrAB(object)[snps, batch.index])
	corrBB <- as.matrix(corrBB(object)[snps, batch.index])
	nuA <- as.matrix(nuA(object)[snps,  batch.index])
	phiA <- as.matrix(phiA(object)[snps, batch.index])
	nuB <- as.matrix(nuB(object)[snps, batch.index])
	phiB <- as.matrix(phiB(object)[snps, batch.index])
	flags <- as.matrix(flags(object)[snps, batch.index])
	for(k in seq(along=batches)){
		B <- batches[[k]]
		if(length(B) < MIN.SAMPLES) next()
		this.batch <- unique(as.character(batch(object)[B]))
		medianA <- cbind(medianA.AA[, k], medianA.AB[, k], medianA.BB[, k])
		medianB <- cbind(medianB.AA[, k], medianB.AB[, k], medianB.BB[, k])
		madA <- cbind(madA.AA[, k], madA.AB[, k], madA.BB[, k])
		madB <- cbind(madB.AA[, k], madB.AB[, k], madB.BB[, k])
		NN <- cbind(N.AA[, k], N.AB[, k], N.BB[, k])
		## regress on the medians using the standard errors (hence the division by N) as weights
		res <- fit.wls(NN=NN, sigma=madA, allele="A", Y=medianA, autosome=!CHR.X)
		nuA[, k] <- res[1, ]
		phiA[, k] <- res[2, ]
		rm(res)
		res <- fit.wls(NN=NN, sigma=madB, allele="B", Y=medianB, autosome=!CHR.X)##allele="B", Ystar=YB, W=wB, Ns=Ns)
		nuB[, k] <- res[1, ]
		phiB[, k] <- res[2, ]
	}
	##---------------------------------------------------------------------------
	##
	## Grand mean
	##
	##---------------------------------------------------------------------------
##	if(length(batches) > 1 && "grandMean" %in% batchNames(object)){
	##  TODO:  There are NA's in the medianA.AA for the grandMean and 0's in the madA
	##         - both need to be handled prior to estimating a grand intercept and slope
	if(FALSE){
		## then the last column is for the grandMean
		k <- ncol(medianA.AA)
		medianA <- cbind(medianA.AA[, k], medianA.AB[, k], medianA.BB[, k])
		medianB <- cbind(medianB.AA[, k], medianB.AB[, k], medianB.BB[, k])
		madA <- cbind(madA.AA[, k], madA.AB[, k], madA.BB[, k])
		madB <- cbind(madB.AA[, k], madB.AB[, k], madB.BB[, k])
		NN <- cbind(N.AA[, k], N.AB[, k], N.BB[, k])
		index <- which(rowSums(is.na(medianA)) == 0)
		res <- fit.wls(NN=NN[index, ], sigma=madA[index, ], allele="A", Y=medianA[index, ], autosome=!CHR.X)
		nuA[, k] <- res[1, ]
		phiA[, k] <- res[2, ]
		rm(res)
		res <- fit.wls(NN=NN, sigma=madB, allele="B", Y=medianB, autosome=!CHR.X)##allele="B", Ystar=YB, W=wB, Ns=Ns)
		nuB[, k] <- res[1, ]
		phiB[, k] <- res[2, ]
	}
	if(THR.NU.PHI){
		nuA[nuA < MIN.NU] <- MIN.NU
		nuB[nuB < MIN.NU] <- MIN.NU
		phiA[phiA < MIN.PHI] <- MIN.PHI
		phiB[phiB < MIN.PHI] <- MIN.PHI
	}
	nuA(object)[snps, batch.index] <- nuA
	nuB(object)[snps, batch.index] <- nuB
	phiA(object)[snps, batch.index] <- phiA
	phiB(object)[snps, batch.index] <- phiB
	if(is.lds){
		close(object)
		return(TRUE)
	} else{
		return(object)
	}
}

## nonpolymorphic markers
fit.lm2 <- function(strata,
		    index.list,
		    object,
		    MIN.SAMPLES,
		    THR.NU.PHI,
		    MIN.NU,
		    MIN.PHI,
		    verbose, is.lds, CHR.X, ...){
	if(is.lds) {physical <- get("physical"); open(object)}
	open(object$gender)
	if(verbose) message("      Probe stratum ", strata, " of ", length(index.list))
	marker.index <- index.list[[strata]]
	batches <- split(seq_along(batch(object)), as.character(batch(object)))
	batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
	batch.names <- names(batches)
	batch.index <- which(batchNames(object) %in% batch.names)
	##
	ii <- isSnp(object) & chromosome(object) < 23 & !is.na(chromosome(object))
	flags <- as.matrix(flags(object)[ii, batch.index])
	fns <- featureNames(object)[ii]
	fns.noflags <- fns[rowSums(flags, na.rm=T) == 0]
	if(length(fns.noflags) == 0) {
		warning("All features in one of the probeset batches were flagged. The data is processed in probeset batches to reduce RAM, but the error suggests that increasing the size of the batches may help.  For example, ocProbesets(4*ocProbesets()) would increase the current size of the probeset batches by 4.")
		fns.noflags <- featureNames(object)[marker.index] ## allow processing to continue
	}
	index.flag <- match(fns.noflags, featureNames(object))
	snp.index <- sample(index.flag, min(c(length(index.flag), 5000)))
	##
	nuA.np <- as.matrix(nuA(object)[marker.index, batch.index])
	phiA.np <- as.matrix(phiA(object)[marker.index, batch.index])
	tau2A.AA <- as.matrix(tau2A.AA(object)[marker.index, batch.index])
	##
	nuA.snp <- as.matrix(nuA(object)[snp.index, batch.index])
	nuB.snp <- as.matrix(nuB(object)[snp.index, batch.index])
	phiA.snp <- as.matrix(phiA(object)[snp.index, batch.index])
	phiB.snp <- as.matrix(phiB(object)[snp.index, batch.index])
	medianA.AA <- as.matrix(medianA.AA(object)[snp.index, batch.index])
	medianB.BB <- as.matrix(medianB.BB(object)[snp.index, batch.index])
	medianA.AA.np <- as.matrix(medianA.AA(object)[marker.index, batch.index])
	for(k in seq_along(batches)){
		B <- batches[[k]]
		this.batch <- unique(as.character(batch(object)[B]))
		X <- cbind(1, log2(c(medianA.AA[, k], medianB.BB[, k])))
		Y <- log2(c(phiA.snp[, k], phiB.snp[, k]))
		not.missing <- rowSums(is.na(X)) == 0 & !is.na(Y)
		X <- X[not.missing, ]
		Y <- Y[not.missing]
		betahat <- solve(crossprod(X), crossprod(X, Y))
		##crosshyb <- max(median(medianA.AA[, k]) - median(medianA.AA.np[, k]), 0)
		crosshyb <- 0
		X <- cbind(1, log2(medianA.AA.np[, k] + crosshyb))
		logPhiT <- X %*% betahat
		phiA.np[, k] <- 2^(logPhiT)
		nuA.np[, k] <- medianA.AA.np[,k]-2*phiA.np[, k]
##		cA[, k] <- 1/phiA.np[, J] * (A.np - nuA.np[, J])
	}
	if(THR.NU.PHI){
		nuA.np[nuA.np < MIN.NU] <- MIN.NU
		phiA.np[phiA.np < MIN.PHI] <- MIN.PHI
	}
	nuA(object)[marker.index, batch.index] <- nuA.np
	phiA(object)[marker.index, batch.index] <- phiA.np
	if(is.lds) { close(object); return(TRUE)}
	return(object)
}

summarizeMaleXNps <- function(marker.index,
			      batches,
			      object, MIN.SAMPLES){
	nr <- length(marker.index)
	nc <- length(batchNames(object))
	NN.Mlist <- imputed.medianA <- imputed.medianB <- shrink.madA <- shrink.madB <- vector("list", nc)
	open(object$gender)
	gender <- object$gender[]
	open(A(object))
	AA <- as.matrix(A(object)[marker.index, gender==1])
	madA.AA <- medianA.AA <- matrix(NA, nr, nc)
	colnames(madA.AA) <- colnames(medianA.AA) <- batchNames(object)
	numberMenPerBatch <- rep(NA, nc)
	for(k in seq_along(batches)){
		B <- batches[[k]]
		this.batch <- unique(as.character(batch(object)[B]))
		gender <- object$gender[B]
		if(sum(gender==1) < MIN.SAMPLES) next()
		sns.batch <- sampleNames(object)[B]
		##subset GG apppriately
		sns <- colnames(AA)
		J <- sns%in%sns.batch
		numberMenPerBatch[k] <- length(J)
		medianA.AA[, k] <- rowMedians(AA[, J, drop=FALSE], na.rm=TRUE)
		madA.AA[, k] <- rowMAD(AA[, J, drop=FALSE], na.rm=TRUE)
	}
	return(list(medianA.AA=medianA.AA,
		    madA.AA=madA.AA))
}


summarizeXGenotypes <- function(marker.index,
				batches,
				object,
				GT.CONF.THR,
				MIN.OBS,
				MIN.SAMPLES,
				verbose,
				is.lds,
				DF.PRIOR,
				gender="male", ...){
	I <- unlist(batches)
	open(object$gender)
	if(gender == "male"){
		gender.index <- which(object$gender[] == 1)
	} else {
		gender.index <- which(object$gender[] == 2)
	}
	batches <- lapply(batches, function(x, gender.index) intersect(x, gender.index), gender.index)
	batch.names <- names(batches)
	batch.index <- which(batchNames(object) %in% batch.names)
	nr <- length(marker.index)
	nc <- length(batch.index)
	NN.Mlist <- medianA <- medianB <- shrink.madA <- shrink.madB <- vector("list", nc)
	names(NN.Mlist) <- names(medianA) <- names(medianB) <- names(shrink.madA) <- names(shrink.madB) <- batch.names
	open(calls(object))
	open(snpCallProbability(object))
	open(A(object))
	open(B(object))
	GG <- as.matrix(calls(object)[marker.index, ])
	CP <- as.matrix(snpCallProbability(object)[marker.index, ])
	AA <- as.matrix(A(object)[marker.index, ])
	BB <- as.matrix(B(object)[marker.index, ])
	for(k in seq_along(batches)){
		sample.index <- batches[[k]]
		if(length(sample.index) == 0) next()
		this.batch <- unique(as.character(batch(object)[sample.index]))
		##gender <- object$gender[sample.index]
		if(length(sample.index) < MIN.SAMPLES) next()
		sns.batch <- sampleNames(object)[sample.index]
		##subset GG apppriately
		sns <- colnames(GG)
		J <- sns%in%sns.batch
		G <- GG[, J, drop=FALSE]
		xx <- CP[, J, drop=FALSE]
		p <- i2p(xx)
		highConf <- (1-exp(-xx/1000)) > GT.CONF.THR
		G <- G*highConf
		A <- AA[, J, drop=FALSE]
		B <- BB[, J, drop=FALSE]
		G.AA <- G==1
		G.AA[G.AA==FALSE] <- NA
		G.AB <- G==2
		G.AB[G.AB==FALSE] <- NA
		G.BB <- G==3
		G.BB[G.BB==FALSE] <- NA
		N.AA <- rowSums(G.AA, na.rm=TRUE)
		if(gender == "female")
			N.AB <- rowSums(G.AB, na.rm=TRUE)
		N.BB <- rowSums(G.BB, na.rm=TRUE)
		summaryStats <- function(X, INT, FUNS){
			tmp <- matrix(NA, nrow(X), length(FUNS))
			for(j in seq_along(FUNS)){
				FUN <- match.fun(FUNS[j])
				tmp[, j] <- FUN(X*INT, na.rm=TRUE)
			}
			tmp
		}
		statsA.AA <- summaryStats(G.AA, A, FUNS=c("rowMedians", "rowMAD"))
		if(gender == "female")
			statsA.AB <- summaryStats(G.AB, A, FUNS=c("rowMedians", "rowMAD"))
		statsA.BB <- summaryStats(G.BB, A, FUNS=c("rowMedians", "rowMAD"))
		statsB.AA <- summaryStats(G.AA, B, FUNS=c("rowMedians", "rowMAD"))
		if(gender == "female")
			statsB.AB <- summaryStats(G.AB, B, FUNS=c("rowMedians", "rowMAD"))
		statsB.BB <- summaryStats(G.BB, B, FUNS=c("rowMedians", "rowMAD"))
		if(gender=="male"){
			medianA[[k]] <- cbind(statsA.AA[, 1], ##statsA.AB[, 1],
					      statsA.BB[, 1])
			medianB[[k]] <- cbind(statsB.AA[, 1], ##statsB.AB[, 1],
					      statsB.BB[, 1])
			madA <- cbind(statsA.AA[, 2],  ##statsA.AB[, 2],
				      statsA.BB[, 2])
			madB <- cbind(statsB.AA[, 2], ##statsB.AB[, 2],
				      statsB.BB[, 2])
			NN <- cbind(N.AA, N.BB)
			rm(statsA.AA, statsA.BB, statsB.AA, statsB.BB)
		} else {
			medianA[[k]] <- cbind(statsA.AA[, 1], statsA.AB[, 1],
					      statsA.BB[, 1])
			medianB[[k]] <- cbind(statsB.AA[, 1], statsB.AB[, 1],
					      statsB.BB[, 1])
			madA <- cbind(statsA.AA[, 2],  statsA.AB[, 2],
				      statsA.BB[, 2])
			madB <- cbind(statsB.AA[, 2], statsB.AB[, 2],
				      statsB.BB[, 2])
			NN <- cbind(N.AA, N.AB, N.BB)
			rm(statsA.AA, statsA.AB, statsA.BB, statsB.AA, statsB.AB, statsB.BB)
		}
		NN.Mlist[[k]] <- NN
		shrink.madA[[k]] <- shrink(madA, NN, DF.PRIOR)
		shrink.madB[[k]] <- shrink(madB, NN, DF.PRIOR)
		##---------------------------------------------------------------------------
		## SNPs that we'll use for imputing location/scale of unobserved genotypes
		##---------------------------------------------------------------------------
		index.complete <- indexComplete(NN, medianA[[k]], medianB[[k]], MIN.OBS)
		##---------------------------------------------------------------------------
		## Impute sufficient statistics for unobserved genotypes (plate-specific)
		##---------------------------------------------------------------------------
		if(gender=="male"){
			res <- imputeCenterX(medianA[[k]], medianB[[k]], NN, index.complete, MIN.OBS)
		} else {
			unobservedAA <- NN[, 1] < MIN.OBS
			unobservedAB <- NN[, 2] < MIN.OBS
			unobservedBB <- NN[, 3] < MIN.OBS
			unobserved.index <- vector("list", 3)
			## AB, BB observed
			unobserved.index[[1]] <- which(unobservedAA & (NN[, 2] >= MIN.OBS & NN[, 3] >= MIN.OBS))
			## AA and BB observed (strange)
			unobserved.index[[2]] <- which(unobservedAB & (NN[, 1] >= MIN.OBS & NN[, 3] >= MIN.OBS))
			## AB and AA observed
			unobserved.index[[3]] <- which(unobservedBB & (NN[, 2] >= MIN.OBS & NN[, 1] >= MIN.OBS))
			res <- imputeCenter(medianA[[k]], medianB[[k]], index.complete, unobserved.index)
			medianA[[k]] <- res[[1]]
			medianB[[k]] <- res[[2]]

			## RS: For Monomorphic SNPs a mixture model may be better
			## RS: Further, we can improve estimation by borrowing strength across batch
			unobserved.index[[1]] <- which(unobservedAA & unobservedAB)
			unobserved.index[[2]] <- which(unobservedBB & unobservedAB)
			unobserved.index[[3]] <- which(unobservedAA & unobservedBB) ## strange
			res <- imputeCentersForMonomorphicSnps(medianA[[k]], medianB[[k]],
							       index.complete,
							       unobserved.index)
			medianA[[k]] <- res[[1]]; medianB[[k]] <- res[[2]]
		}
		medianA[[k]] <- res[[1]]
		medianB[[k]] <- res[[2]]
	}
	close(calls(object))
	close(snpCallProbability(object))
	close(A(object))
	close(B(object))
	return(list(madA=shrink.madA,
		    madB=shrink.madB,
		    NN=NN.Mlist,
		    medianA=medianA,
		    medianB=medianB))
}

## X chromosome, SNPs
fit.lm3 <- function(strata,
		    index.list,
		    object,
		    SNRMin,
		    MIN.SAMPLES,
		    MIN.OBS,
		    DF.PRIOR,
		    GT.CONF.THR,
		    THR.NU.PHI,
		    MIN.NU,
		    MIN.PHI,
		    verbose, is.lds, CHR.X, ...){
	##if(is.lds) {physical <- get("physical"); open(object)}
	if(verbose) message("      Probe stratum ", strata, " of ", length(index.list))
	open(object$gender)
	gender <- object$gender[]
	enough.males <- sum(gender==1) >= MIN.SAMPLES
	enough.females <- sum(gender==2) >= MIN.SAMPLES
	if(!enough.males & !enough.females){
		message(paste("fewer than", MIN.SAMPLES, "men and women.  Copy number not estimated for CHR X"))
		return(object)
	}
	marker.index <- index.list[[strata]]
	batches <- split(seq_along(batch(object)), as.character(batch(object)))
	batches <- batches[sapply(batches, length) >= MIN.SAMPLES]
	batch.names <- names(batches)
	batch.index <- which(batchNames(object) %in% batch.names)

	nuA <- as.matrix(nuA(object)[marker.index, batch.index])
	nuB <- as.matrix(nuB(object)[marker.index, batch.index])
	phiA <- as.matrix(phiA(object)[marker.index, batch.index])
	phiB <- as.matrix(phiB(object)[marker.index, batch.index])
	phiA2 <- as.matrix(phiPrimeA(object)[marker.index, batch.index])
	phiB2 <- as.matrix(phiPrimeB(object)[marker.index, batch.index])
	if(enough.males){
		res <- summarizeXGenotypes(marker.index=marker.index,
					   batches=batches,
					   object=object,
					   GT.CONF.THR=GT.CONF.THR,
					   MIN.SAMPLES=MIN.SAMPLES,
					   MIN.OBS=MIN.OBS,
					   verbose=verbose,
					   is.lds=is.lds,
					   DF.PRIOR=DF.PRIOR/2,
					   gender="male")
		madA.Mlist <- res[["madA"]]
		madB.Mlist <- res[["madB"]]
		medianA.Mlist <- res[["medianA"]]
		medianB.Mlist <- res[["medianB"]]
		NN.Mlist <- res[["NN"]]
		rm(res)
		## Need N, median, mad
	}
	if(enough.females){
		res <- summarizeXGenotypes(marker.index=marker.index,
						 batches=batches,
						 object=object,
						 GT.CONF.THR=GT.CONF.THR,
						 MIN.SAMPLES=MIN.SAMPLES,
						 MIN.OBS=MIN.OBS,
						 verbose=verbose,
						 is.lds=is.lds,
						 DF.PRIOR=DF.PRIOR/2,
					   gender="female")
		madA.Flist <- res[["madA"]]
		madB.Flist <- res[["madB"]]
		medianA.Flist <- res[["medianA"]]
		medianB.Flist <- res[["medianB"]]
		NN.Flist <- res[["NN"]]
		rm(res)
              }
        enough.men <- enough.women <- FALSE
	for(k in seq_along(batches)){
		sample.index <- batches[[k]]
		if(is.null(sample.index)) next()
		this.batch <- unique(as.character(batch(object)[sample.index]))
		gender <- object$gender[sample.index]
		enough.men <- sum(gender==1) >= MIN.SAMPLES
		enough.women <- sum(gender==2) >= MIN.SAMPLES
		if(!enough.men & !enough.women) {
			if(verbose) message(paste("fewer than", MIN.SAMPLES, "men and women in batch", this.batch, ". CHR X copy number not available. "))
			next()
		}
		if(enough.women){
			madA.F <- madA.Flist[[k]]
			madB.F <- madB.Flist[[k]]
			medianA.F <- medianA.Flist[[k]]
			medianB.F <- medianB.Flist[[k]]
			NN.F <- NN.Flist[[k]]
		}
		if(enough.men){
			madA.M <- madA.Mlist[[k]]
			madB.M <- madB.Mlist[[k]]
			medianA.M <- medianA.Mlist[[k]]
			medianB.M <- medianB.Mlist[[k]]
			NN.M <- NN.Mlist[[k]]
		}
		if(enough.men & enough.women){
			if(any(madA.F == 0)){
				message("Zeros in median absolute deviation.  Not estimating CN for chrX for batch ", names(batches)[k])
				next()
			}
			betas <- fit.wls(cbind(NN.M, NN.F),
					 sigma=cbind(madA.M, madA.F),
					 allele="A",
					 Y=cbind(medianA.M, medianA.F),
					 autosome=FALSE)
			nuA[, k] <- betas[1, ]
			phiA[, k] <- betas[2, ]
			phiA2[, k] <- betas[3, ]
			betas <- fit.wls(cbind(NN.M, NN.F),
					 sigma=cbind(madB.M, madB.F),
					 allele="B",
					 Y=cbind(medianB.M, medianB.F),
					 autosome=FALSE)
			nuB[, k] <- betas[1, ]
			phiB[, k] <- betas[2, ]
			phiB2[, k] <- betas[3, ]
		}
		if(enough.men & !enough.women){
			betas <- fit.wls(NN.M,##[, c(1,3)],
					 sigma=madA.M,##[, c(1,3)],
					 allele="A",
					 Y=medianA.M,##[, c(1,3)],
					 autosome=FALSE,
					 X=cbind(1, c(1, 0)))
			nuA[, k] <- betas[1, ]
			phiA[, k] <- betas[2, ]
			betas <- fit.wls(NN.M,##[, c(1,3)],
					 sigma=madB.M,#[, c(1,3)],
					 allele="B",
					 Y=medianB.M,#[, c(1,3)],
					 autosome=FALSE,
					 X=cbind(1, c(0, 1)))
			nuB[, k] <- betas[1, ]
			phiB[, k] <- betas[2, ]
		}
		if(!enough.men & enough.women){
			if(any(madA.F == 0)){
				message("Zeros in median absolute deviation.  Not estimating CN for chrX for batch ", names(batches)[k])
				next()
			}
			betas <- fit.wls(NN.F,
					 sigma=madA.F,
					 allele="A",
					 Y=medianA.F,
					 autosome=TRUE) ## can just use the usual design matrix for the women-only analysis
			nuA[, k] <- betas[1, ]
			phiA[, k] <- betas[2, ]
			betas <- fit.wls(NN.F,
					 sigma=madB.F,
					 allele="B",
					 Y=medianB.F,
					 autosome=TRUE) ## can just use the usual design matrix for the women-only analysis
			nuB[, k] <- betas[1, ]
			phiB[, k] <- betas[2, ]
		}
	}
	if(THR.NU.PHI){
		nuA[nuA < MIN.NU] <- MIN.NU
		nuB[nuB < MIN.NU] <- MIN.NU
		phiA[phiA < MIN.PHI] <- MIN.PHI
		phiA2[phiA2 < 1] <- 1
		phiB[phiB < MIN.PHI] <- MIN.PHI
		phiB2[phiB2 < 1] <- 1
	}
	nuA(object)[marker.index, batch.index] <- nuA
	nuB(object)[marker.index, batch.index] <- nuB
	phiA(object)[marker.index, batch.index] <- phiA
	phiB(object)[marker.index, batch.index] <- phiB
	phiPrimeA(object)[marker.index, batch.index] <- phiA2
	phiPrimeB(object)[marker.index, batch.index] <- phiB2
	##
	if(enough.women){
		medianA.AA(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianA.Flist, function(x) x[, 1]))
		medianA.AB(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianA.Flist, function(x) x[, 2]))
		medianA.BB(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianA.Flist, function(x) x[, 3]))
		medianB.AA(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianB.Flist, function(x) x[, 1]))
		medianB.AB(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianB.Flist, function(x) x[, 2]))
		medianB.BB(object)[marker.index, batch.index] <- do.call("cbind", lapply(medianB.Flist, function(x) x[, 3]))
	}
	if(is.lds) {close(object); return(TRUE)} else return(object)
}

##nonpolymorphic markers on X
fit.lm4 <- function(strata,
		    index.list,
		    object,
		    MIN.SAMPLES,
		    THR.NU.PHI,
		    MIN.NU,
		    MIN.PHI,
		    verbose, is.lds=TRUE, ...){
	##if(is.lds) {physical <- get("physical"); open(object)}
	## exclude batches that have fewer than MIN.SAMPLES
	open(object$gender)
	gender <- object$gender[]
	enough.males <- sum(gender==1) >= MIN.SAMPLES
	enough.females <- sum(gender==2) >= MIN.SAMPLES
	if(!enough.males & !enough.females){
		message(paste("fewer than", MIN.SAMPLES, "men and women.  Copy number not estimated for CHR X"))
		return(object)
	}
	excludeBatch <- names(table(batch(object)))[table(batch(object)) < MIN.SAMPLES]
	if(length(excludeBatch) > 0){
		bns <- unique(batch(object))
		batch.index <- which(!bns %in% excludeBatch)
		sample.index <- which(!batch(object)%in% excludeBatch)
	} else {
		sample.index <- 1:ncol(object)
		batch.index <- as.integer(as.factor(unique(batch(object))))
	}
	if(verbose) message("      Probe stratum ", strata, " of ", length(index.list))
	marker.index <- index.list[[strata]]
	batches <- split(seq_along(batch(object)), as.character(batch(object)))
	batches <- batches[batch.index]
	nc <- length(batch.index)
	if(enough.males){
		res <- summarizeMaleXNps(marker.index=marker.index,
					 batches=batches,
					 object=object, MIN.SAMPLES=MIN.SAMPLES)
		medianA.AA.M <- res[["medianA.AA"]][, batch.index, drop=FALSE]
		madA.AA.M <- res[["madA.AA"]][, batch.index, drop=FALSE]
	}
	medianA.AA.F <- as.matrix(medianA.AA(object)[marker.index, batch.index, drop=FALSE]) ## median for women
	madA.AA.F <- as.matrix(madA.AA(object)[marker.index, batch.index, drop=FALSE]) ## median for women
	split.gender <- split(gender[sample.index], as.character(batch(object)[sample.index]))
	N.M <- sapply(split.gender, function(x) sum(x==1))
	N.F <- sapply(split.gender, function(x) sum(x==2))
	nuA <- as.matrix(nuA(object)[marker.index, batch.index, drop=FALSE])
	nuB <- as.matrix(nuB(object)[marker.index, batch.index, drop=FALSE])
	phiA <- as.matrix(phiA(object)[marker.index, batch.index, drop=FALSE])
	phiB <- as.matrix(phiB(object)[marker.index, batch.index, drop=FALSE])
	ii <- isSnp(object) & chromosome(object) < 23 & !is.na(chromosome(object))
	fns <- featureNames(object)[ii]
	flags <- as.matrix(flags(object)[ii, batch.index])
	fns.noflags <- fns[rowSums(flags, na.rm=T) == 0]
	if(length(fns.noflags) == 0){
		warning("All features in one of the probeset batches were flagged. The data is processed in probeset batches to reduce RAM, but the error suggests that increasing the size of the batches may help.  For example, ocProbesets(4*ocProbesets()) would increase the current size of the probeset batches by 4.")
		fns.noflags <- featureNames(object)[marker.index] ## allow processing to continue
	}
	index.flag <- match(fns.noflags, featureNames(object))
	snp.index <- sample(index.flag, min(c(length(index.flag), 10000)))
	##
	N.AA <- as.matrix(N.AA(object)[snp.index, batch.index, drop=FALSE])
	N.AB <- as.matrix(N.AB(object)[snp.index, batch.index, drop=FALSE])
	N.BB <- as.matrix(N.BB(object)[snp.index, batch.index, drop=FALSE])
	enoughAA <- rowSums(N.AA < 5) == 0
	enoughAB <- rowSums(N.AB < 5) == 0
	enoughBB <- rowSums(N.BB < 5) == 0
	snp.index <- snp.index[enoughAA & enoughAB & enoughBB]
	if(length(snp.index) < 100){
		message("too few snps pass criteria for estimating parameters for NP markers on chr X")
		return(object)
	}
	nuA.snp <- as.matrix(nuA(object)[snp.index, batch.index, drop=FALSE])
	nuA.snp.notmissing <- rowSums(is.na(nuA.snp)) == 0
	nuA.snp.notnegative <- rowSums(as.matrix(nuA(object)[snp.index, batch.index, drop=FALSE]) < 20) == 0
	snp.index <- snp.index[nuA.snp.notmissing & nuA.snp.notnegative]
	medianA.AA.snp <- as.matrix(medianA.AA(object)[snp.index, batch.index])
	medianB.BB.snp <- as.matrix(medianB.BB(object)[snp.index, batch.index])
	nuA.snp <- as.matrix(nuA(object)[snp.index, batch.index])
	nuB.snp <- as.matrix(nuB(object)[snp.index, batch.index])
	phiA.snp <- as.matrix(phiA(object)[snp.index, batch.index])
	phiB.snp <- as.matrix(phiB(object)[snp.index, batch.index])
##	pseudoAR <- position(object)[snp.index] < 2709520 | (position(object)[snp.index] > 154584237 & position(object)[snp.index] < 154913754)
##	pseudoAR[is.na(pseudoAR)] <- FALSE
	for(k in seq_along(batches)){
		B <- batches[[k]]
		this.batch <- unique(as.character(batch(object)[B]))
		gender <- object$gender[B]
		enough.men <- N.M[[this.batch]] >= MIN.SAMPLES
		enough.women <- N.F[[this.batch]] >= MIN.SAMPLES
		if(!enough.men & !enough.women) {
			if(verbose) message(paste("fewer than", MIN.SAMPLES, "men and women in batch", this.batch, ". CHR X copy number not available. "))
			next()
		}
		tmp <- cbind(medianA.AA.snp[, k], medianB.BB.snp[,k], phiA.snp[, k], phiB.snp[, k])
		tmp <- tmp[rowSums(is.na(tmp) == 0) & rowSums(tmp < 20) == 0, ]
		if(nrow(tmp) < 100){
			stop("too few markers for estimating nonpolymorphic CN on chromosome X")
		}
		X <- cbind(1, log2(c(tmp[, 1], tmp[, 2])))
		Y <- log2(c(tmp[, 3], tmp[, 4]))
		notmissing.index <- which(rowSums(is.na(X)) == 0 & !is.na(Y))
		X <- X[notmissing.index, ]
		Y <- Y[notmissing.index]
		betahat <- solve(crossprod(X), crossprod(X, Y))
		if(enough.men){
			X.men <- cbind(1, log2(medianA.AA.M[, k]))
			Yhat1 <- as.numeric(X.men %*% betahat)
			## put intercept and slope for men in nuB and phiB
			phiB[, k] <- 2^(Yhat1)
			nuB[, k] <- medianA.AA.M[, k] - 1*phiB[, k]
		}
		X.fem <- cbind(1, log2(medianA.AA.F[, k]))
		Yhat2 <- as.numeric(X.fem %*% betahat)
		phiA[, k] <- 2^(Yhat2)
		nuA[, k] <- medianA.AA.F[, k] - 2*phiA[, k]
	}
	if(THR.NU.PHI){
		nuA[nuA < MIN.NU] <- MIN.NU
		phiA[phiA < MIN.PHI] <- MIN.PHI
		nuB[nuB < MIN.NU] <- MIN.NU
		phiB[phiB < MIN.PHI] <- MIN.PHI
	}
	nuA(object)[marker.index, batch.index] <- nuA
	phiA(object)[marker.index, batch.index] <- phiA
	nuB(object)[marker.index, batch.index] <- nuB
	phiB(object)[marker.index, batch.index] <- phiB
	##if(is.lds) {close(object); return(TRUE)} else return(object)
	if(is.lds) {close(object); return(TRUE)} else return(object)
}

whichPlatform <- function(cdfName){
	index <- grep("genomewidesnp", cdfName)
	if(length(index) > 0){
		platform <- "affymetrix"
	} else{
		index <- grep("human", cdfName)
		platform <- "illumina"
	}
	return(platform)
}

cnrma2 <- function(A, filenames, row.names, verbose=TRUE, seed=1, cdfName, sns){

}

imputeCenter <- function(muA, muB, index.complete, index.missing){
	index <- index.missing
	mnA <- muA
	mnB <- muB
	if(length(index.complete) >= 100){
		for(j in 1:3){
			if(length(index[[j]]) == 0) next()
			X <- cbind(1, mnA[index.complete,  -j, drop=FALSE], mnB[index.complete,  -j, drop=FALSE])
			Y <- cbind(mnA[index.complete, j], mnB[index.complete,  j])
			betahat <- solve(crossprod(X), crossprod(X,Y))
			X <- cbind(1, mnA[index[[j]],  -j, drop=FALSE],  mnB[index[[j]],  -j, drop=FALSE])
			mus <- X %*% betahat
			## threshold imputed intensities that are very small
			mus[mus < 64] <- 64
			muA[index[[j]], j] <- mus[, 1]
			muB[index[[j]], j] <- mus[, 2]
		}
	}
	results <- list(muA, muB)
	return(results)
}

indexComplete <- function(NN, medianA, medianB, MIN.OBS){
	Nindex <- which(rowSums(NN > MIN.OBS) == ncol(NN))
	correct.order <- which(medianA[, 1] > medianA[, ncol(medianA)] & medianB[, ncol(medianB)] > medianB[, 1])
	index.complete <- intersect(Nindex, correct.order)
	size <- min(5000, length(index.complete))
	if(size == 5000) index.complete <- sample(index.complete, 5000, replace=TRUE)
##	if(length(index.complete) < 100){
##		stop("fewer than 100 snps pass criteria for imputing unobserved genotype location/scale")
##	}
	return(index.complete)
}

imputeCentersForMonomorphicSnps <- function(medianA, medianB, index.complete, unobserved.index){
	cols <- c(3, 1, 2)
	if(length(index.complete) < 100){
		##message("index.complete less than 100.  No imputation")
		results <- list(medianA=medianA, medianB=medianB)
		return(results)
	}
	for(j in 1:3){
		if(length(unobserved.index[[j]]) == 0) next()
		kk <- cols[j]
		X <- cbind(1, medianA[index.complete, kk], medianB[index.complete, kk])
		Y <- cbind(medianA[index.complete,  -kk],
			   medianB[index.complete,  -kk])
		betahat <- solve(crossprod(X), crossprod(X,Y))
		X <- cbind(1, medianA[unobserved.index[[j]],  kk], medianB[unobserved.index[[j]],  kk])
		mus <- X %*% betahat
		medianA[unobserved.index[[j]], -kk] <- mus[, 1:2]
		medianB[unobserved.index[[j]], -kk] <- mus[, 3:4]
	}
	results <- list(medianA=medianA, medianB=medianB)
	return(results)
}


imputeCenterX <- function(muA, muB, Ns, index.complete, MIN.OBS){
	##index1 <- which(Ns[, 1] == 0 & Ns[, 3] > MIN.OBS)
	index1 <- which(Ns[, 1] == 0 & Ns[, 2] > MIN.OBS)
	if(length(index1) > 0){
		##X <- cbind(1, muA[index.complete, 3], muB[index.complete, 3])
		X <- cbind(1, muA[index.complete, 2], muB[index.complete, 2])
		Y <- cbind(1, muA[index.complete, 1], muB[index.complete, 1])
##		X <- cbind(1, muA[index.complete[[1]], 3], muB[index.complete[[1]], 3])
##		Y <- cbind(1, muA[index.complete[[1]], 1], muB[index.complete[[1]], 1])
		betahat <- solve(crossprod(X), crossprod(X,Y))
		##now with the incomplete SNPs
		##X <- cbind(1, muA[index1, 3], muB[index1, 3])
		X <- cbind(1, muA[index1, 2], muB[index1, 2])
		mus <- X %*% betahat
		muA[index1, 1] <- mus[, 2]
		muB[index1, 1] <- mus[, 3]
	}
	index1 <- which(Ns[, 2] == 0)
	if(length(index1) > 0){
		X <- cbind(1, muA[index.complete, 1], muB[index.complete, 1])
		Y <- cbind(1, muA[index.complete, 2], muB[index.complete, 2])
##		X <- cbind(1, muA[index.complete[[2]], 1], muB[index.complete[[2]], 1])
##		Y <- cbind(1, muA[index.complete[[2]], 3], muB[index.complete[[2]], 3])
		betahat <- solve(crossprod(X), crossprod(X,Y))
		##now with the incomplete SNPs
		X <- cbind(1, muA[index1, 1], muB[index1, 1])
		mus <- X %*% betahat
		muA[index1, 2] <- mus[, 2]
		muB[index1, 2] <- mus[, 3]
	}
	list(muA, muB)
}


## constructors for Illumina platform
constructIlluminaFeatureData <- function(gns, cdfName){
	pkgname <- paste(cdfName, "Crlmm", sep="")
	path <- system.file("extdata", package=pkgname)
	load(file.path(path, "cnProbes.rda"))
	load(file.path(path, "snpProbes.rda"))
	cnProbes$chr <- chromosome2integer(cnProbes$chr)
	cnProbes <- as.matrix(cnProbes)
	snpProbes$chr <- chromosome2integer(snpProbes$chr)
	snpProbes <- as.matrix(snpProbes)
	mapping <- rbind(snpProbes, cnProbes, deparse.level=0)
	mapping <- mapping[match(gns, rownames(mapping)), ]
	isSnp <- 1L-as.integer(gns %in% rownames(cnProbes))
	mapping <- cbind(mapping, isSnp, deparse.level=0)
	stopifnot(identical(rownames(mapping), gns))
	colnames(mapping) <- c("chromosome", "position", "isSnp")
	new("AnnotatedDataFrame",
	    data=data.frame(mapping),
	    varMetadata=data.frame(labelDescription=colnames(mapping)))
}


constructIlluminaAssayData <- function(np, snp, object, storage.mode="environment", nr){
	stopifnot(identical(snp$gns, featureNames(object)))
	gns <- c(featureNames(object), np$gns)
	sns <- np$sns
	np <- np[1:2]
	snp <- snp[1:2]
	stripnames <- function(x) {
		dimnames(x) <- NULL
		x
	}
	np <- lapply(np, stripnames)
	snp <- lapply(snp, stripnames)
	is.ff <- is(snp[[1]], "ff") | is(snp[[1]], "ffdf")
	if(is.ff){
		lapply(snp, open)
		open(calls(object))
		open(snpCallProbability(object))
##		lapply(np, open)
	}
	##tmp <- rbind(as.matrix(snp[[1]]), as.matrix(np[[1]]), deparse.level=0)
	A.snp <- snp[[1]]
	B.snp <- snp[[2]]
	##Why is np not a ff object?
	A.np <- np[[1]]
	B.np <- np[[2]]
	nc <- ncol(object)
	if(is.ff){
		NA.vec <- rep(NA, nrow(A.np))
		AA <- initializeBigMatrix("A", nr, nc, vmode="integer")
		BB <- initializeBigMatrix("B", nr, nc, vmode="integer")
		GG <- initializeBigMatrix("calls", nr, nc, vmode="integer")
		PP <- initializeBigMatrix("confs", nr, nc, vmode="integer")
		for(j in 1:ncol(object)){
			AA[, j] <- c(snp[[1]][, j], np[[1]][, j])
			BB[, j] <- c(snp[[2]][, j], np[[2]][, j])
			GG[, j] <- c(calls(object)[, j], NA.vec)
			PP[, j] <- c(snpCallProbability(object)[, j], NA.vec)

		}
	} else {
		AA <- rbind(snp[[1]], np[[1]], deparse.level=0)
		BB <- rbind(snp[[2]], np[[2]], deparse.level=0)
		gt <- stripnames(calls(object))
		emptyMatrix <- matrix(integer(), nrow(np[[1]]), ncol(A))
		GG <- rbind(gt, emptyMatrix, deparse.level=0)
		pr <- stripnames(snpCallProbability(object))
		PP <- rbind(pr, emptyMatrix, deparse.level=0)
	}
	assayDataNew(storage.mode,
		     alleleA=AA,
		     alleleB=BB,
		     call=GG,
		     callProbability=PP)
}
constructIlluminaCNSet <- function(crlmmResult,
				   path,
				   snpFile,
				   cnFile){
	load(file.path(path, "snpFile.rda"))
	res <- get("res")
	load(file.path(path, "cnFile.rda"))
	cnAB <- get("cnAB")
	fD <- constructIlluminaFeatureData(c(res$gns, cnAB$gns), cdfName="human370v1c")
	##new.order <- order(fD$chromosome, fD$position)
	##fD <- fD[new.order, ]
	aD <- constructIlluminaAssayData(cnAB, res, crlmmResult, nr=nrow(fD))
	##protocolData(crlmmResult)$batch <- vector("integer", ncol(crlmmResult))
	new("CNSet",
	    call=aD[["call"]],
	    callProbability=aD[["callProbability"]],
	    alleleA=aD[["alleleA"]],
	    alleleB=aD[["alleleB"]],
	    phenoData=phenoData(crlmmResult),
	    protocolData=protocolData(crlmmResult),
	    featureData=fD,
	    batch=batch,
	    annotation="human370v1c")

}



ellipseCenters <- function(object, index, allele, batch, log.it=TRUE){
	ubatch <- unique(batch(object))[batch]
	Nu <- nu(object, allele)[index, batch]
	Phi <- phi(object, allele)[index, batch]
	centers <- list(Nu, Nu+Phi, Nu+2*Phi)
	if(log.it)
		centers <- lapply(centers, log2)
	myLabels <- function(allele){
		switch(allele,
		       A=c("BB", "AB", "AA"),
		       B=c("AA", "AB", "BB"),
		       stop("allele must be 'A' or 'B'")
		       )
	}
	nms <- myLabels(allele)
	names(centers) <- nms
	fns <- featureNames(object)[index]
	centers$fns <- fns
	return(centers)
}


shrinkSummary <- function(object,
			  type="SNP",
			  MIN.OBS=1,
			  MIN.SAMPLES=10,
			  DF.PRIOR=50,
			  verbose=TRUE,
			  marker.index){
	stopifnot(type[[1]] == "SNP")
	CHR.X <- FALSE ## this is no longer needed
	is.lds <- is(calls(object), "ffdf") | is(calls(object), "ff_matrix")
	if(is.lds) stopifnot(isPackageLoaded("ff"))
	if(missing(marker.index)){
		batch <- batch(object)
		is.snp <- isSnp(object)
		is.autosome <- chromosome(object) < 23
		is.annotated <- !is.na(chromosome(object))
		is.X <- chromosome(object) == 23
		marker.index <- whichMarkers(type[[1]], is.snp, is.autosome, is.annotated, is.X)
	}
	if(is.lds){
		index.list <- splitIndicesByLength(marker.index, ocProbesets())
##		if(parStatus()){
##			index.list <- splitIndicesByNode(marker.index)
##		} else index.list <- splitIndicesByLength(marker.index, ocProbesets())
		ocLapply(seq(along=index.list),
			 shrinkGenotypeSummaries,
			 index.list=index.list,
			 object=object,
			 verbose=verbose,
			 MIN.OBS=MIN.OBS,
			 MIN.SAMPLES=MIN.SAMPLES,
			 DF.PRIOR=DF.PRIOR,
			 is.lds=is.lds,
			 neededPkgs="crlmm")
	} else {
		object <- shrinkGenotypeSummaries(strata=1,
			      index.list=list(marker.index),
			      object=object,
			      verbose=verbose,
			      MIN.OBS=MIN.OBS,
			      MIN.SAMPLES=MIN.SAMPLES,
			      DF.PRIOR=DF.PRIOR,
			      is.lds=is.lds)
	}
	if(is.lds) return(TRUE) else return(object)
}

genotypeSummary <- function(object,
			    GT.CONF.THR=0.80,
			    type=c("SNP", "NP", "X.SNP", "X.NP"), ##"X.snps", "X.nps"),
			    verbose=TRUE,
			    marker.index){
	if(type == "X.SNP" | type=="X.NP"){
		CHR.X <- TRUE
	} else CHR.X <- FALSE
	is.lds <- is(calls(object), "ffdf") | is(calls(object), "ff_matrix")
	if(is.lds) stopifnot(isPackageLoaded("ff"))
	if(missing(marker.index)){
		batch <- batch(object)
		is.snp <- isSnp(object)
		is.autosome <- chromosome(object) < 23
		is.annotated <- !is.na(chromosome(object))
		is.X <- chromosome(object) == 23
		marker.index <- whichMarkers(type[[1]], is.snp, is.autosome, is.annotated, is.X)
	}
	summaryFxn <- function(type){
		switch(type,
		       SNP="summarizeSnps",
		       NP="summarizeNps",
		       X.NP="summarizeNps")
	}
	myf <- summaryFxn(type[[1]])
	FUN <- get(myf)
	if(is.lds){
		index.list <- splitIndicesByLength(marker.index, ocProbesets())
		ocLapply(seq(along=index.list),
			 FUN,
			 index.list=index.list,
			 object=object,
			 batchSize=ocProbesets(),
			 GT.CONF.THR=GT.CONF.THR,
			 verbose=verbose,
			 CHR.X=CHR.X,
			 neededPkgs="crlmm")
	} else{
		object <- FUN(strata=1,
			      index.list=list(marker.index),
			      object=object,
			      batchSize=ocProbesets(),
			      GT.CONF.THR=GT.CONF.THR,
			      verbose=verbose,
			      CHR.X=CHR.X)
	}
	if(is.lds) return(TRUE) else return(object)
}

whichMarkers <- function(type, is.snp, is.autosome, is.annotated, is.X){
	switch(type,
	       SNP=which(is.snp & is.autosome & is.annotated),
	       NP=which(!is.snp & is.autosome),
	       X.SNP=which(is.snp & is.X),
	       X.NP=which(!is.snp & is.X),
	       stop("'type' must be one of 'SNP', 'NP', 'X.SNP', or 'X.NP'")
	       )
}

summarizeNps <- function(strata, index.list, object, batchSize,
			 GT.CONF.THR, verbose=TRUE, CHR.X=FALSE, ...){
	is.lds <- is(calls(object), "ffdf") | is(calls(object), "ff_matrix")
	if(is.lds) {physical <- get("physical"); open(object)}
	if(verbose) message("      Probe stratum ", strata, " of ", length(index.list))
	index <- index.list[[strata]]
##	if(CHR.X) {
##		sample.index <- which(object$gender==2)
##		batches <- split(sample.index, as.character(batch(object))[sample.index])
##	} else {
##		batches <- split(seq_along(batch(object)), as.character(batch(object)))
##	}
	batches <- split(seq_along(batch(object)), as.character(batch(object)))
	batchnames <- batchNames(object)
	nr <- length(index)
	nc <- length(batchnames)
	N.AA <- medianA.AA <- madA.AA <- tau2A.AA <- matrix(NA, nr, nc)
	is.illumina <- whichPlatform(annotation(object)) == "illumina"
	open(A(object))
	open(object$gender)
	AA <- as.matrix(A(object)[index, ])
	if(is.illumina){
		BB <- as.matrix(B(object)[index, ])
		AVG <- (AA+BB)/2
		A(object)[index, ] <- AVG
		AA <- AVG
		rm(AVG, BB)
	}
	for(k in seq_along(batches)){
		sample.index <- batches[[k]]
		N.AA[, k] <- length(sample.index)
		if(CHR.X){
			gender <- object$gender[sample.index]
			sample.index <- sample.index[gender == 2]
			if(length(sample.index) == 0) next()
		}
		this.batch <- unique(as.character(batch(object)[sample.index]))
		j <- match(this.batch, batchnames)
		I.A <- AA[, sample.index, drop=FALSE]
		medianA.AA[, k] <- rowMedians(I.A, na.rm=TRUE)
		madA.AA[, k] <- rowMAD(I.A, na.rm=TRUE)
		## log2 Transform Intensities
		I.A <- log2(I.A)
		tau2A.AA[, k] <- rowMAD(I.A, na.rm=TRUE)^2
	}
	N.AA(object)[index,] <- N.AA
	medianA.AA(object)[index,] <- medianA.AA
	madA.AA(object)[index, ] <- madA.AA
	tau2A.AA(object)[index, ] <- tau2A.AA
	if(is.lds) return(TRUE) else return(object)
}

summarizeSnps <- function(strata,
			  index.list,
			  object,
			  GT.CONF.THR=0.80,
			  verbose=TRUE,
			  CHR.X=FALSE, ...){
##	if(is.lds) {
##		physical <- get("physical")
##		open(object)
##	}
	is.lds <- is(calls(object), "ffdf") | is(calls(object), "ff_matrix")
	if(is.lds) stopifnot(isPackageLoaded("ff"))
	if(verbose) message("      Probe stratum ", strata, " of ", length(index.list))
	index <- index.list[[strata]]
	batches <- split(seq_along(batch(object)), as.character(batch(object)))
	batchnames <- batchNames(object)
	nr <- length(index)
	nc <- length(batchnames)
	statsA.AA <- statsA.AB <- statsA.BB <- statsB.AA <- statsB.AB <- statsB.BB <- vector("list", nc)
	corrAA <- corrAB <- corrBB <- tau2A.AA <- tau2A.BB <- tau2B.AA <- tau2B.BB <- matrix(NA, nr, nc)
	Ns.AA <- Ns.AB <- Ns.BB <- matrix(NA, nr, nc)
	if(verbose) message("        Begin reading...")
	GG <- as.matrix(calls(object)[index, ])
	CP <- as.matrix(snpCallProbability(object)[index, ])
	AA <- as.matrix(A(object)[index, ])
	BB <- as.matrix(B(object)[index, ])
	FL <- as.matrix(flags(object)[index, ])
	summaryStats <- function(X, INT, FUNS){
		tmp <- matrix(NA, nrow(X), length(FUNS))
		for(j in seq_along(FUNS)){
			FUN <- match.fun(FUNS[j])
			tmp[, j] <- FUN(X*INT, na.rm=TRUE)
		}
		tmp
	}
	if(verbose) message("        Computing summaries...")
	for(k in seq_along(batches)){
		##note that the genotype frequency for AA would include 'A' on chromosome X for men
		sample.index <- batches[[k]]
		this.batch <- unique(as.character(batch(object)[sample.index]))
		j <- match(this.batch, batchnames)
		G <- GG[, sample.index, drop=FALSE]
		xx <- CP[, sample.index, drop=FALSE]
		highConf <- (1-exp(-xx/1000)) > GT.CONF.THR
		## Some markers may have genotype confidences scores that are ALL below the threshold
		## For these markers, provide statistical summaries based on all the samples and flag
		## Provide summaries for these markers and flag to indicate the problem
		## RS: check whether we need the next to lines and, if so, effect downstream
		ii <- which(rowSums(highConf) == 0)
		if(length(ii) > 0) highConf[ii, ] <- TRUE
		G <- G*highConf
		## table(rowSums(G==0))
		##G <- G*highConf*NORM
		A <- AA[, sample.index, drop=FALSE]
		B <- BB[, sample.index, drop=FALSE]
		## this can be time consuming...do only once
		G.AA <- G==1
		G.AA[G.AA==FALSE] <- NA
		G.AB <- G==2
		G.AB[G.AB==FALSE] <- NA
		G.BB <- G==3
		G.BB[G.BB==FALSE] <- NA
		Ns.AA[, k] <- rowSums(G.AA, na.rm=TRUE)
		Ns.AB[, k] <- rowSums(G.AB, na.rm=TRUE)
		Ns.BB[, k] <- rowSums(G.BB, na.rm=TRUE)
		if(CHR.X){
			gender <- object$gender[sample.index]
			sample.index <- sample.index[gender == 2]
			if(length(sample.index) == 0) next()
		}
		stats <- summaryStats(G.AA, A, FUNS=c("rowMedians", "rowMAD"))
		medianA.AA(object)[index, k] <- stats[, 1]
		##missing.index <- which(rowSums(is.na(medianA.AA(object)[index, ,drop=FALSE])) > 0)
		madA.AA(object)[index, k] <- stats[, 2]
		stats <- summaryStats(G.AB, A, FUNS=c("rowMedians", "rowMAD"))
		medianA.AB(object)[index, k] <- stats[, 1]
		madA.AB(object)[index, k] <- stats[, 2]
		stats <- summaryStats(G.BB, A, FUNS=c("rowMedians", "rowMAD"))
		medianA.BB(object)[index, k] <- stats[, 1]
		madA.BB(object)[index, k] <- stats[, 2]
		stats <- summaryStats(G.AA, B, FUNS=c("rowMedians", "rowMAD"))
		medianB.AA(object)[index, k] <- stats[, 1]
		madB.AA(object)[index, k] <- stats[, 2]
		stats <- summaryStats(G.AB, B, FUNS=c("rowMedians", "rowMAD"))
		medianB.AB(object)[index, k] <- stats[, 1]
		madB.AB(object)[index, k] <- stats[, 2]
		stats <- summaryStats(G.BB, B, FUNS=c("rowMedians", "rowMAD"))
		medianB.BB(object)[index, k] <- stats[, 1]
		madB.BB(object)[index, k] <- stats[, 2]
		## log2 Transform Intensities
		A <- log2(A); B <- log2(B)
		tau2A.AA[, k] <- summaryStats(G.AA, A, FUNS="rowMAD")^2
		tau2A.BB[, k] <- summaryStats(G.BB, A, FUNS="rowMAD")^2
		tau2B.AA[, k] <- summaryStats(G.AA, B, FUNS="rowMAD")^2
		tau2B.BB[, k] <- summaryStats(G.BB, B, FUNS="rowMAD")^2
		corrAA[, k] <- rowCors(A*G.AA, B*G.AA, na.rm=TRUE)
		corrAB[, k] <- rowCors(A*G.AB, B*G.AB, na.rm=TRUE)
		corrBB[, k] <- rowCors(A*G.BB, B*G.BB, na.rm=TRUE)
		rm(A, B, G.AA, G.AB, G.BB, xx, highConf, G)
		gc()
	}
	##---------------------------------------------------------------------------
	## grand mean
	##---------------------------------------------------------------------------
	if(FALSE){ ## no implemented
		if(length(batches) > 1 && "grandMean" %in% batchNames(object)){
			k <- match("grandMean", batchNames(object))
			if(verbose) message("        computing grand means...")
			highConf <- (1-exp(-CP/1000)) > GT.CONF.THR
			rm(CP); gc()
			## Some markers may have genotype confidences scores that are ALL below the threshold
			## For these markers, provide statistical summaries based on all the samples and flag
			## Provide summaries for these markers and flag to indicate the problem
			ii <- which(rowSums(highConf) == 0)
			if(length(ii) > 0) highConf[ii, ] <- TRUE
			GG <- GG*highConf
			rm(highConf); gc()
			Ns <- list(Ns.AA, Ns.AB, Ns.BB)
			rm(Ns.AA, Ns.AB, Ns.BB) ; gc()
			I <- list(AA, BB)
			lI <- lapply(I, log2)
			cors <- list(corrAA, corrAB, corrBB)
			rm(corrAA, corrAB, corrBB); gc()
			rm(AA,BB); gc()
			tau2 <- list(AA=list(tau2A.AA,
				     tau2B.AA),
				     AB=list(NULL, NULL),
				     BB=list(tau2A.BB,
				     tau2B.BB))
			rm(tau2A.AA, tau2B.AA, tau2A.BB, tau2B.BB); gc()
			for(i in 1:3){
				G.call <- isCall(GG, call=i)
				for(j in 1:2){
					computeSummary(object,
						       G.call,
						       call=i,
						       I=I[[j]],
						       allele=c("A", "B")[j],
						       Ns=Ns[[i]],
						       call=i,
						       tau2=tau2[[i]][[j]],
						       index=index)
				}
				updateCors(cors[[i]], G.call, lI)
			}
			##I <- lapply(I, log2)
			##AA <- log2(AA)
			##BB <- log2(BB)
			##		tau2A.AA[, k] <- summaryStats(G.AA, AA, FUNS="rowMAD")^2
			##		tau2A.BB[, k] <- summaryStats(G.BB, AA, FUNS="rowMAD")^2
			##tau2B.AA[, k] <- summaryStats(G.AA, BB, FUNS="rowMAD")^2
			##tau2B.BB[, k] <- summaryStats(G.BB, BB, FUNS="rowMAD")^2
			##		corrAA[, k] <- rowCors(AA*G.AA, BB*G.AA, na.rm=TRUE)
			##		corrAB[, k] <- rowCors(AA*G.AB, BB*G.AB, na.rm=TRUE)
			##		corrBB[, k] <- rowCors(AA*G.BB, BB*G.BB, na.rm=TRUE)
			##		rm(AA, BB); gc()
			##
			## TODO:   fill in NAs -- use code from shrinkGenotypeSummaries
			##
			##		rm(GG, CP, AA, BB, FL, stats)
			##		gc()
			##		G.AA <- GG==1
			##		G.AA[G.AA==FALSE] <- NA
			##		G.AB <- GG==2
			##		G.AB[G.AB==FALSE] <- NA
			##		G.BB <- GG==3
			##		G.BB[G.BB==FALSE] <- NA
			##		Ns.AA[, k] <- rowSums(G.AA, na.rm=TRUE)
			##		Ns.AB[, k] <- rowSums(G.AB, na.rm=TRUE)
			##		Ns.BB[, k] <- rowSums(G.BB, na.rm=TRUE)
			##		N.AA(object)[index,] <- Ns.AA
			##		N.AB(object)[index,] <- Ns.AB
			##		N.BB(object)[index,] <- Ns.BB

			## Calculate row medians and MADs
			##		medianA.AA(object)[index, k] <- stats[, 1]
			##		madA.AA(object)[index, k] <- stats[, 2]
			##		stats <- summaryStats(G.AB, AA, FUNS=c("rowMedians", "rowMAD"))
			##		medianA.AB(object)[index, k] <- stats[, 1]
			##		madA.AB(object)[index, k] <- stats[, 2]
			##		stats <- summaryStats(G.BB, AA, FUNS=c("rowMedians", "rowMAD"))
			##		medianA.BB(object)[index, k] <- stats[, 1]
			##		madA.BB(object)[index, k] <- stats[, 2]
			##		stats <- summaryStats(G.AA, BB, FUNS=c("rowMedians", "rowMAD"))
			##		medianB.AA(object)[index, k] <- stats[, 1]
			##		madB.AA(object)[index, k] <- stats[, 2]
			##		stats <- summaryStats(G.AB, BB, FUNS=c("rowMedians", "rowMAD"))
			##		medianB.AB(object)[index, k] <- stats[, 1]
			##		madB.AB(object)[index, k] <- stats[, 2]
			##		stats <- summaryStats(G.BB, BB, FUNS=c("rowMedians", "rowMAD"))
			##		medianB.BB(object)[index, k] <- stats[, 1]
			##		madB.BB(object)[index, k] <- stats[, 2]
			## log2 Transform Intensities
		}
	}
	##	if(verbose) message("        Begin writing...")
	N.AA(object)[index,] <- Ns.AA
	N.AB(object)[index,] <- Ns.AB
	N.BB(object)[index,] <- Ns.BB
	corrAA(object)[index,] <- corrAA
	corrAB(object)[index,] <- corrAB
	corrBB(object)[index,] <- corrBB
	tau2A.AA(object)[index, ] <- tau2A.AA
	tau2A.BB(object)[index, ] <- tau2A.BB
	tau2B.AA(object)[index, ] <- tau2B.AA
	tau2B.BB(object)[index, ] <- tau2B.BB
	if(is.lds) return(TRUE) else return(object)
}

isCall <- function(G, call){
	G.call <- G==call
	G.call[G.call==FALSE] <- NA
	G.call
}

##computeSummary <- function(object, G.call, call, I, allele, Ns, index){
##	k <- match("grandMean", batchNames(object))
##	stats <- summaryStats(G.call, I, FUNS=c("rowMedians", "rowMAD"))
##	Ns[, k] <- rowSums(G.call, na.rm=TRUE)
##	updateStats(stats, Ns, object, call, allele, index)
##	gc()
##	return()
##}

##updateTau <- function(object, tau2, G.call, call, I, allele, index){
##	k <- match("grandMean", batchNames(object))
##	logI <- log2(I)
##	rm(I); gc()
##	tau2[, k] <- summaryStats(G.call, logI, FUNS="rowMAD")^2
##	if(call==1 & allele=="A"){
##		tau2A.AA(object)[index, ] <- tau2
##	}
##	if(call==1 & allele=="B"){
##		tau2B.AA(object)[index, ] <- tau2
##	}
##	if(call==3 & allele=="A"){
##		tau2A.BB(object)[index, ] <- tau2
##	}
##	if(call==3 & allele=="B"){
##		tau2B.BB(object)[index, ] <- tau2
##	}
##	NULL
##}

##updateCors <- function(cors, G.call, I){
##	k <- match("grandMean", batchNames(object))
##	cors[, k] <- rowCors(I[[1]]*G.call, I[[2]]*G.call, na.rm=TRUE)
##	if(call==1){
##		corrAA(object)[index, ] <- cors
##	}
##	if(call==2){
##		corrAB(object)[index, ] <- cors
##	}
##	if(call==3){
##		corrBB(object)[index, ] <- cors
##	}
##}

##updateStats <- function(stats, Ns, object, call, allele, tau2, index){
##	if(call==1){
##		Ns.AA(object)[index, ] <- Ns
##		if(allele=="A"){
##			medianA.AA(object)[index, k] <- stats[, 1]
##			madA.AA(object)[index, k] <- stats[, 2]
##			updateTau(object, tau2, G.call, call, I, allele, index)
##		} else {
##			medianB.AA(object)[index, k] <- stats[, 1]
##			madB.AA(object)[index, k] <- stats[, 2]
##			updateTau(object, tau2, G.call, call, I, allele, index)
##		}
##	}
##	if(call==2){
##		Ns.AB(object)[index, ] <- Ns
##		if(allele=="A"){
##			medianA.AB(object)[index, k] <- stats[, 1]
##			madA.AB(object)[index, k] <- stats[, 2]
##		} else {
##			medianB.AB(object)[index, k] <- stats[, 1]
##			madB.AB(object)[index, k] <- stats[, 2]
##		}
##	}
##	if(call==3){
##		Ns.BB(object)[index, ] <- Ns
##		if(allele=="A"){
##			medianA.BB(object)[index, k] <- stats[, 1]
##			madA.BB(object)[index, k] <- stats[, 2]
##			updateTau(object, tau2, G.call, call, I, allele, index)
##		} else {
##			medianB.BB(object)[index, k] <- stats[, 1]
##			madB.BB(object)[index, k] <- stats[, 2]
##			updateTau(object, tau2, G.call, call, I, allele, index)
##		}
##	}
##}

crlmmCopynumber <- function(object,
			    MIN.SAMPLES=10,
			    SNRMin=5,
			    MIN.OBS=1,
			    DF.PRIOR=50,
			    bias.adj=FALSE,
			    prior.prob=rep(1/4,4),
			    seed=1,
			    verbose=TRUE,
			    GT.CONF.THR=0.80,
			    MIN.NU=2^3,
			    MIN.PHI=2^3,
			    THR.NU.PHI=TRUE,
			    type=c("SNP", "NP", "X.SNP", "X.NP"),
			    fit.linearModel=TRUE){
	typeof <- paste(type, collapse=",")
	stopifnot(typeof %in% c("SNP", "NP", "SNP,NP", "SNP,X.SNP", "SNP,X.NP", "SNP,NP,X.SNP", "SNP,NP,X.SNP,X.NP"))
	if(GT.CONF.THR >= 1 | GT.CONF.THR < 0) stop("GT.CONF.THR must be in [0,1)")
	batch <- batch(object)
	is.snp <- isSnp(object)
	is.autosome <- chromosome(object) < 23
	is.annotated <- !is.na(chromosome(object))
	is.X <- chromosome(object) == 23
	is.lds <- is(calls(object), "ffdf") | is(calls(object), "ff_matrix")
	if(is.lds) stopifnot(isPackageLoaded("ff"))
	samplesPerBatch <- table(as.character(batch(object)))
	open(object)
	if(any(samplesPerBatch < MIN.SAMPLES)){
		tmp <- paste(names(samplesPerBatch)[samplesPerBatch < MIN.SAMPLES], collapse=", ")
		message("The following batches have fewer than ", MIN.SAMPLES, " samples: ",  tmp)
		message("Not estimating copy number parameters for batch ", tmp)
	}
	mylabel <- function(marker.type){
		switch(marker.type,
		       SNP="autosomal SNPs",
		       NP="autosomal nonpolymorphic markers",
		       X.SNP="chromosome X SNPs",
		       X.NP="chromosome X nonpolymorphic markers")
	}
	if(verbose) message("Computing summary statistics of the genotype clusters for each batch")
	I <- which(type %in% c("SNP", "NP", "X.NP"))
	for(j in seq_along(I)){ ## do not do X.SNP.  Do this during fit.lm3
		i <- I[j]
		marker.type <- type[i]
		if(verbose) message(paste("...", mylabel(marker.type)))
		##if(verbose) message(paste("Computing summary statistics for ", mylabel(marker.type), " genotype clusters for each batch")
		marker.index <- whichMarkers(marker.type, is.snp,
					     is.autosome, is.annotated, is.X)
		if(length(marker.index) == 0) next()
		res <- genotypeSummary(object=object,
				       GT.CONF.THR=GT.CONF.THR,
				       type=marker.type,
				       verbose=verbose,
				       marker.index=marker.index)
		if(!is.lds) {object <- res; rm(res); gc()}
	}
	if(verbose) message("Imputing unobserved genotype medians and shrinking the variances (within-batch, across loci) ")##SNPs only
	if("SNP" %in% type){
		marker.index <- whichMarkers("SNP", is.snp,
					     is.autosome, is.annotated, is.X)
		if(length(marker.index) > 0){
			res <- shrinkSummary(object=object,
					     MIN.OBS=MIN.OBS,
					     MIN.SAMPLES=MIN.SAMPLES,
					     DF.PRIOR=DF.PRIOR,
					     type="SNP",
					     verbose=verbose,
					     marker.index=marker.index)
			if(!is.lds) {object <- res; rm(res); gc()}
		}
	}
	if(verbose) message("Estimating parameters for copy number")##SNPs only
	if(fit.linearModel){
		for(i in seq_along(type)){
			marker.type <- type[i]
			message(paste("...", mylabel(marker.type)))
			CHR.X <- ifelse(marker.type %in% c("X.SNP", "X.NP"), TRUE, FALSE)
			marker.index <- whichMarkers(marker.type, is.snp,
						     is.autosome, is.annotated, is.X)
			if(length(marker.index) == 0) next()
			res <- estimateCnParameters(object=object,
						    type=marker.type,
						    SNRMin=SNRMin,
						    DF.PRIOR=DF.PRIOR,
						    GT.CONF.THR=GT.CONF.THR,
						    MIN.SAMPLES=MIN.SAMPLES,
						    MIN.OBS=MIN.OBS,
						    MIN.NU=MIN.NU,
						    MIN.PHI=MIN.PHI,
						    THR.NU.PHI=THR.NU.PHI,
						    verbose=verbose,
						    marker.index=marker.index,
						    is.lds=is.lds,
						    CHR.X=CHR.X)
		}
		close(object)
	}
	if(is.lds) return(TRUE) else return(object)
}

crlmmCopynumber2 <- function(){
	.Defunct(msg="The crlmmCopynumber2 function has been deprecated. The function crlmmCopynumber should be used instead.  crlmmCopynumber will support large data using ff provided that the ff package is loaded.")
}
crlmmCopynumberLD <- crlmmCopynumber2



estimateCnParameters <- function(object,
				 type=c("SNP", "NP", "X.SNP", "X.NP"),
				 verbose=TRUE,
				 SNRMin=5,
				 DF.PRIOR=50,
				 GT.CONF.THR=0.95,
				 MIN.SAMPLES=10,
				 MIN.OBS=1,
				 MIN.NU=8,
				 MIN.PHI=8,
				 THR.NU.PHI=TRUE,
				 marker.index,
				 CHR.X,
				 is.lds){
	if(missing(marker.index)){
		batch <- batch(object)
		is.snp <- isSnp(object)
		is.autosome <- chromosome(object) < 23
		is.annotated <- !is.na(chromosome(object))
		is.X <- chromosome(object) == 23
		is.lds <- is(calls(object), "ffdf") | is(calls(object), "ff_matrix")
		if(is.lds) stopifnot(isPackageLoaded("ff"))
		CHR.X <- ifelse(type[[1]] %in% c("X.SNP", "X.NP"), TRUE, FALSE)
		marker.index <- whichMarkers(type[[1]], is.snp, is.autosome, is.annotated, is.X)
	}
	lmFxn <- function(type){
		switch(type,
		       SNP="fit.lm1",
		       NP="fit.lm2",
		       X.SNP="fit.lm3",
		       X.NP="fit.lm4")
	}
	myfun <- lmFxn(type[[1]])
	FUN <- get(myfun)
	if(is.lds){
		index.list <- splitIndicesByLength(marker.index, ocProbesets())
##		if(parStatus()){
##			index.list <- splitIndicesByNode(marker.index)
##		} else index.list <- splitIndicesByLength(marker.index, ocProbesets())
		ocLapply(seq(along=index.list),
			 FUN,
			 index.list=index.list,
			 marker.index=marker.index,
			 object=object,
			 batchSize=ocProbesets(),
			 SNRMin=SNRMin,
			 MIN.SAMPLES=MIN.SAMPLES,
			 MIN.OBS=MIN.OBS,
			 DF=DF.PRIOR,
			 GT.CONF.THR=GT.CONF.THR,
			 THR.NU.PHI=THR.NU.PHI,
			 MIN.NU=MIN.NU,
			 MIN.PHI=MIN.PHI,
			 verbose=verbose,
			 is.lds=is.lds,
			 CHR.X=CHR.X,
			 neededPkgs="crlmm")
	} else {
		##FUN <- match.fun(FUN)
		object <- FUN(strata=1,
			      index.list=list(marker.index),
			      marker.index=marker.index,
			      object=object,
			      batchSize=ocProbesets(),
			      SNRMin=SNRMin,
			      MIN.SAMPLES=MIN.SAMPLES,
			      MIN.OBS=MIN.OBS,
			      DF.PRIOR=DF.PRIOR,
			      GT.CONF.THR=GT.CONF.THR,
			      THR.NU.PHI=THR.NU.PHI,
			      MIN.NU=MIN.NU,
			      MIN.PHI=MIN.PHI,
			      is.lds=is.lds,
			      CHR.X=CHR.X,
			      verbose=verbose)
	}
	message("finished")
	return(object)
}


imputeAA.AB <- function(index, N.AA, N.AB, N.BB,
			medianA.AA, medianA.AB, medianA.BB,
			medianB.AA, medianB.AB, medianB.BB){
	gt.to.impute <- 1:2
	imputed <- rep(FALSE, length(index))
	for(i in index){
		Ns <- cbind(N.AA[i, ], N.AB[i, ], N.BB[i, ])
		medianA <- cbind(medianA.AA[i, ], medianA.AB[i, ], medianA.BB[i, ])
		medianB <- cbind(medianB.AA[i, ], medianB.AB[i, ], medianB.BB[i, ])
		colnames(medianA) <- colnames(medianB) <- c("AA", "AB", "BB")

		##Find batches with sufficient data
		K <- which(rowSums(Ns < 3) == 0)
		if(length(K) < 1) next() ## nothing we can do.  use information from other snps
		X <- cbind(1, medianA[K, -gt.to.impute, drop=FALSE], medianB[K, -gt.to.impute, drop=FALSE])
		Y <- cbind(medianA[K, gt.to.impute, drop=FALSE], medianB[K, gt.to.impute, drop=FALSE])
		tmp <- tryCatch(betahat <- solve(crossprod(X), crossprod(X,Y)), error=function(e) NULL)
		if(is.null(tmp)) {
			##cat(".");
			next()
		}
		## Get data from observed genotypes in batch with insufficient data for genotypes 'gt.to.impute'
		L <- which(rowSums(Ns < 3) == 2)
		if(length(L) == 0) next()
		Z <- cbind(1, medianA[L, -gt.to.impute, drop=FALSE], medianB[L, -gt.to.impute, drop=FALSE])
		imputedVals <- Z %*% betahat
		medianA.AA[i, L] <- imputedVals[, 1]
		medianA.AB[i, L] <- imputedVals[, 2]
		medianB.AA[i, L] <- imputedVals[, 3]
		medianB.AB[i, L] <- imputedVals[, 4]
		imputed[i] <- TRUE
	}
	return(list(medianA.AA=medianA.AA, medianA.AB=medianA.AB, medianA.BB=medianA.BB,
		    medianB.AA=medianB.AA, medianB.AB=medianB.AB, medianB.BB=medianB.BB,
		    imputed=imputed))
}

imputeAB.BB <- function(index, N.AA, N.AB, N.BB,
			medianA.AA, medianA.AB, medianA.BB,
			medianB.AA, medianB.AB, medianB.BB){
	gt.to.impute <- 2:3
	imputed <- rep(FALSE, length(index))
	for(j in seq_along(index)){
		i <- index[j]
		Ns <- cbind(N.AA[i, ], N.AB[i, ], N.BB[i, ])
		medianA <- cbind(medianA.AA[i, ], medianA.AB[i, ], medianA.BB[i, ])
		medianB <- cbind(medianB.AA[i, ], medianB.AB[i, ], medianB.BB[i, ])
		colnames(medianA) <- colnames(medianB) <- c("AA", "AB", "BB")

		##Find batches with sufficient data
		K <- which(rowSums(Ns < 3) == 0)
		if(length(K) < 1) next() ## nothing we can do.  use information from other snps
		X <- cbind(1, medianA[K, -gt.to.impute, drop=FALSE], medianB[K, -gt.to.impute, drop=FALSE])
		Y <- cbind(medianA[K, gt.to.impute, drop=FALSE], medianB[K, gt.to.impute, drop=FALSE])
		tmp <- tryCatch(betahat <- solve(crossprod(X), crossprod(X,Y)), error=function(e) NULL)
		if(is.null(tmp)) {
##			cat(".");
			next()
		}

		## Get data from observed genotypes in batch with insufficient data for genotypes 'gt.to.impute'
		L <- which(rowSums(Ns < 3) == 2)
		if(length(L) == 0) next()
		Z <- cbind(1, medianA[L, -gt.to.impute, drop=FALSE], medianB[L, -gt.to.impute, drop=FALSE])
		imputedVals <- Z %*% betahat
		medianA.AB[i, L] <- imputedVals[, 1]
		medianA.BB[i, L] <- imputedVals[, 2]
		medianB.AB[i, L] <- imputedVals[, 3]
		medianB.BB[i, L] <- imputedVals[, 4]
		imputed[j] <- TRUE
	}
	return(list(medianA.AA=medianA.AA, medianA.AB=medianA.AB, medianA.BB=medianA.BB,
		    medianB.AA=medianB.AA, medianB.AB=medianB.AB, medianB.BB=medianB.BB,
		    imputed=imputed))
}

imputeAA <- function(index, N.AA, N.AB, N.BB,
		     medianA.AA, medianA.AB, medianA.BB,
		     medianB.AA, medianB.AB, medianB.BB){
	gt.to.impute <- 1
	imputed <- rep(FALSE, length(index))
	for(j in seq_along(index)){
		i <- index[j]
		Ns <- cbind(N.AA[i, ], N.AB[i, ], N.BB[i, ])
		medianA <- cbind(medianA.AA[i, ], medianA.AB[i, ], medianA.BB[i, ])
		medianB <- cbind(medianB.AA[i, ], medianB.AB[i, ], medianB.BB[i, ])
		colnames(medianA) <- colnames(medianB) <- c("AA", "AB", "BB")

		##Find batches with sufficient data
		K <- which(rowSums(Ns < 3) == 0)
		if(length(K) < 1) next() ## nothing we can do.  use information from other snps
		X <- cbind(1, medianA[K, -gt.to.impute, drop=FALSE], medianB[K, -gt.to.impute, drop=FALSE])
		Y <- cbind(medianA[K, gt.to.impute, drop=FALSE], medianB[K, gt.to.impute, drop=FALSE])
		tmp <- tryCatch(betahat <- solve(crossprod(X), crossprod(X,Y)), error=function(e) NULL)
		if(is.null(tmp)) {
			##cat(".");
			next()
		}
		## Get data from observed genotypes in batch with insufficient data for genotypes 'gt.to.impute'
		L <- which(rowSums(Ns < 3) == 1)
		if(length(L) == 0) next()
		Z <- cbind(1, medianA[L, -gt.to.impute, drop=FALSE], medianB[L, -gt.to.impute, drop=FALSE])
		imputedVals <- Z %*% betahat
		medianA.AA[i, L] <- imputedVals[, 1]
		medianB.AA[i, L] <- imputedVals[, 2]
		imputed[j] <- TRUE
	}
	return(list(medianA.AA=medianA.AA, medianA.AB=medianA.AB, medianA.BB=medianA.BB,
		    medianB.AA=medianB.AA, medianB.AB=medianB.AB, medianB.BB=medianB.BB,
		    imputed=imputed))
}

imputeBB <- function(index, N.AA, N.AB, N.BB,
		     medianA.AA, medianA.AB, medianA.BB,
		     medianB.AA, medianB.AB, medianB.BB){
	gt.to.impute <- 3
	imputed <- rep(FALSE, length(index))
	for(j in seq_along(index)){
		i <- index[j]
		Ns <- cbind(N.AA[i, ], N.AB[i, ], N.BB[i, ])
		medianA <- cbind(medianA.AA[i, ], medianA.AB[i, ], medianA.BB[i, ])
		medianB <- cbind(medianB.AA[i, ], medianB.AB[i, ], medianB.BB[i, ])
		colnames(medianA) <- colnames(medianB) <- c("AA", "AB", "BB")

		##Find batches with sufficient data
		K <- which(rowSums(Ns < 3) == 0)
		if(length(K) < 1) next() ## nothing we can do.  use information from other snps
		X <- cbind(1, medianA[K, -gt.to.impute, drop=FALSE], medianB[K, -gt.to.impute, drop=FALSE])
		Y <- cbind(medianA[K, gt.to.impute, drop=FALSE], medianB[K, gt.to.impute, drop=FALSE])
		colnames(Y) <- c("A.BB", "B.BB")
		tmp <- tryCatch(betahat <- solve(crossprod(X), crossprod(X,Y)), error=function(e) NULL)
		if(is.null(tmp)) {
			##cat(".");
			next()
		}
##		else{
##			R <- Y-crossprod(X, betahat)
##			RSS <- t(R)%*%R
##		}
		## Get data from observed genotypes in batch with insufficient data for genotypes 'gt.to.impute'
		L <- which(rowSums(Ns < 3) == 1)
		if(length(L) == 0) next()
		Z <- cbind(1, medianA[L, -gt.to.impute, drop=FALSE], medianB[L, -gt.to.impute, drop=FALSE])
		imputedVals <- Z %*% betahat
		medianA.BB[i, L] <- imputedVals[, 1]
		medianB.BB[i, L] <- imputedVals[, 2]
		imputed[j] <- TRUE
	}
	return(list(medianA.AA=medianA.AA, medianA.AB=medianA.AB, medianA.BB=medianA.BB,
		    medianB.AA=medianB.AA, medianB.AB=medianB.AB, medianB.BB=medianB.BB,
		    imputed=imputed))
}

imputeAcrossBatch <- function(N.AA, N.AB, N.BB,
			      medianA.AA, medianA.AB, medianA.BB,
			      medianB.AA, medianB.AB, medianB.BB){
	N.missing <- (N.AA < 3) + (N.AB < 3) + (N.BB < 3)
	## find all indices in which one or more batches need to have 2 genotypes imputed
	missingAA.AB <- (N.AA < 3) & (N.AB < 3)
	missingAB.BB <- (N.AB < 3) & (N.BB < 3)
	missingAA <- (N.AA < 3) & (N.AB >= 3)
	missingBB <- (N.BB < 3) & (N.AB >= 3)
	index <- list(AA.AB=which(rowSums(missingAA.AB) > 0),
		      AB.BB=which(rowSums(missingAB.BB) > 0),
		      AA=which(rowSums(missingAA) > 0),
		      BB=which(rowSums(missingBB) > 0))
	imputeNone <- which(rowSums(N.missing == 0) > 0)
	## only works if there are batches with complete data
	index <- lapply(index, intersect, y=imputeNone)
	##indices.to.update <- rep(1:4, each=sapply(index, length))
	updated <- vector("list", 4)
	names(updated) <- c("AA.AB", "AB.BB", "AA", "BB")
	if(length(index[["AA.AB"]] > 0)){
		res <- imputeAA.AB(index[["AA.AB"]],
				   N.AA,
				   N.AB,
				   N.BB,
				   medianA.AA,
				   medianA.AB,
				   medianA.BB,
				   medianB.AA, medianB.AB, medianB.BB)
		updated$AA.AB <- res$imputed
	}
	if(length(index[["AB.BB"]] > 0)){
		res <- imputeAB.BB(index[["AB.BB"]],
				   N.AA,
				   N.AB,
				   N.BB,
				   res[["medianA.AA"]],
				   res[["medianA.AB"]],
				   res[["medianA.BB"]],
				   res[["medianB.AA"]],
				   res[["medianB.AB"]],
				   res[["medianB.BB"]])
		updated$AB.BB <- res$imputed
	}
	if(length(index[["AA"]] > 0)){
		res <- imputeAA(index[["AA"]],
				N.AA,
				N.AB,
				N.BB,
				res[["medianA.AA"]],
				res[["medianA.AB"]],
				res[["medianA.BB"]],
				res[["medianB.AA"]], res[["medianB.AB"]], res[["medianB.BB"]])
		updated$AA <- res$imputed
	}
	if(length(index[["BB"]] > 0)){
		res <- imputeBB(index[["BB"]],
				N.AA,
				N.AB,
				N.BB,
				res[["medianA.AA"]],
				res[["medianA.AB"]],
				res[["medianA.BB"]],
				res[["medianB.AA"]],
				res[["medianB.AB"]],
				res[["medianB.BB"]])
		updated$BB <- res$imputed
	}
	updated.indices <- unlist(updated)
	return(list(res, updated))
}


##calculatePosteriorMean <- function(object, type=c("SNP", "NP", "X.SNP", "X.NP"), verbose=TRUE,
##				   prior.prob=c(1/7, 1/7, 3/7, 1/7, 1/7),
##				   CN=0:4, scale.sd=1){
##	stopifnot(type %in% c("SNP", "NP", "X.SNP", "X.NP"))
##	stopifnot(sum(prior.prob)==1)
##	stopifnot(length(CN) == length(prior.prob))
##	batch <- batch(object)
##	is.snp <- isSnp(object)
##	is.autosome <- chromosome(object) < 23
##	is.annotated <- !is.na(chromosome(object))
##	is.X <- chromosome(object) == 23
##	is.lds <- is(calls(object), "ffdf") | is(calls(object), "ff_matrix")
##	if(is.lds) stopifnot(isPackageLoaded("ff"))
##	samplesPerBatch <- table(as.character(batch(object)))
##	if(!"posteriorMean" %in% assayDataElementNames(object)){
##		message("adding <posteriorMean> slot to assayData")
##		pM <- matrix(NA, nrow(object), ncol(object), dimnames=list(featureNames(object), sampleNames(object)))
##		tmp <- assayDataNew(alleleA=A(object),
##				    alleleB=B(object),
##				    call=calls(object),
##				    callProbability=snpCallProbability(object),
##				    posteriorMean=pM)
##		assayData(object) <- tmp
##	}
##	## add new assay data element for posterior probabilities
##	mylabel <- function(marker.type){
##		switch(marker.type,
##		       SNP="autosomal SNPs",
##		       NP="autosomal nonpolymorphic markers",
##		       X.SNP="chromosome X SNPs",
##		       X.NP="chromosome X nonpolymorphic markers")
##	}
##	if(type=="SNP"){
##		if(verbose) message(paste("...", mylabel(type)))
##		marker.index <- whichMarkers("SNP",
##					     is.snp,
##					     is.autosome,
##					     is.annotated,
##					     is.X)
##		marker.list <- splitIndicesByLength(marker.index, ocProbesets())
##		emit <- ocLapply(seq_along(marker.list),
##				 posteriorMean.snp,
##				 object=object,
##				 index.list=marker.list,
##				 verbose=verbose,
##				 prior.prob=prior.prob,
##				 CN=CN,
##				 scale.sd=scale.sd)
##		#for(i in seq_along(marker.list)){
##		#index <- marker.list[[i]]
##
##	#}
##	} else stop("type not available")
##	if(length(emit)==1) emit <- emit[[1]] else {
##		state.names <- dimnames(emit[[1]])[[3]]
##		tmp <- array(NA, dim=c(length(unlist(marker.list)), ncol(object), length(prior.prob)))
##		for(i in seq_along(marker.list)){
##			marker.index <- marker.list[[i]]
##			tmp[marker.index, , ] <- emit[[i]]
##		}
##		emit <- tmp
##		dimnames(emit) <- list(featureNames(object)[unlist(marker.list)],
##				       sampleNames(object),
##				       state.names)
##	}#stop("need to rbind elements of emit list?")
##	##tmp <- do.call("rbind", emit)
##	match.index <- match(rownames(emit), featureNames(object))
##	S <- length(prior.prob)
##	pM <- matrix(0, length(match.index), ncol(object), dimnames=list(featureNames(object)[match.index], sampleNames(object)))
##	for(i in 1:S) pM <- pM + CN[i]*emit[, , i]
##	posteriorMean(object)[match.index, ] <- pM
##	return(object)
##}

.open <- function(object){
		open(B(object))
		open(tau2A.AA(object))
		open(tau2B.BB(object))
		open(tau2A.BB(object))
		open(tau2B.AA(object))
		open(corrAA(object))
		open(corrAB(object))
		open(corrBB(object))
		open(nuA(object))
		open(nuB(object))
		open(phiA(object))
		open(phiB(object))
}
.close <- function(object){
	close(A(object))
	close(B(object))
	close(tau2A.AA(object))
	close(tau2B.BB(object))
	close(tau2A.BB(object))
	close(tau2B.AA(object))
	close(corrAA(object))
	close(corrAB(object))
	close(corrBB(object))
	close(nuA(object))
	close(nuB(object))
	close(phiA(object))
	close(phiB(object))
}

sum.mymatrix <- function(...){
	x <- list(...)
	return(x[[1]] + do.call(sum, x[-1]))
}
numberGenotypes <- function(CT){
	stopifnot(length(CT)==1)
	copynumber <- paste("cn", CT, sep="")
	switch(copynumber,
		cn0=1,
		cn1=2,
		cn2=3,
		cn3=4,
		cn4=4,
		cn5=6, NULL)
}

.posterior <- function(CT,
		       tau2A,
		       tau2B,
		       sig2A,
		       sig2B,
		       nuA,
		       nuB,
		       phiA,
		       phiB,
		       corrAA,
		       corrAB,
		       corrBB,
		       a,
		       b,
		       scale.sd){
	## 5: AAAAA, AAAAB, AAABB, AABBB, ABBBB, BBBBB
	##CN=4
	## AAAA, AAAB, AABB, ABBB, BBBB:  L = 4
	##CN=3
	##  AAA, AAB, ABB, BBB ; L = 4
	## CN=2
	##  AA, AB, BB; L=3
	## CN = 1: A, B; L=2
	## CN = 0: null; L=1
	L <- numberGenotypes(CT)
	stopifnot(!is.null(L))
	f.x.y <- vector("list", L)
	for(i in seq_along(f.x.y)){
		CA <- (0:CT)[i]
		CB <- CT-CA
		A.scale <- sqrt(tau2A*(CA==0) + sig2A*(CA > 0))
		B.scale <- sqrt(tau2B*(CB==0) + sig2B*(CB > 0))
		if(CA == 0 | CB == 0){## possibly homozygous BB and AA, respectively
			A.scale <- A.scale*scale.sd[1]
			B.scale <- B.scale*scale.sd[1]
		} else { ## one or both greater than zero
			if(length(scale.sd) == 1) scale.sd <- rep(scale.sd, 2)
			A.scale <- A.scale*scale.sd[2]
			B.scale <- B.scale*scale.sd[2]
		}
		A.scale <- as.numeric(A.scale)
		B.scale <- as.numeric(B.scale)
		A.scale2 <- A.scale^2
		B.scale2 <- B.scale^2
		if(CA == 0 & CB == 0) rho <- 0
		if(CA == 0 & CB > 0) rho <- corrBB
		if(CA > 0 & CB == 0) rho <- corrAA
		if(CA > 0 & CB > 0) rho <- corrAB
		rho <- as.numeric(rho)
		meanA <- as.numeric(log2(nuA+CA*phiA))
		meanB <- as.numeric(log2(nuB+CB*phiB))
		covs <- rho*A.scale*B.scale
		##x <- a[, J]
		x <- a
		y <- b
		Q.x.y <- 1/(1-rho^2)*((x - meanA)^2/A.scale2 + (y - meanB)^2/B.scale2 - (2*rho*((x - meanA)*(y - meanB)))/(A.scale*B.scale))
		f.x.y[[i]] <- 1/(2*pi*A.scale*B.scale*sqrt(1-rho^2))*exp(-0.5*Q.x.y)
		class(f.x.y[[i]]) <- "mymatrix"
	}
	res <- do.call("sum", f.x.y)
	return(res)
}





genotypeCorrelation <- function(genotype){
	switch(genotype,
	       NULL="NULL",
	       A="AA",
	       B="BB",
	       AA="AA",
	       AB="AB",
	       BB="BB",
	       AAA="AA",
	       AAB="AB",
	       ABB="AB",
	       BBB="BB",
	       AAAA="AA",
	       AAAB="AB",
	       AABB="AB",
	       ABBB="AB",
	       BBBB="BB")
}

posteriorMean.snp <- function(stratum, object, index.list, CN,
			      prior.prob=c(1/7, 1/7, 3/7, 1/7, 1/7), is.lds=TRUE, verbose=TRUE,
			      scale.sd=1){
	if(length(scale.sd) == 1) rep(scale.sd,2)
	if(verbose) message("Probe stratum ", stratum, " of ", length(index.list))
	index <- index.list[[stratum]]
	test <- tryCatch(open(A(object)), error=function(e) NULL)
	if(!is.null(test)) invisible(.open(object))
	a <- log2(as.matrix(A(object)[index, ]))
	b <- log2(as.matrix(B(object)[index, ]))
	NN <- Ns(object, i=index)[, , 1]
	sig2A <- as.matrix(tau2A.AA(object)[index,])
	sig2B <- as.matrix(tau2B.BB(object)[index,])
	tau2A <- as.matrix(tau2A.BB(object)[index,])
	tau2B <- as.matrix(tau2B.AA(object)[index,])
	corrAA <- as.matrix(corrAA(object)[index, ])
	corrAB <- as.matrix(corrAB(object)[index, ])
	corrBB <- as.matrix(corrBB(object)[index, ])
	nuA <- as.matrix(nuA(object)[index, ])
	phiA <- as.matrix(phiA(object)[index, ])
	nuB <- as.matrix(nuB(object)[index, ])
	phiB <- as.matrix(phiB(object)[index, ])
	if(!is.null(test)) invisible(.close(object))
	S <- length(prior.prob)
	emit <- array(NA, dim=c(nrow(a), ncol(a), S))##SNPs x sample x 'truth'
	sample.index <- split(1:ncol(object), batch(object))
	##emit <- vector("list", length(sample.index))
	for(j in seq_along(sample.index)){
		cat("batch ", j, "\n")
		J <- sample.index[[j]]
		probs <- array(NA, dim=c(nrow(a), length(J), S))
		for(k in seq_along(CN)){
			##CT <- CN[k]
			probs[, , k] <- .posterior(CT=CN[k],
						   tau2A=tau2A,
						   tau2B=tau2B,
						   sig2A=sig2A,
						   sig2B=sig2B,
						   nuA=nuA,
						   nuB=nuB,
						   phiA=phiA,
						   phiB=phiB,
						   corrAA=corrAA,
						   corrAB=corrAB,
						   corrBB=corrBB,
						   a=a[, J],
						   b=b[, J],
						   scale.sd=scale.sd)
			##probs[, , k] <- do.call("sum", f.x.y)
			##if none of the states are likely (outlier), assign NA
			##		emit[, , counter] <- f.x.y
			##		counter <- counter+1
		}
		emit[, J, ] <- probs
	}
	for(i in 1:S) emit[, , i] <- emit[, , i]*prior.prob[i]
	total <- matrix(0, nrow(emit), ncol(emit))
	for(i in 1:S) total <- total+emit[, , i]
	## how to handle outliers...
	##  - use priors (posterior mean will likely be near 2).  smoothing needs to take into account the uncertainty
	##  - need uncertainty estimates for posterior means
	for(i in 1:S) emit[, , i] <- emit[, , i]/total
	dimnames(emit) <- list(featureNames(object)[index], sampleNames(object), paste("states", 1:S, sep="_"))
	## for one marker/one sample, the emission probs must sum to 1
	return(emit)
}





## used for testing
##crlmm2.2 <- function(filenames, row.names=TRUE, col.names=TRUE,
##                   probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
##                   save.it=FALSE, load.it=FALSE, intensityFile,
##                   mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
##                   cdfName, sns, recallMin=10, recallRegMin=1000,
##                   returnParams=FALSE, badSNP=.7){
##  if ((load.it || save.it) && missing(intensityFile))
##    stop("'intensityFile' is missing, and you chose either load.it or save.it")
##  if (missing(sns)) sns <- basename(filenames)
##  if (!missing(intensityFile))
##    if (load.it & !file.exists(intensityFile)){
##      load.it <- FALSE
##      message("File ", intensityFile, " does not exist.")
##      message("Not loading it, but running SNPRMA from scratch.")
##    }
##  if (!load.it){
##    res <- snprma2(filenames, fitMixture=TRUE,
##                   mixtureSampleSize=mixtureSampleSize, verbose=verbose,
##                   eps=eps, cdfName=cdfName, sns=sns)
##    open(res[["A"]])
##    open(res[["B"]])
##    open(res[["SNR"]])
##    open(res[["mixtureParams"]])
##    if(save.it){
##      t0 <- proc.time()
##      save(res, file=intensityFile)
##      t0 <- proc.time()-t0
##      if (verbose) message("Used ", t0[3], " seconds to save ", intensityFile, ".")
##    }
##  }else{
##    if (verbose) message("Loading ", intensityFile, ".")
##    obj <- load(intensityFile)
##    if (verbose) message("Done.")
##    if (obj != "res")
##      stop("Object in ", intensityFile, " seems to be invalid.")
##  }
##  if(row.names) row.names=res$gns else row.names=NULL
##  if(col.names) col.names=res$sns else col.names=NULL
##  res2 <- crlmmGT2(res[["A"]], res[["B"]], res[["SNR"]],
##                   res[["mixtureParams"]], res[["cdfName"]],
##                   gender=gender, row.names=row.names,
##                   col.names=col.names, recallMin=recallMin,
##                   recallRegMin=1000, SNRMin=SNRMin,
##                   returnParams=returnParams, badSNP=badSNP,
##                   verbose=verbose)
##
##  res2[["SNR"]] <- res[["SNR"]]
##  res2[["SKW"]] <- res[["SKW"]]
##  return(list2SnpSet(res2, returnParams=returnParams))
##}

genotypes <- function(copyNumber, is.snp=TRUE){
	stopifnot(copyNumber %in% 0:4)
	cn <- paste("x", copyNumber, sep="")
	if(is.snp){
		res <- switch(cn,
			      x0="NULL",
			      x1=LETTERS[1:2],
			      x2=c("AA", "AB", "BB"),
			      x3=c("AAA", "AAB", "ABB", "BBB"),
			      x4=c("AAAA", "AAAB", "AABB", "ABBB", "BBBB"))
	} else {
		res <- switch(cn,
			      x0="NULL",
			      x1="A",
			      x2="AA",
			      x3="AAA",
			      x4="AAAA")
	}
	return(res)
}

dbvn <- function(x, mu, Sigma){
	##stopifnot(ncol(x)==2)
	logA <- x[, , 1]
	logB <- x[, , 2]
	sigma2A <- Sigma[,1]
	sigma2B <- Sigma[,3]
	sigmaAB <- sqrt(sigma2A*sigma2B)
	rho <- Sigma[,2]
	muA <- mu[, 1]
	muB <- mu[, 2]
	Q.x.y <- 1/(1-rho^2)*((logA - muA)^2/sigma2A + (logB - muB)^2/sigma2B - (2*rho*((logA - muA)*(logB - muB)))/sigmaAB)
	f.x.y <- 1/(2*pi*sigmaAB*sqrt(1-rho^2))*exp(-0.5*Q.x.y)
}

ABpanel <- function(x, y, predictRegion,
		    copyNumber=0:4,
		    fill,
		    ...,
		    subscripts){
	panel.grid(h=5, v=5)
	if(!missing(fill)){
		panel.xyplot(x, y, fill=fill[subscripts], ...)
	} else {
		panel.xyplot(x, y, ...)
	}
	pn <- panel.number()
	if(!is.null(copyNumber)){
		for(CN in copyNumber){
			gts <- genotypes(CN)
			index <- match(gts, names(predictRegion))
			pr <- predictRegion[index]
			for(i in seq_along(pr)){ ## for each genotype
				## scale?
				pr2 <- pr[[i]]
				mu <- pr2$mu[pn, , , drop=FALSE] ## pn=panel number
				Sigma <- pr2$cov[pn, , ,drop=FALSE]
				for(j in seq_len(dim(mu)[3])){  ## for each batch
					dat.ellipse <- ellipse(x=Sigma[, 2, j],
							       centre=mu[ , , j],
							       scale=c(sqrt(Sigma[ , 1, j]),
							       sqrt(Sigma[, 3, j])))
					lpolygon(dat.ellipse[,1], dat.ellipse[,2], ...)
				}
			}
		}
	}
}

calculateRTheta <- function(object, ##genotype=c("AA", "AB", "BB"),
			    batch.name,
			    feature.index){
	index.np <- which(!(isSnp(object)[feature.index]))
	j <- match(batch.name, batchNames(object))
	if(missing(j)) stop("batch.name did not match in batchNames(object)")
	CHR <- unique(chromosome(object)[feature.index])
	isX <- CHR == "X"
	centers <- getMedians(batchStatistics(object))
	lapply(centers, open)
	centersMatrix <- lapply(centers, function(x, i, j) x[i, j], j=j, i=feature.index)
	lapply(centers, close)
	centersMatrix <- do.call(cbind, centersMatrix)
	centersA <- centersMatrix[, 1:3, drop=FALSE]
	centersB <- centersMatrix[, 4:6, drop=FALSE]
	rm(centers, centersMatrix); gc()
	centersA[centersA < 64] <- 64
	centersB[centersB < 64] <- 64
	theta <- as.matrix(atan2(centersB, centersA) * 2/pi)
	centersA[is.na(centersA)] <- 0
	centersB[is.na(centersB)] <- 0
	if(length(index.np) > 0) theta[index.np, ] <- 1
	##
	r <- centersA+centersB
	J <- which(batch(object)==batch.name)
	open(A(object))
	open(B(object))
	a <- as.matrix(A(object)[feature.index, J, drop=FALSE])
	b <- as.matrix(B(object)[feature.index, J, drop=FALSE])
	close(A(object))
	close(B(object))
	if(length(index.np) > 0) b[index.np, ] <- 0L
	obs.theta <- atan2(b, a)*2/pi
	calculateBandR <- function(o, r, theta, not.na, M){
		lessAA <- o < theta[, 1]
		lessAB <- o < theta[, 2]
		lessBB <- o < theta[, 3]
		ii <- which(lessAA & not.na)
		jj <- which(!lessBB & not.na)
		i <- which(!lessAA & lessAB & not.na)
		j <- which(!lessAB & lessBB & not.na)
		M[i, 1] <- 0.5*(o[i]-theta[i,1])/(theta[i,2]-theta[i,1])
		M[j, 1] <- 0.5*(o[j]-theta[j,2])/(theta[j,3]-theta[j,2]) + 0.5
		M[ii, 1] <- 0
		M[jj, 1] <- 1
		M[i, 2] <- (o[i]-theta[i,1])*(r[i,2]-r[i,1])/(theta[i,2]-theta[i,1]) + r[i, 1]
		M[j, 2] <- (o[j]-theta[j,2])*(r[j,3]-r[j,2])/(theta[j,3]-theta[j,2]) + r[j, 2]
		M[ii, 2] <- r[ii, 1]
		M[jj, 2] <- r[jj, 3]
		return(M)
	}
	not.na <- !is.na(theta[,1])
	r.expected <- bf <- matrix(NA, length(feature.index), ncol(a))
	M <- matrix(NA, length(feature.index), 2)
	for(j in seq_len(ncol(a))){
		tmp <- calculateBandR(obs.theta[, j], r=r, theta=theta, not.na=not.na, M=M)
		bf[, j] <- tmp[,1]
		r.expected[,j] <- tmp[,2]
	}
	bf[bf < 0] <- 0
	bf[bf > 1] <- 1
	if(length(index.np) > 0) r.expected[index.np, ] <- centersA[index.np, 1]
	obs.r <- a+b
	lrr <- log2(obs.r/r.expected)
	dimnames(bf) <- dimnames(lrr) <- list(featureNames(object)[feature.index],
					      sampleNames(object)[J])
	bf <- integerMatrix(bf, 1000)
	lrr <- integerMatrix(lrr, 100)
	return(list(baf=bf, lrr=lrr))
}

Try the crlmm package in your browser

Any scripts or data that you put into this service are public.

crlmm documentation built on Nov. 8, 2020, 4:55 p.m.