qualityScoreBySnp<-
function(intenData,
genoData,
scan.exclude = NULL,
block.size = 5000,
verbose = TRUE)
{
# block.size is the number of rows to read and process at one time (i.e. one pass of the loop below)
# scan.exclude is a vector of integer ids of samples to exclude from the missing fraction calculation (use NULL if none to exclude)
# check that intenData has quality
if (!hasQuality(intenData)) stop("quality not found in intenData")
# check that dimensions of intenData and genoData are equal
intenSnpID <- getSnpID(intenData)
genoSnpID <- getSnpID(genoData)
if (!all(intenSnpID == genoSnpID)) stop("snp dimensions of intenData and genoData differ")
intenScanID <- getScanID(intenData)
genoScanID <- getScanID(genoData)
if (!all(intenScanID == genoScanID)) stop("scan dimensions of intenData and genoData differ")
# what to analyze
nSnp <- length(intenSnpID) # number of rows to analyze
nblock <- floor(nSnp/block.size)
remain <- nSnp-block.size*nblock
k <- 1 # first row of first block.size
if(remain==0) {N <- nblock} else { N <- nblock+1 } # N is the number of blocks
# get logical vector for samples to be excluded
incl <- !is.element(intenScanID, scan.exclude)
# vector to store results
mnqual <- rep(NA, nSnp)
medqual <- rep(NA, nSnp)
# read each block and calculate the mean quality over non-missing snps
for(i in 1:N){
if(i<=nblock) {n <- block.size}
else {n <- remain}
if (verbose)
message(paste("block number=", i, "vector index=", k))
# get the data and exclude certain samples
geno <- getGenotype(genoData, snp=c(k,n), scan=c(1,-1), drop=FALSE)
qual <- getQuality(intenData, snp=c(k,n), scan=c(1,-1), drop=FALSE)
# remove excluded scans
geno <- geno[,incl,drop=FALSE]
qual <- qual[,incl,drop=FALSE]
qual[is.na(geno)] <- NA
mnqual[k:(k+n-1)] <- rowMeans(qual, na.rm=TRUE)
medqual[k:(k+n-1)] <- rowMedians(qual, na.rm=TRUE)
k <- k+n
rm(geno); rm(qual)
}
qual.res <- cbind(mnqual, medqual)
rownames(qual.res) <- intenSnpID
colnames(qual.res) <- c("mean.quality", "median.quality")
return(qual.res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.