test_check_beagle <- function() {
probfile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.gprobs",
package="GWASdata")
dosefile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.dose",
package="GWASdata")
markfile <- system.file("extdata", "imputation", "BEAGLE", "hapmap.markers",
package="GWASdata")
# subsets of data to read in
header <- read.table(dosefile, header=FALSE, stringsAsFactors=FALSE, nrow=1)
markers <- read.table(markfile, header=FALSE, stringsAsFactors=FALSE)
gdsfile <- tempfile()
snpfile <- tempfile()
scanfile <- tempfile()
logfile <- tempfile()
nsnp <- nrow(markers)
nscan <- ncol(header)-3
i_snp <- sample(1:nsnp, 1)
i_scan <- sample(1:nscan, 1)
files <- c(probfile, dosefile)
inputs <- c(FALSE, TRUE)
# 100 lines in file
blocks <- c(5000, 40, 99)
genoDim <- "snp,scan"
for (b in blocks) {
for (i in 1:2) {
imputedDosageFile(input.files=c(files[i], markfile), filename=gdsfile, file.type="gds", chromosome=22,
input.type="BEAGLE", input.dosage=inputs[i], block.size=b,
snp.annot.filename=snpfile, scan.annot.filename=scanfile, genotypeDim=genoDim,
verbose=FALSE, compress="")
gds <- GdsGenotypeReader(gdsfile)
scanAnnot <- getobj(scanfile)
snpAnnot <- getobj(snpfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
checkImputedDosageFile(genoData, snpAnnot, scanAnnot,
input.files=c(files[i], markfile), chromosome,
input.type="BEAGLE",
input.dosage=inputs[i], block.size=b,
na.logfile=logfile)
log <- read.table(logfile, header=T, sep="\t")
geno <- getGenotype(genoData)
i1 <- match(log$snpID, getSnpID(genoData))
i2 <- match(log$scanID, getScanID(genoData))
ind <- matrix(c(i1,i2), ncol=2)
ind.geno <- arrayInd(which(is.na(geno)), dim(geno))
checkTrue(setequal(paste(ind[,1], ind[,2]), paste(ind.geno[,1], ind.geno[,2])))
geno[ind] <- -1
checkTrue(all(!is.na(geno)))
rm(geno)
geno.orig <- getGenotype(genoData) # for checking later
close(genoData)
# change a snp
gds <- openfn.gds(gdsfile, readonly=FALSE)
orig <- read.gdsn(index.gdsn(gds, "genotype"), start=c(i_snp,i_scan), count=c(1,1))
val <- ifelse(is.na(orig) | orig < 0, 1, orig-1)
write.gdsn(index.gdsn(gds, "genotype"), val, start=c(i_snp,i_scan), count=c(1,1))
closefn.gds(gds)
# check exception
gds <- GdsGenotypeReader(gdsfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
geno <- getGenotype(genoData)
checkException(checkEquals(geno.orig, geno)) # make sure they actualy are different
checkException(checkImputedDosageFile(genoData, snpAnnot, scanAnnot,
input.files=c(files[i], markfile), chromosome,
input.type="BEAGLE",
input.dosage=inputs[i], block.size=b))
close(genoData)
}
}
unlink(c(gdsfile, snpfile, scanfile, logfile))
}
test_check_beagle_missing <- function() {
probfile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.gprobs",
package="GWASdata")
dosefile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.dose",
package="GWASdata")
markfile <- system.file("extdata", "imputation", "BEAGLE", "hapmap.markers",
package="GWASdata")
# subsets of data to read in
header <- read.table(dosefile, header=FALSE, stringsAsFactors=FALSE, nrow=1)
markers <- read.table(markfile, header=FALSE, stringsAsFactors=FALSE)
newprobfile <- tempfile()
prob <- read.table(probfile, header=TRUE, stringsAsFactors=FALSE)
prob[1, 4:6] <- -1
write.table(prob, file=newprobfile, row.names=FALSE, col.names=TRUE)
x <- read.table(newprobfile, header=TRUE, stringsAsFactors=FALSE)
checkEquals(prob, x)
newdosefile <- tempfile()
dose <- read.table(dosefile, header=TRUE, stringsAsFactors=FALSE)
dose[1, 4] <- -1
write.table(dose, file=newdosefile, row.names=FALSE, col.names=TRUE)
x <- read.table(newdosefile, header=TRUE, stringsAsFactors=FALSE)
checkEquals(dose, x)
gdsfile <- tempfile()
snpfile <- tempfile()
scanfile <- tempfile()
logfile <- tempfile()
nsnp <- nrow(markers)
nscan <- ncol(header)-3
i_snp <- sample(1:nsnp, 1)
i_scan <- sample(1:nscan, 1)
files <- c(newprobfile, newdosefile)
inputs <- c(FALSE, TRUE)
# 100 lines in file
blocks <- c(5000, 40, 99)
genoDim <- "snp,scan"
for (b in blocks) {
for (i in 1:2) {
imputedDosageFile(input.files=c(files[i], markfile), filename=gdsfile, file.type="gds", chromosome=22,
input.type="BEAGLE", input.dosage=inputs[i], block.size=b,
snp.annot.filename=snpfile, scan.annot.filename=scanfile, genotypeDim=genoDim,
verbose=FALSE, compress="")
gds <- GdsGenotypeReader(gdsfile)
scanAnnot <- getobj(scanfile)
snpAnnot <- getobj(snpfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
checkImputedDosageFile(genoData, snpAnnot, scanAnnot,
input.files=c(files[i], markfile), chromosome,
input.type="BEAGLE",
input.dosage=inputs[i], block.size=b,
na.logfile=logfile)
log <- read.table(logfile, header=T, sep="\t")
geno <- getGenotype(genoData)
i1 <- match(log$snpID, getSnpID(genoData))
i2 <- match(log$scanID, getScanID(genoData))
ind <- matrix(c(i1,i2), ncol=2)
ind.geno <- arrayInd(which(is.na(geno)), dim(geno))
checkTrue(setequal(paste(ind[,1], ind[,2]), paste(ind.geno[,1], ind.geno[,2])))
geno[ind] <- -1
checkTrue(all(!is.na(geno)))
rm(geno)
geno.orig <- getGenotype(genoData) # for checking later
close(genoData)
# change a snp
gds <- openfn.gds(gdsfile, readonly=FALSE)
orig <- read.gdsn(index.gdsn(gds, "genotype"), start=c(i_snp,i_scan), count=c(1,1))
val <- ifelse(is.na(orig) | orig < 0, 1, orig-1)
write.gdsn(index.gdsn(gds, "genotype"), val, start=c(i_snp,i_scan), count=c(1,1))
closefn.gds(gds)
# check exception
gds <- GdsGenotypeReader(gdsfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
geno <- getGenotype(genoData)
checkException(checkEquals(geno.orig, geno)) # make sure they actualy are different
checkException(checkImputedDosageFile(genoData, snpAnnot, scanAnnot,
input.files=c(files[i], markfile), chromosome,
input.type="BEAGLE",
input.dosage=inputs[i], block.size=b))
close(genoData)
}
}
unlink(c(gdsfile, snpfile, scanfile))
}
# tests adding only a subset of samples/snps
test_check_beagle_subset <- function() {
probfile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.gprobs",
package="GWASdata")
dosefile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.dose",
package="GWASdata")
markfile <- system.file("extdata", "imputation", "BEAGLE", "hapmap.markers",
package="GWASdata")
# subsets of data to read in
header <- read.table(dosefile, header=FALSE, stringsAsFactors=FALSE, nrow=1)
# reverse the samples and insert a missing sample between
scan.df <- data.frame(sampleID=c(header[1,5], "missing", header[1,4]), scanID=c(200, 205, 208))
markers <- read.table(markfile, header=FALSE, stringsAsFactors=FALSE)
i_snp_rm <- sample(1:nrow(markers), 5)
#snp.names <- markers$V1[i_snp_rm] # remove 5 random SNPs
nsnp <- nrow(markers) - length(i_snp_rm)
snp.id.start <- 100
gdsfile <- tempfile()
snpfile <- tempfile()
scanfile <- tempfile()
logfile <- tempfile()
# random indices to change
i_snp <- sample(1:nsnp, 1)
i_scan <- sample(which(scan.df$sampleID != "missing"), 1)
files <- c(probfile, dosefile)
inputs <- c(FALSE, TRUE)
# 100 lines in file
blocks <- c(5000, 40, 99, 1)
genoDim <- "snp,scan"
for (b in blocks) {
for (i in 1:2) {
# test reading snp.names and scan.df
imputedDosageFile(input.files=c(files[i], markfile), filename=gdsfile, file.type="gds", chromosome=22,
input.type="BEAGLE", input.dosage=inputs[i], block.size=b,
snp.annot.filename=snpfile, scan.annot.filename=scanfile,
scan.df=scan.df, snp.exclude=i_snp_rm, genotypeDim=genoDim,
snp.id.start=snp.id.start, verbose=FALSE, compress="")
scanAnnot <- getobj(scanfile)
snpAnnot <- getobj(snpfile)
gds <- GdsGenotypeReader(gdsfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
checkImputedDosageFile(genoData, snpAnnot, scanAnnot,
input.files=c(files[i], markfile), chromosome,
input.type="BEAGLE",
input.dosage=inputs[i], block.size=b,
snp.exclude=i_snp_rm,
na.logfile=logfile)
# check the log
log <- read.table(logfile, header=T, sep="\t")
geno <- getGenotype(genoData)
i1 <- match(log$snpID, getSnpID(genoData))
i2 <- match(log$scanID, getScanID(genoData))
ind <- matrix(c(i1,i2), ncol=2)
ind.geno <- arrayInd(which(is.na(geno)), dim(geno))
checkTrue(setequal(paste(ind[,1], ind[,2]), paste(ind.geno[,1], ind.geno[,2])))
geno[ind] <- -1
checkTrue(all(!is.na(geno)))
rm(geno)
# change it in a random place
close(genoData)
gds <- openfn.gds(gdsfile, readonly=FALSE)
orig <- read.gdsn(index.gdsn(gds, "genotype"), start=c(i_snp,i_scan), count=c(1,1))
val <- ifelse(is.na(orig) | orig < 0, 1, orig-1)
write.gdsn(index.gdsn(gds, "genotype"), val, start=c(i_snp,i_scan), count=c(1,1))
closefn.gds(gds)
# check exception
gds <- GdsGenotypeReader(gdsfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
checkException(checkImputedDosageFile(genoData, snpAnnot, scanAnnot,
input.files=c(files[i], markfile), chromosome,
input.type="BEAGLE",
input.dosage=inputs[i], block.size=b,
snp.exclude=i_snp_rm))
close(genoData)
# change an added=FALSE sample
tmp <- which(!scanAnnot$added)
i_scan2 <- ifelse(length(tmp) == 1, tmp, sample(tmp, 1))
i_snp2 <- sample(1:nsnp, 1)
gds <- openfn.gds(gdsfile, readonly=FALSE)
# put the original value back
write.gdsn(index.gdsn(gds, "genotype"), orig, start=c(i_snp,i_scan), count=c(1,1))
# write
write.gdsn(index.gdsn(gds, "genotype"), 1, start=c(i_snp2, i_scan2), count=c(1,1))
closefn.gds(gds)
# check exception
gds <- GdsGenotypeReader(gdsfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
checkException(checkImputedDosageFile(genoData, snpAnnot, scanAnnot,
input.files=c(files[i], markfile), chromosome,
input.type="BEAGLE",
input.dosage=inputs[i], block.size=b,
snp.exclude=i_snp_rm))
close(genoData)
}
}
unlink(c(gdsfile, snpfile, scanfile, logfile))
}
test_check_mach <- function() {
probfile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mlprob",
package="GWASdata")
dosefile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mldose",
package="GWASdata")
markfile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mlinfo",
package="GWASdata")
posfile <- system.file("extdata", "imputation", "MaCH", "mach1.snp.position",
package="GWASdata")
gdsfile <- tempfile()
snpfile <- tempfile()
scanfile <- tempfile()
logfile <- tempfile()
# set up scan.df for subsetting later.
dosages <- read.table(dosefile, header=FALSE, stringsAsFactors=FALSE)
markers <- read.table(markfile, header=TRUE, stringsAsFactors=FALSE)
nsnp <- nrow(markers)
nscan <- nrow(dosages)
# random snp and scan to remove
i_snp <- sample(1:nsnp, 1)
i_scan <- sample(1:nscan, 1)
files <- c(probfile, dosefile)
inputs <- c(FALSE, TRUE)
# 100 lines in file
blocks <- c(5000, 40, 99)
genoDim <- "snp,scan"
for (b in blocks) {
for (i in 1:2) {
imputedDosageFile(input.files=c(files[i], markfile, posfile), filename=gdsfile, file.type="gds", chromosome=22,
input.type="MaCH", input.dosage=inputs[i], block.size=b,
snp.annot.filename=snpfile, scan.annot.filename=scanfile,
genotypeDim=genoDim, verbose=FALSE, compress="")
gds <- GdsGenotypeReader(gdsfile)
scanAnnot <- getobj(scanfile)
snpAnnot <- getobj(snpfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
checkImputedDosageFile(genoData, snpAnnot, scanAnnot,
input.files=c(files[i], markfile, posfile), chromosome,
input.type="MaCH",
input.dosage=inputs[i], block.size=b, na.logfile=logfile)
# check the log
log <- read.table(logfile, header=T, sep="\t")
geno <- getGenotype(genoData)
i1 <- match(log$snpID, getSnpID(genoData))
i2 <- match(log$scanID, getScanID(genoData))
ind <- matrix(c(i1,i2), ncol=2)
ind.geno <- arrayInd(which(is.na(geno)), dim(geno))
checkTrue(setequal(paste(ind[,1], ind[,2]), paste(ind.geno[,1], ind.geno[,2])))
geno[ind] <- -1
checkTrue(all(!is.na(geno)))
rm(geno)
geno.orig <- getGenotype(genoData)
close(genoData)
# change a snp
gds <- openfn.gds(gdsfile, readonly=FALSE)
orig <- read.gdsn(index.gdsn(gds, "genotype"), start=c(i_snp,i_scan), count=c(1,1))
val <- ifelse(is.na(orig) | orig < 0, 1, orig-1)
write.gdsn(index.gdsn(gds, "genotype"), val, start=c(i_snp,i_scan), count=c(1,1))
closefn.gds(gds)
# check exception
gds <- GdsGenotypeReader(gdsfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
geno <- getGenotype(genoData)
checkException(checkEquals(geno, geno.orig))
checkException(checkImputedDosageFile(genoData, snpAnnot, scanAnnot,
input.files=c(files[i], markfile, posfile), chromosome,
input.type="MaCH",
input.dosage=inputs[i], block.size=b))
close(genoData)
}
}
unlink(c(gdsfile, snpfile, scanfile, logfile))
}
test_check_mach_missing <- function() {
probfile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mlprob",
package="GWASdata")
dosefile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mldose",
package="GWASdata")
markfile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mlinfo",
package="GWASdata")
posfile <- system.file("extdata", "imputation", "MaCH", "mach1.snp.position",
package="GWASdata")
gdsfile <- tempfile()
snpfile <- tempfile()
scanfile <- tempfile()
newprobfile <- tempfile()
newdosefile <- tempfile()
logfile <- tempfile()
dosages <- read.table(dosefile, header=FALSE, stringsAsFactors=FALSE)
dosages[1, 3] <- -1
write.table(dosages, file=newdosefile, row.names=FALSE, col.names=FALSE)
x <- read.table(newdosefile, stringsAsFactors=FALSE, header=FALSE)
checkEquals(x, dosages)
# mach probs are AA, AB not AA, AB, BB
prob <- read.table(probfile, header=FALSE, stringsAsFactors=FALSE)
prob[1, 3:4] <- -1
write.table(prob, file=newprobfile, row.names=FALSE, col.names=FALSE)
x <- read.table(newprobfile, stringsAsFactors=FALSE, header=FALSE)
checkEquals(x, prob)
markers <- read.table(markfile, header=TRUE, stringsAsFactors=FALSE)
nsnp <- nrow(markers)
nscan <- nrow(dosages)
# random snp and scan to remove
i_snp <- sample(1:nsnp, 1)
i_scan <- sample(1:nscan, 1)
files <- c(newprobfile, newdosefile)
inputs <- c(FALSE, TRUE)
# 100 lines in file
blocks <- c(5000, 40, 99)
genoDim <- "snp,scan"
for (b in blocks) {
for (i in 1:2) {
imputedDosageFile(input.files=c(files[i], markfile, posfile), filename=gdsfile, file.type="gds", chromosome=22,
input.type="MaCH", input.dosage=inputs[i], block.size=b,
snp.annot.filename=snpfile, scan.annot.filename=scanfile,
genotypeDim=genoDim, verbose=FALSE, compress="")
gds <- GdsGenotypeReader(gdsfile)
scanAnnot <- getobj(scanfile)
snpAnnot <- getobj(snpfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
checkImputedDosageFile(genoData, snpAnnot, scanAnnot,
input.files=c(files[i], markfile, posfile), chromosome,
input.type="MaCH",
input.dosage=inputs[i], block.size=b, na.logfile=logfile)
# check the log
log <- read.table(logfile, header=T, sep="\t")
geno <- getGenotype(genoData)
i1 <- match(log$snpID, getSnpID(genoData))
i2 <- match(log$scanID, getScanID(genoData))
ind <- matrix(c(i1,i2), ncol=2)
ind.geno <- arrayInd(which(is.na(geno)), dim(geno))
checkTrue(setequal(paste(ind[,1], ind[,2]), paste(ind.geno[,1], ind.geno[,2])))
geno[ind] <- -1
checkTrue(all(!is.na(geno)))
rm(geno)
geno.orig <- getGenotype(genoData)
close(genoData)
# change a snp
gds <- openfn.gds(gdsfile, readonly=FALSE)
orig <- read.gdsn(index.gdsn(gds, "genotype"), start=c(i_snp,i_scan), count=c(1,1))
val <- ifelse(is.na(orig) | orig < 0, 1, orig-1)
write.gdsn(index.gdsn(gds, "genotype"), val, start=c(i_snp,i_scan), count=c(1,1))
closefn.gds(gds)
# check exception
gds <- GdsGenotypeReader(gdsfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
geno <- getGenotype(genoData)
checkException(checkEquals(geno, geno.orig))
checkException(checkImputedDosageFile(genoData, snpAnnot, scanAnnot,
input.files=c(files[i], markfile, posfile), chromosome,
input.type="MaCH",
input.dosage=inputs[i], block.size=b))
close(genoData)
}
}
unlink(c(gdsfile, snpfile, scanfile, logfile))
}
# tests adding only a subset of samples/snps
test_check_mach_subset <- function() {
probfile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mlprob",
package="GWASdata")
dosefile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mldose",
package="GWASdata")
markfile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mlinfo",
package="GWASdata")
posfile <- system.file("extdata", "imputation", "MaCH", "mach1.snp.position",
package="GWASdata")
gdsfile <- tempfile()
snpfile <- tempfile()
scanfile <- tempfile()
logfile <- tempfile()
# set up scan.df for subsetting later.
dosages <- read.table(dosefile, header=FALSE, stringsAsFactors=FALSE)
# remove 5 random samples
i_samp_rm <- sample(1:nrow(dosages), nrow(dosages)-5)
scan.df <- data.frame(sampleID=c("MISSING->MISSING", dosages$V1[i_samp_rm]), stringsAsFactors=FALSE)
scan.df$scanID <- 200:(200+nrow(scan.df)-1)
markers <- read.table(markfile, header=TRUE, stringsAsFactors=FALSE)
i_snp_rm <- sample(1:nrow(markers), 5)
#message(paste(i_snp_rm, collapse=" "))
#snp.names <- markers$SNP[i_snp_rm] # remove 5 random SNPs
snp.id.start <- 100
nsnp <- nrow(markers) - length(i_snp_rm)
nscan <- nrow(scan.df)
# random snp and scan to remove
i_snp <- sample(1:nsnp, 1)
i_scan <- sample(which(scan.df$sampleID != "MISSING->MISSING"), 1)
files <- c(probfile, dosefile)
inputs <- c(FALSE, TRUE)
# 100 lines in file
blocks <- c(5000, 1)
genoDim <- "snp,scan"
for (b in blocks) {
for (i in 1:2) {
# test reading snp.names and scan.df
imputedDosageFile(input.files=c(files[i], markfile, posfile), filename=gdsfile, file.type="gds", chromosome=22,
input.type="MaCH", input.dosage=inputs[i], block.size=b,
snp.annot.filename=snpfile, scan.annot.filename=scanfile,
scan.df=scan.df, snp.exclude=i_snp_rm, genotypeDim=genoDim,
snp.id.start=snp.id.start, verbose=FALSE, compress="")
scanAnnot <- getobj(scanfile)
snpAnnot <- getobj(snpfile)
gds <- GdsGenotypeReader(gdsfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
checkImputedDosageFile(genoData, snpAnnot, scanAnnot,
input.files=c(files[i], markfile, posfile), chromosome,
input.type="MaCH",
input.dosage=inputs[i], block.size=b,
snp.exclude=i_snp_rm, na.logfile=logfile)
# check the log
log <- read.table(logfile, header=T, sep="\t")
geno <- getGenotype(genoData)
i1 <- match(log$snpID, getSnpID(genoData))
i2 <- match(log$scanID, getScanID(genoData))
ind <- matrix(c(i1,i2), ncol=2)
ind.geno <- arrayInd(which(is.na(geno)), dim(geno))
checkTrue(setequal(paste(ind[,1], ind[,2]), paste(ind.geno[,1], ind.geno[,2])))
geno[ind] <- -1
checkTrue(all(!is.na(geno)))
rm(geno)
# change it in a random place
close(genoData)
gds <- openfn.gds(gdsfile, readonly=FALSE)
orig <- read.gdsn(index.gdsn(gds, "genotype"), start=c(i_snp, i_scan), count=c(1,1))
val <- ifelse(is.na(orig) | orig < 0, 1, orig-1)
write.gdsn(index.gdsn(gds, "genotype"), val, start=c(i_snp, i_scan), count=c(1,1))
closefn.gds(gds)
# check exception
gds <- GdsGenotypeReader(gdsfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
checkException(checkImputedDosageFile(genoData, snpAnnot, scanAnnot,
input.files=c(files[i], markfile, posfile), chromosome,
input.type="MaCH",
input.dosage=inputs[i], block.size=b,
snp.exclude=i_snp_rm))
close(genoData)
# change an added=FALSE sample
tmp <- which(!scanAnnot$added)
i_scan2 <- ifelse(length(tmp) == 1, tmp, sample(tmp, 1))
i_snp2 <- sample(1:nsnp, 1)
gds <- openfn.gds(gdsfile, readonly=FALSE)
# put the original value back
write.gdsn(index.gdsn(gds, "genotype"), orig, start=c(i_snp,i_scan), count=c(1,1))
# write
write.gdsn(index.gdsn(gds, "genotype"), 1, start=c(i_snp2, i_scan2), count=c(1,1))
closefn.gds(gds)
# check exception
gds <- GdsGenotypeReader(gdsfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
checkException(checkImputedDosageFile(genoData, snpAnnot, scanAnnot,
input.files=c(files[i], markfile, posfile), chromosome,
input.type="MaCH",
input.dosage=inputs[i], block.size=b,
snp.exclude=i_snp_rm))
close(genoData)
}
}
unlink(c(gdsfile, snpfile, scanfile, logfile))
}
test_check_impute2 <- function() {
probfile <- system.file("extdata", "imputation", "IMPUTE2", "example.chr22.study.gens",
package="GWASdata")
sampfile <- system.file("extdata", "imputation", "IMPUTE2", "example.study.samples",
package="GWASdata")
samp <- read.table(sampfile, stringsAsFactors=FALSE, header=TRUE)
samp <- samp[-1, ]
dos <- read.table(probfile, header=FALSE, stringsAsFactors=FALSE)
snps <- dos[, 2]
gdsfile <- tempfile()
snpfile <- tempfile()
scanfile <- tempfile()
logfile <- tempfile()
nsnp <- length(snps)
nscan <- nrow(samp)
# random snp and scan to remove
i_snp <- sample(1:nsnp, 1)
i_scan <- sample(1:nscan, 1)
# 33 lines in file
blocks <- c(5000, 10, 32)
genoDim <- "snp,scan"
#for (genoDim in c("snp,scan", "scan,snp")) {
for (b in blocks) {
# make a normal one
imputedDosageFile(input.files=c(probfile, sampfile), filename=gdsfile, file.type="gds", chromosome=22,
input.type="IMPUTE2", input.dosage=FALSE, block.size=b,
snp.annot.filename=snpfile, scan.annot.filename=scanfile, genotypeDim=genoDim,
verbose=FALSE, compress="")
gds <- GdsGenotypeReader(gdsfile)
scanAnnot <- getobj(scanfile)
snpAnnot <- getobj(snpfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
checkImputedDosageFile(genoData, snpAnnot, scanAnnot,
input.files=c(probfile, sampfile), chromosome,
input.type="IMPUTE2",
input.dosage=FALSE, block.size=b, na.logfile=logfile)
log <- read.table(logfile, header=T, sep="\t")
geno <- getGenotype(genoData)
i1 <- match(log$snpID, getSnpID(genoData))
i2 <- match(log$scanID, getScanID(genoData))
ind <- matrix(c(i1,i2), ncol=2)
ind.geno <- arrayInd(which(is.na(geno)), dim(geno))
checkTrue(setequal(paste(ind[,1], ind[,2]), paste(ind.geno[,1], ind.geno[,2])))
geno[ind] <- -1
checkTrue(all(!is.na(geno)))
rm(geno)
close(genoData)
gds <- openfn.gds(gdsfile, readonly=FALSE)
orig <- read.gdsn(index.gdsn(gds, "genotype"), start=c(i_snp,i_scan), count=c(1,1))
val <- ifelse(is.na(orig) | orig < 0, 1, orig-1)
write.gdsn(index.gdsn(gds, "genotype"), val, start=c(i_snp,i_scan), count=c(1,1))
closefn.gds(gds)
# check exception
gds <- GdsGenotypeReader(gdsfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
checkException(checkImputedDosageFile(genoData, snpAnnot, scanAnnot,
input.files=c(probfile, sampfile), chromosome,
input.type="IMPUTE2",
input.dosage=FALSE, block.size=b))
close(genoData)
}
unlink(c(gdsfile, snpfile, scanfile, logfile))
}
test_check_impute2_missing <- function() {
probfile <- system.file("extdata", "imputation", "IMPUTE2", "example.chr22.study.gens",
package="GWASdata")
sampfile <- system.file("extdata", "imputation", "IMPUTE2", "example.study.samples",
package="GWASdata")
samp <- read.table(sampfile, stringsAsFactors=FALSE, header=TRUE)
samp <- samp[-1, ]
dos <- read.table(probfile, header=FALSE, stringsAsFactors=FALSE)
snps <- dos[, 2]
# make a dosage missing
newprobfile <- tempfile()
dos[1, 6:8] <- -1
write.table(dos, file=newprobfile, row.names=FALSE, col.names=FALSE)
x <- read.table(newprobfile, stringsAsFactors=FALSE, header=FALSE)
checkEquals(x, dos)
gdsfile <- tempfile()
snpfile <- tempfile()
scanfile <- tempfile()
logfile <- tempfile()
nsnp <- length(snps)
nscan <- nrow(samp)
# random snp and scan to remove
i_snp <- sample(1:nsnp, 1)
i_scan <- sample(1:nscan, 1)
# 33 lines in file
blocks <- c(5000, 10, 32)
genoDim <- "snp,scan"
#for (genoDim in c("snp,scan", "scan,snp")) {
for (b in blocks) {
# make a normal one
imputedDosageFile(input.files=c(probfile, sampfile), filename=gdsfile, file.type="gds", chromosome=22,
input.type="IMPUTE2", input.dosage=FALSE, block.size=b,
snp.annot.filename=snpfile, scan.annot.filename=scanfile, genotypeDim=genoDim,
verbose=FALSE, compress="")
gds <- GdsGenotypeReader(gdsfile)
scanAnnot <- getobj(scanfile)
snpAnnot <- getobj(snpfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
checkImputedDosageFile(genoData, snpAnnot, scanAnnot,
input.files=c(probfile, sampfile), chromosome,
input.type="IMPUTE2",
input.dosage=FALSE, block.size=b,
na.logfile=logfile)
log <- read.table(logfile, header=T, sep="\t")
geno <- getGenotype(genoData)
i1 <- match(log$snpID, getSnpID(genoData))
i2 <- match(log$scanID, getScanID(genoData))
ind <- matrix(c(i1,i2), ncol=2)
ind.geno <- arrayInd(which(is.na(geno)), dim(geno))
checkTrue(setequal(paste(ind[,1], ind[,2]), paste(ind.geno[,1], ind.geno[,2])))
geno[ind] <- -1
checkTrue(all(!is.na(geno)))
rm(geno)
close(genoData)
gds <- openfn.gds(gdsfile, readonly=FALSE)
orig <- read.gdsn(index.gdsn(gds, "genotype"), start=c(i_snp,i_scan), count=c(1,1))
val <- ifelse(is.na(orig) | orig < 0, 1, orig-1)
write.gdsn(index.gdsn(gds, "genotype"), val, start=c(i_snp,i_scan), count=c(1,1))
closefn.gds(gds)
# check exception
gds <- GdsGenotypeReader(gdsfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
checkException(checkImputedDosageFile(genoData, snpAnnot, scanAnnot,
input.files=c(probfile, sampfile), chromosome,
input.type="IMPUTE2",
input.dosage=FALSE, block.size=b))
close(genoData)
}
unlink(c(gdsfile, snpfile, scanfile, logfile))
}
# tests adding only a subset of samples/snps
test_check_impute2_subset <- function() {
probfile <- system.file("extdata", "imputation", "IMPUTE2", "example.chr22.study.gens",
package="GWASdata")
sampfile <- system.file("extdata", "imputation", "IMPUTE2", "example.study.samples",
package="GWASdata")
samp <- read.table(sampfile, stringsAsFactors=FALSE, header=TRUE)
samp <- samp[-1, ]
i_samp_rm <- sample(-1:-nrow(samp), 5)
scan.df <- data.frame(sampleID=paste(samp$ID_1[i_samp_rm], samp$ID_2[i_samp_rm]), stringsAsFactors=FALSE)
scan.df <- rbind(data.frame(sampleID="MISSING MISSING", stringsAsFactors=FALSE), scan.df)
scan.df$scanID <- 200:(200+nrow(scan.df)-1)
dos <- read.table(probfile, header=FALSE, stringsAsFactors=FALSE)
snps <- dos[, 2]
# remove 5 random snps
i_snp_rm <- sample(1:length(snps), 5)
snp.id.start <- 100
gdsfile <- tempfile()
snpfile <- tempfile()
scanfile <- tempfile()
logfile <- tempfile()
nsnp <- length(snps) - length(i_snp_rm)
nscan <- nrow(samp)
# random snp and scan to remove
i_snp <- sample(1:nsnp, 1)
i_scan <- sample(which(scan.df$sampleID != "MISSING MISSING"), 1)
# 33 lines in file
blocks <- c(5000, 10, 32, 1)
genoDim <- "snp,scan"
for (b in blocks) {
# now the subset of samples/snps
imputedDosageFile(input.files=c(probfile, sampfile), filename=gdsfile, file.type="gds", chromosome=22,
input.type="IMPUTE2", input.dosage=FALSE, block.size=b,
snp.annot.filename=snpfile, scan.annot.filename=scanfile,
scan.df=scan.df, snp.exclude=i_snp_rm, genotypeDim=genoDim,
snp.id.start=snp.id.start, verbose=FALSE, compress="")
scanAnnot <- getobj(scanfile)
snpAnnot <- getobj(snpfile)
gds <- GdsGenotypeReader(gdsfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
checkImputedDosageFile(genoData, snpAnnot, scanAnnot,
input.files=c(probfile, sampfile), chromosome,
input.type="IMPUTE2",
input.dosage=FALSE, block.size=b,
snp.exclude=i_snp_rm, verbose=FALSE,
na.logfile=logfile)
log <- read.table(logfile, header=T, sep="\t")
geno <- getGenotype(genoData)
i1 <- match(log$snpID, getSnpID(genoData))
i2 <- match(log$scanID, getScanID(genoData))
ind <- matrix(c(i1,i2), ncol=2)
ind.geno <- arrayInd(which(is.na(geno)), dim(geno))
checkTrue(setequal(paste(ind[,1], ind[,2]), paste(ind.geno[,1], ind.geno[,2])))
geno[ind] <- -1
checkTrue(all(!is.na(geno)))
rm(geno)
# change it in a random place
close(genoData)
gds <- openfn.gds(gdsfile, readonly=FALSE)
orig <- read.gdsn(index.gdsn(gds, "genotype"), start=c(i_snp,i_scan), count=c(1,1))
val <- ifelse(is.na(orig) | orig < 0, 1, orig-1)
write.gdsn(index.gdsn(gds, "genotype"), val, start=c(i_snp,i_scan), count=c(1,1))
closefn.gds(gds)
# check exception
gds <- GdsGenotypeReader(gdsfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
checkException(checkImputedDosageFile(genoData, snpAnnot, scanAnnot,
input.files=c(probfile, sampfile), chromosome,
input.type="IMPUTE2",
input.dosage=FALSE, block.size=b,
snp.exclude=i_snp_rm, verbose=FALSE))
close(genoData)
# change an added=FALSE sample
tmp <- which(!scanAnnot$added)
i_scan2 <- ifelse(length(tmp) == 1, tmp, sample(tmp, 1))
i_snp2 <- sample(1:nsnp, 1)
gds <- openfn.gds(gdsfile, readonly=FALSE)
# put the original value back
write.gdsn(index.gdsn(gds, "genotype"), orig, start=c(i_snp,i_scan), count=c(1,1))
# write
write.gdsn(index.gdsn(gds, "genotype"), 1, start=c(i_snp2, i_scan2), count=c(1,1))
closefn.gds(gds)
# check exception
gds <- GdsGenotypeReader(gdsfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
checkException(checkImputedDosageFile(genoData, snpAnnot, scanAnnot,
input.files=c(probfile, sampfile), chromosome,
input.type="IMPUTE2",
input.dosage=FALSE, block.size=b,
snp.exclude=i_snp_rm))
close(genoData)
}
unlink(c(gdsfile, snpfile, scanfile, logfile))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.