Nothing
context("runAbsoluteCN")
normal.coverage.file <- system.file("extdata", "example_normal.txt",
package = "PureCN")
normal2.coverage.file <- system.file("extdata", "example_normal2.txt",
package = "PureCN")
normal.coverage.files <- c(normal.coverage.file, normal2.coverage.file)
tumor.coverage.file <- system.file("extdata", "example_tumor.txt",
package = "PureCN")
vcf.file <- system.file("extdata", "example.vcf.gz", package = "PureCN")
interval.file <- system.file("extdata", "example_intervals.txt",
package = "PureCN")
seg.file <- system.file("extdata", "example_seg.txt", package = "PureCN")
cosmic.vcf.file <- system.file("extdata", "example_cosmic.vcf.gz",
package = "PureCN")
data(purecn.example.output)
test_that("VCF is not necessary to produce output", {
set.seed(123)
normalDB <- createNormalDatabase(normal.coverage.files)
expect_true(!is.null(normalDB$sd))
ret <- runAbsoluteCN(normal.coverage.file = normal.coverage.file,
tumor.coverage.file = tumor.coverage.file,
candidates = purecn.example.output$candidates,
normalDB = normalDB,
genome = "hg19",
test.purity = seq(0.4, 0.7, by = 0.05), min.ploidy = 1.5,
max.ploidy = 2.4, max.candidate.solutions = 1,
BPPARAM=BiocParallel::bpparam())
tmpFile <- tempfile(fileext = ".rds")
saveRDS(ret, tmpFile)
createCurationFile(tmpFile)
readCurationFile(tmpFile)
file.remove(tmpFile)
chrom <- ret$results[[1]]$seg$chrom
expect_equal(chrom[!duplicated(chrom)], 1:22)
expect_error(callLOH(ret), "runAbsoluteCN was run without a VCF file")
expect_error(callMutationBurden(ret),
"runAbsoluteCN was run without a VCF file")
})
test_that("Exceptions happen with incorrect input data", {
expect_error(runAbsoluteCN(tumor.coverage.file = tumor.coverage.file,
genome = "hg19"),
"Need a normal coverage file if log.ratio and seg.file")
expect_error(runAbsoluteCN(tumor.coverage.file = tumor.coverage.file,
min.ploidy = 0, genome = "hg19"),
"min.ploidy or max.ploidy not within expected range")
expect_error(runAbsoluteCN(tumor.coverage.file = tumor.coverage.file,
max.ploidy = 0, genome = "hg19"),
"min.ploidy or max.ploidy not within expected range")
expect_error(runAbsoluteCN(tumor.coverage.file = tumor.coverage.file,
normal.coverage.file = normal.coverage.file, max.ploidy = "a",
genome = "hg19"), "max.ploidy")
expect_error(runAbsoluteCN(tumor.coverage.file = tumor.coverage.file,
normal.coverage.file = normal.coverage.file, min.ploidy = "a",
genome = "hg19"), "min.ploidy")
expect_error(runAbsoluteCN(tumor.coverage.file = tumor.coverage.file,
normal.coverage.file = normal.coverage.file, test.num.copy = -1:7,
genome = "hg19"), "test.num.copy")
expect_error(expect_warning(runAbsoluteCN(tumor.coverage.file = tumor.coverage.file,
normal.coverage.file = normal.coverage.file, test.num.copy = 0:10,
genome = "hg19", max.non.clonal = 1.1), "test.num.copy outside"))
expect_error(expect_warning(runAbsoluteCN(tumor.coverage.file = tumor.coverage.file,
normal.coverage.file = normal.coverage.file, test.num.copy = 2:7,
genome = "hg19", max.non.clonal = 1.1), "test.num.copy outside"))
expect_error(runAbsoluteCN(tumor.coverage.file = tumor.coverage.file,
normal.coverage.file = normal.coverage.file, test.purity = "a",
genome = "hg19"), "test.purity")
expect_error(runAbsoluteCN(tumor.coverage.file, tumor.coverage.file,
genome = "hg19"), "Tumor and normal are identical")
expect_error(runAbsoluteCN(tumor.coverage.file = tumor.coverage.file,
log.ratio = head(purecn.example.output$input$log.ratio$log.ratio),
genome = "hg19"),
"Length of log.ratio different from tumor coverage")
expect_error(runAbsoluteCN(normal.coverage.file, tumor.coverage.file,
prior.purity = 1, genome = "hg19"),
"prior.purity must have the same")
expect_error(runAbsoluteCN(normal.coverage.file, tumor.coverage.file,
min.gof = 70, genome = "hg19"), "min.gof")
expect_error(runAbsoluteCN(normal.coverage.file, tumor.coverage.file,
prior.purity = c(0.6, 1.1), test.purity = c(0.2, 0.6),
genome = "hg19"),
"prior.purity not within expected range")
expect_error(runAbsoluteCN(normal.coverage.file, tumor.coverage.file,
prior.purity = c(0.6, 0.9), test.purity = c(0.2, 0.6),
genome = "hg19"),
"prior.purity must add to 1. Sum is 1.5")
expect_error(runAbsoluteCN(normal.coverage.file, tumor.coverage.file,
genome = "hg19", max.homozygous.loss = 1.1),
"max.homozygous.loss not within expected range")
expect_error(runAbsoluteCN(normal.coverage.file, tumor.coverage.file,
genome = "hg19", prior.K = -1.1),
"prior.K not within expected range")
expect_error(runAbsoluteCN(normal.coverage.file, tumor.coverage.file,
genome = "hg19", prior.contamination = c(0.1, -1.1)),
"prior.contamination not within expected range")
expect_error(runAbsoluteCN(normal.coverage.file, tumor.coverage.file,
genome = "hg19", iterations = 1),
"Iterations not in the expected range from")
expect_error(runAbsoluteCN(normal.coverage.file, tumor.coverage.file,
genome = "hg19", iterations = 3000),
"Iterations not in the expected range from")
expect_error(runAbsoluteCN(normal.coverage.file=normal.coverage.file),
"Missing tumor")
expect_error(runAbsoluteCN(normal.coverage.file, tumor.coverage.file,
genome = "hg19", model.homozygous = NULL), "model.homozygous")
normalCov <- readCoverageFile(normal.coverage.file)
expect_error(runAbsoluteCN(normalCov[sample(length(normalCov)),
], tumor.coverage.file, genome = "hg19"),
"Interval files in normal and tumor different")
tumorCov <- readCoverageFile(tumor.coverage.file)
tumorCov$average.coverage <- 0
tumorCov$coverage <- 0
expect_error( runAbsoluteCN(normal.coverage.file=normal.coverage.file,
tumor.coverage.file=tumorCov, genome="hg19"),
"intervals")
expect_error(runAbsoluteCN(normal.coverage.file, tumor.coverage.file,
genome = "hg19", vcf.file=vcf.file, args.filterVcf=list(snp.blacklist=normal.coverage.file)),
"Could not import snp.blacklist")
})
test_that("Example data with VCF produces expected output", {
ret <- runAbsoluteCN(normal.coverage.file = normal.coverage.file,
tumor.coverage.file = tumor.coverage.file, vcf.file = vcf.file,
genome = "hg19", test.purity = seq(0.3, 0.7, by = 0.05), plot.cnv = FALSE,
max.candidate.solutions = 1, min.ploidy = 1.5, max.ploidy = 2.1,
vcf.field.prefix = "PureCN."
)
expect_equal(ret$results[[1]]$fraction.balanced, 0.21, tolerance = 0.01)
s <- predictSomatic(ret)
expect_equal(s$AR/s$MAPPING.BIAS, s$AR.ADJUSTED)
expect_equal(0.65, ret$results[[1]]$purity)
expect_equal(0.93, ret$results[[1]]$GoF, tolerance = 0.01)
expect_equal(as.numeric(colnames(ret$candidates$all)), c(seq(0.3,
0.34, by = 1/50), seq(0.35, 0.7, by = 1/30)))
plotAbs(ret, type = "overview")
expect_equal(info(ret$input$vcf)$PureCN.OnTarget, rep(1, length(ret$input$vcf)))
})
test_that("Mapping bias function works", {
vcf <- readVcf(vcf.file, "hg19", param = ScanVcfParam(samples = "LIB-02240e4"))
# make sure that NA's in DB are replaced with FALSE
info(vcf)$DB[1:5] <- NA
geno(vcf)$DP[6:8,1] <- NA
myMappingBiasTestFun <- function(vcf, ...) {
tmp <- rep(1, nrow(vcf))
tmp2 <- rep(0, nrow(vcf))
idx <- as.logical(seqnames(vcf) == "chr9" & start(vcf) ==
35811642)
tmp[idx] <- 0.9
tmp2[idx] <- 2
return(PureCN:::.annotateMappingBiasVcf(vcf,
data.frame(bias = tmp, pon.count = tmp2, mu = NA, rho = NA)))
}
ret <- runAbsoluteCN(normal.coverage.file = normal.coverage.file,
tumor.coverage.file = tumor.coverage.file, args.filterVcf = list(remove.off.target.snvs = FALSE),
vcf.file = vcf, genome = "hg19", test.purity = seq(0.3,
0.7, by = 0.05), min.ploidy = 1.5, max.ploidy = 2.1,
max.candidate.solutions = 1, fun.setMappingBiasVcf = myMappingBiasTestFun)
expect_equal(0.65, ret$results[[1]]$purity)
expect_equal(as.numeric(colnames(ret$candidates$all)), c(seq(0.3,
0.34, by = 1/50), seq(0.35, 0.7, by = 1/30)))
s <- predictSomatic(ret)
expect_equal(s$AR/s$MAPPING.BIAS, s$AR.ADJUSTED)
expect_equal(s$MAPPING.BIAS[s$chr == "chr9" & s$start ==
35811642], 0.9)
expect_equal(s$pon.count[s$chr == "chr9" & s$start == 35811642],
2)
})
test_that("Missing Gene column in interval.file is handled correctly", {
gc_data <- read.delim(interval.file, as.is = TRUE)
gc_data$Gene <- NULL
output.file <- tempfile(".txt")
write.table(gc_data, file = output.file, row.names = FALSE, sep = "\t",
quote = FALSE)
ret <- runAbsoluteCN(normal.coverage.file = normal.coverage.file,
fun.segmentation = segmentationPSCBS,
interval.file = output.file, model.homozygous = TRUE, tumor.coverage.file = tumor.coverage.file,
candidates = purecn.example.output$candidates,
min.ploidy = 2.2, max.ploidy = 4, vcf.file = vcf.file,
genome = "hg19", test.purity = seq(0.3, 0.7, by = 0.05),
max.candidate.solutions = 1)
expect_true(ret$results[[1]]$ploidy <= 4)
expect_true(ret$results[[1]]$ploidy >= 2)
expect_true(is.na(ret$results[[1]]$gene.calls))
expect_error(callAlterations(ret), "requires gene-level calls")
rvcf <- predictSomatic(ret, return.vcf=TRUE)
expect_equal(length(ret$input$vcf),
length(rvcf))
expect_equal(".", info(rvcf)$GS[1])
tumor <- readCoverageFile(tumor.coverage.file)
normal <- readCoverageFile(normal.coverage.file)
tumor$gc_bias <- gc_data$gc_bias
tumor$normal.average.coverage <- normal$average.coverage
filtered <- tumor[!overlapsAny(tumor, ret$input$log.ratio)]
expect_equal(sum(!(filtered$average.coverage < 15 | filtered$normal.average.coverage <
15 | filtered$targeted.base < 5 | filtered$gc_bias <
0.25 | filtered$gc_bias > 0.8)),0)
file.remove(output.file)
})
test_that("Different chromosome naming styles throw exceptions", {
vcf <- readVcf(vcf.file, "hg19")
seqlevelsStyle(vcf) <- "ENSEMBL"
normCov <- readCoverageFile(normal.coverage.file)
tumorCov <- readCoverageFile(tumor.coverage.file)
seqlevelsStyle(normCov) <- "ENSEMBL"
seqlevelsStyle(tumorCov) <- "ENSEMBL"
expect_error(runAbsoluteCN(normal.coverage.file, tumor.coverage.file,
genome = "hg19", vcf.file = vcf),
"Different chromosome names in coverage and VCF")
expect_error(runAbsoluteCN(normal.coverage.file = normCov,
tumor.coverage.file = tumorCov, interval.file = interval.file,
vcf.file = vcf, genome = "hg19", test.purity = seq(0.3,
0.7, by = 0.05), max.candidate.solutions = 1),
"tumor.coverage.file and interval.file do not")
output.file1 <- tempfile(fileext = ".txt")
output.file2 <- tempfile(fileext = ".txt")
gc1 <- read.delim(interval.file, as.is = TRUE)
gc1[, 1] <- as.character(tumorCov)
write.table(gc1, file = output.file1, row.names = FALSE, sep = "\t",
quote = FALSE)
gc2 <- gc1
gc2$Gene[2:4] <- paste("SCNN1D,XXXL")
write.table(gc2, file = output.file2, row.names = FALSE, sep = "\t",
quote = FALSE)
set.seed(123)
ret <- runAbsoluteCN(normal.coverage.file = normCov, tumor.coverage.file = tumorCov,
interval.file = output.file1, plot.cnv = FALSE, min.ploidy = 1.5,
max.ploidy = 2.1, vcf.file = vcf, genome = "hg19", test.purity = seq(0.4,
0.7, by = 0.05), max.candidate.solutions = 1)
set.seed(123)
ret2 <- runAbsoluteCN(normal.coverage.file = normCov, tumor.coverage.file = tumorCov,
interval.file = output.file2, plot.cnv = FALSE, min.ploidy = 1.5,
max.ploidy = 2.1, vcf.file = vcf, genome = "hg19", test.purity = seq(0.4,
0.7, by = 0.05), max.candidate.solutions = 1)
gpnmb <- callAlterations(ret)["GPNMB", ]
expect_equal(as.character(gpnmb$chr), "7")
expect_true(gpnmb$start > 2.3e+07)
expect_true(gpnmb$end < 23400000)
expect_true(gpnmb$C >= 6)
expect_equal(gpnmb$type, "AMPLIFICATION")
caAret <- callAlterations(ret, all.genes = TRUE)
caAret2 <- callAlterations(ret2, all.genes = TRUE)
expect_equal(nrow(caAret2), nrow(caAret) + 1)
expect_equal(caAret2[rownames(caAret), "C"], caAret$C)
expect_equal(caAret2[rownames(caAret), "start"], caAret$start)
expect_equal(caAret2["XXXL", "number.targets"], 3)
expect_equal(caAret2["XXXL", "start"], 1216606)
expect_equal(caAret2["XXXL", "end"], 1217696)
expect_equal(caAret2["XXXL", "C"], caAret["SCNN1D", "C"])
expect_equal(caAret2["XXXL", "seg.mean"], caAret["SCNN1D",
"seg.mean"])
plotAbs(ret, 1, type = "all")
expect_error(plotAbs(ret, 1, type = "BAF", chr = "chr1"))
plotAbs(ret, 1, type = "BAF", chr = "1")
x <- ret$results[[1]]$gene.calls
seg <- ret$results[[1]]$seg
expect_equal(x$breakpoints == 0, x$.min.segid == x$.max.segid)
expect_equal(seg$seg.mean[x$seg.id], x$seg.mean)
expect_equal(seg$C[x$seg.id], x$C)
file.remove(c(output.file1, output.file2))
})
test_that("Run with example seg.file works", {
ret <- runAbsoluteCN(tumor.coverage.file = tumor.coverage.file,
seg.file = seg.file, vcf.file = vcf.file, max.candidate.solutions = 1,
genome = "hg19", min.ploidy = 1.5, max.ploidy = 2.1,
test.purity = seq(0.4, 0.7, by = 0.05), sampleid = "Sample2")
expect_equal(0.65, ret$results[[1]]$purity)
# check for https://github.com/lima1/PureCN/issues/19
vcf <- purecn.example.output$input$vcf
vcf.id <- match("chr2231775021xxx", names(vcf))
end(vcf[vcf.id]) <- 236403333
ret <- runAbsoluteCN(seg.file = seg.file, interval.file = interval.file,
vcf.file = vcf, max.candidate.solutions = 1, genome = "hg19",
test.purity = seq(0.4, 0.7, by = 0.05), min.ploidy = 1.5,
max.ploidy = 2.1, verbose = FALSE)
expect_equal(0.65, ret$results[[1]]$purity)
tmp <- predictSomatic(ret)[ret$results[[1]]$SNV.posterior$vcf.ids==vcf.id,]
expect_equal(2, nrow(tmp))
expect_equal(2, sum(tmp$FLAGGED))
expect_equal(rep("chr2231775021xxx",2), as.character(tmp$ID))
tmp <- read.delim(seg.file, as.is = TRUE)
colnames(tmp)[1:4] <- c("Name", "Chromosome", "Start", "End")
output.file <- tempfile(fileext = ".seg")
write.table(tmp, file = output.file, quote = FALSE, row.names = FALSE,
sep = "\t")
expect_error(runAbsoluteCN(seg.file = output.file, interval.file = interval.file,
vcf.file = vcf.file, max.candidate.solutions = 1, genome = "hg19",
min.ploidy = 1.4, max.ploidy = 2.4, test.purity = seq(0.4,
0.7, by = 0.05), verbose = FALSE))
file.remove(output.file)
tmp <- read.delim(seg.file, as.is = TRUE)
tmp2 <- tmp
tmp2[, 1] <- "Sample2"
tmp <- rbind(tmp, tmp2)
output.file <- tempfile(fileext = ".seg")
write.table(tmp, file = output.file, quote = FALSE, row.names = FALSE,
sep = "\t")
expect_error(runAbsoluteCN(seg.file = output.file, interval.file = interval.file,
vcf.file = vcf.file, max.candidate.solutions = 1, genome = "hg19",
min.ploidy = 1.4, max.ploidy = 2.4, test.purity = seq(0.4,
0.7, by = 0.05), verbose = FALSE),
"seg.file contains multiple samples and sampleid missing")
expect_error(runAbsoluteCN(seg.file = output.file, interval.file = interval.file,
sampleid = "Sample3", vcf.file = vcf.file, max.candidate.solutions = 1,
genome = "hg19", test.purity = seq(0.3, 0.7, by = 0.05),
verbose = FALSE),
"contains multiple samples and sampleid does not match")
ret <- runAbsoluteCN(seg.file = output.file, interval.file = interval.file,
sampleid = "Sample1", vcf.file = vcf.file, max.candidate.solutions = 1,
fun.segmentation = segmentationHclust,
genome = "hg19", test.purity = seq(0.4, 0.7, by = 0.05),
verbose = FALSE, min.ploidy = 1.5, max.ploidy = 2.1)
file.remove(output.file)
testSeg <- function(seg, ...) return(seg)
res <- runAbsoluteCN(normal.coverage.file, tumor.coverage.file,
seg.file = seg.file, fun.segmentation = testSeg, min.ploidy = 1.5,
max.ploidy = 2.1, test.purity = seq(0.4, 0.7, by = 0.05),
max.candidate.solutions = 1, genome = "hg19", plot.cnv = FALSE)
seg <- read.delim(seg.file)
expect_equal(nrow(res$results[[1]]$seg), nrow(seg))
expect_equal(res$results[[1]]$seg$seg.mean, seg$seg.mean, tol=0.005)
})
test_that("Run with provided log-ratios works", {
log.ratio <- calculateLogRatio(readCoverageFile(normal.coverage.file),
readCoverageFile(tumor.coverage.file))
ret <- runAbsoluteCN(log.ratio = log.ratio, interval.file = interval.file,
vcf.file = vcf.file, max.candidate.solutions = 1, genome = "hg19",
min.ploidy = 1.5, max.ploidy = 2.1, plot.cnv = FALSE,
test.purity = seq(0.4, 0.7, by = 0.05))
expect_equal(0.65, ret$results[[1]]$purity)
ret <- runAbsoluteCN(log.ratio = log.ratio, seg.file = seg.file,
interval.file = interval.file, vcf.file = vcf.file, max.candidate.solutions = 1,
min.ploidy = 1.5, max.ploidy = 2.1,
genome = "hg19", test.purity = seq(0.5, 0.7, by = 0.05))
expect_equal(0.65, ret$results[[1]]$purity)
})
test_that("Betabin model runs with example data", {
vcf <- readVcf(vcf.file, "hg19")
gt1 <- round(geno(vcf)$DP[, 1] * as.numeric(geno(vcf)$FA[,
1]))
ad1 <- lapply(seq_along(gt1), function(i) as.numeric(c(geno(vcf)$DP[i,
1] - gt1[i], gt1[i])))
names(ad1) <- names(geno(vcf)$DP[, 1])
geno(vcf)$AD[, 1] <- ad1
geno(vcf)$FA <- NULL
geno(vcf)$DP <- NULL
geno(vcf)$BQ[,2] <- geno(vcf)$BQ[,1]
ret <- runAbsoluteCN(normal.coverage.file = normal.coverage.file,
tumor.coverage.file = tumor.coverage.file, sampleid = "LIB-02252e4",
vcf.file = vcf, max.candidate.solutions = 1, genome = "hg19",
min.ploidy = 1.5, max.ploidy = 2.1, plot.cnv = FALSE,
cosmic.vcf.file = cosmic.vcf.file, model = "betabin",
test.purity = seq(0.4, 0.7, by = 0.05))
expect_equal(0.65, ret$results[[1]]$purity, tol=0.1)
})
test_that("normalDB objects are used correctly", {
expect_error(runAbsoluteCN(normal.coverage.file = normal.coverage.file,
tumor.coverage.file = tumor.coverage.file, genome = "hg19",
normalDB = vcf.file),
"normalDB not a valid normalDB object")
normalDB <- createNormalDatabase(normal.coverage.files)
tmp <- normalDB
tmp$normal.coverage.files <- NULL
expect_error(runAbsoluteCN(normal.coverage.file = normal.coverage.file,
tumor.coverage.file = tumor.coverage.file, genome = "hg19",
normalDB = tmp), "normalDB appears to be empty")
tumor <- readCoverageFile(tumor.coverage.file)
tumor[1]$on.target <- FALSE
ret <- runAbsoluteCN(normal.coverage.file = normal.coverage.file,
tumor.coverage.file = tumor, genome = "hg19",
vcf.file = vcf.file, sampleid = "Sample1", interval.file = interval.file,
normalDB = normalDB, args.filterIntervals = list(filter.lowhigh.gc = 0),
plot.cnv = FALSE, min.ploidy = 1.5, max.ploidy = 2.1, test.purity = seq(0.4,
0.7, by = 0.05), max.candidate.solutions = 1)
# normal <- readCoverageFile(normal.coverage.file)
# idx <- overlapsAny(tumor, ret$input$log.ratio)
# TODO: recalc when defaults are final
# cutoff <- median(normalDB$exon.median.coverage) * 0.3
# expect_equal(sum(!(normalDB$exon.median.coverage[!idx] <
# cutoff | width(tumor[!idx]) < 5 | tumor$average.coverage[!idx] <
# 15 | normal$average.coverage[!idx] < 15 | normalDB$fraction.missing[!idx] >
# 0.05)), 0)
# expect_true(sum(!(normalDB$exon.median.coverage[idx] < cutoff |
# width(tumor[idx]) < 5 | tumor$average.coverage[idx] <
# 15 | normal$average.coverage[idx] < 15 | normalDB$fraction.missing[idx] >
# 0.05)) > 9000)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.