#' Aggregate cancerhotspots reports
#' @description Takes tsv files generated by \code{\link{cancerhotspots}} and aggregates them into an MAF for downstream analysis
#' @details Each file is read with \code{data.table::fread}. Total depth (\code{t_depth}) is computed
#' as the row-wise sum of the \code{A}, \code{T}, \code{G}, \code{C}, \code{Ins} and \code{Del} columns,
#' and rows with \code{t_depth < minDepth} or \code{VAF < minVaf} (or a missing value in either) are dropped.
#' \code{loci} is split on \code{":"} into \code{Chromosome} and \code{Start_Position}
#' (\code{End_Position} is set equal to \code{Start_Position}); \code{NT_change} is split on \code{">"}
#' into \code{Reference_Allele} and \code{Tumor_Seq_Allele2}. Samples with no variant left after
#' filtering contribute zero rows instead of aborting the aggregation.
#' @param tsvs TSV files generated by \code{\link{cancerhotspots}}. Character vector with at least one entry.
#' @param minVaf Min. VAF threshold. Default 0.02
#' @param minDepth Min. depth of coverage. Default 15
#' @param sampleNames samples for each tsv file, one per entry of \code{tsvs}. Default NULL. Parses from file names.
#' @param maf Return as an MAF object. Default TRUE.
#' @param ... Additional arguments passed to \code{\link{read.maf}} if \code{maf} is TRUE.
#' @return \code{\link{MAF}} object. A \code{data.table} of the aggregated variants is returned instead
#' when \code{maf} is FALSE, or (with a warning) when fewer than two samples have variants passing the filters.
#' @seealso \code{\link{cancerhotspots}}
#' @export
#'
cancerhotspotsAggr = function(tsvs = NULL, minVaf = 0.02, minDepth = 15, sampleNames = NULL, maf = TRUE, ...){

  # Validate inputs up front so that failures are reported before any file is read.
  if(!is.character(tsvs) || length(tsvs) == 0){
    stop("`tsvs` must be a character vector with at least one cancerhotspots TSV file.", call. = FALSE)
  }

  if(is.null(sampleNames)){
    sampleNames = gsub(pattern = "\\.tsv$",
                       replacement = "",
                       x = basename(path = tsvs))
  }else if(length(sampleNames) != length(tsvs)){
    stop("`sampleNames` must have one entry per file in `tsvs` (got ",
         length(sampleNames), " names for ", length(tsvs), " files).", call. = FALSE)
  }

  count_cols = c("A", "T", "G", "C", "Ins", "Del")
  required_cols = c("loci", "NT_change", "Variant_Classification", "VAF", count_cols)

  # Split every value on `sep` and return exactly two character vectors
  # (NA-padded), so that zero-row or separator-free input cannot change the shape.
  split_pair = function(values, sep){
    values = as.character(values)
    parts = data.table::tstrsplit(x = values, split = sep, fixed = TRUE)
    pad = rep(NA_character_, length(values))
    list(
      if(length(parts) >= 1L) parts[[1L]] else pad,
      if(length(parts) >= 2L) parts[[2L]] else pad
    )
  }

  # Read one report, apply the depth/VAF filters and add the MAF columns.
  read_sample = function(tsv){
    x = data.table::fread(tsv)

    missing_cols = setdiff(required_cols, colnames(x))
    if(length(missing_cols) > 0){
      stop("Missing column(s) in ", tsv, ": ", paste(missing_cols, collapse = ", "), call. = FALSE)
    }

    # Vectorised row-wise total of all base/indel counts.
    depth = rowSums(as.matrix(x[, count_cols, with = FALSE]), na.rm = TRUE)

    # which() drops NA comparisons, i.e. rows with a missing VAF are removed.
    pass_filter = which(depth >= minDepth & x[["VAF"]] >= minVaf)
    x = x[pass_filter]

    loci = split_pair(values = x[["loci"]], sep = ":")
    alleles = split_pair(values = x[["NT_change"]], sep = ">")
    # Positions are genomic coordinates; store them as numbers rather than strings.
    start_pos = as.numeric(loci[[2L]])

    variant_type = rep("SNP", nrow(x))
    variant_type[x[["Variant_Classification"]] %in% c("INS", "DEL")] = "INDEL"

    # Derived columns replace same-named input columns instead of duplicating them.
    input_cols = as.list(x)
    input_cols[c("t_depth", "Variant_Type")] = NULL

    data.table::as.data.table(
      c(
        list(
          Reference_Allele = alleles[[1L]],
          Tumor_Seq_Allele2 = alleles[[2L]],
          Chromosome = loci[[1L]],
          Start_Position = start_pos,
          End_Position = start_pos
        ),
        input_cols,
        list(
          t_depth = depth[pass_filter],
          Variant_Type = variant_type
        )
      )
    )
  }

  canhs = lapply(tsvs, read_sample)
  names(canhs) = sampleNames
  canhs = data.table::rbindlist(l = canhs, idcol = "Tumor_Sample_Barcode", use.names = TRUE)

  if(maf){
    # Only samples that still have variants after filtering are counted here.
    if(data.table::uniqueN(canhs[["Tumor_Sample_Barcode"]]) < 2){
      warning("Min. 2 samples required. Coercing into MAF object is not possible.")
    }else{
      canhs = read.maf(maf = canhs, ...)
    }
  }

  canhs
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.