#'Prepare input files for ASCAT tumor only samples
#' @description Function takes the output from \code{\link{gtMarkers}} and generates `logR` and `BAF` files required for ASCAT analysis.
#' @details The function will filter SNPs with low coverage (default <15), estimate BAF, logR, and generates the input files for ASCAT. Tumor `logR` file will be normalized for median depth of coverage. Alternatively, logR file can be segmented with \code{\link{segmentLogR}}
#' @param t_counts read counts from tumor generated by \code{\link{gtMarkers}}
#' @param sample_name Sample name. Used as a basename for output files. Default NA, parses from `t_counts` file.
#' @param min_depth Min read depth required to consider a marker. Default 15
#' @return Generates logR and BAF files required by ASCAT
#' @seealso \code{\link{gtMarkers}} \code{\link{prepAscat}} \code{\link{segmentLogR}}
#' @references Van Loo P, Nordgard SH, Lingjærde OC, et al. Allele-specific copy number analysis of tumors. Proc Natl Acad Sci U S A. 2010;107(39):16910-16915. doi:10.1073/pnas.1009843107
#' @export
prepAscat_t = function(t_counts = NULL, sample_name = NA, min_depth = 15){
if(any(is.null(t_counts))) stop("Missing tumor or normal read counts!")
if(is.na(sample_name)){
sample_name = gsub(pattern = "\\.tsv$", replacement = "", x = basename(path = t_counts))
}
counts = c(t_counts)
#library sizes
tot_map_reads = lapply(counts, function(x){
x = data.table::fread(input = x,nrows = 1)
as.numeric(x$V2)
})
names(tot_map_reads) = c("tumor")
message("Library sizes:")
message("Tumor: ", tot_map_reads$tumor)
counts = lapply(counts, function(x){
message("Counts file: ", basename(x))
x = data.table::fread(file = x)
x[,loci := gsub(pattern = "^chr", replacement = "", x = loci)]
message("Markers: ", nrow(x))
if(nrow(x[duplicated(loci)]) > 0){
message("Removed ", nrow(x[duplicated(loci)]), " duplicated loci")
x = x[!duplicated(loci)]
}
x[, tot_depth := apply(x[,.(A, T, G, C)], 1, sum)]
x = x[tot_depth > min_depth]
message("Markers > ", min_depth, ": ", nrow(x))
x[, baf := apply(x[,.(A, T, G, C)], 1, function(r) {r = sort(r); r[4]/sum(r[4], r[3])})]
#x[, baf := ifelse(test = baf < 0.5, yes = baf, no = 1 - baf)]
#x$baf = ifelse(x$baf <0.5,x$baf,1-x$baf)
x$baf = ifelse(runif(length(x$baf))<0.5,x$baf,1-x$baf)
x = data.frame(data.table::tstrsplit(x = x$loci, split = ":"), x$baf, x$tot_depth, row.names = x$loci)
colnames(x) = c("chr", "pos", "BAF", "depth")
x$pos = as.numeric(as.character(x$pos))
x
})
names(counts) = c("tumor")
counts = counts$tumor
med_cov = median(counts[counts$chr %in% c(1:22), "depth"], na.rm = TRUE)
message("Median depth of coverage (autosomes): ", med_cov)
message("------")
counts$logR = round(log2(counts$depth) - log2(med_cov), digits = 3)
data.table::fwrite(x = counts[,c("chr", "pos", "BAF")], file = paste0(sample_name, ".tumour.BAF.txt"), sep = "\t", row.names = TRUE)
data.table::fwrite(x = counts[,c("chr", "pos", "logR")], file = paste0(sample_name, ".tumour.logR.txt"), sep = "\t", row.names = TRUE)
message("Generated following files:")
message(paste0(sample_name, ".tumour.BAF.txt"))
message(paste0(sample_name, ".tumour.logR.txt"))
message("------")
# ascat_obj = NULL
# if("ASCAT" %in% rownames(installed.packages())){
# message("Running ASCAT::ascat.loadData:")
# ascat_obj = ASCAT::ascat.loadData(Tumor_LogR_file = paste0(sample_name, ".tumour.logR.txt"),
# Tumor_BAF_file = paste0(sample_name, ".tumour.BAF.txt"),
# chrs = c(1:22, "X", "Y"), sexchromosomes = c("X", "Y"))
# message("Running ASCAT::ascat.plotRawData()")
# ASCAT::ascat.plotRawData(ASCATobj = ascat_obj, img.prefix = sample_name)
# #message("Running ASCAT::ascat.predictGermlineGenotypes() for tumor only")
# #ascat.gg = ASCAT::ascat.predictGermlineGenotypes(ASCATobj = ascat.bc, platform = PLATFORM)
#
# message("Returned ASCAT object!")
# }else{
# warning("ASCAT package not found!")
# }
#
# ascat_obj
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.