#' Downsample a whole exome sequencing (WES) dataset to a subset only containing
#' genes targeted in the panel
#'
#' This function generates a subset of a whole exome sequencing dataset which
#' only includes the genes of a given gene panel. This subset may then be used
#' to simulate panel-based TMB quantification.
#'
#' @param WES a list generated by applyFilters or applyInputToTMB with variants,
#' filter, design and sample elements.
#' @param WES.design a \code{GRanges} object containing WES sequencing design.
#' Used to remove off target mutations
#' @param panel.design a \code{GRanges} object containing sequencing design of
#' the panel tyou want to simuate. Used to subset WES.
#' @param assembly human genome assembly: "hg19" or "hg38"
#'
#' @return Returns a \code{GRanges} object containing variants from the
#' simulated panel, which is only those variants from WES falling in the regions
#' targeted by the panel
#'
#'
#' @author Laura Fancello
#'
simulatePanel <- function(WES, WES.design, panel.design, assembly){
filter=WES$filter
sample=WES$sample
WES=WES$variants
# Sanity Checks -----------------------------------------------------------
## Check input arguments
if (is.null(WES)) {
stop("argument 'WES' is missing, with no default")
}
if (methods::is(WES)[1] == "CollapsedVCF") {
WES=SummarizedExperiment::rowRanges(WES)
}else{
if (methods::is(WES)[1] == "data.frame") {
WES=GenomicRanges::makeGRangesFromDataFrame(WES,
seqnames.field = "CHR",
start.field = "START",
end.field = "END",
strand.field = "STRAND",
keep.extra.columns = FALSE)
}
else{
if(methods::is(WES)[1] != "GRanges"){
stop("No valid object with variants from WES: please provide a GRanges
object.")
}
}
}
if (is.null(WES.design)) {
stop("argument 'WES.design' is missing, with no default")
}
if (!(methods::is(WES.design)[1] == "GRanges")) {
stop("No valid WES design found: please provide a GRanges object containing WES
sequencing design")
}
if (is.null(panel.design)) {
stop("argument 'panel.design' is missing, with no default")
}
if (!(methods::is(panel.design)[1] == "GRanges")) {
stop("No valid panel design found: please provide a GRanges object
containing sequencing design for the panel to simulate")
}
if (is.null(assembly)) {
stop("argument 'assembly' is missing, with no default: please specify 'hg19' or 'hg38'")
}
if ((assembly != "hg19") && (assembly != "hg38")) {
stop("No valid genome specified: please specify 'hg19' or 'hg38'")
}
# Preprocess input --------------------------------------------------------
## Set UCSC style and allowed chromosome names.
GenomeInfoDb::seqlevelsStyle(WES) <- "UCSC"
keepchr <- c("chr1", "chr2", "chr3", "chr4", "chr5", "chr6",
"chr7", "chr8", "chr9", "chr10", "chr11", "chr12",
"chr13", "chr14", "chr15", "chr16", "chr17", "chr18",
"chr19", "chr20", "chr21", "chr22", "chrX", "chrY")
GenomeInfoDb::seqlevels(WES, pruning.mode="coarse") <- keepchr
# Remove off-target mutations from WES (e.g. mutations identified
# by WES but not nincluded in sequencing design) by overlapping with GRanges
# object with design
WES_ok <- IRanges::subsetByOverlaps(WES, WES.design)
# Simulate panel by subsetting WES ----------------------------------------
# Remove from WES variants not included in panel design
simulatedPanel_gr <- IRanges::subsetByOverlaps(WES_ok, panel.design)
# Generate output list with selected variants, filter description, object
# containing simulated panel sequencing design, sample name
simulatedPanel=list(variants=simulatedPanel_gr,
filter=filter,
design=panel.design,
sample=sample)
return(simulatedPanel)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.