## ----
#' @title Filter genotype table by sites
#'
#' @description This function will filter R objects of
#' \code{TasselGenotypePhenotype} class containing genotype tables.
#' The parameters for this function are derived from TASSEL's
#' \code{FilterSiteBuilder} plugin.
#'
#' @name filterGenotypeTableSites
#' @rdname filterGenotypeTableSites
#'
#' @param tasObj An object of class \code{TasselGenotypePenotype}.
#' @param siteMinCount Site minimum count of alleles not unknown. Can range
#' from 0 to inf. Defaults to 0.
#' @param siteMinAlleleFreq Site minimum minor allele frequency. Can range
#' from 0 to 1.0. Defaults to 0.0.
#' @param siteMaxAlleleFreq Site maximum minor allele frequency. Can range
#' from 0 to 1.0. Defaults to 1.0.
#' @param minHeterozygous Min heterozygous proportion. Can range from 0 to 1.0.
#' Defaults to 0.0.
#' @param maxHeterozygous Max heterozygous proportion. Can range from 0 to 1.0.
#' Defaults to 1.0.
#' @param removeMinorSNPStates Remove minor SNP states. Defaults to
#' \code{FALSE}.
#' @param removeSitesWithIndels Remove sites containing an indel
#' (\code{+} or \code{-}). Defaults to \code{FALSE}.
#' @param siteRangeFilterType True if filtering by site numbers. False if
#' filtering by chromosome and position. Options are
#' \code{none}, \code{sites}, or \code{position}. Defaults to \code{none}.
#' @param startSite The start site. Defaults to 0.
#' @param endSite The end site. Defaults to 0.
#' @param startChr Start chromosome for site filtration range if \code{position}
#' is chosen from \code{siteRangeFilterType}. Needs end chromosome
#' (\code{endChr}) to work.
#' @param endChr End chromosome for site filtration range if \code{position}
#' is chosen from \code{siteRangeFilterType}. Needs start chromosome
#' (\code{endChr}) to work.
#' @param startPos Physical start position (bp) for filtration range if
#' \code{position} is chosen from \code{siteRangeFilterType}. If
#' \code{NULL}, the first physical position in the data set will be
#' chosen.
#' @param endPos Physical end position (bp) for filtration range if
#' \code{position} is chosen from \code{siteRangeFilterType}. If
#' \code{NULL}, the last physical position in the data set will be
#' chosen.
#' @param gRangesObj Filter genotype table by \code{GenomicRanges} object.
#' If this parameter is selected, you cannot utilize the parameters,
#' \code{chrPosFile} or \code{bedFile}. Defaults to \code{NULL}.
#' @param chrPosFile An optional chromosome position file path of
#' \code{character} class. Defaults to \code{NULL}. \strong{Note:}
#' a chromosome position file must contain correct formatting
#' (e.g. a two column file with the header of
#' \code{c("Chromosome", "Position")}).
#' @param bedFile An optional BED coordinate file path of
#' \code{character} class. Defaults to \code{NULL}.
#'
#' @return Returns an object of \code{TasselGenotypePhenotype} class.
#'
#' @importFrom GenomicRanges end
#' @importFrom GenomicRanges seqnames
#' @importFrom GenomicRanges start
#' @importFrom rJava .jarray
#' @importFrom rJava .jnull
#' @importFrom rJava is.jnull
#' @importFrom rJava new
#' @importFrom rJava J
#' @export
filterGenotypeTableSites <- function(
tasObj,
siteMinCount = 0,
siteMinAlleleFreq = 0.0,
siteMaxAlleleFreq = 1.0,
minHeterozygous = 0.0,
maxHeterozygous = 1.0,
removeMinorSNPStates = FALSE,
removeSitesWithIndels = FALSE,
siteRangeFilterType = c("none", "sites", "position"),
startSite = NULL,
endSite = NULL,
startChr = NULL,
startPos = NULL,
endChr = NULL,
endPos = NULL,
gRangesObj = NULL,
chrPosFile = NULL,
bedFile = NULL
) {
if (class(tasObj) != "TasselGenotypePhenotype") {
stop("`tasObj` must be of class `TasselGenotypePhenotype`")
}
jGenoTable <- getGenotypeTable(tasObj)
if (rJava::is.jnull(jGenoTable)) {
stop("TASSEL genotype object not found")
}
# Range check
if (siteMinAlleleFreq > 1 || siteMinAlleleFreq < 0) {
stop("siteMinAlleleFreq parameter is out of range")
}
if (siteMaxAlleleFreq > 1 || siteMaxAlleleFreq < 0) {
stop("siteMaxAlleleFreq parameter is out of range")
}
if (minHeterozygous > 1 || minHeterozygous < 0) {
stop("minHeterozygous parameter is out of range")
}
if (maxHeterozygous > 1 || maxHeterozygous < 0) {
stop("maxHeterozygous parameter is out of range")
}
# Site check
taxa <- getTaxaList(tasObj)
if (siteMinCount > taxa$size()) {
stop("Minimum number of taxa exceeds total number of taxa in genotype table.")
}
# Range check (chromosomes)
chroms <- jGenoTable$chromosomes()
chroms <- unlist(lapply(chroms, function(x) { rJava::.jstrVal(x) }))
if (!is.null(startChr) || !is.null(endChr)) {
if (!(any(startChr %in% chroms)) || !(any(endChr %in% chroms))) {
stop("Chromosome IDs not found in genotype table.")
}
}
# Filter type selection
siteRangeFilterType <- match.arg(siteRangeFilterType)
if (missing(siteRangeFilterType) || !siteRangeFilterType %in% c("none", "sites", "position")) {
stop(
paste(
"Please specify analysis type",
"(\"none\", \"sites\", or \"position\")"
)
)
}
# Create filter siter builder plugin
plugin <- rJava::new(
rJava::J("net.maizegenetics.analysis.filter.FilterSiteBuilderPlugin"),
rJava::.jnull(),
FALSE
)
plugin$setParameter("siteMinCount", toString(siteMinCount))
plugin$setParameter("siteMinAlleleFreq", toString(siteMinAlleleFreq))
plugin$setParameter("siteMaxAlleleFreq", toString(siteMaxAlleleFreq))
plugin$setParameter("minHeterozygous", toString(minHeterozygous))
plugin$setParameter("maxHeterozygous", toString(maxHeterozygous))
plugin$setParameter("removeMinorSNPStates", toString(removeMinorSNPStates))
plugin$setParameter("removeSitesWithIndels", toString(removeSitesWithIndels))
# Logic check necessary parameters given range filter type
if (is.null(chrPosFile) && is.null(bedFile)) {
if (siteRangeFilterType == "sites") {
if (is.null(startSite) || is.null(endSite)) {
stop("Please specify both start and end sites.")
}
if (endSite > jGenoTable$numberOfSites()) {
stop("End site parameter exceeds total number of sites in genotype table.")
}
if (startSite > endSite) {
stop("Start site cannot be larger than end site.")
}
plugin$setParameter("startSite", toString(startSite))
plugin$setParameter("endSite", toString(endSite))
} else if (siteRangeFilterType == "position") {
if (is.null(startChr) || is.null(endChr)) {
stop("Please specify both start and end chromosomes.")
}
if (!is.null(startPos)) startPos <- toString(startPos)
if (!is.null(endPos)) endPos <- toString(endPos)
if (startChr == endChr && startPos > endPos) {
stop("Filtration paramaters outside acceptable range.")
}
plugin$setParameter("startChr", toString(startChr))
plugin$setParameter("startPos", startPos)
plugin$setParameter("endChr", toString(endChr))
plugin$setParameter("endPos", endPos)
}
} else if (is.character(chrPosFile) && is.null(bedFile) && is.null(gRangesObj)) {
tmpChrDF <- utils::read.table(chrPosFile, sep = "\t", header = TRUE)
headCheck <- c("Chromosome", "Position")
if (!identical(colnames(tmpChrDF), headCheck)) {
stop("Please check chromosome position file for correct formatting")
}
plugin$setParameter("chrPosFile", chrPosFile)
} else if (is.null(chrPosFile) && is.character(bedFile) && is.null(gRangesObj)) {
plugin$setParameter("bedFile", bedFile)
} else {
stop("Incorrect parameter usage")
}
# Filter by GRanges object if present
if (!is.null(gRangesObj)) {
if (class(gRangesObj) != "GRanges") {
stop("gRangesObj must be of class `GRanges`")
}
jRC <- rJava::J("net/maizegenetics/plugindef/GenerateRCode")
jGenoTable <- jRC$filterSitesByGRanges(
jGenoTable,
rJava::.jarray(as.vector(GenomicRanges::seqnames(gRangesObj))),
rJava::.jarray(GenomicRanges::start(gRangesObj)),
rJava::.jarray(GenomicRanges::end(gRangesObj))
)
}
# Try to run plugin
out <- tryCatch(
{
plugin$runPlugin(jGenoTable)
},
error = function(e) {
return(-1)
}
)
# Check if input had phenotype table. If yes, combine genotype with phenotype
if (class(out) != "jobjRef") {
message("No data returned.")
return(NA)
} else {
jPhenoTable <- getPhenotypeTable(tasObj)
if (rJava::is.jnull(jPhenoTable)) {
.tasselObjectConstructor(out)
} else {
.tasselObjectConstructor(
combineTasselGenotypePhenotype(
genotypeTable = out,
phenotype = jPhenoTable
)
)
}
}
}
## ----
#' @title Filter genotype table by site IDs
#'
#' @description
#' Filter a genotype table object by specifying literal site names (IDs)
#' for variant markers.
#'
#' @name filterGenotypeTableBySiteName
#' @rdname filterGenotypeTableBySiteName
#'
#' @param tasObj An object of class \code{TasselGenotypePenotype}.
#' @param siteNames A character vector of site names to filter on.
#'
#' @return Returns an object of \code{TasselGenotypePhenotype} class.
#'
#' @export
filterGenotypeTableBySiteName <- function(tasObj, siteNames) {
# Rudimentary type check
if (class(tasObj) != "TasselGenotypePhenotype") {
stop("`tasObj` must be of class `TasselGenotypePhenotype`")
}
# Get Java memory pointer
gtJava <- getGenotypeTable(tasObj)
# Instantiate new ArrayList object and populate with site names
idsToKeep <- rJava::.jnew("java.util.ArrayList")
for (id in siteNames) idsToKeep$add(id)
# Instantiate filter site builder plugin
plugin <- rJava::new(
rJava::J("net.maizegenetics.analysis.filter.FilterSiteBuilderPlugin"),
rJava::.jnull(),
FALSE
)
plugin$siteNamesList(idsToKeep)
# Instantiate DataSet object
dataSet <- rJava::J("net.maizegenetics.plugindef.DataSet")
# Try to filter data - if outofbounds exception - no sites returned
gtFilter <- tryCatch(
expr = {
# run plugin and return S4 rTASSEL object (filtered)
.tasselObjectConstructor(
plugin$runPlugin(dataSet$getDataSet(gtJava))
)
},
error = function(e) {
message("Error. No sites found: ")
message(" ", conditionMessage(e), "\n")
}
)
return(gtFilter)
}
## ----
#' @title Filter genotype table by taxa
#'
#' @description This function will filter R objects of
#' \code{TasselGenotypePhenotype} class containing genotype tables.
#' The parameters for this function are derived from TASSEL's
#' \code{FilterTaxaBuilder} plugin.
#'
#' @name filterGenotypeTableTaxa
#' @rdname filterGenotypeTableTaxa
#'
#' @param tasObj An object of class \code{TasselGenotypePenotype}.
#' @param minNotMissing Minimum proportion of sites not unknown to pass this
#' filter. Value can be between 0.0 and 1.0.
#' @param minHeterozygous Minimum proportion of sites that are heterozygous.
#' Value can be between 0.0 and 1.0.
#' @param maxHeterozygous Maximum proportion of sites that are heterozygous.
#' Value can be between 0.0 and 1.0.
#' @param taxa Vector of taxa IDs (as character) to subset. Defaults to
#' \code{NULL}.
#'
#' @return Returns an object of \code{TasselGenotypePhenotype} class.
#'
#' @importFrom rJava is.jnull
#' @importFrom rJava new
#' @importFrom rJava J
#' @importFrom rJava .jnull
#' @export
filterGenotypeTableTaxa <- function(
tasObj,
minNotMissing = 0.0,
minHeterozygous = 0.0,
maxHeterozygous = 1.0,
taxa = NULL
) {
if (class(tasObj) != "TasselGenotypePhenotype") {
stop("`tasObj` must be of class `TasselGenotypePhenotype`")
}
jGenoTable <- getGenotypeTable(tasObj)
if (rJava::is.jnull(jGenoTable)) {
stop("TASSEL genotype object not found")
}
# Range check
if (minNotMissing > 1 || minNotMissing < 0 ) {
stop("minNotMissing parameter is out of range")
}
if (minHeterozygous > 1 || minHeterozygous < 0 ) {
stop("minHeterozygous parameter is out of range")
}
if (maxHeterozygous > 1 || maxHeterozygous < 0 ) {
stop("maxHeterozygous parameter is out of range")
}
# Create filter taxa builder plugin
plugin <- rJava::new(
rJava::J("net.maizegenetics.analysis.filter.FilterTaxaBuilderPlugin"),
rJava::.jnull(),
FALSE
)
plugin$setParameter("minNotMissing", toString(minNotMissing))
plugin$setParameter("minHeterozygous", toString(minHeterozygous))
plugin$setParameter("maxHeterozygous", toString(maxHeterozygous))
if (!is.null(taxa)) {
if (!is.vector(taxa)) {
stop("Taxa list must be vector.")
}
if (!is.character(taxa)) {
stop("Taxa list must be of type character.")
}
builder <- .jnew("net.maizegenetics.taxa.TaxaListBuilder")
builder$addAll(.jarray(taxa))
taxaArray <- builder$build()
plugin$setParameter("includeTaxa", "true")
plugin$setParameter("taxaList", taxaArray)
}
resultDataSet <- plugin$runPlugin(jGenoTable)
# Check if input had phenotype table. If yes, combine genotype with phenotype
jPhenoTable <- getPhenotypeTable(tasObj)
if (rJava::is.jnull(jPhenoTable)) {
.tasselObjectConstructor(resultDataSet)
} else {
.tasselObjectConstructor(
combineTasselGenotypePhenotype(
genotypeTable = resultDataSet,
phenotype = jPhenoTable
)
)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.