#' calc_norm_factors
#'
#' Calculate normalization factors in a two step process:
#'
#' 1) summarize every region for each sample (default summary function is max)
#'
#' 2) caclulate a value to cap each sample to based on regions (default is 95th
#' quantile).
#'
#' The uderlying assumption here is that meaningful enrichment is present at the
#' majority of regions provided. If prevalence varies by a specific factor, say
#' ChIP-seq targets with different characteristics - ie. when analyzing TSSes
#' for H3K4me3 and an infrequent transcription factor it is more appropriate to
#' specify appropriate quantile cutoffs per factor.
#'
#' @param full_dt a data.table, as returned by ssvFetch*(..., return_data.table.
#' = TRUE)
#' @param value_ character, attribute in full_dt to normalzie.
#' @param cap_value_ character, new attribute name specifying values to cap to.
#' @param by1 character vector, specifies attributes relevant to step 1.
#' @param by2 character vector, specifies attributes relevant to step 1 and 2.
#' @param aggFUN1 function called on value_ with by = c(by1, by2) in step 1.
#' @param aggFUN2 function called on result of aggFUN1 with by = by2 in step 2.
#'
#' @return data.table mapping by2 to cap_value_.
#' @export
#'
#' @examples
#' data(CTCF_in_10a_profiles_dt)
#' calc_norm_factors(CTCF_in_10a_profiles_dt)
#' calc_norm_factors(CTCF_in_10a_profiles_dt,
#' aggFUN1 = mean, aggFUN2 = function(x)quantile(x, .5))
calc_norm_factors = function(full_dt,
value_ = "y",
cap_value_ = "y_cap_value",
by1 = "id",
by2 = "sample",
aggFUN1 = max,
aggFUN2 = function(x)quantile(x, .95)
){
value_summary_ = NULL #reserve binding for data.table
stopifnot(data.table::is.data.table(full_dt))
stopifnot(value_ %in% colnames(full_dt))
stopifnot(by1 %in% colnames(full_dt))
stopifnot(by2 %in% colnames(full_dt))
cap_dt = full_dt[, list(value_summary_ = aggFUN1(get(value_))), c(by1, by2)][, list(value_capped_ = aggFUN2(value_summary_)), c(by2)]
data.table::setnames(cap_dt, "value_capped_", cap_value_)
cap_dt[]
}
#' append_ynorm
#'
#' see \code{\link{calc_norm_factors}} for normalization details.
#'
#' @param full_dt a data.table, as returned by ssvFetch*(..., return_data.table
#' = TRUE).
#' @param value_ character, attribute in full_dt to normalzie.
#' @param cap_value_ character, new attribute name specifying values to cap to.
#' @param norm_value_ character, new attribute name specifying normalized values.
#' @param by1 character vector, specifies attributes relevant to step 1.
#' @param by2 character vector, specifies attributes relevant to step 1 and 2.
#' @param aggFUN1 function called on value_ with by = c(by1, by2) in step 1.
#' @param aggFUN2 function called on result of aggFUN1 with by = by2 in step 2.
#' @param cap_dt optionally, provide user generated by2 to cap_value_ mapping
#' @param do_not_cap if TRUE, normalized values are not capped to 1. Default is FALSE.
#' @param do_not_scaleTo1 if TRUE, normalized values are not scaled to 1. Default is FALSE.
#' @param force_append if TRUE, any previous cap_value or norm_value is overridden. Default is FALSE.
#'
#' @return data.table, full_dt with cap_value_ and norm_value_ values appended.
#' @export
#'
#' @examples
#' data(CTCF_in_10a_profiles_dt)
#' append_ynorm(CTCF_in_10a_profiles_dt)
#' append_ynorm(CTCF_in_10a_profiles_dt,
#' aggFUN1 = mean, aggFUN2 = function(x)quantile(x, .5))
append_ynorm = function(full_dt,
value_ = "y",
cap_value_ = "y_cap_value",
norm_value_ = "y_norm",
by1 = "id",
by2 = "sample",
aggFUN1 = max,
aggFUN2 = function(x)quantile(x, .95),
cap_dt = NULL,
do_not_cap = FALSE,
do_not_scaleTo1 = FALSE,
force_append = FALSE
){
if(do_not_cap && do_not_scaleTo1){
warning("With do_not_cap and do_not_scaleTo1, only cap_value will be appended and normalized value will be unchanged. This is not likely what you want.")
}
stopifnot(data.table::is.data.table(full_dt))
stopifnot(value_ %in% colnames(full_dt))
stopifnot(by1 %in% colnames(full_dt))
stopifnot(by2 %in% colnames(full_dt))
if(force_append){
full_dt[[cap_value_]] = NULL
full_dt[[norm_value_]] = NULL
}
stopifnot(!cap_value_ %in% colnames(full_dt))
stopifnot(!norm_value_ %in% colnames(full_dt))
if(is.null(cap_dt)){
cap_dt = calc_norm_factors(full_dt, value_, cap_value_, by1, by2, aggFUN1, aggFUN2)
}else{
stopifnot(cap_value_ %in% colnames(cap_dt))
stopifnot(by2 %in% colnames(cap_dt))
}
full_dt = merge(full_dt, cap_dt, by = by2)
data.table::set(full_dt, NULL, norm_value_, full_dt[[value_]])
if(!do_not_cap){
full_dt[get(norm_value_) > get(cap_value_), (norm_value_) := get(cap_value_)]
}
if(!do_not_scaleTo1){
full_dt[, (norm_value_) := get((norm_value_)) / get(cap_value_)]
}
full_dt[]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.