.remove_signal_ids = function(x, keep_ids){
new_sig_dt = x$signal_data[id %in% keep_ids]
new_nclust = length(unique(new_sig_dt$cluster_id))
new_sig_dt$cluster_id = NULL
new_sig_dt$id = as.character(new_sig_dt$id)
new_sig_dt
new_query_gr = x$query_gr[keep_ids]
new_man = lapply(x@manual_assigned, function(x){
intersect(x, keep_ids)
})
ClusteredSignal(signal_profile_dt = new_sig_dt,
query_gr = new_query_gr,
manual_assigned = new_man,
nclust = new_nclust,
signal_var = x@signal_var,
signal_var.within = x@signal_var.within,
facet_var = x@facet_var,
extra_var = x@extra_var
)
}
.remove_SCC_ids = function(x, keep_ids, meta_dt){
new_corr_res = x$full_correlation_results[id %in% keep_ids]
new_corr_res_l = split(new_corr_res, new_corr_res$name)
rls = sapply(split(x$read_length, x$read_length$name), function(x)x$read_length)
todo = names(new_corr_res_l)
names(todo) = todo
scc_res_l = lapply(todo, function(nam){
corr_res = gather_metrics(peak_strand_corr = new_corr_res_l[[nam]], read_length = rls[nam])
vnames = names(corr_res)
corr_res = lapply(vnames, function(nam){
xv = corr_res[[nam]]
if(!is.data.table(xv)){
xv = data.table(xv)
setnames(xv, nam)
}
xv
})
names(corr_res) = vnames
corr_res
})
vnames = names(scc_res_l[[1]])
nam = "read_correlation"
scc_res = lapply(vnames, function(nam){
part = lapply(scc_res_l, function(x){
xv = x[[nam]]
# if(!is.data.table(xv)){
# xv = data.table(xv)
# setnames(xv, nam)
# }
xv
})
dt = rbindlist(part, idcol = "name")
mdt = meta_dt
mdt = mdt[, union("name", setdiff(colnames(mdt), colnames(dt))), with = FALSE]
if(ncol(mdt) > 1){
merge(dt, mdt, by = "name")
}else{
dt
}
})
names(scc_res) = vnames
scc_res
}
.remove_FRIP_ids = function(x, keep_ids){
x[id %in% keep_ids]
}
#' ssvQC.selectFeatures
#'
#' @export
#' @rdname ssvQC.removeFeatures
#'
ssvQC.selectFeatures = function(sqc, ids = NULL, grs = NULL, features_name = NULL, invert = FALSE){
ssvQC.removeFeatures(sqc, ids = ids, grs = grs, features_name = features_name, invert = !invert)
}
#' ssvQC.removeFeatures
#'
#' @param sqc A valid ssvQC object after clustering by ssvQC.prepSignal
#' @param ids region ids.
#' @param grs GRanges to use to fine region ids.
#' @param invert If true, flip removal to selection and vice versa.
#'
#' @return ssvQC object with selected regions removed.
#' @rdname ssvQC.removeFeatures
#' @export
#'
#' @examples
#' library(ssvQC)
#' options(mc.cores = 1)
#' set.seed(0)
#' features_config_file = system.file(package = "ssvQC", "extdata/ssvQC_peak_config.csv")
#' features_config = QcConfigFeatures.parse(features_config_file)
#'
#' bam_config_file = system.file(package = "ssvQC", "extdata/ssvQC_bam_config.csv")
#' bam_config = QcConfigSignal.parse(bam_config_file)
#'
#' sqc.complete.file = ssvQC(features_config_file, bam_config_file)
#'
#' sqc.complete = ssvQC(features_config, bam_config)
#' sqc = ssvQC.runAll(sqc.complete)
#' ids = c("region_1", "region_2")
#' grs = NULL
#' invert = FALSE
#' features_name = NULL
#'
#' sqc$plots$SCC$dots
#'
#' #removal
#' sqc.filtered = ssvQC.removeFeatures(sqc, ids = c("region_1", "region_2"))
#'
#' sqc.filtered2 = ssvQC.removeFeatures(sqc.filtered, ids = c("region_1", "region_2"))
#'
#' sqc.filtered.inv = ssvQC.removeFeatures(sqc, ids = c("region_1", "region_2", "region_3", "region_4", "region_5"), invert = TRUE)
#'
#' sqc.filtered.gr = ssvQC.removeFeatures(sqc, grs = GRanges(paste0("chr", 1:16), IRanges(1, 1e9)))
#' sqc.filtered.gr$features_config$assessment_features
#' sqc.filtered.gr$plots$signal$heatmaps
#' sqc.filtered.gr$plots$SCC$dots
#'
#' sqc.filtered.gr_inv = ssvQC.removeFeatures(sqc, grs = GRanges(paste0("chr", 1:16), IRanges(1, 1e9)), invert = TRUE)
#' sqc.filtered.gr_inv$features_config$assessment_features
#' sqc.filtered.gr_inv$plots$signal$heatmaps
#' sqc.filtered.gr_inv$plots$SCC$dots
#'
#' # selection
#' sqc.selected = ssvQC.selectFeatures(sqc, ids = c("region_1", "region_2", "region_3", "region_4", "region_5"))
#'
#' sqc.selected2 = ssvQC.selectFeatures(sqc.selected, ids = c("region_1", "region_2", "region_3", "region_4", "region_5"))
#'
#' sqc.selected.inv = ssvQC.selectFeatures(sqc, ids = c("region_1", "region_2", "region_3", "region_4", "region_5"), invert = TRUE)
#'
#' sqc.selected.gr = ssvQC.selectFeatures(sqc, grs = GRanges(paste0("chr", 1:16), IRanges(1, 1e9)))
#' sqc.selected.gr$features_config$assessment_features
#' sqc.selected.gr$plots$signal$heatmaps
#' sqc.selected.gr$plots$SCC$dots
#'
#' sqc.selected.gr_inv = ssvQC.selectFeatures(sqc, grs = GRanges(paste0("chr", 1:16), IRanges(1, 1e9)), invert = TRUE)
#' sqc.selected.gr_inv$features_config$assessment_features
#' sqc.selected.gr_inv$plots$signal$heatmaps
#' sqc.selected.gr_inv$plots$SCC$dots
#'
ssvQC.removeFeatures = function(sqc, ids = NULL, grs = NULL, features_name = NULL, invert = FALSE){
stopifnot(is(sqc, "ssvQC"))
stopifnot(is.logical(invert))
if(length(sqc$features_config$assessment_features) == 0) stop("Call ssvQC.prepFeatures first.")
poss_feature_names = names(sqc$features_config$assessment_features)
if(is.null(features_name)){
if(length(poss_feature_names) > 1){
stop("features_name is required when there is more than one feature assessment group. Should be one of: ", paste(poss_feature_names, collapse = ", ") )
}else{
features_name = poss_feature_names[1]
}
}
stopifnot(is.character(features_name))
stopifnot(length(features_name) == 1)
stopifnot(features_name %in% poss_feature_names)
if(is.null(ids) && is.null(grs)){
stop("One of ids or grs is required.")
}
if(!is.null(ids) && !is.null(grs)){
stop("Only one of ids or grs is allowed.")
}
assessed_grs = sqc$features_config$assessment_features[[features_name]]
if(!is.null(grs)){
stopifnot(is(grs, "GRanges"))
ids = names(subsetByOverlaps(assessed_grs, grs))
#select ids based on grs
}
if(is.factor(ids)) ids = as.character(ids)
stopifnot(is(ids, "character"))
if(invert){
keep_ids = intersect(ids, names(assessed_grs))
remv_ids = setdiff(names(assessed_grs), ids)
}else{
keep_ids = setdiff(names(assessed_grs), ids)
remv_ids = intersect(ids, names(assessed_grs))
if(!setequal(ids, remv_ids)){
message("Not all ids present in current assessment set.")
}
}
if(length(keep_ids) < 5){
stop("Currently, fewer than 5 regions are not allowed. This will be addressed in future versions.")
}
keep_grs = assessed_grs[keep_ids]
remv_grs = assessed_grs[remv_ids]
if(length(sqc$features_config@assessment_gr) == 0){
stop("Call ssvQC.prepFeatures first.")
}
sqc$features_config@assessment_gr[[features_name]] = keep_grs
sqc$features_config@overlap_gr[[features_name]] = subsetByOverlaps(sqc$features_config$overlapped_features[[features_name]], remv_grs, invert = TRUE)
sqc$features_config@loaded_features[[features_name]] = lapply(sqc$features_config@loaded_features[[features_name]], function(gr){
subsetByOverlaps(gr, remv_grs, invert = TRUE)
})
if(length(sqc$signal_data) > 0){
sqc@signal_data[[features_name]] = lapply(sqc$signal_data[[features_name]], function(feat_sig){
.remove_signal_ids(feat_sig, keep_ids)
})
}
if(!is.null(sqc@other_data$SCC)){
sqc@other_data$SCC[[features_name]] = lapply(sqc@other_data$SCC[[features_name]], function(x){
.remove_SCC_ids(x, keep_ids, as.data.table(sqc$signal_config$meta_data))
})
}
if(!is.null(sqc@other_data$FRIP)){
sqc@other_data$FRIP[[features_name]] = lapply(sqc@other_data$FRIP[[features_name]], function(x){
.remove_FRIP_ids(x, keep_ids)
})
}
if(!is.null(sqc@other_data$read_count_correlation) | !is.null(sqc@other_data$signal_profile_correlation)){
sqc@other_data$read_count_correlation = NULL
sqc@other_data$signal_profile_correlation = NULL
sqc = ssvQC.prepCorrelation(sqc)
}
if(!is.null(sqc$plots$features)){
sqc = ssvQC.plotFeatures(sqc)
}
if(!is.null(sqc$plots$signal)){
sqc = ssvQC.plotSignal(sqc)
}
if(!is.null(sqc$plots$SCC)){
sqc = ssvQC.plotSCC(sqc)
}
if(!is.null(sqc$plots$FRIP)){
sqc = ssvQC.plotFRIP(sqc)
}
if(!is.null(sqc$plots$correlation)){
sqc = ssvQC.plotCorrelation(sqc)
}
sqc
}
#' ssvQC.removeClusters
#'
#' @param sqc A valid ssvQC object after clustering by ssvQC.prepSignal
#' @param cluster_numbers Number for clusters.
#' @param features_name feature_name to perform selection on. Required if multiple feature sets present.
#' @param signals_name signal_name to perform selection on. Required if multiple signal sets present.
#' @param invert If true, flip removal to selection and vice versa.
#'
#' @return ssvQC object with selected clusters removed.
#' @export
#' @rdname ssvQC.selectClusters
#'
#' @examples
#' library(ssvQC)
#' options(mc.cores = 1)
#' set.seed(0)
#' features_config_file = system.file(package = "ssvQC", "extdata/ssvQC_peak_config.csv")
#' features_config = QcConfigFeatures.parse(features_config_file)
#'
#' bam_config_file = system.file(package = "ssvQC", "extdata/ssvQC_bam_config.csv")
#' bam_config = QcConfigSignal.parse(bam_config_file)
#'
#' sqc.complete.file = ssvQC(features_config_file, bam_config_file)
#'
#' sqc.complete = ssvQC(features_config, bam_config)
#' sqc = ssvQC.runAll(sqc.complete)
#' sqc$plots$signal$heatmaps$CTCF_features$CTCF_signal
#'
#' sqc.rmclust = ssvQC.removeClusters(sqc, cluster_numbers = 1:4)
#' sqc.rmclust$plots$signal$heatmaps$CTCF_features$CTCF_signal
#'
#' sqc.rmclust_inv = ssvQC.removeClusters(sqc, cluster_numbers = 1:4, invert = TRUE)
#' sqc.rmclust_inv$plots$signal$heatmaps$CTCF_features$CTCF_signal
#'
#' sqc.selclust = ssvQC.selectClusters(sqc, cluster_numbers = 1:4)
#' sqc.selclust$plots$signal$heatmaps$CTCF_features$CTCF_signal
#'
#' sqc.selclust_inv = ssvQC.selectClusters(sqc, cluster_numbers = 1:4, invert = TRUE)
#' sqc.selclust_inv$plots$signal$heatmaps$CTCF_features$CTCF_signal
ssvQC.removeClusters = function(sqc, cluster_numbers, features_name = NULL, signals_name = NULL, invert = FALSE){
stopifnot(is(sqc, "ssvQC"))
stopifnot(is.logical(invert))
if(length(sqc$features_config$assessment_features) == 0) stop("Call ssvQC.prepFeatures first.")
poss_feature_names = names(sqc$features_config$assessment_features)
if(is.null(features_name)){
if(length(poss_feature_names) > 1){
stop("features_name is required when there is more than one feature assessment group. Should be one of: ", paste(poss_feature_names, collapse = ", ") )
}else{
features_name = poss_feature_names[1]
}
}
stopifnot(is.character(features_name))
stopifnot(length(features_name) == 1)
stopifnot(features_name %in% poss_feature_names)
if(length(sqc$signal_data) == 0) stop("Call ssvQC.prepSignal first.")
poss_signal_names = names(sqc$signal_data[[features_name]])
if(is.null(signals_name)){
if(length(poss_signal_names) > 1){
stop("signals_name is required when there is more than one feature assessment group. Should be one of: ", paste(poss_signal_names, collapse = ", ") )
}else{
signals_name = poss_signal_names[1]
}
}
stopifnot(is.character(signals_name))
stopifnot(length(signals_name) == 1)
stopifnot(signals_name %in% poss_signal_names)
x = sqc$signal_data[[features_name]][[signals_name]]
sel = x$signal_data[cluster_id %in% cluster_numbers]
table(sel$cluster_id)
ids = as.character(unique(sel$id))
ssvQC.removeFeatures(sqc, ids = ids, features_name = features_name, invert = invert)
}
#' ssvQC.selectClusters
#'
#' @export
#' @rdname ssvQC.selectClusters
ssvQC.selectClusters = function(sqc, cluster_numbers, features_name = NULL, signals_name = NULL, invert = FALSE){
ssvQC.removeClusters(sqc, cluster_numbers, features_name = features_name, signals_name = signals_name, invert = !invert)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.