R/summarize.R

Defines functions summarize

Documented in summarize

#' @title Summarize Data
#' @description
#' Summarizes pdr, epipolymorphism, and shannon values over the annotation regions
#' @param  gr1 A GenomicRanges object to be compared
#' @param  gr2 A GenomicRanges object to be compared
#' @param  value1 The value of gr1 to be compared
#' @param  value2 The value of gr2 be compared
#' @param  cutoff1 The first cutoff value for the number of reads (default:10)
#' @param  cutoff2 The second cutoff value for the number of reads (default:60)
#' @return A data frame containing a summary of the GenomicRanges object
#' @examples
#' p1.GR<-GRanges(seqnames = Rle(c("chr22"), c(5)),
#' ranges = IRanges(c(327,821,838,755,761), end = c(364,849,858,773,781)),
#' strand = Rle(strand(c("-", "+", "+", "+", "-"))),
#' values.loci = c("327:350:361:364","821:837:844:849",
#' "838:845:850:858","755:761:771:773","761:771:773:781"),
#' values.read1 = c(92,72,68,176,176),values.meth1=c(84,93,94,96,95),
#' values.shannon=c(0.4,0.5,0.5,0.2,0.5),values.pdr=c(0.6,0.25,0.23,0.15,0.17),
#' values.epipoly=c(0.48,0.42,0.38,0.27,0.3))
#'
#' p2.GR<-GRanges(seqnames = Rle(c("chr22"), c(5)),
#' ranges = IRanges(c(327,821,838,755,761), end = c(364,849,858,773,781)),
#' strand = Rle(strand(c("-", "+", "+", "+", "-"))),
#' values.loci = c("327:350:361:364","821:837:844:849",
#' "838:845:850:858","755:761:771:773","761:771:773:781"),
#' values.read1 = c(107,102,102,76,76),values.meth1=c(88,66,69,71,94),
#' values.shannon=c(0.12,0.25,0.54,0.23,0.25),
#' values.pdr=c(0.38,1,0.97,1,0.13),
#' values.epipoly=c(0.57,0.42,0.28,0.18,0.23))
#'
#' GR.List<-list(p1=p1.GR,p2=p2.GR)
#' summary <- summarize(gr1 = GR.List[[1]], gr2 = GR.List[[2]],
#' value1 = 'pdr', value2 = 'epipoly',
#' cutoff1 = 10, cutoff2 = 60)
#' @importFrom stats cor
#' @importFrom GenomicRanges findOverlaps values
#' @importFrom S4Vectors queryHits
#' @export
summarize <- function(gr1, gr2, value1, value2, cutoff1 = 10,
    cutoff2 = 60) {
    values <- c("pdr", "shannon", "epipoly","myValues")#new add
    if (!(value1 %in% values)) {
        stop("Invalid value '", value1, "': Possible values are 'pdr',
             'epipoly', 'myValues' or 'shannon'")#new add
    }
    if (!(value2 %in% values)) {
        stop("Invalid value '", value2, "': Possible values are 'pdr',
             'epipoly', 'myValues' or 'shannon'")#new add
    }
    o <- findOverlaps(gr1, gr2)
    x.anno <- values(gr1[unique(queryHits(o))])
    x.anno <- as.data.frame(x.anno)
    sub1 <- x.anno[x.anno$values.read1 >= cutoff1, ]
    sub2 <- x.anno[x.anno$values.read1 >= cutoff2, ]
    index1 <- which(colnames(x.anno)==paste0("values.",value1))
    index2 <- which(colnames(x.anno)==paste0("values.",value2))
    mean.value1.c1 <- mean(sub1[,index1],na.rm=TRUE)
    mean.value2.c1 <- mean(sub1[,index2],na.rm=TRUE)
    mean.value1.c2 <- mean(sub2[,index1],na.rm=TRUE)
    mean.value2.c2 <- mean(sub2[,index2],na.rm=TRUE)
    name <- c(paste0("mean.", value1, ".", cutoff1),
        paste0("mean.", value2, ".", cutoff1), paste0("mean.",
            value1, ".", cutoff2), paste0("mean.",
            value2, ".", cutoff2), paste0("corr.",
            cutoff1), paste0("corr.", cutoff2), paste0("loci.",
            cutoff1), paste0("loci.", cutoff2))
    df <- data.frame(mean.value1.cutoff1 = mean.value1.c1,
        mean.value2.cutoff1 = mean.value2.c1,
        mean.value1.cutoff2 = mean.value1.c2,
        mean.value2.cutoff2 = mean.value2.c2,
        corr.cutoff1 = cor(sub1[,index1],
            sub1[,index2]), corr.cutoff2 = cor(sub2[,index1],
            sub2[,index2]), loci.cutoff1 = nrow(sub1),
        loci.cutoff2 = nrow(sub2))
    colnames(df) <- name
    df
}
TheJacksonLaboratory/epihet documentation built on Dec. 22, 2020, 1:10 p.m.