DEGreport: Report of DEG analysis

Documented in degCheckFactors degFilter degMB degMean degMV degObj degQC degVar degVB

# Add function for PCA of discarded genes in the independing filtering by deseq2

#' Filter genes by group
#' 
#' This function will keep only rows that have a minimum counts of
#' 1 at least in a \code{min} number of samples (default 80%).
#' 
#' @param counts Matrix with expression data, columns are samples
#'   and rows are genes or other feature.
#' @param metadata Data.frame with information about
#'   each column in counts matrix. Rownames should match
#' \code{colnames(counts)}.
#' @param group Character column in metadata used to
#'   group samples and applied the cutoff.
#' @param min Percentage value indicating the minimum
#'   number of samples in each group that should have
#'   more than 0 in count matrix.
#' @param minreads Integer minimum number of reads to consider 
#'   a feature expressed.
#' @return count \code{matrix} after filtering genes (features)
#'   with not enough expression in any group.
#' @examples
#' data(humanGender)
#' library(SummarizedExperiment)
#' idx <- c(1:10, 75:85)
#' c <- degFilter(assays(humanGender)[[1]][1:1000, idx],
#'   colData(humanGender)[idx,], "group", min=1)
#' @export
degFilter <- function(counts, metadata, group, min=0.8, minreads=0){
    .unique_group <- as.character(unique(metadata[,group]))
    .keep = sapply(.unique_group, function(g){
        .samples = rownames(metadata)[metadata[, group] == g]
        .n = length(.samples)
        rowSums(counts[, .samples] > minreads) >= .n * min
    })
    counts[rowSums(.keep) > 0, ]
}
    

#' Plot main figures showing p-values distribution and mean-variance correlation
#' 
#' This function joins the output of \code{\link[DEGreport]{degMean}}, 
#' \code{\link[DEGreport]{degVar}} and \code{\link[DEGreport]{degMV}} in a
#' single plot. See these functions for further information.
#' 
#' @param counts Matrix with counts for each samples and each gene.
#' @param groups Character vector with group name for each sample in the
#'   same order than counts column names.
#' @param object [DEGSet] oobject.
#' @param pvalue pvalues of DEG analysis.
#' @return ggplot2 object
#' @examples
#' data(humanGender)
#' library(DESeq2)
#' idx <- c(1:10, 75:85)
#' dds <- DESeqDataSetFromMatrix(assays(humanGender)[[1]][1:1000, idx],
#'   colData(humanGender)[idx,], design=~group)
#' dds <- DESeq(dds)
#' res <- results(dds)
#' degQC(counts(dds, normalized=TRUE), colData(dds)[["group"]],
#'   pvalue = res[["pvalue"]])
#' @export
degQC <- function(counts, groups, object=NULL, pvalue=NULL){
    if (is.null(pvalue) & is.null(object))
        stop("You need to provide DEGset object or pvalue.")
    if (!is.null(object)[1] & class(object)[1] != "DEGSet")
        stop("Object should be a DEGSet class.")
    stopifnot(class(counts)[1] %in% c("matrix", "data.frame"))
    
    if (class(object)[1] == "DEGSet"){
        df <- deg(object, tidy = "tibble")
        if (length(intersect(rownames(counts), df[["gene"]])) < nrow(df))
            stop("Not all features in DEGSet are in counts table.")
        counts <- counts[df[["gene"]],]
        pvalue <- df[["pvalue"]]
        padj <- df[["padj"]]
    }else{
        padj <- p.adjust(pvalue, method = "fdr")
    }

    counts <- counts[!is.na(padj),]
    pvalue <- pvalue[!is.na(padj)]
    padj <- padj[!is.na(padj)]
    pmean <- degMean(pvalue, counts) +
        xlab("pvalues along expression quantiles") +
        guides(fill = "none")
    pvar <- degVar(pvalue, counts) +
        xlab("pvalues along variance quantiles") +
        guides("") +
        theme(legend.position="top") +
        theme(legend.text=element_text(size=3),
              legend.title=element_blank(),
              legend.key.size = unit(0.5,"line"))
    pmv <- degMV(groups, padj, counts)
    suppressWarnings(ggdraw() + 
        draw_plot(pvar, 0, 0.4, 0.6, 0.6 ) +
        draw_plot(pmv, 0.6, 0, 0.4, 0.7) +
        draw_plot(pmean, 0, 0, 0.6, 0.4))
}

#' Distribution of gene ratios used to calculate Size Factors.
#' 
#' This function check the median ratio normalization used by
#' DESeq2 and similarly by edgeR to visualy check whether
#' the median is the best size factor to represent depth.
#' 
#' @aliases degCheckFactors
#' @param counts Matrix with counts for each samples and each gene.
#'   row number should be the same length than pvalues vector.
#' @param each Plot each sample separately.
#' @return ggplot2 object
#' @details This function will plot the gene ratios for each sample. To calculate
#' the ratios, it follows the simliar logic than DESeq2/edgeR uses, where the expression
#' of each gene is divided by the mean expression of that gene. The distribution
#' of the ratios should approximate to a normal shape and the factors should be similar
#' to the median of distributions. If some samples show different distribution,
#' the factor may be bias due to some biological or technical factor.
#' @references 
#' * Code to calculate size factors comes from
#'   [DESeq2::estimateSizeFactorsForMatrix()].
#' @examples
#' data(humanGender)
#' library(SummarizedExperiment)
#' degCheckFactors(assays(humanGender)[[1]][, 1:10])
#' @export
degCheckFactors <-
    function(counts, each = FALSE)
    {
        counts <- as.data.frame(counts)
        geoMeanNZ <- function(x) {
            if (all(x == 0)) { 0 } else { exp( sum(log(x[x > 0])) / length(x) ) }
        }
        geoMeans <- apply(counts, 1, geoMeanNZ)
        loggeomeans <- log(geoMeans)
        df <- lapply(colnames(counts), function(s) {
            cnts <- counts[[s]]
            r <- (log(cnts) - loggeomeans)[is.finite(loggeomeans) & cnts > 0]
            data.frame(ratios = r,
                       sample = s,
                       stringsAsFactors = FALSE)
        }) %>% bind_rows()
        p <- ggplot(df, aes(ratios, group = sample)) +
            geom_density() +
            theme_bw() +
            xlim(-4, 4)
        if (each)
            p <- p + facet_wrap(~sample)
        p
    }


#' Distribution of pvalues by expression range
#' 
#' This function plot the p-values distribution colored by
#' the quantiles of the average count data.
#' 
#' @param pvalues pvalues of DEG analysis.
#' @param counts  Matrix with counts for each samples and each gene.
#'   row number should be the same length than pvalues vector.
#' @return ggplot2 object
#' @examples
#' data(humanGender)
#' library(DESeq2)
#' idx <- c(1:10, 75:85)
#' dds <- DESeqDataSetFromMatrix(assays(humanGender)[[1]][1:1000, idx],
#'   colData(humanGender)[idx,], design=~group)
#' dds <- DESeq(dds)
#' res <- results(dds)
#' degMean(res[, 4], counts(dds))
#' @export
degMean <-
    function(pvalues, counts)
{
    meanv  <-  apply(counts, 1, mean)
    q <- quantile(meanv, seq(.1, 1, .1))
    q <- q[!duplicated(q)]
    meanvfac <- cut(meanv,
                breaks=c(0, q),
                labels=names(q),
                right=TRUE)
    pvalfac  <-  cut(pvalues,
               breaks=c(-1, seq(.1, 1, .1)),
               labels=seq(0.1, 1, 0.1), right=TRUE)
    d  <-  data.frame(pvalues=factor(pvalfac),
                meanv=factor(meanvfac))
    suppressWarnings(ggplot(d, aes(pvalues, fill=meanv))+
        geom_bar()+
        theme_bw() +
        scale_fill_brewer("mean\nquantiles", palette="RdYlBu")) +
        labs(list(x="p-values", y="# genes"))
}

#' Distribution of pvalues by standard desviation range
#' 
#' This function pot the p-valyes distribution colored by
#' the quantiles of the standard desviation of count data.
#' 
#' @param pvalues pvalues of DEG analysis
#' @param counts Matrix with counts for each samples and each gene.
#'   row number should be the same length than pvalues vector.
#' @return ggplot2 object
#' @examples
#' data(humanGender)
#' library(DESeq2)
#' idx <- c(1:10, 75:85)
#' dds <- DESeqDataSetFromMatrix(assays(humanGender)[[1]][1:1000, idx],
#'   colData(humanGender)[idx,], design=~group)
#' dds <- DESeq(dds)
#' res <- results(dds)
#' degVar(res[, 4], counts(dds))
#' @export
degVar <-
    function(pvalues, counts)
{
    sdv <- apply(counts, 1, sd)
    q <- quantile(sdv, seq(.1, 1, .1))
    q <- q[!duplicated(q)]
    sdvfac <- cut(sdv,
                breaks=c(0, q),
                labels=names(q),
                right=TRUE)
    pvalfac <- cut(pvalues,
               breaks=c(-1, seq(.1, 1, .1)),
               labels=seq(0.1, 1, 0.1), right=TRUE)
    d <- data.frame(pvalues=factor(pvalfac), 
                sdv=factor(sdvfac))
    suppressWarnings(ggplot(d, aes(pvalues, fill=sdv))+
        geom_bar()+
        theme_bw() +
        scale_fill_brewer("variance\nquantiles", palette="RdYlBu") +
        labs(list(x="p-values", y="# genes")))
}

#' Correlation of the standard desviation and the mean of the abundance of a
#' set of genes.
#' 
#' @param group Character vector with group name for each sample in the
#'   same order than counts column names.
#' @param pvalues pvalues of DEG analysis.
#' @param counts Matrix with counts for each samples and each gene.
#' @param sign Defining the cutoff to label significant features.
#'   row number should be the same length than pvalues vector.
#' @return ggplot2 object
#' @examples
#' data(humanGender)
#' library(DESeq2)
#' idx <- c(1:10, 75:85)
#' dds <- DESeqDataSetFromMatrix(assays(humanGender)[[1]][1:1000, idx],
#'   colData(humanGender)[idx,], design=~group)
#' dds <- DESeq(dds)
#' res <- results(dds)
#' degMV(colData(dds)[["group"]],
#'       res[, 4],
#'       counts(dds, normalized = TRUE))
#' @export
degMV <-
    function(group, pvalues, counts, sign=0.01)
{
    var_ma <- sapply(unique(as.character(group)), function(g){
        apply(counts[, group==g], 1, sd, na.rm=TRUE)
    })    
    sdv <- apply(var_ma, 1, max)
    mean_ma <- sapply(unique(as.character(group)), function(g){
        apply(counts[, group==g], 1, median, na.rm=TRUE)
    })    
    meanv <- apply(mean_ma, 1, min)
    pv <- cut(pvalues, breaks=c(-1, sign, 1.1),
          labels=c("Sign", "NoSig"))
    # pv[is.na(pvalues)] <- "NoSig"
    d <- data.frame(pvalues=pv, max_sd=log2(sdv), 
                min_median=log2(meanv))
    red <- d[d[["pvalues"]] == "Sign" & !is.na(d[["pvalues"]]),]
    black <- rgb(0.9, 0.9, 0.9, 0.6)
    suppressWarnings(ggplot(d, aes_string("min_median", "max_sd"))+
                         geom_point(data = d, color = black) +
                         geom_point(data = red, color = "red") +
                         scale_color_manual(values=c("red", black))+
                         theme_bw()+
                         stat_quantile(aes(min_median, max_sd), colour="blue",
                                       quantiles = c(0.025, 0.975), 
                                       linetype=2, formula=y ~ x)) +
        theme(legend.position="top")
    }

#' Distribution of expression of DE genes compared to the background
#'
#' @aliases degMB
#' @param tags  List of genes that are DE.
#' @param group Character vector with group name for each sample in the
#'   same order than counts column names.
#' @param counts  Matrix with counts for each samples and each gene
#'   Should be same length than pvalues vector.
#' @param pop number of random samples taken for background comparison
#' @return ggplot2 object
#' @examples
#' data(humanGender)
#' library(DESeq2)
#' idx <- c(1:10, 75:85)
#' dds <- DESeqDataSetFromMatrix(assays(humanGender)[[1]][1:1000, idx],
#'   colData(humanGender)[idx,], design=~group)
#' dds <- DESeq(dds)
#' res <- results(dds)
#' degMB(row.names(res)[1:20], colData(dds)[["group"]],
#'   counts(dds, normalized = TRUE))
#' @export
degMB <-
    function(tags, group, counts, pop=400)
{
    delen <- length(tags)
    g <- ""

    # if counts is tiny, sample with replacement
    if(nrow(counts) < pop) {
        warning("The number of genes < samples requested, sampling with replacement.")
        replace = TRUE
    }
    else {
        replace = FALSE
    }
    rand <- sample(row.names(counts), pop, replace=replace)
    
    sign_group <- lapply(unique(as.character(group)), function(g){
        m <- apply(counts[tags, group==g, drop=FALSE], 1, mean, na.rm=TRUE)
        data.frame(mean = m, group = g,
                   type = paste("significants -", g),
                   stringsAsFactors = FALSE)
    })    
    
    rand_group <- lapply(unique(as.character(group)), function(g){
        m <- apply(counts[rand, group==g, drop=FALSE], 1, mean, na.rm=TRUE)
        data.frame(mean = m, group = g,
                   type = paste("background -", g),
                   stringsAsFactors = FALSE)
    }) 

    res <- bind_rows(c(sign_group, rand_group))

    suppressWarnings(ggplot(res, aes_string("type", "mean", 
        fill="group", colour="group"))+
        geom_violin(alpha=0.2)+
        scale_y_log10()+
        theme_bw())
}

#' Distribution of the standard desviation of
#'     DE genes compared to the background
#' @aliases degVB
#' @param tags List of genes that are DE.
#' @param group Character vector with group name for each sample in the
#'   same order than counts column names.
#' @param counts  matrix with counts for each samples and each gene.
#'     Should be same length than pvalues vector.
#' @param pop Number of random samples taken for background comparison.
#' @return ggplot2 object
#' @examples
#' data(humanGender)
#' library(DESeq2)
#' idx <- c(1:10, 75:85)
#' dds <- DESeqDataSetFromMatrix(assays(humanGender)[[1]][1:1000, idx],
#'   colData(humanGender)[idx,], design=~group)
#' dds <- DESeq(dds)
#' res <- results(dds)
#' degVB(row.names(res)[1:20], colData(dds)[["group"]],
#'   counts(dds, normalized = TRUE))
#' @export
degVB <-
    function(tags, group, counts, pop=400)
{
    delen <- length(tags)
    g <- ""
    # if counts is tiny, sample with replacement
    if(nrow(counts) < pop) {
        warning("The number of genes < samples requested, sampling with replacement.")
        replace = TRUE
    }
    else {
        replace = FALSE
    }
    rand <- sample(row.names(counts), pop)
    sign_group <- lapply(unique(as.character(group)), function(g){
        m <- apply(counts[tags, group==g, drop=FALSE], 1, sd, na.rm=TRUE)
        data.frame(sd = m, group = g,
                   type = paste("significants -", g),
                   stringsAsFactors = FALSE)
    })    
    
    rand_group <- lapply(unique(as.character(group)), function(g){
        m <- apply(counts[rand, group==g, drop=FALSE], 1, sd, na.rm=TRUE)
        data.frame(sd = m, group = g,
                   type = paste("background -", g),
                   stringsAsFactors = FALSE)
    }) 
    
    res <- bind_rows(c(sign_group, rand_group))
    
    suppressWarnings(ggplot(res, aes_string("type", "sd", 
                                            fill="group", colour="group"))+
                         geom_violin(alpha=0.2)+
                         scale_y_log10()+
                         theme_bw())
    }

degNcomb <- function() {
    .Deprecated("DESeq2::lcfShrink")
}

degComb <- function()
{
    .Deprecated("DESeq2::lcfShrink")
}

degFC <- function()
{
    .Deprecated("DESeq2::lcfShrink")
}

degBI <- function()
{
    .Deprecated("DESeq2::lcfShrink")
}

degBIcmd <- function()
{
    .Deprecated("DESeq2::lcfShrink")
}

degRank <- function()
{
    .Deprecated("DESeq2::lcfShrink")
}


degPR <- function()
{
    .Deprecated("DESeq2::lcfShrink")
}

#' Create a deg object that can be used to plot expression values
#'     at shiny server:runGist(9930881)
#' @aliases degObj
#' @param counts Output from get_rank function.
#' @param design Colour used for each gene.
#' @param outfile File that will contain the object.
#' @return R object to be load into vizExp.
#' @examples 
#' data(humanGender)
#' library(SummarizedExperiment)
#' degObj(assays(humanGender)[[1]], colData(humanGender), NULL)
#' @export
degObj <-
    function(counts, design, outfile)
{
    deg <- NULL
    deg <- list(counts, design)
    if (!is.null(outfile))
        save(deg, file=outfile)
    if (is.null(outfile))
        message("please, give an output file name.")
}
lpantano/DEGreport documentation built on Feb. 28, 2024, 12:01 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
lpantano/DEGreport
Report of DEG analysis

R/methods.R
In lpantano/DEGreport: Report of DEG analysis

Defines functions degObj degPR degRank degBIcmd degBI degFC degComb degNcomb degVB degMB degMV degVar degMean degCheckFactors degQC degFilter

Documented in degCheckFactors degFilter degMB degMean degMV degObj degQC degVar degVB

R Package Documentation

Browse R Packages

We want your feedback!

lpantano/DEGreport Report of DEG analysis

R/methods.R In lpantano/DEGreport: Report of DEG analysis

Defines functions degObj degPR degRank degBIcmd degBI degFC degComb degNcomb degVB degMB degMV degVar degMean degCheckFactors degQC degFilter

Documented in degCheckFactors degFilter degMB degMean degMV degObj degQC degVar degVB

R Package Documentation

Browse R Packages

We want your feedback!

lpantano/DEGreport
Report of DEG analysis

R/methods.R
In lpantano/DEGreport: Report of DEG analysis