R/plotUMI4C.R

Defines functions labels labels plotDomainogram plotDifferential formatPlotsUMI4C plotUMI4C

Documented in formatPlotsUMI4C plotDifferential plotDomainogram plotUMI4C

#' Plot UMI4C data
#'
#' Produce a UMI-4C data plot containing the genes in the region, the
#' adaptative smoothen trend and the domainogram.
#' @param umi4c \linkS4class{UMI4C} object as generated by \code{\link{makeUMI4C}}.
#' @param grouping Grouping used for the different samples. If none available or 
#' want to add new groupings, run \code{\link{addGrouping}}.
#' @param dgram_function Function used for calculating the fold-change in the
#' domainogram plot, either "difference" or "quotient". Default: "quotient".
#' @param dgram_plot Logical indicating whether to plot the domainogram. If the
#' \linkS4class{UMI4C} object only contains one sample will be automatically
#' set to FALSE. Default: TRUE.
#' @param colors Named vector of colors to use for plotting for each group.
#' @param ylim Limits of the trend y axis.
#' @param xlim Limits for the plot x axis (genomic coordinates).
#' @param TxDb TxDb object to use for drawing the genomic annotation.
#' @param longest Logical indicating whether to plot only the longest
#' transcripts for each gene in the gene annotation plot.
#' @param rel_heights Numeric vector of length 3 or 4 (if differential plot)
#' indicating the relative heights of each part of the UMI4C plot.
#' @param font_size Base font size to use for the UMI4C plot. Default: 14.
#' @return Produces a summary plot with all the information contained in the
#' UMI4C opject.
#' @examples
#' data("ex_ciita_umi4c")
#' ex_ciita_umi4c <- addGrouping(ex_ciita_umi4c, grouping="condition")
#' 
#' plotUMI4C(ex_ciita_umi4c,
#'     dgram_plot = FALSE
#' )
#' @import magick
#' @importFrom rlang .data
#' @export
plotUMI4C <- function(umi4c,
    grouping = "condition",
    dgram_function = "quotient",
    dgram_plot = TRUE,
    colors = NULL,
    xlim = NULL,
    ylim = NULL,
    TxDb = TxDb.Hsapiens.UCSC.hg19.knownGene::TxDb.Hsapiens.UCSC.hg19.knownGene,
    longest = TRUE,
    rel_heights = c(0.25, 0.4, 0.12, 0.23),
    font_size = 14) {
    
    ## Define xlim if null
    if (is.null(xlim)) {
        xlim <- c(
            GenomicRanges::start(metadata(umi4c)$region),
            GenomicRanges::end(metadata(umi4c)$region)
        )
    }

    if (is.null(ylim)) ylim <- c(0, max(trend(umi4c)$trend, na.rm = TRUE))

    ## Get colors
    factors <- getFactors(umi4c, grouping=grouping)
    if (is.null(colors)) colors <- getColors(factors)

    ## Set dgram_plot to FALSE if there are more than 2 factors
    if (length(dgram(umi4c)) == 1 | length(factors) > 2) dgram_plot <- FALSE


    ## Plot trend
    trend_plot <- plotTrend(umi4c,
        grouping = grouping,
        xlim = xlim,
        ylim = ylim,
        colors = colors
    )

    ## Plot genes
    genes_plot <- plotGenes(
        window = metadata(umi4c)$region,
        TxDb = TxDb,
        longest = longest,
        xlim = xlim
    )

    ## Plot domainogram
    if (dgram_plot) {
        domgram_plot <- plotDomainogram(umi4c,
            grouping = grouping,
            dgram_function = dgram_function,
            colors = colors,
            xlim = xlim
        )
    } else {
        domgram_plot <- NA
    }

    ## Plot differential results
    if (length(umi4c@results) > 0) {
            diff_plot <- plotDifferential(umi4c,
                                          grouping = grouping,
                                          colors = colors,
                                          xlim = xlim
            )
    } else {
        diff_plot <- NA
    }

    ## Generate list of plots
    plot_list <- list(
        genes = genes_plot,
        trend = trend_plot,
        diff = diff_plot,
        dgram = domgram_plot
    )

    ## Remove empty plots
    keep_plots <- !is.na(plot_list)

    if (length(rel_heights) != sum(keep_plots)) {
        rel_heights <- rel_heights[keep_plots]
        if (rel_heights[length(rel_heights)] < 0.25) rel_heights[length(rel_heights)] <- 0.25
        rel_heights <- round(rel_heights/sum(rel_heights), 2)
    }

    plot_list <- plot_list[keep_plots]

    ## Extract legends and plot them separately
    legends <- lapply(plot_list[-1], cowplot::get_legend)
    legends_plot <- cowplot::plot_grid(plotlist = legends, nrow = 1, align = "h")

    ## Remove legends from plot
    plot_list <- formatPlotsUMI4C(plot_list = plot_list, font_size = font_size)

    ## Plot main
    main_plot <- cowplot::plot_grid(
        plotlist = plot_list,
        ncol = 1,
        align = "v",
        rel_heights = rel_heights
    )

    ## Plot UMI4C
    cowplot::plot_grid(legends_plot,
                       main_plot,
                       ncol = 1,
                       rel_heights = c(0.15, 0.85)
    )
}

#' Format plots for UMI4C
#'
#' @inheritParams plotUMI4C
#' @param plot_list List of plots generated by  \code{\link{plotUMI4C}}
#' @return Given a named plot_list and considering the number and type of included
#' plots, formats their axes accordingly to show the final UMI4C plot.
formatPlotsUMI4C <- function(plot_list,
                             font_size) {
    rm_y <- c("genes", "diff")
    rm_x <- names(plot_list)[-length(plot_list)]

    mat <- matrix(c(names(plot_list) %in% rm_x, names(plot_list) %in% rm_y),
                  ncol=2, dimnames = list(names(plot_list), c("rm_x", "rm_y")))

    theme_info <- data.frame("rm_x" = c(TRUE, TRUE, FALSE, FALSE),
                             "rm_y" = c(TRUE, FALSE, TRUE, FALSE),
                             "theme_function" = c("themeXYblank", "themeXblank", "themeYblank", "theme"))

    theme_info <- dplyr::left_join(as.data.frame(mat), theme_info, by = c("rm_x", "rm_y"))
    rownames(theme_info) <- rownames(mat)

    plot_list <-
        lapply(seq_len(length(plot_list)),
           function (x) {
               theme_to_use <- get(theme_info$theme_function[x])

               plot_list[[x]] +
                   cowplot::theme_cowplot(font_size) +
                   theme_to_use(legend.position = "none")
           })

    return(plot_list)
}

#' Plot differential contacts
#'
#' @inheritParams plotUMI4C
#' @return Produces a plot of the fold changes at the differential regions
#' analyzed ghat are contained in the \linkS4class{UMI4C} object.
#' @examples
#' data("ex_ciita_umi4c")
#' ex_ciita_umi4c <- addGrouping(ex_ciita_umi4c, grouping="condition")
#' 
#' enh <- GRanges(c("chr16:10925006-10928900", "chr16:11102721-11103700"))
#' umi_dif <- fisherUMI4C(ex_ciita_umi4c, query_regions = enh, 
#'                        filter_low = 20, resize = 5e3)
#' plotDifferential(umi_dif)
#' @export
plotDifferential <- function(umi4c,
                             grouping=NULL,
    colors = NULL,
    xlim = NULL) {
    factors <- getFactors(umi4c, grouping=grouping)

    if (is.null(colors)) colors <- getColors(factors)

    diff <- resultsUMI4C(umi4c, format = "data.frame", counts = FALSE)

    # Get coordinates for plotting squares
    if (grepl("DESeq2", umi4c@results$test)) {
        legend <- expression("Log"[2] * " FC")
    } else if (grepl("Fisher", umi4c@results$test)){
        legend <- expression("Log"[2] * " OR")
        diff$log2_odds_ratio[diff$odds_ratio == 0] <- min(diff$log2_odds_ratio[!is.infinite(diff$log2_odds_ratio)],
                                                          na.rm = TRUE
        )
        diff$log2_odds_ratio[is.infinite(diff$odds_ratio)] <- max(diff$log2_odds_ratio[!is.infinite(diff$log2_odds_ratio)],
                                                                  na.rm = TRUE
        )
    } else {
        stop("Couldn't recognize differential test.")
    }

    fill_variable <- colnames(diff)[grep("log2", colnames(diff))]

    diff_plot <-
        ggplot2::ggplot(diff) +
        ggplot2::geom_rect(ggplot2::aes_string(
            xmin = "start",
            xmax = "end",
            ymin = 0, ymax = 1,
            fill = fill_variable
        )) +
        ggplot2::geom_point(ggplot2::aes(
            x = start + ((end - start) / 2),
            y = 1.15, shape = sign
        )) +
        ggplot2::scale_fill_gradient2(
            low = colors[1],
            mid = "grey",
            high = colors[2],
            midpoint = 0,
            na.value = NA,
            name = legend,
            breaks = scales::pretty_breaks(n = 4),
            limits = c(-3, 3),
            guide = ggplot2::guide_colorbar(
                direction = "horizontal",
                title.position = "top",
                barwidth = 8
            )
        ) +
        ggplot2::scale_shape_manual(
            values = c("TRUE" = 8, "FALSE" = NA),
            guide = FALSE
        ) +
        themeYblank() +
        ggplot2::scale_x_continuous(
            labels = function(x) round(x / 1e6, 2),
            name = paste(
                "Coordinates",
                GenomicRanges::seqnames(bait(umi4c)),
                "(Mb)"
            )
        ) +
        ggplot2::coord_cartesian(xlim = xlim) +
        ggplot2::guides(fill = ggplot2::guide_colorbar(
            title.position = "left",
            label.position = "bottom",
            title.vjust = 1,
            direction = "horizontal"
        ))

    return(diff_plot)
}

#' Plot domainogram
#'
#' @inheritParams plotUMI4C
#' @return Produces the domainogram plot, summarizing the merged number of UMIs
#' at the different scales analyzed (y axis).
#' @examples
#' data("ex_ciita_umi4c")
#' ex_ciita_umi4c <- addGrouping(ex_ciita_umi4c, grouping="condition")
#' 
#' plotDomainogram(ex_ciita_umi4c, grouping = "condition")
#' @export
plotDomainogram <- function(umi4c,
    dgram_function = "quotient", # or "difference"
    grouping = NULL,
    colors = NULL,
    xlim = NULL) {
    
    if (!is.null(grouping)) {
        umi4c <- groupsUMI4C(umi4c)[[grouping]]
    }
    
    factors <- getFactors(umi4c)

    if (is.null(colors)) colors <- getColors(factors)

    if (length(factors) > 2) stop("Error in 'plotDomainogram':\n
                             dgram_grouping' cannot have more than two levels.
                             Choose another variable for grouping or refactor
                              the column to only have two levels.")

    dgram <- dgram(umi4c)

    ## Create dgram of difference
    if (dgram_function == "difference") {
        dgram_diff <- log2(1 + dgram[[factors[2]]]) - log2(1 + dgram[[factors[1]]])
        lab_legend <- " diff"
    } else if (dgram_function == "quotient") {
        dgram_diff <- log2(dgram[[factors[2]]] / dgram[[factors[1]]])
        lab_legend <- " FC"
    }

    ## Create melted dgram
    dgram_diff <- reshape2::melt(dgram_diff)
    colnames(dgram_diff) <- c("contact_id", "scales", "value")

    ## Add coordinates
    dgram_diff$start <- rep(
        GenomicRanges::start(umi4c),
        length(unique(dgram_diff$scales))
    )
    dgram_diff$end <- rep(
        (GenomicRanges::start(umi4c)[c(2:length(umi4c), length(umi4c))] -
            GenomicRanges::start(umi4c)) + GenomicRanges::start(umi4c),
        length(unique(dgram_diff$scales))
    )


    dgram_plot <-
        ggplot2::ggplot(dgram_diff) +
        ggplot2::geom_rect(ggplot2::aes(
            xmin = start, xmax = end,
            ymin = scales, ymax = scales + 1,
            fill = value
        )) +
        ggplot2::scale_fill_gradientn(
            colors = c(
                darken(colors[1], factor = 10),
                colors[1], "white",
                colors[2],
                darken(colors[2], factor = 10)
            ),
            na.value = NA,
            name = as.expression(bquote(Log[2] * " UMIs" * .(lab_legend))),
            breaks = scales::pretty_breaks(n = 4),
            guide = ggplot2::guide_colorbar(
                direction = "horizontal",
                title.position = "top",
                barwidth = 8
            )
        ) +
        ggplot2::scale_y_reverse(
            name = "",
            breaks = c(
                min(metadata(umi4c)$scales),
                max(metadata(umi4c)$scales)
            ),
            expand = c(0, 0)
        ) +
        ggplot2::scale_x_continuous(
            labels = function(x) round(x / 1e6, 2),
            name = paste(
                "Coordinates",
                GenomicRanges::seqnames(bait(umi4c)),
                "(Mb)"
            )
        ) +
        ggplot2::coord_cartesian(xlim = xlim) +
        ggplot2::guides(fill = ggplot2::guide_colorbar(
            title.position = "left",
            label.position = "bottom",
            title.vjust = 1,
            direction = "horizontal"
        ))

    return(dgram_plot)
}

#' Plot adaptative smoothen trend
#'
#' @inheritParams plotUMI4C
#' @return Produces the adaptative trend plot, showing average UMIs at each
#' position taking into account the minimum number of molecules used to merge
#' restriction fragments.
#' @examples
#' data("ex_ciita_umi4c")
#' 
#' plotTrend(ex_ciita_umi4c)
#' @importFrom stats sd
#' @export
plotTrend <- function(umi4c,
    grouping = NULL,
    colors = NULL,
    xlim = NULL,
    ylim = NULL) {
    
    if (!is.null(grouping)) {
        umi4c <- groupsUMI4C(umi4c)[[grouping]]
    }
    
    factors <- getFactors(umi4c)

    if (is.null(colors)) colors <- getColors(factors)

    ## Construct trend
    trend_df <- trend(umi4c)

    if (is.null(ylim)) ylim <- c(0, max(trend_df$trend))

    trend_df$relative_position <- "upstream"
    trend_df$relative_position[trend_df$geo_coord > GenomicRanges::start(bait(umi4c))] <- "downstream"
    trend_df$grouping_var <- trend_df$sample

    trend_plot <-
        ggplot2::ggplot(trend_df) +
        ggplot2::geom_ribbon(ggplot2::aes(geo_coord,
            ymin = trend - sd, ymax = trend + sd,
            group = interaction(
                grouping_var,
                relative_position
            ),
            fill = grouping_var
        ),
        alpha = 0.3, color = NA
        ) +
        ggplot2::geom_line(ggplot2::aes(geo_coord, trend,
            group = interaction(
                grouping_var,
                relative_position
            ),
            color = grouping_var
        )) +
        ggplot2::scale_color_manual(
            values = colors,
            name = "Trend group",
            guide = ggplot2::guide_legend(ncol = 1)
        ) +
        ggplot2::scale_fill_manual(
            values = colors,
            name = "Trend group",
            guide = ggplot2::guide_legend(ncol = 1)
        ) +
        ggplot2::annotate("point",
            x = GenomicRanges::start(bait(umi4c)), y = max(ylim),
            color = "black", fill = "black", pch = 25, size = 4
        ) +
        ggplot2::annotate("text",
                          x = GenomicRanges::start(bait(umi4c)), y = max(ylim) - 1,
                          label = bait(umi4c)$name,
                          hjust = 0
        ) +
        ggplot2::coord_cartesian(xlim = xlim, ylim = ylim) +
        ggplot2::scale_y_continuous(
            name = "UMIs normalized trend",
            breaks = scales::pretty_breaks(),
            expand = c(0, 0)
        ) +
        ggplot2::scale_x_continuous(
            labels = function(x) round(x / 1e6, 2),
            name = paste(
                "Coordinates",
                GenomicRanges::seqnames(bait(umi4c)),
                "(Mb)"
            )
        ) +
        ggplot2::theme(legend.position = "bottom")

    return(trend_plot)
}

#' Plot genes
#'
#' Plot genes in a window of interest.
#' @param window \linkS4class{GRanges} object with coordinates to use for
#' selecting the genes to plot.
#' @inheritParams plotUMI4C
#' @return Produces a plot with the genes found in the provided \code{window}.
#' @examples
#' window <- GRanges("chr16:11348649-11349648")
#' plotGenes(
#'     window = window,
#'     TxDb = TxDb.Hsapiens.UCSC.hg19.knownGene::TxDb.Hsapiens.UCSC.hg19.knownGene
#' )
#' @export
plotGenes <- function(window,
    TxDb = TxDb.Hsapiens.UCSC.hg19.knownGene::TxDb.Hsapiens.UCSC.hg19.knownGene,
    longest = TRUE,
    xlim = NULL,
    font_size = 14) {

    ## Get gene names in region
    genes_sel <- createGeneAnnotation(window,
        TxDb = TxDb,
        longest = longest
    )

    if (length(genes_sel) == 0) {
        genesPlot <-
            ggplot2::ggplot() +
            ggplot2::scale_y_continuous(limits = c(0, 1)) +
            ggplot2::scale_x_continuous(limits = xlim) +
            ggplot2::coord_cartesian(xlim = xlim)

        return(genesPlot)
    } else {
        ## Edit genes
        distance <- GenomicRanges::width(window) * 0.01

        ## Add stepping
        genes_step <- addStepping(genes_sel[genes_sel$type == "GENE", ], window, 2)
        genes_uni <- data.frame(genes_step)

        genes_exon <- data.frame(genes_sel[genes_sel$type == "EXON", ])
        genes_exon <- dplyr::left_join(genes_exon,
            genes_uni[, c(6, 11)],
            by = c(tx_id = "tx_id")
        )

        ## Plot genes
        genesPlot <-
            ggplot2::ggplot(data = genes_uni) +
            ggplot2::geom_segment(
                data = genes_uni,
                ggplot2::aes(
                    x = start, y = stepping,
                    xend = end, yend = stepping
                )
            ) +
            ggplot2::geom_rect(
                data = genes_exon,
                ggplot2::aes(
                    xmin = start, xmax = end,
                    ymin = (stepping - 0.3), ymax = (stepping + 0.3)
                ),
                fill = "grey39", color = "grey39"
            ) +
            ggplot2::geom_text(
                data = genes_uni,
                ggplot2::aes(x = end, y = stepping, label = gene_name),
                colour = "black",
                hjust = 0, fontface = 3, nudge_x = distance,
                size = 3
            ) +
            ggplot2::coord_cartesian(xlim = xlim) +
            themeYblank()

        return(genesPlot)
    }
}



#' Add stepping for plotting genes
#'
#' Given a \linkS4class{GRanges} dataset representing genes, will add an arbitrary value for
#' them to be plotted in the Y axis without overlapping each other.
#' @param genesDat GRanges object containing gene information.
#' @param coordinates GRanges object with coordinates you want to plot.
#' @param mcol.name Integer containing the column number that contains the gene
#' name.
#' @return Calculates the stepping position to avoid overlap between genes.
#' @import GenomicRanges
addStepping <- function(genesDat,
    coordinates,
    mcol.name) {
    ## Create extension for avoiding overlap with gene names
    ext <- vapply(mcols(genesDat)[, mcol.name], nchar, FUN.VALUE = integer(1)) * width(coordinates) / 30
    genesDat.ext <- regioneR::extendRegions(genesDat, extend.end = ext)

    ## Add stepping to data
    genesDat$stepping <- disjointBins(genesDat.ext,
        ignore.strand = TRUE
    )

    return(genesDat)
}

#' Darken colors
#'
#' @param color Character containing the name or hex value of a color.
#' @param factor Numeric representing a factor by which darken the specified
#' color.
#' @return Darkens the provided color by the provided factor.
#' @examples
#' darken("blue", factor = 1.4)
#' @export
darken <- function(color, factor = 1.4) {
    col <- grDevices::col2rgb(color)
    col <- col / factor
    col <- grDevices::rgb(t(col), maxColorValue = 255)
    col
}


#' Theme X blank
#' @param ... Additional arguments to pass to the theme call from ggplot2.
#' @return ggplot2 theme with a blank X axis.
#' @examples
#' library(ggplot2)
#'
#' ggplot(
#'     iris,
#'     aes(Sepal.Length, Sepal.Width)
#' ) +
#'     geom_point() +
#'     themeXblank()
#' @export
themeXblank <- function(...) {
    ggplot2::theme(
        axis.text.x = ggplot2::element_blank(),
        axis.title.x = ggplot2::element_blank(),
        axis.line.x = ggplot2::element_blank(),
        axis.ticks.x = ggplot2::element_blank(),
        ...
    )
}

#' Theme Y blank
#' @inheritParams themeXblank
#' @return ggplot2 theme with a blank Y axis.
#' @examples
#' library(ggplot2)
#'
#' ggplot(
#'     iris,
#'     aes(Sepal.Length, Sepal.Width)
#' ) +
#'     geom_point() +
#'     themeYblank()
#' @export
themeYblank <- function(...) {
    ggplot2::theme(
        axis.text.y = ggplot2::element_blank(),
        axis.title.y = ggplot2::element_blank(),
        axis.line.y = ggplot2::element_blank(),
        axis.ticks.y = ggplot2::element_blank(),
        ...
    )
}

#' Theme Y blank
#' @inheritParams themeXblank
#' @return ggplot2 theme with a blank X and Y axis.
#' @examples
#' library(ggplot2)
#'
#' ggplot(
#'     iris,
#'     aes(Sepal.Length, Sepal.Width)
#' ) +
#'     geom_point() +
#'     themeXYblank()
#' @export
themeXYblank <- function(...) {
    ggplot2::theme(
        axis.text.x = ggplot2::element_blank(),
        axis.title.x = ggplot2::element_blank(),
        axis.line.x = ggplot2::element_blank(),
        axis.ticks.x = ggplot2::element_blank(),
        axis.text.y = ggplot2::element_blank(),
        axis.title.y = ggplot2::element_blank(),
        axis.line.y = ggplot2::element_blank(),
        axis.ticks.y = ggplot2::element_blank(),
        ...
    )
}

#' Theme
#' @inheritParams themeXblank
#' @return ggplot2 theme.
#' @examples
#' library(ggplot2)
#'
#' ggplot(
#'     iris,
#'     aes(Sepal.Length, Sepal.Width)
#' ) +
#'     geom_point() +
#'     theme()
#' @export
theme <- function(...) {
    ggplot2::theme(
        ...
    )
}

#' Get default colors
#'
#' @param factors Name of the factors that will be used for grouping variables.
#' @return Depending on the number of factors it creates different color
#' palettes.
getColors <- function(factors) {
    if (length(factors) == 2) {
        colors <- c("darkorchid3", "darkorange3")
    } else if (length(factors) > 2) {
        colors <- RColorBrewer::brewer.pal(n = length(factors), name = "Set1")
    } else if (length(factors) == 1) {
        colors <- "darkorchid3"
    }

    names(colors) <- factors

    return(colors)
}

#' Get factors fro plotting
#' @param umi4c UMI4C object
#' @param grouping Grouping used for the different samples. If none available or 
#' want to add new groupings, run \code{\link{addGrouping}}.
#' @return Factor vector where the first element is the reference factor.
getFactors <- function(umi4c, grouping = NULL) {
    if (!is.null(grouping)) {
        umi4c <- groupsUMI4C(umi4c)[[grouping]]
    }
    factors <- unique(colnames(assay(umi4c)))
    factors <- factors[c(
        which(factors == metadata(umi4c)$ref_umi4c),
        which(factors != metadata(umi4c)$ref_umi4c)
    )]
    return(factors)
}

Try the UMI4Cats package in your browser

Any scripts or data that you put into this service are public.

UMI4Cats documentation built on Dec. 31, 2020, 2:01 a.m.