## Dummydata object
dummyData = function(gl, max){
res = list(
mat = data.frame(x = factor(1:length(gl)),
y = unlist(lapply(gl, length))),
max = max
)
class(res) = c(class(res), "dummyData")
return(res)
}
is.dummyData = function(x) inherits(x, "dummyData")
add_dummydata.gosummaries = function(gosummaries){
max = max(unlist(lapply(gosummaries, function(x){
lapply(x$Gene_lists, length)
})))
for(i in seq_along(gosummaries)){
gosummaries[[i]]$Data = dummyData(gosummaries[[i]]$Gene_lists, max)
}
return(gosummaries)
}
##
## gosummaries object constructor and related functions
gosummaries_base = function(gl = NULL, wc_data = NULL, score_type = "p-value", wc_algorithm = "middle", wordcloud_legend_title = NULL){
# Check the input parameters for consistency
if(is.null(gl) & is.null(wc_data)){
stop("Either gene lists or the word cloud data have to be specified")
}
if(!(score_type %in% c("p-value", "count"))){
stop("score_type has to be either: p-value or count")
}
if(!(wc_algorithm %in% c("middle", "top"))){
stop("wc_algorithm has to be either: middle or top")
}
if(!is.null(gl) & !is.null(wc_data)){
# Find length of the components
if(length(gl) != length(wc_data)){
stop("The gene lists and word cloud data have to have the same length")
}
components = 1:length(gl)
# Find the number of lists per component (based on first component)
gl_k = ifelse(is.list(gl[[1]]), 2, 1)
wc_condition = is.list(wc_data[[1]]) & !is.data.frame(wc_data[[1]])
wc_data_k = ifelse(wc_condition, 2, 1)
if(gl_k != wc_data_k){
stop("The gene lists and the word cloud data have to have a similar structure")
}
else{
k = gl_k
}
# Check if the names in the gene list and Go results are the same
if(!identical(names(gl), names(wc_data))){
stop("The gene lists and the word cloud data have to have same names")
}
else{
names = names(gl)
}
}
if(!is.null(gl) & is.null(wc_data)){
components = 1:length(gl)
k = ifelse(is.list(gl[[1]]), 2, 1)
names = names(gl)
}
if(is.null(gl) & !is.null(wc_data)){
components = 1:length(wc_data)
k = ifelse(is.list(wc_data[[1]]) & !is.data.frame(wc_data[[1]]), 2, 1)
names = names(wc_data)
}
# Create the resulting data structure
res = list()
for(i in components){
comp = list(
Title = names[i],
Gene_lists = NULL,
WCD = switch(k,
list(wcd1 = wc_data[[i]]),
list(wcd1 = wc_data[[i]][[1]], wcd2 = wc_data[[i]][[2]])
),
Data = NULL,
Percentage = " "
)
if(!is.null(gl)){
comp_gl = gl[[i]]
if(k == 1){
comp$Gene_lists = list(gl1 = comp_gl)
comp$Percentage = sprintf("n: %d", length(comp_gl))
}
else{
comp$Gene_lists = list(gl1 = comp_gl[[1]], gl2 = comp_gl[[2]])
comp$Percentage = sprintf("G1: %d\nG2: %d",
length(comp_gl[[1]]),
length(comp_gl[[2]]))
}
}
res[[i]] = comp
}
# Set word cloud legend title
if(is.null(wordcloud_legend_title)){
if(score_type == "p-value"){
wordcloud_legend_title = "P-value"
}
if(score_type == "count"){
wordcloud_legend_title = "Count"
}
}
res = structure(res,
class = "gosummaries",
score_type = score_type,
wordcloud_legend_title = wordcloud_legend_title,
wc_algorithm = wc_algorithm
)
return(res)
}
# gosummaries_base()
# gosummaries_base(list(Tere = "A"))
# gosummaries_base(list(Tere = list("A", "B")))
# gosummaries_base(NULL, list(Tere = data.frame(Term = "ime", Score = "muna")))
# gosummaries_base(list(Tere = "A"), list(Tere = data.frame(Term = "ime", Score = "muna")))
# gosummaries_base(list(Tere = "A"), list(Tore = data.frame(Term = "ime", Score = "muna")))
# gosummaries_base(list(Tere = list("A", "B")), list(Tere = data.frame(Term = "ime", Score = "muna")))
#' Constructor for gosummaries object
#'
#' Constructor for gosummaries object that contains all the necessary
#' information to draw the figure, like gene lists and their annotations,
#' expression data and all the relevant texts.
#'
#' The object is a list of "components", with each component defined by a gene
#' list or a
#' pair of gene lists. Each "component" has the slots as follows:
#' \itemize{
#' \item \bold{Title}: title string of the component. (Default: the names of
#' the gene lists)
#' \item \bold{Gene_lists}: list of one or two gene lists
#' \item \bold{WCD}: g:Profiler results based on the Gene_lists slot or user
#' entered table.
#' \item \bold{Data}: the related data (expression values, PCA rotation, ...)
#' that is used to draw the "panel" i.e. theplot above the wordclouds. In
#' principle there is no limitation what kind of data is there as far as the
#' function that is provided to draw that in \code{\link{plot.gosummaries}} can
#' use it.
#' \item \bold{Percentage}: a text that is drawn on the right top corner of
#' every component. In case of PCA this is the percentage of variation the
#' component explains, by default it just depicts the number of genes in the
#' Gene_lists slot.
#' }
#'
#' Some visual parameters are stored in the attributes of \code{gosummaries}
#' object:
#' \code{score_type} tells how to handle the scores associated to wordclouds,
#' \code{wc_algorithm} specifies the wordcloud layout algorithm and
#' \code{wordcloud_legend_title} specifies the title of the wordcloud. One can
#' change them using the \code{attr} function.
#'
#' The word clouds are specified as \code{data.frame}s with two columns: "Term"
#' and "Score". If one wants to use custom data for wordclouds, instead of the
#' default GO enrichment results, then this is possible to specify parameter
#' \code{wc_data}. The input structure is similar to the gene list input, only
#' instead of gene lists one has the two column
#' \code{data.frame}s.
#'
#' The GO enrichment analysis is performed using g:Profiler web toolkit and its
#' associated R package \code{gProfileR}. This means the computer has to have
#' internet access to annotate the gene lists. Since g:Profiler can accept a
#' wide range of gene IDs then user usually does not have to worry about
#' converitng the gene IDs into right format. To be absolutely sure the tool
#' recognizes the gene IDs one can check if they will give any results in
#' \url{http://biit.cs.ut.ee/gprofiler/gconvert.cgi}.
#'
#' There can be a lot of results for a typical GO enrichment analysis but
#' usually these tend to be pretty redundant. Since one can fit only a small
#' number of categories into a word cloud we have to bring down the number of
#' categories to show an reduce the redundancy. For this we use hierarchical
#' filtering option \"moderate\" in g:Profiler. In g:Profiler the categories
#' are grouped together when they share one or more enriched parents. The
#' \"moderate\" option selects the most significant category from each of such
#' groups. (See more at http://biit.cs.ut.ee/gprofiler/)
#'
#' The slots of the object can be filled with custom information using a
#' function \code{\link{add_to_slot.gosummaries}}.
#'
#' By default the Data slot is filled with a dataset that contains the number
#' of genes in the Gene_lists slot. Expression data can be added to the object
#' for example by using function \code{\link{add_expression.gosummaries}}. It
#' is possible to derive your own format for the Data slot as well, as long as
#' a panel plotting function for this data is alaso provided (See
#' \code{\link{panel_boxplot}} for further information).
#'
#' There are several constructors of gosummaries object that work on common
#' analysis result objects, such as \code{\link{gosummaries.kmeans}},
#' \code{\link{gosummaries.MArrayLM}} and \code{\link{gosummaries.prcomp}}
#' corresponding to k-means, limma and PCA results.
#'
#' @param x list of arrays of gene names (or list of lists of arrays of gene
#' names)
#' @param \dots additional parameters for gprofiler function
#' @return A gosummaries type of object
#'
#' @seealso \code{\link{gosummaries.kmeans}},
#' \code{\link{gosummaries.MArrayLM}}, \code{\link{gosummaries.prcomp}}
#'
#' @author Raivo Kolde <raivo.kolde@@eesti.ee>
#' @examples
#' \dontrun{
#' # Define gene lists
#' genes1 = c("203485_at", "209469_at", "209470_s_at", "203999_at",
#' "205358_at", "203130_s_at", "210222_s_at", "202508_s_at", "203001_s_at",
#' "207957_s_at", "203540_at", "203000_at", "219619_at", "221805_at",
#' "214046_at", "213135_at", "203889_at", "209990_s_at", "210016_at",
#' "202507_s_at", "209839_at", "204953_at", "209167_at", "209685_s_at",
#' "211276_at", "202391_at", "205591_at",
#' "201313_at")
#' genes2 = c("201890_at", "202503_s_at", "204170_s_at", "201291_s_at",
#' "202589_at", "218499_at", "209773_s_at", "204026_s_at", "216237_s_at",
#' "202546_at", "218883_s_at", "204285_s_at", "208659_at", "201292_at",
#' "201664_at")
#'
#'
#' gl1 = list(List1 = genes1, List2 = genes2) # One list per component
#' gl2 = list(List = list(genes1, genes2)) # Two lists per component
#'
#' # Construct gosummaries objects
#' gs1 = gosummaries(gl1)
#' gs2 = gosummaries(gl2)
#'
#' plot(gs1, fontsize = 8)
#' plot(gs2, fontsize = 8)
#'
#' # Changing slot contents using using addToSlot.gosummaries
#' gs1 = add_to_slot.gosummaries(gs1, "Title", list("Neurons", "Cell lines"))
#'
#' # Adding expression data
#' data(tissue_example)
#'
#' gs1 = add_expression.gosummaries(gs1, exp = tissue_example$exp, annotation =
#' tissue_example$annot)
#' gs2 = add_expression.gosummaries(gs2, exp = tissue_example$exp, annotation =
#' tissue_example$annot)
#'
#' plot(gs1, panel_par = list(classes = "Tissue"), fontsize = 8)
#' plot(gs2, panel_par = list(classes = "Tissue"), fontsize = 8)
#' }
#'
#' # Using custom annotations for word clouds
#' wcd1 = data.frame(Term = c("KLF1", "KLF2", "POU5F1"), Score = c(0.05, 0.001,
#' 0.0001))
#' wcd2 = data.frame(Term = c("CD8", "CD248", "CCL5"), Score = c(0.02, 0.005,
#' 0.00001))
#'
#' gs = gosummaries(wc_data = list(Results1 = wcd1, Results2 = wcd2))
#' plot(gs)
#'
#' gs = gosummaries(wc_data = list(Results = list(wcd1, wcd2)))
#' plot(gs)
#'
#' # Adjust wordcloud legend title
#' gs = gosummaries(wc_data = list(Results = list(wcd1, wcd2)),
#' wordcloud_legend_title = "Significance score")
#' plot(gs)
#'
#' @rdname gosummaries
#' @export
gosummaries = function(x = NULL, ...){
UseMethod("gosummaries", x)
}
#' @param wc_data precalculated GO enrichment results (see Details)
#' @param organism the organism that the gene lists correspond to. The format
#' should be as follows: "hsapiens", "mmusculus", "scerevisiae", etc.
#' @param go_branches GO tree branches and pathway databases as denoted in
#' g:Profiler (Possible values: BP, CC, MF, keg, rea)
#' @param max_p_value threshold for p-values that have been corrected for
#' multiple testing
#' @param min_set_size minimal size of functional category to be considered
#' @param max_set_size maximal size of functional category to be considered
#' @param max_signif maximal number of categories returned per query
#' @param ordered_query logical showing if the lists are ordered or not (it
#' determines if the ordered query algorithm is used in g:Profiler)
#' @param hier_filtering a type of hierarchical filtering used when reducing
#' the number of g:Profiler results (see \code{\link{gprofiler}} for further
#' information)
#' @param score_type indicates the type of scores in \code{wc_data}. Possible
#' values: "p-value" and "count"
#' @param wc_algorithm the type of wordcloud algorithm used. Possible values
#' are "top" that puts first word to the top corner and "middle" that puts
#' first word to the middle.
#' @param wordcloud_legend_title title of the word cloud legend, should reflect
#' the nature of the score
#'
#' @author Raivo Kolde <raivo.kolde@@eesti.ee>
#'
#' @rdname gosummaries
#' @method gosummaries default
#' @export
gosummaries.default = function(x = NULL, wc_data = NULL, organism = "hsapiens", go_branches = c("BP", "keg", "rea"), max_p_value = 1e-2, min_set_size = 50, max_set_size = 1000, max_signif = 40, ordered_query = TRUE, hier_filtering = "moderate", score_type = "p-value", wc_algorithm = "middle", wordcloud_legend_title = NULL, ...){
# Create basic structure
res = gosummaries_base(gl = x,
wc_data = wc_data,
score_type = score_type,
wc_algorithm = wc_algorithm,
wordcloud_legend_title = wordcloud_legend_title
)
# Add data and annotations
if(!is.null(x)){
res = add_dummydata.gosummaries(res)
}
if(is.null(wc_data)){
res = annotate.gosummaries(res, organism = organism,
go_branches = go_branches,
max_p_value = max_p_value,
min_set_size = min_set_size,
max_set_size = max_set_size,
max_signif = max_signif,
ordered_query = ordered_query,
hier_filtering = hier_filtering, ...)
attr(res, "wordcloud_legend_title") = "Enrichment p-value"
}
return(res)
}
#' @param gosummaries a gosummaries object
#' @rdname add_to_slot.gosummaries
#' @export
is.gosummaries = function(x) inherits(x, "gosummaries")
#' @param \dots not used
#'
#' @rdname add_to_slot.gosummaries
#' @method print gosummaries
#' @export
print.gosummaries = function(x, ...){
for(a in x){
cat(sprintf("Component title: %s\n", a$Title))
cat("\n")
cat(sprintf("%s\n", "Head of gene lists"))
for(l in a$Gene_lists){
print(head(l))
}
cat("\n")
cat(sprintf("%s\n", "Top annotation results"))
for(l in a$WCD){
print(head(l))
}
cat("\n")
cat(sprintf("%s\n", "Data"))
print(head(a$Data))
cat("\n")
cat(sprintf("%s\n", "Percentage slot:"))
cat(a$Percentage)
cat("\n===================================================\n\n")
}
}
#' @param i index
#'
#' @rdname add_to_slot.gosummaries
#' @method [ gosummaries
#' @export
"[.gosummaries" = function(x, i, ...) {
attrs = attributes(x)
out = unclass(x)
out = out[i]
attributes(out) = attrs
return(out)
}
#' Functions for working with gosummaries object
#'
#' Functions for working with gosummaries object
#'
#' Method [ ensures that subsetting gosummaries object will not lose the
#' attributes.
#'
#' \code{add_to_slot.gosummaries} allows to add values to specific slots in the
#' gosummaries object
#'
#' @param x a gosummaries object
#' @param slot the component slot name to be filled (e.g Title, Percentage,
#' etc.)
#' @param values list of values where each element corresponds to one component
#'
#' @author Raivo Kolde <raivo.kolde@@eesti.ee>
#' @examples
#' data(gs_kmeans)
#'
#' # Add new title to the components
#' gs_kmeans_new = add_to_slot.gosummaries(gs_kmeans, "Title",
#' as.list(paste("K-means cluster", 1:length(gs_kmeans))))
#'
#' print(gs_kmeans_new)
#'
#' @rdname add_to_slot.gosummaries
#' @export
add_to_slot.gosummaries = function(x, slot, values){
if(length(x) != length(values)){
stop("Length of gosummaries object and values does not match")
}
for(i in seq_along(x)){
x[[i]][[slot]] = values[[i]]
}
return(x)
}
##
## Annotate gosummaries with g:Profiler
filter_gprofiler = function(gpr, go_branches, min_set_size, max_signif){
gpr = gpr[gpr$domain %in% go_branches & gpr$term.size > min_set_size, ]
gpr = plyr::ddply(gpr, "query.number", function(x) {
x = x[order(x$p.value),]
rank = rank(x$p.value)
return(x[rank <= max_signif, ])
})
return(gpr)
}
annotate.gosummaries = function(gosummaries, organism, components = 1:length(gosummaries), go_branches, min_set_size, max_p_value, max_set_size, max_signif, ordered_query, hier_filtering, ...){
if(!is.gosummaries(gosummaries)){
stop("Function requires a gosummaries type of object")
}
#Compile gene lists
gl = NULL
for(i in seq_along(components)){
for(j in seq_along(gosummaries[[components[i]]]$Gene_lists)){
l = gosummaries[[components[i]]]$Gene_lists[[j]]
if(length(l) == 0){
l = c("uuuuuuu1", "3uuuuuuuuuuu5")
}
gl = c(gl, list(l))
}
}
# Run g:Profiler analysis
user_agent = sprintf("gProfileR/%s; GOsummaries/%s",
packageVersion("gProfileR"),
packageVersion("GOsummaries"))
gProfileR::set_user_agent(ua = user_agent, append = FALSE)
gProfileR::set_base_url("http://biit.cs.ut.ee/gprofiler")
# gProfileR::set_base_url(url = "http://biit.cs.ut.ee/gprofiler_archive/r1227_e72_eg19/web/")
gpr = gProfileR::gprofiler(query = gl, organism = organism,
ordered_query = ordered_query,
max_set_size = max_set_size,
hier_filtering = hier_filtering,
max_p_value = max_p_value, ...)
# Clean and filter the results
gpr$query.number = as.numeric(as.character(gpr$query.number))
gpr = filter_gprofiler(gpr, go_branches = go_branches,
min_set_size = min_set_size, max_signif = max_signif)
# Make one copy to return to the user
gpr_out = data.frame(Query = NA, gpr)
# Filter columns
gpr = gpr[, c("query.number", "p.value", "term.name")]
colnames(gpr) = c("query.number", "Score", "Term")
k = 1
for(i in seq_along(components)){
for(j in seq_along(gosummaries[[i]]$WCD)){
lname = paste("wcd", j, sep = "")
ind = gpr$query.number == k
results = gpr[ind, c("Term", "Score")]
gosummaries[[components[i]]]$WCD[[lname]] = results
gpr_out[ind, "Query"] = paste(gosummaries[[components[i]]]$Title, j)
k = k + 1
}
}
attr(gosummaries, "gprofiler_results") = gpr_out
return(gosummaries)
}
##
## Fill Data slot in gosummaries object
padz = function(x, n=max(nchar(x))) gsub(" ", "0", formatC(x, width=n))
#' Add expression data to gosummaries object
#'
#' Function to add expression data and its annotations to a gosummaries object.
#'
#' The data is added to the object in a "long" format so it would be directly
#' usable by the ggplot2 based panel drawing functions
#' \code{\link{panel_boxplot}} etc. For each component it produces a data frame
#' with columns:
#' \itemize{
#' \item \bold{x} : sample IDs for the x axis, their factor order is the same
#' as on the columns of input matrix
#' \item \bold{y} : expression values from the matrix
#' \item . . . : sample annotation columns from the annotation table that
#' can be displayed on figure as colors.
#' }
#'
#' @param gosummaries a gosummaries object
#' @param exp an expression matrix, with row names corresponding to the names
#' in the Gene_lists slot
#' @param annotation a \code{data.frame} describing the samples, its row names
#' should match with column names of \code{exp}
#' @author Raivo Kolde <raivo.kolde@@eesti.ee>
#' @examples
#' \dontrun{
#' data(gs_limma)
#' data(tissue_example)
#'
#' # Add just expression without annotations
#' gs_limma_exp1 = add_expression.gosummaries(gs_limma, exp =
#' tissue_example$exp)
#'
#' print(gs_limma_exp1)
#'
#' # Add expression with annotations
#' gs_limma_exp2 = add_expression.gosummaries(gs_limma, exp =
#' tissue_example$exp, annotation = tissue_example$annot)
#'
#' print(gs_limma_exp1)
#' }
#' @export
add_expression.gosummaries = function(gosummaries, exp, annotation = NULL){
# Check arguments
if(!is.gosummaries(gosummaries)){
stop("Function requires a gosummaries type object")
}
if(!all(colnames(exp) %in% rownames(annotation)) & !is.null(annotation)){
stop("Column names of expression matrix and row names of annotation data.frame do not match")
}
if(is.null(gosummaries[[1]]$Gene_lists)){
stop("No gene list data provided")
}
# Do the work
for(i in seq_along(gosummaries)){
a = list()
for(j in seq_along(gosummaries[[i]]$Gene_lists)){
e = data.frame(exp[gosummaries[[i]]$Gene_lists[[j]], ])
d = reshape2::melt(e)
colnames(d) = c("ID", "y")
d$x = paste(j, padz(match(d$ID, colnames(e))), d$ID, sep = "")
a[[j]] = d
}
a = do.call("rbind", a)
if(!is.null(annotation)){
annotation$ID = make.names(rownames(annotation))
a = merge(a, annotation)
a = a[, !(colnames(a) %in% "ID")]
a$x = factor(a$x, levels = sort(unique(as.character(a$x))))
}
if(length(gosummaries[[i]]$Gene_lists) == 1){
class(a) = c(class(a), "oneListExpData")
}
if(length(gosummaries[[i]]$Gene_lists) == 2){
class(a) = c(class(a), "twoListExpData")
}
gosummaries[[i]]$Data = a
}
return(gosummaries)
}
add_pca.gosummaries = function(gosummaries, pcr, annotation){
if(!is.gosummaries(gosummaries)){
stop("Function requires a gosummaries type object")
}
if(inherits(pcr, "prcomp")){
x = pcr$x
}
else{
x = pcr
}
if(!all(rownames(x) %in% rownames(annotation)) & !is.null(annotation)) {
stop("Column names of expression matrix and row names of annotation dataframe do not match")
}
for(i in seq_along(gosummaries)){
a = data.frame(x = x[, i])
if(!is.null(annotation)){
a$ID = rownames(x)
annotation$ID = rownames(annotation)
a = merge(a, annotation)
a = a[, !(colnames(a) %in% "ID")]
}
class(a) = c(class(a), "pcaData")
gosummaries[[i]]$Data = a
}
return(gosummaries)
}
##
## Adjust Wordcloud appearance parameters in gosummaries object
shorten_strings = function(strings, max){
strings = as.character(strings)
n = nchar(strings)
ind = n > max
strings[ind] = paste(substr(strings[ind], 1, max - 3), "...", sep = "")
return(strings)
}
convert_scores = function(gosummaries){
for(i in seq_along(gosummaries)){
for(j in seq_along(gosummaries[[i]]$WCD)){
scores = gosummaries[[i]]$WCD[[j]]$Score
if(attr(gosummaries, "score_type") == "p-value"){
if(any(scores == 0)){
scores[scores == 0] = min(scores[scores != 0])
}
scores = -log10(scores)
}
if(attr(gosummaries, "score_type") == "count"){
scores = scores
}
gosummaries[[i]]$WCD[[j]]$Score = scores
}
}
return(gosummaries)
}
adjust_wordcloud_appearance = function(gosummaries, term_length = 35, wordcloud_colors = c("grey70", "grey10")){
# Convert different types of scores to common values
gosummaries = convert_scores(gosummaries)
# Find best score
scores = plyr::llply(gosummaries, function(x){
comp_scores = plyr::llply(x$WCD, function(y){
y$Score
})
unlist(comp_scores)
})
scores = unlist(scores)
if(length(scores) == 0){
return(gosummaries)
}
best_score = max(scores)
# Adjust Term names and scores
for(i in seq_along(gosummaries)){
for(j in seq_along(gosummaries[[i]]$WCD)){
# Shorten GO names
original_terms = gosummaries[[i]]$WCD[[j]]$Term
shortened_terms = shorten_strings(original_terms, term_length)
gosummaries[[i]]$WCD[[j]]$Term = shortened_terms
# Calculate colors
comp_scores = gosummaries[[i]]$WCD[[j]]$Score
palette = colorRampPalette(wordcloud_colors)(100)
palette_ind = ceiling(comp_scores / best_score * 100)
gosummaries[[i]]$WCD[[j]]$Colors = palette[palette_ind]
}
}
return(gosummaries)
}
##
## Plotting utility functions
## Define zeroGrob
zeroGrob = function(){
grob(cl = "zeroGrob", name = "NULL")
}
widthDetails.zeroGrob =
heightDetails.zeroGrob =
grobWidth.zeroGrob =
grobHeight.zeroGrob = function(x) unit(0, "cm")
drawDetails.zeroGrob = function(x, recording) {}
is.zero = function(x) identical(x, zeroGrob())
##
open_file_con = function(filename, width, height){
# Get file type
r = regexpr("\\.[a-zA-Z]*$", filename)
if(r == -1) stop("Improper filename")
ending = substr(filename, r + 1, r + attr(r, "match.length"))
f = switch(ending,
pdf = function(x, ...) pdf(x, ...),
png = function(x, ...) png(x, units = "in", res = 300, ...),
jpeg = function(x, ...) jpeg(x, units = "in", res = 300, ...),
jpg = function(x, ...) jpeg(x, units = "in", res = 300, ...),
tiff = function(x, ...) tiff(x, units = "in", res = 300,
compression = "lzw", ...),
bmp = function(x, ...) bmp(x, units = "in", res = 300, ...),
stop("File type should be: pdf, png, bmp, jpg, tiff")
)
f(filename, width = width, height = height)
}
panelize_ggplot2 = function(plot_function, customize_function, par){
res = function(data, fontsize, legend = FALSE){
p = plot_function(data, fontsize, par)
p = customize_function(p, par)
p = ggplot2::ggplot_build(p)
p = ggplot2::ggplot_gtable(p)
if(legend){
if(any(grepl("guide-box", p$layout$name))){
gb = gtable::gtable_filter(p, "guide-box")
gb_grob = gb$grob[[1]]$grob[[1]]
nc = ncol(gb_grob)
nr = nrow(gb_grob)
res = gb_grob[2:(nr - 1), 2:(nc - 1)]
return(res)
}
else{
res = gtable::gtable(widths = unit(0, "cm"),
heights = unit(0, "cm"))
res = gtable::gtable_add_grob(res, zeroGrob(), 1, 1)
return(res)
}
}
else{
return(gtable::gtable_filter(p, "panel"))
}
}
return(res)
}
##
## Raw plotting functions
calc_component_dimensions = function(component, par){
# Wordcloud height
nr = max(plyr::laply(component$WCD, nrow))
if(length(component$WCD) > 1){
wc_height = ifelse(nr > 3, max(nr / 6.5, 3), nr)
# arrows_height = 1.5
arrows_height = 0.5
}
else{
wc_height = ifelse(nr > 3, max(nr / 10, 3), nr)
arrows_height = 0.5
}
# Percentage slot width
gp = gpar(fontsize = par$fontsize, cex = 0.8)
percentage_rows = strsplit(component$Percentage, "\n")[[1]]
percentage_grobs = lapply(as.list(percentage_rows), textGrob, gp = gp)
percentage_widths = lapply(percentage_grobs, grobWidth)
percentage_widths_units = do.call(unit.c, percentage_widths)
# Compile results
lines_in_points = par$fontsize * 1.445
res = list(
title_height = unit(1.5 * lines_in_points, "points"),
panel_height = unit(par$panel_height * lines_in_points, "points"),
arrows_height = unit(arrows_height * lines_in_points, "points"),
wc_height = unit(wc_height * lines_in_points, "points"),
panel_width = unit(par$panel_width * lines_in_points, "points"),
percentage_width = max(percentage_widths_units) * 1.25,
wc_width = unit(par$panel_width * lines_in_points /
length(component$WCD), "points")
)
return(res)
}
calc_components_dimensions = function(gosummaries, par){
component_dims = list()
for(i in seq_along(gosummaries)){
component_dims[[i]] = calc_component_dimensions(gosummaries[[i]], par)
}
return(component_dims)
}
gen_legend = function(legend_data, par){
n = length(legend_data$colors)
# Create Grobs
title = textGrob(legend_data$title, y = 1, x = 0, just = c(0, 1),
gp = gpar(fontsize = par$fontsize, fontface = "bold",
cex = 0.8))
# rect_height = unit(1.7 * par$fontsize, "points")
rect_height = unit(6.096, "mm")
yy = unit(1, "npc") - unit(0.8 * par$fontsize * 1.1, "points") -
(0:(n - 1)) * rect_height
boxes = rectGrob(x = 0, y = yy, height = rect_height, width = rect_height,
just = c(0, 1),
gp = gpar(col = 0, fill = legend_data$colors))
yyy = yy - rect_height * 0.5
gl = gList()
length = c(rep(0, n), convertWidth(grobWidth(title), "in"))
for(i in 1:n){
gl[[i]] = textGrob(legend_data$labels[[i]],
x = rect_height + unit(3, "points"), y = yyy[i],
hjust = 0,
gp = gpar(cex = 0.8, fontsize = par$fontsize))
length[i] = convertWidth(grobWidth(gl[[i]]), "in")
}
# Calculate size
height = unit(0.8 * par$fontsize * 1.445, "points") + n * rect_height
width = rect_height + unit(3, "points") + unit(max(length), "in")
# Put together a frame
fg = frameGrob()
fg = packGrob(fg, rectGrob(width = width, height = height,
gp = gpar(col = NA)))
fg = packGrob(fg, title)
fg = packGrob(fg, boxes)
for(i in 1:length(gl)){
fg = packGrob(fg, gl[[i]])
}
return(fg)
}
gen_wordcloud_legend = function(gosummaries, par){
legend_data = list()
legend_data$title = par$wordcloud_legend_title
legend_data$colors = colorRampPalette(rev(par$wordcloud_colors))(3)
# Calculate p-value breakpoints
scores = plyr::ldply(gosummaries, function(x){
plyr::ldply(x$WCD, function(y) data.frame(y$Score))
})[,2]
if(length(scores) == 0){
empty_grob = rectGrob(width = unit(0.0001, "cm"),
height = unit(0.0001, "cm"))
return(empty_grob)
}
best_score = max(scores)
breaks = grid.pretty(c(0, best_score))
if(length(breaks) %% 2 == 0){
average_low = breaks[length(breaks) / 2]
average_high = breaks[length(breaks) / 2 + 1]
average = mean(c(average_low, average_high))
}
else{
average = breaks[ceiling(length(breaks) / 2)]
}
if(attr(gosummaries, "score_type") == "p-value"){
legend_data$labels = c(
substitute(10 ^ -p, list(p = breaks[length(breaks)])),
substitute(10 ^ -p, list(p = average)),
1
)
}
if(attr(gosummaries, "score_type") == "count"){
legend_data$labels = c(breaks[length(breaks)], average, 0)
}
return(gen_legend(legend_data, par))
}
plot_wordcloud = function(words, freq, color, algorithm, dimensions){
if(length(words) > 0){
return(plotWordcloud(words, freq, colors = color, random.order = FALSE,
min.freq = -Inf, rot.per = 0, scale = 0.85,
max_min = c(1, 0), algorithm = algorithm,
add = FALSE, grob = TRUE,
dimensions = dimensions))
}
return(zeroGrob())
}
plot_arrow = function(end, par){
x = switch(end, first = c(0, 0.95), both = c(0.05, 0.95), last = c(0.05, 1))
res = linesGrob(x = x, y = 0.5, arrow = arrow(ends = end, type = "closed",
angle = 15, length = unit(0.1, "inches")),
gp = gpar(lwd = 0.3 * par$fontsize, col = "grey40"))
return(res)
}
plot_component = function(data_component, plot_panel, par, component_dims){
# Create gtable
heights = with(component_dims, unit.c(title_height, panel_height,
arrows_height + wc_height))
widths = with(component_dims, unit.c(panel_width, percentage_width))
gtable_component = gtable::gtable(widths, heights)
# Add title
title = textGrob(x = 0,
hjust = 0,
label = data_component$Title,
gp = gpar(fontface = "bold", fontsize = par$fontsize)
)
gtable_component = gtable::gtable_add_grob(gtable_component, title, 1, 1,
clip = "off")
# Add plot
if(par$panel_height != 0){
p = plot_panel(data_component$Data, par$fontsize)
}
else{
p = zeroGrob()
}
b = rectGrob(gp = gpar(lwd = 1.5, col = "grey40", fill = NA))
gtable_component = gtable::gtable_add_grob(gtable_component,
gTree(children = gList(p, b)),
2, 1, clip = "off")
# Add percentage
p = textGrob(data_component$Percentage, x = 0.1, y = 1, vjust = 1,
hjust = 0, gp = gpar(fontsize = par$fontsize, cex = 0.8))
gtable_component = gtable::gtable_add_grob(gtable_component, p, 2, 2,
clip = "off")
# Arrows and wordclouds
heights = with(component_dims, unit.c(arrows_height, wc_height))
widths = with(component_dims, rep(wc_width, length(data_component$WCD)))
gtable_aw = gtable::gtable(widths, heights)
if(length(data_component$WCD) == 1){
wc = plot_wordcloud(words = data_component$WCD$wcd1$Term,
freq = data_component$WCD$wcd1$Score,
color = data_component$WCD$wcd1$Colors,
algorithm = switch(par$wc_algorithm, top = "leftside_top",
middle = "leftside"),
dimensions = with(component_dims, unit.c(wc_width, wc_height))
)
gtable_aw = gtable::gtable_add_grob(gtable_aw, wc, 2, 1,
name = "wordcloud")
}
if(length(data_component$WCD) == 2){
wc1 = plot_wordcloud(words = data_component$WCD$wcd1$Term,
freq = data_component$WCD$wcd1$Score,
color = data_component$WCD$wcd1$Colors,
algorithm = switch(par$wc_algorithm, top = "leftside_top",
middle = "leftside"),
dimensions = with(component_dims, unit.c(wc_width, wc_height))
)
wc2 = plot_wordcloud(words = data_component$WCD$wcd2$Term,
freq = data_component$WCD$wcd2$Score,
color = data_component$WCD$wcd2$Colors,
algorithm = switch(par$wc_algorithm, top = "rightside_top",
middle = "rightside"),
dimensions = with(component_dims, unit.c(wc_width, wc_height))
)
gtable_aw = gtable::gtable_add_grob(gtable_aw, wc1, 2, 1,
name = "wordcloud-left")
gtable_aw = gtable::gtable_add_grob(gtable_aw, wc2, 2, 2,
name = "wordcloud-right")
}
gtable_component = gtable::gtable_add_grob(gtable_component, gtable_aw, 3,
1, name = "arrows-wordcloud")
gtable_component = gtable::gtable_add_padding(gtable_component,
unit(c(0, 0, 0.5 * par$fontsize * 1.445, 0), "points"))
return(gtable_component)
}
plot_motor = function(gosummaries, plot_panel, par = list(fontsize = 10, panel_height = 5, panel_width = 405), filename = NA){
# Calculate dimensions for the picture components
component_dimensions = calc_components_dimensions(gosummaries, par)
# Create component grobs
components = list()
par$wc_algorithm = attr(gosummaries, "wc_algorithm")
for(i in seq_along(gosummaries)){
components[[i]] = plot_component(data_component = gosummaries[[i]],
plot_panel = plot_panel,
par = par,
component_dims = component_dimensions[[i]]
)
}
# Create legends
if(par$panel_height != 0){
panel_legend = plot_panel(gosummaries[[1]]$Data, par$fontsize,
legend = TRUE)
height = gtable::gtable_height(panel_legend)
height_in_cm = convertHeight(height, "cm", valueOnly = TRUE)
if(height_in_cm != 0){
panel_legend = gtable::gtable_add_padding(panel_legend,
unit(c(0, 0, 0.5 * par$fontsize * 1.445, 0), "points"))
}
}
else{
panel_legend = gtable::gtable(widths = unit(0, "cm"),
heights = unit(0, "cm"))
panel_legend = gtable::gtable_add_grob(panel_legend, zeroGrob(), 1, 1)
}
if(par$wordcloud_colors[1] == par$wordcloud_colors[2]){
wordcloud_legend = gtable::gtable(widths = unit(0.001, "cm"),
heights = unit(0.001, "cm"))
}
else{
wordcloud_legend = gen_wordcloud_legend(gosummaries, par)
}
# Calculate legend dimensions to create gtable for it
pl_height = gtable::gtable_height(panel_legend)
pl_width = gtable::gtable_width(panel_legend)
wl_height = grobHeight(wordcloud_legend)
wl_width = grobWidth(wordcloud_legend)
legend_width = max(pl_width, wl_width)
legend_height = unit.c(pl_height, wl_height)
vp = viewport(
y = unit(1, "npc") - unit(1.5 * par$fontsize * 1.445, "points"),
height = sum(legend_height), just = c(0.5, 1))
gtable_legend = gtable::gtable(width = legend_width,
height = legend_height, vp = vp)
panel_legend = editGrob(panel_legend, vp = viewport(x = unit(0, "npc"),
width = pl_width, just = c(0, 0.5)))
gtable_legend = gtable::gtable_add_grob(gtable_legend,
grobs = panel_legend, t = 1, l = 1,
name = "panel-legend", clip = "off")
wordcloud_legend = editGrob(wordcloud_legend,
vp = viewport(x = unit(0, "npc"),
width = wl_width, just = c(0, 0.5)))
gtable_legend = gtable::gtable_add_grob(gtable_legend, wordcloud_legend,
t = 2,
l = 1,
name = "wordcloud-legend",
clip = "off"
)
# Create gtable layout for the whole figure
component_widths = do.call(unit.c, lapply(components, gtable::gtable_width))
widths = unit.c(max(component_widths), gtable::gtable_width(gtable_legend))
heights = do.call(unit.c, lapply(components, gtable::gtable_height))
gtable_full = gtable::gtable(widths, heights)
# Add components
for(i in 1:length(components)){
component_width = gtable::gtable_width(components[[i]])
components[[i]] = editGrob(components[[i]],
vp = viewport(x = 0,
width = component_width,
just = c(0, 0.5)))
component_name = paste("Component", i, sep = "-")
gtable_full = gtable::gtable_add_grob(gtable_full, components[[i]], i,
1, name = component_name)
}
# Add legend
gtable_full = gtable::gtable_add_grob(gtable_full, gtable_legend, 1, 2,
length(components), name = "legend")
# Add padding
padding_size = unit(0.5 * par$fontsize * 1.445, "points")
gtable_full = gtable::gtable_add_padding(gtable_full, padding_size)
# Open connection to file if filename specified
if(!is.na(filename)){
width = convertWidth(gtable::gtable_width(gtable_full), "inches",
valueOnly = TRUE)
height = convertHeight(gtable::gtable_height(gtable_full), "inches",
valueOnly = TRUE)
open_file_con(filename, width, height)
}
# Draw
grid.draw(gtable_full)
# Close connection if filename specified
if(!is.na(filename)){
dev.off()
}
return(gtable_full)
}
##
## Panel functions for expression data
#' Panel drawing functions
#'
#' These functions are used to draw the panel portion of every component based
#' on the Data slots in gosummaries object. These concrete functions assume the
#' data is presented as done by \code{\link{add_expression.gosummaries}}. They
#' provide three options: boxplot, violin plot (which shows the distrubution
#' more precisely) and both combined. Additionally it is possible to combine
#' the values from one each functional group into one box/violin plot using
#' the corresponding functions ending with "_classes".
#'
#' These functions specify in principle the general setting for the panels,
#' like which "geom"-s, how the data is transformed and summarized, etc. To
#' make small adjustments to the figure such as changing color scheme, write
#' your own customization function (See \code{\link{customize}} as example).
#'
#' It is possible to write your own panel plotting function, as long as the
#' parameters used and the return value are similar to what is specified here.
#' When writing a new panel function one only has to make sure that it matches
#' the data given in the Data slot of the gosummaries object.
#'
#' @param data the data from Data slot of the gosummaries object
#' @param fontsize fontsize in points, mainly used to ensure that the legend
#' fontsizes match
#' @param par additional parameters for drawing the plot, given as list. These
#' functions use only \code{classes} slot for figuring out which parameter to
#' use for coloring the "geom"-s. However, when building a custom function it
#' provides a way to give extra parameters to the plotting function.
#' @return It returns a function that can draw a ggplot2 plot of the data in
#' Data slot of a gosummaries object. The legend and the actual plots for the
#' panels are extracted later from the figure produced by this function.
#' @author Raivo Kolde <raivo.kolde@@eesti.ee>
#'
#' @examples
#'
#' \dontrun{
#' data(gs_kmeans)
#'
#' # Draw default version with plot_boxplot
#' plot(gs_kmeans, components = 1:3, classes = "Tissue")
#'
#' # Define alternative where one boxplot summarises all values in one class
#' plot_classes = function(data, fontsize, par){
#' qplot(x = data[, par$classes], y = data$y, geom = "boxplot",
#' fill = data[, par$classes]) + theme_bw()
#' }
#'
#' plot(gs_kmeans, components = 1:3, panel_plot = plot_classes, classes = "Tissue")
#'
#'
#' # Flip the boxplots to make them more comparable
#' plot_classes = function(data, fontsize, par){
#' qplot(x = data[, par$classes], y = data$y, geom = "boxplot",
#' fill = data[, par$classes]) + coord_flip() + theme_bw()
#' }
#'
#' plot(gs_kmeans, components = 1:3, panel_plot = plot_classes, classes = "Tissue")
#' }
#'
#' @rdname panel_boxplot
#' @export
panel_boxplot = function(data, fontsize = 10, par){
qq = function(x) {
bs = boxplot.stats(x)$stats
data.frame(ymin = bs[1], lower = bs[2], middle = bs[3], upper = bs[4],
ymax = bs[5])
}
if(!is.null(par$classes)){
# p = ggplot2::qplot(x = data$x, y = data$y, geom = "boxplot",
# stat = "summary", fun.data = qq,
# fill = data[, par$classes], width = 0.7)
# browser()
data = data[, c("x", "y", par$classes)]
colnames(data)[3] = "Class"
p = ggplot2::ggplot(aes(x = x, y = y, fill = Class, width = 0.7),
data = data)
p = p + ggplot2::layer(geom = "boxplot", stat = "summary",
position = position_identity(),
params = list(fun.data = qq, na.rm = T))
}
else{
p = ggplot2::ggplot(aes(x = x, y = y, width = 0.7), data = data)
p = p + ggplot2::layer(geom = "boxplot", stat = "summary",
position = position_identity(),
params = list(fun.data = qq, na.rm = T))
}
if(inherits(data, "twoListExpData")){
n_samples = length(unique(data$x))/2
p = p + ggplot2::geom_vline(xintercept = n_samples + 0.5)
}
p = p + ggplot2::theme_bw(base_size = fontsize)
return(p)
}
#' @rdname panel_boxplot
#' @export
panel_violin = function(data, fontsize = 10, par){
if(!is.null(par$classes)){
p = ggplot2::qplot(x = data$x, y = data$y, geom = "violin",
fill = data[, par$classes], colour = I(NA))
}
else{
p = ggplot2::qplot(x = data$x, y = data$y, geom = "violin",
colour = I(NA), fill = I("gray30"))
}
if(inherits(data, "twoListExpData")){
n_samples = length(unique(data$x))/2
p = p + ggplot2::geom_vline(xintercept = n_samples + 0.5)
}
p = p + ggplot2::theme_bw(base_size = fontsize)
return(p)
}
#' @rdname panel_boxplot
#' @export
panel_violin_box = function(data, fontsize = 10, par){
if(!is.null(par$classes)){
p = ggplot2::qplot(x = data$x, y = data$y, geom = "violin",
fill = data[, par$classes], colour = I(NA)) +
ggplot2::geom_boxplot(fill = NA, outlier.size = 0.25,
size = I(0.1), width = 0.7)
}
else{
p = ggplot2::qplot(x = data$x, y = data$y, geom = "violin",
colour = I(NA)) +
ggplot2::geom_boxplot(fill = NA, outlier.size = 0.25,
size = I(0.1), width = 0.7)
}
if(inherits(data, "twoListExpData")){
n_samples = length(unique(data$x))/2
p = p + ggplot2::geom_vline(xintercept = n_samples + 0.5)
}
p = p + ggplot2::theme_bw(base_size = fontsize)
return(p)
}
combine_classes = function(data, par){
if(inherits(data, "twoListExpData")){
data[, par$classes] = factor(data[, par$classes])
new_x = paste(substr(data[, "x"], 1, 1), data[, par$classes])
n_levels = length(levels(data[, par$classes]))
levels = paste(rep(1:2, times = c(n_levels, n_levels)),
levels(data[, par$classes]))
data$x = factor(new_x, levels = levels)
}
else{
data$x = data[, par$classes]
}
return(data)
}
#' @rdname panel_boxplot
#' @export
panel_boxplot_classes = function(data, fontsize = 10, par){
data = combine_classes(data, par)
p = panel_boxplot(data, fontsize, par)
return(p)
}
#' @rdname panel_boxplot
#' @export
panel_violin_classes = function(data, fontsize = 10, par){
data = combine_classes(data, par)
p = panel_violin(data, fontsize, par)
return(p)
}
#' @rdname panel_boxplot
#' @export
panel_violin_box_classes = function(data, fontsize = 10, par){
data = combine_classes(data, par)
p = panel_violin_box(data, fontsize, par)
return(p)
}
##
## Panel functions for pca data
panel_histogram = function(data, fontsize = 10, par){
if(!is.null(par$classes)){
p = ggplot2::qplot(data$x, geom = "histogram", fill = data[, par$classes],
binwidth = (max(data$x) - min(data$x)) / 20,
colour = I("grey70"))
}
else{
p = ggplot2::qplot(data$x, geom = "histogram",
binwidth = (max(data$x) - min(data$x)) / 20,
data = data)
}
p = p + ggplot2::theme_bw(base_size = fontsize)
return(p)
}
##
## Panel function for dummyData
panel_dummy = function(data, fontsize = 10, par){
if(nrow(data$mat) == 1){
colors = "#336699"
p = ggplot2::ggplot(aes(x = 1, y = y, fill = x, width = 0.6),
data = data$mat) +
ggplot2::geom_bar(stat = "identity") +
ggplot2::scale_x_continuous(limits = c(0.5, 1.5)) +
ggplot2::scale_y_continuous(limits = c(0, data$max)) +
ggplot2::scale_fill_manual(values = colors) +
ggplot2::theme_bw(base_size = fontsize) +
ggplot2::theme(legend.position = "none") +
ggplot2::coord_flip()
}
if(nrow(data$mat) == 2){
colors = c("#336699", "#990033")
data$mat$y[1] = -data$mat$y[1]
data$mat$x = factor(data$mat$x, labels = c("G1 > G2", "G1 < G2"))
p = ggplot2::ggplot(aes(x = 1, y = y, fill = x, width = 0.6),
data = data$mat) +
ggplot2::geom_bar(stat = "identity", position = "identity") +
ggplot2::scale_x_continuous(limits = c(0.5, 1.5)) +
ggplot2::scale_y_continuous(limits = c(-data$max, data$max)) +
ggplot2::scale_fill_manual("Regulation direction", values = colors)+
ggplot2::theme_bw(base_size = fontsize) +
ggplot2::theme(legend.position = "none") +
ggplot2::coord_flip()
}
return(p)
}
##
## Customization functions
customize_dummy = function(p, par){
return(p)
}
#' Customization function for panel
#'
#' This function is supposed to make small changes in the panel function
#' appearance like changing color scheme for example. It has to match with the
#' output of the corresponding panel function. Check examples in
#' plot.gosummaries to see how to write one yourself.
#'
#' @param p a ggplot2 plot object
#' @param par parameters object like in \code{\link{panel_boxplot}}
#' @return a ggplot2 plot object with added customizations
#' @author Raivo Kolde <raivo.kolde@@eesti.ee>
#' @examples
#' \dontrun{
#' data(gs_limma_exp)
#'
#' cust = function(p, par){
#' p = p + scale_fill_brewer(par$classes, type = "qual", palette = 1)
#' return(p)
#' }
#'
#' plot(gs_limma_exp, classes = "Tissue", panel_plot = panel_boxplot,
#' panel_customize = cust, fontsize = 8)
#' }
#'
#' @export
customize = function(p, par){
p = p + ggplot2::scale_fill_discrete(par$classes)
return(p)
}
##
## plot.gosummaries
#' Plot the GOsummaries figure
#'
#' The function to draw a GOsummaries figure based on a \code{gosummaries}
#' object. The GOsummaries figure consists of several components each defined
#' by a gene list ora a pair of them. The GO annotations of them are shown as
#' wordclouds. Optionally one can draw related (expression) data on panels atop
#' of the wordclouds.
#'
#' In most cases the function can decide which type of plot to draw into the
#' panel part. If there is no data explicitly put into the Data slots of the
#' gosummaries object, it just draws a horizontal barplot with the numbers of
#' genes. On visualizing the PCA data it draws histogram of the samples on the
#' principal axes. For clustering and differential expression it draws the
#' boxplot of expression values.
#'
#' @param x a gosummaries object
#' @param components index for the components to draw.
#' @param classes name of the variable from annotation data.frame that defines
#' the colors in the plot
#' @param panel_plot plotting function for panel
#' @param panel_customize customization function for the panel plot, menat for
#' making small changes like changing colour scheme
#' @param panel_par list of arguments passed on to \code{panel_plot} function
#' @param panel_height panel height as number of lines, with given
#' \code{fontsize}. If set to 0 no panel is drawn.
#' @param panel_width panel width in lines of text
#' @param fontsize font size used throughout the figure in points
#' @param term_length maximum length of the dispalyed GO categories in
#' characters, longer names are cropped to this size
#' @param wordcloud_colors two element vector of colors to define color scheme
#' for displaying the enrichment p-values across the wordclouds. First element
#' defines the color for category with worst p-value and the second for the
#' word with the best. Set the same value for both if you want to remove the
#' color scale and the legend.
#' @param wordcloud_legend_title title of wordcloud legend
#' @param filename file path where to save the picture. Filetype is decided by
#' the extension in the path. Currently following formats are supported: png,
#' pdf, tiff, bmp, jpeg. Even if the plot does not fit into the plotting
#' window, the file size is calculated so that the plot would fit there.
#' @param \dots not used
#'
#' @return The \code{\link{gtable}} object containing the figure
#' @author Raivo Kolde <raivo.kolde@@eesti.ee>
#' @examples
#' \dontrun{
#' data(gs_limma)
#'
#' # Default plot
#' plot(gs_limma, fontsize = 8)
#'
#' # Omitting the panel area
#' plot(gs_limma, panel_height = 0, fontsize = 8)
#'
#' # Selecting only certain components
#' plot(gs_limma, components = c(1, 3), fontsize = 8)
#'
#' # Cutting the longer terms shorter (see right wordcloud on first component)
#' plot(gs_limma, term_length = 20, fontsize = 8)
#'
#' # Change wordcloud colors
#' plot(gs_limma, term_length = 20, wordcloud_colors = c("#C6DBEF", "#08306B"),
#' fontsize = 8)
#'
#' # Adjust panel plot type (see panel_boxplot help for options)
#' data(gs_kmeans)
#'
#' plot(gs_kmeans, panel_plot = panel_violin, classes = "Tissue", components =
#' 1:2, fontsize = 8)
#' plot(gs_kmeans, panel_plot = panel_violin_box, classes = "Tissue",
#' components = 1:2, fontsize = 8)
#'
#' # Adjust colorscheme for plot (see customize help for more information)
#' cust = function(p, par){
#' p = p + scale_fill_brewer(par$classes, type = "qual", palette = 2)
#' return(p)
#' }
#' plot(gs_kmeans, panel_plot = panel_violin, panel_customize = cust,
#' classes = "Tissue", components = 1:2, fontsize = 8)
#' }
#' @method plot gosummaries
#'
#' @export
plot.gosummaries = function(x, components = 1:min(10, length(x)), classes = NA, panel_plot = NULL, panel_customize = NULL, panel_par = list(), panel_height = 5, panel_width = 30, fontsize = 10, term_length = 35, wordcloud_colors = c("grey70", "grey10"), wordcloud_legend_title = NULL, filename = NA, ...){
# Check input
if(!is.gosummaries(x)){
stop("Function requires an object of gosummaries type")
}
if(any(!(components %in% 1:length(x)))){
stop("Selected components are not present in data")
}
# Add classes to panel_par
if(!is.na(classes)){
if(!(classes %in% colnames(x[[1]]$Data))){
stop("Classes variable has to be present in the data.frame in the component Data slot")
}
else{
panel_par[["classes"]] = classes
}
}
# Take out components of interest
x = x[components]
if(length(x) < 1) stop("No components selected")
# Add wordcloud colors and adjust the string length
x = adjust_wordcloud_appearance(x, term_length, wordcloud_colors)
# Adjust wordcloud legend title
if(is.null(wordcloud_legend_title)){
wordcloud_legend_title = attr(x, "wordcloud_legend_title")
}
# Attach default plotting method if it is not set
first_comp_data = x[[1]]$Data
if(is.null(panel_plot)){
if (is.null(first_comp_data)){
panel_height = 0
}
else if(inherits(first_comp_data, "dummyData")){
if(panel_height == 5){
panel_height = 3
}
panel_plot = panel_dummy
}
else if(inherits(first_comp_data, "pcaData")){
panel_plot = panel_histogram
}
else if(inherits(first_comp_data, "oneListExpData") |
inherits(first_comp_data, "twoListExpData")){
panel_plot = panel_boxplot
}
else{
stop("The panel data is in unknown format, please specify matching panel_plot function")
}
}
if(is.null(panel_customize)){
if(inherits(first_comp_data, "dummyData")){
panel_customize = customize_dummy
}
else{
panel_customize = customize
}
}
# Set figure parameters
par = list(
fontsize = fontsize,
panel_height = panel_height,
panel_width = panel_width,
wordcloud_colors = wordcloud_colors,
wordcloud_legend_title = wordcloud_legend_title
)
# Take the panel plot to proper format
plot_panel = panelize_ggplot2(panel_plot, panel_customize, panel_par)
invisible(plot_motor(x, plot_panel = plot_panel, par = par,
filename = filename))
}
##
## Data type specific convenience functions like for prcomp, kmeans, limma, ...
convert_gene_ids = function(unique_ids, gconvert_target, organism){
if(!is.null(gconvert_target)){
cat(sprintf("%s\n", "Convert IDs"))
gcr = gProfileR::gconvert(unique_ids, target = gconvert_target,
organism = organism)
gcr = ddply(gcr, "alias", function(x){
if(nrow(x) == 1){
return(x)
}
else{
return(x[which.min(nchar(as.character(x$target))), ])
}
})
i2g = gcr$target
names(i2g) = gcr$alias
}
else{
i2g = unique_ids
names(i2g) = toupper(unique_ids)
}
return(i2g)
}
filter_wc_data = function(wc_data, i2g, n_genes){
wc_data$Term = i2g[toupper(as.character(wc_data$Term))]
wc_data = wc_data[!is.na(wc_data$Term),]
wc_data = wc_data[!duplicated(wc_data$Term), ]
if(nrow(wc_data) != 0){
wc_data = wc_data[1:min(nrow(wc_data), n_genes), ]
}
return(wc_data)
}
filter_pca_wc_data = function(wc_data){
index = wc_data$Score / max(wc_data$Score)
return(subset(wc_data, index > 0.1))
}
# Spearman correlation analysis on PCA components
pspearman = function(rho, n, lower.tail = TRUE) {
q = (n^3 - n) * (1 - rho) / 6
den = (n * (n^2 - 1)) / 6 + 1
r = 1 - q/den
p = pt(r / sqrt((1 - r ^ 2) / (n - 2)), df = n - 2,
lower.tail = !lower.tail)
return(p)
}
spearman_mds = function(pc, expression, n_genes){
n = ncol(expression)
cc = cor(t(expression), pc, method = "spearman")[,1]
res = data.frame(Term = names(cc), Correlation = cc)
res$Score = pmin(pspearman(res$Correlation, n, lower.tail = FALSE),
pspearman(res$Correlation, n, lower.tail = TRUE))
res = res[order(abs(res$Correlation), decreasing = TRUE), ]
n_up = sum(res$Correlation > 0)
n_down = sum(res$Correlation < 0)
res = list(
res[res$Correlation < 0, c("Term", "Score")][1:min(n_down, 2*n_genes),],
res[res$Correlation > 0, c("Term", "Score")][1:min(n_up, 2 * n_genes), ]
)
return(res)
}
#' Prepare gosummaries object based on Multi Dimensional Scaling (MDS) results
#'
#' The Multi Dimensional Scaling (MDS) results are converted into a gosummaries
#' object, by finding genes that have most significant Spearman correlations
#' with each component.
#'
#' This visualisation of MDS results is very similar to the one performed by
#' \code{\link{gosummaries.prcomp}}. Difference from PCA is that, in general,
#' we do not have the loadings for individual genes that could be used to
#' associate genes with components. However, it is possible to find genes that
#' are most correlated with each component. This function uses Spearman
#' correlation coefficient to find most correlated features. The significance
#' of the correlation values is decided using he approximation with
#' t-distribution.
#'
#' The function can also display genes instead of their GO annotations, while
#' the sizes of the gene names correspond to the Spearman correlation p-values.
#' The corresponding parameters are described in more detail in
#' \code{\link{gosummaries.MArrayLM}}. This feature is important in
#' applications, like metabolomics and metagenomics, where the features are not
#' genes and it is not possible to run GO enrichment analysis.
#'
#' @param x a matrix representation of multi dimensional scaling result, rows
#' correspond to samples
#' @param exp an expression matrix, with columns corresponding to samples
#' (these have to be in the same order as in \code{x})
#' @param annotation a \code{data.frame} describing the samples, its row names
#' should match with column names of \code{exp} (Optional)
#' @param components numeric vector of comparisons to annotate
#' @param show_genes logical showing if GO categories or actual genes are shown
#' in word clouds
#' @param gconvert_target specifies gene ID format for genes showed in word
#' cloud. The name of the format is passed to \code{\link{gconvert}}, if NULL
#' original IDs are shown.
#' @param n_genes maximum number of genes shown in a word cloud
#' @param organism the organism that the gene lists correspond to. The format
#' should be as follows: "hsapiens", "mmusculus", "scerevisiae", etc.
#' @param \dots GO annotation filtering parameters as defined in
#' \code{\link{gosummaries.default}}
#' @return A gosummaries object.
#'
#' @author Raivo Kolde <raivo.kolde@@eesti.ee>
#' @examples
#' \dontrun{
#' library(vegan)
#'
#' data("metagenomic_example")
#'
#' # Run Principal Coordinate Analysis on Bray-Curtis dissimilarity matrix
#' pcoa = cmdscale(vegdist(t(metagenomic_example$otu), "bray"), k = 3)
#'
#' # By turning off the GO analysis we can show the names of taxa
#' gs = gosummaries(pcoa, metagenomic_example$otu, metagenomic_example$annot,
#' show_genes = TRUE, gconvert_target = NULL, n_genes = 30)
#'
#' plot(gs, class = "BodySite", fontsize = 8)
#' }
#'
#' @export
gosummaries.matrix = function(x, exp = NULL, annotation = NULL, components = 1:min(ncol(x), 10), show_genes = FALSE, gconvert_target = "NAME", n_genes = ifelse(show_genes, 30, 500), organism = "hsapiens", ...){
# Check assumptions
if(is.null(exp)){
stop("expression matrix has to be specified")
}
if(nrow(x) != ncol(exp)){
stop("expression matrix has to have the same number of columns as the MDS matrix rows")
}
# Create wordcloud data
wc_data = list()
for(i in components){
component_name = sprintf("Component %d", i)
wc_data[[component_name]] = spearman_mds(x[, i], exp, n_genes)
}
# Create gosummaries object
if(!show_genes){
gl = plyr::llply(wc_data, plyr::llply, function(x){
as.character(x$Term)[1:n_genes]
})
gosummaries = gosummaries.default(gl, organism = organism, ...)
}
else{
unique_ids = unlist(plyr::llply(wc_data, plyr::llply, function(x){
as.character(x$Term)[1:n_genes]
}))
i2g = convert_gene_ids(unique_ids, gconvert_target, organism)
wc_data = plyr::llply(wc_data, plyr::llply, function(x){
filter_wc_data(x, i2g, n_genes)
})
gosummaries = gosummaries_base(gl = NULL, wc_data = wc_data,
wc_algorithm = "top",
score_type = "p-value",
wordcloud_legend_title = "Spearman p-value")
}
# Add histogram data
gosummaries = add_pca.gosummaries(gosummaries, x, annotation)
gosummaries = add_to_slot.gosummaries(gosummaries, "Percentage",
rep(" ", length(components)))
return(gosummaries)
}
#' Prepare gosummaries object based on PCA results
#'
#' The PCA results are converted into a gosummaries object, by extracting genes with the largest positive and negative weights from each component.
#'
#' The usual visualisation of PCA results displays the projections of sample
#' expression on the principal axes. It shows if and how the samples cluster,
#' but not why do they behave like that. Actually, it is possible to go
#' further and annotate the axes by studying genes that have the largest
#' influence in the linear combinations that define the principal components.
#' For example, high expression of genes with large negative weights pushes
#' the samples projection to the negative side of the principal axis and large
#' positive weigths to the positive side. If a sample has highly expressed
#' genes in both groups it stays most probably in the middle. If we annotate
#' functionally the genes with highest positive and negative weights for each
#' of the principal axes, then it is possible to say which biological
#' processes drive the separation of samples on them.
#'
#' This function creates a gosummaries object for such analysis. It expects
#' the results of \code{\link{prcomp}} function. It assumes that the PCA was
#' done on samples and, thus, the row names of the rotation matrix can be
#' interpreted as gene names. For each component it annotates \code{n_genes}
#' elements with highest positive and negative weights.
#'
#' The function can also display genes instead of their GO annotations, while
#' the sizes of the gene names correspond to the PCA loadings. The
#' corresponding parameters are described in more detail in
#' \code{\link{gosummaries.MArrayLM}}.
#'
#' @param x an object of class \code{prcomp}
#' @param annotation a \code{data.frame} describing the samples, its row names
#' should match with column names of the projection matrix in x
#' @param components numeric vector of components to include
#' @param show_genes logical showing if GO categories or actual genes are
#' shown in word clouds
#' @param gconvert_target specifies gene ID format for genes showed in word
#' cloud. The name of the format is passed to \code{\link{gconvert}}, if NULL
#' original IDs are shown.
#' @param n_genes shows the number of genes used for annotating the component,
#' in case gene names are shown, it is the maximum number of genes shown in a
#' word cloud
#' @param organism the organism that the gene lists correspond to. The format
#' should be as follows: "hsapiens", "mmusculus", "scerevisiae", etc
#' @param \dots GO annotation filtering parameters as defined in
#' \code{\link{gosummaries.default}}
#' @return A gosummaries object.
#' @author Raivo Kolde <raivo.kolde@@eesti.ee>
#' @examples
#' \dontrun{
#' data(tissue_example)
#'
#' pcr = prcomp(t(tissue_example$exp))
#' gs_pca = gosummaries(pcr, annotation = tissue_example$annot)
#'
#' plot(gs_pca, classes = "Tissue", components = 1:3, fontsize = 8)
#' }
#'
#' # Read metabolomic data
#' data(metabolomic_example)
#'
#' pca = prcomp(t(metabolomic_example$data))
#'
#' # Turn off GO enricment, since it does not work on metabolites
#' gs = gosummaries(pca, annotation = metabolomic_example$annot,
#' show_gene = TRUE, gconvert_target = NULL)
#' plot(gs, class = "Tissue", components = 1:3, fontsize = 8)
#'
#' @method gosummaries prcomp
#'
#' @export
gosummaries.prcomp = function(x, annotation = NULL, components = 1:10, show_genes = FALSE, gconvert_target = "NAME", n_genes = ifelse(show_genes, 30, 500), organism = "hsapiens", ...){
gl = list()
for(i in components){
comp_name = sprintf("Principal component %d", i)
comp_weights = x$rotation[, i]
genes = rownames(x$rotation)
gl[[comp_name]] = list(
gl1 = genes[order((comp_weights))][1:n_genes],
gl2 = genes[order(-(comp_weights))][1:n_genes]
)
}
if(show_genes){
unique_ids = unique(unlist(llply(gl, llply, function(x){
x[1:min(length(x), 2 * n_genes)]
})))
i2g = convert_gene_ids(unique_ids, gconvert_target, organism)
# Create tables for wordclouds
wc_data = list()
for(i in components){
comp_weights = x$rotation[, i]
genes = rownames(x$rotation)
n_up = sum(comp_weights > 0)
ind_up = order(-(comp_weights))[1:min(n_up, 2 * n_genes)]
wc_data_up = data.frame(
Term = genes[ind_up],
Score = comp_weights[ind_up]
)
n_down = sum(comp_weights < 0)
ind_down = order((comp_weights))[1:min(n_down, 2 * n_genes)]
wc_data_down = data.frame(
Term = genes[ind_down],
Score = -comp_weights[ind_down]
)
wc_data_up = filter_wc_data(wc_data_up, i2g, n_genes)
wc_data_down = filter_wc_data(wc_data_down, i2g, n_genes)
wc_data_up = filter_pca_wc_data(wc_data_up)
wc_data_down = filter_pca_wc_data(wc_data_down)
wc_data[[sprintf("Principal component %d", i)]] = list(
wc_data_down,
wc_data_up
)
}
# Create gosummaries object
gosummaries = gosummaries_base(gl = gl, wc_data = wc_data,
wc_algorithm = "top",
score_type = "count",
wordcloud_legend_title = "PCA weight")
}
else{
gosummaries = gosummaries.default(gl, organism = organism, ...)
}
percentages = round((x$sdev ** 2)[components] / sum(x$sdev ** 2) * 100)
percentages = paste(percentages, "%", sep = "")
gosummaries = add_to_slot.gosummaries(gosummaries, "Percentage",
percentages)
gosummaries = add_pca.gosummaries(gosummaries, x, annotation)
return(gosummaries)
}
#' Prepare gosummaries object based on k-means results
#'
#' The gosummaries object is created based on the genes in the clusters, it is
#' possible to add corresponding gene expression data as well.
#'
#' The k-means clustering of expression matrix naturally defines a set of gene
#' lists that can be annotated functionally and displayed as a GOsummaries
#' figure. This functon takes in a \code{kmeans} object and and converts it to
#' a \code{gosummaries} object that can be plotted. If expression matrix is
#' attached then the panel shows the expression values for each gene as
#' boxplots, if not then number of genes is displayed
#'
#' It is advisable to filter some genes out before doing the clustering since
#' the very large gene lists (more than 2000 genes) might fail the annotation
#' step and are usually not too specific either.
#'
#' @param x an object of class \code{kmeans}
#' @param exp an expression matrix, with row names corresponding to the names
#' of the genes in clusters (Optional)
#' @param annotation a \code{data.frame} describing the samples, its row names
#' should match with column names of \code{exp} (Optional)
#' @param components numeric vector of clusters to annotate
#' @param organism the organism that the gene lists correspond to. The format
#' should be as follows: "hsapiens", "mmusculus", "scerevisiae", etc.
#' @param \dots GO annotation filtering parameters as defined in
#' \code{\link{gosummaries.default}}
#' @return A gosummaries object.
#' @author Raivo Kolde <raivo.kolde@@eesti.ee>
#' @examples
#' \dontrun{
#' data(tissue_example)
#'
#' # Filter genes and perform k-means
#' sd = apply(tissue_example$exp, 1, sd)
#' exp2 = tissue_example$exp[sd > 0.75,]
#' exp2 = exp2 - apply(exp2, 1, mean)
#' kmr = kmeans(exp2, centers = 6, iter.max = 100)
#'
#' # Create gosummaries object
#' gs_kmeans = gosummaries(kmr, exp = exp2, annotation = tissue_example$annot)
#' plot(gs_kmeans, panel_height = 0, components = 1:3, fontsize = 8)
#' plot(gs_kmeans, classes = "Tissue", components = 1:3, fontsize = 8)
#' }
#'
#' @method gosummaries kmeans
#'
#' @export
gosummaries.kmeans = function(x, exp = NULL, annotation = NULL, components = 1:length(x$size), organism = "hsapiens", ...){
gl = list()
for(i in components){
gl[[sprintf("Cluster %d", i)]] = names(x$cluster[x$cluster == i])
}
gosummaries = gosummaries.default(gl, organism = organism,
ordered_query = FALSE, ...)
if(!is.null(exp)){
gosummaries = add_expression.gosummaries(gosummaries, exp, annotation)
}
return(gosummaries)
}
#' Prepare gosummaries object based on limma results
#'
#' The gosummaries object is created based on the differentially expresed
#' genes, each contrast defines one component.
#'
#' The usual differential expression analysis involves making several
#' comparisons between treatments ehere each one yields an up and down
#' regulated gene list. In a GOsummaries figure each comparison is displayed as
#' one component with two wordclouds. If expression matrix is attached then the
#' panel shows the expression values for each gene as boxplots, if not then
#' number of genes is displayed
#'
#' It is possible to show the gene names instead of GO annotations in the
#' wordclouds. The word sizes in wordclouds are defined by the limma p-values.
#' As the gene identifiers in expression matrices are usually rather
#' unintelligible then they are automatically converted into gene names using
#' \code{\link{gconvert}} function. It is possible to show also the original
#' identifiers by setting \code{gconvert_target} to NULL. This can be useful if
#' the values do not correspond to genes, but for example metabolites.
#'
#' @param x an object of class \code{MArrayLM}
#' @param p.value p-value threshold as defined in topTable
#' @param lfc log fold change threshold as defined in topTable
#' @param adjust.method multiple testing adjustment method as defined in
#' topTable
#' @param exp an expression matrix, with row names corresponding to the names
#' of the genes in clusters (Optional)
#' @param annotation a \code{data.frame} describing the samples, its row names
#' should match with column names of \code{exp} (Optional)
#' @param components numeric vector of comparisons to annotate
#' @param show_genes logical showing if GO categories or actual genes are
#' shown in word clouds
#' @param gconvert_target specifies gene ID format for genes showed in word
#' cloud. The name of the format is passed to \code{\link{gconvert}}, if NULL
#' original IDs are shown.
#' @param n_genes maximum number of genes shown in a word cloud
#' @param organism the organism that the gene lists correspond to. The format
#' should be as follows: "hsapiens", "mmusculus", "scerevisiae", etc.
#' @param \dots GO annotation filtering parameters as defined in
#' \code{\link{gosummaries.default}}
#' @return A gosummaries object.
#' @author Raivo Kolde <raivo.kolde@@eesti.ee>
#' @examples
#'
#' \dontrun{
#' data(tissue_example)
#'
#' # Do the t-test comparisons
#' mm = model.matrix(~ factor(tissue_example$annot$Tissue) - 1)
#' colnames(mm) = make.names(levels(factor(tissue_example$annot$Tissue)))
#'
#' contrast = limma::makeContrasts(brain - cell.line,
#' hematopoietic.system - muscle,
#' cell.line - hematopoietic.system,
#' levels = colnames(mm))
#'
#' fit = limma::lmFit(tissue_example$exp, mm)
#' fit = limma::contrasts.fit(fit, contrast)
#' fit = limma::eBayes(fit)
#'
#' gs_limma = gosummaries(fit)
#' gs_limma_exp = gosummaries(fit, exp = tissue_example$exp,
#' annotation = tissue_example$annot)
#'
#' plot(gs_limma, fontsize = 8)
#' plot(gs_limma, panel_height = 0, fontsize = 8)
#' plot(gs_limma_exp, classes = "Tissue", fontsize = 8)
#' }
#'
#' @method gosummaries MArrayLM
#'
#' @export
gosummaries.MArrayLM = function(x, p.value = 0.05, lfc = 1, adjust.method = "fdr", exp = NULL, annotation = NULL, components = 1:ncol(x), show_genes = FALSE, gconvert_target = "NAME", n_genes = 30, organism = "hsapiens", ...){
# Calculate the gene list
gl = list()
perc = list()
for(i in components){
flevels = rownames(x$contrasts)
tt = limma::topTable(x, coef = i, p.value = p.value, lfc = lfc,
adjust.method = adjust.method, number = Inf)
if(nrow(tt) == 0){
tt = data.frame(logFC = numeric(0), AveExpr = numeric(0), t = numeric(0), adj.P.Val = numeric(0), P.Value = numeric(0), B = numeric(0))
}
tt$ID = rownames(tt)
gl_up = as.character(tt$ID[tt$logFC > 0])
gl_down = as.character(tt$ID[tt$logFC < 0])
g1 = paste(flevels[x$contrasts[, i] < 0], collapse = ", ")
g2 = paste(flevels[x$contrasts[, i] > 0], collapse = ", ")
title = sprintf("G1: %s; G2: %s", g1, g2)
perc[[title]] = sprintf("G1 > G2: %d\nG1 < G2: %d", length(gl_down),
length(gl_up))
gl[[title]] = list(
gl1 = gl_down,
gl2 = gl_up
)
}
# Either add gene names or GO categories
if(show_genes){
# Convert IDs
# unique_ids = unique(unlist(gl))
unique_ids = unique(unlist(llply(gl, llply, function(x){
x[1:min(length(x), 2 * n_genes)]
})))
i2g = convert_gene_ids(unique_ids, gconvert_target, organism)
# Create tables for wordclouds
wc_data = list()
flevels = rownames(x$contrasts)
for(i in components){
tt = limma::topTable(x, coef = i, p.value = p.value, lfc = lfc,
adjust.method = adjust.method, number = Inf)
if(nrow(tt) == 0){
tt = data.frame(logFC = numeric(0), AveExpr = numeric(0), t = numeric(0), adj.P.Val = numeric(0), P.Value = numeric(0), B = numeric(0))
}
tt$ID = rownames(tt)
tt = tt[!is.na(tt$adj.P.Val), ]
wc_data_up = tt[tt$logFC > 0, c("ID", "adj.P.Val")]
wc_data_down = tt[tt$logFC < 0, c("ID", "adj.P.Val")]
colnames(wc_data_up) = c("Term", "Score")
colnames(wc_data_down) = c("Term", "Score")
wc_data_up = filter_wc_data(wc_data_up, i2g, n_genes)
wc_data_down = filter_wc_data(wc_data_down, i2g, n_genes)
g1 = paste(flevels[x$contrasts[, i] < 0], collapse = ", ")
g2 = paste(flevels[x$contrasts[, i] > 0], collapse = ", ")
title = sprintf("G1: %s; G2: %s", g1, g2)
wc_data[[title]] = list(
wc_data_down,
wc_data_up
)
}
# Create gosummaries object
gosummaries = gosummaries_base(gl = gl, wc_data = wc_data,
wc_algorithm = "top")
gosummaries = add_dummydata.gosummaries(gosummaries)
}
else{
gosummaries = gosummaries.default(gl, organism = organism, ...)
}
# Add additional data
if(!is.null(exp)){
gosummaries = add_expression.gosummaries(gosummaries, exp, annotation)
}
gosummaries = add_to_slot.gosummaries(gosummaries, "Percentage", perc)
return(gosummaries)
}
##
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.