#' @name compFGA
#' @title Comparison of fraction genome altered
#' @description This function calculates Fraction Genome Altered (FGA), Fraction Genome Gained (FGG), and Fraction Genome Lost (FGL) separately, and compares them among current subtypes identified from multi-omics integrative clustering algorithms.
#' @param moic.res An object returned by `getMOIC()` with one specified algorithm or `get\%algorithm_name\%` or `getConsensusMOIC()` with a list of multiple algorithms.
#' @param segment A data frame containing segmented copy number and columns must exactly include the following elements: c('sample','chrom','start','end','value'). Column of `value` should be segments value when \code{iscopynumber = FALSE} but copy-number value when \code{iscopynumber = TRUE}. Copy-number will be converted to segments by log2(copy-number/2).
#' @param iscopynumber A logical value to indicate if the fifth column of segment input is copy-number. If segment file derived from CNV calling provides copy number instead of segment_mean value, this argument must be switched to TRUE. FALSE by default.
#' @param cnathreshold A numeric value to indicate the cutoff for identifying copy-number gain or loss. 0.2 by default.
#' @param test.method A string value to indicate the method for statistical testing. Allowed values contain c('nonparametric', 'parametric'); nonparametric means two-sample wilcoxon rank sum test for two subtypes and Kruskal-Wallis rank sum test for multiple subtypes; parametric means two-sample t-test when only two subtypes are identified, and anova for multiple subtypes comparison; "nonparametric" by default.
#' @param barcolor A string vector to indicate the mapping color for bars of FGA, FGG and FGL.
#' @param clust.col A string vector storing colors for each subtype.
#' @param fig.path A string value to indicate the output path for storing the barplot.
#' @param fig.name A string value to indicate the name of the barplot.
#' @param width A numeric value to indicate the width of barplot.
#' @param height A numeric value to indicate the height of barplot.
#' @return A list containing the following components:
#' \code{summary} a table summarizing the measurements of FGA, FGG, and FGL per sample
#' \code{FGA.p.value} a nominal p value quantifying the difference of FGA among current subtypes
#' \code{pairwise.FGA.test} a pairwise BH adjusted p value matrix for multiple comparisons of FGA if more than 2 subtypes were identified
#' \code{FGG.p.value} a nominal p value quantifying the difference of FGG among current subtypes
#' \code{pairwise.FGG.test} a pairwise BH adjusted p value matrix for multiple comparisons of FGG if more than 2 subtypes were identified
#' \code{FGL.p.value} a nominal p value quantifying the difference of FGL among current subtypes
#' \code{pairwise.FGL.test} a pairwise BH adjusted p value matrix for multiple comparisons of FGL if more than 2 subtypes were identified
#' \code{test.method} a string value indicating the statistical testing method to calculate p values
#' @export
#' @import ggplot2
#' @import patchwork
#' @importFrom dplyr group_by summarize %>%
#' @importFrom ggpubr stat_compare_means
#' @examples # There is no example and please refer to vignette.
#' @references Cerami E, Gao J, Dogrusoz U, et al. (2012). The cBio Cancer Genomics Portal: An Open Platform for Exploring Multidimensional Cancer Genomics Data. Cancer Discov, 2(5):401-404.
#' Gao J, Aksoy B A, Dogrusoz U, et al. (2013). Integrative analysis of complex cancer genomics and clinical profiles using the cBioPortal. Sci Signal, 6(269):pl1-pl1.
compFGA <- function(moic.res     = NULL,
                    segment      = NULL,
                    iscopynumber = FALSE,
                    cnathreshold = 0.2,
                    test.method  = "nonparametric",
                    barcolor     = c("#008B8A", "#F2042C", "#21498D"),
                    clust.col    = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","#9D4EDD"),
                    fig.path     = getwd(),
                    fig.name     = NULL,
                    width        = 8,
                    height       = 4) {
  # Computes per-sample FGA / FGG / FGL from a segment table, compares the
  # three fractions between the subtypes stored in `moic.res$clust.res`,
  # writes a mirrored barplot (FGA on the left, FGL/FGG on the right) to a PDF
  # under `fig.path`, prints it, and returns the per-sample table together
  # with the p values.
  #
  # NOTE(review): the reviewed copy of this function had lost every closing
  # brace and a few statement lines (the `sd(x)/sqrt(length(x))` line of the
  # standard-error helper, the FGA/FGG/FGL columns of the per-sample table,
  # the `annotate("text", ...)` heads, the layer following the last
  # `coord_flip() +`, and the on-screen print). They are restored below;
  # please confirm against version control.

  # ---- check arguments (all validation happens before any processing) ----
  required.cols <- c("sample", "chrom", "start", "end", "value")
  if (!all(is.element(required.cols, colnames(segment)))) {
    stop("segment data must have the following columns: sample, chrom, start, end, value.")
  }
  if (length(test.method) != 1L ||
      !is.element(test.method, c("nonparametric", "parametric"))) {
    stop("test.method can be one of nonparametric or parametric.")
  }

  # a factor sample column would keep unused levels after subsetting and
  # desynchronise sample names from the rows they describe
  segment$sample <- as.character(segment$sample)

  if (iscopynumber) {
    segment$value <- log2(segment$value / 2) # convert copy-number to segment-mean value
  }

  # ---- match samples between subtypes and segments ----
  all.clust <- moic.res$clust.res
  all.ids   <- as.character(all.clust$samID)
  # look the samples up by column name: the column check above is name based,
  # so `sample` is not guaranteed to be the first column
  comsam <- intersect(all.ids, unique(segment$sample))

  if (length(comsam) == 0L) {
    stop("no sample in segment data matched the samples of current subtypes.")
  }
  if (length(comsam) == nrow(all.clust)) {
    message("--all samples matched.")
  } else {
    message(paste0("--",(nrow(all.clust)-length(comsam))," samples mismatched from current subtypes."))
  }

  # match on samID instead of relying on row names being the sample IDs
  clust.res <- all.clust[match(comsam, all.ids), , drop = FALSE]
  segment   <- segment[segment$sample %in% comsam, , drop = FALSE]
  n.moic    <- length(unique(clust.res$clust))
  if (n.moic < 2) {
    stop("at least two subtypes are required among matched samples to compare FGA.")
  }

  # ---- calculate FGA, FGG and FGL per sample ----
  # segment length in bases; computed in double precision because a
  # genome-wide sum of integer lengths overflows R's integer range
  segment$bases <- as.numeric(segment$end) - as.numeric(segment$start)

  # fraction of a sample's covered bases lying in the selected segments;
  # an empty selection sums to 0, so no special case is required
  fraction <- function(bases, keep) {
    sum(bases[keep]) / sum(bases)
  }

  # split() orders samples like names(table(sample)) did: sorted unique IDs
  seg.by.sample <- split(segment[, c("value", "bases")], segment$sample)
  outTab <- data.frame(
    samID = names(seg.by.sample),
    FGA = vapply(seg.by.sample,
                 function(s) fraction(s$bases, abs(s$value) > cnathreshold),
                 numeric(1), USE.NAMES = FALSE),
    FGG = vapply(seg.by.sample,
                 function(s) fraction(s$bases, s$value > cnathreshold),
                 numeric(1), USE.NAMES = FALSE),
    FGL = vapply(seg.by.sample,
                 function(s) fraction(s$bases, s$value < (-cnathreshold)),
                 numeric(1), USE.NAMES = FALSE),
    stringsAsFactors = FALSE
  )
  outTab$Subtype <- paste0(
    "CS",
    clust.res$clust[match(outTab$samID, as.character(clust.res$samID))]
  )

  # ---- calculate mean and standard error per subtype ----
  std <- function(x, na.rm = TRUE) {
    if (na.rm) {
      x <- as.numeric(na.omit(x))
    }
    sd(x) / sqrt(length(x))
  }

  summaryFGA <- outTab %>% group_by(Subtype) %>%
    dplyr::summarize(mean = mean(FGA, na.rm = TRUE), se = std(FGA, na.rm = TRUE))
  summaryFGG <- outTab %>% group_by(Subtype) %>%
    dplyr::summarize(mean = mean(FGG, na.rm = TRUE), se = std(FGG, na.rm = TRUE))
  summaryFGL <- outTab %>% group_by(Subtype) %>%
    dplyr::summarize(mean = mean(FGL, na.rm = TRUE), se = std(FGL, na.rm = TRUE))
  summaryFGGL <- data.frame(
    rbind.data.frame(summaryFGG, summaryFGL),
    class = rep(c("FGG", "FGL"), c(nrow(summaryFGG), nrow(summaryFGL))),
    stringsAsFactors = FALSE
  )

  # ---- statistical testing ----
  # two subtypes: wilcoxon / t test; more than two: kruskal-wallis / anova
  # plus BH-adjusted pairwise comparisons
  if (n.moic == 2 && test.method == "nonparametric") {
    statistic <- "wilcox.test"
    FGA.test  <- wilcox.test(outTab$FGA ~ outTab$Subtype)$p.value
    FGG.test  <- wilcox.test(outTab$FGG ~ outTab$Subtype)$p.value
    FGL.test  <- wilcox.test(outTab$FGL ~ outTab$Subtype)$p.value
  } else if (n.moic == 2 && test.method == "parametric") {
    statistic <- "t.test"
    FGA.test  <- t.test(outTab$FGA ~ outTab$Subtype)$p.value
    FGG.test  <- t.test(outTab$FGG ~ outTab$Subtype)$p.value
    FGL.test  <- t.test(outTab$FGL ~ outTab$Subtype)$p.value
  } else if (test.method == "nonparametric") {
    statistic <- "kruskal.test"
    FGA.test  <- kruskal.test(outTab$FGA ~ outTab$Subtype)$p.value
    FGG.test  <- kruskal.test(outTab$FGG ~ outTab$Subtype)$p.value
    FGL.test  <- kruskal.test(outTab$FGL ~ outTab$Subtype)$p.value
    pairwise.FGA.test <- pairwise.wilcox.test(outTab$FGA,outTab$Subtype,p.adjust.method = "BH")
    pairwise.FGG.test <- pairwise.wilcox.test(outTab$FGG,outTab$Subtype,p.adjust.method = "BH")
    pairwise.FGL.test <- pairwise.wilcox.test(outTab$FGL,outTab$Subtype,p.adjust.method = "BH")
  } else {
    statistic <- "anova"
    FGA.test  <- summary(aov(outTab$FGA ~ outTab$Subtype))[[1]][["Pr(>F)"]][1]
    FGG.test  <- summary(aov(outTab$FGG ~ outTab$Subtype))[[1]][["Pr(>F)"]][1]
    FGL.test  <- summary(aov(outTab$FGL ~ outTab$Subtype))[[1]][["Pr(>F)"]][1]
    pairwise.FGA.test <- pairwise.t.test(outTab$FGA,outTab$Subtype,p.adjust.method = "BH")
    pairwise.FGG.test <- pairwise.t.test(outTab$FGG,outTab$Subtype,p.adjust.method = "BH")
    pairwise.FGL.test <- pairwise.t.test(outTab$FGL,outTab$Subtype,p.adjust.method = "BH")
  }

  # ---- generate barplot ----
  FGA.col <- barcolor[1]
  FGG.col <- barcolor[2]
  FGL.col <- barcolor[3]

  # significance marker drawn on each panel; include.lowest keeps a p value
  # of exactly 0 inside the first bin instead of turning it into NA
  sigLabel <- function(p) {
    as.character(cut(p, c(0, 0.001, 0.01, 0.05, 0.1, 1),
                     labels = c("****", "***", "**", "*", "."),
                     include.lowest = TRUE))
  }
  label.x <- n.moic / 2 + 0.5 # vertical centre of the (flipped) subtype axis

  # left panel: FGA, axis reversed so the bars grow towards the left
  p1 <- ggplot(summaryFGA, aes(x = Subtype, y = mean, fill = rep("0", nrow(summaryFGA)))) +
    geom_bar(stat = "identity") +
    geom_errorbar(aes(ymax = mean + se, ymin = mean - se),
                  position = position_dodge(0.9), width = 0.15) +
    annotate("text",
             x = label.x,
             y = max(summaryFGA$mean, na.rm = TRUE),
             size = 8, angle = 90, fontface = "bold",
             label = sigLabel(FGA.test)) +
    scale_x_discrete(name = "", position = "top") +
    theme_bw() +
    theme(axis.line.y  = element_line(size = 0.8),
          axis.ticks.y = element_line(size = 0.2),
          axis.text.y  = element_blank(),
          axis.title.x = element_text(vjust = -0.3, size = 12),
          axis.text.x  = element_text(size = 10, color = "black"),
          plot.margin  = unit(c(0.3, -1.7, 0.3, 0.3), "lines"),
          legend.title = element_blank()) +
    coord_flip() +
    scale_fill_manual(values = FGA.col, breaks = c("0"),
                      labels = c("Copy number-altered genome")) +
    scale_y_reverse(expand = c(0.01, 0),
                    name = "FGA (Fraction of Genome Altered)", position = "left")

  # right panel: FGG drawn on the positive side, FGL mirrored to the negative
  p2 <- ggplot(summaryFGGL, aes(x = Subtype, y = ifelse(class == "FGG", mean, -mean), fill = class)) +
    geom_bar(stat = "identity") +
    geom_errorbar(data = summaryFGGL[summaryFGGL$class == "FGG", ],
                  aes(ymax = mean + se, ymin = mean - se),
                  position = position_dodge(0.9), width = 0.15) +
    geom_errorbar(data = summaryFGGL[summaryFGGL$class == "FGL", ],
                  aes(ymax = -mean - se, ymin = -mean + se),
                  position = position_dodge(0.9), width = 0.15) +
    annotate("text",
             x = label.x,
             y = -max(summaryFGL$mean, na.rm = TRUE),
             size = 8, angle = 90, fontface = "bold",
             label = sigLabel(FGL.test)) +
    annotate("text",
             x = label.x,
             y = max(summaryFGG$mean, na.rm = TRUE),
             size = 8, angle = 90, fontface = "bold",
             label = sigLabel(FGG.test)) +
    scale_x_discrete(name = "") +
    theme_bw() +
    theme(axis.line.y  = element_line(size = 0.8),
          axis.ticks.y = element_line(size = 0.2),
          axis.text.y  = element_blank(),
          axis.title.x = element_text(vjust = -0.3, size = 12),
          axis.text.x  = element_text(size = 10, color = "black"),
          plot.margin  = unit(c(0.3, 0.3, 0.3, -1), "lines"),
          legend.title = element_blank()) +
    coord_flip() +
    scale_fill_manual(values = c(FGL.col, FGG.col), breaks = c("FGL", "FGG"),
                      labels = c("Copy number-lost genome", "Copy number-gained genome")) +
    scale_y_continuous(expand = c(0.01, 0),
                       name = "FGL or FGG (Fraction of Genome Lost or Gained)")

  # middle strip: coloured subtype labels shared by both panels
  pp <- ggplot() +
    geom_label(data = summaryFGGL,
               aes(label = Subtype, x = Subtype, fill = Subtype),
               y = 0.5,
               color = "white",
               size = 0.9*11/.pt, # match font size to theme
               hjust = 0.4, vjust = 0.5) +
    scale_fill_manual(values = clust.col) +
    theme(axis.line.y  = element_blank(),
          axis.ticks.y = element_blank(),
          axis.text.y  = element_blank(),
          axis.title.y = element_blank(),
          axis.title.x = element_blank(),
          plot.margin  = unit(c(0.3, 0, 0.3, 0), "lines")) +
    guides(fill = "none") + # `fill = FALSE` is deprecated in ggplot2
    coord_flip() +
    scale_y_reverse()

  pal <- p1 + pp + p2 +
    plot_layout(widths = c(7, 1, 7), guides = "collect") &
    theme(legend.position = "top")

  # ---- save to pdf ----
  if (is.null(fig.name)) {
    outFig <- "barplot of FGA.pdf"
  } else {
    outFig <- paste0(fig.name, ".pdf")
  }
  # pass the plot explicitly rather than depending on ggplot2's last_plot()
  ggsave(filename = file.path(fig.path, outFig), plot = pal,
         width = width, height = height)

  # ---- print to screen ----
  print(pal)

  # pairwise tests exist only when more than two subtypes were compared
  if (n.moic > 2) {
    return(list(summary           = outTab,
                FGA.p.value       = FGA.test,
                pairwise.FGA.test = pairwise.FGA.test,
                FGG.p.value       = FGG.test,
                pairwise.FGG.test = pairwise.FGG.test,
                FGL.p.value       = FGL.test,
                pairwise.FGL.test = pairwise.FGL.test,
                test.method       = statistic))
  } else {
    return(list(summary     = outTab,
                FGA.p.value = FGA.test,
                FGG.p.value = FGG.test,
                FGL.p.value = FGL.test,
                test.method = statistic))
  }
}
