#################################
#### Alignment visualization ####
#################################
AlignmentPlot <- function(path.prefix,
independent.variable,
case.group,
control.group,
phenoData.result,
my_colors) {
Alignment_report_reads <- read.csv(paste0(path.prefix,
"RNASeq_results/Alignment_Report/",
"Alignment_report_reads.csv"),
row.names = 1, header = TRUE)
Overall.map.rates <- read.csv(paste0(path.prefix,
"RNASeq_results/Alignment_Report/",
"Overall_Mapping_rate.csv"),
row.names = 1, header = TRUE)
Alignment_report_reads$samples <- row.names(Alignment_report_reads)
melted <- reshape2::melt(Alignment_report_reads, id.vars = c("samples"))
melted$value <- as.numeric(melted$value)
ggplot(data=melted,
aes(x = melted$samples,
y = melted$value,
fill = melted$variable)) +
geom_bar(stat="identity", position=position_dodge()) +
theme_bw() +
geom_text(aes(label=value,
angle = 90),
size = 3,
position = position_dodge(width=1),
check_overlap = TRUE) +
xlab("Samples") + ylab("Mapping Reads") +
ggtitle("Bar Plot (Each condition reads)") +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
plot.title = element_text(size = 15, face = "bold", hjust = 0.5),
axis.title.x = element_text(size = 10),
axis.title.y = element_text(size = 10),
legend.position="top",
legend.text = element_text(size = 6)) +
scale_fill_discrete(name = "Conditions")
ggsave(paste0(path.prefix,
"RNASeq_results/",
"Alignment_Report/Alignment_Result_ggplot2.png"),
dpi = 300,
width = 7,
height = 7)
# phenoData.result<- phenoDataWrap(path.prefix,
# independent.variable,
# case.group,
# control.group)
color.group <- c(rep(1, phenoData.result$case.group.size),
rep(2, phenoData.result$case.group.size))
Overall.map.rates$samples <- row.names(Overall.map.rates)
colnames(Overall.map.rates) <- c("rates", "samples")
ggplot(data=Overall.map.rates,
aes(x = Overall.map.rates$samples,
y = Overall.map.rates$rates)) +
geom_bar(stat = "identity", fill=my_colors[as.numeric(color.group)]) +
theme_bw() + xlab("Samples") + ylab("Mapping Rates") +
ggtitle("Bar Plot (Over all Mapping Rates)") +
geom_text(aes(label = Overall.map.rates$rates,
angle = 90),
size = 4,
position = position_dodge(width=1),
check_overlap = TRUE) +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
plot.title = element_text(size = 15, face = "bold", hjust = 0.5),
axis.title.x = element_text(size = 10),
axis.title.y = element_text(size = 10))
ggsave(paste0(path.prefix,
"RNASeq_results/",
"Alignment_Report/Overall_Mapping_rate_ggplot2.png"),
dpi = 300,
width = 7,
height = 7)
}
###################################################
#### Pre-differential expression visualization ####
###################################################
# Frequency Plot
FrequencyPlot <- function(which.analysis,
which.count.normalization,
path.prefix,
independent.variable,
case.group,
control.group,
independent.variable.data.frame,
my_colors) {
message("\u25CF Plotting Frequency plot\n")
if(!dir.exists(paste0(path.prefix, "RNASeq_results/",
which.analysis, "/images/preDE/Frequency"))){
dir.create(paste0(path.prefix, "RNASeq_results/",
which.analysis, "/images/preDE/Frequency"))
}
# csv.results <- ParseResultCSV(which.analysis,
# which.count.normalization,
# path.prefix,
# independent.variable,
# case.group,
# control.group)
# case.normalized <- csv.results$case
# control.normalized <- csv.results$control
# independent.variable.data.frame <- cbind(case.normalized, control.normalized)
rafalib::mypar(1, 1)
sample.size <- length(independent.variable.data.frame)
# , ylim = (-5, )
# Reorder 'independent.variable.data.frame' by column mean (big to small)
mns <- colMeans(independent.variable.data.frame, na.rm=TRUE)
# order(mns, decreasing = FALSE)
independent.variable.data.frame <-
independent.variable.data.frame[,order(mns, decreasing = TRUE)]
melted.data.normal <- reshape2::melt(independent.variable.data.frame)
x.range.normal <- stats::quantile(melted.data.normal$value,probs=c(0,0.9))
ggplot(aes(x = melted.data.normal$value,
colour = melted.data.normal$variable),
data = melted.data.normal) +
xlim(x.range.normal[1]-20, x.range.normal[2]+20) +
geom_density() + theme_bw() +
xlab(which.count.normalization) + ylab("Frequency") +
ggtitle("Frequency Plot (ggplot2)") +
theme(plot.title = element_text(size = 15, face = "bold", hjust = 0.5),
axis.title.x = element_text(size = 10),
axis.title.y = element_text(size = 10),
legend.position = "none")
ggsave(paste0(path.prefix,
paste0("RNASeq_results/", which.analysis,
"/images/preDE/Frequency/",
"Frequency_Plot_normalized_count_ggplot2.png")),
dpi = 300,
width = 7,
height = 7)
message("(\u2714) : '", path.prefix, "RNASeq_results/", which.analysis,
"/images/Frequency_Plot_normalized_count_ggplot2.png",
"' has been created. \n\n")
melted.data <- reshape2::melt(log2(independent.variable.data.frame+1))
x.range <- stats::quantile(melted.data$value,probs=c(0,.99))
ggplot(aes(x=melted.data$value,
colour=melted.data$variable),
data = melted.data) +
xlim(x.range[1]-5, x.range[2]+5) +
geom_density() + theme_bw() +
xlab(bquote(~Log[2](.(which.count.normalization)+1))) + ylab("Frequency") +
ggtitle("Frequency Plot (ggplot2)") +
theme(plot.title = element_text(size = 15, face = "bold", hjust = 0.5),
axis.title.x = element_text(size = 10),
axis.title.y = element_text(size = 10),
legend.position = "none")
ggsave(paste0(path.prefix,
paste0("RNASeq_results/", which.analysis,
"/images/preDE/Frequency/",
"Frequency_Plot_log_normalized_count_ggplot2.png")),
dpi = 300,
width = 7,
height = 7)
message("(\u2714) : '",
path.prefix, "RNASeq_results/", which.analysis, "/images/",
"Frequency_Plot_log_normalized_count_ggplot2.png",
"' has been created. \n\n")
}
# Box plot and violin plot
BoxViolinPlot <- function(which.analysis,
which.count.normalization,
path.prefix,
independent.variable,
case.group,
control.group,
independent.variable.data.frame,
phenoData.result,
my_colors) {
# load gene name for further usage
# csv.results <- ParseResultCSV(which.analysis,
# which.count.normalization,
# path.prefix,
# independent.variable,
# case.group,
# control.group)
# case.normalized <- csv.results$case
# control.normalized <- csv.results$control
# phenoData.result<- phenoDataWrap(path.prefix,
# independent.variable,
# case.group,
# control.group)
# independent.variable.data.frame <- cbind(case.normalized, control.normalized)
color.group <- c(rep(1, phenoData.result$case.group.size),
rep(2, phenoData.result$case.group.size))
log2.normalized.value = log2(independent.variable.data.frame+1)
log2.normalized.value <- reshape2::melt(log2.normalized.value)
colnames(log2.normalized.value) <- c("samples", which.count.normalization)
if(!dir.exists(paste0(path.prefix, "RNASeq_results/",
which.analysis, "/images/preDE/Distribution/"))){
dir.create(paste0(path.prefix, "RNASeq_results/",
which.analysis, "/images/preDE/Distribution/"))
}
# Box plot
message("\u25CF Plotting Box plot\n")
ggplot(data = log2.normalized.value,
aes(x=log2.normalized.value$samples,
y=log2.normalized.value[which.count.normalization][[1]]),
las = 2) +
geom_boxplot(fill=my_colors[as.numeric(color.group)]) +
theme_bw() +
xlab("Samples") + ylab(bquote(~Log[2](.(which.count.normalization)+1))) +
ggtitle("Box Plot (ggplot2)") +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
plot.title = element_text(size = 15, face = "bold", hjust = 0.5),
axis.title.x = element_text(size = 10),
axis.title.y = element_text(size = 10))
ggsave(paste0(path.prefix,
paste0("RNASeq_results/", which.analysis,
"/images/preDE/Distribution/Box_Plot_ggplot2.png")),
dpi = 300,
width = 7,
height = 7)
# dev.off()
message("(\u2714) : '",
path.prefix, "RNASeq_results/",which.analysis, "/images/preDE/",
"Distribution/Box_Plot_ggplot2.png' has been created. \n\n")
# Violin plot
message("\u25CF Plotting Violin plot\n")
ggplot(data = log2.normalized.value,
aes(x=log2.normalized.value$samples,
y=log2.normalized.value[which.count.normalization][[1]],
color=log2.normalized.value$samples),
las = 2) +
geom_violin() +
scale_color_manual(values=my_colors[as.numeric(color.group)]) +
stat_summary(fun.y=mean, geom="point", shape=23, size=2) +
theme_bw() +
xlab("Samples") + ylab(bquote(~Log[2](.(which.count.normalization)+1))) +
ggtitle("Violin Plot (ggplot2)") +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
plot.title = element_text(size = 15, face = "bold", hjust = 0.5),
axis.title.x = element_text(size = 10),
axis.title.y = element_text(size = 10),
legend.position = "none")
ggsave(paste0(path.prefix,
paste0("RNASeq_results/", which.analysis,
"/images/preDE/Distribution/Violin_Plot_ggplot2.png")),
dpi = 300,
width = 7,
height = 7)
message("(\u2714) : '",
path.prefix, "RNASeq_results/", which.analysis,
"/images/preDE/Distribution/Violin_Plot_ggplot2.png",
"' has been created. \n\n")
}
# PCA plot
PCAPlot <- function(which.analysis,
which.count.normalization,
path.prefix,
independent.variable,
case.group,
control.group,
independent.variable.data.frame,
phenoData.result,
my_colors){
# http://www.sthda.com/english/articles/31-principal-component-methods-in-r-
#practical-guide/112-pca-principal-component-analysis-essentials/
# load gene name for further usage
message("\u25CF Plotting PCA related plot\n")
# csv.results <- ParseResultCSV(which.analysis,
# which.count.normalization,
# path.prefix,
# independent.variable,
# case.group,
# control.group)
# case.normalized <- csv.results$case
# control.normalized <- csv.results$control
if(!dir.exists(paste0(path.prefix, "RNASeq_results/",
which.analysis, "/images/preDE/PCA/"))){
dir.create(paste0(path.prefix, "RNASeq_results/",
which.analysis, "/images/preDE/PCA/"))
}
# phenoData.result<- phenoDataWrap(path.prefix,
# independent.variable,
# case.group,
# control.group)
# independent.variable.data.frame <- cbind(case.normalized, control.normalized)
grp = factor(c(rep(case.group, phenoData.result$case.group.size),
rep(control.group, phenoData.result$case.group.size)),
levels = c(case.group, control.group))
# color group
color.group <- c(rep(1, phenoData.result$case.group.size),
rep(2, phenoData.result$case.group.size))
normalized.trans <- data.frame(t(independent.variable.data.frame))
normalized.trans$attribute <- grp
pca <- FactoMineR::PCA(normalized.trans,
ncp=2,
quali.sup=length(normalized.trans),
graph = FALSE)
eig.val <- factoextra::get_eigenvalue(pca)
# Write out general PCA
write.csv(data.frame(eig.val),
file = paste0(path.prefix, "RNASeq_results/", which.analysis,
"/images/preDE/PCA/PCA_dimension_factoextra.csv"),
row.names=TRUE)
factoextra::fviz_eig(pca, addlabels = TRUE,
ylim = c(0, 50),
main = "PCA Dimensions") +
labs(title ="PCA Dimensions Plot (factoextra)") +
theme(plot.title = element_text(size = 15, hjust = 0.5, face = "bold"),
axis.title.x = element_text(size = 10),
axis.title.y = element_text(size = 10))
ggsave(paste0(path.prefix, paste0("RNASeq_results/", which.analysis,
"/images/preDE/PCA/",
"Dimension_PCA_Plot_factoextra.png")),
dpi = 300,
width = 7,
height = 7)
message("(\u2714) : '", path.prefix, "RNASeq_results/", which.analysis,
"/images/preDE/PCA/Dimension_PCA_Plot_factoextra.png",
"' has been created. \n")
#var$coord: coordinates of variables to create a scatter plot
#var$cos2: represents the quality of representation for variables on the
# factor map. It’s calculated as the squared coordinates:
# var.cos2 = var.coord * var.coord.
#var$contrib: contains the contributions (in percentage) of the variables to
# the principal components. The contribution of a variable (var)
# to a given principal component is (in percentage) :
# (var.cos2 * 100) / (total cos2 of the component).
#var <- get_pca_var(res.pca)
factoextra::fviz_pca_ind(pca,
xlab = paste0("PC1(",
round(data.frame(eig.val)$
variance.percent[1], 2), "%)"),
ylab = paste0("PC2(",
round(data.frame(eig.val)$
variance.percent[2],2), "%)"),
legend.title = "Treatment variable",
legend.position = "top",
pointshape = 21, pointsize = 3.5, geom.ind = "point",
fill.ind = normalized.trans$attribute,
palette = my_colors,
habillage = normalized.trans$attribute,
addEllipses = TRUE) +
labs(title ="PCA Plot (factoextra)") +
theme(plot.title = element_text(size = 15, hjust = 0.5, face = "bold"),
axis.title.x = element_text(size = 10),
axis.title.y = element_text(size = 10))
ggsave(paste0(path.prefix, "RNASeq_results/", which.analysis,
"/images/preDE/PCA/PCA_Plot_factoextra.png"),
dpi = 300,
width = 7,
height = 7)
message("(\u2714) : '", path.prefix, "RNASeq_results/", which.analysis,
"/images/preDE/PCA/PCA_Plot_factoextra.png' has been created. \n")
normalized.res.PCA <- FactoMineR::PCA(normalized.trans,
scale.unit=TRUE, ncp=2,
quali.sup=length(normalized.trans),
graph = FALSE)
groups <- c(case.group, control.group)
PCA.data.frame <- data.frame("PC1" = normalized.res.PCA$ind$coord[,1],
"PC2" = normalized.res.PCA$ind$coord[,2])
ggplot(PCA.data.frame,
aes(x=normalized.res.PCA$ind$coord[,1] ,
y=normalized.res.PCA$ind$coord[,2])) +
geom_point(aes(color = factor(groups[as.numeric(color.group)],
levels = c(case.group, control.group))),
size = 3.5) +
scale_color_manual(values = my_colors) +
theme_bw() +
xlab(paste0("PC1(", round(normalized.res.PCA$eig[,2][1], 2), "%)")) +
ylab(paste0("PC2(", round(normalized.res.PCA$eig[,2][2], 2), "%)")) +
ggtitle("PCA Plot (ggplot2)") +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
plot.title = element_text(size = 15, face = "bold", hjust = 0.5)) +
theme(axis.title.x = element_text(size = 10),
axis.title.y = element_text(size = 10),
legend.position="top") +
labs(color = independent.variable) +
geom_hline(yintercept=0, linetype="dashed", color = "black") +
geom_vline(xintercept=0, linetype="dashed", color = "black")
ggsave(paste0(path.prefix, "RNASeq_results/", which.analysis,
"/images/preDE/PCA/PCA_Plot_ggplot2.png"),
dpi = 300,
width = 7,
height = 7)
message("(\u2714) : '",
path.prefix, "RNASeq_results/", which.analysis,
"/images/preDE/PCA/PCA_Plot_ggplot2.png' has been created. \n\n")
}
#Correlation plot
CorrelationPlot <- function(which.analysis,
which.count.normalization,
path.prefix,
independent.variable,
case.group,
control.group,
independent.variable.data.frame,
phenoData.result){
# load gene name for further usage
message("\u25CF Plotting Correlation plot\n")
# csv.results <- ParseResultCSV(which.analysis,
# which.count.normalization,
# path.prefix,
# independent.variable,
# case.group,
# control.group)
# case.normalized <- csv.results$case
# control.normalized <- csv.results$control
if(!dir.exists(paste0(path.prefix, "RNASeq_results/",
which.analysis, "/images/preDE/Correlation/"))){
dir.create(paste0(path.prefix, "RNASeq_results/",
which.analysis, "/images/preDE/Correlation/"))
}
# phenoData.result<- phenoDataWrap(path.prefix,
# independent.variable,
# case.group,
# control.group)
# independent.variable.data.frame <- cbind(case.normalized, control.normalized)
res <- round(stats::cor(independent.variable.data.frame,
method = c("pearson", "kendall", "spearman")), 3)
max.value <- max(res)
min.value <- min(res)
# Correlation_dot_plot.png
col <- colorRampPalette(c("#4477AA", "#FFFFFF", "#BB4444"))
png(paste0(path.prefix, "RNASeq_results/", which.analysis,
"/images/preDE/Correlation/Correlation_Dot_Plot_corrplot.png"),
width=5,
height=5,
units="in",
res=300)
cex.before <- par("cex")
par(cex = 0.5)
corrplot::corrplot(res, col=col(200), type = "upper",
tl.col = "black", tl.srt = 45, addCoef.col = "black",
cl.cex = 1, mar=c(0,0,4,0),
cl.lim = c(min.value, max.value), is.corr = FALSE)
mtext(expression(bold("Correlation Dot Plot (corrplot)")))
par(cex = cex.before)
dev.off()
message("(\u2714) : '",
path.prefix, "RNASeq_results/", which.analysis,
"/images/preDE/Correlation/Correlation_Dot_Plot_corrplot.png",
"' has been created. \n")
# Correlation_plot.png
png(paste0(path.prefix, "RNASeq_results/", which.analysis,
"/images/preDE/Correlation/",
"Correlation_Bar_Plot_PerformanceAnalytics.png"),
width=5,
height=5,
units="in",
res=300)
PerformanceAnalytics::chart.Correlation(res, histogram=TRUE, pch=19)
dev.off()
message("(\u2714) : '", path.prefix, "RNASeq_results/", which.analysis,
"/images/preDE/Correlation/",
"Correlation_Bar_Plot_PerformanceAnalytics.png' has been created. \n")
# Correlation_heat_plot.png
melted_res <- reshape2::melt(res)
max.value <- max(melted_res$value)
min.value <- min(melted_res$value)
colnames(melted_res) <- c("Var1", "Var2", "value")
ggplot(melted_res,
aes(melted_res$Var1,
melted_res$Var2,
fill = melted_res$value))+
xlab("Samples") + ylab("Samples") +
geom_tile(color = "white")+
scale_fill_gradient2(low = "blue", mid ="white", high = "red",
midpoint = (max.value + min.value)/2,
limit = c(min.value,max.value),
space = "Lab", name="Correlation") +
theme_minimal()+ # minimal theme
theme(axis.text.x = element_text(angle = 30, vjust = 1,
size = 10, hjust = 1))+
labs(title ="Correlation Heat Plot (ggplot2)") +
theme(plot.title = element_text(size = 15, hjust = 0.5, face = "bold"),
axis.title.x = element_text(size = 10 ),
axis.title.y = element_text(size = 10)) +
coord_fixed()
# Print the heatmap
ggsave(paste0(path.prefix, "RNASeq_results/", which.analysis,
"/images/preDE/Correlation/Correlation_Heat_Plot_ggplot2.png"),
dpi = 300,
width = 7,
height = 7)
message("(\u2714) : '",
path.prefix, "RNASeq_results/", which.analysis,
"/images/preDE/Correlation/Correlation_Heat_Plot_ggplot2.png",
"' has been created. \n\n")
}
###################################################
#### Differential expression visualization ####
###################################################
# Volcano plot
VolcanoPlot <- function(which.analysis,
which.count.normalization,
path.prefix,
independent.variable,
case.group,
control.group,
condition.pval,
condition.log2FC,
normalized_dataset) {
# load gene name for further usage
message("\u25CF Plotting Volcano plot\n")
# normalized_dataset <- read.csv(paste0(path.prefix, "RNASeq_results/",
# which.analysis, "/",
# strsplit(which.analysis, "_")[[1]][1],
# "_normalized_result.csv"))
## Volcano plot
# Make a basic volcano plot
log2FC.pval <- data.frame("log2FC" = normalized_dataset$log2FC,
"pval" = normalized_dataset$pval)
down.regulated.gene <- log2FC.pval[((log2FC.pval$log2FC < -condition.log2FC) &
(log2FC.pval$pval < condition.pval)),]
up.regulated.gene <- log2FC.pval[((log2FC.pval$log2FC > condition.log2FC) &
(log2FC.pval$pval < condition.pval)),]
all.x.value <- c(down.regulated.gene$log2FC, up.regulated.gene$log2FC)
x.range <- stats::quantile(all.x.value, probs=c(0.05,0.95))
x.limit <- max(abs(x.range)) + 5
all.y.value <- c(-log10(down.regulated.gene$pval),
-log10(up.regulated.gene$pval))
y.range <- stats::quantile(all.y.value, probs=c(0.05,0.95))
y.limit <- max(abs(y.range)) + 5
ggplot(log2FC.pval,
aes(x = log2FC.pval$log2FC, y=-log10(log2FC.pval$pval))) +
geom_point(size = 0.8) +
xlim(-x.limit, x.limit) + ylim(0, y.limit) +
theme_bw() +
xlab(bquote(~Log[2](fold~change))) + ylab(bquote(~-Log[10](p-value))) +
ggtitle("Volcano (ggplot2)") +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
plot.title = element_text(size = 15, face = "bold", hjust = 0.5),
axis.title.x = element_text(size = 10),
axis.title.y = element_text(size = 10),
legend.position="top") +
geom_point(data = down.regulated.gene,
aes(x = down.regulated.gene$log2FC,
y=-log10(down.regulated.gene$pval)),
colour = "#00cc00",
size = 0.8) +
geom_point(data = up.regulated.gene,
aes(x = up.regulated.gene$log2FC,
y=-log10(up.regulated.gene$pval)),
colour = "red",
size = 0.8) +
geom_hline(yintercept = -log10(condition.pval),
linetype="dashed",
color = "black") +
geom_vline(xintercept = 1,
linetype="dashed",
color = "black") +
geom_vline(xintercept = -1,
linetype="dashed",
color = "black")
ggsave(paste0(path.prefix, "RNASeq_results/", which.analysis,
"/images/DE/Volcano_Plot_graphics.png"),
dpi = 300,
width = 7,
height = 7)
message("(\u2714) : '", path.prefix, "RNASeq_results/", which.analysis,
"/images/DE/Volcano_Plot_ggplot2.png' has been created. \n\n")
}
# MA plot
MAPlot <- function(which.analysis,
which.count.normalization,
path.prefix,
independent.variable,
case.group,
control.group,
condition.pval,
csv.results,
normalized_dataset) {
# load gene name for further usage
# csv.results <- ParseResultCSV(which.analysis,
# which.count.normalization,
# path.prefix,
# independent.variable,
# case.group,
# control.group,
# case.normalized,
# control.normalized,
# normalized_dataset)
case.normalized <- csv.results$case
case.size <- length(case.normalized)
control.normalized <- csv.results$control
control.size <- length(control.normalized)
message("\u25CF Plotting MA plot\n")
# normalized_dataset <- read.csv(paste0(path.prefix, "RNASeq_results/",
# which.analysis, "/",
# strsplit(which.analysis, "_")[[1]][1],
# "_normalized_result.csv"))
## Ma plot
ggplot(normalized_dataset,
aes(x = log2(normalized_dataset[,1+case.size+control.size+3]),
y = normalized_dataset$log2FC,
colour = normalized_dataset$pval<condition.pval)) +
xlab(bquote(~Log[2](.(which.count.normalization)))) +
ylab(bquote(~Log[2](fold~change))) +
theme_bw() +
ggtitle("MA Plot (ggplot2)") +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
plot.title = element_text(size = 15, face = "bold", hjust = 0.5),
axis.title.x = element_text(size = 10),
axis.title.y = element_text(size = 10)) +
scale_color_manual(values=c("#999999", "#FF0000")) +
geom_point(size = 0.8) +
geom_hline(yintercept=0, color="blue") +
ylim(-6, 6) +
theme(legend.position="top") +
labs(color=paste0("p-value < ", condition.pval))
ggsave(paste0(path.prefix, "RNASeq_results/", which.analysis,
"/images/DE/MA_Plot_ggplot2.png"),
dpi = 300,
width = 7,
height = 7)
message("(\u2714) : '", path.prefix, "RNASeq_results/", which.analysis,
"/images/preDE/MA_Plot_ggplot2.png' has been created. \n\n")
}
# PCA plot
DEPCAPlot <- function(which.analysis,
which.count.normalization,
path.prefix,
independent.variable,
case.group,
control.group,
DE.csv.results,
phenoData.result,
my_colors){
# http://www.sthda.com/english/articles/31-principal-component-methods-in-r-
#practical-guide/112-pca-principal-component-analysis-essentials/
# load gene name for further usage
message("\u25CF Plotting PCA related plot\n")
# DE.csv.results <- read.csv(paste0(path.prefix, "RNASeq_results/",
# which.analysis, "/",
# strsplit(which.analysis,
# split = "_")[[1]][1],
# "_normalized_DE_result.csv"))
# phenoData.result<- phenoDataWrap(path.prefix,
# independent.variable,
# case.group,
# control.group)
# This is to get the result of normalized count
DE.csv.normalized.count.only <-
DE.csv.results[,2:(phenoData.result$case.group.size +
phenoData.result$case.group.size + 1)]
if(!dir.exists(paste0(path.prefix, "RNASeq_results/",
which.analysis, "/images/DE/PCA/"))){
dir.create(paste0(path.prefix, "RNASeq_results/",
which.analysis, "/images/DE/PCA/"))
}
# The independent.variable group
grp = factor(c(rep(case.group, phenoData.result$case.group.size),
rep(control.group, phenoData.result$case.group.size)),
levels = c(case.group, control.group))
# color group
color.group <- c(rep(1, phenoData.result$case.group.size),
rep(2, phenoData.result$case.group.size))
normalized.trans <- data.frame(t(DE.csv.normalized.count.only))
normalized.trans$attribute <- grp
pca = FactoMineR::PCA(normalized.trans, ncp=2,
quali.sup=length(normalized.trans),
graph = FALSE)
eig.val <- factoextra::get_eigenvalue(pca)
# Write out DE PCA
write.csv(data.frame(eig.val),
file = paste0(path.prefix, "RNASeq_results/", which.analysis,
"/images/DE/PCA/PCA_dimension_factoextra.csv"),
row.names=TRUE)
factoextra::fviz_eig(pca, addlabels = TRUE, ylim = c(0, 50)) +
labs(title ="PCA Dimensions Plot (factoextra)") +
theme(plot.title = element_text(size = 15, hjust = 0.5, face = "bold"),
axis.title.x = element_text(size = 10),
axis.title.y = element_text(size = 10))
ggsave(paste0(path.prefix,
paste0("RNASeq_results/", which.analysis,
"/images/DE/PCA/Dimension_PCA_Plot_factoextra.png")),
dpi = 300,
width = 7,
height = 7)
message("(\u2714) : '",
path.prefix, "RNASeq_results/", which.analysis,
"/images/DE/PCA/Dimension_PCA_Plot_factoextra.png",
"' has been created. \n")
#var$coord: coordinates of variables to create a scatter plot
#var$cos2: represents the quality of representation for variables on the
# factor map. It’s calculated as the squared coordinates:
# var.cos2 = var.coord * var.coord.
#var$contrib: contains the contributions (in percentage) of the variables
# to the principal components. The contribution of a variable
# (var) to a given principal component is (in percentage) :
# (var.cos2 * 100) / (total cos2 of the component).
#var <- get_pca_var(res.pca)
factoextra::fviz_pca_ind(pca,
xlab = paste0("PC1(",
round(data.frame(eig.val)$
variance.percent[1], 2), "%)"),
ylab = paste0("PC2(",
round(data.frame(eig.val)$
variance.percent[2],2), "%)"),
legend.title = "Treatment variable",
legend.position = "top",
pointshape = 21, pointsize = 3.5, geom.ind = "point",
fill.ind = normalized.trans$attribute,
palette = my_colors,
habillage = normalized.trans$attribute,
addEllipses = TRUE
) +
labs(title ="PCA Plot (factoextra)") +
theme(plot.title = element_text(size = 15, hjust = 0.5, face = "bold"),
axis.title.x = element_text(size = 10),
axis.title.y = element_text(size = 10))
ggsave(paste0(path.prefix, "RNASeq_results/", which.analysis,
"/images/DE/PCA/PCA_Plot_factoextra.png"),
dpi = 300,
width = 7,
height = 7)
message("(\u2714) : '",
path.prefix, "RNASeq_results/", which.analysis,
"/images/DE/PCA/PCA_Plot_factoextra.png' has been created. \n")
normalized.res.PCA <- FactoMineR::PCA(normalized.trans,
scale.unit=TRUE, ncp=2,
quali.sup=length(normalized.trans),
graph = FALSE)
groups <- c(case.group, control.group)
PCA.data.frame <- data.frame("PC1" = normalized.res.PCA$ind$coord[,1],
"PC2" = normalized.res.PCA$ind$coord[,2])
ggplot(PCA.data.frame, aes(x=normalized.res.PCA$ind$coord[,1] ,
y=normalized.res.PCA$ind$coord[,2])) +
geom_point(aes(color = factor(groups[as.numeric(color.group)],
levels = c(case.group, control.group))),
size = 3.5) +
scale_color_manual(values = my_colors) +
theme_bw() +
xlab(paste0("PC1(", round(normalized.res.PCA$eig[,2][1], 2), "%)")) +
ylab(paste0("PC2(", round(normalized.res.PCA$eig[,2][2], 2), "%)")) +
ggtitle("PCA Plot (ggplot2)") +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
plot.title = element_text(size = 15, face = "bold", hjust = 0.5),
axis.title.x = element_text(size = 10),
axis.title.y = element_text(size = 10)) +
theme(legend.position="top") +
labs(color = independent.variable) +
geom_hline(yintercept=0, linetype="dashed", color = "black") +
geom_vline(xintercept=0, linetype="dashed", color = "black")
ggsave(paste0(path.prefix, "RNASeq_results/", which.analysis,
"/images/DE/PCA/PCA_Plot_ggplot2.png"),
dpi = 300,
width = 7,
height = 7)
message("(\u2714) : '",
path.prefix, "RNASeq_results/", which.analysis,
"/images/DE/PCA/PCA_Plot_ggplot2.png' has been created. \n\n")
}
DEHeatmap <- function(which.analysis,
which.count.normalization,
path.prefix,
independent.variable,
case.group,
control.group,
DE.csv.results,
phenoData.result,
my_colors) {
# load gene name for further usage
message("\u25CF Plotting Differential Expressed Heatmap related plot\n")
# DE.csv.results <- read.csv(paste0(path.prefix, "RNASeq_results/",
# which.analysis, "/",
# strsplit(which.analysis,
# split = "_")[[1]][1],
# "_normalized_DE_result.csv"))
# phenoData.result<- phenoDataWrap(path.prefix,
# independent.variable,
# case.group,
# control.group)
## Maybe change !!!! temp !!
DE.csv.results <- DE.csv.results[DE.csv.results$gene.name != ".",]
message(" \u25CF Checking found differential express transcript term.\n")
if (nrow(DE.csv.results) == 0) {
message(" \u25CF (\u26A0) No term were found.\n\n")
} else {
DE.csv.results <- DE.csv.results[order(abs(DE.csv.results$log2FC),
decreasing = TRUE),]
DE.csv.results <- DE.csv.results[!duplicated(DE.csv.results$gene.name),]
# Decide sample name whether to show
if (phenoData.result$case.group.size + phenoData.result$case.group.size > 16) {
message(" \u25CF Total sample number: ",
phenoData.result$case.group.size +
phenoData.result$case.group.size,
" (sample name will not be shown).\n")
show.sample.name <- FALSE
} else {
message(" \u25CF Total sample number: ",
phenoData.result$case.group.size +
phenoData.result$case.group.size,
" (sample name will be shown).\n")
show.sample.name <- TRUE
}
# Decide gene name whether to show
DE.csv.normalized.count.only <-
DE.csv.results[,2:(phenoData.result$case.group.size +
phenoData.result$case.group.size + 1)]
row.names(DE.csv.normalized.count.only) <- DE.csv.results$gene.name
if (nrow(DE.csv.normalized.count.only) > 60) {
message(" \u25CF Found ",
nrow(DE.csv.normalized.count.only),
" terms. More than 60 terms (gene names will not be shown).\n")
show.gene.name <- FALSE
} else {
message(" \u25CF Found ", nrow(DE.csv.normalized.count.only),
" terms (gene names will be shown).\n")
show.gene.name <- TRUE
}
message(" \u25CF Calculating log2(",which.count.normalization, "+1).\n")
log.data.frame <- log2(DE.csv.normalized.count.only+1)
# Getting log control mean
control.log.average <-
rowMeans(log.data.frame[seq_len(phenoData.result$case.group.size)])
message(" \u25CF Each log2(", which.count.normalization,
"+1) minus average of control.\n")
log.data.frame.minus <- log.data.frame - control.log.average
df.new <- scale(log.data.frame.minus)
# Do for annotation ! (grouping in pheatmap)
annotation_list = factor(c(rep(case.group,
phenoData.result$case.group.size),
rep(control.group,
phenoData.result$case.group.size)),
levels = c(case.group, control.group))
annotation <- data.frame(Var1 = annotation_list)
colnames(annotation) <- independent.variable
# check out the row names of annotation
rownames(annotation) <- colnames(df.new)
my_colors_list <- my_colors
names(my_colors_list) <- c(case.group, control.group)
anno_colors <- list(Var1 = my_colors_list)
names(anno_colors) <- independent.variable
# Check Na(list) or Infinite(numeric)
if (any(is.na((df.new)) | is.infinite((df.new)))) {
message("(\u26A0) : There are invalid value after ",
"scaling DEG dataframe. Heatmap can't be drawn !\n\n")
} else {
redgreen <- c("blue", "white", "red")
pal <- colorRampPalette(redgreen)(100)
## Not change distance , highlight case and control
pheatmap::pheatmap(df.new, scale = "row",
xlab = "samples", ylab = "transcript names",
margins = c(10,8), col = pal,
main = "Heatmap Plot (pheatmap)",
cluster_rows = TRUE, cluster_cols = FALSE,
show_rownames = show.gene.name,
show_colnames = show.sample.name,
annotation_col = annotation,
annotation_colors = anno_colors,
filename = paste0(path.prefix, "RNASeq_results/",
which.analysis,
"/images/DE/",
"Heatmap_Plot_pheatmap.png"),
fontsize = 7)
message("(\u2714) : '", path.prefix, "RNASeq_results/",
which.analysis, "/images/DE/Heatmap_Plot_pheatmap.png",
"' has been created. \n\n")
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.