#' PCA of samples (for DESeq2 object)
#'
#' PCA of samples (for DESeq2 object)
#'
#' @param dds a \code{DESeqDataSet} object
#' @param group vector of the condition from which each sample belongs
#' @param n number of features to keep among the most variant
#' @param type.trans type of transformation to use (\code{"VST"} or \code{"rlog"})
#' @param out \code{TRUE} to export the figure
#' @param col colors of the bars
#' @param axesList list of couples of axes on which projecting the samples
#' @param versionName versionName of the project
#' @return A plot of the two first principal components and the coordinates of the samples
#' @author Marie-Agnes Dillies and Hugo Varet
# created March 27th, 2013
# modified Sept 19th, 2013 (adapted for DESeq2)
# modified Sept 20th, 2013 (estimateDispersions now run in the main script, better display of labels)
# modified Sept 24th, 2013 (colors)
# modified Sept 30th, 2013 (add type.norm)
# modified Oct 8th, 2013 (add lightgray lines on the plot)
# modified Oct 14th, 2013 (type.norm > type.trans and plotPCA > PCAPlot)
# modified Oct 25th, 2013 (modification for multiple factors)
# modified Mar 21st, 2014 (removed outputfile argument)
# modified April 2nd, 2014 (use of varianceStabilizingTransformation() and rlogTransformation() instead of getVarianceStabilizedData() and rlogData())
# modified July 30th, 2014 (remove axes argument)
# modified Aug 5th, 2014 (removed graphDir argument)
# modified March 9th, 2015 (diagnostic eigen values)
# modified June 15th, 2015 (take the mininimum between 500 and the number of features)
# modified April 04th, 2016 (simplification with tmpFunction(), add axesList argument)
# modified August 26th, 2019 (ggplot2)
PCAPlot <- function(dds, group, n=min(500,nrow(counts(dds))), type.trans=c("VST","rlog"), out=TRUE,
col=c("lightblue","orange","MediumVioletRed","SpringGreen"), axesList=list(c(1,2), c(1,3)),
versionName="."){
type.trans <- type.trans[1]
group <- data.frame(group=apply(group, 1, paste, collapse="-"))
group$group <- factor(group$group, levels=unique(group$group))
if (out) pdf(file=paste0("figures/", versionName, "-PCA.pdf"), width=8)
# normalisation vst ou rlog
if (type.trans == "VST") counts.trans <- assay(varianceStabilizingTransformation(dds))
else counts.trans <- assay(rlogTransformation(dds))
# PCA sur les 500 features les plus variants (pour reduire la dimension)
rv = apply(counts.trans, 1, var, na.rm=TRUE)
pca = prcomp(t(counts.trans[order(rv, decreasing = TRUE), ][1:n,]))
prp <- pca$sdev^2 * 100 / sum(pca$sdev^2)
prp <- round(prp,2)
tmpFunction <- function(axes=c(1, 2)){
index1 <- axes[1]
index2 <- axes[2]
d <- data.frame(x=pca$x[,index1],
y=pca$x[,index2],
group=group$group,
sample=factor(rownames(pca$x), levels=rownames(pca$x)))
ggplot(data=d, aes(x=.data$x, y=.data$y, color=group, label=sample)) +
geom_point(show.legend=TRUE, size=3) +
labs(color="") +
scale_colour_manual(values=col) +
geom_text_repel(show.legend=FALSE, size=5, point.padding=0.2) +
xlab(paste0("PC", index1, " (",prp[index1],"%)")) +
ylab(paste0("PC", index2, " (",prp[index2],"%)")) +
ggtitle(paste(versionName, "Principal Component Analysis", sep=" - "))
}
for (axes in axesList) print(tmpFunction(axes))
if (out) dev.off()
pdf(paste0("figures/", versionName, "-diagEigenValuesPCA.pdf"))
d <- data.frame(pc=factor(1:length(pca$sdev)), eg=pca$sdev^2)
d <- d[1:min(15, nrow(d)),]
print(ggplot(d, aes(x=.data$pc, y=.data$eg)) +
geom_bar(stat="identity", show.legend=FALSE) +
scale_y_continuous(expand=expansion(mult=c(0.01, 0.05))) +
xlab("Principal component") +
ylab("Eigen value") +
ggtitle(paste(versionName, "Eigen values of the PCA", sep=" - ")))
dev.off()
return(invisible(pca$x))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.