#'Principle component analysis with scatterplots from PAC
#'
#'\code{PAC_pca} PAC principle component analysis.
#'
#'Given a PAC object the function will perform a principle component analysis by
#'calling the PCA function in the FactoMineR package, and then plot scatter
#'plots with the fviz_pca functions of the factoextra package.
#'
#'@family PAC analysis
#'
#'@seealso \url{https://github.com/Danis102} for updates on the current package.
#'
#'@param PAC PAC-list object.
#'
#'@param norm Character indicating what type of data to be used. If
#' type="counts" the PCA will be conducted on the raw Counts. If type="cpm" the
#' analysis will be done on cpm values returned from \code{PAC_norm} function
#' and stored in the norm folder of the PAC-list object. The name of any other
#' table in the norm(PAC) folder can also be used.
#'
#'@param type Character indicating what type of pca and plot to be drawn. When
#' type="pheno" then results will be drawned from a sample perspective, while
#' if type="anno" then results will be presented from a sequence perspective.
#' If type="both" then a biplot will be drawn, representing both pheno and anno
#' features. Note, the 1st object (target column name) in the pheno_target or
#' anno_taget can be used to specifically highlight sample and sequence groups
#' (see below).
#'
#'@param graphs Logical whether or not scatter plots should be plotted.
#' (default=TRUE)
#'
#'@param anno_target List with: 1st object being a character vector of target
#' column(s) in Anno, 2nd object being a character vector of the target
#' features(s) in the target column (1st object). The 1st object also control
#' sequence feature colors when type="anno". (default=NULL)
#'
#'@param pheno_target List with: 1st object being a character vector of target
#' column(s) in Pheno, 2nd object being a character vector of the target
#' group(s) in the target column (1st object). The 1st object also control
#' group colors when type="pheno" or "both". (default=NULL)
#'
#'@param labels If labels="sample", then points will be labeled with the names
#' rownames in pheno(PAC) or anno(PAC) depending on type="pheno" or
#' type="anno", respectively. Point labels can also be manually provided as a
#' character vector in the same length as the intended target. As default,
#' labels=NULL where only point are plotted.
#'@param ... parsing to the fviz_pca functions of the factoextra package.
#'@return A PCA list object generated by the PCA function in the FactoMineR
#' package
#'
#' @examples
#'
#' # Load a PAC-object
#' load(system.file("extdata", "drosophila_sRNA_pac_filt_anno.Rdata",
#' package = "seqpac", mustWork = TRUE))
#'
#' # Simple sample counts pca and scatterplots with no groupings:
#' pca_cnt <- PAC_pca(pac, norm="counts")
#'
#' # Sample cpm pca and scatterplots with color groupings from
#' # pheno(PAC)$type column:
#' pca_cpm <- PAC_pca(pac, norm="cpm", type="pheno",
#' pheno_target=list("stage"))
#'
#' # Same but with or without text labels:
#' pca_cpm_lab <- PAC_pca(pac, norm="cpm", type="pheno",
#' pheno_target=list("stage"), labels="samples")
#' pca_cpm_lab2 <- PAC_pca(pac, norm="cpm", type="pheno",
#' pheno_target=list("stage"),
#' labels=pheno(pac)$batch)
#'
#' # Cpm pca with anno(PAC) sequence features instead of Pheno samples and
#' # restricted to read size 20-22:
#' pca_cpm_anno <- PAC_pca(pac, norm="cpm", type="anno",
#' anno_target=list("Size", 20:22))
#'
#' # Cpm pca as biplot:
#' pca_cpm_bi <- PAC_pca(pac, norm="cpm", type="both",
#' pheno_target=list("stage"))
#'
#' # Plot individual graphs
#' pca_cpm_bi$graphs[[1]]
#' pca_cpm_lab$graphs[[1]]
#' pca_cpm_anno$graphs[[3]]
#'
#' # Extract pca output
#' pca_cpm_anno$pca
#'
#' @importFrom ggplot2 geom_hline geom_vline geom_point aes
#' theme scale_colour_gradient theme_minimal xlab ylab
#' @export
PAC_pca <- function(PAC, norm="counts", type="pheno", graphs=TRUE,
pheno_target=NULL, anno_target=NULL, labels=NULL, ...){
Dim.1 <- Dim.2 <- Dim.3 <- NULL
## Check S4
if(isS4(PAC)){
tp <- "S4"
PAC <- as(PAC, "list")
}else{
tp <- "S3"
}
if(length(pheno_target)==2){
PAC <- PAC_filter(PAC, subset_only=TRUE,
pheno_target=pheno_target)
}
if(length(anno_target)==2){
PAC <- PAC_filter(PAC, subset_only=TRUE,
anno_target=anno_target)
}
stopifnot(PAC_check(PAC))
if(norm=="counts"){
data <- PAC$Counts
}else{
data <- PAC$norm[[norm]]
}
if(!is.null(labels)){
geom <- c("point", "text")
if(length(labels) > 1){
if(any(duplicated(labels))){
colnames(data) <- paste(labels, seq.int(ncol(data)), sep="_")
}else{
colnames(data) <- as.character(labels)
}
}
}else{
geom <- "point"
}
if(type=="pheno"|type=="both"){
data <- t(as.matrix(data))
}
pca_res <- FactoMineR::PCA(data, graph=FALSE)
if(graphs==FALSE){
return(pca_res)
}else{
if(!is.null(pheno_target)|!is.null(anno_target)){
if(type=="pheno"){
# Check likely type of data
n_sampl <- nrow(PAC$Pheno)
col <- PAC$Pheno[,pheno_target[[1]]]
if(is.matrix(col)){
col <- col[,1]
}
rtio <- length(unique(col))/n_sampl
if(is.factor(col)|rtio<0.4){
col <- as.factor(as.character(col))
}
if(is.numeric(col)|is.integer(col)|rtio>0.4){
col <- as.numeric(col)
}
}
if(type=="anno"){
col <- as.factor(as.character(PAC$Anno[,anno_target[[1]]]))
}
if(type=="both"){
if(!is.null(pheno_target)){
col <- as.factor(as.character(PAC$Pheno[,pheno_target[[1]]]))
}else{
col <- "none"}}
}else{
col <- "none"}
grphs <- list(PC1_PC2=NULL, PC1_PC3=NULL, PC2_PC3=NULL)
# Build graphs depending on type
if(type=="pheno"){
# For gradient/numeric use ggplot2
if(is.numeric(col)){
coord<- as.data.frame(pca_res$ind$coord)
con<- as.data.frame(pca_res$eig[,"percentage of variance"])
grphs$PC1_PC2 <- ggplot2::ggplot() +
geom_hline(yintercept=0, linetype="dashed", color="black", size=0.5)+
geom_vline(xintercept=0, linetype="dashed", color="black", size=0.5)+
geom_point(data=coord, aes(x=Dim.1, y=Dim.2, colour=col)) +
theme(legend.position="none") +
scale_colour_gradient(low="#00FFE6", high="#FF0000") +
theme_minimal() +
xlab(paste0("PC1 (", round(con["comp 1",], digits=2), "%)")) +
ylab(paste0("PC2 (", round(con["comp 2",], digits=2), "%)"))
grphs$PC1_PC3 <- ggplot2::ggplot() +
geom_hline(yintercept=0, linetype="dashed", color="black", size=0.5)+
geom_vline(xintercept=0, linetype="dashed", color="black", size=0.5)+
geom_point(data=coord, aes(x=Dim.1, y=Dim.3, colour=col)) +
theme(legend.position="none") +
scale_colour_gradient(low="#00FFE6", high="#FF0000") +
theme_minimal() +
xlab(paste0("PC1 (", round(con["comp 1",], digits=2), "%)")) +
ylab(paste0("PC3 (", round(con["comp 3",], digits=2), "%)"))
grphs$PC2_PC3 <- ggplot2::ggplot() +
geom_hline(yintercept=0, linetype="dashed", color="black", size=0.5)+
geom_vline(xintercept=0, linetype="dashed", color="black", size=0.5)+
geom_point(data=coord, aes(x=Dim.2, y=Dim.3, colour=col)) +
theme(legend.position="none") +
scale_colour_gradient(low="#00FFE6", high="#FF0000") +
theme_minimal() +
xlab(paste0("PC2 (", round(con["comp 2",], digits=2), "%)")) +
ylab(paste0("PC3 (", round(con["comp 3",], digits=2), "%)"))
}else{
# For factor use factoextra
grphs$PC1_PC2 <- factoextra::fviz_pca_ind(
pca_res, habillage = col, geom=geom, repel=TRUE, addEllipses = FALSE,
axes=c(1,2), invisible="quali", pointsize=2, labelsize=3,
title="PC1_PC2 - Pheno")
grphs$PC1_PC3 <- factoextra::fviz_pca_ind(
pca_res, habillage = col, geom=geom, repel=TRUE, addEllipses = FALSE,
axes=c(1,3), invisible="quali", pointsize=2, labelsize=3,
title="PC1_PC3 - Pheno",)
grphs$PC2_PC3 <- factoextra::fviz_pca_ind(
pca_res, habillage = col, geom=geom, repel=TRUE, addEllipses = FALSE,
axes=c(2,3), invisible="quali", pointsize=2, labelsize=3,
title="PC2_PC3 - Pheno")
grphs <- lapply(grphs, function(x){
x <- gginnards::move_layers(x, match_type="GeomPoint",
position = "top")
x <- gginnards::move_layers(x, match_type="GeomTextRepel",
position = "top")
return(x)
})
}
}
if(type=="anno"){
# For gradient/numeric use ggplot2
if(is.numeric(col)){
coord<- as.data.frame(pca_res$var$coord)
con<- as.data.frame(pca_res$eig[,"percentage of variance"])
grphs$PC1_PC2 <- ggplot2::ggplot() +
geom_hline(yintercept=0, linetype="dashed", color="black", size=0.5)+
geom_vline(xintercept=0, linetype="dashed", color="black", size=0.5)+
geom_point(data=coord, aes(x=Dim.1, y=Dim.2, colour=col)) +
theme(legend.position="none") +
scale_colour_gradient(low="#00FFE6", high="#FF0000") +
theme_minimal() +
xlab(paste0("PC1 (", round(con["comp 1",], digits=2), "%)")) +
ylab(paste0("PC2 (", round(con["comp 2",], digits=2), "%)"))
grphs$PC1_PC3 <- ggplot2::ggplot() +
geom_hline(yintercept=0, linetype="dashed", color="black", size=0.5)+
geom_vline(xintercept=0, linetype="dashed", color="black", size=0.5)+
geom_point(data=coord, aes(x=Dim.1, y=Dim.3, colour=col)) +
theme(legend.position="none") +
scale_colour_gradient(low="#00FFE6", high="#FF0000") +
theme_minimal() +
xlab(paste0("PC1 (", round(con["comp 1",], digits=2), "%)")) +
ylab(paste0("PC3 (", round(con["comp 3",], digits=2), "%)"))
grphs$PC2_PC3 <- ggplot2::ggplot() +
geom_hline(yintercept=0, linetype="dashed", color="black", size=0.5)+
geom_vline(xintercept=0, linetype="dashed", color="black", size=0.5)+
geom_point(data=coord, aes(x=Dim.2, y=Dim.3, colour=col)) +
theme(legend.position="none") +
scale_colour_gradient(low="#00FFE6", high="#FF0000") +
theme_minimal() +
xlab(paste0("PCA2 (", round(con["comp 2",], digits=2), "%)")) +
ylab(paste0("PCA3 (", round(con["comp 3",], digits=2), "%)"))
}else{
grphs$PC1_PC2 <- factoextra::fviz_pca_ind(
pca_res, geom="point", habillage = col, repel=TRUE,
addEllipses = FALSE, axes=c(1,2), invisible="quali",
pointsize=1, title="PC1_PC2 - Anno")
grphs$PC1_PC3 <- factoextra::fviz_pca_ind(
pca_res, geom="point", habillage = col, repel=TRUE,
addEllipses = FALSE, axes=c(1,3), invisible="quali",
pointsize=1, title="PC1_PC3 - Anno")
grphs$PC2_PC3 <- factoextra::fviz_pca_ind(
pca_res, geom="point", habillage = col, repel=TRUE,
addEllipses = FALSE, axes=c(2,3), invisible="quali",
pointsize=1, title="PC2_PC3 - Anno")
}
}
if(type=="both"){
grphs$PC1_PC2 <- factoextra::fviz_pca_biplot(
pca_res, habillage = col, geom=c("arrow", "text"), geom.var = "point",
col.var = "black", repel=TRUE, addEllipses = FALSE, axes=c(1,2),
invisible="quali", pointsize=0.5, labelsize=3,
title="PC1_PC2 - Biplot", ...)
grphs$PC1_PC3 <- factoextra::fviz_pca_biplot(
pca_res, habillage = col, geom=c("arrow", "text"), geom.var = "point",
col.var = "black", repel=TRUE, addEllipses = FALSE, axes=c(1,3),
invisible="quali", pointsize=0.5, labelsize=3,
title="PC1_PC3 - Biplot", ...)
grphs$PC2_PC3 <- factoextra::fviz_pca_biplot(
pca_res, habillage = col, geom=c("arrow", "text"), geom.var = "point",
col.var = "black", repel=TRUE, addEllipses = FALSE, axes=c(2,3),
invisible="quali", pointsize=0.5, labelsize=3,
title="PC2_PC3 - Biplot", ...)
grphs <- lapply(grphs, function(x){
x <- gginnards::move_layers(x, match_type="GeomPoint",
position = "bottom")
x <- gginnards::move_layers(x, match_type="GeomArrow",
position = "top")
x <- gginnards::move_layers(x, match_type="GeomTextRepel",
position = "top")
return(x)
})
}
print(cowplot::plot_grid(plotlist=grphs, ncol=2, nrow=2))
return(list(graphs=grphs, pca=pca_res))
} #end graph=true
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.