Compiled date: `r Sys.Date()`
Last edited: 2020-08-03
License: `r packageDescription("POMA")[["License"]]`
# Global knitr chunk options for this document.
# The commented-out option sits on its own line so that it does not swallow
# the `comment` argument and the closing parenthesis.
knitr::opts_chunk$set(
  collapse = TRUE,
  # fig.align = "center",
  comment = ">"
)
Run the following code to install the Bioconductor version of the package.
# Uncomment the next line if BiocManager is not installed yet.
# install.packages("BiocManager")

# Install POMA through BiocManager.
BiocManager::install("POMA")
library(POMA)
The following function will return an Exploratory Data Analysis (EDA) PDF report. The input object must be an MSnSet object.
# Load the example dataset named "st000336".
data("st000336")

# Produce the exploratory data analysis report for that dataset.
PomaEDA(st000336)
# Attach the packages used by the analysis chunks that follow.
library(patchwork)
library(knitr)
library(dplyr)
library(tibble)
library(ggplot2)
library(reshape2)
library(Biobase)

# Expression matrix transposed so that rows are samples and columns are
# features (later chunks use nrow(e) as the sample count).
e <- t(Biobase::exprs(st000336))

# Sample table with two columns: the former row names as "ID" and the
# column in position 2 (after adding ID) renamed to "Group".
target <- Biobase::pData(st000336) %>%
  rownames_to_column("ID") %>%
  rename(Group = 2) %>%
  select(ID, Group)
# Impute the raw dataset with the "knn" method.
imputed <- PomaImpute(st000336, method = "knn")

# Normalize the imputed data with "log_pareto", then pass it through
# PomaOutliers with coef = 1.5.
# NOTE(review): presumably the default mode removes the detected outliers;
# confirm against the POMA documentation.
pre_processed <- PomaNorm(imputed, method = "log_pareto") %>%
  PomaOutliers(coef = 1.5)
# zeros
# Per-feature count of exact zeros, keeping only features with at least one.
zeros <- data.frame(number = colSums(e == 0, na.rm = TRUE)) %>%
  rownames_to_column("names") %>%
  filter(number != 0)

# Features whose value is zero in every sample.
all_zero <- zeros %>%
  filter(number == nrow(e))

# missing values
# Per-feature count of NAs, keeping only features with at least one.
nas <- data.frame(number = colSums(is.na(e))) %>%
  rownames_to_column("names") %>%
  filter(number != 0)

# zero variance
# Per-feature variance (ignoring NAs), reshaped to one row per feature and
# filtered to the features whose variance is exactly zero.
var_zero <- e %>%
  as.data.frame() %>%
  summarise_all(~ var(., na.rm = TRUE)) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("names") %>%
  filter(V1 == 0)
Generated EDA PDF report starts here.
Your data have 57 samples, 31 features and 2 groups, that are ‘Controls’, ‘DMD’. Furthermore, 1 covariates have been found in your data. These covariates are ‘steroids’.
A 3.45% of values in your data are NAs (missing values). Variables that have NA values are ‘aspartic_acid (5)’, ‘citrulline (28)’, ‘glutamic_acid (15)’, ‘methionine (3)’, ‘x3_hba (1)’, ‘a_kg (1)’, ‘citrate (2)’, ‘fumarate (1)’, ‘lactate (2)’, ‘malate (1)’, ‘pyruvate (1)’, ‘succinate (1)’.
A 0% of values in your data are zeros.
Removed from the exploratory data analysis 0 features that only have zeros.
Removed from the exploratory data analysis 0 features that have zero variance.
# Dataset dimensions; one pData column is not counted as a covariate
# (presumably the grouping column -- confirm).
summary_table1 <- data.frame(
  Samples = nrow(e),
  Features = ncol(e),
  Covariates = ncol(Biobase::pData(st000336)) - 1
)

# Total number of zeros and their share of all matrix cells.
summary_table2 <- data.frame(
  Counts_Zero = sum(zeros$number),
  Percent_Zero = paste(
    round((sum(zeros$number) / (nrow(e) * ncol(e))) * 100, 2),
    "%"
  )
)

# Total number of missing values and their share of all matrix cells.
summary_table3 <- data.frame(
  Counts_NA = sum(is.na(e)),
  Percent_NA = paste(
    round((sum(is.na(e)) / (nrow(e) * ncol(e))) * 100, 2),
    "%"
  )
)

# Render the three summaries as tables.
knitr::kable(summary_table1)
knitr::kable(summary_table2)
knitr::kable(summary_table3)
# Bar chart of missing values per feature, ordered by count; drawn only when
# at least one feature has missing values.
if (nrow(nas) > 0) {
  ggplot(
    nas,
    aes(x = reorder(names, number), y = number, fill = number)
  ) +
    geom_col() +
    ylab("Missing values") +
    xlab("") +
    ggtitle("Missing Value Plot") +
    theme_bw() +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1),
      legend.position = "none"
    )
}
# Bar chart of zero counts per feature, ordered by count; drawn only when at
# least one feature contains zeros.
if (nrow(zeros) > 0) {
  ggplot(
    zeros,
    aes(x = reorder(names, number), y = number, fill = number)
  ) +
    geom_col() +
    ylab("Zeros") +
    xlab("") +
    ggtitle("Zeros Plot") +
    theme_bw() +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1),
      legend.position = "none"
    )
}
# Number of samples in each group.
counts <- data.frame(table(target$Group))
colnames(counts) <- c("Group", "Counts")

# Bar chart of group sizes, ordered by count.
ggplot(counts, aes(reorder(Group, Counts), Counts, fill = Group)) +
  geom_col() +
  ylab("Counts") +
  xlab("") +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  )
# Number of samples left after pre-processing.
indNum <- nrow(Biobase::pData(pre_processed))

# Jitter is enabled only when there are at most 10 samples. The comparison
# already yields a single logical, so no ifelse() wrapper is needed.
jttr <- indNum <= 10

# Boxplots before normalization.
p1 <- PomaBoxplots(imputed, jitter = jttr) +
  xlab("Samples") +
  ylab("Value") +
  ggtitle("Not Normalized") +
  theme(legend.position = "bottom")

# Boxplots after normalization.
p2 <- PomaBoxplots(pre_processed, jitter = jttr) +
  xlab("Samples") +
  ylab("Value") +
  ggtitle("Normalized ('log_pareto')") +
  theme(legend.position = "bottom")

# Combine both plots with patchwork's `+` operator.
p1 + p2
# Density plots before and after normalization.
p3 <- PomaDensity(imputed) +
  ggtitle("Not Normalized")

p4 <- PomaDensity(pre_processed) +
  ggtitle("Normalized ('log_pareto')")

# Combine both plots with patchwork's `/` operator.
p3 / p4
# Re-run imputation and normalization from the raw data, then call
# PomaOutliers in "analyze" mode with the same coef as the pre-processing.
outliers <- st000336 %>%
  PomaImpute(method = "knn") %>%
  PomaNorm(method = "log_pareto") %>%
  PomaOutliers(do = "analyze", coef = 1.5)

# Show the polygon plot element of the outlier analysis result.
outliers$polygon_plot
7 possible outliers detected in your data. These outliers are ‘DMD119.2.U02’, ‘DMD084.11.U02’, ‘DMD087.12.U02’, ‘DMD023.10.U02’, ‘DMD046.11.U02’, ‘DMD133.9.U02’, ‘DMD135.10.U02’.
# Render the outlier table only when at least one outlier was reported.
if (nrow(outliers$outliers) > 0) {
  knitr::kable(outliers$outliers)
}
# Correlation analysis of the pre-processed data.
correlations <- PomaCorr(pre_processed)

# Keep the pairs whose absolute correlation exceeds 0.97.
high_correlations <- correlations$correlations %>%
  filter(abs(corr) > 0.97)
There are 0 high correlated feature pairs in your data.
# Heatmap of the pre-processed data.
PomaHeatmap(pre_processed)
# Run PomaMultivariate with method = "pca" and show its scoresplot element.
PomaMultivariate(pre_processed, method = "pca")$scoresplot
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.