Nothing
## ----global_options, include=FALSE--------------------------------------------
knitr::opts_chunk$set(echo=TRUE, warning=FALSE, message=FALSE, fig.pos = 'h')
## ----setup, include = FALSE---------------------------------------------------
chooseCRANmirror(graphics=FALSE, ind=1)
knitr::opts_chunk$set(
collapse = TRUE,
comment = '#>'
)
options(tinytex.verbose = TRUE)
## ----batch_workflow, include = TRUE, fig.align = 'center', echo=FALSE, fig.cap='proBatch in batch correction workflow', out.width = '50%'----
knitr::include_graphics('Batch_effects_workflow_staircase.png')
## ----dependencies, eval = FALSE-----------------------------------------------
# bioc_deps <- c("GO.db", "impute", "preprocessCore", "pvca","sva" )
# cran_deps <- c("corrplot", "data.table", "ggplot2", "ggfortify","lazyeval",
# "lubridate", "pheatmap", "reshape2","readr", "rlang",
# "tibble", "dplyr", "tidyr", "wesanderson","WGCNA")
#
# if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
# BiocManager::install(bioc_deps)
# install.packages(cran_deps)
## ----install_proBatch, fig.show='hold', eval = FALSE--------------------------
# #install the development version from GitHub:
# install.packages('devtools')
# devtools::install_github('symbioticMe/proBatch', build_vignettes = TRUE)
## ----load_packages------------------------------------------------------------
require(dplyr)
require(tibble)
require(ggplot2)
## ----col_names----------------------------------------------------------------
feature_id_col = 'peptide_group_label'
measure_col = 'Intensity'
sample_id_col = 'FullRunName'
essential_columns = c(feature_id_col, measure_col, sample_id_col)
## ----tech_bio_cols------------------------------------------------------------
technical_factors = c('MS_batch', 'digestion_batch')
biological_factors = c('Strain', 'Diet', 'Sex')
biospecimen_id_col = 'EarTag'
## -----------------------------------------------------------------------------
batch_col = 'MS_batch'
## ----load_data, fig.show='hold'-----------------------------------------------
library(proBatch)
data('example_proteome', 'example_sample_annotation', 'example_peptide_annotation',
package = 'proBatch')
## ----date_to_order, fig.show='hold'-------------------------------------------
generated_sample_annotation <- date_to_sample_order(example_sample_annotation,
time_column = c('RunDate','RunTime'),
new_time_column = 'generated_DateTime',
dateTimeFormat = c('%b_%d', '%H:%M:%S'),
new_order_col = 'generated_order',
instrument_col = NULL)
library(knitr)
kable(generated_sample_annotation[1:5,] %>%
select(c('RunDate', 'RunTime', 'order', 'generated_DateTime', 'generated_order')))
## ----pep_annotation, fig.show='hold'------------------------------------------
generated_peptide_annotation <- create_peptide_annotation(example_proteome,
feature_id_col = 'peptide_group_label',
protein_col = 'Protein')
## ----reduce_proteome----------------------------------------------------------
example_proteome = example_proteome %>% select(one_of(essential_columns))
gc()
## ----long_to_matrix-----------------------------------------------------------
example_matrix <- long_to_matrix(example_proteome,
feature_id_col = 'peptide_group_label',
measure_col = 'Intensity',
sample_id_col = 'FullRunName')
## -----------------------------------------------------------------------------
log_transformed_matrix <- log_transform_dm(example_matrix,
log_base = 2, offset = 1)
## ---- fig.show='hold'---------------------------------------------------------
color_list <- sample_annotation_to_colors(example_sample_annotation,
factor_columns = c('MS_batch', 'digestion_batch',
'EarTag', 'Strain',
'Diet', 'Sex'),
numeric_columns = c('DateTime','order'))
## ----plot_mean, fig.show='hold', fig.width=5, fig.height=2--------------------
plot_sample_mean(log_transformed_matrix, example_sample_annotation, order_col = 'order',
batch_col = batch_col, color_by_batch = TRUE, ylimits = c(12, 16.5),
color_scheme = color_list[[batch_col]])
## ----plot_boxplots, fig.show='hold', fig.width=10, fig.height=5---------------
log_transformed_long <- matrix_to_long(log_transformed_matrix)
batch_col = 'MS_batch'
plot_boxplot(log_transformed_long, example_sample_annotation,
batch_col = batch_col, color_scheme = color_list[[batch_col]])
## ----median_normalization_log, fig.show='hold'--------------------------------
median_normalized_matrix = normalize_data_dm(log_transformed_matrix,
normalize_func = 'medianCentering')
## ----median_normalization_raw, fig.show='hold'--------------------------------
median_normalized_matrix = normalize_data_dm(example_matrix,
normalize_func = 'medianCentering',
log_base = 2, offset = 1)
## ----quantile_norm, fig.show='hold'-------------------------------------------
quantile_normalized_matrix = normalize_data_dm(log_transformed_matrix,
normalize_func = 'quantile')
## ----plot_mean_normalized, fig.show='hold', fig.width=5, fig.height=2---------
plot_sample_mean(quantile_normalized_matrix, example_sample_annotation,
color_by_batch = TRUE, ylimits = c(12, 16),
color_scheme = color_list[[batch_col]])
## ----plot_hierarchical, fig.show='hold', fig.width=10, fig.height=5-----------
selected_annotations <- c('MS_batch', 'digestion_batch', 'Diet')
#Plot clustering between samples
plot_hierarchical_clustering(quantile_normalized_matrix,
sample_annotation = example_sample_annotation,
color_list = color_list,
factors_to_plot = selected_annotations,
distance = 'euclidean', agglomeration = 'complete',
label_samples = FALSE)
## ----plot_heatmap, fig.show='hold', fig.width=10, fig.height=11---------------
plot_heatmap_diagnostic(quantile_normalized_matrix, example_sample_annotation,
factors_to_plot = selected_annotations,
cluster_cols = TRUE,
color_list = color_list,
show_rownames = FALSE, show_colnames = FALSE)
## ----plot_PCA, fig.show='hold', fig.width=10, fig.height=8.5------------------
pca1 = plot_PCA(quantile_normalized_matrix, example_sample_annotation, color_by = 'MS_batch',
plot_title = 'MS batch', color_scheme = color_list[['MS_batch']])
pca2 = plot_PCA(quantile_normalized_matrix, example_sample_annotation, color_by = 'digestion_batch',
plot_title = 'Digestion batch', color_scheme = color_list[['digestion_batch']])
pca3 = plot_PCA(quantile_normalized_matrix, example_sample_annotation, color_by = 'Diet',
plot_title = 'Diet', color_scheme = color_list[['Diet']])
pca4 = plot_PCA(quantile_normalized_matrix, example_sample_annotation, color_by = 'DateTime',
plot_title = 'DateTime', color_scheme = color_list[['DateTime']])
library(ggpubr)
ggarrange(pca1, pca2, pca3, pca4, ncol = 2, nrow = 2)
## ----plot_PCA_spec, fig.show='hold', fig.width=5, fig.height=5----------------
pca_spec = plot_PCA(quantile_normalized_matrix, example_sample_annotation,
color_by = 'digestion_batch',
plot_title = 'Digestion batch')
pca_spec
## ----plot_PVCA, fig.show='hold', fig.width=5, fig.height=5--------------------
plot_PVCA(quantile_normalized_matrix, example_sample_annotation,
technical_factors = technical_factors,
biological_factors = biological_factors)
## ----plot_spikeIn, fig.show='hold'--------------------------------------------
quantile_normalized_long <- matrix_to_long(quantile_normalized_matrix)
plot_spike_in(quantile_normalized_long, example_sample_annotation,
peptide_annotation = example_peptide_annotation,
protein_col = 'Gene', spike_ins = 'BOVINE_A1ag',
plot_title = 'Spike-in BOVINE protein peptides',
color_by_batch = TRUE, color_scheme = color_list[[batch_col]])
## ----loess_fit, fig.show='hold', fig.width=5, fig.height=2.4------------------
loess_fit_df <- adjust_batch_trend_df(quantile_normalized_long, example_sample_annotation)
## ----loess_30, fig.show='hold', fig.width=5, fig.height=2.4-------------------
loess_fit_30 <- adjust_batch_trend_df(quantile_normalized_long, example_sample_annotation,
span = 0.3)
plot_with_fitting_curve(feature_name = '10231_QDVDVWLWQQEGSSK_2',
fit_df = loess_fit_30, fit_value_col = 'fit',
df_long = quantile_normalized_long,
sample_annotation = example_sample_annotation,
color_by_batch = TRUE, color_scheme = color_list[[batch_col]],
plot_title = 'Span = 30%')
## ----loess_70, fig.show='hold', fig.width=5, fig.height=2.4-------------------
loess_fit_70 <- adjust_batch_trend_df(quantile_normalized_long, example_sample_annotation,
span = 0.7)
plot_with_fitting_curve(feature_name = '10231_QDVDVWLWQQEGSSK_2',
fit_df = loess_fit_70, fit_value_col = 'fit',
df_long = quantile_normalized_long,
sample_annotation = example_sample_annotation,
color_by_batch = TRUE, color_scheme = color_list[[batch_col]],
plot_title = 'Span = 70%')
## ----median_batch, fig.show='hold', fig.width=3, fig.height=2.4---------------
peptide_median_df <- center_feature_batch_medians_df(loess_fit_df, example_sample_annotation)
plot_single_feature(feature_name = '10231_QDVDVWLWQQEGSSK_2', df_long = peptide_median_df,
sample_annotation = example_sample_annotation, measure_col = 'Intensity',
plot_title = 'Feature-level Median Centered')
## ----comBat, fig.show='hold'--------------------------------------------------
comBat_df <- correct_with_ComBat_df(loess_fit_df, example_sample_annotation)
## ----combat_result, fig.show='hold', fig.width=3, fig.height=2.4-------------
plot_single_feature(feature_name = '10231_QDVDVWLWQQEGSSK_2',
df_long = loess_fit_df,
sample_annotation = example_sample_annotation,
plot_title = 'Loess Fitted')
plot_single_feature(feature_name = '10231_QDVDVWLWQQEGSSK_2',
df_long = comBat_df,
sample_annotation = example_sample_annotation,
plot_title = 'ComBat corrected')
## ----batch_corr_general, fig.show='hold'--------------------------------------
batch_corrected_df <- correct_batch_effects_df(df_long = quantile_normalized_long,
sample_annotation = example_sample_annotation,
discrete_func = 'ComBat',
continuous_func = 'loess_regression',
abs_threshold = 5, pct_threshold = 0.20)
batch_corrected_matrix = long_to_matrix(batch_corrected_df)
## ----setup_corr_heatmap, fig.show='hold', fig.height=5, fig.width=8-----------
earTags <- c('ET1524', 'ET2078', 'ET1322', 'ET1566', 'ET1354', 'ET1420', 'ET2154',
'ET1515', 'ET1506', 'ET2577', 'ET1681', 'ET1585', 'ET1518', 'ET1906')
replicate_filenames = example_sample_annotation %>%
filter(MS_batch %in% c('Batch_2', 'Batch_3')) %>%
filter(EarTag %in% earTags) %>%
pull(!!sym('FullRunName'))
## ---- fig.show='hold', fig.width=10, fig.height=11----------------------------
p1_exp = plot_sample_corr_heatmap(log_transformed_matrix,
samples_to_plot = replicate_filenames,
plot_title = 'Correlation of selected samples')
## -----------------------------------------------------------------------------
breaksList <- seq(0.7, 1, by = 0.01) # color scale of pheatmap
heatmap_colors = colorRampPalette(
rev(RColorBrewer::brewer.pal(n = 7, name = 'RdYlBu')))(length(breaksList) + 1)
## ----corr_samples_heatmap, fig.show='hold', fig.height=2.12, fig.width=3.35----
# Plot the heatmap with annotations for the chosen samples
factors_to_show = c(batch_col, biospecimen_id_col)
p1 = plot_sample_corr_heatmap(log_transformed_matrix,
samples_to_plot = replicate_filenames,
sample_annotation = example_sample_annotation,
factors_to_plot = factors_to_show,
plot_title = 'Log transformed correlation matrix of
\nselected replicated samples',
color_list = color_list,
heatmap_color = heatmap_colors, breaks = breaksList,
cluster_rows= FALSE, cluster_cols=FALSE,fontsize = 4,
annotation_names_col = TRUE, annotation_legend = FALSE,
show_colnames = FALSE)
p2 = plot_sample_corr_heatmap(batch_corrected_matrix,
samples_to_plot = replicate_filenames,
sample_annotation = example_sample_annotation,
factors_to_plot = factors_to_show,
plot_title = 'Batch Corrected
\nselected replicated samples',
color_list = color_list,
heatmap_color = heatmap_colors, breaks = breaksList,
cluster_rows= FALSE, cluster_cols=FALSE,fontsize = 4,
annotation_names_col = TRUE, annotation_legend = FALSE,
show_colnames = FALSE)
library(gridExtra)
grid.arrange(grobs = list(p1$gtable, p2$gtable), ncol=2)
## ----corr_samples_distrib, fig.show='hold', fig.width=3.2, fig.height=3.5-----
sample_cor_raw <- plot_sample_corr_distribution(log_transformed_matrix,
example_sample_annotation,
#repeated_samples = replicate_filenames,
batch_col = 'MS_batch',
biospecimen_id_col = 'EarTag',
plot_title = 'Correlation of samples (raw)',
plot_param = 'batch_replicate')
raw_corr = sample_cor_raw +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
ylim(0.7,1) + xlab(NULL)
sample_cor_batchCor <- plot_sample_corr_distribution(batch_corrected_matrix,
example_sample_annotation,
batch_col = 'MS_batch',
plot_title = 'Batch corrected',
plot_param = 'batch_replicate')
corr_corr = sample_cor_batchCor +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
ylim(0.7, 1) + xlab(NULL)
library(gtable)
library(grid)
g2 <- ggplotGrob(raw_corr)
g3 <- ggplotGrob(corr_corr)
g <- cbind(g2, g3, size = 'first')
grid.draw(g)
## ----correlation_of_peptides, eval = FALSE------------------------------------
# peptide_cor_raw <- plot_peptide_corr_distribution(log_transformed_matrix,
# example_peptide_annotation,
# protein_col = 'Gene',
# plot_title = 'Peptide correlation (raw)')
#
# peptide_cor_batchCor <- plot_peptide_corr_distribution(batch_corrected_matrix,
# example_peptide_annotation,
# protein_col = 'Gene',
# plot_title = 'Peptide correlation (batch corrected)')
# g2 <- ggplotGrob(peptide_cor_raw+
# ylim(c(-1, 1)))
# g3 <- ggplotGrob(peptide_cor_batchCor+
# ylim(c(-1, 1)))
# g <- cbind(g2, g3, size = 'first')
# grid.draw(g)
## ----sessionInfo, eval=TRUE---------------------------------------------------
sessionInfo()
## ----citation-----------------------------------------------------------------
citation('proBatch')
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.