Suggested answers to the workshop questions are below. You might have some different code e.g. to customise the volcano plot as you like. Feel free to comment on any of these solutions in the workshop website as described here.
# load libraries library(airway) # tidyverse core packages library(tibble) library(dplyr) library(tidyr) library(readr) library(stringr) library(purrr) library(ggplot2) # tidyverse-friendly packages library(plotly) library(ggrepel) library(tidyHeatmap) library(tidybulk)
# Set up data data("airway") counts_tt <- airway %>% tidybulk()
counts_tt %>% scale_abundance() %>% reduce_dimensions(method="PCA", .dims=3)
counts_tt %>% keep_abundant(factor_of_interest=dex) %>% test_differential_abundance( .formula = ~ 0 + dex, .contrasts = c("dextrt - dexuntrt"), omit_contrast_in_colnames = TRUE ) %>% filter(FDR < 0.05) %>% summarise(num_de = n_distinct(feature))
# load data data("pasilla", package = "rpharma2020tidytranscriptomics") # create tidybulk tibble counts_tt <- pasilla %>% tidybulk() %>% mutate(symbol = AnnotationDbi::mapIds(org.Dm.eg.db::org.Dm.eg.db, keys=as.character(feature), keytype = "FLYBASE", column="SYMBOL", multiVals = "first")) # filter counts counts_filtered <- counts_tt %>% keep_abundant(factor_of_interest = condition) # scale counts counts_scaled <- counts_filtered %>% scale_abundance()
counts_scal_PCA <- counts_scaled %>% reduce_dimensions(method="PCA")
counts_de <- counts_tt %>% test_differential_abundance(.formula = ~ 0 + condition + type, .contrasts = c("conditiontreated - conditionuntreated"), omit_contrast_in_colnames = TRUE) counts_de %>% filter(FDR < 0.05) %>% summarise(num_de = n_distinct(feature))
topgenes <- counts_de %>% pivot_transcript() %>% arrange(PValue) %>% head(10) topgenes
What code can generate a heatmap of variable genes (starting from count_scaled)?
counts_scaled %>% # extract 500 most variable genes keep_variable( .abundance = counts_scaled, top = 500) %>% # create heatmap heatmap( .column = sample, .row = feature, .value = counts_scaled, annotation = c(condition, type), transform = log1p )
What code can you use to visualise expression of the pasilla gene (gene id: FBgn0261552)
counts_scaled %>% # extract counts for pasilla gene filter(feature == "FBgn0261552") %>% # make stripchart ggplot(aes(x = condition, y = counts_scaled + 1, fill =condition, label = sample)) + geom_boxplot() + geom_jitter() + scale_y_log10()+ theme_bw()
What code can generate an interactive volcano plot that has gene ids showing on hover?
p <- counts_de %>% pivot_transcript() %>% # Subset data mutate(significant = FDR<0.05 & abs(logFC) >=2) %>% # Plot ggplot(aes(x = logFC, y = PValue, label=feature)) + geom_point(aes(color = significant, size = significant, alpha=significant)) + geom_text_repel() + # Custom scales scale_y_continuous(trans = "log10_reverse") + scale_color_manual(values=c("black", "#e11f28")) + scale_size_discrete(range = c(0, 2)) + theme_bw() ggplotly(p, tooltip = c("text"))
Tip: You can use "text" instead of "label" if you don't want the column name to show up in the hover e.g. above will give "FBgn0261552" rather than "feature:FBgn0261552".
What code can generate a heatmap of the top 100 DE genes?
top100 <- counts_de %>% pivot_transcript() %>% arrange(PValue) %>% head(100) counts_scaled %>% filter(feature %in% top100$feature) %>% heatmap( .column = sample, .row = feature, .value = counts_scaled, annotation = c(condition, type), transform = log1p )
# Set up data pasilla_de <- rpharma2020tidytranscriptomics::pasilla %>% # Convert SE object to tibble tidybulk %>% # Scale abundance for plotting identify_abundant(factor_of_interest=condition) de_all <- pasilla_de %>% # edgeR QLT test_differential_abundance( ~ condition + type, method = "edger_quasi_likelihood", prefix = "edgerQLT_" ) %>% # edgeR LRT test_differential_abundance( ~ condition + type, method = "edger_likelihood_ratio", prefix = "edgerLR_" ) %>% # limma-voom test_differential_abundance( ~ condition + type, method = "limma_voom", prefix = "voom_" ) %>% # DESeq2 test_differential_abundance( ~ condition + type, method = "deseq2", prefix = "deseq2_" )
de_all %>% # Subset transcript information pivot_transcript() %>% # Reshape for nesting pivot_longer( cols = -c(feature, .abundant), names_sep = "_", names_to = c("method", "statistic"), values_to = "value" ) %>% # Filter statistic filter(statistic %in% c("FDR", "adj.P.Val", "padj")) %>% filter(value < 0.05) %>% # Nesting dplyr::count(method)
BRCA_cell_type_long %>% group_by(cell_type) %>% summarise(m = median(proportion)) %>% dplyr::arrange(dplyr::desc(m))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.