inferCNV uses the R packages `r CRANpkg("ape")`
, `r Biocpkg("BiocGenerics")`
, `r CRANpkg("binhf")`
, `r CRANpkg("caTools")`
, `r CRANpkg("coda")`
, `r CRANpkg("coin")`
, `r CRANpkg("dplyr")`
, `r CRANpkg("doParallel")`
, `r Biocpkg("edgeR")`
, `r CRANpkg("fastcluster")`
, `r CRANpkg("fitdistrplus")`
, `r CRANpkg("foreach")`
, `r CRANpkg("futile.logger")`
, `r CRANpkg("future")`
, `r CRANpkg("gplots")`
, `r CRANpkg("ggplot2")`
, `r CRANpkg("HiddenMarkov")`
, `r CRANpkg("reshape")`
, `r CRANpkg("rjags")`
, `r CRANpkg("RColorBrewer")`
, `r Biocpkg("SingleCellExperiment")`
, `r Biocpkg("SummarizedExperiment")`
and imports functions from the archived `r CRANpkg("GMD")`.
If you want to use the interactive heatmap visualization, please check the add-on R package `r Githubpkg("broadinstitute/inferCNV_NGCHM")`
after installing the packages `r CRANpkg("tibble")`
, `r Githubpkg("bmbroom/tsvio")`
and `r Githubpkg("bmbroom/NGCHMR")`
. To install optional packages, type the following in an R command window:
# Optional dependencies for the interactive (NGCHM) heatmap add-on.
# One call per line: R cannot parse several calls on a single line
# unless they are separated by `;`.
install.packages("tibble")
install.packages("devtools")
devtools::install_github("bmbroom/tsvio")
devtools::install_github("bmbroom/NGCHMR", ref = "stable")
devtools::install_github("broadinstitute/inferCNV_NGCHM")
And download the NGCHM Java application by typing the following in a regular shell:

```{bash, eval = FALSE}
wget
```
```r
# Echo the source of every chunk in the rendered document.
knitr::opts_chunk$set(echo = TRUE)
# Attach infercnv so the calls below can be used unqualified.
library(infercnv)
```
Reading in the raw counts matrix and meta data, populating the infercnv object
# Build the infercnv object from the example counts matrix, the cell
# annotations file and the gene order file under ../inst/extdata.
# The two names in ref_group_names are passed as the reference groups.
infercnv_obj <- CreateInfercnvObject(
  raw_counts_matrix = "../inst/extdata/oligodendroglioma_expression_downsampled.counts.matrix.gz",
  annotations_file = "../inst/extdata/oligodendroglioma_annotations_downsampled.txt",
  delim = "\t",
  gene_order_file = "../inst/extdata/gencode_downsampled.EXAMPLE_ONLY_DONT_REUSE.txt",
  ref_group_names = c(
    "Microglia/Macrophage",
    "Oligodendrocytes (non-malignant)"
  )
)
# Run infercnv::run() on the object created above and keep the result in
# infercnv_obj_default. Output is written under ../example_output.
infercnv_obj_default <- infercnv::run(
  infercnv_obj,
  # cutoff=1 works well for Smart-seq2, and cutoff=0.1 works well for 10x Genomics
  cutoff = 1,
  out_dir = "../example_output",
  cluster_by_groups = TRUE,
  plot_steps = FALSE,
  denoise = TRUE,
  HMM = TRUE,
  no_prelim_plot = TRUE,
  png_res = 60,
  num_threads = 2,
  BayesMaxPNormal = 0
)
Basic output from running inferCNV.
HMM predictions
Removing those genes that are very lowly expressed or present in very few cells
# filter out low expressed genes
cutoff <- 1
infercnv_obj <- require_above_min_mean_expr_cutoff(infercnv_obj, cutoff)

# filter out genes present in too few cells
min_cells_per_gene <- 3
infercnv_obj <- require_above_min_cells_ref(
  infercnv_obj,
  min_cells_per_gene = min_cells_per_gene
)

## for safe keeping
infercnv_orig_filtered <- infercnv_obj
# Normalize counts by sequencing depth (as the function name indicates).
infercnv_obj <- infercnv::normalize_counts_by_seq_depth(infercnv_obj)

# Log-transform the expression values.
# NOTE(review): presumably log2(x + 1), judging by the function name - confirm.
infercnv_obj <- log2xplus1(infercnv_obj)

# Derive one symmetric bound from the mean absolute value of the average
# bounds, then apply it as the maximum threshold.
threshold <- mean(abs(get_average_bounds(infercnv_obj)))
infercnv_obj <- apply_max_threshold_bounds(infercnv_obj, threshold = threshold)

# Smooth along each chromosome with a 101-wide window, including the ends.
infercnv_obj <- smooth_by_chromosome(
  infercnv_obj,
  window_length = 101,
  smooth_ends = TRUE
)

# re-center each cell
infercnv_obj <- center_cell_expr_across_chromosome(
  infercnv_obj,
  method = "median"
)
# Plot the smoothed, re-centered data.
plot_cnv(
  infercnv_obj,
  out_dir = "../example_output/",
  output_filename = "infercnv.chr_smoothed",
  x.range = "auto",
  title = "chr smoothed and cells re-centered",
  png_res = 60,
  color_safe_pal = FALSE
)

# Subtract the reference expression from the observations.
infercnv_obj <- subtract_ref_expr_from_obs(infercnv_obj, inv_log = TRUE)

# Plot the reference-subtracted data.
plot_cnv(
  infercnv_obj,
  out_dir = "../example_output/",
  output_filename = "infercnv.ref_subtracted",
  x.range = "auto",
  title = "ref subtracted",
  png_res = 60,
  color_safe_pal = FALSE
)
Converting the log(FC) values to regular fold change values, centered at 1 (no fold change)
This is important because we want (1/2)x to be symmetrical to 1.5x, representing loss/gain of one chromosome region.
# Convert the log2 values back to regular fold-change values.
infercnv_obj <- invert_log2(infercnv_obj)

# Denoise using the reference mean and standard deviation (sd_amplifier = 1.5).
infercnv_obj <- clear_noise_via_ref_mean_sd(infercnv_obj, sd_amplifier = 1.5)

# Plot the denoised data. The stray empty argument that followed
# x.range = "auto" has been removed.
plot_cnv(
  infercnv_obj,
  out_dir = "../example_output/",
  output_filename = "infercnv.denoised",
  x.range = "auto",
  title = "denoised",
  png_res = 60,
  color_safe_pal = FALSE
)
This generally improves on the visualization
# Remove outlier values from the object.
# NOTE(review): exact outlier rule is not visible here - see remove_outliers_norm().
infercnv_obj <- remove_outliers_norm(infercnv_obj)
For additional explanations on files, usage, and a tutorial please visit the wiki.
This tool is a part of the TrinityCTAT toolkit focused on leveraging the use of RNA-Seq to better understand cancer transcriptomes. To find out more please visit TrinityCTAT
This methodology was used in:
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.