Nothing
## ----options, include=FALSE, cache=FALSE, results='hide', message=FALSE-------
knitr::opts_chunk$set(fig.align="center", cache=FALSE,error=FALSE,
fig.width=6,fig.height=6,autodep=TRUE,
out.width="600px", out.height="600px",
results="markup", echo=TRUE, eval=TRUE)
options(getClass.msg=FALSE)
set.seed(6473) ## for reproducibility
library(scone)
library(RColorBrewer)
## ----datain, message=FALSE----------------------------------------------------
library(scRNAseq)
## ----- Load Example Data -----
fluidigm <- ReprocessedFluidigmData(assays = "rsem_counts")
## ----showqc-------------------------------------------------------------------
## ----- List all QC fields -----
# List all qc fields (accessible via colData())
metadata(fluidigm)$which_qc
## ----biocoverage--------------------------------------------------------------
# Joint distribution of "biological condition"" and "coverage type""
table(colData(fluidigm)$Coverage_Type,
colData(fluidigm)$Biological_Condition)
## ----prefilter----------------------------------------------------------------
# Preliminary Sample Filtering: High-Coverage Only
is_select = colData(fluidigm)$Coverage_Type == "High"
fluidigm = fluidigm[,is_select]
# Retain only detected transcripts
fluidigm = fluidigm[which(apply(assay(fluidigm) > 0,1,any)),]
## ----ralign-------------------------------------------------------------------
# Define a color scheme
cc <- c(brewer.pal(9, "Set1"))
# One batch per Biological Condition
batch = factor(colData(fluidigm)$Biological_Condition)
# Alignment Quality Metrics
qc = colData(fluidigm)[,metadata(fluidigm)$which_qc]
# Barplot of read proportion mapping to human transcriptome
ralign = qc$RALIGN
o = order(ralign)[order(batch[order(ralign)])] # Order by batch, then value
barplot(ralign[o], col=cc[batch][o],
border=cc[batch][o], main="Percentage of reads mapped")
legend("bottomleft", legend=levels(batch), fill=cc,cex=0.4)
## ----nreads-------------------------------------------------------------------
# Barplot of total read number
nreads = qc$NREADS
o = order(nreads)[order(batch[order(nreads)])] # Order by batch, then value
barplot(nreads[o], col=cc[batch][o],
border=cc[batch][o], main="Total number of reads")
legend("topright", legend=levels(batch), fill=cc, cex=0.4)
## ----qpc----------------------------------------------------------------------
## ----- PCA of QC matrix -----
qpc = prcomp(qc,center = TRUE,scale. = TRUE)
barplot((qpc$sdev^2)/sum(qpc$sdev^2), border="gray",
xlab="PC", ylab="Proportion of Variance", main="Quality PCA")
## ----qpc_view-----------------------------------------------------------------
# Barplot of PC1 of the QC matrix
qc1 = as.vector(qpc$x[,1])
o = order(qc1)[order(batch[order(qc1)])]
barplot(qc1[o], col=cc[batch][o],
border=cc[batch][o], main="Quality PC1")
legend("bottomright", legend=levels(batch),
fill=cc, cex=0.8)
## ----fnr_fit------------------------------------------------------------------
# Extract Housekeeping Genes
data(housekeeping)
hk = intersect(housekeeping$V1,rownames(assay(fluidigm)))
# Mean log10(x+1) expression
mu_obs = rowMeans(log10(assay(fluidigm)[hk,]+1))
# Assumed False Negatives
drop_outs = assay(fluidigm)[hk,] == 0
# Logistic Regression Model of Failure
ref.glms = list()
for (si in 1:dim(drop_outs)[2]){
fit = glm(cbind(drop_outs[,si],1 - drop_outs[,si]) ~ mu_obs,
family=binomial(logit))
ref.glms[[si]] = fit$coefficients
}
## ----fnr_vis,fig.width=8,fig.height=4,out.width="800px",out.height="400px"----
par(mfrow=c(1,2))
# Plot Failure Curves and Calculate AUC
plot(NULL, main = "False Negative Rate Curves",
ylim = c(0,1),xlim = c(0,6),
ylab = "Failure Probability", xlab = "Mean log10 Expression")
x = (0:60)/10
AUC = NULL
for(si in 1:ncol(assay(fluidigm))){
y = 1/(exp(-ref.glms[[si]][1] - ref.glms[[si]][2] * x) + 1)
AUC[si] = sum(y)/10
lines(x, 1/(exp(-ref.glms[[si]][1] - ref.glms[[si]][2] * x) + 1),
type = 'l', lwd = 2, col = cc[batch][si])
}
# Barplot of FNR AUC
o = order(AUC)[order(batch[order(AUC)])]
barplot(AUC[o], col=cc[batch][o], border=cc[batch][o], main="FNR AUC")
legend("topright", legend=levels(batch), fill=cc, cex=0.4)
## ----metric_sample_filter, fig.height= 10,out.height="1000px"-----------------
# Initial Gene Filtering:
# Select "common" transcripts based on proportional criteria.
num_reads = quantile(assay(fluidigm)[assay(fluidigm) > 0])[4]
num_cells = 0.25*ncol(fluidigm)
is_common = rowSums(assay(fluidigm) >= num_reads ) >= num_cells
# Metric-based Filtering
mfilt = metric_sample_filter(assay(fluidigm),
nreads = colData(fluidigm)$NREADS,
ralign = colData(fluidigm)$RALIGN,
gene_filter = is_common,
pos_controls = rownames(fluidigm) %in% hk,
zcut = 3, mixture = FALSE,
plot = TRUE)
# Simplify to a single logical
mfilt = !apply(simplify2array(mfilt[!is.na(mfilt)]),1,any)
## ----thresh,fig.width= 6, fig.height= 4, out.width="600px",out.height="400px"----
hist(qc$RALIGN, breaks = 0:100)
# Hard threshold
abline(v = 15, col = "yellow", lwd = 2)
# 3 (zcut) standard deviations below the mean ralign value
abline(v = mean(qc$RALIGN) - 3*sd(qc$RALIGN), col = "green", lwd = 2)
# 3 (zcut) MADs below the median ralign value
abline(v = median(qc$RALIGN) - 3*mad(qc$RALIGN), col = "red", lwd = 2)
# Sufficient threshold
abline(v = NULL, col = "grey", lwd = 2)
# Final threshold is the minimum of
# 1) the sufficient threshold and
# 2) the max of all others
thresh = min(NULL,
max(c(15,mean(qc$RALIGN) - 3*sd(qc$RALIGN),
median(qc$RALIGN) - 3*mad(qc$RALIGN))))
abline(v = thresh, col = "blue", lwd = 2, lty = 2)
legend("topleft",legend = c("Hard","SD","MAD","Sufficient","Final"),
lwd = 2, col = c("yellow","green","red","grey","blue"),
lty = c(1,1,1,1,2), cex = .5)
## ----filterCount--------------------------------------------------------------
goodDat = fluidigm[,mfilt]
# Final Gene Filtering: Highly expressed in at least 5 cells
num_reads = quantile(assay(fluidigm)[assay(fluidigm) > 0])[4]
num_cells = 5
is_quality = rowSums(assay(fluidigm) >= num_reads ) >= num_cells
## ----scone_init---------------------------------------------------------------
# Expression Data (Required)
expr = assay(goodDat)[is_quality,]
# Biological Origin - Variation to be preserved (Optional)
bio = factor(colData(goodDat)$Biological_Condition)
# Processed Alignment Metrics - Variation to be removed (Optional)
qc = colData(goodDat)[,metadata(goodDat)$which_qc]
ppq = scale(qc[,apply(qc,2,sd) > 0],center = TRUE,scale = TRUE)
# Positive Control Genes - Prior knowledge of DE (Optional)
poscon = intersect(rownames(expr),strsplit(paste0("ALS2, CDK5R1, CYFIP1,",
" DPYSL5, FEZ1, FEZ2, ",
"MAPT, MDGA1, NRCAM, ",
"NRP1, NRXN1, OPHN1, ",
"OTX2, PARD6B, PPT1, ",
"ROBO1, ROBO2, RTN1, ",
"RTN4, SEMA4F, SIAH1, ",
"SLIT2, SMARCA1, THY1, ",
"TRAPPC4, UBB, YWHAG, ",
"YWHAH"),split = ", ")[[1]])
# Negative Control Genes - Uniformly expressed transcripts (Optional)
negcon = intersect(rownames(expr),hk)
# Creating a SconeExperiment Object
my_scone <- SconeExperiment(expr,
qc=ppq, bio = bio,
negcon_ruv = rownames(expr) %in% negcon,
poscon = rownames(expr) %in% poscon
)
## ----scone_in2----------------------------------------------------------------
## ----- User-defined function: Dividing by number of detected genes -----
EFF_FN = function (ei)
{
sums = colSums(ei > 0)
eo = t(t(ei)*sums/mean(sums))
return(eo)
}
## ----- Scaling Argument -----
scaling=list(none=identity, # Identity - do nothing
eff = EFF_FN, # User-defined function
sum = SUM_FN, # SCONE library wrappers...
tmm = TMM_FN,
uq = UQ_FN,
fq = FQT_FN,
deseq = DESEQ_FN)
## ----scone_in3, eval=FALSE----------------------------------------------------
#
# # Simple FNR model estimation with SCONE::estimate_ziber
# fnr_out = estimate_ziber(x = expr, bulk_model = TRUE,
# pos_controls = rownames(expr) %in% hk,
# maxiter = 10000)
#
# ## ----- Imputation List Argument -----
# imputation=list(none=impute_null, # No imputation
# expect=impute_expectation) # Replace zeroes
#
# ## ----- Imputation Function Arguments -----
# # accessible by functions in imputation list argument
# impute_args = list(p_nodrop = fnr_out$p_nodrop, mu = exp(fnr_out$Alpha[1,]))
#
# my_scone <- scone(my_scone,
# imputation = imputation, impute_args = impute_args,
# scaling=scaling,
# k_qc=3, k_ruv = 3,
# adjust_bio="no",
# run=FALSE)
## ----scone_params-------------------------------------------------------------
my_scone <- scone(my_scone,
scaling=scaling,
k_qc=3, k_ruv = 3,
adjust_bio="no",
run=FALSE)
head(get_params(my_scone))
## ----scone_params_view--------------------------------------------------------
apply(get_params(my_scone),2,unique)
## ----scone_params_filt, eval=FALSE--------------------------------------------
#
# is_screened = ((get_params(my_scone)$imputation_method == "expect") &
# (get_params(my_scone)$scaling_method %in% c("none",
# "eff")))
#
# my_scone = select_methods(my_scone,
# rownames(get_params(my_scone))[!is_screened ])
#
## ----scone_run----------------------------------------------------------------
BiocParallel::register(
BiocParallel::SerialParam()
) # Register BiocParallel Serial Execution
my_scone <- scone(my_scone,
scaling=scaling,
run=TRUE,
eval_kclust = 2:6,
stratified_pam = TRUE,
return_norm = "in_memory",
zero = "postadjust")
## ----scone_view1--------------------------------------------------------------
# View Metric Scores
head(get_scores(my_scone))
# View Mean Score Rank
head(get_score_ranks(my_scone))
# Extract normalized data from top method
out_norm = get_normalized(my_scone,
method = rownames(get_params(my_scone))[1])
## ----biplot_color-------------------------------------------------------------
pc_obj = prcomp(apply(t(get_scores(my_scone)),1,rank),
center = TRUE,scale = FALSE)
bp_obj = biplot_color(pc_obj,y = -get_score_ranks(my_scone),expand = .6)
## ----biplot_color4------------------------------------------------------------
bp_obj = biplot_color(pc_obj,y = -get_score_ranks(my_scone),expand = .6)
points(t(bp_obj[1,]), pch = 1, col = "red", cex = 1)
points(t(bp_obj[1,]), pch = 1, col = "red", cex = 1.5)
points(t(bp_obj[rownames(bp_obj) == "none,none,no_uv,no_bio,no_batch",]),
pch = 1, col = "blue", cex = 1)
points(t(bp_obj[rownames(bp_obj) == "none,none,no_uv,no_bio,no_batch",]),
pch = 1, col = "blue", cex = 1.5)
arrows(bp_obj[rownames(bp_obj) == "none,none,no_uv,no_bio,no_batch",][1],
bp_obj[rownames(bp_obj) == "none,none,no_uv,no_bio,no_batch",][2],
bp_obj[1,][1],
bp_obj[1,][2],
lty = 2, lwd = 2)
## ----sconeReport, eval=FALSE--------------------------------------------------
#
# # Methods to consider
# scone_methods = c(rownames(get_params(my_scone))[1:12],
# "none,none,no_uv,no_bio,no_batch")
#
# # Shiny app
# sconeReport(my_scone,methods = scone_methods,
# qc = ppq,
# bio = bio,
# negcon = negcon, poscon = poscon)
#
## ----session------------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.