#' sciCNV function
#'
#' @description Inferring copy number variations of scRNA-seq data at single-cell level
#'
#' @param norm.mat: Matrix of normalized single-cell gene expression (e.g. normalized scRNA-seq). Formatted as follows:
#' 1st column = gene name list.
#' 1st Row = cell identifier.
#' Cells are arranged with test cells in leftward columns, followed by control cells (normal diploid cells of matching lineage) in rightward columns.
#' @param ave.ctrl: It is the average expression of cells per gene in control population.
#' @param gen.Loc: It is the matrix of assigning chromosome number to each gene sorted based on chromosme number, starts and ends.
#' @param No.test: number of test cells
#' @param sharpness: a variable that adjusts the resolution used for the sciCNV-curve calculation (by defining a moving window size over which gene expression values are averaged).
#' Default =1.0 (range approximately 0.6-1.4).
#' A lower sharpness can be used to offset data sparsity and provide more reliable detection of large CNVs.
#' A higher sharpness provides more resolution for the detection of smaller CNVs but is more susceptible to noise from data sparsity and thus requires greater data density.
#' @param baseline_adj: The baseline adjustment is only applied to test cells if TRUE is specified. Default is FALSE.
#' @param baseline: An optional correction to adjust the CNV zero setpoint (copy number gain =0) and improve CNV detection when CN gains and losses are substantially unbalanced.
#' Consider using for markedly hyperdiploid or hyodiploid cells with multiple trisomies or monosomies where the chromosome number deviates substantially from 46.
#' Default = 0. Ideal setting: a fraction representing the net fractional genomic change from diploidy (using a positive fraction for gain and a negative fraction for net genomic loss).
#' If the CNV profile is unknown, run the CNV analysis with default zero setting, review the preliminary CNV profile, and consider re-running with baseline ccorrection.
#'
#' @author Ali Mahdipour-Shirayeh, Princess Margaret Cancer Centre, University of Toronto
#'
#' @return Provides the iCNV profile of all single-cells at higher sinsitivity, eficiency and accuracy
#'
#' @examples
#' sciCNV_mat <- sciCNV(norm.mat=normlized_data, ave.ctrl=mean_control, gen.Loc, No.test=100)
#'
#' @import parallel
#'
#' @export
sciCNV <- function(norm.mat,
ave.ctrl,
gen.Loc,
No.test,
sharpness,
baseline_adj = FALSE, # TRUE or FALSE
baseline = 0 # default is 0. Typical range -0.5 to +0.5.
){
## argument validation
if ( is.na(No.test) ){
stop( "Number of tumor cells needs to be inserted.")
}
if (is.na(sharpness)){
sharpness <- 1 # typical range 0.6-1.4
}
if (is.na(baseline_adj)){
baseline_adj = FALSE
}
if ( (baseline_adj == TRUE) & (is.na(baseline)) ){
baseline <- 0
}
if ( (baseline_adj == FALSE) & ( baseline != 0) ){
stop( "The baseline value is only accepted for baseline_adj = TRUE.")
}else if ( (baseline_adj == FALSE) ){
baseline <- 0
}
if( (baseline > 0.5) || (baseline < -0.5) ){
stop( "The baseline value inserted is not supported (baseline is
assumed to be in [-0.5, 0.5] interval).")
}
## Attaching the average gene expression of the control cells
norm.mat1 <- cbind(gen.Loc, as.matrix(norm.mat), ave.ctrl )
rownames(norm.mat1) <- rownames(norm.mat)
## Focusing the sciCNV analysis on genes with expression >2% of cells (open to user specification/optimization).
## This enriches the analysis for informative genes.
## The variable excludes infrequently detected genes, that are detected in less than a minimum % of cells (assessed across both test and control cells).
percent <- 0.02
MSC1 <- norm.mat1[which(rowSums(norm.mat != 0) >= ncol(norm.mat)*percent ), ]
colnames(MSC1) <- c("genes","chromosomes", colnames(norm.mat),"AveNCs")
#chr.n <- sort(MSC1[ , 2], decreasing=FALSE)
MSC <- MSC1[ , -c(1,2)]
#####################################
## single cell inferred CNV function
#####################################
V7Alt <- matrix(0, ncol = (ncol(MSC)-1), nrow = nrow(MSC))
# Average gene expression of control cells
mean.ctrl <- as.matrix(as.numeric(MSC[, ncol(MSC)]) )
## sciCNV for test and control cells
new.genes <- rownames(MSC1)
clmns <- ncol(norm.mat)
chr.n <- as.matrix( gen.Loc[which(as.matrix(gen.Loc[,1]) %in% new.genes), 2])
resolution <- 50*sharpness
P12 <- floor(nrow(MSC)/resolution)
row.n <- length(new.genes)
FF <- rep(0, row.n)
AW <- rep(0, row.n)
BD <- rep(0, row.n)
G <- as.matrix(seq(1,row.n,1))
FF[1] <- 1
AW[1] <- P12
BD[1] <- P12
for(i in 2:row.n){
if( chr.n[i] == chr.n[i-1]){
FF[i] <- FF[i-1] + 1
} else
FF[i] <- 1
}
for(i in 2:row.n){
if( chr.n[i] == chr.n[i-1]){
if( AW[i-1] >0 ){
AW[i] <- AW[i-1] -1
} else
AW[i] <- 0
} else
AW[i] <- P12
}
for(i in seq(row.n-1,1,-1) ){
if( chr.n[i] == chr.n[i+1]){
if( BD[i+1] > 0 ){ BD[i] <- BD[i+1] -1
} else { BD[i] <- 0}
} else { BD[i] <- P12}
}
mat.fab <- cbind(FF,AW, BD)
n.cores <- 1 # detectCores(all.tests = FALSE, logical = TRUE)
v7alt.fun <- function(i){ CNV_infer(ss.expr = as.matrix(MSC[ ,i]),
mean.ctrl = mean.ctrl,
gen.Loc = gen.Loc,
resolution = resolution,
baseline_adj = FALSE,
baseline = 0,
chr.n = chr.n,
P12 = P12,
mat.fab = mat.fab)}
V7Alt11 <- parallel::mclapply( 1:clmns, v7alt.fun, mc.cores = n.cores )
V7Alt <- matrix(unlist(V7Alt11), ncol = clmns, byrow = FALSE)
colnames(V7Alt) <- colnames(MSC[,-ncol(MSC)])
rownames(V7Alt) <- MSC1[ , 1]
####
return(V7Alt)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.