#' @title iCTC-identification of CTC
#'
#' @description this package stats::predicts whether -- is a CTC or
#' not using various preprocessing techniques and machine learning
#'
#' @param cell_samples, can contain any number of peripheral blood cells,
#' with atleast desired genes either in row or column
#'
#' @param cases, can contain any subset of (1,2,3,4,5,6,7,8,9) numbers,
#' meaning of these numbers given below:
#' 1-> Harmony_NB, 2-> Harmony_RF, 3-> Harmony_GBM
#'
#' 4-> PCA_NB, 5-> PCA_RF, 6-> PCA_GBM
#'
#' 7-> Original_NB, 8-> Original_RF, 9-> Original_GBM
#'
#' Harmony-> projects cells into a shared embedding in which cells group by
#' cell type rather than dataset-specific conditions.
#' PCA-> Principal component analysis (PCA) is a technique used to emphasize
#' variation and bring out strong patterns in a dataset.
#' Original means none of (Harmony, PCA).
#'
#' NB-> Naive Bayes(ML Model)
#' RF-> Random Forest(ML Model)
#' GBM-> Gradient Boosting Machine(ML Model)
#' @examples
#' library(devtools)
#' install_github("immunogenomics/harmony")
#' library(harmony)
#' cell_samples<-iCTC::raw_test_data$Clearcell_Polaris_sample_test
#' results<-iCTC(cell_samples=cell_samples, cases = c(4,5,6))
#' results$CTC_probabilistic_score
#' @return results, will retrun table of samples and predicted values
#' corresponding cases which have given
#' row conatins cases and column with sample names
#' @export iCTC
iCTC <- function(cell_samples,cases=c(4,5,6))
{
cases<-as.integer(cases)
if(any(cases>9) || any(cases<1))
{
print("please give any input in range 1 to 9")
return()
}
gc()
cases<-sort(cases)
cell_samples_name<-0
if(is.matrix(cell_samples))
{
cell_samples<-cell_samples
}else
{
cell_samples_name<-names(cell_samples)
cell_samples<-matrix(cell_samples)
rownames(cell_samples)<-cell_samples_name
}
cell_samples<-genes_filter(cell_samples)
if(!is.matrix(cell_samples))
return(cell_samples)
print(cases)
cell_samples_name<-colnames(cell_samples)
colnames(cell_samples)<-paste(seq_len(length(colnames(cell_samples))),
colnames(cell_samples),seq_len(length(colnames(cell_samples))),sep="_")
n_models<-3
n_methods<-3
count<-list("harmony_count"=c(0),"pca_count"=c(0),"original_count"=c(0))
for(i in seq_len(length(cases)))
{
for(j in seq_len(n_methods))
{
if(cases[[i]]<=(n_models*j) && cases[[i]]>=(n_models*(j-1)+1))
{
count[[j]] <- count[[j]] + 1
}
}
}
methods<-c("Harmony","PCA","Original")
models<-c("NB","RF","GBM")
method_functions<-list(harmony,pca,original)
model_functions<-list(NB,RF,GBM)
for(j in seq_len(n_methods))
{
print(paste(methods[j]," count ", count[[j]], sep=""))
}
count<-list("harmony_count"=c(0),"pca_count"=c(0),"original_count"=c(0))
all_result <- list()
temp_method<-0
for(i in seq_len(length(cases)))
{
for(j in seq_len(n_methods))
{
if(cases[[i]]>=(n_models*(j-1)+1) &&
cases[[i]]<=(n_models*j) && count[[j]]==0)
{
temp_method<-j
count[[j]]<-1
print(paste(methods[[j]], " correction running...", sep=""))
preprocessed_data <- method_functions[[j]](cell_samples)
print(paste(methods[[j]], " correction Done", sep=""))
}
}
for(k in seq_len(n_models))
{
if(k==n_models)
k<-0
if(cases[[i]] %% n_models==k)
{
gc()
if(k==0)
k<-n_models
print(paste(models[[k]], " running...", sep=""))
all_result <- append(all_result,
list(model_functions[[k]](temp_method,preprocessed_data)))
print(paste(models[[k]], " Done", sep=""))
}
}
}
models_methods<-c()
for(i in cases)
{
models_methods<-c(models_methods,
paste(methods[as.integer(((i-1)/n_models)+1)],
models[as.integer(((i-1)%%n_models)+1)],sep="_"))
}
result_label<-matrix(0,length(cases),length(cell_samples[1,]))
rownames(result_label)<-models_methods
colnames(result_label)<-cell_samples_name
result_CTC<-matrix(0,length(cases),length(cell_samples[1,]))
rownames(result_CTC)<-models_methods
colnames(result_CTC)<-cell_samples_name
result_Blood<-matrix(0,length(cases),length(cell_samples[1,]))
rownames(result_Blood)<-models_methods
colnames(result_Blood)<-cell_samples_name
for(col in seq_len(length(cell_samples[1,])))
{
for(row in seq_len(length(cases)))
{
result_CTC[row,col]<-all_result[[row]]$te_probCTC[col]
result_Blood[row,col]<-all_result[[row]]$te_probCTC[col]
if(all_result[[row]]$te_labels[col]==0)
{
result_label[row,col]<-"CTC"
}
if(all_result[[row]]$te_labels[col]==1)
{
result_label[row,col]<-"Blood"
}
}
}
results<-list("predicted_Labels"=result_label,
"CTC_probabilistic_score"=result_CTC,
"Blood_probabilistic_score"=result_Blood)
return(results)
}
genes_filter <- function(gene_file)
{
gc()
common_genes <- rownames(iCTC::Original_data_1$log_normalized_data_train_1)
if(length(stats::na.omit(match(toupper(common_genes),
toupper(rownames(gene_file))))>=0)==length(common_genes))
{
index<-1
print("All desired genes in row names found")
}
else if(length(stats::na.omit(match(toupper(common_genes),
toupper(colnames(gene_file))))>=0)==length(common_genes))
{
index<-2
print("All desired genes in col names found")
}else
{
index<-3
count_1<-length(stats::na.omit(match(toupper(common_genes),
toupper(rownames(gene_file))))>=0)
count_2<-length(stats::na.omit(match(toupper(common_genes),
toupper(colnames(gene_file))))>=0)
if(count_1>count_2 && count_2==0)
{
print(paste("Only ",count_1,
" desired genes found in row names of your data",sep=""))
print("please provide data with atleast desired genes
either in col or row names")
return(index)
}
if(count_2>count_1 && count_1==0)
{
print(paste("Only ",count_2,
" desired genes found in col names of your data",sep=""))
print("please provide data with atleast desired genes
either in col or row names")
return(index)
}
if(count_2>0 && count_1>0)
{
print(paste("Only ",count_1," and ",count_2,
" desired genes found in row and col names of your data",
sep=""))
print("please provide data with atleast desired genes
either in col or row names")
return(index)
}
if(count_2==0 && count_1==0)
{
print("no single desired gene found in your data")
print("please provide data with atleast desired genes
either in col or row names")
return(index)
}
}
if(index==1)
{
index<-apply(gene_file,2,
function(x) sum(x>0)>=(length(gene_file[,1])*.06))
if((length(gene_file[1,])-length(index))>0)
{
print(paste("only ",(length(gene_file[1,])-length(index)),
" samples found with atlest 10 percent expressed genes
in column names of your data"))
}else
{
print("All samples found with atlest 10 percent expressed genes
in column names of your data")
}
return(gene_file[match(toupper(common_genes),
toupper(rownames(gene_file))),index])
}
if(index==2)
{
index<-apply(gene_file,1,
function(x) sum(x>0)>=(length(gene_file[,1])*.1))
if((length(gene_file[,1])-length(index))>0)
{
print(paste("only ",(length(gene_file[,1])-length(index)),
" samples found with atlest 10 percent expressed genes
in column names of your data"))
}else
{
print("All samples found with atlest 10 percent expressed genes
in row names of your data ")
}
return(t(gene_file[index,match(toupper(common_genes),
toupper(colnames(gene_file)))]))
}
}
normalization <- function(data,method="log")
{
gc()
if (method == "log")
{
log_counts <- log(data + 1)
print("Transformation Done")
return(log_counts)
}
else if (method == "median")
{
normalized_matrix <- as.matrix(data)
total_count<-colSums(data)
med_total<-stats::median(total_count)
for(i in seq_len(ncol(normalized_matrix)))
{
normalized_matrix[,i] <- data[,i] * (med_total/total_count[i])
}
print("Normalization Done")
return(normalized_matrix)
}else
{
return("error")
}
}
pca <- function(user_data)
{
gc()
user_data<-normalization(user_data,method = "median")
user_data<-normalization(user_data,method = "log")
user_data<-stats::predict(iCTC::pca_data_100,t(user_data))
return(user_data)
}
harmony <- function(user_data)
{
user_data<-normalization(user_data,method = "median")
user_data<-normalization(user_data,method = "log")
user_data<-stats::predict(iCTC::pca_data_100,t(user_data))
temp<-rbind(iCTC::PCA_data_2$log_normalized_data_train_2,user_data)
temp_1<-matrix(0,ncol=4,nrow=dim(user_data)[1])
temp_1[,1]<-rownames(user_data)
temp_1[,2]<-rep("other_1",dim(user_data)[1])
temp_1[,3]<-rep("temp_1",dim(user_data)[1])
temp_1[,4]<-rep("sample_1",dim(user_data)[1])
colnames(temp_1)<-c("Type","Study","Sample_id","label")
temp_2<-matrix(0, ncol=4,
nrow=dim(iCTC::PCA_data_2$log_normalized_data_train_2)[1])
temp_2[,1]<-rownames(iCTC::PCA_data_2$log_normalized_data_train_2)
temp_2[,2]<-rep("other_2",
dim(iCTC::PCA_data_2$log_normalized_data_train_2)[1])
temp_2[,3]<-rep("temp_2",
dim(iCTC::PCA_data_2$log_normalized_data_train_2)[1])
temp_2[,4]<-rep("sample_2",
dim(iCTC::PCA_data_2$log_normalized_data_train_2)[1])
colnames(temp_1)<-c("Type","Study","Sample_id","label")
temp_2<-rbind(temp_2,temp_1)
temp_2<-data.frame(temp_2)
harmonized_pcs <- harmony::HarmonyMatrix( data_mat = temp,
meta_data <- temp_2, vars_use = "Study", do_pca = FALSE)
hyrdo_test_3<-harmonized_pcs[which(unlist(temp_2$Study) %in% "other_1"),]
return(hyrdo_test_3)
}
original <- function(user_data)
{
gc()
user_data<-normalization(user_data,method = "median")
user_data<-normalization(user_data,method = "log")
return(t(user_data))
}
NB <- function(data_case,te)
{
gc()
if(data_case==1)
{
test_prediction_1 <-stats::predict(iCTC::nb_3,te)
test_prob <-stats::predict(iCTC::nb_3,te,type="prob")
}
if(data_case==2)
{
test_prediction_1 <-stats::predict(iCTC::nb_2,te)
test_prob <-stats::predict(iCTC::nb_2,te,type="prob")
}
if(data_case==3)
{
data_2<-data.frame(t(iCTC::Original_data_1$log_normalized_data_train_1),
"labels"=factor(c(rep(0,538),rep(1,1323))))
fit<-caret::train(data_2[,-c(dim(data_2)[2])], data_2[,dim(data_2)[2]],
'nb',trControl=caret::trainControl(method='cv',
number=5, returnResamp = "all"))
test_prediction_1 <-stats::predict(fit,te)
test_prob <-stats::predict(fit,te,type="prob")
}
return_list <- list("te_labels"=as.numeric(as.matrix(test_prediction_1)),
"te_probCTC"=as.numeric(test_prob[,1]),
"te_probBlood"=as.numeric(test_prob[,2]))
return(return_list)
}
RF <- function(data_case,te)
{
gc()
if(data_case==1)
{
test_prediction_1 <-stats::predict(iCTC::rf_3,te)
test_prob <-stats::predict(iCTC::rf_3,te,type="prob")
}
if(data_case==2)
{
test_prediction_1 <-stats::predict(iCTC::rf_2,te)
test_prob <-stats::predict(iCTC::rf_2,te,type="prob")
}
if(data_case==3)
{
test_prediction_1 <-stats::predict(iCTC::rf_1,te)
test_prob <-stats::predict(iCTC::rf_1,te,type="prob")
}
return_list <- list("te_labels"=as.numeric(as.matrix(test_prediction_1)),
"te_probCTC"=as.numeric(test_prob[,1]),
"te_probBlood"=as.numeric(test_prob[,2]))
return(return_list)
}
GBM <- function(data_case,te)
{
gc()
if(data_case==1)
{
test_prediction_1 <-stats::predict(iCTC::gbm_3,te)
test_prob <-stats::predict(iCTC::gbm_3,te,type="prob")
}
if(data_case==2)
{
test_prediction_1 <-stats::predict(iCTC::gbm_2,te)
test_prob <-stats::predict(iCTC::gbm_2,te,type="prob")
}
if(data_case==3)
{
test_prediction_1 <-stats::predict(iCTC::gbm_1,te)
test_prob <-stats::predict(iCTC::gbm_1,te,type="prob")
}
return_list <- list("te_labels"=as.numeric(as.matrix(test_prediction_1)),
"te_probCTC"=as.numeric(test_prob[,1]),
"te_probBlood"=as.numeric(test_prob[,2]))
return(return_list)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.