#################################################
########## NA2 ###########
#################################################
NA_compute2_load <- function(data_dir, depth_dir, out_dir) {
#browser()
dir.create(out_dir, showWarnings = F)
if (!dir.exists(out_dir))
showNotification(paste("Filtered data output folder,", out_dir, ", cannot be created"), duration = 10, type = "warning")
# load data
cells_aggregate_info <- NULL
snpMut_filt_freq <- NULL
depth <-NULL
if (file.exists(file.path( data_dir, "cells_aggregate_info.rds")))
cells_aggregate_info <- readRDS(file=paste0(file.path( data_dir, "cells_aggregate_info.rds")))
if (file.exists(file.path( data_dir, "snpMut_filt_freq.rds")))
snpMut_filt_freq <- readRDS(file=paste0(file.path( data_dir, "snpMut_filt_freq.rds")))
if (file.exists(file.path( depth_dir, "final_data_depth.txt")))
depth <- as.matrix(read.table( file.path(depth_dir, "final_data_depth.txt")))
return( list(cells_aggregate_info=cells_aggregate_info, snpMut_filt_freq=snpMut_filt_freq, depth=depth))
}
NA_compute2 <- function(depth_minimum, minumum_median_total, minumum_median_mutation, data_dir, depth_dir, out_dir, time_points, verified_genes, files) {
#browser()
cells_aggregate_info <- files$cells_aggregate_info
snpMut_filt_freq <- files$snpMut_filt_freq
depth <- files$depth
# set working directory
#baseDir = "/data_directory/BimiB/share/LACE/MELANOMA/"
#setwd(baseDir)
# load required libraries
# library("TRONCO")
# dir.create(out_dir, showWarnings = F)
# if (!dir.exists(out_dir))
# showNotification(paste("Filtered data output folder,", out_dir, ", cannot be created"), duration = 10, type = "warning")
# list of manually verified mutations
#verified_genes = c("ARPC2","CCT8","COL1A2","CYCS","HNRNPC","PCBP1","PRAME","RPL5")
#depth_minimum = 3 # minimum depth to set values to NA
#minumum_median_total = 10 # minimum median depth for total reads
#minumum_median_mutation = 4 # minimum median depth for reads supporting mutations
# load data
# cells_aggregate_info <- readRDS(file=paste0(file.path( data_dir, "cells_aggregate_info.rds")))
# snpMut_filt_freq <- readRDS(file=paste0(file.path( data_dir, "snpMut_filt_freq.rds")))
# depth = as.matrix(read.table( file.path(depth_dir, "final_data_depth.txt")))
# select only verified mutations
if (!is.null(verified_genes))
snpMut_filt_freq = snpMut_filt_freq[
which(snpMut_filt_freq$Gene %in% verified_genes),
c("scID","Time","Gene","Chr","PosStart","PosEnd","REF",
"ALT","MutType","depth","Allele_Ratio")
]
snpMut_filt_freq = snpMut_filt_freq[order(snpMut_filt_freq[,3],snpMut_filt_freq[,4],snpMut_filt_freq[,5],snpMut_filt_freq[,6],snpMut_filt_freq[,7],snpMut_filt_freq[,8],snpMut_filt_freq[,9],snpMut_filt_freq[,1],snpMut_filt_freq[,2],snpMut_filt_freq[,10],snpMut_filt_freq[,11]),]
# compute frequency of each mutation
#distinct_mutations = unique(snpMut_filt_freq[,c("Gene","Chr","PosStart","PosEnd","REF","ALT","ALT","ALT","ALT","ALT","ALT","ALT")])
distinct_mutations = unique(snpMut_filt_freq[,c("Gene","Chr","PosStart","PosEnd","REF","ALT")])
#time_points=c("before treatment", "4d on treatment", "28d on treatment", "57d on treatment")
for (t in seq(1,length(time_points)) )
distinct_mutations[[paste0('FreqT',t)]] = NA
distinct_mutations[['MedianDepth']] = NA
distinct_mutations[['MedianDepthMut']] = NA
#colnames(distinct_mutations)[7] = "FreqT1"
#colnames(distinct_mutations)[8] = "FreqT2"
#colnames(distinct_mutations)[9] = "FreqT3"
#colnames(distinct_mutations)[10] = "FreqT4"
#colnames(distinct_mutations)[11] = "MedianDepth"
#colnames(distinct_mutations)[12] = "MedianDepthMut"
cells_timepoints = unique(snpMut_filt_freq[,c("scID","Time")])
#distinct_mutations$FreqT1 = NA
#distinct_mutations$FreqT2 = NA
#distinct_mutations$FreqT3 = NA
#distinct_mutations$FreqT4 = NA
#distinct_mutations$MedianDepth = NA
#distinct_mutations$MedianDepthMut = NA
times = list()
for (t in time_points)
times[[t]] = as.numeric(table(cells_timepoints$Time)[t])
for(i in 1:nrow(distinct_mutations)) {
curr = snpMut_filt_freq[which(snpMut_filt_freq$Gene%in%distinct_mutations[i,"Gene"]&snpMut_filt_freq$Chr%in%distinct_mutations[i,"Chr"]&snpMut_filt_freq$PosStart%in%distinct_mutations[i,"PosStart"]&snpMut_filt_freq$PosEnd%in%distinct_mutations[i,"PosEnd"]&snpMut_filt_freq$REF%in%distinct_mutations[i,"REF"]&snpMut_filt_freq$ALT%in%distinct_mutations[i,"ALT"]),]
for (t in seq(1,length(time_points)) )
distinct_mutations[i,paste0('FreqT',t)] = as.numeric(table(curr$Time)[time_points[[t]]]) / times[[t]]
#distinct_mutations[i,"FreqT1"] = as.numeric(table(curr$Time)["before treatment"]) / t1
#distinct_mutations[i,"FreqT2"] = as.numeric(table(curr$Time)["4d on treatment"]) / t2
#distinct_mutations[i,"FreqT3"] = as.numeric(table(curr$Time)["28d on treatment"]) / t3
#distinct_mutations[i,"FreqT4"] = as.numeric(table(curr$Time)["57d on treatment"]) / t4
#print('a')
#print(curr$depth)
#print(median(curr$depth))
#print(median(curr$Allele_Ratio))
distinct_mutations[i,"MedianDepth"] = as.numeric(median(curr$depth))
distinct_mutations[i,"MedianDepthMut"] = as.numeric(median(curr$Allele_Ratio))
}
#distinct_mutations = distinct_mutations[-c(3,5,6,8),]
if (length(sort(unique(c(which(distinct_mutations$MedianDepth<minumum_median_total),which(distinct_mutations$MedianDepthMut<minumum_median_mutation)))))>0)
distinct_mutations = distinct_mutations[-sort(unique(c(which(distinct_mutations$MedianDepth<minumum_median_total),which(distinct_mutations$MedianDepthMut<minumum_median_mutation)))),]
if (nrow(distinct_mutations)==0)
return(NULL)
rownames(distinct_mutations) = 1:nrow(distinct_mutations)
valid_distinct_mutations = distinct_mutations
valid_distinct_mutations_values = NULL
for(i in 1:nrow(valid_distinct_mutations)) {
valid_distinct_mutations_values = c(valid_distinct_mutations_values,paste0(valid_distinct_mutations[i,c("Chr","PosStart")],collapse="_"))
}
save(valid_distinct_mutations,file=file.path(out_dir,"valid_distinct_mutations.RData"))
# make final mutations data structures
#load("valid_clones_mapping.RData")
#mutations = array(0,c(length(unique(valid_clones_mapping$Run)),6))
#rownames(mutations) = sort(unique(valid_clones_mapping$Run))
#colnames(mutations) = paste0(valid_distinct_mutations$Gene,"_",valid_distinct_mutations_values,"_",valid_distinct_mutations$REF,"_",valid_distinct_mutations$ALT)
mutations = array(0,c(length(unique(colnames(depth))),nrow(valid_distinct_mutations)))
rownames(mutations) = sort(unique(colnames(depth)))
colnames(mutations) = paste0(valid_distinct_mutations$Gene,"_",valid_distinct_mutations_values,"_",valid_distinct_mutations$REF,"_",valid_distinct_mutations$ALT)
for(i in 1:nrow(valid_distinct_mutations)) {
curr_gene = valid_distinct_mutations[i,"Gene"]
curr_chr = valid_distinct_mutations[i,"Chr"]
curr_start = valid_distinct_mutations[i,"PosStart"]
curr_end = valid_distinct_mutations[i,"PosEnd"]
curr_ref = valid_distinct_mutations[i,"REF"]
curr_alt = valid_distinct_mutations[i,"ALT"]
curr_mutant_cells = which(cells_aggregate_info$Gene==curr_gene&cells_aggregate_info$Chr==curr_chr&cells_aggregate_info$PosStart==curr_start&cells_aggregate_info$PosEnd==curr_end&cells_aggregate_info$REF==curr_ref&cells_aggregate_info$ALT==curr_alt)
curr_mutant_cells = cells_aggregate_info$scID[curr_mutant_cells]
mutations[curr_mutant_cells[which(curr_mutant_cells%in%rownames(mutations))],i] = 1
}
# set NA values
depth = t(depth)
depth = depth[,valid_distinct_mutations_values, drop=FALSE]
depth = depth[rownames(mutations),,drop=FALSE]
colnames(depth) = colnames(mutations)
mutations[which(depth<=depth_minimum,arr.ind=TRUE)] = NA # missing values rate equals to 359/2850, that is approx 12.6%
# make final data
cells_aggregate_info = cells_aggregate_info[which(cells_aggregate_info$scID%in%rownames(mutations)), , drop=F]
print('cells_aggregate_info')
print(dim(cells_aggregate_info))
mycellsdata = list()
for ( t in time_points )
mycellsdata[[t]] = mutations[sort(unique(cells_aggregate_info$scID[which(cells_aggregate_info$Time==t)])), , drop=F]
D = mycellsdata
save(D,file=file.path(out_dir,"D.RData"))
# make Oncoprint
n_rows=0
for (t in seq(1, length(time_points)))
n_rows=n_rows+nrow(D[[t]])
clusters = array(NA,c(n_rows,1))
init_i=1
end_i=0
r_names= NULL
data = NULL
for (t in seq(1, length(time_points))) {
if(nrow(D[[t]])>0) {
end_i <- end_i+nrow(D[[t]])
r_names=c(r_names, rownames(D[[t]]))
clusters[init_i:end_i,1] = time_points[[t]]
init_i <- end_i+1
data <- rbind(data, D[[t]])
}
}
rownames(clusters) <- r_names
#clusters[1:nrow(D[[1]]),1] = "T1_before_treatment"
#clusters[(nrow(D[[1]])+1):(nrow(D[[1]])+nrow(D[[2]])),1] = "T2_4_days_treatment"
#clusters[(nrow(D[[1]])+nrow(D[[2]])+1):(nrow(D[[1]])+nrow(D[[2]])+nrow(D[[3]])),1] = "T3_28_days_treatment"
#clusters[(nrow(D[[1]])+nrow(D[[2]])+nrow(D[[3]])+1):(nrow(D[[1]])+nrow(D[[2]])+nrow(D[[3]])+nrow(D[[4]])),1] = "T4_57_days_treatment"
#rownames(clusters) = c(rownames(D[[1]]),rownames(D[[2]]),rownames(D[[3]]),rownames(D[[4]]))
#data = rbind(D[[1]],D[[2]],D[[3]],D[[4]])
data[which(is.na(data))] = 0
data = import.genotypes(data)
data = annotate.stages(data,clusters)
## if (ncol(data$genotypes)>1 || length(unique(data$genotypes[,1]))>1) #errore generico
## oncoprint(data,excl.sort=FALSE,group.by.stage=TRUE)
distinct_mutations[['ResultantMeanDepth']]<-apply(depth,2,mean)
distinct_mutations[['ResultantVarDepth']]<-apply(depth,2,var)
num_columns <- sapply(distinct_mutations, is.numeric)
distinct_mutations[num_columns] <- lapply(distinct_mutations[num_columns], round, 3)
return(distinct_mutations)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.