#' @export
#' @importFrom Rfast colRanks
#' @importFrom dplyr summarise id desc
#' @importFrom plotly group_by
#' @import octad.db
#### runsRGES #######
runsRGES <- function(dz_signature=NULL,choose_fda_drugs = FALSE,max_gene_size=500,
cells=NULL,outputFolder=NULL,weight_cell_line=NULL,permutations=10000){
getsRGES <- function(RGES, cor, pert_dose, pert_time, diff, max_cor){
sRGES <- RGES
pert_time <- ifelse(pert_time < 24, "short", "long")
pert_dose <- ifelse(pert_dose < 10, "low", "high")
if (pert_time == "short" & pert_dose == "low"){
sRGES <- sRGES + diff[4]
}
if (pert_dose == "low" & pert_time == "long"){
sRGES <- sRGES + diff[2]
}
if (pert_dose == "high" & pert_time == "short"){
sRGES <- sRGES + diff[1]
}
return(sRGES * cor/max_cor)
}
cmap_score_ultimate=function (sig_up, sig_down, drug_signature)
{
num_genes <- length(drug_signature)
ks_up <- 0
ks_down <- 0
connectivity_score <- 0
drug_signature=rank(drug_signature)
up_tags_rank=drug_signature[as.vector(sig_up)]
down_tags_rank=drug_signature[as.vector(sig_down)]
up_tags_position=sort(up_tags_rank)
down_tags_position=sort(down_tags_rank)
num_tags_up <- length(up_tags_position)
num_tags_down <- length(down_tags_position)
if (num_tags_up > 1) {
a_up <- 0
b_up <- 0
a_up <- max(sapply(seq_len(num_tags_up), function(j) {
j/num_tags_up - up_tags_position[j]/num_genes
}))
b_up <- max(sapply(seq_len(num_tags_up), function(j) {
up_tags_position[j]/num_genes - (j - 1)/num_tags_up
}))
if (a_up > b_up) {
ks_up <- a_up
}
else {
ks_up <- -b_up
}
}
else {
ks_up <- 0
}
if (num_tags_down > 1) {
a_down <- 0
b_down <- 0
a_down <- max(sapply(seq_len(num_tags_down), function(j) {
j/num_tags_down - down_tags_position[j]/num_genes
}))
b_down <- max(sapply(seq_len(num_tags_down), function(j) {
down_tags_position[j]/num_genes - (j - 1)/num_tags_down
}))
if (a_down > b_down) {
ks_down <- a_down
}
else {
ks_down <- -b_down
}
}
else {
ks_down <- 0
}
if (ks_up == 0 & ks_down != 0) {
connectivity_score <- -ks_down
}
else if (ks_up != 0 & ks_down == 0) {
connectivity_score <- ks_up
}
else if (sum(sign(c(ks_down, ks_up))) == 0) {
connectivity_score <- ks_up - ks_down
}
else {
connectivity_score <- ks_up - ks_down
}
return(connectivity_score)
}
if(missing(dz_signature)){
stop('Disease signature input not found')
}
if(is.null(dz_signature$Symbol)|is.null(dz_signature$log2FoldChange)){
stop('Either Symbol or log2FoldChange collumn in Disease signature is missing')
}
if (!missing(outputFolder)) {
if(!dir.exists(outputFolder)){
dir.create(outputFolder)
}
}
if(!missing(cells)){
lincs_sig_info$cell_id = toupper(octad.db::lincs_sig_info$cell_id)
lincs_sig_info = subset(octad.db::lincs_sig_info,cell_id %in% cells)
}else if(!missing(cells)&nrow(lincs_sig_info)==0){
stop('Wrong cell line name. List of possible cell lines is available via command unique(octad.db::lincs_sig_info$cell_id)')
}else if(missing(cells)){
cells='' #plug for older code version
}
lincs_signatures=octad.db::lincs_signatures
# }else{
#don't bother
# load(paste0(data,"lincs_signatures_cmpd_landmark_GSE92742.RData"))
# }
if (choose_fda_drugs) {
#fda_drugs = read.csv("data/repurposing_drugs_20170327.txt", stringsAsFactors = F,sep='\t')
#load('data/fda_drugs.rda')
fda_drugs=octad.db::fda_drugs
lincs_sig_info_FDA <- subset(lincs_sig_info, id %in% colnames(lincs_signatures) & tolower(pert_iname) %in% tolower(fda_drugs$pert_iname))
FDAdf <- select(lincs_sig_info_FDA, pert_id, pert_iname)
FDAdf <- unique(FDAdf[,seq_len(2)])
write.csv(FDAdf,file = paste0(outputFolder,"FDA_approved_drugs.csv"),row.names = FALSE)
lincs_sig_info <- subset(lincs_sig_info,id %in% colnames(lincs_signatures))
}else{
lincs_sig_info <- subset(lincs_sig_info, id %in% colnames(lincs_signatures))
}
#write paths
output_path <- paste0("all_",paste(cells,collapse='_'),"_lincs_score.csv")
sRGES_output_path <- paste0("sRGES",paste(cells,collapse='_'),".csv")
sRGES_output_path_drug <- paste0("sRGES_FDAapproveddrugs.csv")
dz_sig_output_path <- paste0("dz_sig_used.csv")
#remove duplicate instances
lincs_sig_info <- lincs_sig_info[!duplicated(lincs_sig_info$id),]
sig.ids <- lincs_sig_info$id
####compute RGES####
gene.list <- toupper(rownames(lincs_signatures))
dz_signature <- subset(dz_signature,Symbol %in% gene.list)
dz_genes_up <- subset(dz_signature,log2FoldChange>0)
dz_genes_up=dz_genes_up[order(dz_genes_up$log2FoldChange,decreasing=TRUE),]
dz_genes_down <- subset(dz_signature,log2FoldChange<0)
dz_genes_down=dz_genes_down[order(dz_genes_down$log2FoldChange,decreasing=TRUE),]
###############
#compute RGES
#caps gene selection to max gene size
if (nrow(dz_genes_up) > max_gene_size){
dz_genes_up <- dz_genes_up %>% head(max_gene_size)
}
if (nrow(dz_genes_down) > max_gene_size){
#dz_genes_down <- data.frame(GeneID=dz_genes_down[1:max_gene_size,]) %>% left_join(dz_signature,by = 'GeneID')
dz_genes_down <- dz_genes_down %>% head(max_gene_size)
}
write.csv(rbind(dz_genes_up, dz_genes_down), dz_sig_output_path)
#print(paste('finished loading in', round(Sys.time()-start,2),units(Sys.time()-start)))
#start=Sys.time()
parallel=FALSE
if(!parallel){
# require(lme4)
# require(Rfast)
dz_cmap_scores <- NULL
#count <- 0
cmap_exp_sig <- Rfast::colRanks(-1 * lincs_signatures, method = "max")
names.list <- list(rownames(lincs_signatures),colnames(lincs_signatures))
dimnames(cmap_exp_sig) <- names.list
cat(paste('Started sRGES computation. Average computation time ~1-3mins.'),'\n')
start_time=Sys.time()
####slow loop#### NO SLOW LOOP ANYMORE! :)
pb <- txtProgressBar(min = 1, max = permutations, style = 3) #set progressbar
i=0
time_vector=0
#loop_time=Sys.time()
cmap_exp_signature=data.frame(ids = gene.list, rank = cmap_exp_sig[,as.vector(sig.ids)])
cmap_exp_sig=as.data.frame(cmap_exp_sig)
cmap_exp_sig$ids=NULL
dz_cmap_scores=apply(cmap_exp_sig[as.vector(sig.ids)],2,
FUN=function(x) cmap_score_ultimate(dz_genes_up$Symbol,dz_genes_down$Symbol,drug_signature=x))
#for (exp_id in sig.ids) {
# i=i+1
# setTxtProgressBar(pb, i)
#count <- count + 1
#print(count)
#cmap_exp_signature is a dataframe with columns ids: whatever id we're using e.g. Symbol
#rank which is the reverse rank of the expression of the expression
# cmap_exp_signature <- data.frame(gene.list,
# Rfast::colRanks(-1 * lincs_signatures[, as.character(exp_id)],
# method = "min"))
# colnames(cmap_exp_signature) <- c("ids","rank")
#runs a function cmap_score_new from drugs_core_functions.R
# setTxtProgressBar(pb, exp_id)
# x=Sys.time()
# cmap_exp_signature <- data.frame(ids = gene.list, rank = cmap_exp_sig[,exp_id])
# dz_cmap_scores <- c(dz_cmap_scores,
# cmap_score_new(dz_genes_up$Symbol,dz_genes_down$Symbol,
# cmap_exp_signature[c('ids',paste('rank',exp_id,sep='.'))]))
# time_vector=c(time_vector,Sys.time()-x)
#}
#Sys.time()-loop_time
#print(paste('finished slow loop in', round(Sys.time()-start,2),units(Sys.time()-start)))
#start=Sys.time()
#random scores
# N_PERMUTATIONS <- 10000 #default 100000
random_sig_ids <- sample(colnames(lincs_signatures),permutations,replace=TRUE)
#count <- 0
random_cmap_scores <- NULL
cmap_exp_signature=as.data.frame(Rfast::colRanks(-1 * lincs_signatures[, as.character(random_sig_ids)], method = "max"))
random_cmap_scores=apply(cmap_exp_signature,2,
FUN=function(x) cmap_score_ultimate(
sample(1:length(dz_genes_up$Symbol),replace=TRUE),
sample(1:length(dz_genes_down$Symbol),replace=TRUE),
drug_signature=x))
p <- sapply(dz_cmap_scores, function(score){
sum(random_cmap_scores < score)/length(random_cmap_scores)
})
padj <- p.adjust(p, "fdr")
results <- data.frame(id = sig.ids, cmap_score = dz_cmap_scores, p, padj)
results <- merge(results, lincs_sig_info, by = "id")
results <- results[order(results$cmap_score),]
write.csv(results, output_path)
####################
#summarize RGES
lincs_drug_prediction <- read.csv(output_path)
#should use pert_dose > 0.01
lincs_drug_prediction_subset <- subset(lincs_drug_prediction, pert_dose > 0 & pert_time %in% c(6, 24))
#pairs that share the same drug and cell id
lincs_drug_prediction_pairs <- merge(lincs_drug_prediction_subset, lincs_drug_prediction_subset, by=c("pert_iname", "cell_id"))
#x is the reference
lincs_drug_prediction_pairs <- subset(lincs_drug_prediction_pairs, id.x != id.y & pert_time.x == 24 & pert_dose.x == 10) #, select <- c("cmap_score.x", "cmap_score.y", "pert_dose.y", "pert_time.y"))
#difference of RGES to the reference
lincs_drug_prediction_pairs$cmap_diff <- lincs_drug_prediction_pairs$cmap_score.x - lincs_drug_prediction_pairs$cmap_score.y
lincs_drug_prediction_pairs$dose <- round(log(lincs_drug_prediction_pairs$pert_dose.y, 2), 1)
#estimate difference
lincs_drug_prediction_pairs$dose_bin <- ifelse(lincs_drug_prediction_pairs$pert_dose.y < 10, "low", "high")
diff <- tapply(lincs_drug_prediction_pairs$cmap_diff, paste(lincs_drug_prediction_pairs$dose_bin, lincs_drug_prediction_pairs$pert_time.y), mean)
#ignore weighting cell lines
if (!missing(weight_cell_line)){
lincs_cell_line_weight <- weight_cell_line
pred = merge(lincs_drug_prediction, lincs_cell_line_weight, by.x="cell_id", by.y = 0)
}else{
pred <- lincs_drug_prediction
pred$medcor <- 1
}
pred$RGES <- sapply(1:nrow(pred), function(id){getsRGES(pred$cmap_score[id], pred$medcor[id], pred$pert_dose[id], pred$pert_time[id], diff, max(pred$medcor))})
cmpd_freq <- table(pred$pert_iname)
pred <- subset(pred, pert_iname %in% names(cmpd_freq[cmpd_freq > 0]))
pred_merged <- pred %>%
group_by(pert_iname) %>%
dplyr::summarise(
mean = mean(RGES),
n = length(RGES),
median = median(RGES),
sd = sd(RGES))
pred_merged$sRGES <- pred_merged$mean
pred_merged <- pred_merged[order(pred_merged$sRGES), ]
write.csv(pred_merged, sRGES_output_path)
# return(pred_merged)
#limit to FDA approved drugs
if (choose_fda_drugs){
pred_merged_drug <- merge(fda_drugs, pred_merged, by = "pert_iname")
pred_merged_drug <- pred_merged_drug[order(pred_merged_drug$sRGES), ]
write.csv(pred_merged_drug, sRGES_output_path_drug)
}
cat('\n',paste('Finished computations in', round(Sys.time()-start_time,2),units(Sys.time()-start_time),',writing output'),'\n')
#start=Sys.time()
return(pred_merged)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.