library(tidyverse)
library(magrittr)
library(ARMET)
library(tidybulk)
args = commandArgs(trailingOnly=TRUE)
# foreignProp="0"
# run="1"
# S = "60"
# slope = 0.5
# whichChanging = "16"
# method ="cibersort"
# input_file ="dev/test_simulation_singleCell/input__slope_-1__foreignProp_0__S_30__whichChanging_1__run_1.rds"
# file = "dev/test_simulation_singleCell/output__slope_-1__foreignProp_0__S_30__whichChanging_1__run_1__method_cibersort.rds"
# output_file = "dev/test_simulation/asses__slope_0.5__foreignProp_0__S_60__whichChanging_16__run_1__method_ARMET.rds"
foreignProp = args[1]
run = args[2]
S = args[3]
slope = as.numeric(args[4])
whichChanging = args[5]
method = args[6]
input_file = args[7]
file = args[8]
output_file = args[9]
# Pre-load ARMET in case
if(method == "ARMET") ARMET_res = readRDS(file)
decide_if_fp_tp = function(.data, CI, slope){
.data %>%
# If NA set to 0 and 1
mutate(
estimate = if_else(estimate %>% is.na, 0, estimate),
p.value = if_else(p.value %>% is.na, 1, p.value)
) %>%
# If non significant or significant with opposite direction
mutate(
fp = alpha_2 == 0 &
p.value <= CI |
(alpha_2 != 0 & p.value < CI & (estimate*alpha_2)<0)
) %>%
# If significant but the same direction
mutate(tp = alpha_2 != 0 & p.value < CI & (estimate*alpha_2)>0) %>%
# # Resolve NA presence that cause error
# mutate(
# fp = if_else(is.na(fp), FALSE, fp),
# tp = if_else(is.na(tp), FALSE, tp),
# alpha_2 = if_else(is.na(alpha_2), 0, alpha_2)
# ) %>%
# Filter out accidental fp because of simplex, when cell is really 0 slope
# Keep the NA because I need for ARMET lower level retrieval
filter(
is.na(alpha_2) | !(
fp &
(slope * estimate )<0 &
alpha_2 == 0
)
) %>%
# Eliminate true negative if test failed
filter(!(fp == FALSE & failed_test))
}
process_third_party = function(CI, file, input_file, slope){
readRDS(file) %>%
# Add truth
dplyr::select(.cell_type, estimate, p.value) %>%
left_join(
readRDS(input_file) %>%
attr("proportions") %>%
distinct(`cell_type_curated`, alpha_2) %>%
bind_rows(
tibble(
cell_type_curated = c("endothelial", "epithelial", "fibroblast", "eosinophil", "neutrophil", "dendritic_myeloid", "macrophage", "t_gamma_delta", "mast_cell"),
alpha_2 = 0
)
),
by = c(".cell_type" = "cell_type_curated" )
) %>%
# Apparently the estimate have opposite sign!!!
mutate(estimate = -estimate) %>%
# Mark the failed tests
mutate(failed_test = FALSE) %>%
# # Mark the failed tests
# mutate(failed_test = p.value %>% is.na) %>%
decide_if_fp_tp(CI, slope) %>%
# Filter cells that are not present in reference
filter(alpha_2 %>% is.na %>% `!`)
}
check_lower_levels_for_ARMET = function(.data, input_df, CI, slope){
ancestors =
ARMET::tree %>%
ToDataFrameTypeColFull(TRUE) %>%
filter(level_4 %in% (input_df %>% filter(alpha_2!=0))) %>%
distinct(level_1, level_2, level_3) %>%
as.data.frame %>%
as.character
second_best =
.data %>%
filter(`Cell type category` %in% ancestors ) %>%
mutate(divisor = 1:n()) %>%
# Decide what is the penalty
mutate(tp = divisor/3) %>%
filter( p.value < CI & (estimate*slope)>0) %>%
arrange(divisor) %>%
dplyr::slice(1) %>%
pull(tp)
.data %>%
mutate(tp = ifelse(
!is.na(alpha_2) &
alpha_2 != 0 &
!tp &
length(second_best)>0,
second_best,
as.integer(tp)
))
}
process_ARMET = function(CI, file, input_file, slope){
# Load input data frame that will be used more than once
input_df =
readRDS(input_file) %>%
attr("proportions") %>%
distinct(`cell_type_curated`, alpha_2) %>%
bind_rows(
tibble(
cell_type_curated = c("endothelial", "epithelial", "fibroblast", "eosinophil", "neutrophil", "dendritic_myeloid", "macrophage", "t_gamma_delta", "mast_cell"),
alpha_2 = 0
)
)
#readRDS(file) %$% ## NOW I LOAD THIS BEFORE HAND ONLY ONCE AS IT WAS SLOWING DOWN EVERYTHING
ARMET_res %$%
proportions %>%
#get_CI(CI) %>%
mutate(.value_2 = map_dbl(
draws_cens,
~ .x %>%
filter(A == 2) %>%
pull(.value ) %>%
mean
)) %>%
mutate(prob_non_0_2 = map_dbl(
draws_cens,
~ .x %>%
filter(A == 2) %>%
ARMET:::draws_to_prob_non_zero()
)) %>%
# Add truth
dplyr::select(`Cell type category`, .value_2, prob_non_0_2) %>%
left_join(input_df, by = c("Cell type category" = "cell_type_curated" )) %>%
mutate(
estimate = .value_2,
p.value = (1-abs(prob_non_0_2))
) %>%
# Mark the failed tests
mutate(failed_test = FALSE) %>%
decide_if_fp_tp(CI, slope) %>%
# If I don't get the answer, check the other levels
check_lower_levels_for_ARMET(input_df, CI, slope) %>%
# Filter cells that are not present in reference
filter(alpha_2 %>% is.na %>% `!`)
# # Calculate
# mutate(fp = (alpha_2 == 0) & abs(prob_non_0_2) > CI) %>%
# mutate(tp = (alpha_2 != 0) & abs(prob_non_0_2) > CI) %>%
# # Filter out accidental fp because of simplex
# filter(!(fp & (slope * .value_2 )<0))
}
tibble(
foreignProp = foreignProp,
run = run,
S = S,
slope = slope,
whichChanging = whichChanging,
method = method,
input_file = input_file,
file = file
) %>%
mutate( CI = list(rev(1-c( seq(0.0, 0.85, 0.05), seq(0.85, 0.9999, 0.001),seq(0.9999, 0.99999999, 0.000001) ))) ) %>%
unnest(CI) %>%
# Calculate
mutate(fp_tp = pmap(
list(CI, file, input_file, slope, method),
~ ..5 %>% purrr::when(
(.) == "ARMET" ~ process_ARMET(..1, ..2, ..3, ..4),
~ process_third_party(..1, ..2, ..3, ..4)
))) %>%
unnest(fp_tp) %>%
# Save
saveRDS(output_file, compress = "xz")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.