Nothing
#' Select hits by RSA
#'
#' Selected hits by redundant siRNA activity method. Here is a wrapper function of RSA 1.8 by Yingyao Zhou.
#' @import magrittr
#' @import ggplot2
#' @param dat synthetic lethal RNAi screen data
#' @param treatment the treatment condition in EXPERIMENT_MODIFICATION
#' @param control the control condition in EXPERIMENT_MODIFICATION
#' @param normMethod normalization methods. If "PLATE", then values are normalized by plate median, otherwise use the
#' provided control siRNA
#' @param LB Low bound
#' @param UB up bound
#' @param revHits reverse hit picking, default the lower the score the better
#' @param Bonferroni conceptually useful when there are different number of siRNAs per gene, default FALSE
#' @param outputFile output file name
#' @param scoreFile name of the score file to be written under the current folder
#' @return A result file written to the current folder.
#' \itemize{
#' \item Gene_ID,Well_ID,Score: columns from input spreadsheet
#' \item LogP: OPI p-value in log10, i.e., -2 means 0.01
#' \item OPI_Hit: whether the well is a hit, 1 means yes, 0 means no
#' \item #hitWell: number of hit wells for the gene
#' \item #totalWell: total number of wells for the gene. If gene A has three wells w1, w2 and w3, and w1 and w2 are hits, #totalWell should be 3, #hitWell should be 2, w1 and w2 should have OPI_Hit set as 1 and w3 should have OPI_Hit set as 0.
#' \item OPI_Rank: ranking column to sort all wells for hit picking
#' \item Cutoff_Rank: ranking column to sort all wells based on Score in the simple activity-based method
#' }
#' Note: a rank value of 999999 means the well is not a hit
#' @references
#' Koenig, R. et al. A probability-based approach for the analysis of large-scale RNAi screens. Nat Methods 4, 847-849 (2007).
#' @examples
#' rsaHits(exampleDat, treatment = "treatment", control = "control",
#' normMethod = "PLATE", LB = 0.2, UB = 0.8, revHits = FALSE,
#' Bonferroni = FALSE, outputFile = "RSAhits.csv")
#' @export
rsaHits <- function(dat, treatment, control, normMethod = "PLATE", LB, UB,
revHits = FALSE, Bonferroni = FALSE, outputFile = "RSAhits.csv",
scoreFile = "RSA_score.csv") {
# browser()
if(class(dat) != "data.frame") stop("incorrect input!")
tem.1 <- sapply(unique(as.character(dat$MASTER_PLATE)), .ff_rsaRatio, dat, treatment, control, normMethod = normMethod, simplify = FALSE)
rsaScore <- data.frame(do.call(rbind, lapply(names(tem.1), function(x) tem.1[[x]])))
write.table(rsaScore, scoreFile, sep = "\t", quote = FALSE, row.names = FALSE)
opts <- list(LB = LB, UB = UB, outputFile = outputFile, reverse = revHits, bonferroni = Bonferroni)
rsaRes <- OPI(rsaScore$Gene_ID, rsaScore$Score, opts, rsaScore)
message("File written to:", outputFile)
write.table(rsaRes, outputFile, sep = "\t", quote = FALSE, row.names = FALSE)
message("Summary of RSA:")
message(paste("Total #Genes = ", length(unique(rsaRes$Gene_ID)), collapse = ""));
message(paste("Total #Wells = ", nrow(rsaRes), collapse = ""));
message(paste("Total #Hit Genes = ", length(unique(rsaRes$Gene_ID[rsaRes$OPI_Rank < 999999])), collapse = ""));
message(paste("Total #Hit Wells = ", sum(rsaRes$OPI_Rank < 999999), collapse = ""));
}
#' @keywords internal
handleOneGroup <- function(i, dataset, optsb, t.rank = NULL) {
## how many of them are up or low than cut-off.
if(optsb$reverse) {
i_max = sum(dataset$Score[i]>=optsb$LB)
i_min = max(1,sum(dataset$Score[i]>=optsb$UB))
} else {
i_max = sum(dataset$Score[i]<=optsb$UB)
i_min = max(1,sum(dataset$Score[i]<=optsb$LB))
}
## t.r is the true rank, instead of order.
if (is.null(t.rank)) {
t.r = i
} else {
t.r = t.rank[i]
}
r = OPIScore(t.r, nrow(dataset), i_min, i_max, optsb$bonferroni)
return(cbind(LogP = r["logp"],
OPI_Hit = as.numeric(seq(length(i)) <= r["cutoff"]),
"#hitWell" = i_max,
"#totalWell" = length(i),
rank = i))
}
#' @keywords internal
OPIScore <- function(I_rank, N, i_min = 1, i_max = -1, bonferroni = FALSE) {
n_drawn = length(I_rank) # number of black
if(i_max == -1) {
i_max=n_drawn
}
r1 = c(logp=1.0,cutoff=0)
if(i_max < i_min) return(r1)
best_logp = 1.0
cutoff = 0
for(i in i_min:i_max) {
if(i < i_max && I_rank[i] == I_rank[i + 1]) { next; }
logp = phyper(i - 1, I_rank[i], N - I_rank[i], n_drawn, lower.tail = FALSE, log.p = TRUE)
logp = max(logp / log(10), -100)
if(logp <= best_logp) {
best_logp = logp
cutoff = i
}
}
if(bonferroni) {
best_logp = best_logp + log(i_max - i_min + 1) / log(10)
}
return (c(logp = best_logp, cutoff = cutoff))
}
#' @keywords internal
OPI <- function(Groups, Scores, opts, Data = NULL) {
t = data.frame(cbind(Gene_ID = Groups, Score = Scores))
Sorted_Order = order(t$Score, decreasing = opts$reverse);
Data = Data[Sorted_Order, ]
t = t[Sorted_Order, ]
## get the ranks, "max" for the tie.
t.rank <- rank(t$Score, ties.method = "max")
t = do.call("rbind", tapply(seq(nrow(t)), list(t$Gene_ID), handleOneGroup, dataset = t, opts, t.rank))
t = cbind(Data, t[order(t[, "rank"]), ])
# add OPI_Rank
t = t[order(t$LogP, t$Score * ifelse(opts$reverse, -1, 1)), ]
t$OPI_Rank = cumsum(t$OPI_Hit)
t$OPI_Rank[t$OPI_Hit == 0] = 999999
# add Cutoff_Rank
t = t[order(t$Score * (ifelse(opts$reverse, -1, 1)), t$LogP), ]
if (opts$reverse) {
tmp = (t$Score >= opts$LB)
} else {
tmp = (t$Score <= opts$UB)
}
t$Cutoff_Rank = cumsum(tmp)
t$Cutoff_Rank[!tmp] = 999999
# add EXP_Rank
t$EXP_Rank = pmin(t$OPI_Rank, t$Cutoff_Rank)
t$EXP_Rank = pmin(t$OPI_Rank, t$Cutoff_Rank)
if (opts$reverse) {
return(t[order(t$OPI_Rank, -t$Score), ])
} else {
return(t[order(t$OPI_Rank, t$Score), ])
}
}
#' @keywords internal
.ff_rsaRatio <- function(masterPlate, dat, treatment, control, normMethod) {
# browser()
master.norm <- .ff_masterPlateValue(masterPlate, dat, treatment,
control, normMethod)
masterp.da <- master.norm[[1]]
treat.num <- master.norm[[2]]
cont.num <- master.norm[[3]]
plate.ratio <- apply(masterp.da, 1, .ff_ratio_rsa, treat.num, treat.num + 1)
genes <- sapply(as.character(names(plate.ratio)),
function(x) unlist(strsplit(x," "))[[1]])
rsaScore <- data.frame(Gene_ID = genes, Well_ID = names(plate.ratio),
Score = plate.ratio)
rownames(rsaScore) <- NULL
rsaScore
}
#' @keywords internal
.ff_ratio_rsa <- function(x, y, z) {
x <- as.numeric(x)
a <- length(x)
round(median(x[1:y], na.rm = TRUE) / median(x[z:a], na.rm = TRUE), 4)
# tem.1 <- round(median(x[1:y], na.rm = TRUE) / median(x[z:a], na.rm = TRUE), 4)
# tem.1
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.