R/callingWaterfall.R

Defines functions .distancePointSegment .magnitude .distancePointLine callingWaterfall

Documented in callingWaterfall .distancePointLine .distancePointSegment

#' Drug sensitivity calling using waterfall plots
#'
#' 1. Sensitivity calls were made using one of IC50, ActArea or Amax
#' 
#' 2. Sort log IC50s (or ActArea or Amax) of the cell lines to generate a 
#'   “waterfall distribution”
#'   
#' 3. Identify cutoff:
#' 
#'  3.1 If the waterfall distribution is non-linear (pearson cc to the linear 
#'    fit <=0.95), estimate the major inflection point of the log IC50 curve as 
#'    the point on the curve with the maximal distance to a line drawn between 
#'    the start and end points of the distribution.
#'    
#'  3.2 If the waterfall distribution appears linear (pearson cc to the linear 
#'    fit > 0.95), then use the median IC50 instead.
#'    
#' 4. Cell lines within a 4-fold IC50 (or within a 1.2-fold ActArea or 20% Amax 
#'   difference) difference centered around this inflection point are classified 
#'   as being “intermediate”,  cell lines with lower IC50s (or ActArea/Amax 
#'   values) than this range are defined as sensitive, and those with IC50s (or 
#'   ActArea/Amax) higher than this range are called “insensitive”.
#'   
#' 5. Require at least x sensitive and x insensitive cell lines after applying 
#'   these criteria (x=5 in our case).
#'
## FIXME:: Write a real example
#' @examples
#' # Dummy example
#' 1 + 1
#'
## FIXME:: Clarify the parameters of this function
#' @param x What type of object does this take in?
#' @param type  
#'   ic50: IC50 values in micro molar (positive values)
#'   actarea: Activity Area, that is area under the drug activity curve (positive values)
#'   amax: Activity at max concentration (positive values)
#'   
#' @param intermediate.fold vector of fold changes used to define the intermediate sensitivities for ic50, actarea and amax respectively
#' @param cor.min.linear \code{numeric} The minimum linear correlation to 
#'   require?
#' @param name \code{character} The name of the output to use in plot
#' @param plot \code{boolean} Whether to plot the results
#' 
#' @return \code{factor} Containing the drug sensitivity status of each
#'   cellline.
#' 
#' @importFrom stats complete.cases  cor.test lm median
#' @importFrom graphics par points abline lines legend
#' @importFrom grDevices rainbow
#' 
#' @export
#' @keywords internal
callingWaterfall <- function(x, type = c("IC50", "AUC", "AMAX"), intermediate.fold = c(4, 1.2, 1.2), cor.min.linear = 0.95, name = "Drug",
    plot = FALSE) {
    
    type <- match.arg(type)
    if (any(!is.na(intermediate.fold) & intermediate.fold < 0)) {
        intermediate.fold <- intermediate.fold[!is.na(intermediate.fold) & intermediate.fold < 0] <- 0
    }
    if (is.null(names(x))) {
        names(x) <- paste("X", seq_along(x), sep = ".")
    }
    
    xx <- x[complete.cases(x)]
    switch(type, IC50 = {
        xx <- -log10(xx)
        ylabel <- "-log10(IC50)"
        ## 4 fold difference around IC50 cutoff
        if (length(intermediate.fold) == 3) {
            intermediate.fold <- intermediate.fold[1]
        }
        if (intermediate.fold != 0) {
            interfold <- log10(intermediate.fold)
        } else {
            interfold <- 0
        }
    }, AUC = {
        ylabel <- "AUC"
        ## 1.2 fold difference around Activity Area cutoff
        if (length(intermediate.fold) == 3) {
            intermediate.fold <- intermediate.fold[2]
        }
        interfold <- intermediate.fold
    }, AMAX = {
        ylabel <- "Amax"
        ## 1.2 fold difference around Amax
        if (length(intermediate.fold) == 3) {
            intermediate.fold <- intermediate.fold[3]
        }
        interfold <- intermediate.fold
    })
    
    if (length(xx) < 3) {
        tt <- array(NA, dim = length(x), dimnames = list(names(x)))
        if (interfold == 0) {
            tt <- factor(tt, levels = c("resistant", "sensitive"))
        } else {
            tt <- factor(tt, levels = c("resistant", "intermediate", "sensitive"))
        }
        return(tt)
    }
    
    oo <- order(xx, decreasing = TRUE)
    ## test linearity with Pearson correlation
    cc <- stats::cor.test(-xx[oo], seq_along(oo), method = "pearson")
    ## line between the two extreme sensitivity values
    dd <- cbind(y = xx[oo][c(1, length(oo))], x = c(1, length(oo)))
    rr <- lm(y ~ x, data = data.frame(dd))
    ## compute distance from sensitivity values and the line between the two extreme sensitivity values
    ddi <- apply(cbind(seq_along(oo), xx[oo]), 1, function(x, slope, intercept) {
        return(.distancePointLine(x = x[1], y = x[2], a = slope, b = intercept))
    }, slope = rr$coefficients[2], intercept = rr$coefficients[1])
    if (cc$estimate > cor.min.linear) {
        ## approximately linear waterfall
        cutoff <- which.min(abs(xx[oo] - median(xx[oo])))
        cutoffn <- names(cutoff)[1]
    } else {
        ## non linear waterfall identify cutoff as the maximum distance
        cutoff <- which.max(abs(ddi))
        cutoffn <- names(ddi)[cutoff]
    }
    ## identify intermediate sensitivities
    switch(type, IC50 = {
        if (interfold == 0) {
            rang <- c(xx[oo][cutoff], xx[oo][cutoff])
        } else {
            rang <- c(xx[oo][cutoff] - interfold, xx[oo][cutoff] + interfold)
        }
    }, AUC = {
        if (interfold == 0) {
            rang <- c(xx[oo][cutoff], xx[oo][cutoff])
        } else {
            rang <- c(xx[oo][cutoff]/interfold, xx[oo][cutoff] * interfold)
        }
    }, AMAX = {
        if (interfold == 0) {
            rang <- c(xx[oo][cutoff], xx[oo][cutoff])
        } else {
            rang <- c(xx[oo][cutoff]/interfold, xx[oo][cutoff] * interfold)
        }
    })
    
    
    ## check whether range is either min or max
    if (rang[2] >= max(xx)) {
        rang[2] <- sort(unique(xx), decreasing = TRUE)[2]
    }
    if (rang[2] <= min(xx)) {
        rang[2] <- sort(unique(xx), decreasing = FALSE)[2]
    }
    if (rang[1] <= min(xx)) {
        rang[1] <- sort(unique(xx), decreasing = FALSE)[2]
    }
    if (rang[1] >= max(xx)) {
        rang[1] <- sort(unique(xx), decreasing = TRUE)[2]
    }
    
    ## compute calls
    calls <- rep(NA, length(xx))
    names(calls) <- names(xx)
    calls[xx < rang[1]] <- "resistant"
    calls[xx >= rang[2]] <- "sensitive"
    calls[xx >= rang[1] & xx < rang[2]] <- "intermediate"
    
    if (plot) {
        par(mfrow = c(2, 1))
        ccols <- rainbow(4)
        mycol <- rep("grey", length(xx))
        names(mycol) <- names(xx)
        mycol[calls == "sensitive"] <- ccols[2]
        mycol[calls == "intermediate"] <- ccols[3]
        mycol[calls == "resistant"] <- ccols[4]
        mycol[cutoffn] <- ccols[1]
        mypch <- rep(16, length(xx))
        names(mypch) <- names(xx)
        mypch[cutoffn] <- 19
        plot(xx[oo], col = mycol[oo], pch = mypch[oo], ylab = ylabel, main = sprintf("%s\nWaterfall", name))
        points(x = cutoff, y = xx[cutoffn], pch = mypch[cutoffn], col = mycol[cutoffn])
        graphics::abline(a = rr$coefficients[1], b = rr$coefficients[2], lwd = 2, col = "darkgrey")
        lines(x = c(cutoff, cutoff), y = c(par("usr")[3], xx[cutoffn]), col = "red")
        lines(x = c(par("usr")[1], cutoff), y = c(xx[cutoffn], xx[cutoffn]), col = "red")
        legend("topright", legend = c(sprintf("resistant (n=%i)", sum(!is.na(calls) & calls == "resistant")), sprintf("intermediate (n=%i)", 
            sum(!is.na(calls) & calls == "intermediate")), sprintf("sensitive (n=%i)", sum(!is.na(calls) & calls == "sensitive")), "cutoff", 
            sprintf("R=%.3g", cc$estimate)), col = c(rev(ccols), NA), pch = c(16, 16, 16, 19, NA), bty = "n")
        
        plot(ddi, pch = mypch[oo], col = mycol[oo], ylab = "Distance", main = sprintf("%s\n%s", name, "Distance from min--max line"))
        points(x = cutoff, y = ddi[cutoffn], pch = mypch[cutoffn], col = mycol[cutoffn])
        legend("topright", legend = c("resistant", "intermediate", "sensitive", "cutoff"), col = rev(ccols), pch = c(16, 16, 16, 19), bty = "n")
    }
    
    tt <- rep(NA, length(x))
    names(tt) <- names(x)
    tt[names(calls)] <- calls
    if (interfold == 0) {
        tt <- factor(tt, levels = c("resistant", "sensitive"))
    } else {
        tt <- factor(tt, levels = c("resistant", "intermediate", "sensitive"))
    }
    return(tt)
}


# Helper Functions --------------------------------------------------------

#' Calculate shortest distance between point and line
#'
#' @examples .distancePointLine(0, 0, 1, -1, 1)
#'
#' @description This function calculates the shortest distance between a point 
#'   and a line in 2D space.
#'
#' @param x x-coordinate of point
#' @param y y-coordinate of point
#' @param a coefficient in line equation a * x + b * y + c = 0
#' @param b coefficient in line equation a * x + b * y + c = 0
#' @param c coefficient in line equation a * x + b * y + c = 0
#' 
#' @return \code{numeric} The shortest distance between a point and a line
#' 
#' @export
#' @keywords internal
.distancePointLine <- function(x, y, a, b, c) {
    
    if (!(all(is.finite(c(x, y, a, b, c))))) {
        stop("All inputs to linePtDist must be real numbers.")
    }
    
    return(abs(a * x + b * y + c)/sqrt(a^2 + b^2))
}

#' @export
#' @keywords internal
.magnitude <- function(p1, p2) {
    return(sqrt(sum((p2 - p1)^2)))
}

#' Calculate shortest distance between point and line segment
#'
#' @description This function calculates the shortest distance between a point 
#'   and a line segment in 2D space.
#' 
#' @param x x-coordinate of point
#' @param y y-coordinate of point
#' @param x1 x-coordinate of one endpoint of the line segment
#' @param y1 y-coordinate of line segment endpoint with x-coordinate x1
#' @param x2 x-coordinate of other endpoint of line segment
#' @param y2 y-coordinate of line segment endpoint with x-coordinate x2
#' 
#' @return \code{numeric} The shortest distance between a point and a line
#'   segment
#' 
#' @examples .distancePointSegment(0, 0, -1, 1, 1, -1)
#' 
#' @export
#' @keywords internal
.distancePointSegment <- function(x, y, x1, y1, x2, y2) {
    if (!(all(is.finite(c(x, y, x1, x2, y1, y2))))) {
        stop("All inputs to linePtDist must be real numbers.")
    }
    
    bestEndpointDistance <- min(c(.magnitude(c(x, y), c(x1, y1)), .magnitude(c(x, y), c(x2, y2))))
    
    if (.magnitude(c(x, y), c((x1 + x2)/2, (y1 + y2)/2)) < bestEndpointDistance) {
        # length to point has only one local minimum which is the global minimum; iff this condition is true then the shortest distance is to a
        # point on the line segment vertical line segment
        if (x1 == x2) {
            a <- 1
            b <- 0
            c <- -x1
        } else {
            a <- (y2 - y1)/(x2 - x1)
            b <- -1
            c <- y1 - a * x1
        }
        return(.distancePointLine(x, y, a, b, c))
    } else {
        return(bestEndpointDistance)
    }
}

Try the CoreGx package in your browser

Any scripts or data that you put into this service are public.

CoreGx documentation built on Nov. 8, 2020, 4:50 p.m.