#2013 - Federico Comoglio & Cem Sievers, D-BSSE, ETH Zurich
#' Identify the interval of relative substitution frequencies dominated by
#' experimental induction.
#'
#' Identifies the interval/support of relative substitution frequencies (RSFs)
#' dominated by the second model component, i.e. by the probability of being
#' induced by the experimental procedure. In addition, this function can be
#' used to generate diagnostic plots of the model fit, representing (i) model
#' densities and log odds ratio (ii) the posterior class probability, i.e. the
#' probability of a given observation being generated by experimental
#' induction.
#'
#'
#' @usage getExpInterval(model, bayes = TRUE, leftProb, rightProb, plot = TRUE)
#' @param model A list containing the model as returned by the function
#' \code{fitMixtureModel}
#' @param bayes Logical, if TRUE the Bayes classifier (cutoff at posterior
#' class probabilities >= 0.5) is applied. If FALSE, custom cutoff values
#' should be provided through leftProb and rightProb. Default is TRUE.
#' @param leftProb Numeric, the posterior probability corresponding to the left
#' boundary (start) of the high confidence RSF interval.
#' @param rightProb Numeric, the posterior probability corresponding to the
#' right boundary (end) of the high confidence RSF interval.
#' @param plot Logical, if TRUE diagnostics plot showing the model components,
#' log odds and the computed posterior with highlighted identified RSF interval
#' are returned.
#' @return A list with two numeric slots, corresponding to the extremes of the
#' RSF interval (RSF support). \item{supportStart}{start of the high confidence
#' RSF interval} \item{supportEnd}{end of the high confidence RSF interval}
#' @author Federico Comoglio and Cem Sievers
#'
#' @references Sievers C, Schlumpf T, Sawarkar R, Comoglio F and Paro R. (2012) Mixture
#' models and wavelet transforms reveal high confidence RNA-protein interaction
#' sites in MOV10 PAR-CLIP data, Nucleic Acids Res. 40(20):e160. doi:
#' 10.1093/nar/gks697
#'
#' Comoglio F, Sievers C and Paro R (2015) Sensitive and highly resolved identification
#' of RNA-protein interaction sites in PAR-CLIP data, BMC Bioinformatics 16, 32.
#'
#' @seealso \code{\link{fitMixtureModel}} \code{\link{getHighConfSub}}
#' \code{\link{estimateFDR}}
#' @keywords core graphics
#' @examples
#'
#' data( model )
#'
#' #default
#' support <- getExpInterval( model = model, bayes = TRUE, plot = TRUE )
#' support
#'
#' #custom interval (based, e.g. on visual inspection of posterior class probability
#' # or evaluation of FDR using the estimateFDRF function)
#' support <- getExpInterval( model = model, leftProb = 0.2, rightProb = 0.7, plot = TRUE )
#' support
#'
#' @export getExpInterval
getExpInterval <- function( model, bayes = TRUE, leftProb, rightProb, plot = TRUE ) {
# Error handling
# if bayes FALSE and no custom cutoffs provided, raise an error
if( !bayes & ( missing( leftProb ) | missing( rightProb ) ) ) {
stop( 'bayes = FALSE and no left and right probability cutoffs provided. Please consider either using the Bayesian criterion or provide custom cutoffs.' )
}
#1-assign left and right cutoffs according to user choice
if( bayes ) {
leftProb = .5
rightProb = .5
}
else {
leftProb = leftProb
rightProb = rightProb
}
xval <- seq(.001, .999, .001)
#2-compute log odds and responsibilities (for second component)
odds <- computelogOdds( model )
responsibilities <- (model$l2 * model$p2) / (model$l1 * model$p1 + model$l2 * model$p2)
#3-compute support
left <- which( responsibilities >= leftProb )[ 1 ]
right <- which( responsibilities >= rightProb )
right <- tail( right, 1 )
supportStart <- xval[ left ]
supportEnd <- xval[ right ]
#4-if plot, plot model components, posterior and highlight support
if( plot ) {
par( mfrow = c( 1, 2 ) )
plot(x = xval,
y = model$p,
type = 'l',
lwd = 2,
ylim = c( 0, max( model$p, odds ) ),
col = 'black',
ylab = 'density',
xlab = 'Relative Substitution Frequency',
main = 'Model densities' )
lines( xval, model$p1, col = 'red', lwd = 2 )
lines( xval, model$p2, col = 'blue3', lwd = 2 )
lines( xval, odds, col = 'green3', lwd = 2 )
legend( x = 'topright',
legend = c( 'p (full density)', 'p1 (non-experimental)', 'p2 (experimental)', 'log odds ratio' ),
col = c( 'black', 'red', 'blue3', 'green3'),
lwd = 2,
bty = 'n' )
plot( x = xval,
y = responsibilities,
type = 'n',
ylim = c( -.1, 1 ),
ylab = 'Posterior probability',
xlab = 'Relative Substitution Frequency',
main = 'Posterior class probability' )
xCoord <- seq( supportStart, supportEnd, .001 )
yCoord <- responsibilities[ left : right ]
polyCoord <- cbind( xCoord, yCoord )
polyCoord <- rbind( polyCoord,
c( polyCoord[ nrow( polyCoord ), 1 ], 0 ),
c( polyCoord[1, 1], 0 ),
polyCoord[1, ] )
polygon( polyCoord, col = 'royalblue1', border = NULL )
lines( xval, responsibilities, lwd = 2 )
rect( supportStart, -.1 , supportEnd, -.05, col = 'wheat2', border = NA )
text( x = ( supportEnd - supportStart ) / 2,
y = -.077,
labels = paste( 'Support = [', supportStart, ', ', supportEnd, ']', sep = '' ) )
}
return( list( supportStart = supportStart, supportEnd = supportEnd ) )
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.