R/getExpInterval.R

Defines functions getExpInterval

Documented in getExpInterval

#2013 - Federico Comoglio & Cem Sievers, D-BSSE, ETH Zurich


#' Identify the interval of relative substitution frequencies dominated by
#' experimental induction.
#' 
#' Identifies the interval/support of relative substitution frequencies (RSFs)
#' dominated by the second model component, i.e. by the probability of being
#' induced by the experimental procedure. In addition, this function can be
#' used to generate diagnostic plots of the model fit, representing (i) model
#' densities and log odds ratio (ii) the posterior class probability, i.e. the
#' probability of a given observation being generated by experimental
#' induction.
#' 
#' 
#' @usage getExpInterval(model, bayes = TRUE, leftProb, rightProb, plot = TRUE)
#' @param model A list containing the model as returned by the function
#' \code{fitMixtureModel}
#' @param bayes Logical, if TRUE the Bayes classifier (cutoff at posterior
#' class probabilities >= 0.5) is applied. If FALSE, custom cutoff values
#' should be provided through leftProb and rightProb. Default is TRUE.
#' @param leftProb Numeric, the posterior probability corresponding to the left
#' boundary (start) of the high confidence RSF interval.
#' @param rightProb Numeric, the posterior probability corresponding to the
#' right boundary (end) of the high confidence RSF interval.
#' @param plot Logical, if TRUE diagnostics plot showing the model components,
#' log odds and the computed posterior with highlighted identified RSF interval
#' are returned.
#' @return A list with two numeric slots, corresponding to the extremes of the
#' RSF interval (RSF support). \item{supportStart}{start of the high confidence
#' RSF interval} \item{supportEnd}{end of the high confidence RSF interval}
#' @author Federico Comoglio and Cem Sievers
#'
#' @references Sievers C, Schlumpf T, Sawarkar R, Comoglio F and Paro R. (2012) Mixture
#' models and wavelet transforms reveal high confidence RNA-protein interaction
#' sites in MOV10 PAR-CLIP data, Nucleic Acids Res. 40(20):e160. doi:
#' 10.1093/nar/gks697
#' 
#' Comoglio F, Sievers C and Paro R (2015) Sensitive and highly resolved identification
#' of RNA-protein interaction sites in PAR-CLIP data, BMC Bioinformatics 16, 32.
#'
#' @seealso \code{\link{fitMixtureModel}} \code{\link{getHighConfSub}}
#' \code{\link{estimateFDR}}
#' @keywords core graphics
#' @examples
#' 
#' data( model )
#' 
#' #default
#' support <- getExpInterval( model = model, bayes = TRUE, plot = TRUE )
#' support
#' 
#' #custom interval (based, e.g. on visual inspection of posterior class probability 
#' # or evaluation of FDR using the estimateFDRF function)
#' support <- getExpInterval( model = model, leftProb = 0.2, rightProb = 0.7, plot = TRUE )
#' support
#' 
#' @export getExpInterval

getExpInterval <- function( model, bayes = TRUE, leftProb, rightProb, plot = TRUE ) {
# Error handling
#   if bayes FALSE and no custom cutoffs provided, raise an error

	if( !bayes & ( missing( leftProb ) | missing( rightProb ) ) ) {
		stop( 'bayes = FALSE and no left and right probability cutoffs provided. Please consider either using the Bayesian criterion or provide custom cutoffs.' )
	}
    	
	#1-assign left and right cutoffs according to user choice
	if( bayes ) {
		leftProb = .5
		rightProb = .5	
	}	
	else {
		leftProb = leftProb
		rightProb = rightProb
	}

	xval <- seq(.001, .999, .001)

	#2-compute log odds and responsibilities (for second component)
	odds <- computelogOdds( model )
	responsibilities <- (model$l2 * model$p2) / (model$l1 * model$p1 + model$l2 * model$p2)

	#3-compute support
	left <- which( responsibilities >= leftProb )[ 1 ]
	right <- which( responsibilities >= rightProb )
	right <- tail( right, 1 )
	supportStart <- xval[ left ]
	supportEnd <- xval[ right ]

	#4-if plot, plot model components, posterior and highlight support
	if( plot ) {
		par( mfrow = c( 1, 2 ) )
		plot(x     = xval, 
		     y     = model$p, 
		     type  = 'l', 
		     lwd   = 2,
		     ylim  = c( 0, max( model$p, odds ) ),
		     col   = 'black', 
		     ylab  = 'density', 
		     xlab  = 'Relative Substitution Frequency',
	 	     main  = 'Model densities' )

		lines( xval, model$p1, col = 'red', lwd = 2 )
		lines( xval, model$p2, col = 'blue3', lwd = 2 ) 
		lines( xval, odds, col = 'green3', lwd = 2 )

		legend( x = 'topright', 
			legend = c( 'p (full density)', 'p1 (non-experimental)', 'p2 (experimental)', 'log odds ratio' ), 
			col = c( 'black', 'red', 'blue3', 'green3'),
			lwd = 2,
			bty = 'n' )

		plot( x    = xval, 
		      y    = responsibilities, 
		      type = 'n',
		      ylim = c( -.1, 1 ), 
		      ylab = 'Posterior probability', 
                      xlab = 'Relative Substitution Frequency', 
                      main = 'Posterior class probability' )

		xCoord <- seq( supportStart, supportEnd, .001 )
		yCoord <- responsibilities[ left : right ]
		polyCoord <- cbind( xCoord, yCoord )
		polyCoord <- rbind( polyCoord, 
				    c( polyCoord[ nrow( polyCoord ), 1 ], 0 ),
				    c( polyCoord[1, 1], 0 ),
				    polyCoord[1, ] )

		polygon( polyCoord, col = 'royalblue1', border = NULL )
		lines( xval, responsibilities, lwd = 2 )
		rect( supportStart, -.1 , supportEnd, -.05, col = 'wheat2', border = NA )
		text( x      = ( supportEnd - supportStart ) / 2, 
		      y      = -.077, 
		      labels = paste( 'Support = [', supportStart, ', ', supportEnd, ']', sep = '' ) )
	}

	return( list( supportStart = supportStart, supportEnd = supportEnd ) )
}
FedericoComoglio/wavClusteR documentation built on Oct. 29, 2020, 2:44 p.m.