#' Make antibody binding positivity calls
#'
#' After normalization and data smoothing, this last step makes the call for each
#' peptide of the peptideSet after baseline correcting the peptide intenstities.
#'
#' @param peptideSet A \code{peptideSet} object. The peptides, after normalization
#' and possibly data smoothing.
#' @param cutoff A \code{numeric}. If FDR, the FDR threshold. Otherwise, a cutoff
#' for the background corrected intensities.
#' @param method A \code{character}. The method used to make positivity calls.
#' "absolute" and "FDR" are available. See details below.
#' @param freq A \code{logical}. If set to TRUE, return the percentage of slides
#' calling a peptide positive. Otherwise, return a \code{logical} indicating binding
#' events.
#' @param group A \code{character}. Only used when freq is set to TRUE. A character indicating
#' a variable by which to group slides. If non-null the percentage is calculated by group.
#' @param verbose A \code{logical}. If set to TRUE, progress information will be displayed.
#'
#' @details
#' This function requires specific variables ptid and visit in pData(peptideSet).
#' The variable \code{ptid} should indicate subjects, and the variable \code{visit}
#' should be a factor with levels pre and post.
#'
#' If slides are paired for subjects, intensities corresponding to post-visit are
#' substracted from pre. If slides are not paired, slides with pre have intensities
#' averaged by peptides, and averaged peptide intensities are subtracted from slides
#' that have entry post. Calls are made on these baseline corrected intensities.
#'
#' When method = FDR, a left-tail method is used to generate a threshold controlling
#' the False Discovery Rate at level \code{cutoff}. When method = absolute, Intensities
#' exceeding the threshold are labelled as positive.
#'
#' When freq = TRUE a group variable may be specified. The argument group indicates
#' the name of a variable in pData(peptideSet) by which positive calls should be grouped.
#' The call frequency for each peptide is calculated within groups.
#'
#' @return If freq = TRUE, a \code{numeric} \code{matrix} with peptides as rows and
#' groups as columns where the values are the frequency of response in the group. If
#' freq = FALSE, a \code{logical} \code{matrix} indicating binding events for each
#' peptide in each subject.
#'
#' @rdname makeCalls
#' @aliases makeCalls
#' @author Greg Imholte
#' @export
#' @example examples/pipeline.R
makeCalls <- function(peptideSet, cutoff=1.2, method="absolute", freq=TRUE, group=NULL, verbose=FALSE){
if (class(peptideSet)!="peptideSet") {
stop("peptideSet must be an object of class peptideSet")
}
if (preproc(peptideSet@experimentData)$transformation!="log") {
warning("The probe measurements should be log transformed.")
}
if (preproc(peptideSet@experimentData)$normalization=="none") {
warning("You should probably normalize your data before using this function.")
}
I <- baselineCorrect.pSet(peptideSet, verbose=verbose)
if (method == "FDR") {
Calls<-.findFDR(I, cutoff, position(peptideSet), verbose=verbose)
} else if(method == "absolute") {
Calls <- I > cutoff
}
if (!is.null(group) && freq) {
#parse the grouping variable
# Only select the Post and remove empty levels
t1 <- grepl("post", tolower(pData(peptideSet)$visit))
pd <- pData(peptideSet)[t1, ]
if(!group%in%colnames(pd)){
stop("The grouping variable is not part of the pData object.")
} else {
factor<-factor(pd[,group])
}
if(nlevels(factor) > 1){
#split the ptid into groups
ptidGroups <- split(pd$ptid,factor)
#apply the rowMeans to each group
res <- lapply(ptidGroups, function(curPtid,Calls,ptid){rowMeans(Calls[,ptid%in%curPtid, drop=FALSE])}, Calls, pd$ptid)
res <- do.call(cbind, res)
} else {
return(rowMeans(Calls)*100)
}
return(res*100)
} else if(freq) {
return(rowMeans(Calls)*100)
} else {
return(Calls)
}
}
.findFDR <- function(I, cutoff, position, verbose=FALSE){
seqY <- seq(min(abs(I)), max(abs(I)),.05)
# Split the data by unique positions
tmp <- split(as.data.frame(I), position)
# Compute the mean over unique positions
D <- sapply(tmp,apply, 2, mean)
#Calculate F(T)
FDR <- sapply(seqY, function(x, D){median(
#Fs(T)
apply(D, 1, function(D,x){min(sum(D < -x)/sum(D > x), 1)}, x), na.rm=TRUE)}, D)
# Did not find anything below the cutoff or everything is NA
if (all(round(FDR,2)>cutoff, na.rm=TRUE) | all(is.na(FDR))) {
return(I > max(I)) # Return all FALSE
} else {
Dmin <- seqY[which.min(abs(FDR-cutoff))]
if(verbose){
cat("The selected threshold T is ", Dmin, "\n")
}
return(I > Dmin)
}
}
#' Substract baseline intensities
#'
#' Correct intensities by substracting PRE visit sample intensities.
#'
#' @param pSet A \code{peptideSet} with sample PRE and POST visits.
#' @param verbose A \code{logical}. If TRUE, information regarding the
#' pairedness of the data will be displayed.
#'
#' @return A \code{matrix} of the baseline corrected intensities, with as many
#' columns as there are samples POST visit
#'
#' @details
#' If samples are not PAIRED (One PRE and POST for each ptid), then the average
#' expression of all PRE visit samples is substracted from each sample.
#'
#' @author Raphael Gottardo, Gregory Imholte
#'
baselineCorrect.pSet <- function(pSet, verbose=FALSE){
y <- exprs(pSet)
ptid <- pData(pSet)$ptid
t0 <- grep("pre", tolower(pData(pSet)$visit))
t1 <- grep("post", tolower(pData(pSet)$visit))
### Paired?
if (length(ptid[t0])==0||length(ptid[t1])==0) {
I<-as.matrix(y[,t1])
} else {
if (isTRUE(all.equal(sort(ptid[t0]), sort(ptid[t1])))) {
if (verbose) {
message("You have paired PRE/POST samples\n")
}
I <- as.matrix(y[,t1])-as.matrix(y[,t0])
} else {
if(verbose) {
message("You don't have paired PRE/POST samples\n")
}
I <- as.matrix(y[,t1])-rowMeans(y[, t0, drop=FALSE], na.rm=TRUE)#the vector to be subtracted from matrix need to be the same length as nrow of the matrix
}
}
colnames(I) <- ptid[t1]
if(isTRUE(preproc(pSet)$split.by.clade)){
rownames(I) <- rownames(pSet)
} else{
rownames(I) <- peptide(pSet)
}
return(I)
}
#' Substract baseline intensities
#'
#' Correct intensities by substracting PRE visit sample intensities.
#'
#' @param pSet A \code{peptideSet} with sample PRE and POST visits.
#' @param verbose A \code{logical}. If TRUE, information regarding the
#' pairedness of the data will be displayed.
#'
#' @return A \code{matrix} of the baseline corrected intensities, with as many
#' columns as there are samples POST visit
#'
#' @details
#' The function will try to pair as many sample as possible. The remaining subjects
#' with a POST and no PRE will use the average expression of all baseline samples.
#' Subjects with baseline only will not be represented in the resulting matrix.
#'
#' @author Renan Sauteraud
baseline_correct <- function(pSet, verbose=FALSE){
exprs <- exprs(pSet)
pd <- pData(pSet)
t0 <- grep("pre", tolower(pd$visit))
t1 <- grep("post", tolower(pd$visit))
## All paired
if (isTRUE(all.equal(sort(pd$ptid[t0]), sort(pd$ptid[t1])))){
if (verbose) {
message("You have paired PRE/POST samples\n")
}
I <- as.matrix(exprs[,t1])-as.matrix(exprs[,t0])
} else {
## Deal with paired samples first
paired <- pd$ptid[t1][pd$ptid[t1] %in% pd$ptid[t0]]
pre_paired <- rownames(pd[ pd$ptid %in% paired & tolower(pd$visit)=="pre",])
post_paired <- rownames(pd[ pd$ptid %in% paired & tolower(pd$visit)=="post",])
I <- exprs[, post_paired] - exprs[, pre_paired]
colnames(I) <- paired
## Cbind the rest
if (verbose) {
cat(length(non_paired), "subjects don't have a baseline sample.\n")
}
non_paired <- unique(pd$ptid[!( pd$ptid %in% paired) & tolower(pd$visit)=="post"])
if(length(non_paired) > 0){
post_only <- rownames(pd[ pd$ptid %in% non_paired, ])
I_no_pairs <- exprs[, post_only] - rowMeans(exprs[, t0, drop=FALSE], na.rm=TRUE)
colnames(I_no_pairs) <- non_paired
I <- cbind(I, I_no_pairs)
}
}
rownames(I) <- peptide(pSet)
return(I)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.