R/normalizePhospho.R
In phosphonormalizer: Compensates for the bias introduced by median normalization in phosphoproteomics. This is done by taking enriched and non-enriched data and creating a normalization factor.

Documented in normalizePhospho

####################################################################
## Pairwise Normalization of MS-based phosphoproteomic data       ##
## Sohrab Saraei                                                  ##
## This function compensates for the bias introduced in global    ##
## phosphorylation in the sample after using median normalization.##
####################################################################

normalizePhospho <- function(enriched, non.enriched, phospho = NULL, samplesCols, modseqCols, techRep, plot.fc=NULL)
{
  #Check if all the necessary arguments are present
  if(missing(enriched)) stop("The function parameter (enriched) is missing!")
  if(missing(non.enriched)) stop("The function parameter (non.enriched) is missing!")
  if(missing(samplesCols)) stop("The function parameter (samplesCols) is missing!")
  if(missing(modseqCols)) stop("The function parameter (modseqCols) is missing!")
  if(missing(techRep)) stop("The function parameter (techRep) is missing!")
  
  #A function that checks the types of the columns of a data.frame
  
  #Check the type of the arguments
  class.dfCols <- function(df, types)
  {
    all(apply(df, MARGIN = 2, function(x) class(x)[1]) %in% types)
  }
  if(!is(enriched, "data.frame") & !is(enriched, "MSnSet"))
    stop("The enriched parameter must be of either types of data.frame or MSnSet")
  
  if(!is(non.enriched, "data.frame") & !is(non.enriched, "MSnSet"))
    stop("The non.enriched parameter must be of either types of data.frame or MSnSet")
  
  if(is(enriched, "MSnSet")) 
  {
    enriched.eset <- enriched
    enriched <- cbind(MSnbase::fData(enriched)[, modseqCols$enriched], MSnbase::exprs(enriched)[, samplesCols$enriched])
    non.enriched <- cbind(MSnbase::fData(non.enriched)[, modseqCols$non.enriched], MSnbase::exprs(non.enriched)[, samplesCols$non.enriched])
    modseqCols <- data.frame(enriched = c(1,2), non.enriched = c(1,2))
    samplesCols <- data.frame(enriched = 3:ncol(enriched), non.enriched = 3:ncol(non.enriched))
  }
  
  if(!is.null(phospho) & !methods::is(phospho, "character"))
    stop("The phospho parameter must be of type of character")
  
  if(!methods::is(samplesCols, "data.frame") | 
     ncol(samplesCols) != 2 | !class.dfCols(samplesCols, c("integer","numeric")) | 
     all(!colnames(samplesCols) %in% c("enriched", "non.enriched")))
    stop("The samplesCols must be data.frame with two columns, with the column names enriched and non.enriched
         , of type numeric or integer, which must contain the column number of samples that hold the abundances")
  
  if(!methods::is(modseqCols, "data.frame") | 
     ncol(modseqCols) != 2 | !class.dfCols(modseqCols, c("integer","numeric")) | 
     all(!colnames(modseqCols) %in% c("enriched", "non.enriched")))
    stop("The modseqCols must be data.frame with two columns, with the names enriched and non.enriched
         , of type numeric or integer, which must contain the column number of samples that hold the sequence 
         and modifications of the peptides")
  
  if(any(!class.dfCols(enriched[, modseqCols$enriched], "character") ,
         !class.dfCols(non.enriched[, modseqCols$non.enriched], "character")))
    stop("The sequence and modification columns that is specified are not of type charachter!")
  
  if(any(!class.dfCols(enriched[, samplesCols$enriched], c("numeric", "integer")) ,
         !class.dfCols(non.enriched[, samplesCols$non.enriched], c("numeric", "integer"))))
    stop("The samples specified are not of type of numeric")
  
  mod = seq = NULL
  enriched.original.mat <- as.matrix(enriched[, samplesCols$enriched])
  seqMod <- enriched[, modseqCols$enriched]
  #Removing peptides with non-quantified values across the runs
  enriched <- enriched[apply(X = enriched[ ,samplesCols$enriched], MARGIN = 1, 
                             function(x) all(x != 0)),]
  non.enriched <- non.enriched[apply(X = non.enriched[,samplesCols$non.enriched], MARGIN = 1, 
                                     function(x) all(x != 0)),]
  
  colnames(enriched)[modseqCols$enriched] <- c("seq", "mod")
  colnames(non.enriched)[modseqCols$non.enriched] <- c("seq", "mod")
  
  if(is.null(phospho))
  {
    enriched <- enriched[grepl(pattern = "Phospho", x = enriched$mod),]
    non.enriched <- non.enriched[grepl(pattern = "Phospho", x = non.enriched$mod),]
  } else {
    enriched <- enriched[grepl(pattern = phospho, x = enriched$mod),]
    non.enriched <- non.enriched[grepl(pattern = phospho, x = non.enriched$mod),]
  }
  
  enriched <- plyr::ddply(enriched, plyr::.(seq, mod),
                          function(df) colSums(df[, samplesCols$enriched]))
  non.enriched <- plyr::ddply(non.enriched, plyr::.(seq, mod),
                              function(df) colSums(df[, samplesCols$non.enriched]))
  
  enriched[,"modSeq"] <- paste(enriched$seq, enriched$mod,sep = ", ")
  non.enriched[,"modSeq"] <- paste(non.enriched$seq, non.enriched$mod,sep = ", ")

  inter <- intersect(non.enriched$modSeq, enriched$modSeq)

  stopifnot(length(inter) > 0)
  enriched.olp.idx <- which(enriched$modSeq %in% inter)
  non.enriched.olp.idx <- which(non.enriched$modSeq %in% inter)
  
  enriched.mat <- enriched[enriched.olp.idx, ]
  non.enriched.mat <- non.enriched[non.enriched.olp.idx, ]
  
  enriched.mat <- enriched.mat[order(enriched.mat$modSeq),]
  non.enriched.mat <- non.enriched.mat[order(non.enriched.mat$modSeq),]
  
  enriched.mat <- as.matrix(enriched.mat[, -c(1,2,ncol(enriched))])
  non.enriched.mat <- as.matrix(non.enriched.mat[, -c(1,2,ncol(enriched))])
  if(length(inter) > 1) {
    ratios <- non.enriched.mat/enriched.mat
    
    colnames(ratios) <- as.numeric(techRep)
    ratios.avg <- matrix(nrow = nrow(ratios), ncol = length(levels(techRep)))
    for (tr in levels(techRep)) {
      if(nrow(ratios.avg) == 1) {
        ratios.avg[,as.numeric(tr)] <- mean(ratios[,colnames(ratios) == levels(techRep)[as.numeric(tr)]])
      } else {
        ratios.avg[,as.numeric(tr)] <- rowMeans(ratios[,colnames(ratios) == levels(techRep)[as.numeric(tr)]])
      }
    }
    
    
    max.fc <- log2(ratios.avg[,1]) - log2(ratios.avg[,2])
    
    for (i in 2:(ncol(ratios.avg)-1)) {
      for (j in (i+1):(ncol(ratios.avg))) {
        max.fc <- matrixStats::rowMaxs(cbind(max.fc, log2(ratios.avg[,i]) - log2(ratios.avg[,j])))
      }
    }
    
    boxp <- boxplot(max.fc, plot = FALSE)
    ratios <- ratios[!(max.fc > max(boxp$stats)),]
    
    ratios <- log10(ratios)
    if(methods::is(ratios, "matrix") | methods::is(ratios, "data.frame")) {
      col.sub <- rowMeans(ratios)
    } else {
      col.sub <- mean(ratios)
    }
    
    ratios.norm <- ratios - col.sub
    
    if(methods::is(ratios, "matrix") | methods::is(ratios, "data.frame")) {
      factors <- 10^(matrixStats::colMedians(ratios.norm))
    } else {
      factors <- ratios.norm
    }
  } else {
    factors <- as.numeric(non.enriched.mat/enriched.mat)
  }

  enriched.normalized.mat <- t(t(enriched.original.mat) * factors)
  	if(!is.null(plot.fc)) {
	for(i in plot.fc$control) {
	  for(j in plot.fc$samples) {
		a.original <- rowMeans(log2(enriched.original.mat[,which(techRep==i)]+1),na.rm=TRUE)
		b.original <- rowMeans(log2(enriched.original.mat[,which(techRep==j)]+1),na.rm=TRUE)
		fc.original <- a.original - b.original
		a.normnalized <- rowMeans(log2(enriched.normalized.mat[,which(techRep==i)]+1),na.rm=TRUE)
		b.normnalized <- rowMeans(log2(enriched.normalized.mat[,which(techRep==j)]+1),na.rm=TRUE)
		fc.normnalized <- a.normnalized - b.normnalized
		boxplot(cbind(fc.original, fc.normnalized), range=1.5, outline=FALSE, main=paste0("Peptide log fold changes", " (sample ", j, " vs sample ", i, ")"), names=c("Median normalized","Pairwise normalized"))
		abline(h=0, lty=2)}
	}

	}
  message(paste0("The number of peptides in the intersect is: ", length(inter)))
  message(paste0(length(plot.fc$control) * length(plot.fc$samples), " plots generated. Browse through them."))
  data.frame(seqMod, t(t(enriched.original.mat) * factors)) 
}

Any scripts or data that you put into this service are public.

phosphonormalizer documentation built on Dec. 10, 2020, 2:01 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

phosphonormalizer
Compensates for the bias introduced by median normalization in phosphoproteomics. This is done by taking enriched and non-enriched data and creating a normalization factor.

R/normalizePhospho.R
In phosphonormalizer: Compensates for the bias introduced by median normalization in phosphoproteomics. This is done by taking enriched and non-enriched data and creating a normalization factor.

Defines functions normalizePhospho

Documented in normalizePhospho

Try the phosphonormalizer package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

phosphonormalizer Compensates for the bias introduced by median normalization in phosphoproteomics. This is done by taking enriched and non-enriched data and creating a normalization factor.

R/normalizePhospho.R In phosphonormalizer: Compensates for the bias introduced by median normalization in phosphoproteomics. This is done by taking enriched and non-enriched data and creating a normalization factor.

Defines functions normalizePhospho

Documented in normalizePhospho

Try the phosphonormalizer package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

phosphonormalizer
Compensates for the bias introduced by median normalization in phosphoproteomics. This is done by taking enriched and non-enriched data and creating a normalization factor.

R/normalizePhospho.R
In phosphonormalizer: Compensates for the bias introduced by median normalization in phosphoproteomics. This is done by taking enriched and non-enriched data and creating a normalization factor.