PhosR: A set of methods and tools for comprehensive analysis of phosphoproteomics data

Documented in matANOVA meanAbundance medianScaling minmax mIntersect mUnion phosCollapse standardise

#' Median centering and scaling
#'
#' Median centering and scaling of an input numeric matrix
#'
#'
#' @param mat a matrix with rows correspond to phosphosites and columns
#' correspond to samples.
#' @param scale a boolean flag indicating whether to scale the samples.
#' @param grps a string or factor specifying the grouping (replciates).
#' @param reorder whehther to reorder by factor.
#'
#' @return A median scaled matrix
#'
#' @importFrom limma normalizeMedianAbsValues
#'
#' @examples
#'
#' data('phospho.cells.Ins.sample')
#' grps = gsub('_[0-9]{1}', '', colnames(phospho.cells.Ins))
#' phospho.cells.Ins.filtered <- selectGrps(phospho.cells.Ins, grps, 0.5, n=1)
#'
#' set.seed(123)
#' phospho.cells.Ins.impute <-
#'     scImpute(phospho.cells.Ins.filtered,
#'             0.5,
#'             grps)[,colnames(phospho.cells.Ins.filtered)]
#'
#' set.seed(123)
#' phospho.cells.Ins.impute[,1:5] <- ptImpute(phospho.cells.Ins.impute[,6:10],
#'     phospho.cells.Ins.impute[,1:5], percent1 = 0.6,
#'     percent2 = 0, paired = FALSE)
#'
#' phospho.cells.Ins.ms <-
#'     medianScaling(phospho.cells.Ins.impute, scale = FALSE)
#'
#' @export
#'
medianScaling <- function(mat, scale = TRUE, grps = NULL, reorder = FALSE) {

    mat.medianScaled <- NULL
    if (!is.null(grps)) {
        tmp <- lapply(split(seq_len(ncol(mat)), grps), function(i) mat[,
            i])
        mat.medianScaled <- do.call(cbind, lapply(tmp, medianScale,
            scale = scale))

        if (reorder == FALSE) {
            mat.medianScaled <- mat.medianScaled[, colnames(mat)]
        }
    } else {
        mat.medianScaled <- medianScale(mat, scale = scale)
    }

    return(mat.medianScaled)
}

medianScale <- function(mat, scale) {
    if (scale == TRUE) {
        normcont <- stats::median(apply(mat, 2, stats::median,
            na.rm = TRUE))
        adjval <- apply(mat, 2, stats::median, na.rm = TRUE) -
            normcont
        mat.scaled <- normalizeMedianAbsValues(sweep(mat, 2,
            adjval, "-"))
        return(mat.scaled)
    } else {
        normcont <- stats::median(apply(mat, 2, stats::median,
            na.rm = TRUE))
        adjval <- apply(mat, 2, stats::median, na.rm = TRUE) -
            normcont
        mat.unscaled <- sweep(mat, 2, adjval, "-")
        return(mat.unscaled)
    }
}


#' Standardisation
#'
#' Standardisation by z-score transformation.
#'
#' @usage standardise(mat)
#'
#' @param mat a matrix with rows correspond to phosphosites and columns
#' correspond to samples.
#'
#' @return A standardised matrix
#'
#' @examples
#'
#' data('phospho_L6_ratio')
#' data('SPSs')
#'
#' grps = gsub('_.+', '', colnames(phospho.L6.ratio))
#'
#' # Cleaning phosphosite label
#' phospho.site.names = rownames(phospho.L6.ratio)
#' L6.sites = gsub(' ', '', sapply(strsplit(rownames(phospho.L6.ratio), '~'),
#'                                 function(x){paste(toupper(x[2]), x[3], '',
#'                                                 sep=';')}))
#' phospho.L6.ratio = t(sapply(split(data.frame(phospho.L6.ratio), L6.sites),
#'                             colMeans))
#' phospho.site.names = split(phospho.site.names, L6.sites)
#'
#' # Construct a design matrix by condition
#' design = model.matrix(~ grps - 1)
#'
#' # phosphoproteomics data normalisation using RUV
#' ctl = which(rownames(phospho.L6.ratio) %in% SPSs)
#' phospho.L6.ratio.RUV = RUVphospho(phospho.L6.ratio, M = design, k = 3,
#'                                 ctl = ctl)
#' phosphoL6 = phospho.L6.ratio.RUV
#' rownames(phosphoL6) = phospho.site.names
#'
#' # filter for up-regulated phosphosites
#' phosphoL6.mean <- meanAbundance(phosphoL6, grps = gsub('_.+', '',
#'                                 colnames(phosphoL6)))
#' aov <- matANOVA(mat=phosphoL6, grps=gsub('_.+', '', colnames(phosphoL6)))
#' phosphoL6.reg <- phosphoL6[(aov < 0.05) &
#'                         (rowSums(phosphoL6.mean > 0.5) > 0),,drop = FALSE]
#' L6.phos.std <- standardise(phosphoL6.reg)
#'
#'
#' @export
#'
standardise <- function(mat) {
    means <- apply(mat, 1, mean)
    sds <- apply(mat, 1, stats::sd)

    X2 <- sweep(mat, MARGIN = 1, STATS = means, FUN = "-")
    zX <- sweep(X2, MARGIN = 1, STATS = sds, FUN = "/")

    return(zX)
}

#' @title Multi-intersection, union
#'
#' @description A recusive loop for intersecting multiple sets.
#'
#' @aliases mUnion
#'
#' @usage
#' mIntersect(x, y, ...)
#' mUnion(x, y, ...)
#'
#' @param x,y,... objects to find intersection/union.
#'
#' @return An intersection/union of input parameters
#'
#' @examples
#'
#' data('phospho_liverInsTC_RUV_sample')
#' data('phospho_L6_ratio')
#'
#' site1 <- gsub('~[STY]', '~',
#'             sapply(strsplit(rownames(phospho.L6.ratio), '~'),
#'                     function(x){paste(toupper(x[2]), x[3], sep='~')}))
#' site2 <- rownames(phospho.liver.Ins.TC.ratio.RUV)
#'
#' # step 2: rank by fold changes
#' tmp <- do.call(cbind, lapply(split(1:ncol(phospho.L6.ratio), gsub('_exp\\d+',
#'                             '', colnames(phospho.L6.ratio))),
#'                             function(i){rowMeans(phospho.L6.ratio[,i])}))
#' site1 <- t(sapply(split(data.frame(tmp), site1), colMeans))[,-1]
#'
#' tmp <- do.call(cbind, lapply(split(1:ncol(phospho.liver.Ins.TC.ratio.RUV),
#'                             gsub(
#'                                 '(Intensity\\.)(.*)(\\_Bio\\d+)',
#'                                 '\\2',
#'                                 colnames(phospho.liver.Ins.TC.ratio.RUV))),
#'                             function(i){
#'                                 rowMeans(phospho.liver.Ins.TC.ratio.RUV[,i])
#'                             }))
#' site2 <- t(sapply(split(data.frame(tmp), site2), colMeans))
#'
#' o <- mIntersect(site1, site2)
#'
#' @export mIntersect
#'
mIntersect <- function(x, y, ...) {
    if (missing(...))
        intersect(x, y) else intersect(x, mIntersect(y, ...))
}


#' @export mUnion
#'
mUnion <- function(x, y, ...) {
    if (missing(...))
        union(x, y) else union(x, mUnion(y, ...))
}



#' Minmax scaling
#'
#' Perform a minmax standardisation to scale data into 0 to 1 range
#'
#' @usage minmax(mat)
#'
#' @param mat a matrix with rows correspond to phosphosites and columns
#' correspond to condition
#'
#' @return Minmax standardised matrix
#'
#' @examples
#'
#' data('phospho_L6_ratio')
#' data('SPSs')
#'
#' grps = gsub('_.+', '', colnames(phospho.L6.ratio))
#'
#' # Cleaning phosphosite label
#' phospho.site.names = rownames(phospho.L6.ratio)
#' L6.sites = gsub(' ', '', sapply(strsplit(rownames(phospho.L6.ratio), '~'),
#'                                 function(x){paste(toupper(x[2]), x[3], '',
#'                                                 sep=';')}))
#' phospho.L6.ratio = t(sapply(split(data.frame(phospho.L6.ratio), L6.sites),
#'                             colMeans))
#' phospho.site.names = split(phospho.site.names, L6.sites)
#'
#' # Construct a design matrix by condition
#' design = model.matrix(~ grps - 1)
#'
#' # phosphoproteomics data normalisation using RUV
#' ctl = which(rownames(phospho.L6.ratio) %in% SPSs)
#' phospho.L6.ratio.RUV = RUVphospho(phospho.L6.ratio, M = design, k = 3,
#'                                 ctl = ctl)
#'
#' phosphoL6 = phospho.L6.ratio.RUV
#' rownames(phosphoL6) = phospho.site.names
#'
#' # filter for up-regulated phosphosites
#' phosphoL6.mean <- meanAbundance(phosphoL6, grps = gsub('_.+', '',
#'                                 colnames(phosphoL6)))
#' aov <- matANOVA(mat=phosphoL6, grps=gsub('_.+', '', colnames(phosphoL6)))
#' phosphoL6.reg <- phosphoL6[(aov < 0.05) &
#'                         (rowSums(phosphoL6.mean > 0.5) > 0),,drop = FALSE]
#' L6.phos.std <- standardise(phosphoL6.reg)
#' rownames(L6.phos.std) <- sapply(strsplit(rownames(L6.phos.std), '~'),
#'     function(x){gsub(' ', '', paste(toupper(x[2]), x[3], '', sep=';'))})
#'
#' L6.phos.seq <- sapply(strsplit(rownames(phosphoL6.reg), '~'),
#'                     function(x)x[4])
#'
#' numMotif = 5
#' numSub = 1
#'
#' ks.profile.list <- kinaseSubstrateProfile(PhosphoSite.mouse, L6.phos.std)
#' motif.mouse.list = PhosR::motif.mouse.list
#'
#' motif.mouse.list.filtered <-
#'     motif.mouse.list[which(motif.mouse.list$NumInputSeq >= numMotif)]
#' ks.profile.list.filtered <-
#'     ks.profile.list[which(ks.profile.list$NumSub >= numSub)]
#'
#' # scoring all phosphosites against all motifs
#' motifScoreMatrix <-
#'     matrix(NA, nrow=nrow(L6.phos.std),
#'     ncol=length(motif.mouse.list.filtered))
#' rownames(motifScoreMatrix) <- rownames(L6.phos.std)
#' colnames(motifScoreMatrix) <- names(motif.mouse.list.filtered)
#'
#' # extracting flanking sequences
#' seqWin = mapply(function(x) {
#'     mid <- (nchar(x)+1)/2
#'     substr(x, start=(mid-7), stop=(mid+7))
#' }, L6.phos.seq)
#'
#'
#' print('Scoring phosphosites against kinase motifs:')
#' for(i in seq_len(length(motif.mouse.list.filtered))) {
#'     motifScoreMatrix[,i] <-
#'         frequencyScoring(seqWin, motif.mouse.list.filtered[[i]])
#'         cat(paste(i, '.', sep=''))
#' }
#' motifScoreMatrix <- minmax(motifScoreMatrix)
#'
#'
#' @export
#'
minmax <- function(mat) {
    # minmax normalise
    apply(mat, 2, function(x) {
        (x - min(x))/(max(x) - min(x))
    })
}

#' Summarising phosphosites to proteins
#'
#' Summarising phosphosite-level information to proteins for performing
#' downstream gene-centric analyses.
#'
#' @usage phosCollapse(mat, id, stat, by='min')
#'
#' @param mat a matrix with rows correspond to phosphosites and columns
#' correspond to samples.
#' @param id an array indicating the groupping of phosphosites etc.
#' @param stat an array containing statistics of phosphosite such as
#' phosphorylation levels.
#' @param by how to summarise phosphosites using their statistics. Either by
#' 'min' (default), 'max', or 'mid'.
#'
#' @return A matrix summarised to protein level
#'
#' @examples
#' library(limma)
#'
#' data('phospho_L6_ratio')
#' data('SPSs')
#'
#' grps = gsub('_.+', '', colnames(phospho.L6.ratio))
#'
#' # Cleaning phosphosite label
#' phospho.site.names = rownames(phospho.L6.ratio)
#' L6.sites = gsub(' ', '', sapply(strsplit(rownames(phospho.L6.ratio), '~'),
#'                                 function(x){paste(toupper(x[2]), x[3], '',
#'                                                 sep=';')}))
#' phospho.L6.ratio = t(sapply(split(data.frame(phospho.L6.ratio), L6.sites),
#'                             colMeans))
#' phospho.site.names = split(phospho.site.names, L6.sites)
#'
#' # Construct a design matrix by condition
#' design = model.matrix(~ grps - 1)
#'
#' # phosphoproteomics data normalisation using RUV
#' ctl = which(rownames(phospho.L6.ratio) %in% SPSs)
#' phospho.L6.ratio.RUV = RUVphospho(phospho.L6.ratio, M = design, k = 3,
#'                                 ctl = ctl)
#'
#'
#' # divides the phospho.L6.ratio data into groups by phosphosites
#' L6.sites <- gsub(' ', '', gsub('~[STY]', '~',
#'                 sapply(strsplit(rownames(phospho.L6.ratio.RUV), '~'),
#'                 function(x){paste(toupper(x[2]), x[3], sep='~')})))
#' phospho.L6.ratio.sites <- t(sapply(split(data.frame(phospho.L6.ratio.RUV),
#'                                         L6.sites), colMeans))
#'
#' # fit linear model for each phosphosite
#' f <- gsub('_exp\\d', '', colnames(phospho.L6.ratio.RUV))
#' X <- model.matrix(~ f - 1)
#' fit <- lmFit(phospho.L6.ratio.RUV, X)
#'
#' # extract top-ranked phosphosites for each condition compared to basal
#' table.AICAR <- topTable(eBayes(fit), number=Inf, coef = 1)
#' table.Ins <- topTable(eBayes(fit), number=Inf, coef = 3)
#' table.AICARIns <- topTable(eBayes(fit), number=Inf, coef = 2)
#'
#' DE1.RUV <- c(sum(table.AICAR[,'adj.P.Val'] < 0.05),
#'     sum(table.Ins[,'adj.P.Val'] < 0.05),
#'     sum(table.AICARIns[,'adj.P.Val'] < 0.05))
#'
#' # extract top-ranked phosphosites for each group comparison
#' contrast.matrix1 <- makeContrasts(fAICARIns-fIns, levels=X)
#' contrast.matrix2 <- makeContrasts(fAICARIns-fAICAR, levels=X)
#' fit1 <- contrasts.fit(fit, contrast.matrix1)
#' fit2 <- contrasts.fit(fit, contrast.matrix2)
#' table.AICARInsVSIns <- topTable(eBayes(fit1), number=Inf)
#' table.AICARInsVSAICAR <- topTable(eBayes(fit2), number=Inf)
#'
#' DE2.RUV <- c(sum(table.AICARInsVSIns[,'adj.P.Val'] < 0.05),
#'     sum(table.AICARInsVSAICAR[,'adj.P.Val'] < 0.05))
#'
#' o <- rownames(table.AICARInsVSIns)
#' Tc <- cbind(table.Ins[o,'logFC'], table.AICAR[o,'logFC'],
#'             table.AICARIns[o,'logFC'])
#' rownames(Tc) = gsub('(.*)(;[A-Z])([0-9]+)(;)', '\\1;\\3;', o)
#' colnames(Tc) <- c('Ins', 'AICAR', 'AICAR+Ins')
#'
#' # summary phosphosite-level information to proteins for performing downstream
#' # gene-centric analyses.
#' Tc.gene <- phosCollapse(Tc, id=gsub(';.+', '', rownames(Tc)),
#'     stat=apply(abs(Tc), 1, max), by = 'max')
#'
#' @export
#'
phosCollapse <- function(mat, id, stat, by = "min") {
    listMat <- split(data.frame(mat, stat), id)

    matNew <- c()
    if (by == "min") {
        matNew <- as.matrix(do.call(rbind, lapply(listMat, function(x) {
            if (nrow(x) == 1) {
                as.numeric(x[1, seq_len(ncol(x) - 1)])
            } else {
                x[order(as.numeric(x[, ncol(x)]))[1], seq_len(ncol(x) - 1)]
            }
        })))
    } else if (by == "max") {
        matNew <- as.matrix(do.call(rbind, lapply(listMat, function(x) {
            if (nrow(x) == 1) {
                as.numeric(x[1, seq_len(ncol(x) - 1)])
            } else {
                x[order(as.numeric(x[, ncol(x)]), decreasing = TRUE)[1],
                    seq_len(ncol(x) - 1)]
            }
        })))
    } else if (by == "mid") {
        matNew <- as.matrix(do.call(rbind, lapply(listMat, function(x) {
            if (nrow(x) == 1) {
                as.numeric(x[1, seq_len(ncol(x) - 1)])
            } else {
                mid <- round(nrow(x)/2)
                x[order(as.numeric(x[, ncol(x)]))[mid], seq_len(ncol(x) - 1)]
            }
        })))
    } else {
        stop("Unrecognised way of collapsing the data!")
    }

    return(matNew)
}

#' ANOVA test
#'
#' @description Performs an ANOVA test and returns its adjusted p-value
#'
#'
#' @usage matANOVA(mat, grps)
#'
#' @param mat An p by n matrix where p is the number of phosphosites and n is
#' the number of samples
#' @param grps A vector of length n, with group or time point information of the
#'  samples
#'
#' @return A vector of multiple testing adjusted p-values
#'
#' @examples
#' data('phospho_L6_ratio')
#' data('SPSs')
#'
#' grps = gsub('_.+', '', colnames(phospho.L6.ratio))
#'
#' # Cleaning phosphosite label
#' phospho.site.names = rownames(phospho.L6.ratio)
#' L6.sites = gsub(' ', '', sapply(strsplit(rownames(phospho.L6.ratio), '~'),
#'                                 function(x){paste(toupper(x[2]), x[3], '',
#'                                                 sep=';')}))
#' phospho.L6.ratio = t(sapply(split(data.frame(phospho.L6.ratio), L6.sites),
#'                             colMeans))
#' phospho.site.names = split(phospho.site.names, L6.sites)
#'
#' # Construct a design matrix by condition
#' design = model.matrix(~ grps - 1)
#'
#' # phosphoproteomics data normalisation using RUV
#' ctl = which(rownames(phospho.L6.ratio) %in% SPSs)
#' phospho.L6.ratio.RUV = RUVphospho(phospho.L6.ratio, M = design, k = 3,
#'                                 ctl = ctl)
#'
#'
#' phosphoL6 = phospho.L6.ratio.RUV
#' rownames(phosphoL6) = phospho.site.names
#'
#' # filter for up-regulated phosphosites
#' phosphoL6.mean <- meanAbundance(phosphoL6, grps = gsub('_.+', '',
#'                                 colnames(phosphoL6)))
#' aov <- matANOVA(mat=phosphoL6, grps=gsub('_.+', '', colnames(phosphoL6)))
#'
#' @export
matANOVA <- function(mat, grps) {
    ps <- apply(mat, 1, function(x) {
        summary(stats::aov(as.numeric(x) ~ grps))[[1]][["Pr(>F)"]][1]
    })

    # adjust for multiple testing
    ps.adj <- stats::p.adjust(ps, method = "fdr")
    return(ps.adj)
}

#' @title Obtain average expression from replicates
#'
#' @usage meanAbundance(mat, grps)
#'
#' @param mat a matrix with rows correspond to phosphosites and columns
#' correspond to samples.
#' @param grps a string specifying the grouping (replciates).
#'
#' @return a matrix with mean expression from replicates
#'
#' @examples
#' data('phospho_L6_ratio')
#' data('SPSs')
#'
#' grps = gsub('_.+', '', colnames(phospho.L6.ratio))
#'
#' # Cleaning phosphosite label
#' phospho.site.names = rownames(phospho.L6.ratio)
#' L6.sites = gsub(' ', '', sapply(strsplit(rownames(phospho.L6.ratio), '~'),
#'                                 function(x){paste(toupper(x[2]), x[3], '',
#'                                                 sep=';')}))
#' phospho.L6.ratio = t(sapply(split(data.frame(phospho.L6.ratio), L6.sites),
#'                             colMeans))
#' phospho.site.names = split(phospho.site.names, L6.sites)
#'
#' # Construct a design matrix by condition
#' design = model.matrix(~ grps - 1)
#'
#' # phosphoproteomics data normalisation using RUV
#' ctl = which(rownames(phospho.L6.ratio) %in% SPSs)
#' phospho.L6.ratio.RUV = RUVphospho(phospho.L6.ratio, M = design, k = 3,
#'                                 ctl = ctl)
#'
#'
#' phosphoL6 = phospho.L6.ratio.RUV
#' rownames(phosphoL6) = phospho.site.names
#'
#' # filter for up-regulated phosphosites
#' phosphoL6.mean <- meanAbundance(phosphoL6, grps = gsub('_.+', '',
#'                                 colnames(phosphoL6)))
#'
#' @export
meanAbundance <- function(mat, grps) {
    # meanMat <- sapply(split(seq_len(ncol(mat)), grps),
    # function(i) rowMeans(mat[,i]))[,unique(grps)]
    meanMat = mapply(function(i, mat) {
        rowMeans(mat[, i])
    }, split(seq_len(ncol(mat)), grps), MoreArgs = list(mat = mat))[,
        unique(grps)]
    return(meanMat)
}
PengyiYang/PhosR documentation built on June 21, 2020, 8:37 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
PengyiYang/PhosR
A set of methods and tools for comprehensive analysis of phosphoproteomics data

R/toolBox.R
In PengyiYang/PhosR: A set of methods and tools for comprehensive analysis of phosphoproteomics data

Defines functions meanAbundance matANOVA phosCollapse minmax mUnion mIntersect standardise medianScale medianScaling

Documented in matANOVA meanAbundance medianScaling minmax mIntersect mUnion phosCollapse standardise

R Package Documentation

Browse R Packages

We want your feedback!

PengyiYang/PhosR A set of methods and tools for comprehensive analysis of phosphoproteomics data

R/toolBox.R In PengyiYang/PhosR: A set of methods and tools for comprehensive analysis of phosphoproteomics data

Defines functions meanAbundance matANOVA phosCollapse minmax mUnion mIntersect standardise medianScale medianScaling

Documented in matANOVA meanAbundance medianScaling minmax mIntersect mUnion phosCollapse standardise

R Package Documentation

Browse R Packages

We want your feedback!

PengyiYang/PhosR
A set of methods and tools for comprehensive analysis of phosphoproteomics data

R/toolBox.R
In PengyiYang/PhosR: A set of methods and tools for comprehensive analysis of phosphoproteomics data