R/panelOptimizerClassAndMethods.R

Defines functions .lmPlotSimple .nullProfileSimple .lfmSimple .entropySimple .mapMutationsSimple .getMutationsSimple .alignSequencesSimple .setupSimple .lmParamsSimple .newLowMACAsimple .createEmptyLowObj

#-------------------------------------------
# SET OF CLASS AND METHODS ADAPTED FROM LOWMACA FOR panelOptimizer
#-------------------------------------------

.createEmptyLowObj <- function()
    list(
        arguments=list(
            genes=NULL
            , pfam=NULL
            , pfamAllGenes=''
            , input=''
            , mode=''
            , params=list(
                mutation_type=c('missense', 'all', 'truncating', 'silent')[1]
                , tumor_type='all_tumors'
                , min_mutation_number=-1L
                , density_bw=0
                , clustal_cmd='clustalo'
                , use_hmm=FALSE
                , datum=FALSE
                )
            , parallelize=list(
                getMutations=FALSE
                , makeAlignment=TRUE
                )
            )
        )

.newLowMACAsimple <- function(genes=NULL, pfam=NULL)
{
    # check arguments
    if( is.null(genes) & is.null(pfam) )
        stop('Either a gene or a pfam should be specified.')
    if( length(pfam)>1 )
        stop('Only one Pfam ID can be evaluated')
    # initialize the LowMaca object
    object <- .createEmptyLowObj()
    if( !is.null(pfam) ) {
        object$arguments$mode <- 'pfam'
        object$arguments$pfam <- pfam
    } else {
        object$arguments$mode <- 'genes'
        object$arguments$genes <- genes
    }
    mode <- object$arguments$mode
    if( mode == 'genes' )
    {
        # load annotation files
        myAlias <- LowMACAAnnotation::getMyAlias()
        myUni <- LowMACAAnnotation::getMyUni()
        #
        selectedColumns <- c('Gene_Symbol', 'Entrez', 'UNIPROT', 'AMINO_SEQ')
        geneID <- .checkGene_to_geneID(genes, myUni, myAlias)$Gene_Symbol
        selectedRows <- myUni$Gene_Symbol %in% geneID
        genesData <- myUni[selectedRows, selectedColumns]
        genesData$Pfam_ID <- '-'
        genesData$Envelope_Start <- 1
        genesData$Envelope_End <- nchar(genesData$AMINO_SEQ)
        seq_names <- paste(genesData[, 'Gene_Symbol']
            , genesData[, 'Pfam_ID']
            , genesData[, 'Entrez'] 
            , paste(genesData[, 'Envelope_Start'], 
                genesData[, 'Envelope_End'], sep=";") 
            , sep="|")
        rownames(genesData) <- seq_names
    } else {
        # load annotation files
        myPfam <- LowMACAAnnotation::getMyPfam()
        # select rows and colums to match the input
        selectedColumns <- c('Gene_Symbol', 'Pfam_ID', 'Entrez'
            , 'Envelope_Start', 'Envelope_End', 'UNIPROT', 'Pfam_Fasta')
        selectedRows <- myPfam$Pfam_ID %in% pfam
        if( !any(selectedRows) ) {
          stop('Pfam name is not correct or is not mapped by LowMACAsimple')
        }
        genesData <- myPfam[selectedRows, selectedColumns]
        seq_names <- paste(genesData[, 'Gene_Symbol']
            , genesData[, 'Pfam_ID']
            , genesData[, 'Entrez'] 
            , paste(genesData[, 'Envelope_Start'], 
                genesData[, 'Envelope_End'], sep=";") 
            , sep="|")
        colnames(genesData)[colnames(genesData) == 'Pfam_Fasta'] <- 'AMINO_SEQ'
        rownames(genesData) <- seq_names
        if( !is.null(genes) ) {
            object$arguments$genes <- genes
            object$arguments$pfamAllGenes <- genesData
            # load annotation files
            myAlias <- LowMACAAnnotation::getMyAlias()
            myUni <- LowMACAAnnotation::getMyUni()
            geneID <- .checkGene_to_geneID(genes, myUni, myAlias)$Gene_Symbol
            selectedRows <- genesData$Gene_Symbol %in% geneID
            genesData <- genesData[selectedRows, ]
        }
    }
    genesData[, 'Gene_Symbol'] <- factor(genesData[, 'Gene_Symbol'])
    genesData[, 'Pfam_ID'] <- factor(genesData[, 'Pfam_ID'])
    genesData[, 'Entrez'] <- factor(genesData[, 'Entrez'])
    genesData[, 'UNIPROT'] <- factor(genesData[, 'UNIPROT'])
    ## convert non-canonical amnio acids into their
    ## most similar and canonical one
    ##        U (selenocysteine) -> A (alanine)
    genesData$AMINO_SEQ <- gsub('U','A', genesData$AMINO_SEQ)
    ## return the updated object
    object$arguments$input <- genesData
    return(object)

}

.lmParamsSimple <- function(object) {

    return(object$arguments$params)
    }

.setupSimple <- function(object, repos=NULL) {

    object <- .alignSequencesSimple(object)
    object <- .getMutationsSimple(object, repos=repos)
    object <- .mapMutationsSimple(object)
    return(object)
    }

.alignSequencesSimple <- function(object) {

    genesData <- object$arguments$input
    object$alignment <- .clustalOAlign(genesData)
    object$alignment$df <- data.frame(
                consensus=strsplit(genesData$AMINO_SEQ,'')[[1]]
                , conservation=rep(1, length(genesData$AMINO_SEQ))
                )
    return(object)
    }

.getMutationsSimple <- function(object, repos=NULL) {

    genes <- object$arguments$input
    mutation_type <- object$arguments$params$mutation_type
    tumor_type <- object$arguments$params$tumor_type
    parallelGetMut <- object$arguments$parallelize$getMutationsSimple
    # outputFolder <- object$arguments$paths$output_folder
    gmOut <- .getLocalGeneMutations(genes, 
            mutation_type=mutation_type, tumor_type=tumor_type, 
            localData=repos)
    object$mutations$data <- gmOut$Mutations
    object$mutations$freq <- gmOut$AbsFreq
    return(object)
    }

.mapMutationsSimple <- function(object) {

    mut <- object$mutations$data
    if( nrow(mut)==0 )
        return(object)
    alignment <- object$alignment$ALIGNMENT
    alignmentLength <- max(alignment$Align)
    # # elaborate the mutations and map them with the alignment
    mut_aligned <- merge( mut , alignment[!is.na(alignment$Ref) , ]
                    , by.x=c("Entrez" , "Gene_Symbol" , "Amino_Acid_Position") 
                    , by.y=c("Entrez" , "Gene_Symbol" , "Ref") 
                    , all.x=TRUE)
    mut_aligned <- mut_aligned[!is.na(mut_aligned$Align), ]
    # parameters for the analysis [can't be set by the user, so far]
    splitCriterion <- c('Gene_Symbol', 'Tumor_Type', 'domainID')[1]
    
    # split data
    mut_aligned.split <- split(mut_aligned, mut_aligned[[splitCriterion]])
    mut_aligned.extended <- matrix(0, nrow=length(mut_aligned.split)
        , ncol=alignmentLength)
    for(i in seq_len(length(mut_aligned.split))) {
        tmp <- lengths(
            split(mut_aligned.split[[i]]$Align, mut_aligned.split[[i]]$Align)
            )
        if( length(tmp)>0 ) 
            mut_aligned.extended[i,as.numeric(names(tmp))] <- tmp
    }
    rownames(mut_aligned.extended) <- names(mut_aligned.split)
    # filter data based on number of mutations
    minNMut <- object$arguments$params$min_mutation_number
    tokeep <- rowSums(mut_aligned.extended) >= minNMut
    if(any(!tokeep))
    {
        warning(
            paste(
              "We excluded these genes (or domains) because they have less than"
                , minNMut, "mutations")
            , immediate.=TRUE)
        excludedGenes <- rownames(mut_aligned.extended[!tokeep, ])
        print(excludedGenes)
        object$mutations$excluded <- excludedGenes
    }
    mut_aligned.extended <- mut_aligned.extended[tokeep, , drop=FALSE]
    object$mutations$aligned <- mut_aligned.extended
    return(object)

    }

.entropySimple <- function(object, bw=NULL ) {
    conservation <- 0
    myMut <- object$mutations
    if(identical(myMut , list())){
        stop("mutations slot is empty. Launch the method getMutationsSimple!")
    }
    myAln <- object$alignment
    if(identical(myAln , list())){
        stop("alignment slot is empty. Launch the method alignSequencesSimple!")
    }
    if( nrow(object$mutations$data)==0 ) {
        object$entropy$absval <- NA
        object$entropy$log10pval <- NA
        object$entropy$pvalue <- NA
        return(object)
    }
    mut_extended <- object$mutations$aligned
    alignment    <- object$alignment
    # Uniform variable
    message("Making uniform model...")
    if( is.null(bw) ) bw <- object$arguments$params$density_bw else 
        object$arguments$params$density_bw <- bw
    if( bw=='auto' ) 
    {
        # calculate the bw from the global profile
        bw <- .profileDensity(colSums(mut_extended))$bw
    }
    message(paste('Assigned bandwidth:', round(bw,2)))
    object$entropy$bw <- bw
    weights <- .alnWeights(alignment)
    object$entropy$uniform <- .makeUniformModel(mut_extended, bw=bw, nboot=1000,
        weights=weights, plotOUT=FALSE)

    # Calculate the entropy values
    globalProfile <- colSums(mut_extended)
    uniform <- object$entropy$uniform
    absval <- .profileEntropy(globalProfile, norm=FALSE, bw=bw)
    log10pval <- .profileEntropy(globalProfile, norm=TRUE, bw=bw, model=uniform)
    pvalue <- 10^log10pval

    object$entropy$absval <- absval
    object$entropy$log10pval <- log10pval
    object$entropy$pvalue <- pvalue
    object$entropy$conservation_thr <- conservation

    # null profile
    # Null profile calculated on the global profile
    nullOut <- .makeNullProfile(mut_extended, bw=bw, 
        nboot=1000, weights=.alnWeights(alignment))

    pvals <- nullOut$pvalue
    filteredPvals <- pvals
    filteredPvals[object$alignment$df$conservation < conservation] <- NA
    qvals <- p.adjust(filteredPvals, method='BH')

    object$alignment$df <- cbind(
        object$alignment$df[, c('consensus', 'conservation')]
        , nullOut, qvalue=qvals
        )
    return(object)
    }

.lfmSimple <- function(object, metric='qvalue', threshold=.05) {
    #Checks on parameters
    if(! (metric %in% c("qvalue" , "pvalue") ))
        stop("metric parameter can be only 'pvalue' or 'qvalue'")
    if(!(threshold <= 1 && threshold>=0))
        stop("pvalue/qvalue threshold must be a number between 0 and 1")
    entrop <- object$entropy
    if(identical(entrop , list()))
        stop("Entropy slot is empty. Launch the method entropy first!")
    if( nrow(object$mutations$data)==0 ) {
        message('No mutations available for this object.')
    } else {
        if( metric == 'qvalue' ) {
            ## use qvalue already stored
            qvalue <- object$alignment$df$qvalue
            signifPos <- which(qvalue < threshold)
        } else {
            ## use pvalue
            pvalue <- object$alignment$df$pvalue
            signifPos <- which(pvalue < threshold)
        }
        out <- lapply(signifPos, function(pos) {
            selectedRows <- object$alignment$ALIGNMENT$Align == pos
            alnData <- object$alignment$ALIGNMENT[selectedRows, ]
            selectedColumns <- c('Gene_Symbol', 'Ref', 'Envelope_Start'
                , 'Envelope_End')
            alnData <- alnData[!is.na(alnData$Ref), selectedColumns]
            colnames(alnData) <- c('Gene_Symbol', 'Amino_Acid_Position'
                , 'Envelope_Start', 'Envelope_End')
            selectedColumns <- c('Gene_Symbol', 'Amino_Acid_Position'
                , 'Amino_Acid_Change', 'Sample', 'Tumor_Type')
            mutData <- object$mutations$data[, selectedColumns]
            lfmSimple <- merge(mutData, alnData)
            if( nrow(lfmSimple)>0 ){
                lfmSimple$Multiple_Aln_pos <- pos
                lfmSimple$metric <- switch(metric, 'pvalue'=pvalue[pos]
                                           , 'qvalue'=qvalue[pos])
            } else {
                lfmSimple$Multiple_Aln_pos <- character(0)
                lfmSimple$metric <- numeric(0)
            }
            return(lfmSimple)
            })
        out <- do.call('rbind', out)
        # load data
        myUni <- LowMACAAnnotation::getMyUni()
        selectedColumns <- c('Gene_Symbol', 'Entrez', 'Entry', 'UNIPROT'
            , 'Chromosome', 'Protein.name')
        myUniSmall <- myUni[, selectedColumns]
        out <- merge(out, myUniSmall)
        return(out)
    }
    }

.nullProfileSimple <- function(object) {


    if( length(object$alignment)==0 )
        stop('Perform alignment on the object before plotting.')

    if( length(object$mutations)==0 )
        stop('Retrieve mutations for the object before plotting.')

    if( length(object$entropy)==0 )
        stop('Perform entropy calculation on the object before plotting.')

    if( nrow(object$mutations$data)==0 ) {
        message('No mutations available for this object.')
    } else {
        windowlimits <- seq_len(nrow(object$alignment$df))
        conservation <- object$entropy$conservation_thr
        mean <- object$alignment$df$mean
        lowerThreshold <- object$alignment$df$lTsh
        upperThreshold <- object$alignment$df$uTsh
        profile <- object$alignment$df$profile
        pvalue <- object$alignment$df$pvalue
            ## use qvalue already stored
        qvalue <- object$alignment$df$qvalue
        qvalue <- qvalue
        qvalSignif_x <- which(qvalue < 5e-2)
        qvalSignif_y <- profile[qvalSignif_x] + max(profile)/20
        max_y <- max(c(
            object$alignment$df$lTsh,
            object$alignment$df$uTsh,
            object$alignment$df$profile*1.1
            ), na.rm=TRUE)
        blackProfile <- vapply(seq_len(length(profile)),function(i) {
          min(profile[i], upperThreshold[i])
          } , numeric(1))
        orangeProfile <- profile - blackProfile
        mp <- barplot(rbind(blackProfile, orangeProfile), 
            col=c('black', 'orange'), ylim=c(0, max_y), ylab='Mutation density'
            )
        axis(1, at=mp, labels=windowlimits, las=2)
        lines(mp, upperThreshold, lty=2, lwd=2, col='blue')
        qvalSignif_x <- mp[qvalue < 5e-2]

        ### plot an asterisk above the residues which are significant in 
        # terms of the pvalue
        if( length(qvalSignif_x) > 0 ) {
            text(qvalSignif_x, qvalSignif_y, '*', cex=2, col='red')
        }
    }
    }

.lmPlotSimple <- function(object, conservation=NULL , splitLen=NULL) {

    opar <- par("mfrow" , "mar")
    on.exit(par(opar))
    if( length(object$alignment)==0 )
        stop('Perform alignment on the object before plotting.')

    if( length(object$mutations)==0 )
        stop('Retrieve mutations for the object before plotting.')

    if( length(object$entropy)==0 )
        stop('Perform entropy calculation on the object before plotting.')

    if( nrow(object$mutations$data)==0 ) {
        message('No mutations available for this object.')
    } else {
        if(is.null(conservation))
            conservation <- object$entropy$conservation_thr
        if( is.null(splitLen) )
            splitLen <- ncol(object$mutations$aligned)

        windowlimits <- seq_len(ncol(object$mutations$aligned))
        windowlimitsSplit <- split(windowlimits, ceiling(windowlimits/splitLen))

        mode <- object$arguments$mode
        nObj <- nrow(object$arguments$input)

        par(mar=c(2,4,2,2))
        layoutMatrix <- as.matrix(c(1,1,1,1,3,2,2,2,2))
        multipleMat <- as.list(rep(NA), length(windowlimitsSplit))
        for( i in seq_len(length(windowlimitsSplit) )){ 
            multipleMat[[i]] <- layoutMatrix+(max(layoutMatrix)*(i-1))
        }
        layoutMatrix <- do.call('rbind', multipleMat)
        plotType <- 3
        layout(layoutMatrix)
        log10pval <- object$entropy$log10pval
        pvalue <- object$entropy$pvalue

        ## plot 1
        if( plotType == 3) par(mar=c(0,4,4,2))
        par(mar=c(2,4,4,2))
        myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral"))
                              , space="Lab")
        lenAln <- ncol(object$mutations$aligned)
        mut_aligned <- object$mutations$aligned
        mp <- barplot(
            mut_aligned
            , col=myPalette(nrow(object$mutations$aligned))
            , border=if(lenAln<50) 'black' else NA
            , main=paste("Log10 P-Value:" , round(log10pval , 2) , 
                "P-Value:" , signif( pvalue , 2 ) , "Bw:" 
                , signif(object$entropy$bw,3) 
                )
            , ylab='Mutation #'
            , ylim=c(0, max(colSums(object$mutations$aligned)))
            )
        axis(1, at=mp, labels=windowlimits, las=2)
        par(mar=c(2,4,2,2))
        ## plot 2
        .nullProfileSimple(object)

        #############
        ## in case the analysis is on a single gene 
        ## plot its domains
        ############### 
        if( plotType == 3 ) {

            myPfam <- LowMACAAnnotation::getMyPfam()
            gene <- as.character(object$arguments$input$Gene_Symbol)
            domains <- myPfam[myPfam$Gene_Symbol==gene
                , c("Envelope_Start" , "Envelope_End" , "Pfam_Name")]
            ## create empty plot
            plot.new()
            plot.window(
                xlim=range(windowlimits)
                , ylim=c(0,0.05)
                )
            par(mar=c(0,0,0,0))
            ## plot domains
            if(nrow(domains)>0) {
                for( i in seq_len(nrow(domains)) ) {
                int <- 
                  intersect(
                    domains$Envelope_Start[i]:domains$Envelope_End[i]
                    , windowlimits)
                if( length(int)>0 ) {
                    domains$Envelope_Start[i] <- min(int)
                    domains$Envelope_End[i] <- max(int)
                } else {
                    domains <- domains[-i,]
                }
                }
                for(i in seq_len(nrow(domains))) {
                    xleft=as.numeric(domains[i , "Envelope_Start"])
                    xright=as.numeric(domains[i , "Envelope_End"])
                    ytop=0.05
                    ybottom=0
                    col=topo.colors(nrow(domains) , alpha=0.5)[i]
                    characters <- nchar(domains[i , "Pfam_Name"])
                    rect(xleft=xleft , xright=xright 
                        , ytop=ytop , ybottom=ybottom 
                        , col=col )
                    if(characters<=3){
                        text(x=(xright+xleft)/2 , y=0.025 
                        , domains[i , "Pfam_Name"] 
                        , font=2)
                    } else {
                        if((xright-xleft)<=100){
                                text(x=(xright+xleft)/2 , y=0.025 
                                    , domains[i , "Pfam_Name"] 
                                    , font=2 , cex=0.8)
                        } else {
                                text(x=(xright+xleft)/2 , y=0.025 
                                    , domains[i , "Pfam_Name"] 
                                    , font=2)
                        }
                    }
                }
                } else {
                    text(x=length(windowlimits)/2, y=0.025
                        , 'no pfam domains within gene sequence', cex=1.5)
                }
                par(mar=c(2,4,4,2))
            }

    }

    }
gmelloni/PrecisionTrialDrawer documentation built on March 4, 2023, 1:48 a.m.