R/plotSex.R

Defines functions plotDiscrepancyGenders plotPredictedGender returnPredictedGender returnGivenGender

## Return gender covariate if provided in the covariates
returnGivenGender <- function(covariates){
    givenGender <- NULL
    
    if (!is.null(covariates)){
        possibilities <- c("gender","Gender","sex","Sex","GENDER","SEX")
        sum <- sum(possibilities %in% colnames(covariates))
        if (sum!=0){
            goodColumn <- possibilities[possibilities %in% colnames(covariates)][1]
            givenGender <- as.character(covariates[,goodColumn])
            givenGender <- substr(toupper(givenGender),1,1)
            givenGender <- as.character(givenGender)
        }
    }
    
    return(givenGender)
}

## Predict gender with X and Y chr intensities
returnPredictedGender <- function(cnQuantiles,
                                  cutoff = (-0.3)
){
    ## It assumes that cnQuantiles is a list containing $X and $Y
    X <- cnQuantiles$X
    Y <- cnQuantiles$Y
    med <- floor(nrow(X)/2)
    
    x = X[med,]
    y = Y[med,]
    diff <- log2(y)-log2(x)
    n <- length(diff)
    
    predictedGender = rep("M", n)
    predictedGender[which(diff < cutoff)] <- "F"
    names(predictedGender) <- colnames(X)
    
    predictedGender <- as.character(predictedGender)
    return(predictedGender)
}

plotPredictedGender <- function(cnQuantiles,
                                cutoff =(-0.3),
                                color = NULL,
                                legend=TRUE,
                                bty="o"
){
    X <- cnQuantiles$X
    Y <- cnQuantiles$Y
    med <- floor(nrow(X)/2)
    x <- X[med,]
    y <- Y[med,]
    diff <- log2(y)-log2(x)
    n <- length(diff)
    
    if (is.null(color)){
        color <-  rep("lightskyblue", n)
        color[which(diff < cutoff)] <- "orange"
    }
    
    plot(diff, 
         jitter(rep(0,n),factor=1.5), 
         ylim = c(-1,1), 
         pch=18, 
         cex=2,
         col= color, 
         yaxt="n", 
         xlab="median CN(Y)  - median CN(X)", 
         ylab="",
         bty=bty)
    abline(v=as.numeric(cutoff),lty=3,lwd=2)
    
    
    if (legend){
        legend("topright",
               c("Predicted male","Predicted female"),
               cex=2, 
               pch=18, 
               col=c("lightskyblue","orange"),
               bty="n"
               )
    }
    
}

plotDiscrepancyGenders <- function(cutoff = (-0.3),
                                   covariates,
                                   cnQuantiles,
                                   bty="o"
){
    predictedGender <- returnPredictedGender(cutoff=cutoff,
                                             cnQuantiles=cnQuantiles)
    givenGender     <- returnGivenGender(covariates)
    ## In the case a gender information was not provided:
    if (is.null(givenGender)){
        plotPredictedGender(cnQuantiles = cnQuantiles, 
                            cutoff = cutoff, 
                            color = NULL,
                            legend = TRUE,
                            bty = bty)
    } else {
    ## In the case a gender information WAS provided:
        X <- cnQuantiles$X
        Y <- cnQuantiles$Y
        med <- floor(nrow(X)/2)
        x = X[med,]
        y = Y[med,]
        diff <- log2(y)-log2(x)
        n <- length(diff)
        
        color <- predictedGender
        color[color == "M"] <- "lightskyblue"
        color[color == "F"] <- "orange"
        
        ## For the samples with different predicted gender:
        non.matching <- predictedGender  != givenGender
        color[non.matching] <- "black"
        
                                        # For the samples with missing values:
        na.positions <- is.na(givenGender)
        color[na.positions] <- "grey"
        
        plotPredictedGender(cnQuantiles = cnQuantiles, 
                            cutoff = cutoff, 
                            color = color,
                            legend = FALSE, bty=bty)
        
        ## To add the sample names in the plot for the discrepancy samples:
        ## if (sum(color=="black" & color!="grey")>=1){
        ## indices <- which(color=="black" & color != "grey")
        ## text(diff[indices],-0.2, names(X)[indices])
        
        ## To add the legend: 
        legend.colors <- c("lightskyblue","orange")
        legend.names  <- c("Predicted male","Predicted female")
        
        ## In the case there are unmatching samples:
        if (sum(color=="black" & color!="grey")>=1){
            legend.colors <- c(legend.colors, "black")
            legend.names  <- c(legend.names, "Unmatching samples")
        }
        
        ## In the case there are missing values in the provided sex covariate:
        if (sum(color=="grey")>=1){
            legend.colors <- c(legend.colors, "grey")
            legend.names  <- c(legend.names, "Sex not provided by user")
        }
        
        legend( "topright",
               legend = legend.names,
               col = legend.colors,
               cex = 1.5, 
               pch = 18,
               bty="n")
    }
}
Jfortin1/shinyMethyl documentation built on July 18, 2024, 4:29 a.m.