R/EventPointer_IGV.R

Defines functions EventPointer_IGV

Documented in EventPointer_IGV

#' EventPointer IGV Visualization
#'
#' Generates of files to be loaded in IGV for visualization and interpretation of events
#'
#' @param Events Data.frame generated by EventPointer with the events to be included
#'               in the GTF file.
#' @param input Reference transcriprome. Must be one of: 'Ensembl', 'UCSC' , 'AffyGTF'
#'              or 'CustomGTF'.
#' @param inputFile If input is 'AffyGTF' or 'CustomGTF', inputFile should point to the
#'                  GTF file to be used.
#' @param PSR Path to the Exon probes txt file.
#' @param Junc Path to the Junction probes txt file.
#' @param PathGTF Directory where to write the GTF files.
#' @param EventsFile Path to EventsFound.txt file generated with CDFfromGTF function.
#' @param microarray Microarray used to create the CDF file. Must be one of: HTA-2_0,
#'                    ClariomD, RTA or MTA
#'
#' @return The function displays a progress bar to show the user the progress of the function.
#' Once the progress bar reaches 100%, two .gtf files are written to the specified directory
#' in PathGTF. The created files are: 1) paths.gtf : GTF file representing the alternative splicing
#' events and 2) probes.gtf : GTF file representing the probes
#' that measure each event and each path.
#'
#' @examples
#'
#' \dontrun{
#'    PathFiles<-system.file('extdata',package='EventPointer')
#'    DONSON_GTF<-paste(PathFiles,'/DONSON.gtf',sep='')
#'    PSRProbes<-paste(PathFiles,'/PSR_Probes.txt',sep='')
#'    JunctionProbes<-paste(PathFiles,'/Junction_Probes.txt',sep='')
#'    Directory<-tempdir()
#'
#'    data(ArraysData)
#'
#'    Dmatrix<-matrix(c(1,1,1,1,0,0,1,1),nrow=4,ncol=2,byrow=FALSE)
#'    Cmatrix<-t(t(c(0,1)))
#'    EventsFound<-paste(system.file('extdata',package='EventPointer'),'/EventsFound.txt',sep='')
#'
#'    Events<-EventPointer(Design=Dmatrix,
#'                       Contrast=Cmatrix,
#'                       ExFit=ArraysData,
#'                       Eventstxt=EventsFound,
#'                       Filter=TRUE,
#'                       Qn=0.25,
#'                       Statistic='LogFC',
#'                       PSI=TRUE)
#'
#'  EventPointer_IGV(Events=Events[1,,drop=FALSE],
#'                   input='AffyGTF',
#'                   inputFile=DONSON_GTF,
#'                   PSR=PSRProbes,
#'                   Junc=JunctionProbes,
#'                   PathGTF=Directory,
#'                  EventsFile= EventsFound,
#'                  microarray='HTA-2_0')
#'                  
#' }
#'
#' @export



EventPointer_IGV <- function(Events, input, 
    inputFile = NULL, PSR, Junc, PathGTF, 
    EventsFile, microarray = NULL) {
    
    # EventPointer_IGV is the function to
    # obtain the IGV visualization for the
    # detected Events using EventPointer.
    # Compared to previous versions, this
    # time we will not need to load any
    # .Rdata files in order to create the gtf
    # files, as a result certain functions
    # might need to be called again.
    
    # Input Files: input -> GTF File of the
    # transcriptome Dsource -> Information
    # about the CDF (not required) PSR -> Txt
    # file with the information of the PSRs
    # of the array Junc -> Txt file with the
    # information of junction probes in the
    # array
    
    # Crate TxDB object that contains all the
    # information contained in the gtf file
    # that was given in the input, the
    # function makeTxDbFromGFF corresponds to
    # the SGSeq R package
    cat("Creating SG Information...")
    
    
    if (is.null(Events)) {
        stop("No alternative splicing events provided")
    }
    
    if (is.null(PSR) | is.null(Junc)) {
        stop("Missing PSR and/or Junc files")
    }
    
    # Possible Arrays: HTA-2_0, ClariomD, RTA
    # and MTA
    
    if (is.null(microarray)) {
        stop("Microarray field empty")
    }
    
    if (microarray == "HTA-2_0") {
        if (input == "Ensembl") {
            TxDb <- makeTxDbFromBiomart(biomart = "ENSEMBL_MART_ENSEMBL", 
                dataset = "hsapiens_gene_ensembl", 
                host = "grch37.ensembl.org")
            TranscriptFeatures <- convertToTxFeatures(TxDb)
            SplicingGraphFeatures <- convertToSGFeatures(TranscriptFeatures)
        } else if (input == "UCSC") {
            TxDb <- makeTxDbFromUCSC(genome = "hg19", 
                tablename = "knownGene")
            TranscriptFeatures <- convertToTxFeatures(TxDb)
            SplicingGraphFeatures <- convertToSGFeatures(TranscriptFeatures)
        } else if (input == "AffyGTF" | input == 
            "CustomGTF") {
            if (is.null(inputFile)) {
                stop("inputFile parameter is empty")
            }
            
            TxDb <- makeTxDbFromGFF(file = inputFile, 
                format = "gtf", dataSource = "Custom GTF")
            TranscriptFeatures <- convertToTxFeatures(TxDb)
            SplicingGraphFeatures <- convertToSGFeatures(TranscriptFeatures)
        } else {
            stop("Unknown reference genome")
        }
    } else if (microarray == "ClariomD") {
        if (input == "Ensembl") {
            TxDb <- makeTxDbFromBiomart(biomart = "ENSEMBL_MART_ENSEMBL", 
                dataset = "hsapiens_gene_ensembl")
            TranscriptFeatures <- convertToTxFeatures(TxDb)
            SplicingGraphFeatures <- convertToSGFeatures(TranscriptFeatures)
        } else if (input == "UCSC") {
            TxDb <- makeTxDbFromUCSC(genome = "hg38", 
                tablename = "knownGene")
            TranscriptFeatures <- convertToTxFeatures(TxDb)
            SplicingGraphFeatures <- convertToSGFeatures(TranscriptFeatures)
        } else if (input == "AffyGTF" | input == 
            "CustomGTF") {
            if (is.null(inputFile)) {
                stop("inputFile parameter is empty")
            }
            
            TxDb <- makeTxDbFromGFF(file = inputFile, 
                format = "gtf", dataSource = "Custom GTF")
            TranscriptFeatures <- convertToTxFeatures(TxDb)
            SplicingGraphFeatures <- convertToSGFeatures(TranscriptFeatures)
        } else {
            stop("Unknown reference genome")
        }
    } else if (microarray == "RTA") {
        if (input == "Ensembl") {
            TxDb <- makeTxDbFromBiomart(biomart = "ENSEMBL_MART_ENSEMBL", 
                dataset = "rnorvegicus_gene_ensembl")
            TranscriptFeatures <- convertToTxFeatures(TxDb)
            SplicingGraphFeatures <- convertToSGFeatures(TranscriptFeatures)
        } else if (input == "UCSC") {
            TxDb <- makeTxDbFromUCSC(genome = "rn6", 
                tablename = "refGene")
            TranscriptFeatures <- convertToTxFeatures(TxDb)
            SplicingGraphFeatures <- convertToSGFeatures(TranscriptFeatures)
        } else if (input == "AffyGTF" | input == 
            "CustomGTF") {
            if (is.null(inputFile)) {
                stop("inputFile parameter is empty")
            }
            
            TxDb <- makeTxDbFromGFF(file = inputFile, 
                format = "gtf", dataSource = "Custom GTF")
            TranscriptFeatures <- convertToTxFeatures(TxDb)
            SplicingGraphFeatures <- convertToSGFeatures(TranscriptFeatures)
        } else {
            stop("Unknown reference genome")
        }
    } else if (microarray == "MTA") {
        if (input == "Ensembl") {
            TxDb <- makeTxDbFromBiomart(biomart = "ENSEMBL_MART_ENSEMBL", 
                dataset = "mmusculus_gene_ensembl")
            TranscriptFeatures <- convertToTxFeatures(TxDb)
            SplicingGraphFeatures <- convertToSGFeatures(TranscriptFeatures)
        } else if (input == "UCSC") {
            TxDb <- makeTxDbFromUCSC(genome = "mm10", 
                tablename = "knownGene")
            TranscriptFeatures <- convertToTxFeatures(TxDb)
            SplicingGraphFeatures <- convertToSGFeatures(TranscriptFeatures)
        } else if (input == "AffyGTF" | input == 
            "CustomGTF") {
            if (is.null(inputFile)) {
                stop("inputFile parameter is empty")
            }
            
            TxDb <- makeTxDbFromGFF(file = inputFile, 
                format = "gtf", dataSource = "Custom GTF")
            TranscriptFeatures <- convertToTxFeatures(TxDb)
            SplicingGraphFeatures <- convertToSGFeatures(TranscriptFeatures)
        } else {
            stop("Unknown reference genome")
        }
    } else {
        stop("Microarray should be: HTA-2_0, ClariomD, MTA or RTA")
    }
    # Read Information for the probes in the
    # array
    
    cat("\nReading Information On Probes...")
    
    
    # Read ProbeSets TXT
    ProbeSets <- read.delim(file = PSR, sep = "\t", 
        header = TRUE, stringsAsFactors = FALSE)
    
    # Arrange the information in the txt file
    # to have the information in the way the
    # algorithm needs it
    
    ProbeSets <- PrepareProbes(ProbeSets, 
        "PSR")
    
    # Read Junctions TXT
    Junctions <- read.delim(file = Junc, 
        sep = "\t", header = TRUE, stringsAsFactors = FALSE)
    
    # Arrange the information in the txt file
    # to have the information in the way the
    # algorithm needs it
    
    Junctions <- PrepareProbes(Junctions, 
        "Junction")
    
    
    if (input == "Ensembl" | input == "UCSC" | 
        input == "CustomGTF") {
        Junc <- makeGRangesFromDataFrame(Junctions)
        PSRs <- makeGRangesFromDataFrame(ProbeSets)
        
        seqlevelsStyle(Junc) <- seqlevelsStyle(SplicingGraphFeatures)
        seqlevelsStyle(PSRs) <- seqlevelsStyle(SplicingGraphFeatures)
        
        
        Overlap_Junc <- findOverlaps(Junc, 
            SplicingGraphFeatures)
        
        GeN <- geneName(SplicingGraphFeatures[subjectHits(Overlap_Junc)])
        GeN <- sapply(GeN, function(X) {
            A <- X[1]
            return(A)
        })
        
        GeN_iix <- cbind(queryHits(Overlap_Junc), 
            GeN)
        GeN_iix <- unique(GeN_iix)
        Junctions[as.numeric(GeN_iix[, 1]), 
            "Gene"] <- GeN_iix[, 2]
        
        
        Overlap_PSR <- findOverlaps(PSRs, 
            SplicingGraphFeatures)
        
        GeN <- geneName(SplicingGraphFeatures[subjectHits(Overlap_PSR)])
        GeN <- sapply(GeN, function(X) {
            A <- X[1]
            return(A)
        })
        
        GeN_iix <- cbind(queryHits(Overlap_PSR), 
            GeN)
        GeN_iix <- unique(GeN_iix)
        ProbeSets[as.numeric(GeN_iix[, 1]), 
            "Gene"] <- GeN_iix[, 2]
    }
    
    cat("Done")
    
    # Get genes with probes and index them to
    # get locations in Junctions and Probes
    
    cat("\nIndexing Genes and Probes...")
    
    geneIds <- geneID(SplicingGraphFeatures)
    Genes <- as.list(geneName(SplicingGraphFeatures))
    LLs <- unlist(lapply(Genes, length))
    geneIds <- rep(geneIds, LLs)
    Genes <- unlist(Genes)
    GeneIndex <- cbind(Genes, geneIds)
    
    # Get Genes with Junctions and PSR probes
    KeptGenes <- intersect(GeneIndex[, 1], 
        intersect(Junctions[, "Gene"], ProbeSets[, 
            "Gene"]))
    
    iix <- match(KeptGenes, GeneIndex[, 1])
    GeneIndex <- GeneIndex[iix, , drop = FALSE]
    
    ii.P <- match(ProbeSets[, "Gene"], KeptGenes)
    ii.P <- which(!is.na(ii.P))
    ProbeSets <- ProbeSets[ii.P, , drop = FALSE]
    
    ii.J <- match(Junctions[, "Gene"], KeptGenes)
    ii.J <- which(!is.na(ii.J))
    Junctions <- Junctions[ii.J, , drop = FALSE]
    
    Ord <- order(KeptGenes)
    GeneIndex <- GeneIndex[Ord, , drop = FALSE]
    KeptGenes <- KeptGenes[Ord]
    ProbeSets <- ProbeSets[order(ProbeSets[, 
        "Gene"]), ]
    Junctions <- Junctions[order(Junctions[, 
        "Gene"]), ]
    
    Junc_rle <- rle(Junctions[, "Gene"])
    PS_rle <- rle(ProbeSets[, "Gene"])
    
    indexE_Junc <- cumsum(Junc_rle$lengths)
    indexS_Junc <- c(1, indexE_Junc[seq_len((length(indexE_Junc) - 
        1))] + 1)
    
    indexE_PS <- cumsum(PS_rle$lengths)
    indexS_PS <- c(1, indexE_PS[seq_len((length(indexE_PS) - 
        1))] + 1)
    
    EventsInfo <- read.delim(file = paste(EventsFile, 
        sep = ""), sep = "\t", header = TRUE)
    rownames(EventsInfo) <- paste(EventsInfo[, 
        1], "_", EventsInfo[, 3], sep = "")
    
    iix <- match(rownames(Events), rownames(EventsInfo))
    EventsInfo <- EventsInfo[iix, , drop = FALSE]
    
    # iix <-
    # match(matrix(unlist(strsplit(rownames(Events),
    # '_')), ncol = 2, byrow = TRUE)[, 1],
    # GeneIndex[, 1])
    
    Idmat <- matrix(unlist(strsplit(rownames(Events), 
        "_")), ncol = 2, byrow = TRUE)
    # EvNum<-sapply(EvId,tail,1)
    # Idmat<-cbind(EvId[,1],EvNum)
    
    iix <- match(Idmat[, 1], GeneIndex[, 
        1])
    
    GeneIndex <- GeneIndex[iix, , drop = FALSE]
    
    iix <- match(GeneIndex[, 1], KeptGenes)
    
    indexE_Junc <- indexE_Junc[iix]
    indexS_Junc <- indexS_Junc[iix]
    
    indexE_PS <- indexE_PS[iix]
    indexS_PS <- indexS_PS[iix]
    
    cat("Done")
    
    # Create file to store gtf for patths
    # (events)
    FILE.paths <- paste(PathGTF, "/paths.gtf", 
        sep = "")
    cat(file = FILE.paths, paste("#track name=", 
        shQuote("paths", type = "cmd"), " gffTags=", 
        shQuote("on", type = "cmd"), sep = ""), 
        "\n")
    
    # Create file to store gtf for probes
    # (events)
    FILE.paths <- paste(PathGTF, "/probes.gtf", 
        sep = "")
    cat(file = FILE.paths, paste("#track name=", 
        shQuote("probes", type = "cmd"), 
        " gffTags=", shQuote("on", type = "cmd"), 
        sep = ""), "\n")
    
    cat("\n Generating GTF Files...")
    
    pb <- txtProgressBar(min = 0, max = nrow(Events), 
        style = 3)
    
    for (jj in seq_len(nrow(GeneIndex))) {
        setTxtProgressBar(pb, jj)
        
        Gene <- GeneIndex[jj, 1]
        
        SG_Gene <- SplicingGraphFeatures[unlist(geneID(SplicingGraphFeatures)) == 
            GeneIndex[jj, 2]]
        
        #SG_Edges <- SG_Info(SG_Gene)$Edges
        SG_Edges <- SG_creation_RNASeq(SG_Gene)$Edges
        
        if (SG_Edges[1, "Strand"] == "") {
            SG_Edges[, 6] <- gsub("", "-", 
                SG_Edges[, 6])
        }
        
       
        EventPaths <- GetIGVPaths(EventsInfo[jj, 
            ], SG_Edges)
        
        JnProbes <- Junctions[indexS_Junc[jj]:indexE_Junc[jj], 
            ]
        JnProbes[, "Start"] <- round((as.numeric(JnProbes[, 
            "Start"]) + as.numeric(JnProbes[, 
            "Stop"]))/2)
        
        EventProbes <- rbind(ProbeSets[indexS_PS[jj]:indexE_PS[jj], 
            ], JnProbes)
        
        xProbe.P1 <- as.numeric(unlist(strsplit(as.vector(EventsInfo[jj, 
            "Probes.P1"]), ",")))
        ii.P1 <- match(xProbe.P1, EventProbes[, 
            1])
        xProbe.P2 <- as.numeric(unlist(strsplit(as.vector(EventsInfo[jj, 
            "Probes.P2"]), ",")))
        ii.P2 <- match(xProbe.P2, EventProbes[, 
            1])
        xProbe.R <- as.numeric(unlist(strsplit(as.vector(EventsInfo[jj, 
            "Probes.Ref"]), ",")))
        ii.PR <- match(xProbe.R, EventProbes[, 
            1])
        IDs <- c(rep("Path1", length(xProbe.P1)), 
            rep("Path2", length(xProbe.P2)), 
            rep("Ref", length(xProbe.R)))
        Wds <- rep(25, length(IDs))
        EventProbes <- cbind(EventProbes[c(ii.P1, 
            ii.P2, ii.PR), c(1, 5, 6)], Wds, 
            EventProbes[c(ii.P1, ii.P2, ii.PR), 
                8], IDs)
        colnames(EventProbes) <- c("names", 
            "chromosome", "start", "width", 
            "strand", "Path")
        
        class(EventPaths[, 2]) <- "integer"
        class(EventPaths[, 3]) <- "integer"
        WriteGTF(PathGTF, EventsInfo[jj, 
            ], EventProbes, EventPaths)
    }
    
    close(pb)
    cat("\n")
}
jpromeror/EventPointer documentation built on May 17, 2023, 10:29 p.m.