R/kissplice2counts.R

Defines functions print.countsData kissplice2counts

Documented in kissplice2counts

kissplice2counts <- function(fileName, counts=2, pairedEnd=FALSE, order=NULL,
                             exonicReads=TRUE, k2rg=FALSE, keep=c("All"),
                             remove=NULL) {
  
  ######## check function inputs
  
  if(!file.exists(fileName)) {
    stop("Input error: user's file does not exist.")
  }
  if(!counts%in%c(0,1,2)){
    stop("Input error: 'counts' option is not equal to 0, 1 or 2.")
  }
  if(!is.logical(pairedEnd)){
    stop("Input error: 'pairedEnd' option must be a boolean.")
  }
  if(!is.logical(exonicReads)){
    stop("Input error: 'exonicReads' option must be a boolean.")
  }
  if(!is.logical(k2rg)){
    stop("Input error: 'k2rg' option must be a boolean.")
  }
  if(!is.null(order) & !is.vector(order, mode="numeric")) {
    stop("Input error: 'order' option must be a numeric vector.")
  }
  if(!pairedEnd & !is.null(order)) {
    stop("Input error: 'order' option can be set only if 
            pairedEnd is TRUE.")
  }
  
  if(k2rg & keep!=c("All")) {
    if(!is.vector(keep)) {
      stop("Input error: 'keep' option must be a vector.")
    }
    
    keep <- unique(keep)
    values <- c("deletion", "insertion", "IR", "ES", "altA", "altD", 
                "altAD", "-", "indel")
    
    # vector of unauthorized 'keep' values
    notinValues <- keep[!keep%in%values]
    if(length(notinValues) > 0) {
      notinValues <- paste(notinValues, collapse = ", ")
      stop("Input error: 'keep' must be a vector of values in 
    c('deletion', 'insertion', 'IR', 'ES', 'altA', 'altD', 'altAD', 
    '-', 'indel').")
    }
  }
  
  if(k2rg & !is.null(remove)) {
    if(!is.vector(remove)) {
      stop("Input error: 'remove' must be a vector.")
    }
    
    if(!"ES"%in%keep) {
      remove <- unique(remove)
      values <- c("deletion", "insertion", "IR", "ES", "altA", "altD", 
                  "altAD", "-", "indel", "MULTI")
      
      # vector of unauthorized 'remove' values
      notinValues <- remove[!remove%in%values]
      if(length(notinValues) > 0) {
        notinValues <- paste(notinValues, collapse = ", ")
        stop("Input error: 'remove' must be a vector of values in 
        c('deletion', 'insertion', 'IR', 'ES', 'altA', 'altD', 'altAD', 
        '-', 'indel', 'MULTI').")
      }
    }
    
    
    if(keep!=c("All") & "ES"%in%keep) {
      values <- c("altA", "altD", "altAD", "MULTI",
                  "MULTI_altA", "MULTI_altD", "MULTI_altAD")
      
      # vector of unauthorized 'remove' values
      notinValues <- remove[!remove%in%values]
      if(length(notinValues) > 0) {
        notinValues <- paste(notinValues, collapse = ", ")
        stop("Input error: if 'keep' contains 'ES', 'remove' must be a 
    vector of values in: c('altA', 'altD', 'altAD', 'MULTI', 'MULTI_altA',
    MULTI_altD', 'MULTI_altAD').")
      }
    } else if(keep!=c("All")) {
      stop("Input error: 'keep' and 'remove' can not be used together,
    unless 'keep' contains 'ES' (see the vignette for more informations).")
    }
  }
  
  ######## check options compatibility
  
  splitFileName <- strsplit(fileName, "_type_")[[1]]
  l <- length(splitFileName)
  if(l > 1) {
    fileType <- substr(splitFileName[l], 1, 1)
    if(fileType == "0" & counts != 0) {
      counts <- 0
      warning("Changing 'counts' value to 0 for consistency with inputed file type.")
    }
  }
  
  if (counts == 1 & exonicReads == TRUE) { 
    ## when counts=1 set automatically exonicReads=FALSE
    exonicReads <- FALSE
    warning("Changing 'exonicReads' value to FALSE for consistency with
    counts=1.")
  }
  if (counts == 0 & exonicReads == FALSE) { 
    ## when counts=0 set automatically exonicReads=TRUE
    exonicReads <- TRUE
    warning("Changing 'exonicReads' value to TRUE for consistency with
    counts=0.")
  }
  if (k2rg == FALSE & (keep != c("All") | !is.null(remove))) {
    ## keep and remove should only be used when k2rg=TRUE
    keep <- c("All")
    remove <- NULL
    warning("Changing 'keep' and 'remove' options to default value for 
    consistency with k2rg=FALSE.")
  }
  
  ########
  
  fpath <- file(fileName, open="r")
  
  if (k2rg == FALSE) {
    fileNameK2RG <- NULL
    lines <- readLines(fpath)
    lines <- lines[startsWith(lines, ">")]
    lines <- sub("^>", "", lines)
    lines <- strsplit(x = lines, split = "|", fixed = TRUE)
    
    
    if (any(grepl("branching_nodes", lines[1]))) {
      indexStart <- 6
    } else {
      indexStart <- 5
    }
    
    ## get all the informations for all lines
    infoLines <- lapply(lines, .getInfoLine, counts, pairedEnd, order, 
                        exonicReads, indexStart)
    
    eventsnames <- unlist(lapply(infoLines, function(X) X$eventName))
    
    nbLines <- length(infoLines)
    eventsmat <- cbind(
      unlist(lapply(infoLines, function(X) X$variantLength)),
      matrix(unlist(lapply(infoLines, function(X) X$variantCounts)), 
             nbLines, byrow = TRUE))
    
    infoFirstLine <- infoLines[[1]]
    psiInfo <- matrix(NA, nbLines, length(infoFirstLine$psiInfo))
    for (index in seq_len(nbLines))
      psiInfo[index, ] <- infoLines[[index]]$psiInfo
    
  } else {
    fileNameK2RG <- fileName
    POS <- 3
    EVENT <- 5
    PARA <- 13
    EVENTNAME <- 16
    QUANT <- 20
    keepEvents <- .wantedEvents(keep, remove)
    
    lines <- readLines(fpath)
    lines <- lines[!startsWith(lines, "#")]
    lines <- strsplit(x = lines, split = "\t", fixed = TRUE)
    
    firstLine <- lines[[1]]
    if(is.na(firstLine[QUANT]) | firstLine[QUANT]=="NA" | firstLine[QUANT]=="") {
      stop("Input error: it seems that your kissplice2refgenome file is not quantified.
           Are you sure you used the quantified output of KisSplice (NOT in the \"results_without_read_coherency/\" folder)?")
    }
    
    
    ## keep only events of the selected type (keepEvents)
    keptLines <- lapply(lines, function(X) {if(!X[PARA] %in% c("one_path_mapped","not_mapped","missed_type2") & sum(strsplit(X[EVENT],split = ",")[[1]] %in% keepEvents)>0) X})
    keptLines <- keptLines[!vapply(keptLines, is.null, isTRUE(1))]
    keptLines <- keptLines[!duplicated(unlist(lapply(keptLines, 
                                                     function(X) X[EVENTNAME])))]
    lEvents <- unlist(lapply(keptLines, function(X) X[EVENTNAME]))
    
    nbLines <- length(keptLines)
    
    ## get all the informations for all lines
    infoLines <- lapply(keptLines, .getInfoLineK2rg, counts, pairedEnd,
                        order, exonicReads)
    
    ## events.names
    eventsnames <- rep(NA, nbLines * 2)
    eventsnames[seq(1, nbLines * 2, by = 2)] <- unlist(lapply(infoLines, 
                                                              function(X) X$eventName))
    eventsnames[seq(2, nbLines * 2, by = 2)] <- unlist(lapply(infoLines, 
                                                              function(X) X$eventName))
    
    ## events.mat
    infoFirstLine <- infoLines[[1]]
    eventsmat <- matrix(NA, nbLines * 2, 
                        length(infoFirstLine$variantCountsUp) + 1)
    eventsmat[seq(1, nbLines * 2, by = 2), ] <- cbind(
      unlist(lapply(infoLines, function(X) X$variantLengthUp)),
      matrix(unlist(lapply(infoLines, function(X) X$variantCountsUp)), 
             nbLines, byrow = TRUE))
    eventsmat[seq(2, nbLines * 2, by = 2), ] <- cbind(
      unlist(lapply(infoLines, function(X) X$variantLengthLow)),
      matrix(unlist(lapply(infoLines, function(X) X$variantCountsLow)), 
             nbLines, byrow = TRUE))
    
    ## psiInfo
    psiInfo <- matrix(NA, nbLines * 2, length(infoFirstLine$psiInfoUp))
    # For loop that can not be optimized at the moment because of NULL 
    # value yielded by the '.countsSet' function
    for (index in seq_len(nbLines)) {
      psiInfo[(index - 1)*2 + 1, ] <- infoLines[[index]]$psiInfoUp
      psiInfo[(index - 1)*2 + 2, ] <- infoLines[[index]]$psiInfoLow
    }
    
    
  }
  
  eventsdf <- data.frame(events.names = eventsnames, events.mat = eventsmat)
  colnames(eventsdf) <- c("events.names", 
                          "events.length", 
                          paste("counts", 
                                seq_len(length(colnames(eventsdf)) - 2), 
                                sep=""))
  
  psiInfo <- data.frame(events.names = eventsnames, as.data.frame(psiInfo))
  
  output <- list(countsEvents = eventsdf, 
                 psiInfo = psiInfo, 
                 exonicReadsInfo = exonicReads, 
                 k2rgFile = fileNameK2RG)
  class(output) <- c("list", "countsData")
  
  close(fpath)
  return(output)
}

print.countsData <- function(x, ...) {
  print(x$countsEvents)
}
aursiber/kissDE documentation built on Jan. 28, 2024, 10:01 a.m.