R/Modifier-Inosine-class.R

Defines functions ModSetInosine .find_inosine .get_inosine_score .aggregate_inosine .calculate_inosine_score .norm_ModInosine_settings ModInosine

Documented in ModInosine ModSetInosine

#' @include RNAmodR.R
#' @include Modifier-class.R
#' @include ModifierSet-class.R
NULL

# documentation ----------------------------------------------------------------

#' @name ModInosine
#' @aliases Inosine ModifierInosine ICE-Seq
#' 
#' @author Felix G.M. Ernst [aut]
#' 
#' @title ModInosine
#' 
#' @description 
#' Inosine can be detected in RNA-Seq data by the conversion of A positions to 
#' G. This conversion is detected by \code{ModInosine} and used to search for 
#' Inosine positions. \code{dataType} is \code{"PileupSequenceData"}.
#' 
#' Only samples labeled with the condition \code{treated} are used for this
#' analysis, since the A to G conversion is common feature among the reverse
#' transcriptases usually emploied. Let us know, if that is not the case, and
#' the class needs to be modified.
#' 
#' Further information on \code{\link[=ModInosine-functions]{Functions}} of 
#' \code{ModInosine}.
#' 
#' @details
#' \code{ModInosine} score: the scores for reported Inosine positions are 
#' between 0 and 1. They are calculated as the relative amount of called G bases 
#' (\code{(G / N)}) and only saved for genomic A positions.
#' 
#' @param x the input which can be of the different types depending on whether
#' a \code{ModRiboMethSeq} or a \code{ModSetRiboMethSeq} object is to be 
#' constructed. For more information have a look at the documentation of
#' the \code{\link[RNAmodR:Modifier-class]{Modifier}} and 
#' \code{\link[RNAmodR:ModifierSet-class]{ModifierSet}} classes.
#' @param annotation annotation data, which must match the information contained
#' in the BAM files. This is parameter is only required, if \code{x} is not a 
#' \code{Modifier} object.
#' @param sequences sequences matching the target sequences the reads were 
#' mapped onto. This must match the information contained in the BAM files. This
#' is parameter is only required, if \code{x} is not a \code{Modifier} object.
#' @param seqinfo An optional \code{\link[GenomeInfoDb:Seqinfo-class]{Seqinfo}} 
#' argument or character vector, which can be coerced to one, to subset the 
#' sequences to be analyzed on a per chromosome basis.
#' @param ... Optional arguments overwriting default values, which are
#' \itemize{
#' \item{minCoverage:} {The minimal coverage at the position as integer value 
#' (default: \code{minCoverage = 10L}).}
#' \item{minReplicate:} {minimum number of replicates needed for the analysis 
#' (default: \code{minReplicate = 1L}).}
#' \item{minScore:} {minimum score to identify Inosine positions de novo 
#' (default: \code{minScore = 0.4}).}
#' }
#' 
#' @return a \code{ModInosine} or \code{ModSetInosine} object
#' 
#' @examples
#' # construction of ModInosine object
#' library(RNAmodR.Data)
#' library(rtracklayer)
#' annotation <- GFF3File(RNAmodR.Data.example.man.gff3())
#' sequences <- RNAmodR.Data.example.man.fasta()
#' files <- c(treated = RNAmodR.Data.example.wt.1())
#' mi <- ModInosine(files,annotation = annotation ,sequences = sequences)
#' # construction of ModSetInosine object
#' \dontrun{
#' files <- list("SampleSet1" = c(treated = RNAmodR.Data.example.wt.1(),
#'                                treated = RNAmodR.Data.example.wt.2(),
#'                                treated = RNAmodR.Data.example.wt.3()),
#'               "SampleSet2" = c(treated = RNAmodR.Data.example.bud23.1(),
#'                                treated = RNAmodR.Data.example.bud23.2()),
#'               "SampleSet3" = c(treated = RNAmodR.Data.example.trm8.1(),
#'                                treated = RNAmodR.Data.example.trm8.2()))
#' msi <- ModSetInosine(files, annotation = annotation, sequences = sequences)
#' }
NULL

#' @name ModInosine-internals
#' @aliases .dataTracks,ModInosine,GRanges,GRanges,XString-method
#' 
#' @title ModInosine internal functions
#' 
#' @description
#' These functions are not intended for general use, but are used for 
#' additional package development.
#' 
#' @param x,data,seqdata,sequence,args internally used arguments
NULL

#' @name ModInosine-functions
#' 
#' @title Functions for ModInosine
#' 
#' @description
#' All of the functions of \code{\link[RNAmodR:Modifier-class]{Modifier}} and
#' the \code{\link[RNAmodR:ModifierSet-class]{ModifierSet}} classes are 
#' inherited by the \code{ModInosine} and \code{ModSetInosine} classes.
#' 
#' Check below for the specifically implemented functions.
#' 
#' @param x a \code{\link[RNAmodR:Modifier-class]{Modifier}} or a 
#' \code{\link[RNAmodR:ModifierSet-class]{ModifierSet}} object. For more details
#' see also the man pages for the functions mentioned below.
#' @param value See \code{\link[RNAmodR:Modifier-functions]{settings}}
#' @param coord,name,from,to,type,window.size,... See 
#' \code{\link{plotData}}
#' 
#' @details 
#' \code{ModInosine} specific arguments for \link{plotData}:
#' \itemize{
#' \item{\code{colour.bases} - }{a named character vector of \code{length = 4} 
#' for the colours of the individual bases. The names are expected to be 
#' \code{c("G","A","U","C")}}
#' }
#' 
#' @return 
#' \itemize{
#' \item{\code{settings}} {See \code{\link[=Modifier-functions]{settings}}.}
#' \item{\code{aggregate}} {See \code{\link{aggregate}}.}
#' \item{\code{modify}} {See \code{\link{modify}}.}
#' \item{\code{getDataTrack}} {a list of 
#' \code{\link[Gviz:DataTrack-class]{DataTrack}} objects. See 
#' \code{\link{plotDataByCoord}}.}
#' \item{\code{plotData}} {See \code{\link{plotDataByCoord}}.}
#' \item{\code{plotDataByCoord}} {See \code{\link{plotDataByCoord}}.}
#' }
#' 
#' @examples 
#' data(msi,package="RNAmodR")
#' mi <- msi[[1]]
#' settings(mi)
#' \dontrun{
#' aggregate(mi)
#' modify(mi)
#' }
#' getDataTrack(mi, "1", mainScore(mi))
NULL

# class ------------------------------------------------------------------------

#' @rdname ModInosine
#' @export
setClass("ModInosine",
         contains = c("RNAModifier"),
         prototype = list(mod = "I",
                          score = "score",
                          dataType = "PileupSequenceData"))

# constructor ------------------------------------------------------------------

# Create Modifier class from file character, fasta and gff file
#' @rdname ModInosine
#' @export
ModInosine <- function(x, annotation, sequences, seqinfo, ...){
  Modifier("ModInosine", x = x, annotation = annotation, sequences = sequences,
           seqinfo = seqinfo, ...)
}

# settings ---------------------------------------------------------------------

.ModInosine_settings <- data.frame(
  variable = c("minScore"),
  testFUN = c(".not_numeric_between_0_100"),
  errorValue = c(TRUE),
  errorMessage = c("'minScore' must be numeric with a value between 0 and 100."),
  stringsAsFactors = FALSE)

.norm_ModInosine_settings <- function(input){
  minScore <- 0.4
  args <- .norm_settings(input, .ModInosine_settings, minScore)
  args
}

#' @rdname ModInosine-functions
#' @export
setReplaceMethod(f = "settings", 
    signature = signature(x = "ModInosine"),
    definition = function(x, value){
        x <- callNextMethod()
        names <- names(value)
        value <- .norm_ModInosine_settings(value)
        x <- .add_settings_value(x, value, names)
        x
    }
)

# functions --------------------------------------------------------------------

.calculate_inosine_score <- function(x, letters){
  data <- unlist(x)
  letters <- unlist(letters)
  scores <- 
    data$means.treated.G / (data$means.treated.A + data$means.treated.T + 
                              data$means.treated.C +data$means.treated.G)
  scores[is.infinite(scores) | is.na(scores)] <- 0
  scores[letters != "A"] <- 0
  ans <- S4Vectors::DataFrame(value = scores)
  relist(ans, x)
}

.aggregate_inosine <- function(x){
  mod <- aggregate(sequenceData(x))
  letters <- IRanges::CharacterList(strsplit(as.character(sequences(x)),""))
  score <- .calculate_inosine_score(mod, letters)
  ans <- cbind(S4Vectors::DataFrame(score = unlist(score)$value,
                                    row.names = NULL))
  ans <- relist(ans, mod)
  rownames(ans) <- rownames(mod)
  ans
}

#' @rdname ModInosine-functions
#' @export
setMethod(f = "aggregateData", 
          signature = signature(x = "ModInosine"),
          definition = 
            function(x){
              .aggregate_inosine(x)
            }
)

.get_inosine_score <- function(data){
  list(score = data$score)
}

.find_inosine <- function(x){
  if(!hasAggregateData(x)){
    stop("")
  }
  letters <- IRanges::CharacterList(strsplit(as.character(sequences(x)),""))
  # get the aggregate data
  mod <- getAggregateData(x)
  coverage <- pileupToCoverage(sequenceData(x))
  # get arguments
  minCoverage <- settings(x,"minCoverage")
  minReplicate <- settings(x,"minReplicate")
  minScore <- settings(x,"minScore")
  # construct logical vector for passing the coverage threshold
  coverage <- rowSums(as.data.frame(unlist(coverage))) >= minCoverage
  coverage <- relist(unname(coverage),IRanges::PartitioningByEnd(mod))
  # find inosine positions by looking for A to G conversion at position with 
  # enough coverage
  grl <- ranges(x)
  modifications <- Map(
    function(m,c,l,r){
      m <- m[l == "A" &
               m$score >= minScore & 
               c,,drop=FALSE] # coverage check
      if(nrow(m) == 0L) return(NULL)
      ans <- constructModRanges(r, m, modType = "I",
                                RNAmodR:::.get_inosine_score, "RNAmodR",
                                "RNAMOD")
      ans
    },
    mod,
    coverage,
    letters,
    grl)
  f <- !vapply(modifications,
               is.null,
               logical(1))
  modifications <- Map(
    function(m,name){
      m$Parent <- name
      m
    },
    modifications[f],
    names(grl)[f])
  modifications <- GenomicRanges::GRangesList(modifications)
  unname(unlist(modifications))
}

#' @rdname ModInosine-functions
#' @export
setMethod("findMod",
          signature = c(x = "ModInosine"),
          function(x){
            .find_inosine(x)
          }
)


# ModSetInosine ----------------------------------------------------------------

#' @rdname ModInosine
#' @export
setClass("ModSetInosine",
         contains = "ModifierSet",
         prototype = list(elementType = "ModInosine"))

#' @rdname ModInosine
#' @export
ModSetInosine <- function(x, annotation = NA, sequences = NA, seqinfo = NA, 
                          ...){
  RNAmodR::ModifierSet("ModInosine", x = x, annotation = annotation,
                       sequences = sequences, seqinfo = seqinfo, ...)
}
FelixErnst/RNAmodR documentation built on March 27, 2024, 2:42 a.m.