R/swinsor.R

Defines functions .swinsor_oneRNA swinsor

Documented in swinsor

###Function in the normalizing functions family.

#' Smooth Winsorization
#'
#' Performs sliding window Winsorization given treated GRanges generated by
#' comp() function. It winsorizes values in windows (of a size specified by
#' window_size) sliding by 1 nt over whole transcript length and reports mean
#' winsorized value for each nucleotide (as well as standard deviation).
#'
#' @param Comp_GR GRanges object made by comp() function.
#' @param winsor_level Winsorization level. Bottom outliers will be set to
#' (1-winsor_level)/2 quantile and top outliers to (1+winsor_level)/2 quantile.
#' @param window_size Size of a sliding window.
#' @param only_top If TRUE then bottom values are not Winsorized and are set to
#' 0.
#' @param nt_offset How many position in the 5' direction should the signal be
#' offset to account for the fact that reverse transcription termination occurs
#' before site of modification.
#' @param add_to GRanges object made by other normalization function (dtcr(),
#' slograt(), swinsor(), compdata()) to which normalized values should be
#' added.
#' @return GRanges object with "swinsor" (mean smooth-Winsor values) and
#' "swinsor.sd" (standard deviation of smooth-Winsor values) metadata.
#' @author Lukasz Jan Kielpinski, Jeppe Vinther, Nikos Sidiropoulos
#' @seealso \code{\link{comp}}, \code{\link{dtcr}}, \code{\link{slograt}},
#' \code{\link{compdata}}, \code{\link{GR2norm_df}},
#' \code{\link{plotRNA}}, \code{\link{norm2bedgraph}}, \code{\link{winsor}},
#' \code{\link{swinsor_vector}}
#' @references "Analysis of sequencing based RNA structure probing data"
#' Kielpinski, Sidiropoulos, Vinther. Chapter in "Methods in Enzymology"
#' (in preparation)
#' @examples
#'
#' dummy_euc_GR <- GRanges(seqnames="DummyRNA",
#'                         IRanges(start=round(runif(100)*100),
#'                         width=round(runif(100)*100+1)), strand="+",
#'                         EUC=round(runif(100)*100))
#' dummy_comp_GR <- comp(dummy_euc_GR)
#' swinsor(dummy_comp_GR)
#'
#' @import GenomicRanges
#' @export swinsor
swinsor <- function(Comp_GR, winsor_level=0.9, window_size=71, only_top= FALSE,
                    nt_offset=1, add_to){

###Check conditions:
    if(nt_offset < 0)
        stop("ERROR: nt_offset must be >= 0")

    ###Function body:
    Comp_df <- GR2norm_df(Comp_GR)

    Comp_df[is.na(Comp_df)] <- 0 #Changes NA to 0 - treat no events as 0 events.

    Comp_df <- Comp_df[order(Comp_df$RNAid, Comp_df$Pos),]
    Comp_df_by_RNA <- split(Comp_df, f=Comp_df$RNAid, drop=TRUE)

    #Do processing (smooth, offset, p-values)
    normalized <- do.call(rbind, lapply(Comp_df_by_RNA, FUN=.swinsor_oneRNA,
                                        winsor_level, window_size, only_top,
                                        nt_offset))
    #Keep only relevant columns
    normalized <- data.frame(RNAid=normalized$RNAid, Pos=normalized$Pos,
                             nt=normalized$nt, swinsor=normalized$swinsor,
                             swinsor.sd=normalized$swinsor.sd)

    #Change NaN to NA, for consistency
    normalized$swinsor[is.nan(normalized$swinsor)] <- NA
    normalized$swinsor.sd[is.nan(normalized$swinsor.sd)] <- NA

    #If add_to specified, merge with existing normalized data frame:
    if(!missing(add_to)){
        add_to_df <- GR2norm_df(add_to)
        normalized <- merge(add_to_df, normalized,
                            by=c("RNAid", "Pos", "nt"),
                            suffixes=c(".old",".new"))
    }
    ###

    normalized <- normalized[order(normalized$RNAid, normalized$Pos),]
    norm_df2GR(normalized)
}

###Auxiliary functions:

#Process single RNA:
.swinsor_oneRNA <- function(oneRNA_comp_df, winsor_level, window_size, only_top,
                            nt_offset){
    #Check if data is properly sorted
    if(prod(diff(oneRNA_comp_df$Pos)==rep(1, nrow(oneRNA_comp_df)-1))==1){
        means_and_sds <- swinsor_vector(oneRNA_comp_df$TC,
                                        window_size=window_size,
                                        winsor_level=winsor_level,
                                        only_top=only_top)
        oneRNA_comp_df$swinsor <-
            means_and_sds[[1]][(1+nt_offset):(nrow(oneRNA_comp_df)+nt_offset)]
        oneRNA_comp_df$swinsor.sd <-
            means_and_sds[[2]][(1+nt_offset):(nrow(oneRNA_comp_df)+nt_offset)]

        oneRNA_comp_df[1:(nrow(oneRNA_comp_df)-nt_offset),]
    }
    else {
        Message <- "Check if data was properly sorted by comp() function.
                    Problem with"
        stop(strwrap(paste(Message, oneRNA_comp_df$RNAid[1])))
    }
}

Try the RNAprobR package in your browser

Any scripts or data that you put into this service are public.

RNAprobR documentation built on Nov. 8, 2020, 5:57 p.m.