### Auxiliary functions for package RNAprobR
#Auxiliary function speeding up calculations in functions operating on
#sliding windows.
#Returns a matrix with (length(input_vector) + window_size - 1) columns and
#window_size rows. Each row contains input vector values starting at column
#equal to row number.
.construct_smoothing_matrix <- function(input_vector, window_size){
vector_length <- length(input_vector)
#Create empty matrix.
my_mat <- matrix(nrow=window_size, ncol=(vector_length+window_size-1))
#Fill the matrix, offsetting by 1 in each cycle.
for(i in 1:(window_size))
{
my_mat[i,i:(vector_length+i-1)] <- input_vector
}
my_mat
}
#Moving average calculation. NA values ignored.
#Rreturns a numeric vector. Each position represents the mean of values in
#window_size positions centred at this position.
.moving_average <- function(input_vector, window_size)
{
window_side <- window_size/2-0.5
colMeans(.construct_smoothing_matrix(input_vector, window_size),
na.rm=TRUE)[(window_side+1):(length(input_vector)+window_side)]
}
#' Winsor normalization with fitting to <0,1> range.
#'
#' Function performs Winsor normalization of a supplied vector. Steps:
#' 1. Calcualate top winsor value [(1+winsor_level)/2 quantile], and bottom
#' winsor value ((1-winsor_level)/2 quantile)
#' 2. Each value below bottom winsor value set to bottom winsor value; each
#' value above top winsor value set to top winsor value
#' 3. Transform linearly all the values to [0,1] range
#'
#' @param input_vector Vector with values to be Winsorized
#' @param winsor_level Winsorization level. Bottom outliers will be set to
#' (1-winsor_level)/2 quantile and top outliers to (1+winsor_level)/2 quantile.
#' @param only_top If TRUE then bottom values are not Winsorized and the lowest
#' is set to 0.
#' @return Vector of numerics within <0,1>.
#' @author Lukasz Jan Kielpinski
#' @references Hastings, Cecil; Mosteller, Frederick; Tukey, John W.; Winsor,
#' Charles P. Low Moments for Small Samples: A Comparative Study of Order
#' Statistics. The Annals of Mathematical Statistics 18 (1947), no. 3,
#' 413--426.
#' @keywords ~winsorising
#' @examples
#'
#' data_set <- runif(1:100)*100
#' plot(winsor(data_set, winsor_level=0.8) ~ data_set)
#'
#' @export winsor
winsor <- function(input_vector, winsor_level=0.9, only_top= FALSE)
{
#!!! Function changes integers to numerics!
bounds <- quantile(input_vector, c((1-winsor_level)/2,
1-(1-winsor_level)/2), names= FALSE, na.rm=TRUE)
if(only_top)
bounds[1] <- 0
input_vector[input_vector < bounds[1]] <- bounds[1]
input_vector[input_vector > bounds[2]] <- bounds[2]
if((bounds[2]-bounds[1])>0)
input_vector <- (input_vector-bounds[1])/(bounds[2]-bounds[1])
else
input_vector[1:length(input_vector)] <- NA
input_vector
}
#' Smooth Winsor Normalization
#'
#' Function performs Winsor normalization (see winsor() function) of each
#' window of specified window_size, sliding in a given vector by 1 position,
#' and reports a list of (1) mean Winsorized values for each vector position
#' (mean of Winsorized value for a given position as calculated within each
#' overlapping window) and (2) standard deviation of those Winsorized values.
#'
#' @param input_vector Vector with values to be smooth-Winsorized
#' @param window_size Size of a sliding window.
#' @param winsor_level Winsorization level. Bottom outliers will be set to
#' (1-winsor_level)/2 quantile and top outliers to (1+winsor_level)/2 quantile.
#' @param only_top If TRUE then bottom values are not Winsorized and are set to
#' 0.
#' @return \item{comp1}{Vector with mean Winsorized values for each
#' input_vector position} \item{comp2}{Vector with standard deviation of
#' Winsorized values for each input_vector position}
#' @author Lukasz Jan Kielpinski
#' @references "Analysis of sequencing based RNA structure probing data"
#' Kielpinski, Sidiropoulos, Vinther. Chapter in "Methods in Enzymology"
#' (in preparation)
#' @examples
#'
#' data_set <- runif(1:100)*100
#' plot(swinsor_vector(data_set, window_size=71,
#' winsor_level=0.8)[[1]] ~ data_set)
#'
#' @export swinsor_vector
swinsor_vector <- function(input_vector, window_size, winsor_level=0.9,
only_top= FALSE)
{
my_matrix <- .construct_smoothing_matrix(input_vector, window_size)
winsorized_matrix <- apply(my_matrix, 2, winsor, winsor_level=winsor_level,
only_top=only_top)
#Control the border behaviour
winsorized_matrix[,c(1:(window_size-1),
(length(input_vector)+1):ncol(winsorized_matrix))] <- NA
aligned_winsorized_matrix <- .align_smoothing_matrix(winsorized_matrix)
means_vector <- colMeans(aligned_winsorized_matrix, na.rm=TRUE)
sds_vector <- apply(aligned_winsorized_matrix, 2, FUN=sd, na.rm=TRUE)
list(means_vector, sds_vector)
}
#Define function for aligning matrix generated by construct_smoothing_matrix
.align_smoothing_matrix <- function(input_matrix)
{
#Length of the vector used to construct the matrix in
#construct_smoothing_matrix function.
vector_length <- ncol(input_matrix)-nrow(input_matrix)+1
#Create empty matrix.
my_mat <- matrix(nrow=nrow(input_matrix), ncol=vector_length)
#Fill the matrix, offseting by -1 in each cycle.
for(i in 1:nrow(input_matrix))
my_mat[i,] <- input_matrix[i, i:(vector_length + i - 1)]
my_mat
}
#' Export normalized GRanges object to data frame
#'
#' Function to make data frame out of GRanges output of normalizing functions
#' (dtcr(), slograt(), swinsor(), compdata()) for all or a set of chosen
#' transcripts in the file.
#'
#' @param norm_GR GRanges object made by other normalization function (dtcr(),
#' slograt(), swinsor(), compdata()) from which data is to be extracted
#' @param RNAid Transcript identifiers of transcripts that are to be extracted
#' @param norm_methods Names of the columns to be extracted.
#' @return Data frame object with columns: RNAid, Pos and desired metadata
#' columns (e.g. nt, dtcr)
#' @author Lukasz Jan Kielpinski, Nikos Sidiropoulos
#' @seealso \code{\link{norm_df2GR}}, \code{\link{dtcr}},
#' \code{\link{swinsor}}, \code{\link{slograt}}, \code{\link{compdata}}
#' @examples
#'
#' dummy_euc_GR_treated <- GRanges(seqnames="DummyRNA",
#' IRanges(start=round(runif(100)*100),
#' width=round(runif(100)*100+1)), strand="+",
#' EUC=round(runif(100)*100))
#' dummy_comp_GR_treated <- comp(dummy_euc_GR_treated)
#' dummy_swinsor <- swinsor(dummy_comp_GR_treated)
#' GR2norm_df(dummy_swinsor)
#'
#' @import GenomicRanges
#' @export GR2norm_df
GR2norm_df <- function(norm_GR, RNAid="all", norm_methods="all")
{
if(norm_methods=="all" || missing(norm_methods))
norm_methods <- names(mcols(norm_GR))
if(RNAid=="all" || missing(RNAid))
RNAid <- levels(seqnames(norm_GR))
#Parsing normalized file based on the selected RNAid(s) and normalization
#method(s).
norm_GR <- norm_GR[seqnames(norm_GR) %in% RNAid, norm_methods]
data.frame(RNAid=as.character(seqnames(norm_GR)),
Pos=as.integer(start(norm_GR)), mcols(norm_GR))
}
#' Function to convert norm_df data frame (product of GR2norm_df()) to GRanges.
#'
#' @param norm_df norm_df data frame needs to have columns: RNAid (equivalent
#' to seqnames in GRanges) and Pos (equivalent to start in GRanges) and
#' metadata
#' @return GRanges compatible with objects created by normalizing functions
#' (dtcr(), slograt(), swinsor(), compdata())
#' @author Lukasz Jan Kielpinski
#' @seealso \code{\link{dtcr}}, \code{\link{slograt}}, \code{\link{swinsor}},
#' \code{\link{compdata}}, \code{\link{GR2norm_df}}, \code{\link{norm2bedgraph}}
#' @examples
#'
#' dummy_norm_df <- data.frame(RNAid="dummyRNA", Pos=1:100,
#' my_data1=runif(1:100))
#' norm_df2GR(dummy_norm_df)
#'
#' @export norm_df2GR
norm_df2GR <- function(norm_df)
{
GRanges(seqnames=norm_df$RNAid, IRanges(start=norm_df$Pos, width=1),
strand="+", norm_df[names(norm_df)!="RNAid" & names(norm_df)!="Pos"])
}
#Special function to repair merged Comp_df data frames if there is gap between
#merged nucleotides:
.correct_merged <- function(oneRNA_compmerg){
df_gaps <- diff(oneRNA_compmerg$Pos)
rnaid_column <- which(names(oneRNA_compmerg)=="RNAid")
oneRNA_out <- oneRNA_compmerg[c(rep(1:length(df_gaps), df_gaps),
nrow(oneRNA_compmerg)),]
oneRNA_out[duplicated(rep(1:length(df_gaps), df_gaps)),
(1:ncol(oneRNA_out))[-rnaid_column]] <- NA
oneRNA_out$Pos <- oneRNA_compmerg$Pos[1]:(oneRNA_compmerg$Pos[1] +
nrow(oneRNA_out)-1)
oneRNA_out$nt[is.na(oneRNA_out$nt)] <- "N"
oneRNA_out[is.na(oneRNA_out)] <- 0
oneRNA_out
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.