#
# This file is part of the `BraDiPluS` R package
#
# Copyright (c) 2016 EMBL-EBI
#
# File author(s): Federica Eduati (federica.eduati@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# http://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://github.com/saezlab/BraDiPluS
# --------------------------------------------------------
#
#' Separate data in samples (i.e. experimental conditions).
#'
#' \code{samplesSelection} define samples using the barcoding channel to separate them.
#'
#' This function allows to select the samples using the selected barcoding channel to separate replicates (peaks) from
#' different experimental conditons. This function first detect the barcoding peaks (in the channel defined by "BCchannel") and that is
#' done considering the blue signal that goes above a defined threshold (which is the “BCthr” argument) and with a minimum width
#' (defined by "BCminLength" argument). Then it looks at the space between selected blue peaks and considers as samples those
#' with a distance higher than a threshold (defined by "distThr").
#'
#' @param data data to be analyzed (formatted as data.frame with 4 columns: green, orange, blue, time)
#' @param channel channel(s) to be visualized, which is in general the main channel with the data. Default="green"
#' @param BCchannel channel used for barcoding, default is "blue"
#' @param BCthr threshold used in order to define what is a peak in the barcoding channel. Default is 0.05
#' @param distThr minimum duration of a sample (the unit is "time" as defined in the data)
#' @param plotMyData TRUE to show the plot of the data, FALSE not to show it. If data are large plotting can take seconds/minutes
#' @param BCminLength minimum length of a plug in number of data points for the barcoding channel,
#' default is 100 (note that unit here is number of data points, not "time")
#' @param barcodePos specify if the barcode is before or after the sample ("before" or "after"), default is "after"
#' @return This function returns a list with two components:
#' \describe{
#' \item{samples}{list containing samples as elements. Each sample is a data.frame
#' with the same structure of the input data}
#' \item{barcode}{corresponding barcode for each sample. Each barcode is a data.frame
#' with the same structure of the input data}
#' }
#' @seealso \code{\link{selectSamplesPeaks}} to select the peaks in each sample
#' @examples
#' data(BxPC3_data,package="BraDiPluS")
#' res <- samplesSelection(data=MyData, BCchannel="blue",BCthr=0.01,
#' BCminLength=100, distThr=16, barcodePos="before")
#' samples <-res$samples
#' barcode <-res$barcode
#' @export
samplesSelection <- function(data, channel="green", BCchannel="blue", BCthr=0.05, distThr=16, plotMyData=FALSE, BCminLength=100, barcodePos="after"){
# slect peaks in the barcoding channel
peaks <- peaksSelection(data, channel=BCchannel, baseThr=BCthr, minLength=BCminLength)
if (plotMyData==TRUE){plotData(data, channels=c(BCchannel, channel), peaks=peaks)}
# distance of each peak to the previous one
peaksDist<-c(0, peaks$start[2:length(peaks$start)] - peaks$end[1:length(peaks$start)-1])
# these are the indices of the first peak of each barcode (except for the first BC)
ix <- c(which(peaksDist>distThr))
if (length(ix)==0) stop(paste("Error: check the argument 'distThr', your maximum distance between peaks is", max(peaksDist)))
####
# define beginning and end of the BC samples
BCstart.time<-c(peaks$start[1], peaks$start[ix])
#abline(v=BCstart.time, col="red")
BCstart.ix<-sapply(BCstart.time,function (x) which(data$time>=x)[1]) - 10
BCstart.ix[BCstart.ix<=1]<-1
BCend.time<-c(peaks$end[ix-1], peaks$end[nrow(peaks)])
#abline(v=BCend.time, col="blue")
BCend.ix<-sapply(BCend.time,function (x) which(data$time>=x)[1]) + 10
BCend.ix[BCstart.ix>=nrow(data)]<-nrow(data)
####
# define beginning and end of the real sample
RSstart.time<-c(peaks$end[ix-1])
#abline(v=RSstart.time, col="blue")
RSstart.ix<-sapply(RSstart.time,function (x) which(data$time>=x)[1]) - 10
RSstart.ix[RSstart.ix<=1]<-1
RSend.time<-c(peaks$start[ix])
#abline(v=RSend.time, col="red")
RSend.ix<-sapply(RSend.time,function (x) which(data$time>=x)[1]) + 10
RSend.ix[RSend.ix>=nrow(data)]<-nrow(data)
samples <- list()
for (i in 1:length(RSstart.time)){
samples[[i]]<-data[RSstart.ix[i]:RSend.ix[i],]
names(samples)[i] <- paste("sample", i, sep="_")
if (plotMyData==TRUE){
# colour only border
tmp <- data[,c(channel, BCchannel)]
rect(RSstart.time[i],0,RSend.time[i],max(tmp), border="#31a354")
text(x=(RSstart.time[i]+(RSend.time[i]-RSstart.time[i])/2), y=max(tmp), labels=i, cex=0.8)
}
}
barcode <- list()
for (i in 1:length(BCstart.time)){
barcode[[i]]<-data[BCstart.ix[i]:BCend.ix[i],]
names(barcode)[i] <- paste("sample", i, sep="_")
# if (plotMyData==TRUE){
# # colour only border
# tmp <- data[,c(BCchannel)]
# rect(BCstart.time[i],0,BCend.time[i],max(tmp), border="#2F4F4F")
# text(x=(BCstart.time[i]+(BCend.time[i]-BCstart.time[i])/2), y=max(tmp), labels=i, cex=0.8)
# }
}
# the barcoding starts at the end of the sample
if (barcodePos=="after"){
barcode<-barcode[2:length(barcode)]
}else if (barcodePos=="before"){
barcode<-barcode[1:(length(barcode)-1)]
}else{
stop(cat("Please specify a valid argument for barcode, can be: \'before\' or \'after\'"))
}
return(list(samples=samples, barcode=barcode))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.