R/functions.R
In FourCSeq: Package analyse 4C sequencing data

Documented in addPeaks combineFragEnds getAllResults getDifferences getDistAroundVp getNormalizationFactors getZScores normalizeRPM smoothCounts smoothHitPerCent writeTrackFiles

##' Combine the counts of both fragment ends.
##' 
##' 
##' \code{combineFragEnds} combines the counts of both fragment ends. A
##' multiplication factor can be used for fragments that only have counts for
##' one valid fragment end.
##' 
##' @param object A \code{FourC} object.
##' @param multFactor Multiplication factor that can be used to multiply the
##' counts of fragments which only have one valid end. Default is \code{1}.
##' \code{filter} is automatically set to \code{TRUE} if \code{multFactor} 
##' is different from 1.
##' @param filter If filter is \code{TRUE}, only reads from valid fragment
##' ends are summed up for each fragment. This means if only one fragment end
##' is valid only counts from this end are considered.
##' If filter is \code{FALSE}, counts from both fragment ends are summed up
##' without any filtering.
##' @return Returns an updated \code{FourC} object with a new assay \code{counts}
##' containing the combined count data of both fragment ends for all viewpoints.
##' @author Felix A. Klein, \email{felix.klein@@embl.de}
##' @seealso \code{\link{FourC}}, \code{\link{countFragmentOverlaps}}
##' @examples
##' 
##' 
##' 
##'                        
##' data(fc, package="FourCSeq")
##' 
##' fc <- combineFragEnds(fc)
##' fc
##' 
##' @export
combineFragEnds <- function(object,
                            multFactor=1,
                            filter=FALSE){

  stopifnot(class(object)=="FourC")
  
  ## 
  if(any( ! c("countsLeftFragmentEnd","countsRightFragmentEnd") %in% names(assays(object))))
      stop("Use countFragmentOverlaps to import the counts for the left and right fragment ends.")
  
  if(multFactor != 1 | filter){
    ## create empty matrix for the combined counts
    counts=matrix(NA, nrow=length(rowRanges(object)), 
      ncol=nrow(colData(object)))

    ## get fragments for which both ends are valid
    bothValid = mcols(object)$leftValid & mcols(object)$rightValid

    counts[bothValid,] <- assay(object, "countsLeftFragmentEnd")[bothValid,] +
      assay(object, "countsRightFragmentEnd")[bothValid,]
  
    counts[!bothValid & rowRanges(object)$leftValid] <- multFactor *
      assay(object, "countsLeftFragmentEnd")[!bothValid & rowRanges(object)$leftValid,]
  
    counts[!bothValid & rowRanges(object)$rightValid] <- multFactor * 
      assay(object, "countsRightFragmentEnd")[!bothValid & rowRanges(object)$rightValid,]

  } else {
    counts <- assay(object, "countsLeftFragmentEnd") + assay(object, "countsRightFragmentEnd")
  }
  if (any(round(counts) != counts)) {
    stop("Some values in counts are not integers.
          Make sure that countsRight/LeftFragmentEnd and multFactor
          only contain integers.")
  } 
  mode(counts) <- "integer"

  assays(object)[["counts"]] <- counts
  ## reorder that counts is the first assay as required by DESeq2
  assays(object) <- assays(object)[c("counts", setdiff(names(assays(object)), "counts"))]

  object
}


##' Write track files of an selected \code{assay}
##' 
##' The files are saved in the specified folder. The filenames are the 
##' combination of the assay name, the selected column name and the corresponding
##' file extension.
##'
##' \code{writeTrackFiles} 
##' 
##' @param object A \code{FourC} object.
##' @param assay Character vector selecting the assay of the \code{FourC} object
##'         that should be saved as track file.
##' @param folder Path relative to the project folder, where the results are track files should be saved.
##' @param format Character vector specifying the format of the output.
##' Can either be 'bedGraph' or 'bw'. 'bw' is the default.
##' @param removeZeros Define whether fragments with zero counts should be included with
##'        value 0 or not. On default zeros are removed.        
##' @return Message whether the track export of assay was successful.
##' @author Felix A. Klein, \email{felix.klein@@embl.de}
##' @examples  
##' 
##' 
##' 
##'                        
##' data(fc, package="FourCSeq")
##' metadata(fc)$projectPath = tempdir()
##' 
##' fc <- combineFragEnds(fc)
##' fc
##' 
##' writeTrackFiles(fc)
##' 
##' @export
writeTrackFiles <- function(object, 
                          assay="counts", 
                          folder="tracks",
                          format="bw",
                          removeZeros=TRUE){    

  stopifnot(class(object)=="FourC")
  
  if(!format %in% c("bw", "bedGraph"))
     stop("Format has to be 'bw' or 'bedGraph'")  
  
  if(!assay %in% names(assays(object))) 
    stop("Select a valid assay of the FourC object.")

  ## get the fragment intervals
  ivCols <- c("seqnames", "start", "end")
  fragData = rowRanges(object)
  mcols(fragData) = NULL

  ## get the count data of the fragment ends
  data <- assays(object)[[assay]]
  
  outputDir <- file.path(metadata(object)$projectPath, folder)
  if(!file.exists(outputDir)) dir.create(outputDir)
  
  tmp <- options(scipen=12, digits=2)
  
  for(name in colnames(data)){
    outputFile = file.path(outputDir, paste0(assay, "_", name, ".", format))

    notNA <- !is.na(data[, name])
    if(removeZeros) notNA[notNA][(data[notNA,name] == 0)] <- FALSE

    gr = fragData[notNA,]
    gr$score = data[notNA, name]
    gr=keepSeqlevels(gr, unique(as.character(seqnames(gr))))
    export(gr, outputFile, format=format)
    rm(gr)
  }
  options(tmp)
  return(paste0("Successfully created ", format, 
                " files of the ", assay, " data."))
}

##' Normalize count data to rpm
##' 
##' \code{normalizeRPM} Normalizes the counts of each experiment to RPM. 
##' 
##' @param object A \code{FourC} object.
##' @param assay Character vector selecting the assay of the \code{FourC} object
##'        that should be normalized.
##' @param normalized Character vector specifying the assay name of the normalized
##'        data.
##' @param removeHighestValues Defines the number of highest count values that should
##'        be removed before the library sizes are calculated in the denominator.
##' \code{normalized} containing the normalized data.
##' @author Felix A. Klein, \email{felix.klein@@embl.de}
##' @seealso \code{\link{FourC}}, \code{\link{combineFragEnds}}
##' @examples  
##' 
##'                        
##' data(fc, package="FourCSeq")
##' 
##' fc <- combineFragEnds(fc)
##' fc
##' 
##' fc <- normalizeRPM(fc)
##' fc
##' 
##' @export
normalizeRPM <- function(object, 
                         assay="counts",
                         normalized="rpm",
                         removeHighestValues=0){    
  
  stopifnot(class(object)=="FourC")
  
  if(!assay %in% names(assays(object))) 
    stop("Select a valid assay of the FourC object.")
  
  assays(object)[[normalized]] <- apply(assays(object)[[assay]], 
                                        2, 
                                        function(x, remove){
                                          if(remove > 0) 
                                            1e+6*x/sum(x[x < sort(x, decreasing=TRUE)[remove]])
                                          else 
                                            1e+6*x/sum(x)
                                          }, 
                                        remove=removeHighestValues)
  object  
}

##' getDistAroundVp
##' 
##' \code{getDistAroundVp} Add the distance information for a given viewpoint 
##' to the fragment data.
##' 
##' @param vp A single character string with the selected viewpoint.
##' @param vpData \code{DataFrame} containing the column data of the \code{FourC} object.
##' @param fragData \code{GRanges} object containing the row data (fragment data) of 
##' the \code{FourC} object.
##' 
##' @return Updated \code{GRanges} object that contains the distance from the viewpoint 
##'         for the fragments on the viewpoint chromosome. 
##'         
##' @details Mid positions of the fragments are defined as start + (end - start) %/% 2
##' 
##' @examples
##' 
##' 
##' data(fc, package="FourCSeq")
##' 
##' fragmentDataWithDistance <- getDistAroundVp("ap", colData(fc), rowRanges(fc))
##' 
##' @export
getDistAroundVp <- function(vp, vpData, fragData){
  ## calculate the distance from the view point for the same chromosome using the fragment mid points
  ## get the genomic coordinates of the current viewpoint
  curVp = unique(vpData[vpData$viewpoint==vp,c("chr", "start","end")])
  curVp$mid = curVp$start + (curVp$end - curVp$start)%/%2
  
  fragData$mid = start(fragData) + (end(fragData) - start(fragData))%/%2
    
  fragData$dist = NA

  sameChr = as.logical(seqnames(fragData) == curVp$chr)

  fragData$dist[sameChr] = fragData$mid[sameChr] - curVp$mid

  fragData$posLeft = fragData$dist<0
  fragData$posLeft[is.na(fragData$posLeft)] = FALSE
  fragData$posRight = fragData$dist>0
  fragData$posRight[is.na(fragData$posRight)] = FALSE

  ## add information about blind fragments
  fragData$blind = fragData$leftSize < 0  & fragData$rightSize < 0
  
  ## add vp chromosome
  fragData$vpChr = sameChr

  newCols <- c("mid", "dist", "posLeft", "posRight", "blind", "vpChr")
  mcolsRows <- DataFrame(type=rep("viewpointInfo", length(newCols)),
                         description=rep("",length(newCols)))
  mcols(mcols(fragData))[colnames(mcols(fragData)) %in% newCols,] <- mcolsRows
  
  fragData
}

##' Detect differences
##' 
##' \code{getDifferences} detects differences in the interaction frequencies between 
##' different conditions. For each viewpoint all possible combinations of 
##' conditions are tested using the \code{DESeq2} package.
##' 
##'                
##' @param object A \code{FourC} object.
##' @param referenceCondition Reference condition to be used for testing.
##' @param fitNormFactors Defines if the distance dependency should be used to 
##' estimate normalizationFactors. Default is \code{TRUE}.
##' @return Returns an updated \code{FourC} object that contains the results and
##'         information from the differential testing. Normalization factors
##'         are added to the assays slot.
##' @author Felix A. Klein, \email{felix.klein@@embl.de}
##' @seealso \code{\link{FourC}}, \code{\link{getZScores}}, 
##'          \code{\link[DESeq2]{estimateDispersions}},
##'          \code{\link[DESeq2]{nbinomWaldTest}}
##' @examples
##' 
##'                        
##' data(fcf, package="FourCSeq")
##' 
##' fcf <- getDifferences(fcf, referenceCondition="WE_68h")
##' 
##' results <- getAllResults(fcf)
##' results
##' 
##' @export
getDifferences <- function(object,
                           referenceCondition=NULL,
                           fitNormFactors=TRUE){      
  stopifnot(is(object, "FourC"))
  
  if( ! c("counts") %in% names(assays(object)))
    stop("No assay 'counts' found. Use combineFragEnds first.")
  if ("results" %in% mcols(mcols(object))$type) {
    stop("Differences are already calculated. To recalculate use the object returned by 'getZScores'.")
  }
  
  ## get the viewpoint data
  viewpoint = unique(colData(object)$viewpoint)
  if(length(viewpoint) != 1)
    stop("None or more than one viewpoint are contained in the 'FourC' object.
         Use a 'FourC' object that contains only one viewpoint.")
  print(viewpoint)
  
  if(!fitNormFactors) viewpoint = paste0(viewpoint, "_noNormFactors")

  colData(object)$condition <- factor(colData(object)$condition)
  
  if(is.null(referenceCondition)){
    referenceCondition <- levels(colData(object)$condition)[1]
    cat("No reference condition provided. ", referenceCondition, 
        " was chosen by default.")
  }
  
  if(referenceCondition %in% levels(colData(object)$condition)){
    colData(object)$condition <- relevel(colData(object)$condition, referenceCondition)
  } else {
    cat("Provided reference condition ", referenceCondition, " not found in levels.\n")
    cat("Using ", levels(colData(object)$condition)[1], " as reference condition instead.")  
  }
  object <- estimateSizeFactors(object)
  
  ## fit normalizationFactors/estimate dispersions
  if(fitNormFactors) {
    object <- try(getNormalizationFactors(object))
    if(is(object, "try-error")) { stop() }     
  }
  
  ## test for differences  
  design(object) <- formula(~ condition)
  object <- try(estimateDispersions(object))
  object <- nbinomWaldTest(object)
  
  invisible(object)
}


##' Calculate z-scores using the residuals of the general trend fit.
##' 
##' \code{getZScores} calculates the z-score for each fragment within a
##' distance from the viewpoint defined by \code{distAroundVP}. For the
##' calculation the count data is first transformed with a variance stabilizing
##' transformation from the \code{DESeq2} package. The decay trend of this transformed
##' data with distance from the viewpoint is fitted either with local regression
##' model using \code{locfit} or a monotone decay fit using the \code{fda} package. 
##' z-scores are finally calculated from the residuals of the fit values.
##' 
##' @param object A \code{FourC} object.
##' @param removeZeros Parameter to define if fragments with zero counts across 
##' all replicates and conditions should be removed from the calculation. 
##' Default is \code{TRUE}.
##' @param minCount The minimum median count value across replicates, a fragment 
##' has to have to be used for the calculation of z-scores. Default is \code{40}
##' @param minDist Define a minimum distance to both sides of the viewpoint that is
##' discarded for analysis. If undefined (\code{NULL}), the borders of the 
##' viewpoint peak are estimated as the minimum values next to the viewpoint before
##' the signal rises again.
##' @param fitFun Fit function to be used. Either \code{distFitMonotoneSymmetric} (default), 
##' \code{distFitMonotone} or any self-defined function to fit the distance dependency.
##' @param sdFun Function to calculate the variation of the data. Default is \code{mad}.
##' @param ... Additional Parameters passed to the distance fit function specified in
##' fitFun  (e.g. \code{distFitMonotone}, \code{distFitMonotoneSymmetric}).
##' @return Returns a \code{FourC} object for the selected viewpoint
##' with z-score values for all fragments on the viewpoint chromosome that 
##' passed the \code{minCount} threshold and that were not too close to the 
##' viewpoint. All additional required data is saved in the object. Especially
##' the following information is added to the \code{FourC} object:
##' \enumerate{
##'      \item{metadata:} parameters passed to the getZScore function.
##'      \item{colData:} sdFun values calculated from the fit residuals
##'      \item{rowRanges:} the distance information to the viewpoint and information
##'                      calculated for the variance stabilizing data are added.
##'      \item{assays:} variance stabilized count values (trafo), fit values (fit),
##'                     z-score values (zScore), associated p-values (pValue) and
##'                     adjusted p-values (pAdjusted)
##' }
##' 
##' 
##' @author Felix A. Klein, \email{felix.klein@@embl.de}
##' @seealso \code{\link{FourC}}, \code{\link{countFragmentOverlaps}},
##' \code{\link{distFitMonotone}}, \code{\link{distFitMonotoneSymmetric}}
##' @examples 
##'                        
##' data(fc, package="FourCSeq")
##' 
##' fcf <- getZScores(fc)
##' 
##' fcf
##' 
##'  
##' @export
getZScores <- function(object,
                       removeZeros = TRUE,                             
                       minCount=40,
                       minDist=NULL,
                       fitFun="distFitMonotoneSymmetric",
                       sdFun=mad,
                       ...){  
  stopifnot(class(object)=="FourC")
  
  if( ! c("counts") %in% names(assays(object)))
    stop("No assay 'counts' found. Use 'combineFragEnds' first.")

  if(any(!c("chr", "start", "end") %in% names(colData(object))))
    stop("No information about viewpoint position provided. Add this information as described in the vignette.")
    
  if( c("zScore") %in% names(assays(object)))
    stop("z-scores are already calculated. To recalculate z-scores use the object returned by 'combineFragEnds'.")
  
  ## get the viewpoint data
  viewpoint = unique(colData(object)$viewpoint)
  if(length(viewpoint) != 1)
    stop("None or more than one viewpoint are contained in the 'FourC' object.
         Use a 'FourC' object that contains only one viewpoint.")
  print(viewpoint)
  
  ## just keep viewpoint chromosome
  object <- object[seqnames(object) == unique(colData(object)$chr),]
    
  #######################################################
  ## calculated distances around the current viewpoint
  #######################################################  
  fragData = getDistAroundVp(viewpoint, colData(object), rowRanges(object))
  
  ## calculate the mean counts for each fragments for filtering
  medianCounts <- apply(counts(object), 1, median)
    
  ## remove counts of fragments to close to the viewpoint
  if(!is.null(minDist)){
    tooClose = abs(fragData$dist) <= minDist
  } else {
    ## find min to left
    toLeft <- fragData$dist > -20000 & fragData$dist < 0 & !is.na(fragData$dist)
    afterMin <- 1:sum(toLeft) > tail(which(sign(diff(medianCounts[toLeft])) < 0), 1) + 1
    toExclude <- which(toLeft)[afterMin]
    
    ## find min to right
    toRight <- fragData$dist < 20000 & fragData$dist > 0 & !is.na(fragData$dist)
    beforeMin <- 1:sum(toRight) < which(sign(diff(medianCounts[toRight])) > 0)[1]
    toExclude <- c(toExclude, which(toRight)[beforeMin])
    
    ## make sure minimum distance from the viewpoint is 1000
    toExclude = union(toExclude, which(abs(fragData$dist) < 1000))
    
    tooClose = rep(FALSE, length(fragData$dist))
    tooClose[toExclude] = TRUE
  }
  
  ## remove points to close to the viewpoint and store the info for fitting 
  fragData$posLeft[tooClose] = FALSE
  fragData$posRight[tooClose] = FALSE
  fragData$tooClose = tooClose
  
  ## remove low count values and store the info for fitting
  lowCounts = medianCounts < minCount
  fragData$posLeft[lowCounts] = FALSE
  fragData$posRight[lowCounts] = FALSE
  fragData$lowCounts = lowCounts
  
  ## select only values that fullfill the filter criterion & have loggeomeans finite
  fragData$selectedForFit <- (fragData$posLeft | fragData$posRight) 
  
  ## update metadata 
  newCols <- c("tooClose", "lowCounts", "selectedForFit")
  mcolsRows <- DataFrame(type=rep("fragmentSelection", length(newCols)),
                         description=rep("",length(newCols)))
  mcols(mcols(fragData))[colnames(mcols(fragData)) %in% newCols,] <- mcolsRows
  
  ## replace rowRanges with fragData containing distance information and convert fragData to data.frame
  rowRanges(object) <- fragData
  fragData = as.data.frame(fragData)
  
  ## factor condition
  colData(object)$condition <- factor(colData(object)$condition)
  
  ###############################
  ## calculate z-scores
  ###############################
  
  ## select only fragments that passed the filter
  dds <- object[mcols(object)$selectedForFit,]
  dds <- estimateSizeFactors(dds)
  dds <- estimateDispersions(dds)

  if (attr(dispersionFunction(dds), "fitType") != "parametric") {
    stop("Failed to estimate the parameters of the Variance stabilizing transformation.")
  } else {
    coefs <- attr(dispersionFunction(dds), "coefficients")
    attr(dds, "vst") <- function(q) {
      log((1 + coefs["extraPois"] + 2 * coefs["asymptDisp"] * 
             q + 2 * sqrt(coefs["asymptDisp"] * q * (1 + coefs["extraPois"] + 
                                                       coefs["asymptDisp"] * q)))/(4 * coefs["asymptDisp"]))/log(2)}
    attr(dds, "inverse-vst") <- function(q){
      (4*coefs["asymptDisp"]*2^q-(1+coefs["extraPois"]))^2 / 
        (4*coefs["asymptDisp"]*(1+coefs["extraPois"] + 
                                  (4*coefs["asymptDisp"]*2^q-(1+coefs["extraPois"]))))}
  }

  ## get transformed values
  trafo <- getVST(dds)(counts(dds))

  fit <- apply(trafo,
               2,
               fitFun,
               fragData=as.data.frame(rowRanges(dds)),
               removeZeros=removeZeros,
               ...)
  residuals <- trafo - fit
  sd <- apply(residuals, 2, sdFun)

  zScore <- sweep(residuals, 2, sd, "/")
  
  ## calculate p-values
  pValue <- apply(zScore, 2, pnorm, lower.tail=FALSE)
  pAdjusted <- apply(pValue,2, p.adjust, method="BH")
    
  ## update colData with sd values
  colData(dds)$sd <- sd
  idx <- which(colnames(colData(dds)) == "sd")
  metaDataFrame <- DataFrame(type="intermediate",
                             description="sd/mad calculated from the residuals")
  mcols(colData(dds))[idx,] <- metaDataFrame
  
  ## save the used parameters of the function call in metadata
  metadata(dds)$parameter <- DataFrame(fitFun=fitFun,
                                       removeZeros=removeZeros,
                                       minCount=minCount,
                                       sdFun=sdFun,
                                       ...)
  if(!is.null(minDist)) metadata(dds)$parameter$minDist = minDist
  
  ## add assays
  assays(dds) <- c(assays(dds), SimpleList(trafo=trafo,
                                           fit=fit,
                                           zScore=zScore,
                                           pValue=pValue,
                                           pAdjusted=pAdjusted))
  
  invisible(dds)
}

##' Get normalization factors for each fragment
##' 
##' \code{getNormalizationFactors} uses the distance dependency to calculate 
##' normalization factors for each fragment.
##' 
##'                
##' @param object A \code{FourC} object.
##' @return Returns an updated \code{FourC} object that contains normalization
##' factors.
##' @author Felix A. Klein, \email{felix.klein@@embl.de}
##' @seealso \code{\link{getDifferences}}
##' @examples
##'                        
##' data(fcf, package="FourCSeq")
##' 
##' normalizationFactors <- getNormalizationFactors(fcf)
##' head(normalizationFactors)
##' 
##' @export
getNormalizationFactors <- function(object){
  stopifnot(class(object)=="FourC") 
  if(! ("fit" %in% names(assays(object)))) stop("Call getZScores first!")
   
  
  fit <- getVST(object, inverse=TRUE)(assays(object)[["fit"]])
  
  ## estimate size factors for each fragment
  loggeomeans <- rowMeans(log(fit))

  normFactors <- exp(log(fit) - loggeomeans)

  normalizationFactors(object) <- normFactors
  if("sizeFactor" %in% names(colData(object))) colData(object) <- colData(object)[!names(colData(object)) == "sizeFactor"]
  
  invisible(object)
}



##' Fit the distance dependency
##' 
##' \code{distFitMonotone} takes the variance stabilized count values and 
##' calculates a monotone fit for the distance dependency. The position 
##' information about the viewpoint is stored in \code{fragData}. 
##' The signal trend is fitted for the data left and right to the viewpoint 
##' separately using the fda package. 
##' 
##' @param count Matrix of variance stabilized count data in the defined interval around
##' the viewpoint.
##' @param fragData Data frame with all the information on restriction
##' fragments and the interval around the viewpoint.
##' @param alpha Approximate number of fragments between two breaks of the B-spline
##' basis. \code{floor((number of fragments))/alpha)} is passed to \code{create.bspline.basis} 
##' as \code{nbasis} argument (if this value is smaller than 4, 4 is the default value for \code{nbasis}). 
##' Default is \code{20}.
##' @param penalty Penalty term passed to  \code{fdPar} as \code{lambda} argument.
##' Default is \code{0.1}.
##' @param removeZeros Defines if zero values should be removed from the fit. 
##' Default is \code{FALSE}.
##' @param ... Additional parameters.
##' @return Returns a matrix with z-score values of all fragments around the given
##' viewpoint.
##' @author Felix A. Klein, \email{felix.klein@@embl.de}
##' 
##' @export
distFitMonotone <- function (count, fragData, alpha = 20, penalty=0.1, removeZeros=FALSE, ...) 
{

  if(removeZeros){
    fragData$posLeft <- fragData$posLeft & count > min(count)
    fragData$posRight <- fragData$posRight & count > min(count)
  }
  
  fragData$logDist = log(abs(fragData$dist))

  ## left side
  basisobj <- create.bspline.basis(c(min(fragData$logDist[fragData$posLeft]),max(fragData$logDist[fragData$posLeft])), 
                                   nbasis=max(floor(sum(fragData$posLeft)/alpha), 4))
  fdParobj = fdPar(basisobj, 2, penalty)
  monoLeft = smooth.monotone(argvals=fragData$logDist[fragData$posLeft], y=count[fragData$posLeft], WfdParobj=fdParobj)
  betaLeft <- monoLeft$beta

  ## right side 
  basisobj <- create.bspline.basis(c(min(fragData$logDist[fragData$posRight]),max(fragData$logDist[fragData$posRight])), 
                                   nbasis=max(floor(sum(fragData$posRight)/alpha), 4))
  fdParobj = fdPar(basisobj, 2, penalty)
  monoRight = smooth.monotone(argvals=fragData$logDist[fragData$posRight], y=count[fragData$posRight], WfdParobj=fdParobj)
  betaRight <- monoRight$beta

  fit = rep(NA, length(count))
  selLeft <- fragData$dist < 0
  if ("tooClose" %in% names(fragData)) 
    selLeft <- selLeft & !fragData$tooClose
  fit[selLeft] <- predict(monoLeft, fragData$logDist[selLeft])
  ## extend the minimum value to the end
  fit[selLeft][!is.finite(fit[selLeft])] <- min(fit[selLeft], na.rm=TRUE)

  selRight <- fragData$dist > 0
  if ("tooClose" %in% names(fragData)) 
    selRight <- selRight & !fragData$tooClose
  fit[selRight] <- predict(monoRight, fragData$logDist[selRight])
  ## extend the minimum value to the end  
  fit[selRight][!is.finite(fit[selRight])] <- min(fit[selRight], na.rm=TRUE)
  
  invisible(fit)
}


##' Fit the distance dependency
##' 
##' \code{distFitMonotoneSymmetric} takes the variance stabilized count values and 
##' calculates a symmetric monotone fit for the distance dependency. The position 
##' information about the viewpoint is stored in \code{fragData}. 
##' The signal trend is fitted for the combined data left and right to the viewpoint
##' using the fda package. 
##' 
##' @param count Matrix of variance stabilized count data in the defined interval around
##' the viewpoint.
##' @param fragData Data frame with all the information on restriction
##' fragments and the interval around the viewpoint.
##' @param alpha Approximate number of fragments between two breaks of the B-spline
##' basis. \code{floor((max(number of fragments left or right))/alpha)} is passed to 
##' \code{create.bspline.basis} as \code{nbasis} argument (if this value is smaller 
##' than 4, 4 is the default value). Default is \code{20}.
##' @param penalty Penalty term passed to  \code{fdPar} as \code{lambda} argument.
##' Default is \code{0.1}.
##' @param removeZeros Defines if zero values should be removed from the fit. 
##' Default is \code{FALSE}.
##' @param ... Additional parameters.
##' @return Returns a matrix with z-score values of all fragments around the given
##' viewpoint.
##' @author Felix A. Klein, \email{felix.klein@@embl.de}
##' 
##' @export
distFitMonotoneSymmetric <- function (count, fragData, alpha = 20, penalty=0.1, removeZeros=FALSE, ...) 
{
  
  if(removeZeros){
    fragData$posLeft <- fragData$posLeft & count > min(count)
    fragData$posRight <- fragData$posRight & count > min(count)
  }
  
  fragData$logDist = log(abs(fragData$dist))
  
  ## left side
  basisobj <- create.bspline.basis(c(min(fragData$logDist[fragData$posLeft | fragData$posRight]),
                                     max(fragData$logDist[fragData$posLeft | fragData$posRight])), 
                                   nbasis=max(floor(max(sum(fragData$posLeft), sum(fragData$posRight))/alpha), 4))
  fdParobj = fdPar(basisobj, 2, penalty)
  mono = smooth.monotone(argvals=fragData$logDist[fragData$posLeft | fragData$posRight], 
                             y=count[fragData$posLeft | fragData$posRight], 
                             WfdParobj=fdParobj)
  beta <- mono$beta

  
  fit = rep(NA, length(count))
  sel <- fragData$dist != 0
  if ("tooClose" %in% names(fragData)) 
    sel <- sel & !fragData$tooClose
  fit[sel] <- predict(mono, fragData$logDist[sel])
  ## extend the minimum value to the end
  fit[sel][!is.finite(fit[sel])] <- min(fit[sel], na.rm=TRUE)
  
  invisible(fit)
}


getVST <- function(object, inverse=FALSE){
  if(!is.null(attr(object, "vst")) & !is.null(attr(object, "inverse-vst"))){
    if(!inverse) return(attr(object, "vst"))
    return(attr(object, "inverse-vst"))
  } else {
    stop("No variance stabilizing transformation available. Call getZScores first.")
  }
}


##' FourCSeq analysis results
##'
##' Using the \code{DESEeq2} function \code{results},
##' \code{getAllResults} extracts results from a FourCSeq analysis
##' giving base means across samples,
##' log2 fold changes, standard errors, test statistics,
##' p-values and adjusted p-values for all pair-wise
##' conditions tested.
##' 
##' @param object \code{FourC} object for which z-scores and differences
##' have been calculated.
##' @param ... Additional parameters that can be passed to \code{results}.
##' @return DataFrame of results columns for all pairwise tests, with metadata
##' columns of coefficient and test information
##' 
##' @author Felix A. Klein, \email{felix.klein@@embl.de}
##' 
##' @seealso \code{\link[DESeq2]{results}}
##' 
##' @examples
##' 
##'                        
##' data(fcf, package="FourCSeq")
##' 
##' fcf <- getDifferences(fcf, referenceCondition="WE_68h")
##' 
##' results <- getAllResults(fcf)
##' results
##' 
##' @export
getAllResults <- function(object, ...){
  stopifnot(class(object)=="FourC") 
  if(!"results" %in% mcols(mcols(object))$type) stop("Call getDifferences first!")
  
  object <- object[rowRanges(object)$selectedForFit,]
  lvls <- levels(colData(object)$condition)
  
  if(length(lvls) > 1){
    combi <- combinations(length(lvls), 2)
    
    res <- do.call(cbind, lapply(seq_len(nrow(combi)), function(i, ...){
      tmp <- results(object, contrast=c("condition", lvls[combi[i,]]), ...)[,-1]
      colnames(tmp) <- paste(colnames(tmp),
                             paste(lvls[combi[i,]], collapse="_"),
                             sep="_")
      tmp
    }, ...))
    res = cbind(mcols(object)[,"baseMean", drop=FALSE], res)
  } else stop("Only one level and no differential results")
  invisible(res)
}

##' Add peaks based on z-scores and adjusted p-values
##' 
##' Fragments are annotated to be a z-score peak (TRUE or FALSE) based 
##' on z-scores and fdr values for the given fragment
##' 
##' @param object \code{FourC} object for which z-scores have been calculated.
##' @param zScoreThresh Threshold on z-score that has to met by all replicates 
##' of a condition.
##' @param fdrThresh Threshold on adjusted p-value that has to be met in at 
##' least one replicated of a condition.
##' @return \code{FourC} object with an additional assay 'peaks' of boolean 
##' values. The parameters of the call to addPeaks are stored as DataFrame 
##' \code{peakParameter} in the metadata slot.
##' 
##' @author Felix A. Klein, \email{felix.klein@@embl.de}
##' 
##' data(fcf, package="FourCSeq")
##' 
##' fcf <- addPeaks(fcf)
##' 
##' @export
addPeaks <- function(object, zScoreThresh = 2, fdrThresh = 0.1){
  stopifnot(class(object)=="FourC") 
  if(! ("zScore" %in% names(assays(object)))) stop("Call getZScores first!")
  
  zScore <- assay(object, "zScore")
  fdr <- assay(object, "pAdjusted")
  
  peaks = array(FALSE, dim=dim(fdr), dimnames=dimnames(fdr))
  
  for(lvl in levels(colData(object)$condition)){
    condCols <- colData(object)$condition == lvl 
    tmp <- (apply(fdr[,condCols, drop=FALSE] < fdrThresh, 1, any, na.rm=TRUE) & 
               apply(zScore[,condCols, drop=FALSE] > zScoreThresh, 1, all, na.rm=TRUE))    
    ## annotate peaks
    peaks[tmp,condCols] = TRUE
  }

  assays(object)[["peaks"]] <- peaks
  
  metadata(object)$peakParameter <- DataFrame(zScoreThresh = zScoreThresh, 
                                              fdrThresh = fdrThresh)
  
  invisible(object)
}


##' Smooth the counts of neighboring fragments
##' 
##' Counts are smoothed using the number of fragments provided by binWidth.
##' binWidth has to be an odd number so that an equal number of fragments
##' to each side of the current fragment are used for smoothing.
##' 
##' @param object A \code{FourC} object.
##' @param assay Assay name that will be smoothed.
##' @param binWidth Integer vector of odd numbers.
##' @return Returns an updated \code{FourC} object with smoothed counts
##' for each binWidth as new assays.
##' @author Felix A. Klein, \email{felix.klein@@embl.de}
##' @seealso \code{\link{FourC}}
##' @examples 
##' 
##' data(fc, package="FourCSeq")
##' 
##' fc <- smoothCounts(fc)
##' fc
##' 
##' @export
smoothCounts <- function(object,
                         assay="counts",
                         binWidth = 5){
  
  stopifnot(class(object)=="FourC")
  
  if(any( ! c("counts") %in% names(assays(object))))
    stop("Use combineFragEnds to combine the counts for left and right fragment ends.")
  if(any( binWidth %% 2 == 0))
    stop("binWidth must only contain odd numbers.")
  
  toSmooth = assay(object, assay)
  
  smoothed = lapply((binWidth - 1)/2, function(w){
    smooth = array(0, dim=dim(toSmooth), dimnames=dimnames(toSmooth))
    for(chr in unique(seqnames(rowRanges(object)))){
      cat(2*w + 1, chr, "\n")
      currentChr = as.logical(seqnames(rowRanges(object)) == chr)
      if(sum(currentChr) > 2*w + 1){
        band = bandSparse(n=sum(currentChr), m=sum(currentChr), k=(-w):w)
        band = band / rowSums(band)
        smooth[currentChr,] = matrix(band %*% toSmooth[currentChr,])
      } else {
        cat("Smoothing window to larg than chromosome size.\n")
        cat("Values are replaces by mean values.\n")
        smooth[currentChr,] <- t(apply(toSmooth[currentChr,,drop=FALSE], 2, mean))
      }
    }
    invisible(smooth)
  })
  
  names(smoothed) <- paste0(assay, "_", binWidth)  
  smoothed <- SimpleList(smoothed)  
  assays(object) <- c(assays(object), smoothed)
  
  invisible(object)
}


##' Smooth the hits of neighboring fragments
##' 
##' Counts are transformed to a hit if they exceed the given threshold.
##' Hits are then smoothed using the number of fragments provided by binWidth.
##' binWidth has to be an odd number so that an equal number of fragments
##' to each side of the current fragment are used for smoothing.
##' 
##' @param object A \code{FourC} object.
##' @param binWidth Integer vector of odd numbers.
##' @param thresh Single integer defining the threshold for calling a fragment a hit.
##' @return Returns an updated \code{FourC} object with smoothed counts
##' for each binWidth as new assays.
##' @author Felix A. Klein, \email{felix.klein@@embl.de}
##' @seealso \code{\link{FourC}}
##' @examples 
##' 
##' data(fc, package="FourCSeq")
##' 
##' fc <- smoothHitPerCent(fc)
##' fc
##' 
##' @export
smoothHitPerCent <- function(object,
                             binWidth = 101,
                             thresh=1){
  
  stopifnot(class(object)=="FourC")
  
  if(any( ! c("counts") %in% names(assays(object))))
    stop("Use combineFragEnds to combine the counts for left and right fragment ends.")
  if(any( binWidth %% 2 == 0))
    stop("binWidth must only contain odd numbers.")
  
  hits = abs(counts(object) > thresh)
  
  smoothed = lapply((binWidth - 1)/2, function(w){
    smooth = array(0, dim=dim(hits), dimnames=dimnames(hits))
    for(chr in unique(seqnames(rowRanges(object)))){
      cat(2*w + 1, chr, "\n")
      currentChr = as.logical(seqnames(rowRanges(object)) == chr)
      if(sum(currentChr) > 2*w + 1){
        band = bandSparse(n=sum(currentChr), m=sum(currentChr), k=(-w):w)
        band = band / rowSums(band)
        smooth[currentChr,] = matrix(band %*% hits[currentChr,])
      } else {
        cat("Smoothing window larger than chromosome size.\n")
        cat("Values are replaces by mean values.\n")
        tmp <- t(smooth[currentChr,])
        tmp[,] <- apply(hits[currentChr,,drop=FALSE], 2, mean)
        smooth[currentChr,] <- t(tmp)
      }
    }
    invisible(smooth)
  })
  
  names(smoothed) <- paste0("hitPerCent_", binWidth, "_", thresh)  
  smoothed <- SimpleList(smoothed)  
  assays(object) <- c(assays(object), smoothed)
  
  invisible(object)
}
Any scripts or data that you put into this service are public.
FourCSeq documentation built on Nov. 8, 2020, 7:08 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
FourCSeq
Package analyse 4C sequencing data

R/functions.R
In FourCSeq: Package analyse 4C sequencing data

Defines functions smoothHitPerCent smoothCounts addPeaks getAllResults getVST getNormalizationFactors getZScores getDifferences getDistAroundVp normalizeRPM writeTrackFiles combineFragEnds

Documented in addPeaks combineFragEnds getAllResults getDifferences getDistAroundVp getNormalizationFactors getZScores normalizeRPM smoothCounts smoothHitPerCent writeTrackFiles

Try the FourCSeq package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

FourCSeq Package analyse 4C sequencing data

R/functions.R In FourCSeq: Package analyse 4C sequencing data

Defines functions smoothHitPerCent smoothCounts addPeaks getAllResults getVST getNormalizationFactors getZScores getDifferences getDistAroundVp normalizeRPM writeTrackFiles combineFragEnds

Documented in addPeaks combineFragEnds getAllResults getDifferences getDistAroundVp getNormalizationFactors getZScores normalizeRPM smoothCounts smoothHitPerCent writeTrackFiles

Try the FourCSeq package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

FourCSeq
Package analyse 4C sequencing data

R/functions.R
In FourCSeq: Package analyse 4C sequencing data