Nothing
#' @include pipeline_io.R
NULL
#' Per-sample loop quantities
#'
#' \code{numLoops} counts number of loops for each sample based on the
#' index of nloops and returns a data.frame
#'
#' This function shows the number of unique loops with at least
#' \code{nloops} in counts. Can be used to quickly visualize relative
#' sequencing depth between samples
#'
#' @param dlo A loops object
#' @param nloops A numeric vector of counts to be considered
#'
#' @return A data.frame
#'
#' @examples
#' # Determine what samples have loops with 1-20 counts
#' rda<-paste(system.file('rda',package='diffloop'),'loops.small.rda',sep='/')
#' load(rda)
#' nLoops <- numLoops(loops.small, 1:20)
#'
#' # Determine what samples loops with 1-10 counts by default
#' nLoops <- numLoops(loops.small)
#' @export
setGeneric(name = "numLoops", def = function(dlo, nloops = 1:10) standardGeneric("numLoops"))
.numLoops <- function(dlo, nloops) {
nres <- sapply(nloops, function(n) colSums(dlo@counts > n))
return(as.data.frame(cbind(nloops, t(nres))))
}
#' @rdname numLoops
setMethod(f = "numLoops", signature = c("loops", "numeric"),
definition = function(dlo, nloops) {
.numLoops(dlo, nloops)
})
#' @rdname numLoops
setMethod(f = "numLoops", signature = c("loops", "missing"),
definition = function(dlo, nloops) {
.numLoops(dlo, 1:10)
})
#' Remove self loops
#'
#' \code{removeSelfLoops} removes instances where a loop is observed
#' between the same anchor
#'
#' This function removes loops from the \code{interactions} slot that reference
#' the same index of the \code{anchors} slot.
#'
#' @param dlo A loops object
#'
#' @return A loops object
#'
#' @examples
#' rda<-paste(system.file('rda',package='diffloop'),'loops.small.rda',sep='/')
#' load(rda)
#' jpn_unique <- removeSelfLoops(loops.small)
#'
#' @export
setGeneric(name = "removeSelfLoops", def = function(dlo) standardGeneric("removeSelfLoops"))
#' @rdname removeSelfLoops
setMethod(f = "removeSelfLoops", signature = c("loops"), definition = function(dlo) {
return(subsetLoops(dlo, dlo@interactions[, 1] != dlo@interactions[, 2]))
})
# Return Boolean Vector for loops in loops if both anchors
# are unique... Internal method
setGeneric(name = "uniqueLoops", def = function(dlo) standardGeneric("uniqueLoops"))
setMethod(f = "uniqueLoops", signature = c("loops"), definition = function(dlo) {
return(((!is.na(dlo@interactions[, 1]) & !is.na(dlo@interactions[,
2])) & (dlo@interactions[, 1] != dlo@interactions[, 2])))
})
# Determine loop type as either unique (with called anchors),
# self, one unique anchor = single, or no unique anchors
# Internal method
setGeneric(name = "classifyLoops", def = function(dlo) standardGeneric("classifyLoops"))
setMethod(f = "classifyLoops", signature = c("loops"), definition = function(dlo) {
x <- dlo@interactions
type <- rep(NA, nrow(x))
unique <- (!is.na(x[, 1]) & !is.na(x[, 2])) & (x[, 1] !=
x[, 2])
self <- (!is.na(x[, 1]) & !is.na(x[, 2])) & (x[, 1] == x[,
2])
single <- (!is.na(x[, 1]) & is.na(x[, 2])) | (is.na(x[, 1]) &
!is.na(x[, 2]))
none <- (is.na(x[, 1]) & is.na(x[, 2]))
type[unique] <- "unique"
type[self] <- "self"
type[single] <- "single"
type[none] <- "none"
type
})
#' Types of loops
#'
#' \code{loopMetrics} counts number of loops for each sample and returns
#' whether they are single, self, unique, or none
#'
#' This function shows the number of loops for each sample based on four
#' types. Single refers to having only one anchor for a the loop whereas
#' none has no unique anchors. If using the \code{loopsMake} pipeline, only
#' self and unique loops will be observed when running this function
#'
#' @param dlo A loops object
#'
#' @return A data.frame
#'
#' @examples
#' # Return loop metrics for number of each type for each sample
#' rda<-paste(system.file('rda',package='diffloop'),'loops.small.rda',sep='/')
#' load(rda)
#' loopMetrics(loops.small)
#'
#' @export
setGeneric(name = "loopMetrics", def = function(dlo) standardGeneric("loopMetrics"))
#' @rdname loopMetrics
setMethod(f = "loopMetrics", signature = c("loops"), definition = function(dlo) {
return(do.call("rbind", by(dlo@counts, classifyLoops(dlo),
colSums, na.rm = TRUE)))
})
#' Loop widths
#'
#' \code{loopWidth} returns the width of a loop, which is defined as the
#' distance between the anchors containing a loop
#'
#' This function returns a positive integer value of the number of basepairs
#' that separate two loops. If they are on separate chromosomes, it still
#' returns a value, but it will be non-sensical, so consider subsetting to
#' only intrachromosomal loops. Also, self-loops will return a postive number
#' that is the inter-anchor width. These loops should be handled using the
#' removeSelfLoops() function.
#'
#' @param dlo A loops object
#'
#' @return An integer vector
#'
#' @examples
#' # Return the width for loops
#' rda<-paste(system.file('rda',package='diffloop'),'loops.small.rda',sep='/')
#' load(rda)
#' w <- loopWidth(loops.small)
#'
#' @export
setGeneric(name = "loopWidth", def = function(dlo) standardGeneric("loopWidth"))
#' @rdname loopWidth
setMethod(f = "loopWidth", signature = c("loops"), definition = function(dlo) {
w <- (start(dlo@anchors[dlo@interactions[, 2]]) + end(dlo@anchors[dlo@interactions[, 2]]))/2 -
(start(dlo@anchors[dlo@interactions[, 1]]) + end(dlo@anchors[dlo@interactions[, 1]]))/2
return(as.integer(w))
})
#' Subset loops
#'
#' \code{subsetLoops} restricts the loops and counts matrix to only those
#' specified by \code{idxa}, either numerically or logically
#'
#' This function returns a loops object where the loops are retained only
#' if they meet a logical criteria or are included in the numeric vector of
#' \code{idxa}. Only the anchors that reference a loop in the subsetted
#' loops object are retained.
#'
#' @param dlo A loops object
#' @param idxa A numeric vector or logical vector
#'
#' @return A loops object
#'
#' @examples
#' # Return the first 10 loops
#' rda<-paste(system.file('rda',package='diffloop'),'loops.small.rda',sep='/')
#' load(rda)
#' #' ten <- subsetLoops(loops.small, 1:10)
#'
#' # Subset loops with widths greater than 10000
#' big <- subsetLoops(loops.small, loopWidth(loops.small) >= 10000)
#'
#' @export
setGeneric(name = "subsetLoops", def = function(dlo, idxa) standardGeneric("subsetLoops"))
.subsetLoops <- function(dlo, idxa) {
# Keep Unique Interactions and their counts
slot(dlo, "interactions", check = TRUE) <- as.matrix(dlo@interactions[idxa, , drop = FALSE])
slot(dlo, "counts", check = TRUE) <- as.matrix(dlo@counts[idxa, , drop = FALSE])
nRowData <- dlo@rowData[idxa, , drop = FALSE]
row.names(nRowData) <- NULL
slot(dlo, "rowData", check = TRUE) <- nRowData
return(cleanup(dlo))
}
#' @rdname subsetLoops
setMethod(f = "subsetLoops", signature = c("loops", "logical"),
definition = function(dlo, idxa) {
if (all(idxa))
return(dlo) else return(.subsetLoops(dlo, idxa))
})
#' @rdname subsetLoops
setMethod(f = "subsetLoops", signature = c("loops", "numeric"),
definition = function(dlo, idxa) {
.subsetLoops(dlo, idxa)
})
#' Filter loops
#'
#' \code{filterLoops} filters out loops that aren't wide, aren't prevalent
#' within samples or prevalent between samples
#'
#' Function that restricts loops in a loops object. \code{width} specifies
#' the minimum width between anchors. Default is zero. \code{nreplicates}
#' restricts loops to at least this specified amount of counts is present
#' in at least one sample. Instead of \code{nreplicates} being present in only
#' one sample, \code{nsamples} specifies how many individual samples that a
#' loop must have \code{nreplicates} in to be included after filtering.
#'
#' @param dlo A loops object
#' @param width Minimum loop width
#' @param nreplicates Minimum number of counts per loop
#' @param nsamples Minimum number of samples per loop per counts
#'
#' @return A loops object
#'
#' @examples
#' rda<-paste(system.file('rda',package='diffloop'),'loops.small.rda',sep='/')
#' load(rda)
#' # Restrict loops to > 5kb width
#' filtered.jpn1 <- filterLoops(loops.small, 5000, 0, 0)
#' # Restrict loops to > 5kb width and have >= 3 replicates in >= 1 sample
#' filtered.jpn2 <- filterLoops(loops.small, 5000, 3, 1)
#' # Restrict loops to > 10kb width and have >= 3 replicates in >= 2 samples
#' filtered.jpn3 <- filterLoops(loops.small, 10000, 3, 2)
#' @export
setGeneric(name = "filterLoops", def = function(dlo, width = 0,
nreplicates = 0, nsamples = 1) standardGeneric("filterLoops"))
#' @rdname filterLoops
setMethod(f = "filterLoops", definition = function(dlo, width = 0,
nreplicates = 0, nsamples = 1) {
n <- dim(dlo@interactions)[1]
idxw <- rep(TRUE, n)
idxr <- rep(TRUE, n)
idxn <- rep(TRUE, n)
idxa <- rep(TRUE, n)
if (width != 0)
idxw <- (loopWidth(dlo) >= width)
if (nreplicates != 0)
idxr <- apply(dlo@counts, 1, function(t) any(t >= nreplicates))
if (nsamples != 1 | nsamples != 0)
idxn <- apply(dlo@counts, 1, function(t) sum(as.numeric(t >=
nreplicates)) >= nsamples)
idxa <- (idxw & idxr & idxn)
ndlo <- subsetLoops(dlo, idxa)
if (width > 0)
ndlo <- removeSelfLoops(ndlo)
return(ndlo)
})
#' Determine genes contained within loops
#'
#' \code{loopGenes} determines all gene bodies partially or fully contained
#' in a loop.
#'
#' Function that annotates all loops. If there are multiple, the function returns a
#' comma separated list. Adds a "loopGenes" column to the rowData slot. If
#' the genesGR is left blank, diffloop will use protein coding genes for human
#' from hg19.
#'
#'
#' @param dlo A loops object
#' @param genesGR A GRanges object of genes in first mcol.
#'
#' @return A matrix of comma separated gene names
#'
#' @examples
#' # Determine the genes housed in the loops from our example
#' genes <- getHumanGenes()
#' rda<-paste(system.file('rda',package='diffloop'),'loops.small.rda',sep='/')
#' load(rda)
#' loops.small <- loopGenes(loops.small,genes)
#'
#' @import GenomicRanges
#' @importFrom IRanges IRanges
#' @export
setGeneric(name = "loopGenes", def = function(dlo, genesGR) standardGeneric("loopGenes"))
#' @rdname loopGenes
setMethod(f = "loopGenes", signature = c("loops", "GRanges"),
definition = function(dlo, genesGR) {
df <- summary(dlo)
gr.loop <- makeGRangesFromDataFrame(df, seqnames.field=c("chr_1"), start.field = "start_1", end.field = "end_2")
ovl <- findOverlaps(gr.loop, genesGR)
qh <- queryHits(ovl)
sh <- subjectHits(ovl)
values.t <- as.data.frame(tapply(mcols(genesGR[sh])[,1], qh, paste, collapse = ","))
#A lot of extra effort to handle regions with no values
colnames(values.t) <- "gbv"
vNA <- data.frame(matrix(NA, ncol = 1, nrow = as.numeric(dim(dlo)[2])))
colnames(vNA) <- "NAss"
ugly <- merge(vNA, values.t, by=0, all = TRUE, sort = FALSE)
ugly <- ugly[order(as.numeric(ugly$Row.names)), ]
values <- unname(ugly$gbv, force = TRUE)
dlo@rowData$loopGenes <- values
return(dlo)
})
#' @rdname loopGenes
setMethod(f = "loopGenes", signature = c("loops", "missing"),
definition = function(dlo, genesGR) {
df <- summary(dlo)
gr.loop <- makeGRangesFromDataFrame(df, seqnames.field=c("chr_1"), start.field = "start_1", end.field = "end_2")
geneinfo <- NULL
rda <- paste(system.file("rda", package = "diffloop"), "geneinfo.h.rda", sep = "/")
load(rda)
genesGR <- makeGRangesFromDataFrame(geneinfo[,c(1,2,3,4)], keep.extra.columns = TRUE)
ovl <- findOverlaps(gr.loop, genesGR)
qh <- queryHits(ovl)
sh <- subjectHits(ovl)
values.t <- as.data.frame(tapply(mcols(genesGR[sh])[,1], qh, paste, collapse = ","))
#A lot of extra effort to handle regions with no values
colnames(values.t) <- "gbv"
vNA <- data.frame(matrix(NA, ncol = 1, nrow = as.numeric(dim(dlo)[2])))
colnames(vNA) <- "NAss"
ugly <- merge(vNA, values.t, by=0, all = TRUE, sort = FALSE)
ugly <- ugly[order(as.numeric(ugly$Row.names)), ]
values <- unname(ugly$gbv, force = TRUE)
dlo@rowData$loopGenes <- values
return(dlo)
})
#' Loops within chromosomes
#'
#' \code{intrachromosomal} restricts interactions to those where anchors are observed
#' on the same chromosomes
#'
#' This function subsets the \code{loops} object into only those interactions that
#' have both anchors on the same chromosome
#'
#' @param dlo A loops object
#'
#' @return A loops object where all loops are on the same chromosome.
#'
#' @examples
#' rda<-paste(system.file('rda',package='diffloop'),'loops.small.rda',sep='/')
#' load(rda)
#'
#' # Compute number of interactions on same chromosome
#' dim(intrachromosomal(loops.small))
#' samechromo <- intrachromosomal(loops.small)
#'
#' @export
setGeneric(name = "intrachromosomal", def = function(dlo) standardGeneric("intrachromosomal"))
#' @rdname intrachromosomal
setMethod(f = "intrachromosomal", signature = c("loops"), definition = function(dlo) {
idx <- as.character(dlo@anchors[dlo@interactions[, 1]]@seqnames) ==
as.character(dlo@anchors[dlo@interactions[, 2]]@seqnames)
return(subsetLoops(dlo, idx))
})
#' Loops between chromosomes
#'
#' \code{interchromosomal} restricts loops to those where anchors are observed
#' on different chromosomes
#'
#' This function subsets the \code{loops} object into only those loops that
#' have anchors on different chromosomes
#'
#' @param dlo A loops object
#' @return A loops object with all loops on different chromosomes
#'
#' @examples
#' rda<-paste(system.file('rda',package='diffloop'),'loops.small.rda',sep='/')
#' load(rda)
#'
#' # Compute number of interactions on same chromosome
#' dim(intrachromosomal(loops.small))
#' samechromo <- intrachromosomal(loops.small)
#'
#' # Compute number of interactions on same chromosome
#' # dim(interchromosomal(loops.small))
#' # This will throw and error since the toy only has intrachromosomal loops
#'
#' @export
setGeneric(name = "interchromosomal", def = function(dlo) standardGeneric("interchromosomal"))
#' @rdname interchromosomal
setMethod(f = "interchromosomal", signature = c("loops"), definition = function(dlo) {
idx <- as.character(dlo@anchors[dlo@interactions[, 1]]@seqnames) !=
as.character(dlo@anchors[dlo@interactions[, 2]]@seqnames)
return(subsetLoops(dlo, idx))
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.