#' Summarize by features
#'
#' Compute summary statistics of interactions for a given feature set.
#' This includes the number of interactions (the sum of all interaction counts),
#' number of unique interactions and number of trans (i.e., interchromosomal) interations.
#' It also returns some statistics for the distances of interactions for all interactions of the feature,
#' and for the different interaction types, e.g., promoter-distal.
#'
#' @param GIObject An annotated \linkS4class{GenomicInteractions} object.
#' @param features A \linkS4class{GRanges} object containing the feature set.
#' @param feature.name String specifying the name of the feature set.
#' @param distance.method String specifying the method for calculating distances between anchors,
#' see \code{?\link{calculateDistances}}.
#' @param annotate.self Logical scalar indicating whether to annotate self interactions,
#' i.e., where a feature in `features` overlaps both anchors of an interaction.
#'
#' @return A data.frame with one line for each range in \code{features}.
#'
#' @docType methods
#' @import GenomicRanges
#' @importFrom stats median
#' @examples
#' data("hic_example_data")
#' hic_example_data <- updateObject(hic_example_data)
#'
#' data("mm9_refseq_promoters")
#' annotateInteractions(hic_example_data, list(promoter = mm9_refseq_promoters))
#' summariseByFeatures(hic_example_data, mm9_refseq_promoters[1:10], "promoter")
#' @rdname summariseByFeatures
#' @aliases summariseByFeatures
#' @export
setMethod("summariseByFeatures", "GenomicInteractions",
function(GIObject, features, feature.name, distance.method="midpoint", annotate.self=FALSE)
{
if(!.has_annotations(GIObject)) {
stop("GIObject has not been annotated")
}
potential.node.classes = unique(annotationFeatures(GIObject))
feature.names.full <- .get_gr_names(features)
feature.names <- unique(feature.names.full)
summary.df = data.frame(matrix(0,
ncol=(5 + (length(potential.node.classes) * 2) + ifelse(annotate.self, (length(potential.node.classes)-1) * 2, 0) + 5),
nrow=length(feature.names)))
summary.names = c(paste(capitalize(feature.name), "id", sep="."),
paste("numberOf", capitalize(feature.name), "Interactions", sep=""),
paste("numberOf", capitalize(feature.name), "UniqueInteractions", sep=""),
paste("numberOf", capitalize(feature.name), "InterChromosomalInteractions", sep=""),
paste("numberOf", capitalize(feature.name), "UniqueInterChromosomalInteractions", sep=""),
paste("numberOf", capitalize(feature.name), capitalize(potential.node.classes), "Interactions", sep=""),
paste("numberOfUnique", capitalize(feature.name), capitalize(potential.node.classes), "Interactions", sep=""),
paste(capitalize(feature.name), "DistanceMedian", sep=""),
paste(capitalize(feature.name), "DistanceMean", sep=""),
paste(capitalize(feature.name), "DistanceMinimum", sep=""),
paste(capitalize(feature.name), "DistanceMaximum", sep=""),
paste(capitalize(feature.name), "DistanceWeightedMedian", sep=""))
if (annotate.self){
pc= potential.node.classes[ -which(potential.node.classes == "distal")]
summary.names = append(summary.names, c(
paste("numberOfSelf", capitalize(feature.name), capitalize(pc), "Interactions", sep=""),
paste("numberOfSelfUnique", capitalize(feature.name), capitalize(pc), "Interactions", sep="")
))
}
names(summary.df) = summary.names
summary.df[,paste(capitalize(feature.name), "id", sep=".")] = feature.names
#hack
anchor_one <- anchorOne(GIObject)
anchor_two <- anchorTwo(GIObject)
one.ol = findOverlaps(features, anchor_one)
two.ol = findOverlaps(features, anchor_two)
one.indexes = queryHits(one.ol)[anchor_one[ subjectHits(one.ol) ]$node.class == feature.name]
two.indexes = queryHits(two.ol)[anchor_two[ subjectHits(two.ol) ]$node.class == feature.name]
features.with.interactions.indexes = unique(c(one.indexes, two.indexes))
features.with.interactions.indexes = features.with.interactions.indexes[order(features.with.interactions.indexes)]
feature.names.with.interactions = feature.names.full[ features.with.interactions.indexes]
#for(i in features.with.interactions.indexes){
for (fn in unique(feature.names.with.interactions)) {
#print(fn)
i = which(summary.df[,paste(capitalize(feature.name), "id", sep=".")]==fn)
iss = which(feature.names.full==fn)
#print(i)
#if(length(iss)>1){
# print(fn)
# print(i)
# print(iss)
#}
interactions = unique(c(subjectHits(one.ol[ queryHits(one.ol) %in% iss]), subjectHits(two.ol[ queryHits(two.ol) %in% iss])))
interactions.one = unique(subjectHits(one.ol[ queryHits(one.ol) %in% iss]))
interactions.two = unique(subjectHits(two.ol[ queryHits(two.ol) %in% iss]))
numberOfInteractions = sum(interactionCounts(GIObject)[ interactions ])
numberOfUniqueInteractions = length(interactions)
summary.df[i,paste("numberOf", capitalize(feature.name), "Interactions", sep="")] = numberOfInteractions
summary.df[i,paste("numberOf", capitalize(feature.name), "UniqueInteractions", sep="")] = numberOfUniqueInteractions
intercis = interactions[as.character(seqnames(anchor_one[interactions])) != as.character(seqnames( anchor_two[interactions]))]
if (length(intercis)>0) {
numberOfInterChromosomalInteractions = sum(interactionCounts(GIObject)[intercis])
summary.df[i,paste("numberOf", capitalize(feature.name), "InterChromosomalInteractions", sep="")] = numberOfInterChromosomalInteractions
numberOfUniqueInterChromosomalInteractions = length(intercis)
summary.df[i,paste("numberOf", capitalize(feature.name), "UniqueInterChromosomalInteractions", sep="")] = numberOfUniqueInterChromosomalInteractions
}
for (nc in potential.node.classes) {
nc1Counts = 0
nc1Indexes = c()
nc2Counts = 0
nc2Indexes = c()
if(length(interactions.one)>0){
nc1Indexes = interactions.one[which( anchor_one$node.class[interactions.one] == feature.name & anchor_two$node.class[interactions.one] == nc)]
nc1Counts = sum(interactionCounts(GIObject)[nc1Indexes])
}
if(length(interactions.two)>0){
nc2Indexes = interactions.two[which(anchor_one$node.class[interactions.two] == nc & anchor_two$node.class[interactions.two] == feature.name)]
if(nc == feature.name ){
if(length(nc2Indexes[ (nc2Indexes %in% nc1Indexes) ])>0) {
summary.df[i,paste("numberOfSelf", capitalize(feature.name), capitalize(nc), "Interactions", sep="")] = sum(interactionCounts(GIObject)[nc2Indexes[(nc2Indexes %in% nc1Indexes)]])
summary.df[i,paste("numberOfSelfUnique", capitalize(feature.name), capitalize(nc), "Interactions", sep="")] = length(nc2Indexes[(nc2Indexes %in% nc1Indexes)])
}
nc2Indexes = nc2Indexes[!(nc2Indexes %in% nc1Indexes)] # stop double counting of interactions where both anchors are in the same feature
}
nc2Counts = sum(interactionCounts(GIObject)[nc2Indexes])
}
if((nc1Counts > 0) | (nc2Counts > 0)){
numberOfNCInteractions = nc1Counts + nc2Counts
summary.df[i,paste("numberOf", capitalize(feature.name), capitalize(nc), "Interactions", sep="")] = numberOfNCInteractions
numberOfUniqueNCInteractions = length(c(nc1Indexes, nc2Indexes))
summary.df[i,paste("numberOfUnique", capitalize(feature.name), capitalize(nc), "Interactions", sep="")] = numberOfUniqueNCInteractions
}
}
# annotate.self .. ie id is the same id at both ends for different classes
if(annotate.self) {
for (nc in potential.node.classes) {
if (nc != feature.name & nc != "distal") {
ncs1Counts = 0
ncs1Indexes = c()
ncs2Counts = 0
ncs2Indexes = c()
if(length(interactions.one)>0){
ncs1Indexes = interactions.one[which(anchor_one$node.class[interactions.one] == feature.name & anchor_two$node.class[interactions.one] == nc)]
ncs1Indexes = names(features)[i] %in% elementMetadata(anchor_two)[[ paste(nc, "id", sep=".") ]][ncs1Indexes]
ncs1Counts = sum(interactionCounts(GIObject)[ncs1Indexes])
}
if(length(interactions.two)>0){
nc2sIndexes = interactions.two[which(anchor_one$node.class[interactions.two] == nc & anchor_two$node.class[interactions.two] == feature.name)]
ncs2Indexes = names(features)[i] %in% elementMetadata(anchor_one)[[ paste(nc, "id", sep=".") ]][ncs2Indexes]
ncs2Counts = sum(interactionCounts(GIObject)[ncs2Indexes])
}
if((ncs1Counts > 0) | (ncs2Counts > 0)){
numberOfNCSInteractions = ncs1Counts + ncs2Counts
summary.df[i,paste("numberOfSelf", capitalize(feature.name), capitalize(nc), "Interactions", sep="")] = numberOfNCInteractions
numberOfUniqueNCSInteractions = length(c(ncs1Indexes, ncs2Indexes))
summary.df[i,paste("numberOfSelfUnique", capitalize(feature.name), capitalize(nc), "Interactions", sep="")] = numberOfUniqueNCInteractions
}
}
}
}
distances = pairdist(GIObject[interactions])
dis = distances[!is.na(distances)]
wdis = rep(distances, interactionCounts(GIObject)[interactions])
wdis = wdis[!is.na(wdis)]
median.distance = median(dis)
median.distance = ifelse(is.infinite(median.distance) | is.nan(median.distance), NA, median.distance)
mean.distance = median(dis)
mean.distance = ifelse(is.infinite(mean.distance) | is.nan(mean.distance), NA, mean.distance)
min.distance = min(dis)
min.distance = ifelse(is.infinite(min.distance) | is.nan(min.distance), NA, min.distance)
max.distance = max(dis)
max.distance = ifelse(is.infinite(max.distance) | is.nan(max.distance), NA, max.distance)
wmedian.distance = median(wdis)
wmedian.distance = ifelse(is.infinite(wmedian.distance) | is.nan(wmedian.distance), NA, wmedian.distance)
summary.df[i,paste(capitalize(feature.name), "DistanceMedian", sep="")] = median.distance
summary.df[i,paste(capitalize(feature.name), "DistanceMean", sep="")] = mean.distance
summary.df[i,paste(capitalize(feature.name), "DistanceMinimum", sep="")] = min.distance
summary.df[i,paste(capitalize(feature.name), "DistanceMaximum", sep="")] = max.distance
summary.df[i,paste(capitalize(feature.name), "DistanceWeightedMedian", sep="")] = wmedian.distance
}
return(summary.df)
})
#' Summarise by feature pairs
#'
#' Calculat the number of interactions between two user-provided sets of features.
#' This allows the summarisation of the number of features of a specific type that a particular region is involved in,
#' and how many interactions exist between those features.
#'
#' @param GIObject An annotated \linkS4class{GenomicInteractions} object.
#' @param features.one A \linkS4class{GRanges} object containing the feature set of interest.
#' @param feature.name.one String containing the name of the first feature set of interest.
#' @param features.two A \linkS4class{GRanges} object containing the second feature set of interest.
#' @param feature.name.two String containing the name of the second feature set of interest.
#'
#' @return A data.frame with one line for each combination of \code{features.one} and \code{features.two}.
#'
#' @docType methods
#' @import GenomicRanges
#'
#' @examples
#' data("hic_example_data")
#' hic_example_data <- updateObject(hic_example_data)
#'
#' data("mm9_refseq_promoters")
#' data("thymus_enhancers")
#' annotateInteractions(hic_example_data, list(promoter = mm9_refseq_promoters, enhancer = thymus_enh))
#'
#' # can be slow so subset of features used for examples
#' regs <- regions(hic_example_data, type=1)
#' p <- unique(unlist(head(regs$promoter.id)))
#' e <- unique(unlist(head(regs$enhancer.id)))
#' p <- p[!is.na(p)]
#' p <- mm9_refseq_promoters[p]
#' e <- e[!is.na(e)]
#' e <- thymus_enh[e]
#' ep_summary <- summariseByFeaturePairs(hic_example_data, p, "promoter", e, "enhancer")
#' @rdname summariseByFeaturePairs
#' @export
setMethod("summariseByFeaturePairs", "GenomicInteractions",
function(GIObject, features.one, feature.name.one, features.two, feature.name.two)
{
if(!.has_annotations(GIObject)) {
stop("GIObject has not been annotated")
}
potential.node.classes = unique(annotationFeatures(GIObject))
feature.one.names.full <- .get_gr_names(features.one)
feature.one.names <- unique(feature.one.names.full)
feature.two.names.full <- .get_gr_names(features.two)
feature.two.names <- unique(feature.two.names.full)
x.gi = subsetByFeatures(GIObject, features.one)
x.gi = subsetByFeatures(x.gi, features.two)
anchor_one <- anchorOne(x.gi)
anchor_two <- anchorTwo(x.gi)
one.one.ol = findOverlaps(features.one, anchor_one)
two.one.ol = findOverlaps(features.one, anchor_two)
f1.one.f2.two = subjectHits(one.one.ol)[
which(anchor_one[ subjectHits(one.one.ol) ]$node.class == feature.name.one &
anchor_two[ subjectHits(one.one.ol) ]$node.class == feature.name.two)]
f1.two.f2.one = subjectHits(two.one.ol)[
which(anchor_two[ subjectHits(two.one.ol) ]$node.class == feature.name.one &
anchor_one[ subjectHits(two.one.ol) ]$node.class == feature.name.two)]
results = NULL
# this now results in a GI object only contain feature.name.one:feature.name.two interactions
x.gi = x.gi[unique(c(f1.one.f2.two, f1.two.f2.one)),]
one.one.ol = findOverlaps(features.one, anchor_one)
two.one.ol = findOverlaps(features.one, anchor_two)
one.two.ol = findOverlaps(features.two, anchor_one)
two.two.ol = findOverlaps(features.two, anchor_two)
one.indexes = queryHits(one.one.ol)[anchor_one[ subjectHits(one.one.ol) ]$node.class == feature.name.one]
two.indexes = queryHits(two.one.ol)[anchor_two[ subjectHits(two.one.ol) ]$node.class == feature.name.one]
features.one.with.interactions.indexes = unique(c(one.indexes, two.indexes))
all.features.one.with.interactions = features.one[features.one.with.interactions.indexes]
feature.ones.id = feature.one.names.full[features.one.with.interactions.indexes]
for( fn in unique(feature.ones.id)){
#print(fn)
iss = which(feature.one.names.full ==fn)
interactions = unique(c(subjectHits(one.one.ol[ queryHits(one.one.ol) %in% iss]), subjectHits(two.one.ol[ queryHits(two.one.ol) %in% iss])))
interactions.one = unique(subjectHits(one.one.ol[ queryHits(one.one.ol) %in% iss]))
interactions.two = unique(subjectHits(two.one.ol[ queryHits(two.one.ol) %in% iss]))
features.two.involved.one = unique(unlist(elementMetadata(anchorOne(x.gi))[[paste(feature.name.two, "id", sep=".")]][interactions.two]))
features.two.involved.two = unique(unlist(elementMetadata(anchorTwo(x.gi))[[paste(feature.name.two, "id", sep=".")]][interactions.one]))
#print(unique(c(features.two.involved.one, features.two.involved.two)))
for( fn.two in unique(c(features.two.involved.one, features.two.involved.two))){
#print(feature.two)
iss.two = which(feature.two.names.full ==fn.two)
#print(iss.two)
indexes = unique(intersect(interactions, unique(c(subjectHits(one.two.ol[ queryHits(one.two.ol) %in% iss.two]),
subjectHits(two.two.ol[ queryHits(two.two.ol) %in% iss.two])))))
counts = sum(interactionCounts(x.gi)[indexes])
#print(counts)
results = rbind(results, c(fn, fn.two, counts))
}
}
results = data.frame(results[,1], results[,2], as.numeric(results[,3]))
colnames(results) = c(paste(capitalize(feature.name.one), "id", sep="."),
paste(capitalize(feature.name.two), "id", sep="."),
"counts")
return(results)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.