## ----style-knitr, eval=TRUE, echo=FALSE, results="asis"--------------------
# Apply the BiocStyle LaTeX setup for this vignette.
BiocStyle::latex()
## ----include=FALSE---------------------------------------------------------
# Chunk option: disable concordance.
library(knitr)
opts_chunk$set(concordance = FALSE)
## ----include=FALSE---------------------------------------------------------
# Chunk option: grey ("#C0C0C0") background for the code chunks.
library(knitr)
opts_chunk$set(background = "#C0C0C0")
## ----options,echo=FALSE-----------------------------------
# Limit printed output to 60 characters per line.
options(width = 60)
## ----include=FALSE----------------------------------------
library(GenomicRanges)
## ----results="hide"---------------------------------------
library(TFARM)
## ---------------------------------------------------------
# Load and visualize the dataset:
# (prints the length of MCF7_chr1, then the object itself)
data("MCF7_chr1")
length(MCF7_chr1)
MCF7_chr1
## ---------------------------------------------------------
# Coming back to the example on the transcription factors of cell line
# MCF-7, in the promoter regions of chromosome 1.
# Suppose that the user wants to find the most relevant association
# rules for the prediction of the presence of TEAD4. This means extracting
# all the association rules with right-hand-side equal to {TEAD4=1}
# setting the parameter type = TRUE; the minimum support and minimum
# confidence thresholds are set, as an example, to 0.005 and 0.62,
# respectively:
r_TEAD4 <- rulesGen(MCF7_chr1, "TEAD4=1", 0.005, 0.62, TRUE)
# Inspect the size and the first rows of the extracted rule set:
dim(r_TEAD4)
head(r_TEAD4)
## ---------------------------------------------------------
# Transcription factors present in at least one of the regions:
# (the names `c` and `lc` are kept as in the original script; note that the
# variable `c` shadows base::c, although calls such as c(...) still resolve
# to the function)
c <- names(mcols(MCF7_chr1))
c
lc <- length(c)
# presAbs is evaluated once on the full vector of transcription factors and
# its result is reused below, instead of recomputing it for every query.
pres_abs <- presAbs(c, r_TEAD4, TRUE)
names(pres_abs)
# Transcription factors present in at least one of the association rules:
p_TFs <- pres_abs$pres
p_TFs
# Transcription factors absent in all the association rules:
a <- pres_abs$abs
a
## ---------------------------------------------------------
# Subset of the rules in r_TEAD4 that contain the transcription factor FOSL2,
# followed by its first rows and its number of rows:
r_FOSL2 <- rulesTF(TFi = "FOSL2=1", rules = r_TEAD4, verbose = TRUE)
head(r_FOSL2)
nrow(r_FOSL2)
## ---------------------------------------------------------
# If none of the rules in input to rulesTF contains the given item TFi,
# and verbose = TRUE, a warning message is reported to the user:
r_CTCF <- rulesTF(TFi = "CTCF=1", rules = r_TEAD4, verbose = TRUE)
## ----results="hide"---------------------------------------
# For example to evaluate FOSL2 importance in the set of rules r_FOSL2:
r_noFOSL2 <- rulesTF0("FOSL2=1", r_FOSL2, r_TEAD4, MCF7_chr1, "TEAD4=1")
## ---------------------------------------------------------
# Label the rows of both rule sets with the position of each FOSL2 rule
# (matched on its left-hand side) inside r_TEAD4:
fosl2_rule_idx <- match(r_FOSL2$lhs, r_TEAD4$lhs)
row.names(r_FOSL2) <- fosl2_rule_idx
row.names(r_noFOSL2) <- fosl2_rule_idx
head(r_noFOSL2)
## ----IComp, fig.show='hide', fig.width=12, fig.height=5----
# Perform the IComp function to compute the Importance Index distribution,
# then inspect each component of its result:
imp_FOSL2 <- IComp("FOSL2=1", r_FOSL2, r_noFOSL2, figures = TRUE)
names(imp_FOSL2)
imp_FOSL2$imp
head(imp_FOSL2$delta)
head(imp_FOSL2$rwi)
head(imp_FOSL2$rwo)
## ----results="hide"---------------------------------------
# For the considered example the user could run:
# For every candidate transcription factor in p_TFs: extract the rules that
# contain it (rulesTF), build the corresponding rules without it (rulesTF0)
# and compute its Importance Index results (IComp).
all <- lapply(p_TFs, function(tf) {
  A <- rulesTF(tf, r_TEAD4, FALSE)
  B <- rulesTF0(tf, A, r_TEAD4, MCF7_chr1, "TEAD4=1")
  IComp(tf, A, B, figures = FALSE)
})
# Importance Index distribution of each transcription factor. IMP_Z is built
# here in one step: the previous loop assigned into IMP_Z by index without
# ever creating it, which stops with "object 'IMP_Z' not found".
IMP_Z <- lapply(all, function(res) res$imp)
# Extract the delta variations of support and confidence: mean of the first
# and of the second column of each delta table, one value per
# transcription factor.
DELTA_mean_supp <- vapply(all, function(res) mean(res$delta[[1]]), numeric(1))
DELTA_mean_conf <- vapply(all, function(res) mean(res$delta[[2]]), numeric(1))
# One row per transcription factor: mean and standard deviation of its
# Importance Index, mean support/confidence variations and number of rules.
IMP <- data.frame(
  TF = p_TFs,
  imp = vapply(IMP_Z, mean, numeric(1)),
  sd = vapply(IMP_Z, sd, numeric(1)),
  delta_support = DELTA_mean_supp,
  delta_confidence = DELTA_mean_conf,
  nrules = lengths(IMP_Z),
  stringsAsFactors = FALSE
)
library(plyr)
## ---------------------------------------------------------
# Sort by imp column of IMP
IMP.ord <- arrange(IMP, desc(imp))
IMP.ord
## ----IPCA, fig.show='hide', fig.width=9, fig.height=7.5----
# Select the candidate co-regulators and the number of rules
# associated with them, then perform the Principal Component Analysis:
colnames(IMP)
TF_Imp <- data.frame(IMP$TF, IMP$imp, IMP$nrules)
# DELTA was passed to IPCA without ever being defined in this script.
# It is built here from the mean support and confidence variations stored
# in IMP, selected by name so the choice does not depend on column order.
DELTA <- IMP[, c("delta_support", "delta_confidence")]
i.pc <- IPCA(DELTA, TF_Imp)
# Inspect the components of the result:
names(i.pc)
i.pc$summary
head(i.pc$loadings)
head(i.pc$scores)
## ----distribViz, fig.show='hide', fig.width=10, fig.height=6----
# Considering for example the candidate co-regulators
# found in the set of rules r_TEAD4:
distribViz(IMP_Z, p_TFs)
## ---------------------------------------------------------
# Select the index of the list of importances IMP_Z
# containing importance distributions of transcription factor ZNF217
ZNF217_index <- which(p_TFs == "ZNF217=1")
# Select outlier rules where ZNF217 has importance greater than 0
o <- IMP_Z[[ZNF217_index]] > 0
rule_o <- all[[ZNF217_index]]$rwi[o, ]
# Display one of these rules, for example the sixth
rule_o[6, ]
# So, ZNF217 is very relevant in the pattern of transcription factors
# {FOSL2=1,GABPA=1,MYC=1,MAX=1,ZNF217=1}
# for the prediction of the presence of TEAD4.
# To extract support, confidence and lift of the corresponding rule
# without ZNF217:
# (stored under its own name: assigning it to `all` would overwrite the list
# of per-transcription-factor results, so this chunk could not be run twice)
rule_o_wo <- all[[ZNF217_index]]$rwo[o, ]
rule_o_wo[6, ]
# Since the measures of the rule obtained removing ZNF217 are equal to zero,
# the rule {FOSL2=1,GABPA=1,MYC=1,MAX=1,ZNF217=0} -> {TEAD4=1},
# obtained removing ZNF217, is not found in the relevant rules for the
# prediction of the presence of TEAD4.
## ---------------------------------------------------------
# Build every unordered pair of the transcription factors that appear in at
# least one association rule (one pair per column of couples_0), then write
# each pair as a single comma-separated item:
couples_0 <- combn(p_TFs, 2)
couples <- paste0(couples_0[1, ], ",", couples_0[2, ])
head(couples)
## ----results="hide"---------------------------------------
# The evaluation of the mean Importance Index of each pair is
# then computed similarly as previously done for single transcription factors:
# Compute rulesTF, rulesTF0 and IComp for each pair, avoiding pairs not
# found in the r_TEAD4 set of rules
IMP_c <- lapply(couples, function(ci) {
  A_c <- rulesTF(ci, r_TEAD4, FALSE)
  # A missing value in the first cell of the rulesTF output is treated as
  # "pair not found"; such pairs yield NULL and are dropped below.
  # !anyNA() is used rather than all(!is.na()) because this script also
  # assigns a variable named `all`.
  if (!anyNA(A_c[[1]][1])) {
    B_c <- rulesTF0(ci, A_c, r_TEAD4, MCF7_chr1, "TEAD4=1")
    IComp(ci, A_c, B_c, figures = FALSE)$imp
  } else {
    NULL
  }
})
# Delete all NULL elements and compute the mean Importance Index of each pair
# The columns are named TF and imp, like the first two columns of IMP, so
# that the two tables can later be combined row-wise.
I_c <- data.frame(TF = couples, imp = 0, stringsAsFactors = FALSE)
# Logical mask of the pairs for which no importance was computed:
null.indexes <- vapply(IMP_c, is.null, logical(1))
IMP_c <- IMP_c[!null.indexes]
I_c <- I_c[!null.indexes, ]
I_c$imp <- vapply(IMP_c, mean, numeric(1))
## ---------------------------------------------------------
# Select rows with mean Importance Index different from NaN, then order I_c:
I_c <- I_c[!is.na(I_c[, 2]), ]
I_c_ord <- arrange(I_c, desc(imp))
head(I_c_ord)
## ----heatmap, fig.show='hide', fig.width=15, fig.height=15----
# Build the table of mean Importance Index values shown in the heat map:
# the rows of single transcription factors (first two columns of IMP) are
# stacked on top of the rows of pairs (I_c_ord) and the result is ordered
# by decreasing imp.
single_and_pairs <- rbind(IMP[, 1:2], I_c_ord)
I_c_2 <- arrange(single_and_pairs, desc(imp))
# Strip the "=1" suffix from the labels before plotting.
# NOTE(review): sub() removes only the first "=1" of each string, so a pair
# label such as "A=1,B=1" becomes "A,B=1" -- confirm this is what heatI
# expects.
p_TFs <- sub("=1", "", p_TFs, fixed = TRUE)
I_c_2$TF <- sub("=1", "", I_c_2$TF, fixed = TRUE)
i.heat <- heatI(p_TFs, I_c_2)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.