#' @importFrom S4Vectors metadata DataFrame
#' @importFrom GenomeInfoDb seqinfo seqnames seqlevelsInUse seqlevels
#' @importFrom IRanges IRanges
#' @importFrom InteractionSet GInteractions anchors
#' @importFrom GenomicRanges GRanges start end seqnames findOverlaps reduce
#' @importFrom plyr dlply . alply ddply
#' @importFrom stats p.adjust smooth.spline dpois ppois quantile
#' @importFrom futile.logger flog.info
#' @importFrom methods is
#' @importFrom BiocParallel bpstart bpstop bpisup bpparam bpworkers bplapply
#' @import BH
#' @importFrom bigmemory filebacked.big.matrix describe attach.big.matrix as.matrix
#' @importClassesFrom bigmemory big.matrix
############################################# Main function for stage 4:
#--------------------------------------------
# Main function for interaction calls: GENERAL NOTE: Currently the inter are not
# supported. If not intra is found either the algorithm will return error of
# empty data.
#--------------------------------------------
Stage_4_Main_fun = function(SA_prefix, S4_AnalysisDir, S4_FitObject, S4_IntraObject = NULL,
S4_InterObject = NULL, S4_FDR_peak, S4_method, S4_image, S4_minPETs, S4_PeakExt) {
#---------------
# NOTE: inter not included at the time so set them to NULL:
#---------------
S4_InterObject = NULL
# global variables for Rcheck: Take time:
Analysis.time.start = Sys.time()
# create folder to save data:
if (!dir.exists(S4_AnalysisDir))
dir.create(S4_AnalysisDir)
#----------------------------
# Write in file:
#----------------------------
futile.logger::flog.info(paste("Minimum number of allowed interaction PETs is set to:",
S4_minPETs), name = "SA_LogFile", capture = FALSE)
futile.logger::flog.info(paste("FDR cut-off of peaks to be used in the analysis: ",
S4_FDR_peak), name = "SA_LogFile", capture = FALSE)
#--------------------------------------------
#------Prepare the data for the interactions:
#--------------------------------------------
cat("Preparing interactions data...\n")
InteractionData = Prepare_InteractionsData_fun(SA_prefix = SA_prefix, S4_AnalysisDir = S4_AnalysisDir,
S4_FitObject = S4_FitObject, S4_IntraObject = S4_IntraObject, S4_InterObject = S4_InterObject,
S4_FDR_peak = S4_FDR_peak, S4_image = S4_image, S4_minPETs = S4_minPETs,
S4_PeakExt = S4_PeakExt)
#--------------------------------------------
#------Run interaction analysis:
#--------------------------------------------
cat("|=================== Running interactions analysis ===================|\n")
InteractionResults = Run_InteractionAnalysis_fun(InteractionData = InteractionData,
S4_method = S4_method)
futile.logger::flog.info("=====================================", name = "SA_LogFile",
capture = FALSE)
futile.logger::flog.info(paste("Total interactions processed: ", InteractionResults$TotIntAdded),
name = "SA_LogFile", capture = FALSE)
futile.logger::flog.info(paste("Total bi-products removed: ", InteractionResults$TotBiRem),
name = "SA_LogFile", capture = FALSE)
#--------------------------------------------
#----Create Genome Map object (suppressWarnings for trimming)
#--------------------------------------------
suppressWarnings(Create_GenomeMapObject(InteractionResults = InteractionResults,
SA_prefix = SA_prefix, S4_AnalysisDir = S4_AnalysisDir))
futile.logger::flog.info("The Genome map is successfully built!", name = "SA_LogFile",
capture = FALSE)
#--------------------------------------------
#------print and write the end:
#--------------------------------------------
futile.logger::flog.info("=====================================", name = "SA_LogFile",
capture = FALSE)
futile.logger::flog.info("Stage 4 is done!", name = "SA_LogFile", capture = FALSE)
futile.logger::flog.info(paste("Analysis results for stage 4 are in:\n", S4_AnalysisDir),
name = "SA_LogFile", capture = FALSE)
# save time:
Analysis.time.end = Sys.time()
Total.Time = Analysis.time.end - Analysis.time.start
LogFile = paste("Total stage 4 time:", Total.Time, " ", units(Total.Time))
futile.logger::flog.info(LogFile, name = "SA_LogFile", capture = FALSE)
}
# done
#----------------------------
# Main function for preparing the data
#----------------------------
Prepare_InteractionsData_fun = function(SA_prefix, S4_AnalysisDir, S4_FitObject,
S4_IntraObject, S4_InterObject, S4_FDR_peak, S4_image, S4_minPETs, S4_PeakExt) {
#----------------------------
# Prepare the peaks data:
# suppress trimming warnings which I dont need
#----------------------------
PeaksSplit = suppressWarnings(Get_PeaksSplit_fun(S4_FitObject = S4_FitObject, S4_FDR_peak = S4_FDR_peak,
S4_PeakExt = S4_PeakExt))
#----------------------------
# Get the PETs split data
#----------------------------
PETsSplit = Get_PETsSplit_fun(S4_IntraObject = S4_IntraObject, S4_InterObject = S4_InterObject)
#----------------------------
# Find overlaps of PETs with the peaks
#----------------------------
ConnectionPETsData = Create_ConnectionPETsData_fun(PETsData = PETsSplit$PETsData,
PeaksGR = PeaksSplit$PeaksGR)
#----------------------------
# Create the structures used in the analysis. BigMatrices, Networks etc.
#----------------------------
InteractionData = Create_InteractionStructures_fun(SA_prefix = SA_prefix, S4_AnalysisDir = S4_AnalysisDir,
ConnectionPETsData = ConnectionPETsData, PeaksSplit = PeaksSplit, PETsSplit = PETsSplit,
S4_image = S4_image, S4_minPETs = S4_minPETs)
return(InteractionData)
}
# done
#----------------------------
# Function for preparing the Peaks
#----------------------------
Get_PeaksSplit_fun = function(S4_FitObject, S4_FDR_peak, S4_PeakExt) {
# Rcheck:
FDR=NULL
queryHits=NULL
#------
# take Peak data and subset by significance:
#------
PeakData = S4Vectors::metadata(S4_FitObject)$Peaks.Info
NGlobalPeaks = nrow(PeakData) #for print, and image
PeakData = subset(PeakData, FDR < S4_FDR_peak) #subset the data
SeqInfo = GenomeInfoDb::seqinfo(S4_FitObject) #to save again
# keep what you need for the analysis:
PeakData = PeakData[, c("Chrom", "Peak.Summit", "CIQ.Up.start", "CIQ.Down.end", "FDR")]
NFDRPeaks = nrow(PeakData) #the total number of Peaks left in the data after FDR
#------
# write in file and print:
#------
if (NFDRPeaks == 0)
stop("No peaks to use for interaction analysis on FDR ", S4_FDR_peak, "! Use a higher threshold!",
call. = FALSE)
futile.logger::flog.info(paste("Total peaks passing the FDR cut-off:", NFDRPeaks,
"(", NFDRPeaks/NGlobalPeaks * 100, "% of the total peaks )"), name = "SA_LogFile",
capture = FALSE)
#------
# Extend the peaks from their center:
#------
futile.logger::flog.info(paste("Extending peak intervals by", S4_PeakExt, "bp on either side."),
name = "SA_LogFile", capture = FALSE)
LeftInterval = PeakData$CIQ.Up.start - S4_PeakExt
RightInterval = PeakData$CIQ.Down.end + S4_PeakExt
#------
# Merge overlapping and create the final PeaksGR object
#------
futile.logger::flog.info("Merging overlapping peaks...Done", name = "SA_LogFile",
capture = FALSE)
NM_PeaksIR = IRanges::IRanges(start = LeftInterval, end = RightInterval)
NM_PeaksGR = GenomicRanges::GRanges(seqnames = PeakData$Chrom, ranges = NM_PeaksIR,
seqinfo = SeqInfo) #not merged yet
PeaksGR = GenomicRanges::reduce(x = NM_PeaksGR, min.gapwidth = 0, ignore.strand = TRUE) #merged
NPeaksMerged = length(PeaksGR) #total peaks after merging
# Find overlaps from PeaksGR to NM_PeaksGR to decide the summit based on FDR
MergedNotMergedOv = GenomicRanges::findOverlaps(query = PeaksGR, subject = NM_PeaksGR,
ignore.strand = TRUE)
MergedNotMergedOv = as.data.frame(MergedNotMergedOv)
MergedNotMergedOv = MergedNotMergedOv[with(MergedNotMergedOv, order(queryHits)),]
# find the summit:
PeakSummit = Get_NewPeakSummit_fun_Rcpp(MergedNotMergedOv$queryHits,
MergedNotMergedOv$subjectHits, PeakData$Peak.Summit, PeakData$FDR,
nrow(MergedNotMergedOv), NPeaksMerged)
# save:
PeaksGR$PeakSummit = PeakSummit
PeaksGR$LID = seq_len(NPeaksMerged)
futile.logger::flog.info(paste("Total peaks after merging: ", NPeaksMerged),
name = "SA_LogFile", capture = FALSE)
return(list(PeaksGR = PeaksGR, NFDRPeaks = NFDRPeaks, NGlobalPeaks = NGlobalPeaks,
NPeaksMerged = NPeaksMerged))
}
# done
#----------------------------
#--Function for the intra/inter ligated pets
#----------------------------
Get_PETsSplit_fun = function(S4_IntraObject, S4_InterObject) {
#----------------------------
# Merge the Intra-inter into one data: this will be used to check overlaps: But
# first take cases for what exists:
#----------------------------
if (!is.null(S4_IntraObject) & !is.null(S4_InterObject)) {
cat("Merging Intra- and Inter-chromosomal PETs data...")
Anchor_1_intra = InteractionSet::anchors(S4_IntraObject, type = "first")
Anchor_2_intra = InteractionSet::anchors(S4_IntraObject, type = "second")
Anchor_1_inter = InteractionSet::anchors(S4_InterObject, type = "first")
Anchor_2_inter = InteractionSet::anchors(S4_InterObject, type = "second")
Anchor_1_both = c(Anchor_1_intra, Anchor_1_inter)
Anchor_2_both = c(Anchor_2_intra, Anchor_2_inter)
PETsData = InteractionSet::GInteractions(anchor1 = Anchor_1_both, anchor2 = Anchor_2_both)
} else if (!is.null(S4_IntraObject)) {
cat("Including only Intra-ligated PETs in the analysis (Inter-ligated are empty)...")
Anchor_1_intra = InteractionSet::anchors(S4_IntraObject, type = "first")
Anchor_2_intra = InteractionSet::anchors(S4_IntraObject, type = "second")
PETsData = InteractionSet::GInteractions(anchor1 = Anchor_1_intra, anchor2 = Anchor_2_intra)
} else {
cat("Including only Inter-ligated PETs in the analysis (Intra-ligated are empty)...")
Anchor_1_inter = InteractionSet::anchors(S4_InterObject, type = "first")
Anchor_2_inter = InteractionSet::anchors(S4_InterObject, type = "second")
PETsData = InteractionSet::GInteractions(anchor1 = Anchor_1_inter, anchor2 = Anchor_2_inter)
}
NGlobalInterPETs = length(PETsData) #the total connection PETs in data
if (NGlobalInterPETs == 0)
stop("No Intra/Inter-chromosomal PETs to use for interaction analysis!",
call. = FALSE)
cat("Done\n")
return(list(PETsData = PETsData, NGlobalInterPETs = NGlobalInterPETs))
}
# done
#----------------------------
# function creating the connection data: This function uses the Anchor 1 and 2 of
# each interaction PET in PETsData And it seperately finds overlaps with each
# Peak in PeaksGR . The resulting data will then be used to classify each hit/Tag
# in interactions PETs by the Get_PETsInfoMat_fun_Rcpp c++ funtion
#----------------------------
Create_ConnectionPETsData_fun = function(PETsData, PeaksGR) {
# Rcheck:
query = NULL
# -------
cat("Intersecting the PETs with the peaks...")
#----------------------------
# Get single overlap for Anchor 1 in PETsData with PeaksGR
#----------------------------
Anchor1 = InteractionSet::anchors(x = PETsData, type = "first")
Anchor1ovlp = GenomicRanges::findOverlaps(query = Anchor1, subject = PeaksGR,
ignore.strand = TRUE, type = "any", select = "all")
Anchor1ovlp = as(Anchor1ovlp, "DataFrame")
if (nrow(Anchor1ovlp) == 0)
stop("None of the intra/inter-ligation PETs overlaps with any Peak!", call. = FALSE)
names(Anchor1ovlp) = c("query", "subject")
Anchor1ovlp$Type = 1 #means anchor 1.
# Find the tag to be used in the classification process
Anchor1ovlp$Tag = (GenomicRanges::start(Anchor1[Anchor1ovlp$query]) + GenomicRanges::end(Anchor1[Anchor1ovlp$query]))/2
#----------------------------
# Get single overlap for Anchor 2 with PeaksGR
#----------------------------
Anchor2 = InteractionSet::anchors(x = PETsData, type = "second")
Anchor2ovlp = GenomicRanges::findOverlaps(query = Anchor2, subject = PeaksGR,
ignore.strand = TRUE, type = "any", select = "all")
Anchor2ovlp = as(Anchor2ovlp, "DataFrame")
if (nrow(Anchor2ovlp) == 0)
stop("None of the intra/inter-ligation PETs overlaps with any Peak!", call. = FALSE)
names(Anchor2ovlp) = c("query", "subject")
Anchor2ovlp$Type = 2 #means anchor 2
# Find the tag to be used in the classification process
Anchor2ovlp$Tag = (GenomicRanges::start(Anchor2[Anchor2ovlp$query]) + GenomicRanges::end(Anchor2[Anchor2ovlp$query]))/2
#----------------------------
# Merge the Anchors and sort by query name (Tag): Then it is easier to subset by
# query in c++
#----------------------------
ConPETsData = rbind(Anchor1ovlp, Anchor2ovlp)
NIntTagsloop = nrow(ConPETsData) #the total number of tags to classify in c++
OrderQuery = order(ConPETsData$query)
ConPETsData = ConPETsData[OrderQuery, ]
#----------------------------
# Give information that you need to run the classification in c++:
#----------------------------
ConPETsData$LID = PeaksGR$LID[ConPETsData$subject]
ConPETsData$PeakSummit = PeaksGR$PeakSummit[ConPETsData$subject]
cat("Done\n")
return(list(ConPETsData = ConPETsData, NIntTagsloop = NIntTagsloop))
}
# done
#----------------------------
# Main function for creating the interaction structures used in the analysis
#----------------------------
Create_InteractionStructures_fun = function(SA_prefix, S4_AnalysisDir, ConnectionPETsData,
PeaksSplit, PETsSplit, S4_image, S4_minPETs) {
# Rcheck:
V1 = NULL
V2 = NULL
#----------------------------
# Create the PETsInfoMat:
#----------------------------
cat("Counting PETs in the peaks...")
PETsInfoMat = Get_PETsInfoMat_fun_Rcpp(ConnectionPETsData$ConPETsData$query,
ConnectionPETsData$ConPETsData$Type, ConnectionPETsData$ConPETsData$Tag,
ConnectionPETsData$ConPETsData$LID, ConnectionPETsData$ConPETsData$PeakSummit,
PETsSplit$NGlobalInterPETs, ConnectionPETsData$NIntTagsloop)
if (is.null(PETsInfoMat))
stop("None of the intra/inter-ligation PETs overlaps with two Peaks!", call. = FALSE)
#----------------------------
# Turn PETsInfoMat into a data.frame and split by V1 (PBS-i),and V2 (PBS-j) to
# get statistics. At the end of the following the InteractionInfo Will have
# col-1, PBS-i, col2-PBSj and col3 total PETs. The PBS ids are in R index
#----------------------------
PETsInfoMat = as.data.frame(PETsInfoMat)
InteractionInfo = plyr::dlply(.data = PETsInfoMat, plyr::.(V1, V2), .fun = Get_PETsStats_fun,
S4_minPETs = S4_minPETs)
InteractionInfo = do.call(rbind, InteractionInfo)
NInteractions = nrow(InteractionInfo) # the potential interactions
if (NInteractions == 0)
stop("No candidate interactions found at threshold S4_minPETs = ", S4_minPETs,
". Try reducing S4_minPETs.", call. = FALSE)
rownames(InteractionInfo) = NULL
NPETsInvolved = sum(InteractionInfo[, c(3)]) # total number of PETs involved
#----------------------------
# Find the chromosome combinations and return the data.
#----------------------------
ChromData = as.character(GenomicRanges::seqnames(PeaksSplit$PeaksGR))
ChromCombData = Get_ChromCombData_fun(InteractionInfo = InteractionInfo, ChromData = ChromData)
InteractionInfo = ChromCombData$InteractionInfo #used for building the InteractionsMatrix
UniqueChromComb = ChromCombData$UniqueChromComb #Info about the Chromosome combinations, not used after next line
NPeaksInvolved = sum(UniqueChromComb$TotPeaks) #Peaks involved in general
NetworkBuildData = ChromCombData$NetworkBuildData #used for building the genomic networks
cat("Done\n")
#----------------------------
# Summarize
#----------------------------
futile.logger::flog.info(paste("Total", NInteractions, "candidate interactions will be processed"),
name = "SA_LogFile", capture = FALSE)
futile.logger::flog.info(paste("Total", NPeaksInvolved, "peaks are involved in potential interactions",
"(", NPeaksInvolved/PeaksSplit$NPeaksMerged * 100, "% of the total FDR peaks )"),
name = "SA_LogFile", capture = FALSE)
futile.logger::flog.info(paste("Total", NPETsInvolved, "PETs are involved in potential interactions",
"(", NPETsInvolved/PETsSplit$NGlobalInterPETs * 100, "% of the total interaction PETs )"),
name = "SA_LogFile", capture = FALSE)
#----------------------------
# create the InteractionInfMat:
#----------------------------
# rows=NInteractions, each row corresponds to one interaction col 1/2: the left
# LID on the PeaksData (R indeces) col 3/4 are the ids on the network element (R
# indeces for node names, changing) those are used for bi-products and for
# finding paths when peaks are merged. col 5/6 are the node ids for the Adj
# matrices, not changing, col 7/8 are node IDS for the NiNjMat, not changing. The
# NiNjMat is global in case you also include inter afterwards. col 9/10 p-value
# and FDR, col 11, the nij of the interaction col 12, the QCell ID in the
# expected matrix for the DVijij(NA on inter) col 13:
# order of the interaction col 14 is the Chromosome combination ID of the
# interaction col 15 is the intra indicator(intra=1, inter=0)
cat("Summarizing interaction information...")
InteractionInfMat = matrix(0, nrow = NInteractions, ncol = 15)
colnames(InteractionInfMat) = c("PBS_i", "PBS_j", "AdjBiNode_i", "AdjBiNode_j",
"AdjNode_i", "AdjNode_j", "NiNjNode_i", "NiNjNode_j", "pvalue", "FDR", "nij",
"QCell", "Order", "Chrom12ID", "IntraID")
#----------------------------
# fill up InteractionInfMat, dont need to return it, it is by reference Also
# count the observed PETs in each combination.
#----------------------------
Indeces_Vij = Initiate_InteractionInfMat_fun_Rcpp(InteractionInfMat, InteractionInfo,
NPeaksInvolved, NInteractions)
cat("Done\n")
#----------------------------
# plot:
#----------------------------
if (S4_image) {
Get_image_S4_P1_fun(S4_AnalysisDir = S4_AnalysisDir, SA_prefix = SA_prefix,
NGlobalInterPETs = PETsSplit$NGlobalInterPETs, NPETsInvolved = NPETsInvolved,
NFDRPeaks = PeaksSplit$NFDRPeaks, NGlobalPeaks = PeaksSplit$NGlobalPeaks,
NPeaksInvolved = NPeaksInvolved)
}
#----------------------------
# Create the Newtorks as well as the Adjucency matrices
#----------------------------
NetworksData = Create_Networks_fun(NetworkBuildData = NetworkBuildData, PeakSummit = PeaksSplit$PeaksGR$PeakSummit,
SA_prefix = SA_prefix, S4_AnalysisDir = S4_AnalysisDir)
#----------------------------
# return:
#----------------------------
return(list(PeaksGR = PeaksSplit$PeaksGR, InteractionInfMat = InteractionInfMat,
NInteractions = NInteractions, AllInteIndeces = Indeces_Vij$AllInteIndeces,
NiNjMat = Indeces_Vij$NiNjMat, NetworksData = NetworksData))
}
# done
#----------------------------
# Function for getting the sampled statistics for each PET group
#----------------------------
Get_PETsStats_fun = function(x, S4_minPETs) {
#----------------------------
# check if Total PETs are above the S4_minPETs:
#----------------------------
N_x = nrow(x) #Pets in the interaction
if (N_x < S4_minPETs)
return(NULL)
#----------------------------
# Else return the total PETs for each interaction:
#----------------------------
InteractionInfo_x = c(x$V1[1], x$V2[1], N_x)
return(InteractionInfo_x)
}
# done
#----------------------------
# Function to give new node names to peaks and separate them by chromosome
# combination Important: This functions works for intra. It will not crash if
# Intra are not empty. If intra are empty, the algorithm will return an error at
# a previous point. If you include inter at any time, this function has to be
# updated
#----------------------------
Get_ChromCombData_fun = function(InteractionInfo, ChromData) {
#----------------------------
# First Create the Chromosome Combination data:
#----------------------------
ChromComb = data.frame(Chrom1 = ChromData[InteractionInfo[, c(1)]], Chrom2 = ChromData[InteractionInfo[,
c(2)]])
ChromComb$Chrom12 = paste(ChromComb$Chrom1, "-", ChromComb$Chrom2, sep = "") #merged comb
# Get unique Comb, make data frame and give unique IDS:
UniqueChromComb = table(ChromComb$Chrom12)
UniqueChromComb = as.data.frame(UniqueChromComb, stringsAsFactors = FALSE)
names(UniqueChromComb) = c("Chrom12", "Totals") #note the Totals are total interactions, NOT Peaks
# Check which intra/inter:
Chrom12 = do.call(rbind, strsplit(x = UniqueChromComb$Chrom12, split = "-"))
IntraComb = which(Chrom12[, c(1)] == Chrom12[, c(2)])
UniqueChromComb$Intra = 0 #indicating no intra combination
if (length(IntraComb) != 0)
UniqueChromComb$Intra[IntraComb] = 1
# sort intra/inter combination before giving IDS: This helps to create a list
# with no empty entries afterwards As inter=0 will go at the end
UniqueChromComb = UniqueChromComb[with(UniqueChromComb, order(Intra, decreasing = TRUE)),
]
UniqueChromComb$Chrom12ID = seq_len(nrow(UniqueChromComb)) #give unique IDS
#----------------------------
# Assign the combination ID to the InteractionInfo
#----------------------------
MatchChrom12 = match(ChromComb$Chrom12, UniqueChromComb$Chrom12)
Intra = UniqueChromComb$Intra[MatchChrom12]
Chrom12ID = UniqueChromComb$Chrom12ID[MatchChrom12]
#----------------------------
# Make the InteractionInfo to data.frame. Assign the Chrom12ID and Intra Split
# the data frame to also given Node names for the big.matrices
#----------------------------
InteractionInfo = as.data.frame(InteractionInfo)
InteractionInfo$Chrom12ID = Chrom12ID
InteractionInfo$Intra = Intra
colnames(InteractionInfo) = c("PBS_i", "PBS_j", "nij", "Chrom12ID", "Intra")
# Assign the new node names, this will also assign node names to inter, but it is
# ok:
InteractionInfo = plyr::ddply(InteractionInfo, plyr::.(Chrom12ID), function(x) {
# take the PBS:
PBS_x = sort(unique(c(x$PBS_i, x$PBS_j)))
# Match the PBS_i/j to the PBS_x and this will give the new node IDS:
x$AdjNode_i = match(x$PBS_i, PBS_x)
x$AdjNode_j = match(x$PBS_j, PBS_x)
# return:
return(x)
})
# Also take the Total Peaks involved in each combination for creating the
# matrices:
TotPeaksComb = plyr::ddply(InteractionInfo, plyr::.(Chrom12ID), function(x) {
# take the PBS:
Tot_x = length(unique(c(x$PBS_i, x$PBS_j)))
# return:
return(data.frame(Chrom12ID = x$Chrom12ID[1], Tot_x = Tot_x))
})
UniqueChromComb$TotPeaks = TotPeaksComb$Tot_x[match(UniqueChromComb$Chrom12ID,
TotPeaksComb$Chrom12ID)]
#----------------------------
# Then, create the NiNjNodes for the NiNj vector which is global:
#----------------------------
PBSunique = sort(unique(c(InteractionInfo$PBS_i, InteractionInfo$PBS_j)))
InteractionInfo$NiNjNode_i = match(InteractionInfo$PBS_i, PBSunique)
InteractionInfo$NiNjNode_j = match(InteractionInfo$PBS_j, PBSunique)
#----------------------------
# Also load the PBS and AdjNodes indeces to create List used for building the
# networks This will only return intra-ligated networks The Chrom12ID are sorted,
# so the list will have in Chrom12ID_comb position the Chrom12ID_comb of the
# specific combination. So access is used by Chrom12ID_comb
#----------------------------
NetworkBuildData = plyr::dlply(InteractionInfo, plyr::.(Chrom12ID), function(x) {
# If inter return NULL:
if (x$Intra[1] == 0)
return(NULL)
# else take info
PBS_ij = unique(c(x$PBS_i, x$PBS_j))
AdjNode_ij = unique(c(x$AdjNode_i, x$AdjNode_j))
NiNjNode_ij = unique(c(x$NiNjNode_i, x$NiNjNode_j))
# take order to sort all the correct way:
Order_PBS_ij = order(PBS_ij, decreasing = FALSE)
PBS_comb = PBS_ij[Order_PBS_ij]
AdjNode_comb = AdjNode_ij[Order_PBS_ij]
NiNjNode_comb = NiNjNode_ij[Order_PBS_ij]
Chrom12ID_comb = x$Chrom12ID[1]
return(list(PBS_comb = PBS_comb, AdjNode_comb = AdjNode_comb, NiNjNode_comb = NiNjNode_comb,
Chrom12ID_comb = Chrom12ID_comb))
})
# remove the NULL if any:
RmNULL = unlist(lapply(NetworkBuildData, length))
NetworkBuildData = NetworkBuildData[which(RmNULL != 0)]
#----------------------------
# Transform the InteractionInfo to matrix and return:
#----------------------------
InteractionInfo = as.matrix(InteractionInfo)
return(list(InteractionInfo = InteractionInfo, UniqueChromComb = UniqueChromComb,
NetworkBuildData = NetworkBuildData))
}
# done
#----------------------------
# function for plotting for stage 4 part 1: Used peaks/involved Peaks and used
# PETs
#----------------------------
Get_image_S4_P1_fun = function(S4_AnalysisDir, SA_prefix, NGlobalInterPETs, NPETsInvolved,
NFDRPeaks, NGlobalPeaks, NPeaksInvolved) {
# Rcheck:
Value = NULL
Kind = NULL
# image dir:
S4_P1_image_dir = file.path(S4_AnalysisDir, paste(SA_prefix, "_stage_4_p1_image.jpg",
sep = ""))
#-------------
# create data:
#-------------
PeaksAboveFDR = (NGlobalPeaks - NFDRPeaks)/NGlobalPeaks * 100
PeaksBelowFDRnotInv = (NFDRPeaks - NPeaksInvolved)/NGlobalPeaks * 100
PeaksInvolved = NPeaksInvolved/NGlobalPeaks * 100
PETSNotInvolved = (NGlobalInterPETs - NPETsInvolved)/NGlobalInterPETs * 100
PETSInvolved = NPETsInvolved/NGlobalInterPETs * 100
S4_imagedata_1 = data.frame(Kind = c(paste("Peaks not passing FDR (", round(PeaksAboveFDR,
digits = 1), "%)", sep = ""), paste("Peaks passing FDR, but not involved in interactions (",
round(PeaksBelowFDRnotInv, digits = 1), "%)", sep = ""), paste("Peaks passing FDR, and involved in interactions (",
round(PeaksInvolved, digits = 1), "%)", sep = ""), paste("Interaction PETs not involved in interactions (",
round(PETSNotInvolved, digits = 1), "%)", sep = ""), paste("Interaction PETs involved in interactions (",
round(PETSInvolved, digits = 1), "%)", sep = "")), Value = c(PeaksAboveFDR,
PeaksBelowFDRnotInv, PeaksInvolved, PETSNotInvolved, PETSInvolved), facet = c("Peaks Summary",
"Peaks Summary", "Peaks Summary", "PETs summary", "PETs summary"))
#-------------
# plot the split:
#-------------
S4_image_p1 = ggplot2::ggplot(S4_imagedata_1, ggplot2::aes(x = "", y = Value,
fill = factor(Kind))) + ggplot2::geom_bar(width = 1, stat = "identity") +
ggplot2::facet_wrap(~facet) + ggplot2::coord_polar(theta = "y") + ggplot2::theme(axis.title = ggplot2::element_blank(),
plot.title = ggplot2::element_text(size = 20, color = "black"), legend.title = ggplot2::element_blank(),
legend.text = ggplot2::element_text(size = 17), axis.text = ggplot2::element_blank(),
legend.position = "bottom", legend.direction = "vertical", axis.ticks = ggplot2::element_blank()) +
ggplot2::ggtitle("Pie chart for the summary of the potential interactions.") +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5)) + ggplot2::scale_fill_brewer(palette = "Dark2")
# save:
ggplot2::ggsave(plot = S4_image_p1, file = S4_P1_image_dir, scale = 2)
}
# done
#----------------------------
# Function for creating the Network structures and the Adj matrices
#----------------------------
Create_Networks_fun = function(NetworkBuildData, PeakSummit, SA_prefix, S4_AnalysisDir) {
#----------------------------
# Initiate the structures you will use:
#----------------------------
TotNetworks = length(NetworkBuildData)
NetworkList = vector(mode = "list", length = TotNetworks) #list of networks
UpdateIndecesList = vector(mode = "list", length = TotNetworks) #list of updating indeces for merging nodes
MergedNodesIndexList = vector(mode = "list", length = TotNetworks) #list of merging nodes IDs
BigInfoMatDescList = vector(mode = "list", length = TotNetworks) #list of description of the AdjMatrices
NadjList = vector(mode = "list", length = TotNetworks) #list of the length of the AdjMatrices
NPeaksInvolvedList = vector(mode = "list", length = TotNetworks) #list of the total Peaks in each combination
InteractionPairsList = vector(mode = "list", length = TotNetworks) #list of the total interaction pairs involved(reducing), note this does NOT include inter
NiNjIndecesList = vector(mode = "list", length = TotNetworks) #indeces in R, used to access the NiNjMat
#----------------------------
# create directory for the big.matrix:
#----------------------------
S4_MatrixDir = file.path(S4_AnalysisDir, "GenomeMapMatrices")
if (dir.exists(S4_MatrixDir))
unlink(S4_MatrixDir, recursive = TRUE)
dir.create(S4_MatrixDir)
#----------------------------
# Loop in NetworkBuildData elements and create what you need:
#----------------------------
for (Net in seq_len(TotNetworks)) {
#----------------------------
# Take network elements:
#----------------------------
PBS_Net = NetworkBuildData[[Net]]$PBS_comb
AdjNode_Net = NetworkBuildData[[Net]]$AdjNode_comb
Chrom12ID_Net = NetworkBuildData[[Net]]$Chrom12ID_comb
NPeaksInvolved_Net = length(AdjNode_Net) #total peaks for network
NiNjNode_Net = NetworkBuildData[[Net]]$NiNjNode_comb
#----------------------------
# Initiate Network structure:
#----------------------------
Network_Net = Initiate_GenomeMap_fun_Rcpp(NPeaksInvolved_Net, AdjNode_Net,
PBS_Net, PeakSummit, Chrom12ID_Net, TRUE)
#----------------------------
# Create AdjMatrix for the chromosome
#----------------------------
Nadj_Net = NPeaksInvolved_Net * (NPeaksInvolved_Net - 1)/2
bkFile_BigInfoMat_Net = paste(SA_prefix, "_BigInfoMat_", Chrom12ID_Net, ".bk",
sep = "")
descFile_BigInfoMat_Net = paste(SA_prefix, "_BigInfoMat_", Chrom12ID_Net,
".desc", sep = "")
BigInfoMat_Net = bigmemory::filebacked.big.matrix(nrow = Nadj_Net, ncol = 1,
type = "double", backingfile = bkFile_BigInfoMat_Net, backingpath = S4_MatrixDir,
descriptorfile = descFile_BigInfoMat_Net, dimnames = c(NULL, NULL))
# get description:
BigInfoMatDesc_Net = bigmemory::describe(BigInfoMat_Net)
#----------------------------
# create the indeces to run in parallel for AdjMat:
#----------------------------
PeakSeq_Net = seq_len(NPeaksInvolved_Net - 1) #dont need the last one
if (BiocParallel::bpisup(BiocParallel::bpparam())) {
# then parallel backhead, so split in tasks get number of registered cores:
Cores_Net = max(1, BiocParallel::bpworkers(BiocParallel::bpparam()))
# get correct dimentions for the vector by extending with NAs
length(PeakSeq_Net) = suppressWarnings(prod(dim(matrix(PeakSeq_Net, ncol = Cores_Net,
byrow = TRUE))))
# create the matrix:
UpdateIndeces_Net = matrix(PeakSeq_Net, byrow = TRUE, ncol = Cores_Net)
UpdateIndeces_Net = plyr::alply(.data = UpdateIndeces_Net, .margins = 2,
.fun = function(x) x[!is.na(x)])
} else {
# no parallel backhead sto do it linearly
UpdateIndeces_Net = list(PeakSeq_Net)
}
#----------------------------
# Create also the MergeNodesIndex to keep track of the merging nodes
#----------------------------
MergedNodesIndex_Net = as.list(seq_len(NPeaksInvolved_Net))
#----------------------------
# Save element:
#----------------------------
NetworkList[[Net]] = Network_Net
UpdateIndecesList[[Net]] = UpdateIndeces_Net
MergedNodesIndexList[[Net]] = MergedNodesIndex_Net
BigInfoMatDescList[[Net]] = BigInfoMatDesc_Net
NadjList[[Net]] = Nadj_Net
NPeaksInvolvedList[[Net]] = NPeaksInvolved_Net
InteractionPairsList[[Net]] = Nadj_Net
NiNjIndecesList[[Net]] = NiNjNode_Net
}
cat("|================ Network Initialization is finished =================|\n")
# return:
return(list(NetworkList = NetworkList, UpdateIndecesList = UpdateIndecesList,
MergedNodesIndexList = MergedNodesIndexList, BigInfoMatDescList = BigInfoMatDescList,
NadjList = NadjList, NPeaksInvolvedList = NPeaksInvolvedList, S4_MatrixDir = S4_MatrixDir,
TotNetworks = TotNetworks, InteractionPairsList = InteractionPairsList, NiNjIndecesList = NiNjIndecesList))
}
# done
#----------------------------
# Main Interaction analysis function:
#----------------------------
Run_InteractionAnalysis_fun = function(InteractionData, S4_method) {
#----------------------------
# Initiate by breaking the InteractionData input:
#----------------------------
PeaksGR = InteractionData$PeaksGR # peaks data for return
InteractionInfMat = InteractionData$InteractionInfMat #the matrix of information
NInteractions = InteractionData$NInteractions #total interactions to test
AllInteIndeces = InteractionData$AllInteIndeces #interaction ids
NiNjMat = InteractionData$NiNjMat #matrix with the ni/nj
# network related data:
NetworksData = InteractionData$NetworksData
S4_MatrixDir = NetworksData$S4_MatrixDir
NetworksData$S4_MatrixDir = NULL
NetUpdateIndicator = seq_len(NetworksData$TotNetworks) # Network Update indicators(for not updating all distances all the time)
# Quantiles:
QuantileProbs = seq(from = 0, to = 1, length = 1001)[-c(1)] #the size of the bins(plus one will be the last bin with the inter)
SavedQuantiles = lapply(as.list(seq_len(NetworksData$TotNetworks)), function(x) return(list(BinsVij = NA,
ChunkSizeVij = NA))) #save for not computing again
# counters
TotIntAdded = 0 #the total interactions added in the model
TotBiRem = 0 #total bi-products removed from the model
OrdersCount = 1 #Counter for the interactions added in the model(multiple interactions counter)
#----------------------------
# run the loop to add interactions:
#----------------------------
while (TotIntAdded + TotBiRem < NInteractions) {
#----------------------------
# Compute the quantiles and the expected PETs under H0:
#----------------------------
QuantRes = Get_ExpectedPETs_fun(InteractionInfMat = InteractionInfMat, AllInteIndeces = AllInteIndeces,
NiNjMat = NiNjMat, QuantileProbs = QuantileProbs, NetUpdateIndicator = NetUpdateIndicator,
NetworksData = NetworksData, SavedQuantiles = SavedQuantiles)
# break output:
BinMatVij = QuantRes$BinMatVij
NetworksData = QuantRes$NetworksData
InteractionInfMat = QuantRes$InteractionInfMat
SavedQuantiles = QuantRes$SavedQuantiles
#----------------------------
# Generate p-values:
#----------------------------
pValues_round = BiocParallel::bplapply(X = as.list(AllInteIndeces), FUN = Assess_Interaction_fun_Rcpp,
InteractionInfMat = InteractionInfMat, Poiss_fun = Poiss_fun, BinMatVij = BinMatVij)
pValues_round = do.call(rbind, pValues_round)
#----------------------------
# get latest interaction information
#----------------------------
LaIn = Get_LatestInteraction_fun(pValues_round = pValues_round, S4_method = S4_method,
TotPairs = sum(unlist(NetworksData$InteractionPairsList)), InteractionInfMat = InteractionInfMat)
LastInteractions = LaIn$LastInteractions
InteractionInfMat = LaIn$InteractionInfMat
TR_Si = length(LastInteractions) #total round significant interactions
#----------------------------
# Update the network for the newley added interactions
#----------------------------
NetworkUpdates = Update_Network_fun(AllInteIndeces = AllInteIndeces, TotIntAdded = TotIntAdded,
TotBiRem = TotBiRem, NInteractions = NInteractions, LastInteractions = LastInteractions,
TR_Si = TR_Si, InteractionInfMat = InteractionInfMat, OrdersCount = OrdersCount,
NetworksData = NetworksData)
NetworksData = NetworkUpdates$NetworksData
TotIntAdded = NetworkUpdates$TotIntAdded
TotBiRem = NetworkUpdates$TotBiRem
AllInteIndeces = NetworkUpdates$AllInteIndeces
InteractionInfMat = NetworkUpdates$InteractionInfMat
NetUpdateIndicator = NetworkUpdates$NetUpdateIndicator
# check if break:
if (TotIntAdded + TotBiRem == NInteractions) {
cat("\n")
break
}
#----------------------------
# increase counter:
#----------------------------
OrdersCount = OrdersCount + 1
}
#----------------------------
# print and write:
#----------------------------
futile.logger::flog.info("Interaction analysis completed!", name = "SA_LogFile",
capture = FALSE)
#--------------------------------------------
#------Delete matrices
#--------------------------------------------
unlink(x = S4_MatrixDir, recursive = TRUE, force = TRUE)
#----------------------------
# return:
#----------------------------
return(list(PeaksGR = PeaksGR, NInteractions = NInteractions, InteractionInfMat = InteractionInfMat,
TotIntAdded = TotIntAdded, TotBiRem = TotBiRem))
}
# done
#----------------------------
# Function for finding the expected number of PETs if random
#----------------------------
Get_ExpectedPETs_fun = function(InteractionInfMat, AllInteIndeces, NiNjMat, QuantileProbs,
NetUpdateIndicator, NetworksData, SavedQuantiles) {
#----------------------------
# Compute the SP for the need changed chromosomes:
#----------------------------
for (Net in NetUpdateIndicator) {
# Gather network data:
Network_Net = NetworksData$NetworkList[[Net]]
UpdateIndeces_Net = NetworksData$UpdateIndecesList[[Net]]
MergedNodesIndex_Net = NetworksData$MergedNodesIndexList[[Net]]
BigInfoMatDesc_Net = NetworksData$BigInfoMatDescList[[Net]]
Nadj_Net = NetworksData$NadjList[[Net]]
NPeaksInvolved_Net = NetworksData$NPeaksInvolvedList[[Net]]
NiNjIndeces_Net = NetworksData$NiNjIndecesList[[Net]]
# update SP:
InteractionPairs_Net = BiocParallel::bplapply(X = UpdateIndeces_Net, FUN = BigMat_SP_fun,
NPeaksInvolved = NPeaksInvolved_Net, Nadj = Nadj_Net, BigInfoMatDesc = BigInfoMatDesc_Net,
Network = Network_Net, MergedNodesIndex = MergedNodesIndex_Net, NiNjIndeces = NiNjIndeces_Net,
NiNjMat = NiNjMat)
InteractionPairs_Net = Reduce("+", InteractionPairs_Net)
# Update InteractionPairs:
NetworksData$InteractionPairsList[[Net]] = InteractionPairs_Net
}
# ---------------------------- Find quantiles for Vij=ni*nj/ Dij
# ----------------------------
BigMatQuantiles = BiocParallel::bplapply(X = as.list(NetUpdateIndicator), FUN = Get_QuantileChunks_fun,
QuantileProbs = QuantileProbs, BigInfoMatDescList = NetworksData$BigInfoMatDescList)
# save in the SavedQuantiles
SavedQuantiles[NetUpdateIndicator] = BigMatQuantiles
# split quantiles:
BinsVij = Reduce("+", lapply(SavedQuantiles, "[[", 1))
ChunkSizeVij = Reduce("+", lapply(SavedQuantiles, "[[", 2)) #the chunk total
# finalize quantiles:
BinsVij = unique(BinsVij/ChunkSizeVij)
BinsVijSize = length(BinsVij)
# ---------------------------- Load it from the data itself:
# ----------------------------
ObsVij = Get_Vij_Data_fun(InteractionInfMat = InteractionInfMat, NetworksData = NetworksData,
AllInteIndeces = AllInteIndeces)
#----------------------------
# Count PETs and assign the QCells
#----------------------------
QCellPETCountsVij = numeric(BinsVijSize)
Get_QCellPETCounts_fun_Rcpp(BinsVij, BinsVijSize, ObsVij, InteractionInfMat,
AllInteIndeces, QCellPETCountsVij)
#----------------------------
# Find Cell counts, now you will loop all the networks:
#----------------------------
QCellCombCountsVij = numeric(BinsVijSize)
for (Net in seq_len(NetworksData$TotNetworks)) {
# Gather network data:
UpdateIndeces_Net = NetworksData$UpdateIndecesList[[Net]]
BigInfoMatDesc_Net = NetworksData$BigInfoMatDescList[[Net]]
Nadj_Net = NetworksData$NadjList[[Net]]
NPeaksInvolved_Net = NetworksData$NPeaksInvolvedList[[Net]]
# Count in cells
QCellCombCountsVij_Net = BiocParallel::bplapply(X = UpdateIndeces_Net, FUN = Get_QCellCombCounts_fun,
BinsVij = BinsVij, BinsVijSize = BinsVijSize, BigInfoMatDesc = BigInfoMatDesc_Net,
NPeaksInvolved = NPeaksInvolved_Net, Nadj = Nadj_Net)
# Sum
QCellCombCountsVij_Net = Reduce("+", QCellCombCountsVij_Net)
QCellCombCountsVij = QCellCombCountsVij + QCellCombCountsVij_Net
}
#----------------------------
# Create Bin Matrix for Vij
#----------------------------
BinMatVij = cbind(seq_len(BinsVijSize), BinsVij, QCellPETCountsVij/QCellCombCountsVij)
colnames(BinMatVij) = NULL
#----------------------------
# Create and smooth bins
#----------------------------
BinMatVij = SmoothSplines_fun(BinMat = BinMatVij, Which = 3)
# return:
return(list(BinMatVij = BinMatVij, NetworksData = NetworksData,
InteractionInfMat = InteractionInfMat, SavedQuantiles = SavedQuantiles))
}
# done
#----------------------------
# Function for filling the Big.matrix with the shortest paths. IF the distance
# is zero, it becomes NA, as the nodes are merged
#----------------------------
BigMat_SP_fun = function(Indeces_x, NPeaksInvolved, Nadj, BigInfoMatDesc, Network,
MergedNodesIndex, NiNjIndeces, NiNjMat) {
#----------------------------
# attach the matrices:
#----------------------------
BigInfoMatDescInst = bigmemory::attach.big.matrix(BigInfoMatDesc)
InteractionPairs = numeric(1) #the intra combinations
#----------------------------
# loop through the indeces:
#----------------------------
for (ind in Indeces_x) {
#----------------------------
# Take all the k from the MergedNodesIndex:
#----------------------------
k_vect = MergedNodesIndex[[ind]]
if (is.na(k_vect[1]))
next #skip element if NA, merged, will be updated another time
k_vect = sort(x = k_vect, decreasing = FALSE, na.last = TRUE) #sort, need the bigger k in the beginning
#----------------------------
# Find SP for the first k index:
#----------------------------
GlobalNodesDist = Dijkstra_GSP_fun_Rcpp(k_vect[1], Network, NPeaksInvolved)
#----------------------------
# save the entries in the AdjucencyMatDescInst for all the k indeces
#----------------------------
for (k in k_vect) {
#----------------------------
# Get start and end of the saving indeces
#----------------------------
StartInd = Get_VectPosIndex_fun_Rcpp(NPeaksInvolved, Nadj, k - 1, k) #c++
EndInd = Get_VectPosIndex_fun_Rcpp(NPeaksInvolved, Nadj, k - 1, NPeaksInvolved -
1) #c++
#----------------------------
# Fill in the SP values:
#----------------------------
Save_BigMat_fun_Rcpp(BigInfoMatDescInst@address, GlobalNodesDist, k,
StartInd, EndInd, InteractionPairs, NiNjIndeces, NiNjMat)
}
}
return(InteractionPairs)
}
# done
#----------------------------
# Function for the Quantiles for Vij
#----------------------------
Get_QuantileChunks_fun = function(Net, QuantileProbs, BigInfoMatDescList) {
#----------------------------
# Attach and load matrix:
#----------------------------
BigInfoMatDescInst = bigmemory::attach.big.matrix(BigInfoMatDescList[[Net]])
BigInfoMatDescInst = bigmemory::as.matrix(BigInfoMatDescInst)
ChunkVij = which(!is.na(BigInfoMatDescInst))
ChunkSizeVij = length(ChunkVij)
#----------------------------
# Get quantiles:
#----------------------------
if (ChunkSizeVij != 0) {
# For Distance:
BinsVij = stats::quantile(x = BigInfoMatDescInst[ChunkVij], probs = QuantileProbs,
na.rm = TRUE, names = FALSE, type = 7) * ChunkSizeVij
} else {
BinsVij = rep(0, length(QuantileProbs))
}
return(list(BinsVij = BinsVij, ChunkSizeVij = ChunkSizeVij))
}
# done
#----------------------------
# Function for returning the observed Vij in the order of the interactions
#----------------------------
Get_Vij_Data_fun = function(InteractionInfMat, NetworksData, AllInteIndeces) {
#----------------------------
# First subset the data and keep what you need:
#----------------------------
InteractionInfMatSub = InteractionInfMat[AllInteIndeces, c("AdjNode_i", "AdjNode_j",
"NiNjNode_i", "NiNjNode_j", "Chrom12ID", "IntraID")]
if (!methods::is(InteractionInfMatSub, "matrix")){
InteractionInfMatSub = t(as.matrix(InteractionInfMatSub))
}
InteractionInfMatSub = InteractionInfMatSub[which(InteractionInfMatSub[, c("IntraID")] ==
1), ] #keep intra in that part
if (!methods::is(InteractionInfMatSub, "matrix")){
InteractionInfMatSub = t(as.matrix(InteractionInfMatSub))
}
#----------------------------
# Create ObsDVij vector and loop to update
#----------------------------
ObsVij = numeric(length(AllInteIndeces))
Chrom12IDUnique = unique(InteractionInfMatSub[, c("Chrom12ID")])
for (Net in Chrom12IDUnique) {
# attach:
BigInfoMatDescInst = bigmemory::attach.big.matrix(NetworksData$BigInfoMatDescList[[Net]])
# Get AdjNodes/NiNjNode:
Pos_Net = which(InteractionInfMatSub[, c("Chrom12ID")] == Net)
AdjNode_i_Net = InteractionInfMatSub[Pos_Net, c("AdjNode_i")]
AdjNode_j_Net = InteractionInfMatSub[Pos_Net, c("AdjNode_j")]
# Get Nadj and NPeaks:
Nadj_Net = NetworksData$NadjList[[Net]]
NPeaksInvolved_Net = NetworksData$NPeaksInvolvedList[[Net]]
# Get vect position on the matrix in R indeces!
AdjNode_ij_Net = Get_VectPosIndex_Vectorized_fun_Rcpp(NPeaksInvolved_Net,
Nadj_Net, AdjNode_i_Net, AdjNode_j_Net)
# load the Vij and save them:
ObsVij[Pos_Net] = BigInfoMatDescInst[AdjNode_ij_Net, c(1)]
}
return(ObsVij)
}
# done
#----------------------------
# Function for finding the QCellIDs and counting in the cells
#----------------------------
Get_QCellCombCounts_fun = function(Indeces_x, BinsVij, BinsVijSize, BigInfoMatDesc,
NPeaksInvolved, Nadj) {
#----------------------------
# attach the matrices:
#----------------------------
BigInfoMatDescInst = bigmemory::attach.big.matrix(BigInfoMatDesc)
#----------------------------
# Initialize:
#----------------------------
QCellCombCountsVij_Net = numeric(BinsVijSize)
#----------------------------
# loop through the indeces:
#----------------------------
for (ind in Indeces_x) {
#----------------------------
# Find start and end indeces on the matrices
#----------------------------
StartInd = Get_VectPosIndex_fun_Rcpp(NPeaksInvolved, Nadj, ind - 1, ind) #c++
EndInd = Get_VectPosIndex_fun_Rcpp(NPeaksInvolved, Nadj, ind - 1, NPeaksInvolved -
1) #c++
#----------------------------
# load the Dij and take order:
#----------------------------
VkhOrder = order(x = BigInfoMatDescInst[seq(from = StartInd + 1, to = EndInd +
1), c(1)], na.last = TRUE, decreasing = FALSE)
#----------------------------
# Fill inn the values:
#----------------------------
Get_QCellCombCounts_fun_Rcpp(BinsVij, BinsVijSize, BigInfoMatDescInst@address,
VkhOrder, QCellCombCountsVij_Net, StartInd, EndInd)
}
return(QCellCombCountsVij_Net)
}
# done
#----------------------------
# Splines
#----------------------------
SmoothSplines_fun = function(BinMat, Which) {
#----------------------------
# replace inf with 0:
#----------------------------
InfBins = which(is.infinite(BinMat[, Which]))
if (length(InfBins) != 0)
BinMat[InfBins, Which] = 0
#----------------------------
# Remove Na bins, those are with no pairs in them, if you set them to zero they
# will affect the nearby values So dont consider them just.
#----------------------------
NoNABins = which(!is.na(BinMat[, Which])) #the non NA
#----------------------------
# smooth with splines:
#----------------------------
if (length(NoNABins) > 4) {
# keep cells which are used because the lambda ij should not become zero:
UsedQuantiles = which(BinMat[NoNABins, Which] != 0) #cells that are used
# splines
Splines = stats::smooth.spline(x = log10(BinMat[NoNABins, c(2)]), y = BinMat[NoNABins,
Which], all.knots = TRUE, spar = 0.75)
# fix zero:
if (any(Splines$y < 0))
Splines$y[which(Splines$y < 0)] = 0
BinMat[NoNABins, Which] = Splines$y
#----------------------------
# Splines might set a Cell which lij!=0, to lij==0, and this will be a problem
# with the p-values So fix those by setting them to .Machine$eps
#----------------------------
BinMat[NoNABins[UsedQuantiles], Which] = pmax(.Machine$double.eps, BinMat[NoNABins[UsedQuantiles],
Which])
}
return(BinMat)
}
# done
#----------------------------
# Poisson p-value
#----------------------------
Poiss_fun = function(nij, lij) {
pval = stats::dpois(x = nij, lambda = lij, log = FALSE) + stats::ppois(q = nij,
lambda = lij, lower.tail = FALSE, log.p = FALSE)
return(pval)
}
# done
#----------------------------
# Function for finding which interaction IDs will be added based on their FDR
#----------------------------
Get_LatestInteraction_fun = function(pValues_round, S4_method, TotPairs, InteractionInfMat) {
#----------------------------
# Adjust with FDR, note that the p-values of the non-interacting are also needed,
# which are pval=1:
#----------------------------
FDR_round = stats::p.adjust(p = pValues_round[, c(2)], method = S4_method, n = TotPairs)
#----------------------------
# Update the InteractionsInfMat too:
#----------------------------
InteractionInfMat[pValues_round[,c(1)], "pvalue"] = pValues_round[, c(2)]
InteractionInfMat[pValues_round[,c(1)], "FDR"] = FDR_round
#----------------------------
# Find the most significant FDR and the interactions who have it
#----------------------------
Min_round_FDR = min(FDR_round)
WhichMinFDR_round = which(FDR_round == Min_round_FDR)
LastInteractions = pValues_round[WhichMinFDR_round, c(1)] #R index
return(list(LastInteractions = LastInteractions, InteractionInfMat = InteractionInfMat))
}
# Done
#----------------------------
# Function for updating the network
#----------------------------
Update_Network_fun = function(AllInteIndeces, TotIntAdded, TotBiRem, NInteractions,
LastInteractions, TR_Si, InteractionInfMat, OrdersCount, NetworksData) {
#----------------------------
# First remove all the interactions to be added from the interaction from
# looping, and initiate which chrom comb will be updated next:
#----------------------------
AllInteIndeces = AllInteIndeces[-which(AllInteIndeces %in% LastInteractions)]
NetUpdateIndicator = c()
#----------------------------
# loop on the interactions to add:
#----------------------------
for (i in seq_len(TR_Si)) {
#----------------------------
# print:
#----------------------------
TotIntAdded = TotIntAdded + 1
TotIntAddedPrintPers = TotIntAdded/NInteractions * 100
cat("|---- Total interactions processed: ", TotIntAdded, " (", TotIntAddedPrintPers,
"%) ----|\r")
#----------------------------
# Take interaction information ID
#----------------------------
ID_i = LastInteractions[i] #row ID of InteractionInfMat
#----------------------------
# update InteractionInfMat:
#----------------------------
InteractionInfMat[ID_i, c("Order")] = OrdersCount
Chrom12ID_i = InteractionInfMat[ID_i, c("Chrom12ID")] #for updating paths again
NetUpdateIndicator = c(NetUpdateIndicator, Chrom12ID_i)
#----------------------------
# take the nodes, you will merge the biggest to the smallest in the network:
#----------------------------
Node_kh = sort(InteractionInfMat[ID_i, c("AdjBiNode_i", "AdjBiNode_j")],
decreasing = FALSE)
# Check if nodes are merged already(not this is NOT a bi-product)
if (Node_kh[1] == Node_kh[2])
next #everything is done so go to next
#----------------------------
# Move all nodes to the MergedNodesIndex:
#----------------------------
k = Node_kh[1]
names(k) = NULL
h = Node_kh[2]
names(h) = NULL
NetworksData$MergedNodesIndexList[[Chrom12ID_i]][[k]] = c(NetworksData$MergedNodesIndexList[[Chrom12ID_i]][[k]],
NetworksData$MergedNodesIndexList[[Chrom12ID_i]][[h]])
NetworksData$MergedNodesIndexList[[Chrom12ID_i]][[h]] = NA
#----------------------------
# Update the Nodes of the rest of the interactions to be added: Those are the
# interactions which are significant this round but not added yet, not the rest
# of the interactions left.
#----------------------------
Update_ToBeAddedInter_fun_Rcpp(InteractionInfMat, k, h, i, TR_Si, LastInteractions,
Chrom12ID_i)
#----------------------------
# update the rest of the interactions, not added yet. update the index and check
# if bi-products (same index) The bi-products are removed from the model, their
# p-value/FDR will be NA and the order will be NA too Their index will be removed
# from the AllInteIndeces
#----------------------------
BiPorductsInfo = Check_BiProd_fun_Rcpp(InteractionInfMat, k, h, AllInteIndeces,
TotBiRem, Chrom12ID_i, OrdersCount)
# Check bi-product rejected:
if (BiPorductsInfo$TotBiRem != TotBiRem) {
# then at least one bi-product. remove from the rest of the interactions
AllInteIndeces = AllInteIndeces[-which(AllInteIndeces %in% BiPorductsInfo$BiProductIDSreject)]
TotBiRem = BiPorductsInfo$TotBiRem
}
# check bi-products accepted:
if (BiPorductsInfo$TotBiAcc != 0) {
# then at least one bi-product. remove from the rest of the interactions
AllInteIndeces = AllInteIndeces[-which(AllInteIndeces %in% BiPorductsInfo$BiProductIDSaccepted)]
TotIntAdded = TotIntAdded + BiPorductsInfo$TotBiAcc
}
#----------------------------
# Update the network:
#----------------------------
Network_i = NetworksData$NetworkList[[Chrom12ID_i]]
Network_i = Update_Network_kh_fun(Network = Network_i, k = k, h = h)
NetworksData$NetworkList[[Chrom12ID_i]] = Network_i
}
# finalize NetUpdateIndicator:
NetUpdateIndicator = sort(unique(NetUpdateIndicator))
#----------------------------
# return
#----------------------------
return(list(NetworksData = NetworksData, TotIntAdded = TotIntAdded, TotBiRem = TotBiRem,
AllInteIndeces = AllInteIndeces, InteractionInfMat = InteractionInfMat,
NetUpdateIndicator = NetUpdateIndicator))
}
# done
#----------------------------
# Function to update the Network by merging h to k node
#----------------------------
Update_Network_kh_fun = function(Network, k, h) {
# --------------------- NOTE: First move all h edges to k (in commons keep min
# dist) In h set k edge only, weight 0. (dont delete element since then you need
# to reduce all indeces which takes time) (just leave it empty pointing to k
# only) Dont remove the h edge from k! Because you need the distance The
# InteractionsMat will not have h anymore since it is replaced with k for all.
# All edges connected to h should also be traversed and the h name replaced to k
# (and the dist to min again.) -------------------- Take the k and h networks:
# --------------------- k:
Network_k = Network[[k]]
edges_k = Network_k$edges
weights_k = Network_k$weights
# h:
Network_h = Network[[h]]
edges_h = Network_h$edges
weights_h = Network_h$weights
# -------------------- Remove h edge from k if there ---------------------
if (h %in% edges_k) {
Rmh = which(edges_k == h)
edges_k = edges_k[-c(Rmh)]
weights_k = weights_k[-c(Rmh)]
}
# -------------------- Remove k edge from h if there ---------------------
if (k %in% edges_h) {
Rmk = which(edges_h == k)
edges_h = edges_h[-c(Rmk)]
weights_h = weights_h[-c(Rmk)]
}
# -------------------- update the common elements to the min
# ---------------------
if (any(edges_h %in% edges_k)) {
# take common edges
edges_hk = intersect(edges_h, edges_k)
# take weights of h:
Common_h = which(edges_h %in% edges_hk)
weights_hk_h = weights_h[Common_h]
# take weights of k:
Common_k = which(edges_k %in% edges_hk)
weights_hk_k = weights_k[Common_k]
# take pairwise min:
weights_min = pmin(weights_hk_k, weights_hk_h)
# update:
weights_k[Common_k] = weights_min
}
# -------------------- Add exclusive elements of h into k ---------------------
if (any(!edges_h %in% edges_k)) {
Exclusive_h = which(!edges_h %in% edges_k)
edges_k = c(edges_k, edges_h[Exclusive_h])
weights_k = c(weights_k, weights_h[Exclusive_h])
}
# -------------------- Add h to the k elements and sort them:
# ---------------------
edges_k = c(edges_k, h)
weights_k = c(weights_k, 0)
Orderk = order(weights_k, decreasing = FALSE)
edges_k = edges_k[Orderk]
weights_k = weights_k[Orderk]
# -------------------- Go into the elements of h and update h edge to k This will
# reduce the recurrence ---------------------
for (el in edges_h) {
# note k is removed from edges_h Also all the el are in k, so set k in el too
# with same distance take network:
Network_el = Network[[el]]
edges_el = Network_el$edges
weights_el = Network_el$weights
# remove the k and h edges completely:
Rmkh = which(edges_el %in% c(k, h))
edges_el = edges_el[-c(Rmkh)]
weights_el = weights_el[-c(Rmkh)]
# add the k edge, weights_k[Addk] has the min already:
Addk = which(edges_k %in% c(el))
edges_el = c(edges_el, k)
weights_el = c(weights_el, weights_k[Addk])
# sort:
Orderel = order(weights_el, decreasing = FALSE)
edges_el = edges_el[Orderel]
weights_el = weights_el[Orderel]
# save to network:
Network_el$edges = edges_el
Network_el$weights = weights_el
Network[[el]] = Network_el
}
# -------------------- update network for k and h: ---------------------
Network_k$edges = edges_k
Network_k$weights = weights_k
Network[[k]] = Network_k
Network_h$edges = k
Network_h$weights = 0
Network[[h]] = Network_h
return(Network)
}
# done
#----------------------------
# Function for creating the genome map object
#----------------------------
Create_GenomeMapObject = function(InteractionResults, SA_prefix, S4_AnalysisDir) {
cat("Creating the GenomeMap Object...")
# R check:
Order = NULL
#----------------------------
# Create the significant information matrix nrow=Tot_interactions, col1=PBS from,
# col2=PBS to (rows in the PeaksData) col3=pvalue, col4=FDR, col5=order, col6=Tot
# interaction PETs between the two PBS
#----------------------------
InteractionInfo = Get_InteractionInfo_fun_Rcpp(InteractionResults$InteractionInfMat,
InteractionResults$NInteractions)
InteractionInfo = as.data.frame(InteractionInfo)
colnames(InteractionInfo) = c("PBSF", "PBST", "pvalue", "FDR", "Order", "TotalInterPETs")
# subset the order being >0, remove not added interactions/bi products:
InteractionInfo = subset(InteractionInfo, !(is.na(Order) | Order == 0))
# sort by order:
GetOrder = order(InteractionInfo$Order)
InteractionInfo = InteractionInfo[GetOrder, ]
#----------------------------
# create the GRanges object from the peaks:
#----------------------------
# take the PeaksGR:
PeaksGR = InteractionResults$PeaksGR
# reduce unused levels:
LevelsUsed = GenomeInfoDb::seqlevelsInUse(PeaksGR)
GenomeInfoDb::seqlevels(PeaksGR) = LevelsUsed
#----------------------------
# create the GInteractions object from the PeaksGR:
#----------------------------
AnchorSummits = round(PeaksGR$PeakSummit)
PeaksGR$PeakSummit = NULL
PeaksGR$LID = NULL
# Take Anchor 1:
Anchor1 = PeaksGR[InteractionInfo$PBSF]
Anchor1Summit = AnchorSummits[InteractionInfo$PBSF]
# Take Anchor 2:
Anchor2 = PeaksGR[InteractionInfo$PBST]
Anchor2Summit = AnchorSummits[InteractionInfo$PBST]
#----------------------------
# create the GenomeMapData
#----------------------------
GenomeMapData = InteractionSet::GInteractions(anchor1 = Anchor1, anchor2 = Anchor2)
# Add the summits:
GenomeMapData$Anchor1Summit = Anchor1Summit
GenomeMapData$Anchor2Summit = Anchor2Summit
# make InteractionInfo DataFrame:
InteractionInfo = S4Vectors::DataFrame(InteractionInfo[, c("pvalue", "FDR", "Order",
"TotalInterPETs")])
#----------------------------
# Add Metadata:
#----------------------------
S4Vectors::metadata(GenomeMapData)$InteractionInfo = InteractionInfo
#----------------------------
# Convert To class and save:
#----------------------------
class(GenomeMapData) = "GenomeMap"
NameGenomeMapData = paste(SA_prefix, "_GenomeMapData", sep = "")
assign(NameGenomeMapData, GenomeMapData) #assign value.
save(list = NameGenomeMapData, file = file.path(S4_AnalysisDir, NameGenomeMapData))
cat("Done\n")
}
# done
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.