ReadSettings <- function(input.file){
org.input.file <- input.file
# if can't find the input.file search the package external data
if( !file.exists(input.file)){
input.file <- system.file(
"extdata",
input.file,
package = "PhenoGeneRanker"
)
}
if(!file.exists(input.file)){
input.file <- org.input.file
}
files <- read.table(input.file, header=TRUE,sep="\t",
stringsAsFactors = FALSE)
LG <- sum(files$type=="gene")
LP <- sum(files$type=="phenotype")
output=list(
FilesDF=files,
LG=LG,
LP=LP)
}
ReadNetworkLayers <- function(filesDF){
# read the gene layer names
filesDF <- filesDF[filesDF$type%in%c("gene", "phenotype", "bipartite"),]
NetworkLayers <- vector("list",nrow(filesDF))
j <- 1
for(f in filesDF$file_name){
f <- system.file(
"extdata",
f,
package = "PhenoGeneRanker"
)
NetworkLayers[[j]][["DF"]] <- read.table(f, header=TRUE, sep="\t",
stringsAsFactors = FALSE)
NetworkLayers[[j]][["DF"]]$from <- as.character(
NetworkLayers[[j]][["DF"]]$from)
NetworkLayers[[j]][["DF"]]$to <- as.character(NetworkLayers[[j]][["DF"]]$to)
NetworkLayers[[j]][["type"]] <- filesDF$type[j]
NetworkLayers[[j]][["graph"]] <- graph.data.frame(
NetworkLayers[[j]][["DF"]], directed = FALSE)
NetworkLayers[[j]][["name"]] <- f
j <- j+1
}
gene_pool_nodes_sorted <- GeneratePoolNodes(NetworkLayers, type = "gene")
phenotype_pool_nodes_sorted <- GeneratePoolNodes(NetworkLayers,
type = "phenotype")
idx <- which(lapply(NetworkLayers, `[[`, "type") == "gene")
for(i in idx){
NetworkLayers[[i]][["graph"]] <- AddMissingNodesToGraph(
NetworkLayers[[i]][["graph"]],
"gene", gene_pool_nodes_sorted)
}
idx <- which(lapply(NetworkLayers, `[[`, "type") == "phenotype")
for(i in idx){
NetworkLayers[[i]][["graph"]] <- AddMissingNodesToGraph(
NetworkLayers[[i]][["graph"]],
"phenotype", phenotype_pool_nodes_sorted)
}
output=list(
NetworkLayers=NetworkLayers,
gene_pool_nodes_sorted=gene_pool_nodes_sorted,
phenotype_pool_nodes_sorted=phenotype_pool_nodes_sorted)
return(output)
}
ReadSeeds <- function(fileName, gene_pool_nodes_sorted,
phenotype_pool_nodes_sorted){
AllSeeds <- read.csv(fileName, header=FALSE, sep="\t",
stringsAsFactors = FALSE)
AllSeeds <- AllSeeds$V1
SeedList <- CheckSeeds(AllSeeds,gene_pool_nodes_sorted,
phenotype_pool_nodes_sorted)
return(SeedList)
}
GeneratePoolNodes <- function(FullNet, type){
idx <- which(lapply(FullNet, `[[`, "type") == type)
DFs <- lapply(FullNet[idx], `[[`, "DF")
Node_Names_all <- unique(c(unlist(lapply(DFs, '[[', 'from')),
unlist(lapply(DFs, '[[', 'to'))))
## We remove duplicates and sort
pool_nodes_sorted <- sort(Node_Names_all)
return(pool_nodes_sorted)
}
AddMissingNodesToGraph <- function(g, type, pns){
#pns is pool_nodes_sorted
## We add to each layer the missing nodes of the total set of nodes,
#of the pool of nodes.
Node_Names_Layer <- V(g)$name
Missing_Nodes <- pns[which(!pns %in% Node_Names_Layer)]
g <- add_vertices(g ,length(Missing_Nodes), name=Missing_Nodes)
return(g)
}
AddParameters <- function(delta, zeta, lambda){
if (delta > 1 || delta< 0){
stop("Incorrect delta, it must be between 0 and 1")}
if (zeta > 1 || zeta < 0){
stop("Incorrect zeta, it must be between 0 and 1")}
if (lambda > 1 || lambda < 0){
stop("Incorrect lambda, it must be between 0 and 1")}
parameters <- list(delta, zeta, lambda)
names(parameters) <- c("delta", "zeta", "lambda")
return(parameters)
}
CheckSeeds <- function(Seeds, All_genes,All_Phenotypes){
Genes_Seeds_Ok <- Seeds[which(Seeds %in% All_genes)]
Phenotype_Seeds_Ok <- Seeds[which(Seeds %in% All_Phenotypes)]
All_seeds_ok <- c(Genes_Seeds_Ok,Phenotype_Seeds_Ok)
All_seeds_ko <- Seeds[which(!Seeds %in% All_seeds_ok)]
list_Seeds_Ok <- list(Genes_Seeds_Ok,Phenotype_Seeds_Ok)
if(length(All_seeds_ko) != 0){
print("Seeds below do not exist in the network: ")
print(paste(All_seeds_ko, sep=" "))
}
if ((length(Genes_Seeds_Ok) == 0) && (length(Phenotype_Seeds_Ok) ==0)){
stop("Seeds not found in the network")
} else {
return(list(Genes_Seeds=Genes_Seeds_Ok, Pheno_Seeds=Phenotype_Seeds_Ok))
}
}
GetSeedScores <- function(geneSeeds, phenoSeeds, eta, LG, LP, tau, phi) {
n <- length(geneSeeds)
m <- length(phenoSeeds)
if ((n != 0 && m != 0)) {
Seed_Genes_Layer_Labeled <- paste0(rep(geneSeeds, LG), sep = "_",
rep(seq(LG), length.out = n * LG, each = n))
Seeds_Genes_Scores <- rep(((1 - eta) * tau)/n, n)
Seed_Phenos_Layer_Labeled <- paste0(rep(phenoSeeds, LP), sep = "_",
rep(seq(LP),length.out = m * LP, each = m))
Seeds_Phenos_Scores <- rep((eta * phi)/m, m)
} else {
eta <- 1
if (n == 0) {
Seed_Genes_Layer_Labeled <- character()
Seeds_Genes_Scores <- numeric()
Seed_Phenos_Layer_Labeled <- paste0(rep(phenoSeeds, LP), sep = "_",
rep(seq(LP), length.out = m * LP, each = m))
Seeds_Phenos_Scores <- rep((eta * phi)/m, m)
} else {
Seed_Genes_Layer_Labeled <- paste0(rep(geneSeeds, LG), sep = "_",
rep(seq(LG), length.out = n * LG, each = n))
Seeds_Genes_Scores <- rep(tau/n, n)
Seed_Phenos_Layer_Labeled <- character()
Seeds_Phenos_Scores <- numeric()
}
}
### We prepare a data frame with the seeds.
Seeds_Score <- data.frame(Seeds_ID = c(Seed_Genes_Layer_Labeled,
Seed_Phenos_Layer_Labeled),
Score = c(Seeds_Genes_Scores, Seeds_Phenos_Scores),
stringsAsFactors = FALSE)
return(Seeds_Score)
}
CreateSupraadjacencyMatrix <- function(WholeNet, type, N, L, zeta,
is.weighted.graph) {
Graphs <- WholeNet[which(lapply(WholeNet, `[[`, "type") == type)]
Graphs <- lapply(Graphs, `[[`, "graph")
Idem_Matrix <- Diagonal(N, x = 1)
Col_Node_Names <- character()
Row_Node_Names <- character()
if (getDoParWorkers() > 1)
registerDoSEQ()
cl <- makeCluster(L)
registerDoParallel(cl)
i <- 1
SupraAdjacencyResult <- foreach(i = 1:L, .packages = c("igraph",
"Matrix")) %dopar%
{
SupraAdjacencyMatrixPart <- Matrix(0, ncol = N * L,
nrow = N, sparse = TRUE)
Adjacency_Layer <- as_adjacency_matrix(Graphs[[i]], attr = "weight",
sparse = TRUE)
if (!is.weighted.graph) {
Adjacency_Layer <- as_adjacency_matrix(Graphs[[i]], sparse = TRUE)
}
## We order the matrix by the node name. This way all the matrix will have the
## same. Additionally we include a label with the layer number for
##each node name.
Adjacency_Layer <- Adjacency_Layer[order(rownames(Adjacency_Layer)),
order(colnames(Adjacency_Layer))]
Layer_Row_Col_Names <- paste(colnames(Adjacency_Layer), i, sep = "_")
## We fill the diagonal blocks with the adjacencies matrix of each layer.
Position_ini_row <- 1
Position_end_row <- N
Position_ini_col <- 1 + (i - 1) * N
Position_end_col <- N + (i - 1) * N
SupraAdjacencyMatrixPart[(Position_ini_row:Position_end_row),
(Position_ini_col:Position_end_col)] <-
(1 - zeta) * (Adjacency_Layer)
# avoid division by zero for monoplex network
L_mdfd <- L - 1
if (L == 1)
L_mdfd <- 1
## We fill the off-diagonal blocks with the transition probability
##among layers.
for (j in 1:L) {
Position_ini_col <- 1 + (j - 1) * N
Position_end_col <- N + (j - 1) * N
if (j != i) {
SupraAdjacencyMatrixPart[(Position_ini_row:Position_end_row),
(Position_ini_col:Position_end_col)] <-
(zeta/(L_mdfd)) * Idem_Matrix
}
}
return(list(SupraAdjacencyMatrixPart, Layer_Row_Col_Names))
}
stopCluster(cl)
SupraAdjacencyResult <- unlist(SupraAdjacencyResult, recursive = FALSE)
# Row-Col names are even indexed
Col_Names <- do.call("c", SupraAdjacencyResult[seq(2, 2 * L, by = 2)])
# Parallele parts of the SupraAdjacencyMatrix are odd indexed
SupraAdjacencyMatrix <- do.call("rbind", SupraAdjacencyResult[seq(1, 2 * L,
by = 2)])
rownames(SupraAdjacencyMatrix) <- Col_Names
colnames(SupraAdjacencyMatrix) <- Col_Names
return(SupraAdjacencyMatrix)
}
CreateBipartiteMatrix <- function(WholeNet, N, M, gene_pool_nodes_sorted,
phenotype_pool_nodes_sorted, numCores) {
Gene_Phenoivar_Network <- WholeNet[which(lapply(WholeNet, `[[`, "type") ==
"bipartite")]
Gene_Phenoivar_Network <- lapply(Gene_Phenoivar_Network, `[[`, "DF")[[1]]
# Get the Subset of Gene-Phenoivar relations which have common genes in whole
# network
Gene_Phenoivar_Network <- Gene_Phenoivar_Network[which(
Gene_Phenoivar_Network$from %in% gene_pool_nodes_sorted), ]
Gene_Phenoivar_Network <- Gene_Phenoivar_Network[which(
Gene_Phenoivar_Network$to %in% phenotype_pool_nodes_sorted), ]
Gene_Phenoivar_Network <- graph.data.frame(Gene_Phenoivar_Network,
directed = FALSE)
el <- as_edgelist(Gene_Phenoivar_Network)
value <- edge_attr(Gene_Phenoivar_Network, name = "weight")
Bipartite_matrix <- Matrix(data = 0, nrow = N, ncol = M)
rownames(Bipartite_matrix) <- gene_pool_nodes_sorted
colnames(Bipartite_matrix) <- phenotype_pool_nodes_sorted
rindx <- unlist(lapply(el[, 1], function(x)
which(rownames(Bipartite_matrix) %in% x)))
cindx <- unlist(lapply(el[, 2], function(x)
which(colnames(Bipartite_matrix) %in% x)))
lenRindx <- length(rindx)
partLen <- floor(lenRindx/numCores)
if (partLen == 0) {
cat("numCores:", numCores, " - partLen: ", partLen, "\n")
stop("Assigned numCores is greater than data length! Assign less!")
}
rindx_parts <- list()
cindx_parts <- list()
value_parts <- list()
for (i in 1:numCores) {
stInd <- (i - 1) * partLen + 1
endInd <- i * partLen
if (i == numCores) {
endInd <- lenRindx
}
rindx_parts[[i]] <- rindx[stInd:endInd]
cindx_parts[[i]] <- cindx[stInd:endInd]
value_parts[[i]] <- value[stInd:endInd]
}
cl <- makeCluster(numCores)
registerDoParallel(cl)
Bipartite_matrix_result <- foreach(i = 1:numCores,
.packages = "Matrix") %dopar%
{
for (j in 1:length(rindx_parts[[i]])) {
Bipartite_matrix[rindx_parts[[i]][j],
cindx_parts[[i]][j]] <- value_parts[[i]][j]
}
return(Bipartite_matrix)
}
stopCluster(cl)
Bipartite_matrix <- Reduce("+", Bipartite_matrix_result)
return(Bipartite_matrix)
}
CreateSuprabipartiteMatrix <- function(Bipartite_matrix, N, M, LG, LP) {
SupraBipartiteMatrix <- Matrix(0, nrow = N * LG, ncol = M * LP, sparse = TRUE)
Row_Node_Names <- sprintf(paste0(rep(rownames(Bipartite_matrix), LG), "_%d"),
rep(seq_len(LG), each = N))
SupraBipartiteMatrix <- do.call(rbind, replicate(LG, Bipartite_matrix,
simplify = FALSE))
rownames(SupraBipartiteMatrix) <- Row_Node_Names
Col_Node_Names <- sprintf(paste0(rep(colnames(Bipartite_matrix), LP), "_%d"),
rep(seq_len(LP), each = M))
SupraBipartiteMatrix <- do.call(cbind, replicate(LP, SupraBipartiteMatrix,
simplify = FALSE))
colnames(SupraBipartiteMatrix) <- Col_Node_Names
return(SupraBipartiteMatrix)
}
CreateTransitionMatrix <- function(SupraBipartiteMatrix, N, M, LG, LP, lambda,
isTranspose) {
### TRUE = Row wise/pheno-gene, FALSE = Col Wise/gene-pheno
if (isTranspose) {
#### Transition Matrix for the inter-subnetworks links
TransitionMatrix <- Matrix(0, nrow = M * LP, ncol = N * LG, sparse = TRUE)
colnames(TransitionMatrix) <- rownames(SupraBipartiteMatrix)
rownames(TransitionMatrix) <- colnames(SupraBipartiteMatrix)
Row_Sum_Bipartite <- rowSums(SupraBipartiteMatrix, na.rm = FALSE, dims = 1,
sparseResult = FALSE)
# row wise normalization for propability
for (i in 1:(N * LG)) {
if (Row_Sum_Bipartite[i] != 0) {
TransitionMatrix[, i] <-
(lambda * SupraBipartiteMatrix[i, ])/Row_Sum_Bipartite[i]
}
}
} else {
#### Transition Matrix for the inter-subnetworks links
TransitionMatrix <- Matrix(0, nrow = N * LG, ncol = M * LP, sparse = TRUE)
colnames(TransitionMatrix) <- colnames(SupraBipartiteMatrix)
rownames(TransitionMatrix) <- rownames(SupraBipartiteMatrix)
Col_Sum_Bipartite <- colSums(SupraBipartiteMatrix, na.rm = FALSE, dims = 1,
sparseResult = FALSE)
# columnwise normalization for propability
for (j in 1:(M * LP)) {
if (Col_Sum_Bipartite[j] != 0) {
TransitionMatrix[, j] <-
(lambda * SupraBipartiteMatrix[, j])/Col_Sum_Bipartite[j]
}
}
}
return(TransitionMatrix)
}
CreateTransitionMatrix.gene_pheno <- function(SupraBipartiteMatrix, N, M,
LG, LP, lambda) {
#### Transition Matrix for the inter-subnetworks links
Transition_Gene_Phenoivar <- Matrix(0, nrow = N * LG, ncol = M * LP,
sparse = TRUE)
colnames(Transition_Gene_Phenoivar) <- colnames(SupraBipartiteMatrix)
rownames(Transition_Gene_Phenoivar) <- rownames(SupraBipartiteMatrix)
Col_Sum_Bipartite <- colSums(SupraBipartiteMatrix, na.rm = FALSE, dims = 1,
sparseResult = FALSE)
# columnwise normalization for propability
for (j in 1:(M * LP)) {
if (Col_Sum_Bipartite[j] != 0) {
Transition_Gene_Phenoivar[, j] <-
(lambda * SupraBipartiteMatrix[, j])/Col_Sum_Bipartite[j]
}
}
return(Transition_Gene_Phenoivar)
}
CreateTransitionMatrix.pheno_gene <- function(SupraBipartiteMatrix, N, M,
LG, LP, lambda) {
Transition_Phenoivar_Gene <- Matrix(0, nrow = M * LP, ncol = N * LG,
sparse = TRUE)
colnames(Transition_Phenoivar_Gene) <- rownames(SupraBipartiteMatrix)
rownames(Transition_Phenoivar_Gene) <- colnames(SupraBipartiteMatrix)
Row_Sum_Bipartite <- rowSums(SupraBipartiteMatrix, na.rm = FALSE, dims = 1,
sparseResult = FALSE)
# row wise normalization for propability
for (i in 1:(N * LG)) {
if (Row_Sum_Bipartite[i] != 0) {
Transition_Phenoivar_Gene[, i] <-
(lambda * SupraBipartiteMatrix[i, ])/Row_Sum_Bipartite[i]
}
}
return(Transition_Phenoivar_Gene)
}
CreateTransitionMultiplexNetwork <- function(SupraAdjacencyMatrix,
SupraBipartiteMatrix, Num,
inputLength, lambda, numCores) {
#### Transition Matrix for the intra-subnetworks links
Transition_Multiplex_Network <- Matrix(0, nrow = Num * inputLength,
ncol = Num * inputLength, sparse = TRUE)
rownames(Transition_Multiplex_Network) <- rownames(SupraAdjacencyMatrix)
colnames(Transition_Multiplex_Network) <- colnames(SupraAdjacencyMatrix)
Col_Sum_Multiplex <- colSums(SupraAdjacencyMatrix, na.rm = FALSE, dims = 1,
sparseResult = FALSE)
Row_Sum_Bipartite <- rowSums(SupraBipartiteMatrix, na.rm = FALSE, dims = 1,
sparseResult = FALSE)
partLen <- floor(Num * inputLength/numCores)
# below can only happen with toy samples
if (partLen == 0) {
stop("Assigned numCores is greater than data length! Assign less!")
}
parts <- vector("list", numCores)
for (i in 1:numCores) {
stInd <- (i - 1) * partLen + 1
endInd <- i * partLen
if (i == numCores) {
endInd <- Num * inputLength
}
parts[[i]][["start"]] <- stInd
parts[[i]][["end"]] <- endInd
}
cl <- makeCluster(numCores + 1)
registerDoParallel(cl)
Transition_Multiplex_Network_Result <- foreach(i = 1:numCores,
.packages = "Matrix") %dopar%
{
for (j in parts[[i]]["start"]:parts[[i]]["end"]) {
if (Row_Sum_Bipartite[j] != 0) {
Transition_Multiplex_Network[, j] <- ((1 - lambda) *
SupraAdjacencyMatrix[, j])/Col_Sum_Multiplex[j]
} else {
Transition_Multiplex_Network[, j] <-
SupraAdjacencyMatrix[, j]/Col_Sum_Multiplex[j]
}
}
return(Transition_Multiplex_Network)
}
Transition_Multiplex_Network <- Reduce("+",
Transition_Multiplex_Network_Result)
stopCluster(cl)
return(Transition_Multiplex_Network)
}
CreateGeneTransitionMultiplexNetwork <- function(SupraAdjacencyMatrix,
SupraBipartiteMatrix, N, LG,
lambda, numCores) {
#### Transition Matrix for the intra-subnetworks links
Transition_Multiplex_Network <- Matrix(0, nrow = N * LG, ncol = N * LG,
sparse = TRUE)
rownames(Transition_Multiplex_Network) <- rownames(SupraAdjacencyMatrix)
colnames(Transition_Multiplex_Network) <- colnames(SupraAdjacencyMatrix)
Col_Sum_Multiplex <- colSums(SupraAdjacencyMatrix, na.rm = FALSE, dims = 1,
sparseResult = FALSE)
Row_Sum_Bipartite <- rowSums(SupraBipartiteMatrix, na.rm = FALSE, dims = 1,
sparseResult = FALSE)
partLen <- floor(N * LG/numCores)
# below can only happen with toy samples
if (partLen == 0) {
stop("Assigned numCores is greater than data length! Assign less!")
}
parts <- vector("list", numCores)
for (i in 1:numCores) {
stInd <- (i - 1) * partLen + 1
endInd <- i * partLen
if (i == numCores) {
endInd <- N * LG
}
parts[[i]][["start"]] <- stInd
parts[[i]][["end"]] <- endInd
}
cl <- makeCluster(numCores + 1)
registerDoParallel(cl)
Transition_Multiplex_Network_Result <- foreach(i = 1:numCores,
.packages = "Matrix") %dopar%
{
for (j in parts[[i]]$start:parts[[i]]$end) {
if (Row_Sum_Bipartite[j] != 0) {
Transition_Multiplex_Network[, j] <- ((1 - lambda) *
SupraAdjacencyMatrix[, j])/Col_Sum_Multiplex[j]
} else {
Transition_Multiplex_Network[, j] <-
SupraAdjacencyMatrix[, j]/Col_Sum_Multiplex[j]
}
}
return(Transition_Multiplex_Network)
}
Transition_Multiplex_Network <- Reduce("+",
Transition_Multiplex_Network_Result)
stopCluster(cl)
return(Transition_Multiplex_Network)
}
CreatePhenoTransitionMultiplexNetwork <- function(SupraAdjacencyMatrixPheno,
SupraBipartiteMatrix, M, LP,
lambda, numCores) {
Transition_Multiplex_Network_Pheno <- Matrix(0, nrow = M * LP, ncol = M * LP,
sparse = TRUE)
rownames(Transition_Multiplex_Network_Pheno) <-
rownames(SupraAdjacencyMatrixPheno)
colnames(Transition_Multiplex_Network_Pheno) <-
colnames(SupraAdjacencyMatrixPheno)
Col_Sum_Multiplex <- colSums(SupraAdjacencyMatrixPheno, na.rm = FALSE,
dims = 1, sparseResult = FALSE)
Col_Sum_Bipartite <- colSums(SupraBipartiteMatrix, na.rm = FALSE,
dims = 1, sparseResult = FALSE)
partLen <- floor(M * LP/numCores)
# below can only happen with toy samples
if (partLen == 0) {
stop("Assigned numCores is greater than data length! Assign less!")
}
parts <- vector("list", numCores)
for (i in 1:numCores) {
stInd <- (i - 1) * partLen + 1
endInd <- i * partLen
if (i == numCores) {
endInd <- M * LP
}
parts[[i]][["start"]] <- stInd
parts[[i]][["end"]] <- endInd
}
cl <- makeCluster(numCores + 1)
registerDoParallel(cl)
Transition_Multiplex_Network_Pheno_Result <- foreach(i = 1:numCores,
.packages = "Matrix") %dopar%
{
for (j in parts[[i]]$start:parts[[i]]$end) {
if (Col_Sum_Bipartite[j] != 0) {
Transition_Multiplex_Network_Pheno[, j] <- ((1 - lambda) *
SupraAdjacencyMatrixPheno[, j])/Col_Sum_Multiplex[j]
} else {
Transition_Multiplex_Network_Pheno[, j] <-
SupraAdjacencyMatrixPheno[, j]/Col_Sum_Multiplex[j]
}
}
return(Transition_Multiplex_Network_Pheno)
}
Transition_Multiplex_Network_Pheno <- Reduce("+",
Transition_Multiplex_Network_Pheno_Result)
stopCluster(cl)
return(Transition_Multiplex_Network_Pheno)
}
rankGenes <- function(Number_Genes, Number_Layers, Results, Seeds) {
## We sort the score to obtain the ranking of Genes and Phenotypes.
genes_rank <- data.frame(Node = character(length = Number_Genes), Score = 0)
genes_rank$Node <- gsub("_1", "", row.names(Results)[1:Number_Genes])
## We calculate the Geometric Mean among the genes in the different layers.
genes_rank$Score <- GeometricMean(as.vector(Results[, 1]),
Number_Layers, Number_Genes)
genes_rank_sort <- genes_rank[with(genes_rank, order(-Score, Node)), ]
### We remove the seed genes from the Ranking
genes_rank_sort_NoSeeds <-
genes_rank_sort[which(!genes_rank_sort$Node %in% Seeds), ]
genes_rank_sort_NoSeeds <-
genes_rank_sort_NoSeeds[, c("Node", "Score")]
return(genes_rank_sort_NoSeeds)
}
RankPhenotypes <- function(Number_Genes, Num_Gene_Layers, Number_Phenotypes,
Num_Pheno_Layers, Results, Seeds) {
## rank_phenotypes
phenotypes_rank <- data.frame(Node = character(length = Number_Phenotypes),
Score = 0)
phenotypes_rank$Node <- gsub("_1", "",
row.names(Results)[(Number_Genes *
Num_Gene_Layers + 1):(Number_Genes *
Num_Gene_Layers + Number_Phenotypes)])
phenotypes_rank$Score <- GeometricMean(as.vector(Results[, 1])[(Number_Genes *
Num_Gene_Layers + 1):nrow(Results)],
Num_Pheno_Layers, Number_Phenotypes)
phenotypes_rank_sort <- phenotypes_rank[with(phenotypes_rank, order(-Score,
Node)), ]
phenotypes_rank_sort_NoSeeds <- phenotypes_rank_sort[which(
!phenotypes_rank_sort$Node %in% Seeds), ]
phenotypes_rank_sort_NoSeeds <- phenotypes_rank_sort_NoSeeds[, c("Node", "Score")]
return(phenotypes_rank_sort_NoSeeds)
}
GeometricMean <- function(Scores, L, N) {
FinalScore <- numeric(length = N)
for (i in seq_len(N)) {
FinalScore[i] <- prod(Scores[seq(from = i, to = N * L, by = N)])^(1/L)
}
return(FinalScore)
}
#' @title Random Walk Restart (RWR)
#'
#' @description This method runs the random walk with restart on the provided
#' walk matrix. It returns a data frame including ranked genes and phenotypes,
#' and RWR scores of the genes and phenotypes. If generatePvalue is
#' TRUE then it generates p-values along with the ranks.
#'
#' @param walkMatrix This is the walk matrix generated by the function
#' CreateWalkMatrix.
#' @param geneSeeds This is a vector for storing the ids of the genes that RWR starts
#' its walk. The final ranks show the proximity of the genes/phenotypes to the seed genes.
#' @param phenoSeeds This is a vector for storing the ids of the phenotypes that RWR starts
#' its walk. The final ranks show the proximity of the genes/phenotypes to the seed phenotypes.
#' @param generatePValue If this is TRUE, it will generate the probability values for each
#' of the gene/phenotype rankings. If it is FALSE then the function will only return the ranks of genes/phenotype.
#' @param numCores This is the number of cores used for parallel processing.
#' @param r This parameter controls the global restart probability of RWR, and it has a default value of 0.7.
#' @param eta This parameter controls restarting of RWR either to a gene seed or phenotype seeds,
#' higher eta means utilizing gene seeds more than phenotype seeds, and it has a default value of 0.5.
#' @param tau This is a vector that stores weights for each of the 'gene'
#' layer in the complex gene and phenotype network. Each value of the vector
#' corresponds to the order of the network files in the input file of CreateWalkMatrix function. The weights must
#' sum up to the same number of gene layers. Default value gives equal weight to gene layers.
#' @param phi This is a vector that stores weights for each of the 'phenotype'
#' layer in the complex gene and phenotype network. Each value of the vector
#' corresponds to the order of the network files in the input file of CreateWalkMatrix function. The weights must
#' sum up to the same number of phenotype layers. Default value gives equal weight to phenotype layers.
#' @param S This is the number of random samples to be used for p-value calculation
#' It is highly recommended to use S=1000.
#' @return If the parameter generatePValue is TRUE, then this function returns a
#' data frame of ranked genes/phenotypes with p-values with three columns; Gene/Phenotype
#' ID, score, p-value. If generatePValue is FALSE, then it returns a data frame
#' of ranked genes/phenotypes with two columns; Gene/Phenotype ID, score.
#'
#' @examples
#' wm <- CreateWalkMatrix('input_file.txt')
#' ranksWithoutPVal <- RandomWalkRestart(wm, c('g1', 'g2'), c('p1'), FALSE)
#' ranksWithPVal <- RandomWalkRestart(wm, c('g1', 'g2'), c(), TRUE, S=10)
#' ranksWithoutPval <- RandomWalkRestart(wm, c('g1'), c(),
#' FALSE, 1, 0.5, 0.6, tau=c(1.5,0.5), phi=c(0.5,1.5))
#'
RandomWalkRestart <- function(walkMatrix, geneSeeds, phenoSeeds,
generatePValue = TRUE, numCores = 1,
r = 0.7, eta = 0.5, tau = NULL, phi = NULL, S=1000) {
if (is.null(tau)) {
tau <- rep(1, walkMatrix[["LG"]])
if (sum(tau)/walkMatrix[["LG"]] != 1) {
stop("Incorrect tau, the sum of its values should be equal to the number
of gene layers")
}
}
if (is.null(phi)) {
phi <- rep(1, walkMatrix[["LP"]])
if (sum(phi)/walkMatrix[["LP"]] != 1) {
stop("Incorrect phi, the sum of its values should be equal to the number
of phenotype layers")
}
}
gene_pool_nodes_sorted <- walkMatrix[["genes"]]
phenotype_pool_nodes_sorted <- walkMatrix[["phenotypes"]]
SeedList <- CheckSeeds(c(geneSeeds, phenoSeeds), gene_pool_nodes_sorted,
phenotype_pool_nodes_sorted)
Seeds_Score <- GetSeedScores(SeedList[["Genes_Seeds"]],
SeedList[["Pheno_Seeds"]],
eta, walkMatrix[["LG"]],
walkMatrix[["LP"]], tau/walkMatrix[["LG"]],
phi/walkMatrix[["LP"]])
Threeshold <- 1e-10
NetworkSize <- ncol(walkMatrix[["WM"]])
### We initialize the variables to control the flux in the RW algo.
residue <- 1
iter <- 1
#We define the prox_vector(The vector we will move after the
#first RW iteration.
#We start from The seed. We have to take in account that the walker with
#restart in some of the Seed genes,
#depending on the score we gave in that file).
prox_vector <- Matrix(0, nrow = NetworkSize, ncol = 1, sparse = TRUE)
prox_vector[which(colnames(walkMatrix[["WM"]]) %in% Seeds_Score[, 1])] <-
(Seeds_Score[, 2])
prox_vector <- prox_vector/sum(prox_vector)
restart_vector <- prox_vector
while (residue >= Threeshold) {
old_prox_vector <- prox_vector
prox_vector <- (1 - r) * (walkMatrix[["WM"]] %*% prox_vector) +
r * restart_vector
residue <- sqrt(sum((prox_vector - old_prox_vector)^2))
iter <- iter + 1
}
RWGeneRankDF <- rankGenes(walkMatrix[["N"]], walkMatrix[["LG"]],
prox_vector, SeedList[["Genes_Seeds"]])
RWPhenoRankDF <- RankPhenotypes(walkMatrix[["N"]], walkMatrix[["LG"]],
walkMatrix[["M"]], walkMatrix[["LP"]],
prox_vector, SeedList[["Pheno_Seeds"]])
RWNodeRankDF <- rbind(RWGeneRankDF, RWPhenoRankDF)
if (generatePValue) {
# Generate random seeds
RandomSeeds <- GenerateRandomSeedVector(walkMatrix,
SeedList[["Genes_Seeds"]],
SeedList[["Pheno_Seeds"]], S)
# RandomSeeds calculate random ranks walkMatrix, geneSeedsList,
# phenoSeedsList, N, LG, LP, eta, tau, phi, r, funcs, no.cores=4
Rand_Seed_Node_Rank <- RandomWalkRestartBatch(walkMatrix[["WM"]],
RandomSeeds[["gene"]],
RandomSeeds[["phenotype"]],
walkMatrix[["N"]],
walkMatrix[["M"]],
walkMatrix[["LG"]],
walkMatrix[["LP"]], eta,
tau/walkMatrix[["LG"]],
phi/walkMatrix[["LP"]],
r, numCores)
# calculate p-values using random ranks
dfRank <- CalculatePvalues(RWNodeRankDF, Rand_Seed_Node_Rank, numCores)
# returns cropped version will have col are node, score, p value
dfRankCropped <- dfRank[, c(1:2, (ncol(dfRank) - 2))]
return(dfRankCropped)
} else {
rownames(RWNodeRankDF) <- NULL
return(RWNodeRankDF)
}
}
RandomWalkRestartSingle <- function(walkMatrix, r, Seeds_Score) {
#We define the threshold and the number maximum of iterations for the randon
#walker. Seeds_Score <- GetSeedScores(geneSeeds,CultSeeds, Parameters$eta, LG,
#LC, Parameters$tau/LG, Parameters$phi/LC)
Threeshold <- 1e-10
NetworkSize <- ncol(walkMatrix)
### We initialize the variables to control the flux in the RW algo.
residue <- 1
iter <- 1
#We define the prox_vector(The vector we will move after the first RW iteration.
#We start from The seed. We have to take in account that the walker with restart
#in some of the Seed genes, depending on the score we gave in that file).
prox_vector <- Matrix(0, nrow = NetworkSize, ncol = 1, sparse = TRUE)
prox_vector[which(colnames(walkMatrix) %in% Seeds_Score[, 1])] <-
(Seeds_Score[, 2])
prox_vector <- prox_vector/sum(prox_vector)
restart_vector <- prox_vector
while (residue >= Threeshold) {
old_prox_vector <- prox_vector
prox_vector <- (1 - r) * (walkMatrix %*% prox_vector) + r * restart_vector
residue <- sqrt(sum((prox_vector - old_prox_vector)^2))
iter <- iter + 1
}
print("RWR-MH number of iteration: ")
print(iter - 1)
return(prox_vector)
}
GetConnectivity <- function(NetworkDF, gene_pool_nodes_sorted,
phenotype_pool_nodes_sorted) {
WholeNet <- do.call("rbind", (lapply(NetworkDF, `[[`, "DF")))
g <- graph.data.frame(WholeNet, directed = FALSE)
A <- as_adjacency_matrix(g, sparse = TRUE, attr = "weight")
Degree = apply(A, 2, sum)
Connectivity <- data.frame(Node = as.character(A@Dimnames[[1]]),
Degree = Degree, row.names = NULL)
Connectivity <- Connectivity[order(Connectivity$Degree, decreasing = TRUE), ]
Connectivity$Node <- as.character(Connectivity$Node)
GeneConnectivity <- Connectivity[which(Connectivity$Node %in%
gene_pool_nodes_sorted), ]
PhenoConnectivity <- Connectivity[which(Connectivity$Node %in%
phenotype_pool_nodes_sorted), ]
return(list(gene = GeneConnectivity, pheno = PhenoConnectivity))
}
#' @title Create Walk Matrix
#'
#' @description Generates a Walk Matrix (Transition Matrix) from Gene and Phenotype networks for RWR.
#'
#' @param inputFileName The name of the text file that contains the names of
#' gene and phenotype network files. For more information on the file formatting,
#' please refer the vignette.
#' @param numCores This is the number of cores used for parallel processing.
#'
#' @param delta This is the probability of jumping between gene layers. High delta means
#' RWR is high likely to jump to other layers in gene multiplex network. It has a default value of 0.5.
#' @param zeta This is the probability of jumping between phenotype layers. High zeta means
#' RWR is high likely to jump to other layers in phenotype multiplex network. It has a default value of 0.5.
#' @param lambda This is the probability of jumping between gene and phenotype multiplex networks. High lambda
#' means RWR is more likely to exploit the bipartite relation. It has a default value of 0.5.
#'
#' @return This returns a list containing the walk matrix, a sorted list of gene ids, a sorted list of phenotype ids,
#' the connectivity degree of the genes, the connectivity degree of the phenotypes, the number of gene layers,
#' the number of phenotype layers, the number of genes and the number of phenotypes in the final complex network.
#'
#' @export
#'
#' @examples
#' wm <- CreateWalkMatrix('input_file.txt')
#' customWm <- CreateWalkMatrix('input_file.txt', numCores=1, delta=0.7, zeta=0.7, lambda=0.7)
#'
#'
CreateWalkMatrix <- function(inputFileName, numCores = 1, delta = 0.5,
zeta = 0.5, lambda = 0.5) {
Settings <- ReadSettings(inputFileName)
Parameters <- AddParameters(delta, zeta, lambda)
FilesDF <- Settings$FilesDF
LG <- Settings$LG
LP <- Settings$LP
FullNet <- ReadNetworkLayers(FilesDF)
gene_pool_nodes_sorted <- FullNet$gene_pool_nodes_sorted
phenotype_pool_nodes_sorted <- FullNet$phenotype_pool_nodes_sorted
FullNet <- FullNet$NetworkLayers
N <- length(gene_pool_nodes_sorted)
M <- length(phenotype_pool_nodes_sorted)
SupraAdjacencyMatrix <- CreateSupraadjacencyMatrix(FullNet, "gene", N, LG,
Parameters$zeta, TRUE)
SupraAdjacencyMatrixPheno <- CreateSupraadjacencyMatrix(FullNet, "phenotype",
M, LP,
Parameters$delta,
TRUE)
BipartiteMatrix <- CreateBipartiteMatrix(FullNet, N, M,
gene_pool_nodes_sorted,
phenotype_pool_nodes_sorted, numCores)
## We expand the biparite graph to fit the multiplex dimensions. The biparti
## matrix has now NL x MK The genes in all the layers have to point to the
## phenotypes in all layers
SupraBipartiteMatrix <- CreateSuprabipartiteMatrix(BipartiteMatrix, N, M, LG,
LP)
Transition_Gene_Phenoivar <- CreateTransitionMatrix(SupraBipartiteMatrix, N,
M, LG, LP,
Parameters$lambda, FALSE)
Transition_Phenoivar_Gene <- CreateTransitionMatrix(SupraBipartiteMatrix, N,
M, LG, LP,
Parameters$lambda, TRUE)
Gene_Transition_Multiplex_Network <- CreateGeneTransitionMultiplexNetwork(
SupraAdjacencyMatrix,
SupraBipartiteMatrix,
N, LG,
Parameters$lambda,
numCores)
# CREATE TRANSITION MULTIPLEX NETWORK FOR CULTIVARS t1 <- Sys.time()
Pheno_Transition_Multiplex_Network <- CreatePhenoTransitionMultiplexNetwork(
SupraAdjacencyMatrixPheno,
SupraBipartiteMatrix, M,
LP, Parameters$lambda,
numCores)
Multiplex_Heterogeneous_Matrix <- rbind(cbind(
Gene_Transition_Multiplex_Network,
Transition_Gene_Phenoivar),
cbind(Transition_Phenoivar_Gene,
Pheno_Transition_Multiplex_Network))
# Extract candidate genes for further Random Walks on this WM
GenePhenoDF <- lapply(FullNet[which(lapply(FullNet, `[[`, "type") ==
"bipartite")], `[[`, "DF")[[1]]
CandidateGenes <- unique(GenePhenoDF$from)
Connectivity <- GetConnectivity(FullNet, gene_pool_nodes_sorted,
phenotype_pool_nodes_sorted)
WM <- list(WM = Multiplex_Heterogeneous_Matrix,
genes = gene_pool_nodes_sorted,
phenotypes = phenotype_pool_nodes_sorted,
gene_connectivity = Connectivity[["gene"]],
phenotype_connectivity = Connectivity[["pheno"]],
LG = LG, LP = LP, N = N, M = M)
registerDoSEQ()
return(WM)
}
AssignGroupToConnectivityDF <- function(ConnectivityDF, no.groups) {
chunk.size <- ceiling(nrow(ConnectivityDF)/no.groups)
groups <- rep(1:no.groups, each = chunk.size, length.out =
nrow(ConnectivityDF))
ConnectivityDF$Group <- groups
ConnectivityDF
}
GenerateRandomSeeds <- function(Seeds, ConnectivityDF, S = 1000, no.groups = 10,
replace_bool = FALSE) {
seed.set.size <- length(Seeds)
sample_size <- ceiling((S/no.groups) * seed.set.size)
#set.seed(1)
# Stratified Sample 'sample_size' nodes from each group as Random Seeds
ConnectivityDF <- ConnectivityDF[which(!ConnectivityDF$Node %in% Seeds), ]
ConnectivityDF <- dplyr::group_by(ConnectivityDF, ConnectivityDF$Group)
RandomSeeds <- dplyr::sample_n(ConnectivityDF, sample_size,
replace = replace_bool)
#We are creating 'seed.set.size' length seed sets by taking 'nodes' from each
#group To this end, we determine a split order and sort the RandomSeeds DF wrt
#to this 'order' column
order_vec <- vector()
for (i in 1:no.groups) {
order_vec <- c(order_vec, seq(from = i, to = S * seed.set.size,
by = no.groups))
}
RandomSeeds$Order <- as.factor(order_vec)
RandomSeeds <- RandomSeeds[order(RandomSeeds$Order), ]
# split the sorted DF into 'seed.set.size' length vectors
RandomSeeds <- split(RandomSeeds$Node,
(seq(nrow(RandomSeeds)) - 1)%/%seed.set.size)
RandomSeeds
}
GenerateRandomSeedVector <- function(WM, geneSeeds, phenoSeeds, S = 1000,
no.groups.gene = 10,
no.groups.pheno = 5) {
if (length(geneSeeds) == 0 && length(phenoSeeds) == 0)
stop("No seeds provided!")
GeneConnectivity <- AssignGroupToConnectivityDF(WM[["gene_connectivity"]],
no.groups = no.groups.gene)
PhenoConnectivity<-AssignGroupToConnectivityDF(WM[["phenotype_connectivity"]],
no.groups = no.groups.pheno)
RandomgeneSeeds <- list()
if (length(geneSeeds) != 0) {
RandomgeneSeeds <- GenerateRandomSeeds(geneSeeds, GeneConnectivity, S,
no.groups.gene, TRUE)
# if (length(geneSeeds) != 1 && any(duplicated(RandomgeneSeeds[1:S])))
# warning("WARN: Duplicated random 'gene' seeds generated!")
}
RandomphenoSeeds <- list()
if (length(phenoSeeds) != 0) {
RandomphenoSeeds <- GenerateRandomSeeds(phenoSeeds, PhenoConnectivity,
S, no.groups.pheno, TRUE)
# if (length(phenoSeeds) != 1 && any(duplicated(RandomphenoSeeds[1:S])))
# warning("WARN: Duplicated random 'phenotype' seeds generated!")
}
return(list(gene = RandomgeneSeeds, phenotype = RandomphenoSeeds))
}
CalculatePvalues <- function(RWGeneRanks, Rand_Seed_Gene_Rank, no.cores) {
# t <- Sys.time()
S <- ncol(Rand_Seed_Gene_Rank)/3
cl <- makeCluster(no.cores)
registerDoParallel(cl)
dfRanks <- foreach(i = 1:(S)) %dopar% {
# traverse all gene names and get their ranks in random run result
rand_ranks <- sapply(RWGeneRanks$Node, function(node) {
which(Rand_Seed_Gene_Rank[, 2 + 3 * (i - 1)] %in% node)
})
#if genes are in the random seed set for this run then there are no rank for
#them
idx <- !(sapply(rand_ranks, length))
rand_ranks[idx] <- NA
dfRank <- unname(unlist(rand_ranks))
dfRank
}
stopCluster(cl)
# getDoParWorkers() create dfRanks DF from list output of random seed ranks
dfRanks <- suppressMessages(as.data.frame(bind_cols(dfRanks)))
# add the Gene names as the first column
dfRanks <- cbind(RWGeneRanks$Node, RWGeneRanks$Score, dfRanks,
stringsAsFactors = FALSE)
colnames(dfRanks)[1:2] <- c("Node", "Score")
rank.offset <- seq(100)
# create offset rank columns by adding offset vector for comparison
dfRanks <- cbind(dfRanks, replicate(length(rank.offset),
as.numeric(rownames(dfRanks))) +
t(replicate(nrow(dfRanks), rank.offset)))
# colnames(dfRanks)[(S + 3):(S + length(rank.offset) + 2)] <- paste0("Rank",
# rank.offset)
# calculate P values by comparing (random seed rank + offset value) vs (actual
# seed rank)
for (i in 1:length(rank.offset)) {
dfRanks["P_value"] <- base::rowMeans(dfRanks[, 3:(S+2)] <
(dfRanks[,S+2+i]), na.rm = TRUE)
}
# Calculate Median and Average ranks of genes for Random Seeds
dfRanks$Med <- apply(dfRanks[, 3:(2 + S)], 1, median)
dfRanks$Ave <- rowMeans(dfRanks[, 3:(2 + S)])
dfRanks
}
RandomWalkRestartBatch <- function(walkMatrix, geneSeedsList, phenoSeedsList,
N, M, LG, LP, eta, tau, phi, r, no.cores = 4) {
cl <- makeCluster(no.cores)
registerDoParallel(cl)
seedsLength <- ifelse(length(geneSeedsList) != 0, length(geneSeedsList),
length(phenoSeedsList))
funcs <- c("GetSeedScores", "RandomWalkRestartSingle", "rankGenes", "RankPhenotypes",
"GeometricMean")
#globalVariables("i")
i <- 1
Rand_Seed_Gene_Rank <- foreach(i = 1:seedsLength, .combine = cbind, .export =
funcs, .packages = c("Matrix")) %dopar% {
if (length(geneSeedsList) != 0 && length(phenoSeedsList) != 0) {
Seeds_Score <- GetSeedScores(geneSeedsList[[i]], phenoSeedsList[[i]],
eta, LG, LP, tau, phi)
} else if (length(geneSeedsList) != 0) {
Seeds_Score <- GetSeedScores(geneSeedsList[[i]], vector(), eta, LG,
LP, tau, phi)
} else {
Seeds_Score <- GetSeedScores(vector(), phenoSeedsList[[i]], eta, LG,
LP, tau, phi)
}
Rand_Seed_Res <- RandomWalkRestartSingle(walkMatrix, r, Seeds_Score)
Rand_Seed_Gene_Rank <- rankGenes(N, LG, Rand_Seed_Res,
ifelse(length(geneSeedsList) !=
0, geneSeedsList[[i]], vector()))
Rand_Seed_Pheno_Rank <- RankPhenotypes(N, LG, M, LP, Rand_Seed_Res,
ifelse(length(phenoSeedsList) !=
0, phenoSeedsList[[i]], vector()))
return(rbind(Rand_Seed_Gene_Rank, Rand_Seed_Pheno_Rank))
}
stopCluster(cl)
return(Rand_Seed_Gene_Rank)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.