# get_exp_cor_edges
#' @title get co-expression genes
#' @description compute the correlation coefficient of gene expression data,
#' return the most related genes
#' @param gene_list, a vector of characters
#' @param data_std, a matrix of data, such as gene expression data, whose
#' rownames are gene
#' names or ids and colnames are sample names
#' @param method, a chareater, which method to compare the correlation of gene
#' expression data
#' it could be "pearson", "kendall", "spearman", "spearman" is default
#' @param num, an integer, the top number of co-expressed genes to choose, 5 is
#' default
#' @param cor_threshold, a numeric, the threshold of the correlation coefficient
#' to choose, default is \emph{NULL}
#' @details This function computes the correlation coefficient of gene
#' expression data between \code{gene_list}
#' and \code{data_std}, it will return a data.frame which can be translated
#' a graph or network.
#' In the data.frame, \code{source} refers to the genes in \code{gene_list},
#' \code{target} refers to the top coexpressed genes,
#' \code{weight} refers to the correlated coefficient of genes in \code{source}
#' and \code{target}, \code{pathway} is "uncertain" and \code{edge_type} is
#' "coexp".Note, when choosing the top co-expressed genes, we will use the
#' \code{num} param if the \code{cor_threshold} param is \emph{NULL}. If not,
#' we will choose the \code{cor_threshold} param.
#' @return the coexp of edges.
#' @examples
#' data(data_std)
#' data(PFP_test1)
#' rank1 <- rank_PFP(object = PFP_test1,total_rank = TRUE)
#' pathway_select <- refnet_info(rank1)[1,"id"]
#' gene_test <- pathways_score(rank1)$genes_score[[pathway_select]]$ENTREZID
#' edges_coexp <- get_exp_cor_edges(gene_test,data_std)
#' @export
get_exp_cor_edges <- function(gene_list,
data_std,
method="spearman",
num=5,cor_threshold=NULL){
bg_genelist <- rownames(data_std)
match_list <- match(x = gene_list,table = bg_genelist,nomatch = 0)
if (sum(match_list==0)==length(gene_list)){
stop("gene_list must have common elements with rownames of data_std,\
maybe the gene id types you choose in the two data set are not\
consistent.")
}else if(sum(match_list!=0)<length(match_list)){
print("Some genes in gene_list can't be found in data_std. These genes\
will be removed.")
print(gene_list[match_list==0])
gene_list <- gene_list[match_list!=0]
}
#get the correlation number of gene.
get_gene_cor_num <- function(gene,data_std,method,num){
cor_list <- vapply(X =rownames(data_std),
FUN = function(name0)cor(x = unlist(data_std[gene,]),
y = unlist(data_std[name0,]),
method = method),0)
cor_list <- cor_list[order(abs(cor_list),decreasing = TRUE)]
cor_data_frame <- data.frame(source=rep(x = gene,num),
target=names(cor_list)[2:(num+1)],
weight=cor_list[2:(num+1)])
}
#get the gene correlation thresh.
get_gene_cor_thresh <- function(gene,data_std,method,cor_threshold){
cor_list <- vapply(X =rownames(data_std),
FUN = function(name0)cor(x = unlist(data_std[gene,]),
y = unlist(data_std[name0,]),
method = method),0)
target <- as.character(names(cor_list)[cor_list>cor_threshold])
weight <- cor_list[cor_list>cor_threshold]
cor_data_frame <- data.frame(source=rep(x = gene,length(target)),
target=target,weight=weight)
}
if (is.null(cor_threshold)){
edges_coeff_list <- lapply(X = gene_list,
FUN = get_gene_cor_num,
data_std,
method,num)
}else if(is.numeric(cor_threshold)){
edges_coeff_list <- lapply(X = gene_list,
FUN = get_gene_cor_thresh,
data_std,
method,
cor_threshold)
}else{
stop("cor_threshold must be a numeric!")
}
edges_coeff <- do.call(rbind,edges_coeff_list)
edges_coeff2 <- data.frame(source=edges_coeff$target,
target=edges_coeff$source,
weight=edges_coeff$weight)
delete_row <- lapply(X = seq_len(nrow(edges_coeff)),
FUN = function(x)seq_len(
nrow(edges_coeff))[
edges_coeff[x,1]==edges_coeff2[,1] &
edges_coeff[x,2]==edges_coeff2[,2]])
delete_row <- lapply(X = seq_len(length(delete_row)),
function(x)delete_row[[x]]>x)
names(delete_row) <- seq_len(nrow(edges_coeff))
delete_row <- unlist(delete_row)
delete_row <- delete_row[delete_row==TRUE]
delete_row <- as.numeric(names(delete_row))
if (length(delete_row)!=0){
edges_coeff <- edges_coeff[-delete_row,]
}
rownames(edges_coeff) <- seq_len(1):nrow(edges_coeff)
edges_coeff[["pathway"]] <- rep("uncertain",nrow(edges_coeff))
edges_coeff[["edge_type"]] <- rep("coexp",nrow(edges_coeff))
return(edges_coeff)
}
# get_bg_related_kegg
#' @title get_bg_related_kegg
#' @description This function will select all genes in all kegg pathways which
#' are directly connected with the genes in \code{gene_list}
#' @param gene_list, a vector of characters, refers to genes ids
#' @param PFPRefnet, an object of PFPRefnet class, it contains all kegg pathways.
#' @param rm_duplicated, a logical, whether to remove the duplicated kegg edges
#' in different pathways.
#' Defalut is \emph{FALSE}
#' @details It will return a data.frame which can be translated a graph or
#' network.
#' In the data.frame, \code{source} refers to the genes in \code{gene_list},
#' \code{target} refers to the directly connected genes in kegg, \code{weight}
#' is 0.5, no real means, \code{pathway}refers to the pathway which the edge
#' emerge and \code{edge_type} is "kegg".Note, if \code{rm_duplicated} is
#' \emph{FALSE}, it may return many duplicated edges,which will be complex when
#' plotting a network. If \code{rm_duplicated} is \emph{TRUE},it will retain the
#' first pathway which contains the duplicated edge.
#' @return the related kegg network.
#' @examples
#' data(PFPRefnet_hsa)
#' data(gene_list_hsa)
#' edges_kegg <- get_bg_related_kegg(gene_list_hsa,
#' PFPRefnet=PFPRefnet_hsa,
#' rm_duplicated = TRUE)
#' @export
get_bg_related_kegg <- function(gene_list,PFPRefnet,rm_duplicated = FALSE){
if (sum(is.na(as.numeric(gene_list))) > 0)
stop("You should translate all your gene ids into ENTREZID!")
kegg_edges <- lapply(X = network(PFPRefnet),FUN = function(x){
if (is(x, "graphNEL")){
em <- edgeMatrix(x, duplicates = FALSE)
dat <- data.frame(cbind(from = I(nodes(x)[em[1,]])))
dat$from <- as.character(dat$from)
dat$to <- nodes(x)[em[2, ]]
dat$tag1 <- paste(nodes(x)[em[1, ]], nodes(x)[em[2,]], sep = "~")
dat$tag2 <- paste(nodes(x)[em[2, ]], nodes(x)[em[1,]], sep = "~")
return(dat)
}
if (is(x, "igraph")) {
dat <- as_edgelist(x)
return(dat)
}
})
data_tf <- llply(kegg_edges,
function(kegg_edge)apply(X=kegg_edge,
MARGIN=1,
FUN=function(x)(
(x[1] %in% gene_list) | (
x[2] %in% gene_list))))
kegg_edges1 <- lapply(X = names(kegg_edges),
function(x)kegg_edges[[x]][data_tf[[x]],])
kegg_edges1 <- lapply(X = seq_len(length(kegg_edges1)),
function(x)data.frame(source=kegg_edges1[[x]][,1],
target=kegg_edges1[[x]][,2],
weight=rep(0.5,
nrow(
kegg_edges1[[x]])),
pathway=rep(
names(kegg_edges)[x],
nrow(
kegg_edges1[[x]])),
edge_type=rep("kegg",
nrow(
kegg_edges1[[x]]
)
)
)
)
edges_kegg <- do.call(rbind,kegg_edges1)
if (rm_duplicated == TRUE){
edges_kegg <- edges_kegg[!duplicated(edges_kegg[,c("source","target")]),]
}
return(edges_kegg)
}
# trans_edges_id
#' @title trans_edges_id
#' @description translate the id name in edges_data
#' @param edges_data, the edges_data to translate, it can be the data.frame got
#' from \code{\link{get_exp_cor_edges}} or \code{\link{get_asso_net}}, or a
#' data.frame contains the same colnames with them.
#' @param from_type, a character,the type of gene ID, "ENSEMBL","GO","SYMBOL"
#' and so on.
#' @param to_type, a character,the type of gene ID, "ENSEMBL","GO","SYMBOL" and
#' so on.
#' @param gene_info_db, a gene
#' @details Translate the id name in edges_data.
#' Note, the \code{from_type} must be consistent with the genes id type in
#' \code{edges_data}.
#' The \code{gene_info_db} must be consistent with the species in
#' \code{edges_data}
#' @return the id of the edges.
#' @examples
#' data(PFPRefnet_hsa)
#' data(gene_list_hsa)
#' edges_kegg <- get_bg_related_kegg(gene_list_hsa,
#' PFPRefnet=PFPRefnet_hsa,
#' rm_duplicated = TRUE)
#' @export
trans_edges_id <- function(edges_data,from_type="ENTREZID",
to_type="SYMBOL",
gene_info_db=NULL){
source_exp <- bitr(geneID = edges_data$source,
fromType = from_type,
toType = to_type,
OrgDb = gene_info_db)
target_exp <- bitr(geneID = edges_data$target,
fromType = from_type,
toType = to_type,
OrgDb = gene_info_db)
colnames(source_exp) <- c("source","source_SYMBOL")
colnames(target_exp) <- c("target","target_SYMBOL")
edges_data <- merge(x = edges_data,y=source_exp,by="source",all.x=TRUE)
edges_data <- merge(x = edges_data,y=target_exp,by="target",all.x=TRUE)
edges_data <- edges_data[!is.na(edges_data$source_SYMBOL)&!is.na(
edges_data$target_SYMBOL),]
edges_data <- edges_data[c("source_SYMBOL","target_SYMBOL","weight","pathway",
"edge_type")]
colnames(edges_data) <- c("source","target","weight","pathway","edge_type")
return(edges_data)
}
# get_asso_net
#' @title merge the edges_coexp and edges_kegg
#' @description This function will remove the co-expressed edges in edges_coexp
#' which also emerge in edges_kegg.
#' @param edges_coexp, a data.frame whose colnames is "source","target",
#' "weight","pathway","edge_type".
#' @param edges_kegg, a data.frame whose colnames is "source","target",
#' "weight","pathway","edge_type".
#' @param file_dir, a character, the root to save the result of nodes & edges.
#' @param if_symbol, a logical,whether to translate the gene id type. Default is
#' TRUE.
#' @param trans_fun, a function, when \code{if_symbol} is \emph{TRUE},it will
#' use the \code{trans_fun} function to translate the gene ids. Default is
#' \code{trans_edges_id}.
#' @param from_type, a character,the parameter in \code{trans_fun}. It is the
#' type of gene ID, "ENSEMBL","GO","SYMBOL" and so on.
#' @param to_type, a character,the parameter in \code{trans_fun}. It is the type
#' of gene ID, "ENSEMBL","GO","SYMBOL" and so on.
#' @param gene_info_db, an AnnotationDb-object for gene annotation, such as
#' "org.Hs.eg.db".
#' @details This function will remove the co-expressed edges in edges_coexp
#' which also emerge in edges_kegg.
#' It will return a list contains two data.frames. One is the merged data.
#' Another is the nodes information of the edges.
#' @return the nodes information of the edges.
#' @examples
#' data(PFPRefnet_hsa)
#' data(data_std)
#' data(PFP_test1)
#' rank1 <- rank_PFP(object = PFP_test1,total_rank = TRUE)
#' pathway_select <- refnet_info(rank1)[1,"id"]
#' gene_test <- pathways_score(rank1)$genes_score[[pathway_select]]$ENTREZID
#' edges_coexp <- get_exp_cor_edges(gene_test,data_std)
#' gene_list2 <- unique(c(edges_coexp$source,edges_coexp$target))
#' edges_kegg <- get_bg_related_kegg(gene_list2,
#' PFPRefnet=PFPRefnet_hsa,
#' rm_duplicated = TRUE)
#' @export
get_asso_net <- function(edges_coexp,
edges_kegg,
file_dir=NULL,
if_symbol=TRUE,
trans_fun = trans_edges_id,
from_type="ENTREZID",
to_type="SYMBOL",
gene_info_db=NULL){
# trans_id
if (if_symbol==TRUE){
if (is.null(gene_info_db)){
stop("Please input a genome wide annotation packade,\
for example: org.Hs.eg.db.")
}else{
edges_kegg <- trans_edges_id(edges_data = edges_kegg,
from_type = from_type,
to_type = to_type,
gene_info_db = gene_info_db)
edges_coexp <- trans_edges_id(edges_data = edges_coexp,
from_type = from_type,
to_type = to_type,
gene_info_db = gene_info_db)
}
}
# remove duplicated and combine
genes_select <- unique(edges_coexp$source)
delete_row <- vapply(X = seq_len(nrow(edges_coexp)),
FUN = function(x)(
sum((edges_coexp[x,1]==edges_kegg[,1])&(
edges_coexp[x,2]==edges_kegg[,2])|(
edges_coexp[x,1]==edges_kegg[,2])&(
edges_coexp[x,2]==edges_kegg[,1]))>0)*x,0)
edges_coexp_new <- edges_coexp[delete_row==0,]
genes_coexp_new <- unique(c(edges_coexp_new$source,edges_coexp_new$target))
genes_coexp <- setdiff(genes_coexp_new,genes_select)
asso_net_edges <- rbind(edges_coexp_new,edges_kegg)
genes_kegg <- setdiff(unique(c(asso_net_edges$source,asso_net_edges$target)),
union(genes_coexp,genes_select))
asso_net <- list(nodes=data.frame(nodes=c(genes_select,
genes_coexp,
genes_kegg),
group=c(rep("select",
length(genes_select)),
rep("coexp",
length(genes_coexp)),
rep("kegg",
length(genes_kegg)))),
edges=asso_net_edges)
if (!is.null(file_dir)){
if (substr(file_dir,nchar(file_dir),nchar(file_dir)) == "/"){
file_dir <- substr(file_dir,1,(nchar(file_dir)-1))
}
write.csv(x = asso_net_edges,file = paste0(file_dir,"/",
"asso_net_edges.csv"))
write.csv(x = asso_net[["nodes"]],file=paste0(file_dir,"/",
"asso_net_nodes.csv"))
}
return(asso_net)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.