Nothing
#' Annotate KEGG edge mappings with user data
#' @description Add data column[s] to object created from function
#' expand_KEGG_edges
#' @export
#' @importFrom gtools smartbind
#' @importFrom plyr rename
#' @param expanded_edges The data frame object generated via the function
#' expand_KEGG_edges
#' @param KEGG_mappings KEGG_mappings The data.frame object generated by the
#' function expand_KEGG_mappings
#' @param user_data A data frame where in which the first two columns contain
#' gene symbols representing an edge and any/all other column[s] contain
#' corresponding edge data.
#' @param data_column_no The column index for desired user data to be added
#' @param map_type If the genes in your data set are left untranslated
#' set to "NUMBER" (assuming numbers are gene accession numbers)
#' @param only_mapped A logical indicator; if set to FALSE will return 'de-novo'
#' edges that 'exist' in data but are not documented in KEGG
#' @return A data frame object with detailed KEGG edge mappings annotated with
#' user data
#' @examples
#' p53_KGML <- get_KGML('hsa04115')
#' p53_KEGG_mappings <- expand_KEGG_mappings(p53_KGML)
#' p53_edges <- expand_KEGG_edges(p53_KGML, p53_KEGG_mappings)
#' p53_HA1E_data <- overlap_info(p53_KGML, p53_KEGG_mappings, 'HA1E',
#' data_type = '100_bing', only_mapped = FALSE)
#'
#' p53_edges_HA1E <- add_edge_data(p53_edges, p53_KEGG_mappings,
#' p53_HA1E_data, c(3, 10,12))
add_edge_data <- function(expanded_edges, KEGG_mappings,
user_data, data_column_no = 3, map_type = "SYMBOL",
only_mapped = TRUE) {
expanded_edges <- expanded_edges[expanded_edges$type != "maplink", ]
if (nrow(expanded_edges) > 0) {
if (map_type == "SYMBOL"){
expanded_edges$unique_ID =
paste0(expanded_edges$entry1symbol, ",",
expanded_edges$entry2symbol)
expanded_edges$unique_IDR =
paste0(expanded_edges$entry2symbol, ",",
expanded_edges$entry1symbol)
}
if (map_type == "NUMBER"){
expanded_edges$unique_ID =
paste0(expanded_edges$entry1accession, ",",
expanded_edges$entry2accession)
expanded_edges$unique_IDR =
paste0(expanded_edges$entry2accession, ",",
expanded_edges$entry1accession)
}
user_data$unique_ID = paste0(user_data[,1], ",",
user_data[,2])
pre_mapped1 <- subset(user_data, user_data$unique_ID %in%
expanded_edges$unique_ID)
pre_mapped2 <- subset(user_data, user_data$unique_ID %in%
expanded_edges$unique_IDR)
pre_mapped2 <- pre_mapped2[, c(2, 1, 3:ncol(pre_mapped2))]
names(pre_mapped2) = names(pre_mapped1)
if (nrow(pre_mapped2) >= 1 & nrow(pre_mapped1) >= 1) {
pre_mapped2$unique_ID <- paste0(pre_mapped2[,1], ",",
pre_mapped2[,2])
pre_mapped <- rbind(pre_mapped1, pre_mapped2)
}
if (nrow(pre_mapped2) == 0 & (nrow(pre_mapped1) == 0)) {
pre_mapped <- data.frame(unique_ID = NA)
}
if (nrow(pre_mapped1) == 0 & nrow(pre_mapped2) >= 1) {
pre_mapped2$unique_ID <- paste0(pre_mapped2[,1], ",",
pre_mapped2[,2])
pre_mapped <- pre_mapped2
}
if (nrow(pre_mapped2) == 0 & nrow(pre_mapped1) >= 1) {
pre_mapped <- pre_mapped1
}
if (!is.na(pre_mapped[1, 1])) {
expanded_edges_1 <- subset(expanded_edges, expanded_edges$unique_ID
%in% pre_mapped$unique_ID)
expanded_edges_1 <- expanded_edges[expanded_edges$unique_ID %in%
pre_mapped$unique_ID, ]
expanded_edges_2 <- expanded_edges[!expanded_edges$unique_ID %in%
pre_mapped$unique_ID, ]
expanded_edges_1$has_data = 1
testval <- nrow(expanded_edges_2)
if (testval > 0) {
expanded_edges_2$has_data = 0
edge_set <- rbind(expanded_edges_1, expanded_edges_2)
}
else {
edge_set <- expanded_edges_1
}
edge_set <- edge_set[order(edge_set$unique_ID), ]
data_to_add <- data.frame("unique_ID" = pre_mapped$unique_ID, stringsAsFactors = FALSE)
data_to_add <- cbind(data_to_add, pre_mapped[, data_column_no])
data_to_add <- data_to_add[order(data_to_add$unique_ID), ]
colnames(data_to_add)[2] <- "summary_score"
annotated_edges <- merge(edge_set, data_to_add, "unique_ID",
all.x = TRUE)
drops <- c("unique_ID", "unique_IDR")
annotated_edges <- annotated_edges[, !(names(annotated_edges) %in%
drops)]
cat(paste0("Number of edges documented in selected pathway = ",
nrow(annotated_edges)), "\n")
cat(paste0("Number of edges with corresponding user data = ",
sum(annotated_edges$has_data), "\n"))
cat(paste0("Coverage = ", round(sum(annotated_edges$has_data)/
nrow(annotated_edges) * 100, 2), "%", "\n"))
annotated_edges$premapped <- 1
# un_mapped <- subset(user_data, !user_data$unique_ID %in%
# expanded_edges$unique_ID &
# !user_data$unique_ID %in% expanded_edges$unique_IDR)
# un_mapped_edges <- un_mapped[, c(1:2, data_column_no,
# ncol(un_mapped))]
if (only_mapped) {
return(annotated_edges)
}
}
else {
annotated_edges <- expanded_edges
annotated_edges$premapped <- 1
annotated_edges$has_data <- 0
un_mapped <- user_data
un_mapped$unique_ID <- paste0(un_mapped$knockout1,
un_mapped$knockout2)
un_mapped_edges <- un_mapped[, c(1:2, data_column_no,
ncol(un_mapped))]
un_mapped_edges$premapped <- 0
un_mapped_edges$has_data <- 0
cat(paste0("Number of edges documented in selected pathway = ",
nrow(annotated_edges)), "\n")
cat(paste0("Number of edges with corresponding user data = ",
sum(annotated_edges$has_data), "\n"))
cat(paste0("Coverage = ", round(sum(annotated_edges$has_data)/
nrow(annotated_edges) * 100, 2), "%", "\n"))
if (only_mapped) {
cat(paste0("No documented edges are found in data;
only data for de-novo edges can be mapped \n"))
return(annotated_edges)
}
}
}
else if (nrow(expanded_edges) == 0) {
annotated_edges <- expanded_edges
annotated_edges$premapped <- 1
annotated_edges$has_data <- 0
un_mapped <- user_data
un_mapped$unique_ID <- paste0(un_mapped$knockout1, un_mapped$knockout2)
un_mapped_edges <- un_mapped[, c(1:2, data_column_no, ncol(un_mapped))]
un_mapped_edges$premapped <- 0
un_mapped_edges$has_data <- 0
cat(paste0("Number of edges documented in selected pathway = ",
nrow(annotated_edges)), "\n")
cat(paste0("Number of edges with corresponding user data = ",
sum(annotated_edges$has_data), "\n"))
cat(paste0("Coverage = ", round(sum(annotated_edges$has_data)/
nrow(annotated_edges) * 100, 2), "%", "\n"))
if (only_mapped) {
return(annotated_edges)
}
}
names(un_mapped_edges)[1:2] <- c("entryNAME_1", "entryNAME_2")
for (i in 1:nrow(un_mapped_edges)) {
un_mapped_edges$Source_eid[i] <-
list(KEGG_mappings$entryID[which(KEGG_mappings$entrySYMBOL ==
un_mapped_edges$entryNAME_1[i])])
un_mapped_edges$Target_eid[i] <-
list(KEGG_mappings$entryID[which(KEGG_mappings$entrySYMBOL ==
un_mapped_edges$entryNAME_2[i])])
x <- length(unlist(un_mapped_edges$Source_eid[i]))
y <- length(unlist(un_mapped_edges$Target_eid[i]))
if (x > 1 | y > 1) {
un_mapped_edges$simple[i] <- FALSE
} else {
un_mapped_edges$simple[i] <- TRUE
}
}
simple_un_mapped_edges <- un_mapped_edges[un_mapped_edges$simple == TRUE,
-ncol(un_mapped_edges)]
complex_un_mapped_edges <- un_mapped_edges[un_mapped_edges$simple == FALSE,
-ncol(un_mapped_edges)]
test_val <- nrow(complex_un_mapped_edges)
if (test_val == 0) {
un_mapped_edges <- simple_un_mapped_edges
} else {
keeps <- c("Source_eid", "Target_eid", "unique_ID")
c_temp <- complex_un_mapped_edges[, (names(complex_un_mapped_edges) %in%
keeps)]
for (i in 1:nrow(c_temp)) {
x <- length(unlist(c_temp$Source_eid[i]))
y <- length(unlist(c_temp$Target_eid[i]))
l <- x * y
c_temp$Source_eid[i] <- list(sort(unlist(rep(c_temp$Source_eid[i],
y))))
c_temp$Target_eid[i] <- list(unlist(rep(c_temp$Target_eid[i], x)))
c_temp$unique_ID[i] <- list(rep(c_temp$unique_ID[i], l))
}
c_temp <- data.frame(unique_ID = unlist(c_temp$unique_ID), Source_eid =
unlist(c_temp$Source_eid),
Target_eid = unlist(c_temp$Target_eid))
drops <- c("Source_eid", "Target_eid")
complex_un_mapped_edges <-
complex_un_mapped_edges[, !(names(complex_un_mapped_edges) %in%
drops)]
complex_un_mapped_edges <- merge(complex_un_mapped_edges, c_temp)
un_mapped_edges <- rbind(simple_un_mapped_edges,
complex_un_mapped_edges)
}
un_mapped_edges$Source_eid <- unlist(un_mapped_edges$Source_eid)
un_mapped_edges$Target_eid <- unlist(un_mapped_edges$Target_eid)
un_mapped_edges <- plyr::rename(un_mapped_edges, c(Source_eid = "entry1",
Target_eid = "entry2", entryNAME_1 = "entry1symbol",
entryNAME_2 = "entry2symbol"))
un_mapped_edges$subtype1 <- "de_novo"
drops <- c("unique_ID")
un_mapped_edges <- un_mapped_edges[, !(names(un_mapped_edges) %in% drops)]
un_mapped_edges$has_data <- 1
un_mapped_edges$premapped <- 0
for (i in 1:nrow(un_mapped_edges)) {
un_mapped_edges$entry1accession[i] <-
KEGG_mappings$entryACCESSION[which(KEGG_mappings$entrySYMBOL ==
un_mapped_edges$entry1symbol[i])][1]
un_mapped_edges$entry2accession[i] <-
KEGG_mappings$entryACCESSION[which(KEGG_mappings$entrySYMBOL ==
un_mapped_edges$entry2symbol[i])][1]
un_mapped_edges$entry1type[i] <-
KEGG_mappings$entryTYPE[which(KEGG_mappings$entrySYMBOL ==
un_mapped_edges$entry1symbol[i])][1]
un_mapped_edges$entry2type[i] <-
KEGG_mappings$entryTYPE[which(KEGG_mappings$entrySYMBOL ==
un_mapped_edges$entry2symbol[i])][1]
}
if (nrow(annotated_edges) > 0 & nrow(un_mapped_edges) > 0) {
all_edges <- gtools::smartbind(annotated_edges, un_mapped_edges)
} else if (nrow(annotated_edges) > 0)
all_edges <- annotated_edges else {
all_edges <- un_mapped_edges
all_edges$value <- NA
all_edges$subtype2 <- NA
all_edges$value2 <- NA
all_edges$specific_subtype <- NA
all_edges$type <- NA
all_edges$is_direct <- 1
all_edges$edgeID <- seq(1:nrow(un_mapped_edges))
refcols <- c("edgeID", "entry1accession", "entry2accession", "entry1",
"entry2")
all_edges <- all_edges[, c(refcols, setdiff(names(all_edges), refcols))]
cat(paste0("All documented edges are of type maplink; only data for
de-novo edges can be mapped \n"))
}
for (i in 1:nrow(all_edges)) {
if (all_edges$entry1[i] == all_edges$entry2[i]) {
all_edges$paralogs[i] <- 1
} else {
all_edges$paralogs[i] <- 0
}
}
return(all_edges)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.