# R/CALC-calc_percentage.R
#
# Defines functions: calc_percentage
#
# Documented in: calc_percentage

#' @title Calculate the percentage.
#' @description Calculate, for each bin, the percentage of the distinct KOs 
#' of every category of a pathway-level column that are present in that bin.
#' @usage calc_percentage (tibble_ko, y_axis, data_experiment = NULL)
#' @param tibble_ko a tibble object from mapping_ko. It must contain a 
#' \code{KO} column and the column given in \code{y_axis}; every column that 
#' is not a known annotation column is treated as a bin holding abundance 
#' values.
#' @param y_axis the unquoted name of the column of \code{tibble_ko} to 
#' analyze (e.g. \code{Pathway}).
#' @param data_experiment optional. a data frame object containing metadata 
#' information. When supplied it is left-joined to the result by 
#' \code{Bin_name}, so it must contain a \code{Bin_name} column.
#' @details For every value of \code{y_axis} the number of distinct KOs in 
#' the whole table is counted (the total). For every bin, the number of 
#' distinct KOs with a non-zero abundance is counted per value of 
#' \code{y_axis}. The percentage is \code{100 * count in bin / total}. 
#' Combinations with no KO present in a bin are not part of the output.
#' @import  tibble dplyr stringr tidyr  rlang
#' @examples 
#' # calc_percentage (ko_bin_mapp, Pathway)   
#' @return A data frame, grouped by \code{Bin_name}, with the columns 
#' \code{Pathway_number_of_total_elements} (the \code{y_axis} value followed 
#' by the total number of KOs in parentheses), the \code{y_axis} column, 
#' \code{Bin_name} and \code{Percentage}, plus the columns of 
#' \code{data_experiment} when it is supplied.
#' @export

#Sets the syntax of the function-------------------------------------------####
calc_percentage <- function(tibble_ko,
                            y_axis,
                            data_experiment = NULL) {
# Enquoting ---------------------------------------------------------------####
  y_axis_enquo <- enquo(y_axis)
  # The label is used as a plain column name for the input check and the join.
  y_axis_label <- as_label(y_axis_enquo)
# Check input -------------------------------------------------------------####
  missing_cols <- setdiff(c("KO", y_axis_label), colnames(tibble_ko))
  if (length(missing_cols) > 0) {
    stop("Column(s) not found in `tibble_ko`: ",
         paste(missing_cols, collapse = ", "),
         call. = FALSE)
  }
# Select data -------------------------------------------------------------####
  # Annotation columns that must never be treated as bins.
  data_to_select <- c("Module", "Module_description", "Pathway", 
                      "Pathway_description", "Genes", 
                      "Gene_description", "Enzyme", "Cycle", "Pathway_cycle",
                      "Detail_cycle", "rbims_pathway", "rbims_sub_pathway", 
                      "KO", "dbCAN", "domain_name", "Pfam", "PFAM", "INTERPRO")
  
# Transform from wide to long ---------------------------------------------####
  # The y_axis column is excluded as well, so that an annotation column that
  # is not in the list above is not pivoted as if it were a bin.
  Kegg_long <- tibble_ko %>%
    pivot_longer(cols = -any_of(c(data_to_select, y_axis_label)), 
                 values_to = "Abundance",
                 names_to = "Bin_name") %>%
    distinct()
# Count the total number of genes per metabolism---------------------------####
  # One row per y_axis value: its number of distinct KOs and a display label
  # of the form "<y_axis value> (<total>)".
  metabolism_counts_totals <- Kegg_long %>%
    select(!!y_axis_enquo, "KO") %>%
    distinct() %>%
    count(!!y_axis_enquo, sort = TRUE, name = "Abundance_metabolism") %>%
    mutate(New = paste0(!!y_axis_enquo, " (", 
                        .data$Abundance_metabolism, ")")) %>%
    select("New", !!y_axis_enquo, "Abundance_metabolism")
# Count the total number of genes per metabolism in genome-----------------####
  # Abundance is dropped before distinct() so that a KO present in a bin is
  # counted once per y_axis value, matching how the totals are counted.
  metabolism_counts_genome <- Kegg_long %>%
    filter(.data$Abundance != 0) %>%
    select(!!y_axis_enquo, "Bin_name", "KO") %>%
    distinct() %>%
    group_by(.data$Bin_name) %>%
    count(!!y_axis_enquo, name = "Abundance_metabolism_genome")
# Calculate the percentage-------------------------------------------------####
  # The table stays grouped by Bin_name, as in previous versions.
  Table_with_percentage <- metabolism_counts_genome %>%
    left_join(metabolism_counts_totals, by = y_axis_label) %>%
    mutate(Percentage = (.data$Abundance_metabolism_genome * 100) / 
                         .data$Abundance_metabolism) %>%
    select("New", !!y_axis_enquo, "Bin_name", "Percentage") %>%
    distinct() %>%
    drop_na() %>%
    rename(Pathway_number_of_total_elements = "New")
# Join data experiment ----------------------------------------------------####
  if (!is.null(data_experiment)) {
    Table_with_percentage <- Table_with_percentage %>%
      left_join(data_experiment, by = "Bin_name")
  }
  
  Table_with_percentage
}
# mirnavazquez/RbiMs documentation built on Sept. 6, 2024, 1:05 p.m.