Nothing
#' Load GWAS data
#'
#' @param association_file The association file
#' @param effects_file The effects file
#' @param association_columns The names of the columns in your association data
#' for Trait, Marker, Chromosome, Site, F, p, and marker_Rsquared
#' @param effects_columns The names of the columns in your effects data for
#' Trait, Marker, Chromosome, Site, and effect
#' @return The association data and the effects data merged into a dataframe
#' with one row for each SNP
#' @export
#' @importFrom rlang .data
#' @import utils
#' @import dplyr
#' @examples
#' demo_association_file = system.file("extdata", "association.txt.xz",
#' package = "PAST", mustWork = TRUE)
#' demo_effects_file = system.file("extdata", "effects.txt.xz",
#' package = "PAST", mustWork = TRUE)
#' gwas_data <- load_GWAS_data(demo_association_file, demo_effects_file)
load_GWAS_data <- function(association_file,
effects_file,
association_columns = c("Trait",
"Marker",
"Locus",
"Site",
"p",
"marker_R2"),
effects_columns = c("Trait",
"Marker",
"Locus",
"Site",
"Effect")) {
stats <- read.table(association_file, header = TRUE, sep = "\t") %>%
dplyr::mutate(Trait = !!as.name(association_columns[1]),
Marker_original = !!as.name(association_columns[2]),
Chr = !!as.name(association_columns[3]),
Pos = !!as.name(association_columns[4]),
Marker = paste0(.data$Chr, "_", .data$Pos),
p = !!as.name(association_columns[5]),
marker_R2 = !!as.name(association_columns[6])) %>%
dplyr::select(.data$Marker,
.data$Marker_original,
.data$Trait,
.data$Chr,
.data$Pos,
.data$p,
.data$marker_R2)
effects <- read.table(effects_file, header = TRUE, sep = "\t") %>%
dplyr::mutate(Trait = !!as.name(effects_columns[1]),
Marker_original = !!as.name(effects_columns[2]),
Chr = !!as.name(effects_columns[3]),
Pos = !!as.name(effects_columns[4]),
Effect = !!as.name(effects_columns[5])) %>%
dplyr::select(.data$Marker_original,
.data$Trait,
.data$Chr,
.data$Pos,
.data$Effect)
# Delete all markers in effects and stats with more or less alleles than 2
non_biallelic <- effects %>%
dplyr::group_by(.data$Marker_original) %>%
dplyr::summarise(count = n()) %>%
dplyr::filter(count != 2)
effects <-
effects %>%
dplyr::filter(!(.data$Marker_original %in% non_biallelic$Marker_original))
stats <-
stats %>%
dplyr::filter(!(.data$Marker_original %in% non_biallelic$Marker_original))
# Remove all NaN data to prevent math with NaN
stats <- stats %>% dplyr::filter(.data$marker_R2 != "NaN")
# Split effects into even and odd rows and
# recombine into a single row without duplicate columns
odd_effects <- effects[seq(1, nrow(effects), by = 2), ]
even_effects <- effects[seq(2, nrow(effects), by = 2), ]
effects <- merge(odd_effects, even_effects, by = "Marker_original")
effects <- dplyr::mutate(
effects,
Trait = effects$Trait.x,
Trait.x = NULL,
Trait.y = NULL
)
# Merge stats and effects and return
all_data <- merge(stats, effects, by = "Marker_original") %>%
dplyr::mutate(
Trait = .data$"Trait.x",
Trait.x = NULL,
Trait.y = NULL
) %>%
dplyr::select(
.data$Marker,
.data$Marker_original,
.data$Chr,
.data$Pos,
.data$p,
.data$marker_R2,
.data$Effect.x,
.data$Effect.y
)
all_data
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.