# genetics library(vcfR) devtools::install_github("nickbrazeau/NFBtools") library(NFBtools) # tidy library(tidyverse) library(DT) # plot library(RColorBrewer) library(grid) # networks library(ggraph) library(tidygraph)
# download this file: url <- "ftp://ngs.sanger.ac.uk/production/malaria/pf-crosses/1.0/3d7_hb3.combined.final.vcf.gz" destfile <- "~/Downloads/temp.vcf.gz" httr::GET(url=url, httr::write_disk(path=destfile, overwrite = F)) vcfRobject <- vcfR::read.vcfR(file=destfile) vcfRobject <- vcfRobject[vcfR::is.biallelic(vcfRobject)] # biallelic vcfRobject <-vcfR::extract.indels(vcfRobject, return.indels = F) # subset to SNPs vcfRobject <- NFBtools::vcfR2segsites(vcfRobject)
plotObjlist <- NFBtools::vcfR2GTCov(vcfR = vcfRobject) # this is the dataframe we are using to plot, plots by chromosome head(plotObjlist$snpdflist$Pf3D7_01_v3) plot(plotObjlist$plot)
Cross experiment. Two parents. Different levels of relatedness -- nice test/use case. Let's subset to make our life easier... Of note, the LD weights here are going to be very strong since this is a one-generation cross. This approach is assuming random draws from a population, not meiotic siblings.
# two chromosomes to show that this parallelizes well vcfRobject <- NFBtools::vcfR2SubsetChrom(vcfRobject = vcfRobject, chromvect = c("Pf3D7_01_v3")) plotObjlist <- NFBtools::vcfR2GTCov(vcfR = vcfRobject) # smaller plot now plot(plotObjlist$plot)
ret <- NFBtools::vcfR2Fw_pairwisegendist(vcfRobject = vcfRobject, biallelicsnps = T, segsites = NULL) # I did segsites above. this is a required feature colnames(ret) <- c("pair1", "pair2", "dab") # Wrangle this into distance format ret_pairwise_df <- tidyr::spread(ret, key=pair1, value=dab) row.names(ret_pairwise_df) <- smpl <- levels(ret$pair2) # this insanity to create a proper off diagonal ret_pairwise_df <- rbind.data.frame(rep(NA, ncol(ret_pairwise_df)), ret_pairwise_df) row.names(ret_pairwise_df)[1] <- levels(ret$pair1)[1] ret_pairwise_df <- cbind.data.frame(ret_pairwise_df, rep(NA, nrow(ret_pairwise_df))) # returning to normal code smpl <- c(levels(ret$pair1)[1], smpl) ret_pairwise_df <- ret_pairwise_df[,-1] # drop pair retdist <- as.dist(ret_pairwise_df)
DT::datatable(ret, extensions = 'Buttons', options = list(dom = 'Bfrtip', buttons = c('copy', 'csv', 'print') ))
fit <- cmdscale(retdist,eig=TRUE, k=3) # k is the number of dim #fit # view results eigpoints <- as.data.frame(fit$points) colnames(eigpoints) <- c("PC1", "PC2", "PC3") ## color schematics n <- length(smpl) qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',] col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals))) colorsample <- data.frame(smpl = smpl, colorsample = sample(col_vector,length(smpl))) eigpoints$smpl <- smpl eigpoints <- left_join(eigpoints, colorsample, by=c("smpl")) ######################## # PCA # ######################## library(plotly) Sys.setenv("plotly_username"="nbrazeau1") Sys.setenv("plotly_api_key"="ePqTRUO0K7qUlYc8DffD") psample <- plot_ly(eigpoints, x = ~PC1, y = ~PC3, z = ~PC2, color = ~smpl, colors = as.character(eigpoints$colorsample)) %>% add_markers() %>% layout(scene = list(xaxis = list(title = 'PC1'), yaxis = list(title = 'PC3'), zaxis = list(title = 'PC2'))) psample
ret_prune <- ret %>% dplyr::filter(dab < 1e-3) f.graph <- as_tbl_graph(ret_prune, directed = FALSE) plotobj <- ggraph(f.graph) + geom_edge_fan(aes(width=dab), colour = "#d9d9d9", alpha=0.8) + scale_edge_width(range = c(0.8, 1.5)) + geom_node_point(aes(color = name, size=2.5)) + geom_node_text(aes(label = name), size = 2, repel = TRUE) + theme_graph() + theme(legend.position = "none") plot(plotobj)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.