Nothing
## ----include=FALSE------------------------------------------------------------
library(knitr)
opts_chunk$set(
concordance=TRUE,
cache=TRUE
)
## ----results='hide'-----------------------------------------------------------
# creation of a directory for LEA analyses
dir.create("LEA_analyses")
# set the created directory as the working directory
setwd("LEA_analyses")
## ----results='hide'-----------------------------------------------------------
library(LEA)
# Creation a the genotypic file: "genotypes.lfmm"
# The data include 400 SNPs for 50 individuals.
data("tutorial")
# Write genotypes in the lfmm format
write.lfmm(tutorial.R, "genotypes.lfmm")
# Write genotypes in the geno format
write.geno(tutorial.R, "genotypes.geno")
# creation of an environment gradient file: gradient.env.
# The .env file contains a single ecological variable
# for each individual.
write.env(tutorial.C, "gradients.env")
## ----results='hide'-----------------------------------------------------------
# run of pca
# Available options, K (the number of PCs),
# center and scale.
# Create files: genotypes.eigenvalues - eigenvalues,
# genotypes.eigenvectors - eigenvectors,
# genotypes.sdev - standard deviations,
# genotypes.projections - projections,
# Create a pcaProject object: pc.
pc = pca("genotypes.lfmm", scale = TRUE)
## ----results='hide'-----------------------------------------------------------
# Perfom Tracy-Widom tests on all eigenvalues.
# create file: tuto.tracyWidom - tracy-widom test information.
tw = tracy.widom(pc)
## ----results='asis'-----------------------------------------------------------
# display p-values for the Tracy-Widom tests (first 5 pcs).
tw$pvalues[1:5]
## ----fig.width=4, fig.height=4, echo=TRUE-------------------------------------
# plot the percentage of variance explained by each component
plot(tw$percentage)
## ----results='hide'-----------------------------------------------------------
# main options
# K = number of ancestral populations
# entropy = TRUE: computes the cross-entropy criterion,
# CPU = 4 the number of CPUs.
project = NULL
project = snmf("genotypes.geno",
K = 1:10,
entropy = TRUE,
repetitions = 10,
project = "new")
## ----fig.width=4, fig.height=4, echo=TRUE-------------------------------------
# plot cross-entropy criterion for all runs in the snmf project
plot(project, col = "blue", pch = 19, cex = 1.2)
## ----fig.width=10, fig.height=4, echo=TRUE------------------------------------
# select the best run for K = 4
best = which.min(cross.entropy(project, K = 4))
my.colors <- c("tomato", "lightblue",
"olivedrab", "gold")
barchart(project, K = 4, run = best,
border = NA, space = 0,
col = my.colors,
xlab = "Individuals",
ylab = "Ancestry proportions",
main = "Ancestry matrix") -> bp
axis(1, at = 1:length(bp$order),
labels = bp$order, las=1,
cex.axis = .4)
## ----fig.width=8, fig.height=6, echo=TRUE, results='hide'---------------------
# Population differentiation tests
p = snmf.pvalues(project,
entropy = TRUE,
ploidy = 2,
K = 4)
pvalues = p$pvalues
par(mfrow = c(2,1))
hist(pvalues, col = "orange")
plot(-log10(pvalues), pch = 19, col = "blue", cex = .5)
## -----------------------------------------------------------------------------
# creation of a genotypic matrix with missing genotypes
dat = as.numeric(tutorial.R)
dat[sample(1:length(dat), 100)] <- 9
dat <- matrix(dat, nrow = 50, ncol = 400)
write.lfmm(dat, "genoM.lfmm")
## ----results='hide'-----------------------------------------------------------
project.missing = snmf("genoM.lfmm", K = 4,
entropy = TRUE, repetitions = 10,
project = "new")
## -----------------------------------------------------------------------------
# select the run with the lowest cross-entropy value
best = which.min(cross.entropy(project.missing, K = 4))
# Impute the missing genotypes
impute(project.missing, "genoM.lfmm",
method = 'mode', K = 4, run = best)
# Proportion of correct imputation results
dat.imp = read.lfmm("genoM.lfmm_imputed.lfmm")
mean( tutorial.R[dat == 9] == dat.imp[dat == 9] )
## ----results='hide'-----------------------------------------------------------
# main options:
# K the number of latent factors
# Runs with K = 6 and 5 repetitions.
project = NULL
project = lfmm("genotypes.lfmm",
"gradients.env",
K = 6,
repetitions = 5,
project = "new")
## -----------------------------------------------------------------------------
# compute adjusted p-values
p = lfmm.pvalues(project, K = 6)
pvalues = p$pvalues
## ----fig.width=8, fig.height=6, echo=TRUE-------------------------------------
# GWAS significance test
par(mfrow = c(2,1))
hist(pvalues, col = "lightblue")
plot(-log10(pvalues), pch = 19, col = "blue", cex = .7)
## -----------------------------------------------------------------------------
for (alpha in c(.05,.1,.15,.2)) {
# expected FDR
print(paste("Expected FDR:", alpha))
L = length(pvalues)
# return a list of candidates with expected FDR alpha.
# Benjamini-Hochberg's algorithm:
w = which(sort(pvalues) < alpha * (1:L) / L)
candidates = order(pvalues)[w]
# estimated FDR and True Positive Rate
Lc = length(candidates)
estimated.FDR = sum(candidates <= 350)/Lc
print(paste("Observed FDR:",
round(estimated.FDR, digits = 2)))
estimated.TPR = sum(candidates > 350)/50
print(paste("Estimated TPR:",
round(estimated.TPR, digits = 2)))
}
## -----------------------------------------------------------------------------
# Simulate non-null effect sizes for 10 target loci
#individuals
n = 100
#loci
L = 1000
# Environmental variable
X = as.matrix(rnorm(n))
# effect sizes
B = rep(0, L)
target = sample(1:L, 10)
B[target] = runif(10, -10, 10)
## -----------------------------------------------------------------------------
# Create 3 hidden factors and their loadings
U = t(tcrossprod(as.matrix(c(-1,0.5,1.5)), X)) +
matrix(rnorm(3*n), ncol = 3)
V <- matrix(rnorm(3*L), ncol = 3)
## -----------------------------------------------------------------------------
# Simulate a matrix containing haploid genotypes
Y <- tcrossprod(as.matrix(X), B) +
tcrossprod(U, V) +
matrix(rnorm(n*L, sd = .5), nrow = n)
Y <- matrix(as.numeric(Y > 0), ncol = L)
## -----------------------------------------------------------------------------
# Fitting an LFMM with K = 3 factors
mod <- lfmm2(input = Y, env = X, K = 3)
## ----fig.width=8, fig.height=6, echo=TRUE, results='hide'---------------------
# Computing P-values and plotting their minus log10 values
pv <- lfmm2.test(object = mod,
input = Y,
env = X,
linear = TRUE)
plot(-log10(pv$pvalues), col = "grey", cex = .6, pch = 19)
points(target, -log10(pv$pvalues[target]), col = "red")
## ----echo=FALSE, results='hide'-----------------------------------------------
# Copy of the pdf figures in the previous directory
# for the creation of the vignette.
file.copy(list.files(".", pattern = ".pdf"), "..")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.