Nothing
## ----echo=FALSE---------------------------------------------------------------------------------------------
options(width=110)
## ----message=FALSE------------------------------------------------------------------------------------------
library(SeqArray)
library(SAIGEgds)
# the genotype file in SeqArray GDS format (1000 Genomes Phase1, chromosome 22)
(geno_fn <- seqExampleFileName("KG_Phase1"))
## -----------------------------------------------------------------------------------------------------------
# open a SeqArray file in the package (1000 Genomes Phase1, chromosome 22)
gds <- seqOpen(geno_fn)
## -----------------------------------------------------------------------------------------------------------
library(SNPRelate)
set.seed(1000)
snpset <- snpgdsLDpruning(gds)
str(snpset)
snpset.id <- unlist(snpset, use.names=FALSE) # get the variant IDs of a LD-pruned set
head(snpset.id)
## -----------------------------------------------------------------------------------------------------------
grm_fn <- "grm_geno.gds"
seqSetFilter(gds, variant.id=snpset.id)
# export to a GDS genotype file without annotation data
seqExport(gds, grm_fn, info.var=character(), fmt.var=character(), samp.var=character())
## -----------------------------------------------------------------------------------------------------------
# close the file
seqClose(gds)
## -----------------------------------------------------------------------------------------------------------
set.seed(1000)
sampid <- seqGetData(grm_fn, "sample.id") # sample IDs in the genotype file
pheno <- data.frame(
sample.id = sampid,
y = sample(c(0, 1), length(sampid), replace=TRUE, prob=c(0.95, 0.05)),
x1 = rnorm(length(sampid)),
x2 = rnorm(length(sampid)),
stringsAsFactors = FALSE)
head(pheno)
# null model fitting using GRM from grm_fn
grm_fn
glmm <- seqFitNullGLMM_SPA(y ~ x1 + x2, pheno, grm_fn, trait.type="binary", sample.col="sample.id")
## -----------------------------------------------------------------------------------------------------------
# genetic variants stored in the file geno_fn
geno_fn
# calculate, using 2 processes
assoc <- seqAssocGLMM_SPA(geno_fn, glmm, mac=10, parallel=2)
head(assoc)
# filtering based on p-value
assoc[assoc$pval < 5e-4, ]
## -----------------------------------------------------------------------------------------------------------
# save to 'assoc.gds'
seqAssocGLMM_SPA(geno_fn, glmm, mac=10, parallel=2, res.savefn="assoc.gds")
## -----------------------------------------------------------------------------------------------------------
# open the GDS file
(f <- openfn.gds("assoc.gds"))
# get p-values
pval <- read.gdsn(index.gdsn(f, "pval"))
summary(pval)
closefn.gds(f)
## -----------------------------------------------------------------------------------------------------------
res <- seqSAIGE_LoadPval("assoc.gds")
head(res)
## ----fig.width=4, fig.height=4, fig.align='center'----------------------------------------------------------
n <- nrow(assoc)
# expected p-values
exp.pval <- (1:n) / n
# observed p-values
obs.pval <- sort(assoc$pval)
plot(-log10(exp.pval), -log10(obs.pval), xlab="-log10(expected P)", ylab="-log10(observed P)", cex=2/3)
abline(0, 1, col="red", lty=2)
## -----------------------------------------------------------------------------------------------------------
sessionInfo()
## ----echo=FALSE---------------------------------------------------------------------------------------------
unlink(c("grm_geno.gds", "assoc.gds"), force=TRUE)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.