Nothing
#' Fit Cox survival to all variants in a .vcf.gz file from Sanger imputation
#' server
#'
#' Performs survival analysis using Cox proportional hazard models on imputed
#' genetic data stored in compressed VCF files.
#'
#' @param vcf.file character(1) path to VCF file.
#' @param covariate.file matrix(1) comprising phenotype (time, event) and
#' additional covariate data.
#' @param id.column character(1) providing exact match to sample ID column
#' from \code{covariate.file}
#' @param time.to.event character(1) string that matches time column name
#' in pheno.file
#' @param event character(1) string that matches event column name in pheno.file
#' @param covariates character vector with matching column names in pheno.file
#' of covariates of interest
#' @param inter.term character(1) string giving the column name of the covariate
#' that will be added to the interaction term with SNP (e.g. \code{term*SNP}).
#' See details.
#' @param print.covs character(1) string of either \code{"only"}, \code{"all"}
#' or \code{"some"}, defining which covariate statistics should be printed
#' to the output. See details.
#' @param sample.ids character vector with sample ids to include in analysis
#' @param info.filter integer(1) of imputation quality score filter
#' (i.e. 0.7 will filter info > 0.7)
#' @param maf.filter integer(1) filter out minor allele frequency below
#' threshold (i.e. 0.005 will filter MAF > 0.005)
#' @param chunk.size integer(1) number of variants to process per thread
#' @param out.file character(1) string with output name
#' @param verbose logical(1) for messages that describe which part of the
#' analysis is currently being run
#' @param clusterObj A cluster object that can be used with the \code{parApply}
#' function. See details.
#'
#' @details
#' Testing for SNP-covariate interactions:
#' User can define the column name of the covariate that will be included in the
#' interaction term.
#' For example, for given covariates \code{a} and \code{b}, where \code{c} is
#' defined as the \code{inter.term} the model will be:
#' \code{~ a + b + c + SNP + c*SNP}.
#'
#' Printing results of other covariates:
#' \code{print.covs} argument controls the number of covariates will be printed
#' as output. The function is set to \code{only} by default and will only print
#' the SNP or if an interaction term is given, the results of the interaction
#' term (e.g. \code{SNP*covariate}). Whereas, \code{all} will print results
#' (coef, se.coef, p.value etc) of all covariates included in the model.
#' \code{some} is only applicable if an interaction term is given and will print
#' the results for SNP, covariate tested for interaction and the interaction
#' term. User should be mindful about using the \code{all} option, as it will
#' likely slow down the analysis and will increase the output file size.
#'
#' User defined parallelization:
#' This function uses \code{parApply} from \code{parallel} package to fit models
#' to SNPs in parallel.
#' User is not required to set any options for the parallelization.
#' However, advanced users who wish to optimize it, can provide a cluster object
#' generated by \code{makeCluster} family of functions that suits their need and
#' platform.
#
#' @return
#' Saves two text files directly to disk:
#' \code{.coxph} extension containing CoxPH survival analysis results.
#' \code{.snps_removed} extension containing SNPs that were removed
#' due to low variance or user-defined thresholds.
#'
#' @examples
#' vcf.file <- system.file(package="gwasurvivr",
#' "extdata",
#' "sanger.pbwt_reference_impute.vcf.gz")
#' pheno.fl <- system.file(package="gwasurvivr",
#' "extdata",
#' "simulated_pheno.txt")
#' pheno.file <- read.table(pheno.fl,
#' sep=" ",
#' header=TRUE,
#' stringsAsFactors = FALSE)
#' pheno.file$SexFemale <- ifelse(pheno.file$sex=="female", 1L, 0L)
#' sample.ids <- pheno.file[pheno.file$group=="experimental",]$ID_2
#' sangerCoxSurv(vcf.file=vcf.file,
#' covariate.file=pheno.file,
#' id.column="ID_2",
#' sample.ids=sample.ids,
#' time.to.event="time",
#' event="event",
#' covariates=c("age", "SexFemale", "DrugTxYes"),
#' inter.term=NULL,
#' print.covs="only",
#' out.file="sanger_example",
#' info.filter=0.3,
#' maf.filter=0.005,
#' chunk.size=50,
#' verbose=TRUE,
#' clusterObj=NULL)
#'
#' @importFrom survival Surv coxph.fit
#' @importFrom utils write.table
#' @importFrom matrixStats rowMeans2
#' @importFrom SummarizedExperiment rowRanges
#' @importFrom stats pnorm complete.cases
#' @import parallel
#' @import VariantAnnotation
#'
#' @export
sangerCoxSurv <- function(vcf.file,
covariate.file,
id.column,
sample.ids=NULL,
time.to.event,
event,
covariates,
inter.term=NULL,
print.covs="only",
out.file,
maf.filter=0.05,
info.filter=NULL,
chunk.size=5000,
verbose=TRUE,
clusterObj=NULL
){
if(verbose) message("Analysis started on ",
format(Sys.time(), "%Y-%m-%d"),
" at ",
format(Sys.time(), "%H:%M:%S"))
################################################
#### Phenotype data wrangling ################
cox.params <- coxPheno(covariate.file, covariates, id.column,
inter.term,
time.to.event,
event,
sample.ids,
verbose)
################################################
################################################
########## Cluster object ########################
# create cluster object depending on user pref or OS type,
# also create option to input number of cores
if(!is.null(clusterObj)){
cl <- clusterObj
}else if(.Platform$OS.type == "unix") {
cl <- makePSOCKcluster(getOption("gwasurvivr.cores", 2L))
} else {
cl <- makeCluster(getOption("gwasurvivr.cores", 2L))
}
on.exit(stopCluster(cl), add=TRUE)
################################################
#### open VCF file connection ##################
vcf <- VcfFile(vcf.file, yieldSize=chunk.size)
open(vcf)
################################################
####### read first chunk #######################
chunk.start <- 0
if(verbose) message("Analyzing chunk ",
chunk.start,
"-",
chunk.start+chunk.size)
data <- readVcf(vcf, param=ScanVcfParam(geno="DS",
info=c("RefPanelAF",
"TYPED",
"INFO"))
)
out.list <- coxVcfSanger(data,
covariates,
maf.filter,
info.filter,
cox.params,
cl,
inter.term,
print.covs)
write.table(
out.list$res,
paste0(out.file, ".coxph"),
append = FALSE,
row.names = FALSE,
col.names = TRUE,
quote = FALSE,
sep = "\t"
)
write.table(
out.list$dropped.snps,
paste0(out.file, ".snps_removed"),
append = FALSE,
row.names = FALSE,
col.names = FALSE,
quote = FALSE,
sep = "\t"
)
chunk.start <- chunk.size
snps_removed <- nrow(out.list$dropped.snps)
snps_analyzed <- nrow(out.list$res)
################################################
################################################
##### Start repeat loop ########################
# get genotype probabilities by chunks
# apply the survival function and save output
repeat{
# read in just dosage data from Vcf file
if(verbose) message("Analyzing chunk ",
chunk.start,
"-",
chunk.start+chunk.size)
data <- readVcf(vcf,
param=ScanVcfParam(geno="DS",
info=c("RefPanelAF", "TYPED", "INFO")
)
)
if(nrow(data)==0){
break
}
out.list <- coxVcfSanger(data,
covariates,
maf.filter,
info.filter,
cox.params,
cl,
inter.term,
print.covs)
write.table(
out.list$res,
paste0(out.file, ".coxph"),
append = TRUE,
row.names = FALSE,
col.names = FALSE,
quote = FALSE,
sep = "\t"
)
write.table(
out.list$dropped.snps,
paste0(out.file, ".snps_removed"),
append = TRUE,
row.names = FALSE,
col.names = FALSE,
quote = FALSE,
sep = "\t"
)
chunk.start <- chunk.start+chunk.size
snps_removed <- snps_removed+nrow(out.list$dropped.snps)
snps_analyzed <- snps_analyzed+nrow(out.list$res)
}
################################################
close(vcf)
if(verbose) message("Analysis completed on ",
format(Sys.time(), "%Y-%m-%d"),
" at ",
format(Sys.time(), "%H:%M:%S"))
if(verbose) message(snps_removed,
" SNPs were removed from the analysis for ",
"not meeting the threshold criteria.")
if(verbose) message("List of removed SNPs can be found in ",
paste0(out.file, ".snps_removed"))
if(verbose) message(snps_analyzed,
" SNPs were analyzed in total")
if(verbose) message("The survival output can be found at ",
paste0(out.file, ".coxph"))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.