Nothing
# Function to compute constrained conditional likelihood and hybrid conditional likelihood (Bhattacharjee et al. 2009, AJHG)
# data A data frame. No default.
# snp.vars A vector of variable names or a formula. Dominant/Recessive/Additive/General coding of SNPs can
# be specified through a formula e.g., "snp.vars= ~ (SNP1==1) + (SNP1 == 2) + SNP2" implies general coding
# for SNP1 and trend(additive) coding for SNP2.
# No default
# main.vars A vector of variable names or a formula for covariates of interest, excluding the SNP variable.
# The default is NULL
# int.vars A vector of variable names for all covariates of interest that will interact with the SNP variable and
# can be assumed to be independent to snp.vars within strata. Those interactions for which independence is
# not assumed go to X.main. Should NOT include an intercept column.
# The default is NULL
# cc.var Integer matching variable with at most 8 subjects per stratum (e.g. CC matching using getMatchedSets)
# Each stratum has one case matched to one or more controls (or one control matched to one or more cases)
# No default for snp.ccl.main
#
# nn.var Integer matching variable with at most 8 subjects per stratum (e.g. NN matching using getMatchedSets)
# Each stratum can have zero or more cases and controls. But entire data set should have both cases and controls.
# No default for snp.hcl.main
#
# op Control options for CCL and HCL optimizer. List containing members "maxiter" (default 100) and "reltol" (default 1e-5).
#
#
# Author: Samsiddhi Bhattacharjee, William Wheeler and Nilanjan Chatterjee
# Date: January, 2010
#
snp.matched <- function (data, response.var, snp.vars, main.vars = NULL, int.vars = NULL, cc.var = NULL, nn.var = NULL, op=NULL)
{
# Check for errors
if (length(response.var) != 1) stop("response.var must be a single variable")
if (!is.data.frame(data)) stop("data must be a data frame")
ccFlag <- !is.null(cc.var)
nnFlag <- !is.null(nn.var)
if(!ccFlag && !nnFlag) stop("At least one of cc.var and nn.var must be provided")
if (ccFlag && length(cc.var) > 1) stop("cc.var must be a single variable")
if (nnFlag && length(nn.var) > 1) stop("nn.var must be a single variable")
main.form <- ("formula" %in% class(main.vars))
int.form <- ("formula" %in% class(int.vars))
snp.form <- ("formula" %in% class(snp.vars))
# Check variable names
vlist <- list(response.var=response.var, snp.vars=snp.vars, main.vars=main.vars,
int.vars=int.vars, cc.var=cc.var, nn.var=nn.var)
vars <- getAllVars(vlist, names=names(vlist))
temp <- !(vars %in% colnames(data))
if (any(temp)) {
print(vars[temp])
stop("The above variables were not found in the input data")
}
# Remove missing values
temp <- getFormulas(vlist)
miss <- c(NA, NaN, Inf, -Inf)
if (length(temp)) data <- applyFormulas(data, temp, remove=miss)
data <- removeMiss.vars(data, vars=vars, miss=miss)
# Get the response variable
D <- unfactor(data[, response.var])
facVars <- NULL
# Get the strata vars
if (ccFlag) cc.strat <- as.integer(data[, cc.var])
if (nnFlag) nn.strat <- as.integer(data[, nn.var])
# Get the variables that are factors
for (temp in colnames(data)) {
if (is.factor(data[, temp])) facVars <- c(facVars, temp)
}
# Get the S design matrix
design.S0 <- logistic.dsgnMat(data, snp.vars, facVars)$designMatrix
# Get the X design matrix
design.X0 <- logistic.dsgnMat(data, main.vars, facVars)$designMatrix
# Get the V design matrix
design.V0 <- logistic.dsgnMat(data, int.vars, facVars)$designMatrix
rm(vlist, vars)
temp <- gc(verbose=FALSE)
# Call the core functions
ret <- list(CLR=NULL, CCL=NULL , HCL=NULL, model.info=NULL)
colnames(design.S0) <- sapply(colnames(design.S0), function(x) if(" " %in% x) paste("(",x,")",sep="") else x)
if(ccFlag)
{
if(is.null(op)) op <- list(maxiter = 100, reltol = 1e-5)
clg <- try(snp.ccl.main(D, X.snp=design.S0 , X.main=design.X0, X.int=design.V0, cc.strat=cc.strat, op = op))
if(!inherits(clg , "try-error"))
{
ret$CLR <- clg$CLR
ret$CCL <- clg$CCL
}
}
if(nnFlag)
{
if(is.null(op)) op <- list(maxiter = 100, reltol = 1e-5)
hcl <- try(snp.hcl.main(D, X.snp=design.S0 , X.main=design.X0, X.int=design.V0, nn.strat=nn.strat, op = op))
if(!inherits(hcl , "try-error")) ret$HCL <- hcl
}
# Add model info
if (main.form) main.vars <- colnames(design.X0)
if (int.form) int.vars <- colnames(design.V0)
if (snp.form) snp.vars <- colnames(design.S0)
model <- list(data=data, response.var=response.var, snp.vars=snp.vars, main.vars=main.vars, int.vars=int.vars,
cc.var=cc.var, nn.var=nn.var, factors=facVars)
ret$model.info <- model
class(ret) <- "snp.matched"
ret
}
# Functions to compute constrained conditional likelihood and hybrid conditional likelihood (Bhattacharjee et al. 2009, AJHG)
# D Binary response vector coded as 0 (controls) and 1 (cases)
# No default.
# X.snp Design matrix (usually a single column SNP genotype: 0,1,2 assuming a trend model,
# 0,1 assuming dominant/recessive models or two binary vectors assuming general
# model) with which interactions of X.int are to be esimated assuming independence
# within strata. Unlike, CML, this snp is not modeled parametrically, so the
# function works even for a set of non-SNP variables.
# No default.
# X.main Design matrix for all covariates of interest, excluding the SNP variable. This
# matrix should NOT include an intercept column.
# The default is NULL
# X.int Design matrix for all covariates of interest that will interact with the SNP variable and can be assumed
# to be independent to snp.vars within strata. Those interactions for which independence is not assumed
# go to X.main. This matrix should NOT include an intercept column.
# The default is NULL
# cc.strat Integer matching variable with at most 8 subjects per matched set (e.g. CC matching using getMatchedSets). Recommended
# 6 subjects or less per matched sets for larger data sets to avoid memory overload.
# Each stratum has one case matched to one or more controls (or one control matched to one or more cases)
# No default for snp.ccl.main
#
# nn.strat Integer matching variable with at most 8 subjects per matched set(e.g. NN matching using getMatchedSets). Recommended
# 6 subjects or less per stratum for reasonable speed.
# Each stratum can have zero or more cases and controls. But entire data set should have both cases and controls.
# No default for snp.hcl.main
#
# op Control options for optimizer. List containing members "maxiter" (default 100) and "reltol" (default 1e-5).
#
snp.hcl.main <- function(D, X.snp, X.main=NULL, X.int=NULL, nn.strat, op=NULL)
{
# Summarize D and nn.strat
nsub <- length(D)
ncase <- sum(D)
ox <- order(nn.strat, 1 - D)
rl <- rle(nn.strat[ox])
strat.sizes <- rl$lengths
ox <- order(nn.strat, D)
sx <- rep(1, nsub)
sx[ox] <- rep(strat.sizes, strat.sizes)
osx <- order(sx, nn.strat, 1 - D)
rl <- rle(sx[osx])
usz <- rl$values
nsz <- rl$lengths/usz
cnsz <- c(0, cumsum(usz * nsz))[1:length(usz)]
numsz <- length(usz)
mnsz <- max(nsz)
if(usz[length(usz)] > 8) stop("Matched sets of size > 8 not supported currently for HCL")
# Create a combined data frame and call glm(stats) to get initial parameter estimates
psnp <- length(X.snp)/nsub
pmain <- length(X.main)/nsub
pint <- length(X.int)/nsub
p <- 1 + psnp + pmain + psnp * pint
igrid <- data.matrix(expand.grid(1:nsub, 1:pint, 1:psnp))
ngrid <- expand.grid(colnames(X.snp), colnames(X.int))
data.full <- cbind(D, rep(1, nsub), X.snp, X.main)
if(psnp > 0 && pint > 0) data.full <- cbind(data.full, matrix(X.snp[igrid[ , c(1 , 3)]] * X.int[igrid[,c(1 , 2)]], nrow=nsub, byrow=FALSE))
data.full <- data.frame(data.full)
cnames <- c("D", "Intercept", colnames(X.snp) , colnames(X.main))
if(psnp > 0 && pint >0) cnames <- c(cnames, paste(ngrid[,1], ngrid[,2], sep =":"))
colnames(data.full) <- cnames
form <- paste("D ~ 0 +", paste(colnames(data.full)[2:ncol(data.full)], collapse =" + "), sep = "")
uml <- try(glm(as.formula(form), data=data.full, family=binomial()))
if(inherits(uml,"try-error"))
{
warning("snp.hcl.main: Failed to obtain initial estimates from glm.")
beta.init <- c(log(ncase/(nsub - ncase)) , rep(0 , p - 1))
}
else beta.init <- uml$coef
# Call C function to obtain maximize HCL likelihood using Newton Raphson
res <- try(.C("hcl_optim" , BETA = as.double(beta.init) , MAXIT = as.integer(op$maxiter) , TOL = as.double(op$reltol) , NSUB = as.integer(nsub) , D = as.integer(D) , XSNP = as.double(X.snp) ,
NSNP = as.integer(psnp) , XMAIN = as.double(X.main) , NMAIN = as.integer(pmain) , XINT = as.double(X.int) , NINT = as.integer(pint) ,
NUMSZ = as.integer(numsz) , MNSZ = as.integer(mnsz) , USZ = as.integer(usz) , NSZ = as.integer(nsz) , CNSZ = as.integer(cnsz) , OSX = as.integer(osx - 1) ,
LOGLIKE = double(length = 1) , HESS = as.double(diag(1 , p)) , CONV = integer(length = 1) , ITER = integer(length = 1),
PACKAGE = "CGEN"))
# Retrieve relevant information from the returned list
hcl <- list(parms=NULL, cov=NULL, loglike=NULL)
if(inherits(res, "try-error")) { warning("snp.hcl.main: Error in hcl_optim.") ; return(hcl) ; }
if(res$CONV == 0) { warning("snp.hcl.main: Ran out of iterations and did not converge in hcl_optim") ; return(hcl) ; }
hcl$parms <- res$BETA
hcl$cov <- try(solve(matrix(-res$HESS, p, p, byrow = FALSE)))
if(inherits(hcl$cov, "try-error")) {
warning("snp.hcl.main: Singular hessian returned by optimizer")
hcl <- NULL
} else {
colnames(hcl$cov) <- rownames(hcl$cov) <- cnames[-1]
hcl$loglike <- res$LOGLIKE
names(hcl$parms) <- cnames[-1]
}
hcl
}
snp.ccl.main <- function(D, X.snp, X.main=NULL, X.int=NULL, cc.strat, op=NULL)
{
# Summarize D and cc.strat
nsub <- length(D)
ncase <- sum(D)
ox <- order(cc.strat, 1 - D)
rl <- rle(cc.strat[ox])
strat.sizes <- rl$lengths
ox <- order(cc.strat, D)
sx <- rep(1, nsub)
sx[ox] <- rep(strat.sizes, strat.sizes)
osx <- order(sx, cc.strat, 1 - D)
rl <- rle(sx[osx])
usz <- rl$values
nsz <- rl$lengths/usz
cnsz <- c(0, cumsum(usz * nsz))[1:length(usz)]
numsz <- length(usz)
mnsz <- max(nsz)
ndx <- aggregate(data.frame(D), by=list(cc.strat), FUN="sum")[,2]
ndx <- ndx[cc.strat[osx]]
mdx <- sapply(1:numsz , function(u) {max(ndx[cnsz[u] + 1:(nsz[u] * usz[u])]) })
if(usz[length(usz)] > 8) stop("Matched sets of size > 8 not supported currently for CCL")
# Create a combined data frame and call clogit(survival) to get initial parameter estimates
psnp <- length(X.snp)/nsub
pmain <- length(X.main)/nsub
pint <- length(X.int)/nsub
p <- psnp + pmain + psnp * pint
if(p == 0) stop("No main or interaction effects to fit")
ngrid <- expand.grid(colnames(X.snp), colnames(X.int))
data.full <- data.frame(cbind(D, cc.strat, X.snp, X.main, X.int))
colnames(data.full) <- c("D", "CCStrat", colnames(X.snp) , colnames(X.main) , paste(colnames(X.int), rep("_", pint), sep=""))
# Call clogit(survival) on full data
form <- paste("D ~", paste(c(colnames(X.snp), colnames(X.main)), collapse ="+") , sep = " ")
if(psnp > 0 && pint > 0) form <- paste(c(form , paste(ngrid[,1], ":", ngrid[,2], "_", sep ="")) , collapse = " + ")
form <- paste(form, "strata(CCStrat)", sep = " + ")
clg <- try(clogit(as.formula(form), data = data.full, method = "exact"))
if(inherits(clg , "try-error"))
{
warning("snp.ccl.main: Failed to obtain initial estimates from clogit.")
beta.init <- runif(p , log(0.5) , log(2))
}
else beta.init <- clg$coef
clr <- list(parms=NULL, cov=NULL, loglike=NULL)
cnames <- c(colnames(X.snp), colnames(X.main))
if(psnp > 0 && pint > 0) cnames <- c(cnames, paste(ngrid[,1], ngrid[,2], sep =":"))
if(!inherits(clg, "try-error"))
{
clr$parms <- clg$coef
clr$cov <- clg$var
clr$loglike <- clg$loglik[2]
names(clr$parms) <- cnames
colnames(clr$cov) <- rownames(clr$cov) <- cnames
}
# Call C function to obtain maximize CCL likelihood using Newton Raphson
res <- try(.C("ccl_optim" , BETA = as.double(beta.init) , MAXIT = as.integer(op$maxiter) , TOL = as.double(op$reltol) , NSUB = as.integer(nsub) ,
D = as.integer(D) , XSNP = as.double(X.snp) , NSNP = as.integer(psnp) , XMAIN = as.double(X.main) , NMAIN = as.integer(pmain) ,
XINT = as.double(X.int) , NINT = as.integer(pint) , NUMSZ = as.integer(numsz) , MNSZ = as.integer(mnsz) , USZ = as.integer(usz) ,
MDX = as.integer(mdx) , NSZ = as.integer(nsz) , CNSZ = as.integer(cnsz) , OSX = as.integer(osx - 1) , NDX = as.integer(ndx) ,
LOGLIKE = double(length = 1) , HESS = as.double(diag(1 , p)) , CONV = integer(length = 1) , ITER = integer(length = 1),
PACKAGE = "CGEN"))
# Retrieve relevant information from the returned list
ccl <- list(parms=NULL, cov=NULL, loglike=NULL)
if(inherits(res, "try-error")) { warning("snp.ccl.main: Error in ccl_optim.") ; return(ccl) ; }
if(res$CONV == 0) { warning("snp.ccl.main: Ran out of iterations and did not converge in ccl_optim") ; return(ccl) ; }
ccl$parms <- res$BETA
ccl$cov <- try(solve(matrix(-res$HESS, p, p, byrow = FALSE)))
if(inherits(ccl$cov, "try-error")) {
warning("snp.ccl.main: Singular hessian returned by optimizer")
ccl <- NULL
} else {
colnames(ccl$cov) <- rownames(ccl$cov) <- cnames
ccl$loglike <- res$LOGLIKE
names(ccl$parms) <- cnames
}
list(CLR=clr, CCL=ccl)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.