##' @title Simulate a dataset given a set of controls (without phenotype) and a causal model
##' @export
##' @param df control dataset of genotypes to sample from, without phenotype
##' @param N0 number of samples with Y=0
##' @param N1 number of samples with Y=1
##' @param CV which SNPs are causal?
##' @param PWgY0 the distribution of causal snps in controls
##' @param PWgY1 the distribution of causal snps in cases
##' @return simulated genotype data for N0 controls and N1 cases
##' @author Mary Fortune
make_dataset<-function(df,N0,N1,CV,PWgY0,PWgY1){
m <- length(CV)
#make sure we have genotypes in {0,1,2}
if (isTRUE(all.equal(sort(unique(df[,1])),1:3))){
df<-df-1
}
#compute the weights for sampling the cases
ref_data_W<-df[,CV]
Y1weights<-rep(0,nrow(df))
#which order do the Ws come in the vectors of probabilities above?
orderW<-apply(hcube(rep(3,m))-1, 1,function(x){paste(x,collapse="")})
for (ii in 1:nrow(df)){
if (length(CV)==1){
Wvalue<-paste(ref_data_W[ii])
}else{
Wvalue<-paste(ref_data_W[ii,],collapse="")
}
whichw<-which(orderW==Wvalue)
Y1weights[ii]<-PWgY1[whichw]/PWgY0[whichw]
}
rows.index0<-sample(nrow(df), size=N0, replace = TRUE)
rows.index1<-sample(nrow(df), size=N1, replace = TRUE, prob=Y1weights)
dfnew_Y0<-cbind(rep(0,N0),df[rows.index0,])
dfnew_Y1<-cbind(rep(1,N1),df[rows.index1,])
colnames(dfnew_Y0)[1]<-"Y"
colnames(dfnew_Y1)[1]<-"Y"
dfnew<-rbind(dfnew_Y0,dfnew_Y1)
return(list(dfnew,c(rows.index0,rows.index1)))
}
##' @title Simulate a dataset given a set of controls (with phenotype) and a causal model
##' @param df dataset of genotypes to sample from, with phenotype
##' @param N0 number of samples with Y=0
##' @param N1 number of samples with Y=1
##' @param CV which SNPs are causal?
##' @param PWgY0 the distribution of causal snps in controls
##' @param PWgY1 the distribution of causal snps in cases
##' @return simulated genotype data for N0 controls and N1 cases
##' @author Mary Fortune
make_dataset_pheno<-function(df,N0,N1,CV,PWgY0,PWgY1){
#make sure we have genotypes in {0,1,2}
if (isTRUE(all.equal(sort(unique(df[,2])),1:3))){
df[,-1]<-df[,-1]-1
}
#compute the weights for sampling the cases
ref_data_W<-df[,CV+1]
Y1weights<-rep(0,nrow(df))
#which order do the Ws come in the vectors of probabilities above?
orderW<-apply(hcube(rep(3,m))-1, 1,function(x){paste(x,collapse="")})
for (ii in 1:nrow(df)){
if (length(CV)==1){
Wvalue<-paste(ref_data_W[ii])
}else{
Wvalue<-paste(ref_data_W[ii,],collapse="")
}
whichw<-which(orderW==Wvalue)
Y1weights[ii]<-PWgY1[whichw]/PWgY0[whichw]
}
rows.index0<-sample(nrow(df), size=N0, replace = TRUE)
rows.index1<-sample(nrow(df), size=N1, replace = TRUE, prob=Y1weights)
dfnew_Y0<-cbind(rep(0,N0),df[rows.index0,-1])
dfnew_Y1<-cbind(rep(1,N1),df[rows.index1,-1])
colnames(dfnew_Y0)[1]<-"Y"
colnames(dfnew_Y1)[1]<-"Y"
dfnew<-rbind(dfnew_Y0,dfnew_Y1)
return(list(dfnew,c(rows.index0,rows.index1)))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.