knitr::opts_chunk$set(echo = TRUE)
This rmarkdown show how we train each imputation model. Three approaches for selecting weights for CUE can be found in our paper. These training processes can be used for users who would like to generate a new set of imputation reference models, for example by providing 450K and EPIC array data for a tissue other than placenta or whole blood (which are the only tissues currently available with our pretrained models).
# generate HM450 as x and HM850 as y n = 200 # number of sampels x <- matrix(rbeta(n*50,1,1),ncol=n) # upperstream 25 probes and downstream 25 probes measured by HM450 y <- rbeta(n,1,1) # the target HM850 probe Meth<-data.frame(cbind(y,t(x))) train=1:160 test=161:200
source("R/refund_lib.R") load("PTSD/Annotations.RData") #load("Data/Annotations.RData") p=dim(annotation.450K)[1] X_450 = matrix(rbeta(n*p,1,1),ncol=n) rownames(X_450) <- rownames(annotation.450K) X_450 = log2(X_450/(1-X_450)) Y=log2(y/(1-y)) X=log2(x/(1-x)) isl_group <- c("", "Island", "N_Shelf", "N_Shore", "S_Shore", "S_Shelf") train.dens <- list() train.funcs <- list() test.dens <- list() test.funcs <- list() for (l in isl_group) { if (l == "") { j <- "NA" } else { j <- l } # max(X)==13.28757 test.dens[[j]] <- X_450[annotation.450K[, "Relation_to_UCSC_CpG_Island"] == l,test] test.funcs[[j]] <- apply(test.dens[[j]], 2, function(x) {density(x,from=-13.38757,to=13.38757)$y}) train.dens[[j]] <- X_450[annotation.450K[, "Relation_to_UCSC_CpG_Island"] == l,train] train.funcs[[j]] <- apply(train.dens[[j]], 2, function(x) {density(x,from=-13.38757,to=13.38757)$y}) } ## random select a target probe Y to impute: target_probe = rownames(annotation.EPIC)[sample.int(dim(annotation.EPIC),1)] j <- paste(annotation.EPIC[target_probe,"Relation_to_UCSC_CpG_Island"]) if (j == "") { j <- "NA" } fit = new_pfr(unlist(Y[train]), t(X[,train]), t(train.funcs[[j]])) X.test <- create_predictors(t(X[,test]), t(test.funcs[[j]])) Y.test <- X.test %*% fit$coefs pred_PFR_test <- 2 ^ Y.test / (2 ^ Y.test + 1)
library(xgboost) bst <- xgboost(data = t(x[,train]), label = y[train], #ax.depth = 2, eta = 1, thread = 1, nrounds = 10, objective = "reg:linear",verbose = 0) pred_XGB_test<-predict(bst,t(x[,test]))
library(randomForest) rf=randomForest(as.formula(paste("y~.")), data = Meth, subset=train ,mty=5,ntree=10) pred_RF_test<-predict(rf,Meth[test,2:(dim(Meth)[2])])
library("class") knn=knn(train = Meth[train,2:dim(x)[1]], test = Meth[test,2:dim(x)[1]], cl = Meth[train,1], k=50, prob=T) pred_KNN_test<-as.numeric(as.matrix(knn))
y[y>0.5]=1;y[y<=0.5]=0; Meth<-data.frame(cbind(y,t(x))) logit=glm(as.formula(paste("y~.")),Meth[train,],family = binomial) # Note for some probes: the alg might not converge. In this case, we recommend to abondon logistic and work with the rest 4 methods if this happens. # glm.fit: algorithm did not convergeglm.fit: fitted probabilities numerically 0 or 1 occurred pred_Logistic_test<-predict(logit,Meth[test,],type = "response")
# equal weights K=4 pred_CUE_equal_weighs <- 1/K * (pred_Logistic_test + pred_KNN_test + pred_RF_test +pred_PFR_test) #pred_CUE <-
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.