Nothing
## ---- eval = FALSE------------------------------------------------------------
# install.packages("BiocManager")
# BiocManager::install("Rcpi")
## ---- eval = FALSE------------------------------------------------------------
# BiocManager::install("Rcpi", dependencies = c("Imports", "Enhances"))
## ---- eval = FALSE------------------------------------------------------------
# library("Rcpi")
#
# # load FASTA files
# extracell <- readFASTA(system.file(
# "vignettedata/extracell.fasta",
# package = "Rcpi"
# ))
# mitonchon <- readFASTA(system.file(
# "vignettedata/mitochondrion.fasta",
# package = "Rcpi"
# ))
## ---- eval = FALSE------------------------------------------------------------
# length(extracell)
## ---- eval = FALSE------------------------------------------------------------
# ## [1] 325
## ---- eval = FALSE------------------------------------------------------------
# length(mitonchon)
## ---- eval = FALSE------------------------------------------------------------
# ## [1] 306
## ---- eval = FALSE------------------------------------------------------------
# extracell <- extracell[(sapply(extracell, checkProt))]
# mitonchon <- mitonchon[(sapply(mitonchon, checkProt))]
## ---- eval = FALSE------------------------------------------------------------
# length(extracell)
## ---- eval = FALSE------------------------------------------------------------
# ## [1] 323
## ---- eval = FALSE------------------------------------------------------------
# length(mitonchon)
## ---- eval = FALSE------------------------------------------------------------
# ## [1] 304
## ---- eval = FALSE------------------------------------------------------------
# # calculate APAAC descriptors
# x1 <- t(sapply(extracell, extractProtAPAAC))
# x2 <- t(sapply(mitonchon, extractProtAPAAC))
# x <- rbind(x1, x2)
#
# # make class labels
# labels <- as.factor(c(rep(0, length(extracell)), rep(1, length(mitonchon))))
## ---- eval = FALSE------------------------------------------------------------
# set.seed(1001)
#
# # split training and test set
# tr.idx <- c(
# sample(1:nrow(x1), round(nrow(x1) * 0.75)),
# sample(nrow(x1) + 1:nrow(x2), round(nrow(x2) * 0.75))
# )
# te.idx <- setdiff(1:nrow(x), tr.idx)
#
# x.tr <- x[tr.idx, ]
# x.te <- x[te.idx, ]
# y.tr <- labels[tr.idx]
# y.te <- labels[te.idx]
## ---- eval = FALSE------------------------------------------------------------
# library("randomForest")
# rf.fit <- randomForest(x.tr, y.tr, cv.fold = 5)
# print(rf.fit)
## ---- eval = FALSE------------------------------------------------------------
# ## Call:
# ## randomForest(x = x.tr, y = y.tr, cv.fold = 5)
# ## Type of random forest: classification
# ## Number of trees: 500
# ## No. of variables tried at each split: 8
# ##
# ## OOB estimate of error rate: 25.11%
# ## Confusion matrix:
# ## 0 1 class.error
# ## 0 196 46 0.1900826
# ## 1 72 156 0.3157895
## ---- eval = FALSE------------------------------------------------------------
# # predict on test set
# rf.pred <- predict(rf.fit, newdata = x.te, type = "prob")[, 1]
#
# # plot ROC curve
# library("pROC")
# plot.roc(y.te, rf.pred, grid = TRUE, print.auc = TRUE)
## ---- eval = FALSE------------------------------------------------------------
# ## Call:
# ## plot.roc.default(x = y.te, predictor = rf.pred, col = "#0080ff",
# ## grid = TRUE, print.auc = TRUE)
# ##
# ## Data: rf.pred in 81 controls (y.te 0) > 76 cases (y.te 1).
# ## Area under the curve: 0.8697
## ---- eval = FALSE------------------------------------------------------------
# library("Rcpi")
#
# RI.smi <- system.file(
# "vignettedata/RI.smi",
# package = "Rcpi"
# )
# RI.csv <- system.file(
# "vignettedata/RI.csv",
# package = "Rcpi"
# )
#
# x.mol <- readMolFromSmi(RI.smi, type = "mol")
# x.tab <- read.table(RI.csv, sep = "\t", header = TRUE)
# y <- x.tab$RI
## ---- eval = FALSE------------------------------------------------------------
# # calculate selected molecular descriptors
# x <- suppressWarnings(cbind(
# extractDrugALOGP(x.mol),
# extractDrugApol(x.mol),
# extractDrugECI(x.mol),
# extractDrugTPSA(x.mol),
# extractDrugWeight(x.mol),
# extractDrugWienerNumbers(x.mol),
# extractDrugZagrebIndex(x.mol)
# ))
## ---- eval = FALSE------------------------------------------------------------
# # regression on training set
# library("caret")
# library("pls")
#
# # cross-validation settings
# ctrl <- trainControl(
# method = "repeatedcv", number = 5, repeats = 10,
# summaryFunction = defaultSummary
# )
#
# # train a pls model
# set.seed(1002)
# pls.fit <- train(
# x, y,
# method = "pls", tuneLength = 10, trControl = ctrl,
# metric = "RMSE", preProc = c("center", "scale")
# )
#
# # print cross-validation result
# print(pls.fit)
## ---- eval = FALSE------------------------------------------------------------
# # # of components vs RMSE
# print(plot(pls.fit, asp = 0.5))
## ---- eval = FALSE------------------------------------------------------------
# # plot experimental RIs vs predicted RIs
# plot(y, predict(pls.fit, x),
# xlim = range(y), ylim = range(y),
# xlab = "Experimental RIs", ylab = "Predicted RIs"
# )
# abline(a = 0, b = 1)
## ---- eval = FALSE------------------------------------------------------------
# library("Rcpi")
#
# fdamdd.smi <- system.file("vignettedata/FDAMDD.smi", package = "Rcpi")
# fdamdd.csv <- system.file("vignettedata/FDAMDD.csv", package = "Rcpi")
#
# x.mol <- readMolFromSmi(fdamdd.smi, type = "mol")
# x.smi <- readMolFromSmi(fdamdd.smi, type = "text")
# y <- as.factor(paste0("class", scan(fdamdd.csv)))
## ---- eval = FALSE------------------------------------------------------------
# # calculate molecular fingerprints
# x1 <- extractDrugEstateComplete(x.mol)
# x2 <- extractDrugMACCSComplete(x.mol)
# x3 <- extractDrugOBFP4(x.smi, type = "smile")
## ---- eval = FALSE------------------------------------------------------------
# library("caret")
#
# # remove near-zero variance variables
# x1 <- x1[, -nearZeroVar(x1)]
# x2 <- x2[, -nearZeroVar(x2)]
# x3 <- x3[, -nearZeroVar(x3)]
#
# # split training and test set
# set.seed(1003)
# tr.idx <- sample(1:nrow(x1), round(nrow(x1) * 0.75))
# te.idx <- setdiff(1:nrow(x1), tr.idx)
# x1.tr <- x1[tr.idx, ]
# x1.te <- x1[te.idx, ]
# x2.tr <- x2[tr.idx, ]
# x2.te <- x2[te.idx, ]
# x3.tr <- x3[tr.idx, ]
# x3.te <- x3[te.idx, ]
# y.tr <- y[tr.idx]
# y.te <- y[te.idx]
## ---- eval = FALSE------------------------------------------------------------
# # svm classification on training sets
# library("kernlab")
#
# # cross-validation settings
# ctrl <- trainControl(
# method = "cv", number = 5, repeats = 10,
# classProbs = TRUE,
# summaryFunction = twoClassSummary
# )
#
# # SVM with RBF kernel
# svm.fit1 <- train(
# x1.tr, y.tr,
# method = "svmRadial", trControl = ctrl,
# metric = "ROC", preProc = c("center", "scale")
# )
# svm.fit2 <- train(
# x2.tr, y.tr,
# method = "svmRadial", trControl = ctrl,
# metric = "ROC", preProc = c("center", "scale")
# )
# svm.fit3 <- train(
# x3.tr, y.tr,
# method = "svmRadial", trControl = ctrl,
# metric = "ROC", preProc = c("center", "scale")
# )
#
# # print cross-validation result
# print(svm.fit1)
# print(svm.fit2)
# print(svm.fit3)
## ---- eval = FALSE------------------------------------------------------------
# ## Support Vector Machines with Radial Basis Function Kernel
# ##
# ## 597 samples
# ## 23 predictors
# ## 2 classes: "class0", "class1"
# ##
# ## Pre-processing: centered, scaled
# ## Resampling: Cross-Validated (5 fold)
# ##
# ## Summary of sample sizes: 478, 479, 477, 477, 477
# ##
# ## Resampling results across tuning parameters:
# ##
# ## C ROC Sens Spec ROC SD Sens SD Spec SD
# ## 0.25 0.797 0.7 0.765 0.0211 0.0442 0.00666
# ## 0.5 0.808 0.696 0.79 0.0173 0.059 0.0236
# ## 1 0.812 0.703 0.781 0.0191 0.0664 0.0228
# ##
# ## Tuning parameter "sigma" was held constant at a value of 0.02921559
# ## ROC was used to select the optimal model using the largest value.
# ## The final values used for the model were sigma = 0.0292 and C = 1.
## ---- eval = FALSE------------------------------------------------------------
# ## Support Vector Machines with Radial Basis Function Kernel
# ##
# ## 597 samples
# ## 126 predictors
# ## 2 classes: "class0", "class1"
# ##
# ## Pre-processing: centered, scaled
# ## Resampling: Cross-Validated (5 fold)
# ##
# ## Summary of sample sizes: 477, 477, 477, 478, 479
# ##
# ## Resampling results across tuning parameters:
# ##
# ## C ROC Sens Spec ROC SD Sens SD Spec SD
# ## 0.25 0.834 0.715 0.775 0.0284 0.0994 0.0589
# ## 0.5 0.848 0.726 0.79 0.0299 0.065 0.0493
# ## 1 0.863 0.769 0.793 0.0307 0.0229 0.0561
# ##
# ## Tuning parameter "sigma" was held constant at a value of 0.004404305
# ## ROC was used to select the optimal model using the largest value.
# ## The final values used for the model were sigma = 0.0044 and C = 1.
## ---- eval = FALSE------------------------------------------------------------
# ## Support Vector Machines with Radial Basis Function Kernel
# ##
# ## 597 samples
# ## 58 predictors
# ## 2 classes: "class0", "class1"
# ##
# ## Pre-processing: centered, scaled
# ## Resampling: Cross-Validated (5 fold)
# ##
# ## Summary of sample sizes: 478, 478, 477, 478, 477
# ##
# ## Resampling results across tuning parameters:
# ##
# ## C ROC Sens Spec ROC SD Sens SD Spec SD
# ## 0.25 0.845 0.769 0.746 0.0498 0.0458 0.0877
# ## 0.5 0.856 0.744 0.777 0.0449 0.0148 0.0728
# ## 1 0.863 0.751 0.777 0.0428 0.036 0.0651
# ##
# ## Tuning parameter "sigma" was held constant at a value of 0.01077024
# ## ROC was used to select the optimal model using the largest value.
# ## The final values used for the model were sigma = 0.0108 and C = 1.
## ---- eval = FALSE------------------------------------------------------------
# # predict on test set
# svm.pred1 <- predict(svm.fit1, newdata = x1.te, type = "prob")[, 1]
# svm.pred2 <- predict(svm.fit2, newdata = x2.te, type = "prob")[, 1]
# svm.pred3 <- predict(svm.fit3, newdata = x3.te, type = "prob")[, 1]
#
# # generate colors
# library("RColorBrewer")
# pal <- brewer.pal(3, "Set1")
#
# # ROC curves of different fingerprints
# library("pROC")
# plot(smooth(roc(y.te, svm.pred1)), col = pal[1], grid = TRUE)
# plot(smooth(roc(y.te, svm.pred2)), col = pal[2], grid = TRUE, add = TRUE)
# plot(smooth(roc(y.te, svm.pred3)), col = pal[3], grid = TRUE, add = TRUE)
## ---- eval = FALSE------------------------------------------------------------
# library("Rcpi")
# mols <- readMolFromSDF(system.file(
# "compseq/tyrphostin.sdf",
# package = "Rcpi"
# ))
## ---- eval = FALSE------------------------------------------------------------
# simmat <- diag(length(mols))
#
# for (i in 1:length(mols)) {
# for (j in i:length(mols)) {
# fp1 <- extractDrugEstate(mols[[i]])
# fp2 <- extractDrugEstate(mols[[j]])
# tmp <- calcDrugFPSim(fp1, fp2, fptype = "compact", metric = "tanimoto")
# simmat[i, j] <- tmp
# simmat[j, i] <- tmp
# }
# }
## ---- eval = FALSE------------------------------------------------------------
# mol.hc <- hclust(as.dist(1 - simmat), method = "ward.D")
#
# library("ape") # tree visualization of clusters
# clus5 <- cutree(mol.hc, 5) # cut dendrogram into 5 clusters
#
# # generate colors
# library("RColorBrewer")
# pal5 <- brewer.pal(5, "Set1")
# plot(as.phylo(mol.hc),
# type = "fan",
# tip.color = pal5[clus5],
# label.offset = 0.1, cex = 0.7
# )
## ---- eval = FALSE------------------------------------------------------------
# library("Rcpi")
#
# mol <- system.file("compseq/DB00530.sdf", package = "Rcpi")
# moldb <- system.file("compseq/tyrphostin.sdf", package = "Rcpi")
## ---- eval = FALSE------------------------------------------------------------
# rank1 <- searchDrug(
# mol, moldb,
# cores = 4, method = "fp",
# fptype = "maccs", fpsim = "tanimoto"
# )
# rank2 <- searchDrug(
# mol, moldb,
# cores = 4, method = "fp",
# fptype = "fp2", fpsim = "cosine"
# )
# rank3 <- searchDrug(
# mol, moldb,
# cores = 4, method = "mcs",
# mcssim = "tanimoto"
# )
## ---- eval = FALSE------------------------------------------------------------
# head(rank1)
# ## 92 100 83 101 1 36
# ## 0.6491228 0.6491228 0.5882353 0.5660377 0.5000000 0.4861111
#
# head(rank2)
# ## 100 92 83 101 94 16
# ## 0.8310005 0.8208663 0.5405856 0.5033150 0.4390790 0.4274081
#
# head(rank3)
# ## 92 100 23 39 94 64
# ## 0.7000000 0.7000000 0.4000000 0.4000000 0.4000000 0.3783784
## ---- eval = FALSE------------------------------------------------------------
# # convert SDF format to SMILES format
# convMolFormat(
# infile = mol, outfile = "DB00530.smi", from = "sdf", to = "smiles"
# )
# convMolFormat(
# infile = moldb, outfile = "tyrphostin.smi", from = "sdf", to = "smiles"
# )
#
# smi1 <- readLines("DB00530.smi")
# smi2 <- readLines("tyrphostin.smi")[92] # select the #92 molecule
# calcDrugMCSSim(smi1, smi2, type = "smile", plot = TRUE)
## ---- eval = FALSE------------------------------------------------------------
# ## [[1]]
# ## An instance of "MCS"
# ## Number of MCSs: 1
# ## 530: 29 atoms
# ## 4705: 22 atoms
# ## MCS: 18 atoms
# ## Tanimoto Coefficient: 0.54545
# ## Overlap Coefficient: 0.81818
# ##
# ## [[2]]
# ## Tanimoto_Coefficient
# ## 0.5454545
# ##
# ## [[3]]
# ## Overlap_Coefficient
# ## 0.8181818
## ---- eval = FALSE------------------------------------------------------------
# library("Rcpi")
#
# gpcr <- read.table(system.file(
# "vignettedata/GPCR.csv",
# package = "Rcpi"
# ),
# header = FALSE, as.is = TRUE
# )
## ---- eval = FALSE------------------------------------------------------------
# head(gpcr)
## ---- eval = FALSE------------------------------------------------------------
# ## V1 V2
# ## 1 hsa:10161 D00528
# ## 2 hsa:10800 D00411
# ## 3 hsa:10800 D01828
# ## 4 hsa:10800 D05129
# ## 5 hsa:11255 D00234
# ## 6 hsa:11255 D00300
## ---- eval = FALSE------------------------------------------------------------
# library("igraph")
# library("arcdiagram")
# library("reshape")
#
# g <- graph.data.frame(gpcr[1:(nrow(gpcr) / 2), ], directed = FALSE)
# edgelist <- get.edgelist(g)
# vlabels <- V(g)$name
# vgroups <- c(rep(0, 95), rep(1, 223))
# vfill <- c(rep("#8B91D4", 95), rep("#B2C771", 223))
# vborders <- c(rep("#6F74A9", 95), rep("#8E9F5A", 223))
# degrees <- degree(g)
#
# xx <- data.frame(vgroups, degrees, vlabels, ind = 1:vcount(g))
# yy <- arrange(xx, desc(vgroups), desc(degrees))
# new.ord <- yy$ind
#
# arcplot(
# edgelist,
# ordering = new.ord, labels = vlabels,
# cex.labels = 0.1, show.nodes = TRUE,
# col.nodes = vborders, bg.nodes = vfill,
# cex.nodes = log10(degrees) + 0.1,
# pch.nodes = 21, line = -0.5, col.arcs = hsv(0, 0, 0.2, 0.25)
# )
## ---- eval = FALSE------------------------------------------------------------
# library("Rcpi")
#
# gpcr <- read.table(system.file(
# "vignettedata/GPCR.csv",
# package = "Rcpi"
# ),
# header = FALSE, as.is = TRUE
# )
#
# protid <- unique(gpcr[, 1])
# drugid <- unique(gpcr[, 2])
#
# protseq <- getSeqFromKEGG(protid, parallel = 5)
# drugseq <- getSmiFromKEGG(drugid, parallel = 50)
## ---- eval = FALSE------------------------------------------------------------
# x0.prot <- cbind(
# t(sapply(unlist(protseq), extractProtAPAAC)),
# t(sapply(unlist(protseq), extractProtCTriad))
# )
#
# x0.drug <- cbind(
# extractDrugEstateComplete(readMolFromSmi(textConnection(drugseq))),
# extractDrugMACCSComplete(readMolFromSmi(textConnection(drugseq))),
# extractDrugOBFP4(drugseq, type = "smile")
# )
## ---- eval = FALSE------------------------------------------------------------
# # generate drug x / protein x / y
# x.prot <- matrix(NA, nrow = nrow(gpcr), ncol = ncol(x0.prot))
# x.drug <- matrix(NA, nrow = nrow(gpcr), ncol = ncol(x0.drug))
# for (i in 1:nrow(gpcr)) x.prot[i, ] <- x0.prot[which(gpcr[, 1][i] == protid), ]
# for (i in 1:nrow(gpcr)) x.drug[i, ] <- x0.drug[which(gpcr[, 2][i] == drugid), ]
#
# y <- as.factor(c(rep("pos", nrow(gpcr) / 2), rep("neg", nrow(gpcr) / 2)))
## ---- eval = FALSE------------------------------------------------------------
# x <- getCPI(x.prot, x.drug, type = "combine")
## ---- eval = FALSE------------------------------------------------------------
# library("caret")
# x <- x[, -nearZeroVar(x)]
#
# # cross-validation settings
# ctrl <- trainControl(
# method = "cv", number = 5, repeats = 10,
# classProbs = TRUE,
# summaryFunction = twoClassSummary
# )
#
# # train a random forest classifier
# library("randomForest")
#
# set.seed(1006)
# rf.fit <- train(
# x, y,
# method = "rf", trControl = ctrl,
# metric = "ROC", preProc = c("center", "scale")
# )
## ---- eval = FALSE------------------------------------------------------------
# ## Random Forest
# ##
# ## 1270 samples
# ## 562 predictors
# ## 2 classes: "neg", "pos"
# ##
# ## Pre-processing: centered, scaled
# ## Resampling: Cross-Validated (5 fold)
# ##
# ## Summary of sample sizes: 1016, 1016, 1016, 1016, 1016
# ##
# ## Resampling results across tuning parameters:
# ##
# ## mtry ROC Sens Spec ROC SD Sens SD Spec SD
# ## 2 0.83 0.726 0.778 0.0221 0.044 0.0395
# ## 33 0.882 0.795 0.82 0.018 0.0522 0.0443
# ## 562 0.893 0.822 0.844 0.0161 0.0437 0.0286
# ##
# ## ROC was used to select the optimal model using the largest value.
# ## The final value used for the model was mtry = 562.
## ---- eval = FALSE------------------------------------------------------------
# rf.pred <- predict(rf.fit$finalModel, x, type = "prob")[, 1]
#
# library("pROC")
# plot(smooth(roc(y, rf.pred)), grid = TRUE, print.auc = TRUE)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.