Nothing
### R code from vignette source 'BioSeqClass.Rnw'
###################################################
### code chunk number 1: install (eval = FALSE)
###################################################
## if (!requireNamespace("BiocManager", quietly=TRUE))
## install.packages("BiocManager")
## BiocManager::install("BioSeqClass")
###################################################
### code chunk number 2: install2 (eval = FALSE)
###################################################
## source("http://www.biosino.org/download/BioSeqClass/BioSeqClass.R")
## BioSeqClass()
###################################################
### code chunk number 3: load
###################################################
library(BioSeqClass)
###################################################
### code chunk number 4: homologReduction
###################################################
library(BioSeqClass)
# Example data in BioSeqClass.
file=file.path(path.package("BioSeqClass"),"example","acetylation_K.fasta")
posfile = file.path(path.package("BioSeqClass"),
"example", "acetylation_K.site")
# Only a part of lysine acetylation sites are used for demo.
posfile1=tempfile()
write.table(read.table(posfile,sep='\t',header=F)[1:20,], posfile1,
sep='\t', quote=F, row.names=F, col.names=F)
seqList = getTrain(file, posfile1, aa="K", w=7, identity=0.4)
###################################################
### code chunk number 5: readData
###################################################
tmpDir=file.path(path.package('BioSeqClass'), 'example')
tmpFile1=file.path(tmpDir, 'acetylation_K.pos40.pep')
tmpFile2=file.path(tmpDir, 'acetylation_K.neg40.pep')
posSeq=as.matrix(read.csv(tmpFile1,header=F,sep='\t',row.names=1))[,1]
negSeq=as.matrix(read.csv(tmpFile2,header=F,sep='\t',row.names=1))[,1]
seq=c(posSeq,negSeq)
classLable=c(rep("+1",length(posSeq)),rep("-1",length(negSeq)) )
length(seq)
###################################################
### code chunk number 6: featureSelect
###################################################
# Use 0-1 binary coding.
feature1 = featureBinary(seq,elements("aminoacid"))
dim(feature1)
# Use the compostion of paired amino acids.
feature2 = featureGapPairComposition(seq,0,elements("aminoacid"))
dim(feature2)
###################################################
### code chunk number 7: classModel
###################################################
data = data.frame(feature1,classLable)
# Use support vector machine and 5 fold cross validation to do classification.
LIBSVM_CV5 = classify(data, classifyMethod='libsvm',
cv=5, svm.kernel='linear',svm.scale=F)
LIBSVM_CV5[["totalPerformance"]]
# Features selection is done by envoking "CfsSubsetEval" method in WEKA.
FS_LIBSVM_CV5 = classify(data, classifyMethod='libsvm',
cv=5, evaluator='CfsSubsetEval', search='BestFirst',
svm.kernel='linear', svm.scale=F)
FS_LIBSVM_CV5[["totalPerformance"]] ## Accuracy is increased by feature selection.
# Selected features:
colnames(data)[FS_LIBSVM_CV5$features[[1]]]
###################################################
### code chunk number 8: featureTest (eval = FALSE)
###################################################
## fileName = tempfile()
## # Note: It may be time consuming.
## testFeatureSet = featureEvaluate(seq, classLable, fileName, cv=5,
## ele.type='aminoacid', featureMethod=c('Binary','GapPairComposition'),
## classifyMethod='libsvm',
## group=c('aaH', 'aaV', 'aaZ', 'aaP', 'aaF', 'aaS', 'aaE'), g=0,
## hydro.methods=c('kpm', 'SARAH1'), hydro.indexs=c('hydroE', 'hydroF', 'hydroC') )
## summary = read.csv(fileName,sep="\t",header=T)
## # Plot the result of 'featureEvaluate'
## require("scatterplot3d")
## tmp1 = summary[,"Feature_Function"]
## tmp2 = as.factor(sapply(as.vector(summary[,'Feature_Parameter']),
## function(x){unlist(strsplit(x,split='; '))[1]}))
## testResult = data.frame(as.integer(tmp2), as.integer(tmp1), summary[,"acc"])
## s3d=scatterplot3d(testResult,
## color=c('red','blue')[testResult[,2]], pch=19, xlab='Parameter',
## ylab='Feature Coding',
## zlab='Accuracy', lab=c(9,3,7),
## x.ticklabs=gsub('class: ','',sort(unique(tmp2))),
## type='h',ylim=c(0,3),y.margin.add=2.5,
## y.ticklabs=c('',gsub('feature','',sort(unique(tmp1))),'') )
###################################################
### code chunk number 9: featureComSelect (eval = FALSE)
###################################################
## feature.index = 1:3
## tmp <- testFeatureSet[[1]]$data
## tmp1 <- testFeatureSet[[feature.index[1]]]$model
## colnames(tmp) <- paste(
## tmp1["Feature_Function"],
## tmp1["Feature_Parameter"],
## colnames(tmp),sep=" ; ")
## data <- tmp[,-ncol(tmp)]
## for(i in 2:length(feature.index) ){
## tmp <- testFeatureSet[[feature.index[i]]]$data
## tmp1 <- testFeatureSet[[feature.index[i]]]$model
## colnames(tmp) <- paste(
## tmp1["Feature_Function"],
## tmp1["Feature_Parameter"],
## colnames(tmp),sep=" ; ")
## data <- data.frame(data, tmp[,-ncol(tmp)] )
## }
## name <- colnames(data)
## data <- data.frame(data, tmp[,ncol(tmp)] ) ## Combined features
## # Use 'selectFFS' to do feature forward selection.
## # Note: It may be time consuming.
## combineFeatureResult = selectFFS(data,accCutoff=0.005,
## classifyMethod="knn",cv=5) ## It is time consuming.
## tmp = sapply(combineFeatureResult,function(x){
## c(length(x$features),x$performance["acc"])})
## plot(tmp[1,],tmp[2,],xlab="Feature Number",ylab="Accuracy",
## , pch=19)
## lines(tmp[1,],tmp[2,])
###################################################
### code chunk number 10: BioSeqClass.Rnw:488-489
###################################################
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.