Nothing
## ----environment, echo=FALSE, message=FALSE, warning=FALSE--------------------
library("topdownr")
library("topdownrdata")
library("ranger")
library("ggplot2")
library("BiocStyle")
## ----citation, echo=FALSE, results="asis"-------------------------------------
ct <- format(citation("topdownr"), "textVersion")
cat(gsub("DOI: *(.*)$", "DOI: [\\1](https://doi.org/\\1)", ct), "\n")
## ----loadPackage--------------------------------------------------------------
library("topdownr")
## ----listFiles, eval=-1, echo=1, comment=NA-----------------------------------
list.files(topdownrdata::topDownDataPath("myoglobin"))
lapply(
topdownr:::.listTopDownFiles(
topdownrdata::topDownDataPath("myoglobin")),
function(x) {
c(head(
file.path(
"...",
paste(
tail(strsplit(dirname(x), "/")[[1L]], 2),
collapse=.Platform$file.sep
),
basename(x)
),
2
), "...")
}
)
## ----importFiles, warnings=FALSE----------------------------------------------
## the mass adduct for a proton
H <- 1.0078250321
myoglobin <- readTopDownFiles(
## directory path
path = topdownrdata::topDownDataPath("myoglobin"),
## fragmentation types
type = c("a", "b", "c", "x", "y", "z"),
## adducts (add -H/H to c/z and name
## them cmH/zpH (c minus H, z plus H)
adducts = data.frame(
mass=c(-H, H),
to=c("c", "z"),
name=c("cmH", "zpH")),
## initiator methionine removal
modifications = "Met-loss",
## don't use neutral loss
neutralLoss = NULL,
## tolerance for fragment matching
tolerance = 5e-6,
## topdownrdata was generate with an older version of topdownr,
## the method files were generated with FilterString identification,
## use `conditions = "ScanDescription"` (default) for recent data.
conditions = "FilterString"
)
myoglobin
## ----rowViews-----------------------------------------------------------------
rowViews(myoglobin)
## ----colData------------------------------------------------------------------
conditionData(myoglobin)[, 1:5]
## ----assayData----------------------------------------------------------------
assayData(myoglobin)[206:215, 1:10]
## ----subsetting---------------------------------------------------------------
# select the first 100 fragments
myoglobin[1:100]
# select all "c" fragments
myoglobin["c"]
# select just the 100. "c" fragment
myoglobin["c100"]
# select all "a" and "b" fragments but just the first 100 "c"
myoglobin[c("a", "b", paste0("c", 1:100))]
# select condition/run 1 to 10
myoglobin[, 1:10]
# select all conditions from one file
myoglobin[, myoglobin$File == "myo_1211_ETDReagentTarget_1e+06_1"]
# select all "c" fragments from a single file
myoglobin["c", myoglobin$File == "myo_1211_ETDReagentTarget_1e+06_1"]
## ----plotting, eval=1:2-------------------------------------------------------
# plot a single condition
plot(myoglobin[, "C0707.30_1.0e+05_1.0e+06_10.00_00_28_3"])
# example to plot the first ten conditions into a pdf
# (not evaluated in the vignette)
pdf("topdown-conditions.pdf", paper="a4r", width=12)
plot(myoglobin[, 1:10])
dev.off()
## ----importFiles2, ref.label="import_files", eval=FALSE-----------------------
# NA
## ----filterInjectionTimes-----------------------------------------------------
injTimeBefore <- colData(myoglobin)
injTimeBefore$Status <- "before filtering"
## filtering on max deviation and just keep the
## 2 technical replicates per condition with the
## lowest deviation
myoglobin <- filterInjectionTime(
myoglobin,
maxDeviation = log2(3),
keepTopN = 2
)
myoglobin
injTimeAfter <- colData(myoglobin)
injTimeAfter$Status <- "after filtering"
injTime <- as.data.frame(rbind(injTimeBefore, injTimeAfter))
## use ggplot for visualisation
library("ggplot2")
ggplot(injTime,
aes(x = as.factor(AgcTarget),
y = IonInjectionTimeMs,
group = AgcTarget)) +
geom_boxplot() +
facet_grid(Status ~ Mz)
## ----filterCv-----------------------------------------------------------------
myoglobin <- filterCv(myoglobin, threshold=30)
myoglobin
## ----filterIntensity----------------------------------------------------------
myoglobin <- filterIntensity(myoglobin, threshold=0.1)
myoglobin
## ----aggregate----------------------------------------------------------------
myoglobin <- aggregate(myoglobin)
myoglobin
## ----randomForest-------------------------------------------------------------
library("ranger")
## statistics
head(summary(myoglobin))
## number of fragments
nFragments <- summary(myoglobin)$Fragments
## features of interest
foi <- c(
"AgcTarget",
"EtdReagentTarget",
"EtdActivation",
"CidActivation",
"HcdActivation",
"Charge"
)
rfTable <- as.data.frame(colData(myoglobin)[foi])
## set NA to zero
rfTable[is.na(rfTable)] <- 0
rfTable <- as.data.frame(cbind(
scale(rfTable),
Fragments = nFragments
))
featureImportance <- ranger(
Fragments ~ .,
data = rfTable,
importance = "impurity"
)$variable.importance
barplot(
featureImportance/sum(featureImportance),
cex.names = 0.7
)
## ----coerce2NCBSet------------------------------------------------------------
myoglobinNcb <- as(myoglobin, "NCBSet")
myoglobinNcb
## ----bestConditions-----------------------------------------------------------
bestConditions(myoglobinNcb, n=3)
## ----fragmentationMap---------------------------------------------------------
sel <-
myoglobinNcb$Mz == 707.3 &
myoglobinNcb$AgcTarget == 1e6 &
(myoglobinNcb$EtdReagentTarget == 1e7 &
!is.na(myoglobinNcb$EtdReagentTarget))
myoglobinNcbSub <- myoglobinNcb[, sel]
fragmentationMap(
myoglobinNcbSub,
nCombinations = 10,
labels = seq_len(ncol(myoglobinNcbSub))
)
## ----sessionInfo--------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.