Nothing
## ---- include = FALSE-----------------------------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## -------------------------------------------------------------------------------------------------
system.file("extdata/proteinGroups.txt",
package = "proDA", mustWork = TRUE)
## -------------------------------------------------------------------------------------------------
full_data <- read.delim(
system.file("extdata/proteinGroups.txt",
package = "proDA", mustWork = TRUE),
stringsAsFactors = FALSE
)
head(colnames(full_data))
## -------------------------------------------------------------------------------------------------
# I use a regular expression (regex) to select the intensity columns
intensity_colnames <- grep("^LFQ\\.intensity\\.", colnames(full_data), value=TRUE)
# Create matrix which only contains the intensity columns
data <- as.matrix(full_data[, intensity_colnames])
colnames(data) <- sub("^LFQ\\.intensity\\.", "", intensity_colnames)
# Code missing values explicitly as NA
data[data == 0] <- NA
# log transformation to account for mean-variance relation
data <- log2(data)
# Overview of data
data[1:7, 1:6]
# Set rownames after showing data, because they are so long
rownames(data) <- full_data$Protein.IDs
## -------------------------------------------------------------------------------------------------
annotation_df <- data.frame(
Condition = sub("\\.\\d+", "", sub("^LFQ\\.intensity\\.",
"", intensity_colnames)),
Replicate = as.numeric(sub("^LFQ\\.intensity\\.[[:alnum:]]+\\.",
"", intensity_colnames)),
stringsAsFactors = FALSE, row.names = colnames(data)
)
head(annotation_df)
## ----eval=FALSE, include=TRUE---------------------------------------------------------------------
# # Not Run
# library(proDA)
# fit <- proDA(data, design= annotation_df$Condition, col_data = annotation_df)
# test_diff(fit, contrast = CG1407 - S2R)
# # End Not Run
## ---- include=FALSE-------------------------------------------------------------------------------
library(SummarizedExperiment)
library(MSnbase)
## -------------------------------------------------------------------------------------------------
library(SummarizedExperiment)
se <- SummarizedExperiment(SimpleList(LFQ=data), colData=annotation_df)
rowData(se) <- full_data[, c("Only.identified.by.site",
"Reverse", "Potential.contaminant")]
se
## -------------------------------------------------------------------------------------------------
library(MSnbase)
fData <- AnnotatedDataFrame(full_data[, c("Only.identified.by.site",
"Reverse", "Potential.contaminant")])
rownames(fData) <- rownames(data)
ms <- MSnSet(data, pData=AnnotatedDataFrame(annotation_df), fData=fData)
ms
## ----eval=FALSE, include=TRUE---------------------------------------------------------------------
# # Not Run
# library(proDA)
# fit <- proDA(se, design = ~ Condition - 1)
# test_diff(fit, contrast = ConditionCG1407 - ConditionS2R)
# # End Not Run
## ---- include=FALSE-------------------------------------------------------------------------------
library(dplyr)
library(stringr)
library(readr)
library(tidyr)
library(tibble)
## -------------------------------------------------------------------------------------------------
library(dplyr)
library(stringr)
library(readr)
library(tidyr)
library(tibble)
# Or short
# library(tidyverse)
## -------------------------------------------------------------------------------------------------
# The read_tsv function works faster and more reliable than read.delim
# But it sometimes needs help to identify the right type for each column,
# because it looks only at the first 1,000 elements.
# Here, I explicitly define the `Reverse` column as a character column
full_data <- read_tsv(
system.file("extdata/proteinGroups.txt",
package = "proDA", mustWork = TRUE),
col_types = cols(Reverse = col_character())
)
full_data
## -------------------------------------------------------------------------------------------------
# I explicitly call `dplyr::select()` because there is a naming conflict
# between the tidyverse and BioConductor packages for `select()` function
tidy_data <- full_data %>%
dplyr::select(ProteinID=Protein.IDs, starts_with("LFQ.intensity.")) %>%
gather(Sample, Intensity, starts_with("LFQ.intensity.")) %>%
mutate(Condition = str_match(Sample,
"LFQ\\.intensity\\.([[:alnum:]]+)\\.\\d+")[,2]) %>%
mutate(Replicate = as.numeric(str_match(Sample,
"LFQ\\.intensity\\.[[:alnum:]]+\\.(\\d+)")[,2])) %>%
mutate(SampleName = paste0(Condition, ".", Replicate))
tidy_data
## -------------------------------------------------------------------------------------------------
data <- tidy_data %>%
mutate(Intensity = ifelse(Intensity == 0, NA, log2(Intensity))) %>%
dplyr::select(ProteinID, SampleName, Intensity) %>%
spread(SampleName, Intensity) %>%
column_to_rownames("ProteinID") %>%
as.matrix()
data[1:4, 1:7]
annotation_df <- tidy_data %>%
dplyr::select(SampleName, Condition, Replicate) %>%
distinct() %>%
arrange(Condition, Replicate) %>%
as.data.frame() %>%
column_to_rownames("SampleName")
annotation_df
## -------------------------------------------------------------------------------------------------
library(SummarizedExperiment)
se <- SummarizedExperiment(SimpleList(LFQ=data), colData=annotation_df)
rowData(se) <- full_data[, c("Only.identified.by.site",
"Reverse", "Potential.contaminant")]
se
## -------------------------------------------------------------------------------------------------
library(MSnbase)
fData <- AnnotatedDataFrame(full_data[, c("Only.identified.by.site",
"Reverse", "Potential.contaminant")])
rownames(fData) <- rownames(data)
ms <- MSnSet(data, pData=AnnotatedDataFrame(annotation_df), fData=fData)
ms
## ----eval=FALSE, include=TRUE---------------------------------------------------------------------
# # Not Run
# library(proDA)
# fit <- proDA(se, design = ~ Condition - 1)
# test_diff(fit, contrast = ConditionCG1407 - ConditionS2R)
# # End Not Run
## -------------------------------------------------------------------------------------------------
library(DEP)
full_data <- read.delim(
system.file("extdata/proteinGroups.txt",
package = "proDA", mustWork = TRUE),
stringsAsFactors = FALSE
)
exp_design <- data.frame(
label =c("LFQ.intensity.CG1407.01", "LFQ.intensity.CG1407.02", "LFQ.intensity.CG1407.03", "LFQ.intensity.CG4676.01", "LFQ.intensity.CG4676.02", "LFQ.intensity.CG4676.03", "LFQ.intensity.CG51963.01", "LFQ.intensity.CG51963.02", "LFQ.intensity.CG51963.03","LFQ.intensity.CG5620A.01", "LFQ.intensity.CG5620A.02", "LFQ.intensity.CG5620A.03", "LFQ.intensity.CG5620B.01","LFQ.intensity.CG5620B.02", "LFQ.intensity.CG5620B.03", "LFQ.intensity.CG5880.01", "LFQ.intensity.CG5880.02", "LFQ.intensity.CG5880.03", "LFQ.intensity.CG6017.01", "LFQ.intensity.CG6017.02", "LFQ.intensity.CG6017.03", "LFQ.intensity.CG6618.01", "LFQ.intensity.CG6618.02", "LFQ.intensity.CG6618.03", "LFQ.intensity.CG6627.01", "LFQ.intensity.CG6627.02", "LFQ.intensity.CG6627.03", "LFQ.intensity.CG8314.01", "LFQ.intensity.CG8314.02", "LFQ.intensity.CG8314.03", "LFQ.intensity.GsbPI.001", "LFQ.intensity.GsbPI.002", "LFQ.intensity.GsbPI.003", "LFQ.intensity.S2R.01", "LFQ.intensity.S2R.02", "LFQ.intensity.S2R.03"),
condition = c("CG1407", "CG1407", "CG1407", "CG4676", "CG4676", "CG4676", "CG51963", "CG51963", "CG51963", "CG5620A", "CG5620A", "CG5620A", "CG5620B", "CG5620B", "CG5620B", "CG5880", "CG5880", "CG5880", "CG6017", "CG6017", "CG6017", "CG6618", "CG6618", "CG6618", "CG6627", "CG6627", "CG6627", "CG8314", "CG8314", "CG8314", "GsbPI", "GsbPI", "GsbPI", "S2R", "S2R", "S2R" ),
replicate = rep(1:3, times=12),
stringsAsFactors = FALSE
)
se <- import_MaxQuant(full_data, exp_design)
se
assay(se)[1:5, 1:5]
## ----eval=FALSE, include=TRUE---------------------------------------------------------------------
# # Not Run
# library(proDA)
# fit <- proDA(se, design = ~ condition - 1)
# # Here, we need to be specific, because DEP also has a test_diff method
# proDA::test_diff(fit, contrast = conditionCG1407 - conditionS2R)
# # End Not Run
## -------------------------------------------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.