#' @title Correct RTs of peaks in peak table using internal standard list
#' @description Correct RTs of peaks in peak table using internal standard list.
#' @author Xiaotao Shen
#' \email{shenxt1990@@163.com}
#' @param reference_is_table_name Experiment internal standard table name.
#' @param query_is_table_name Database internal standard table name.
#' @param query_peak_table_name Peak table name. Column 1 is name (peak name), column 2 is mz
#' (mass to charge ratio), column 3 is rt. And remaining columns are intensity of samples.
#' @param method polyline or loess.
#' @param poly A numeric vector.
#' @param degree A numeric vector.
#' @param path Work directory.
#' @return A peak table.
#' @export
# sxtTools::setwd_project()
# setwd("example/rt_correction")
#
# reference_is_table_name = "reference_is_table.csv"
# query_is_table_name = "query_is_table.csv"
# query_peak_table_name = "query_peak_table.csv"
# method = "polyline"
# poly = c(1, 2, 3)
# path = "."
#
# correct_rt(reference_is_table_name = "reference_is_table.csv",
# query_is_table_name = "query_is_table.csv",
# query_peak_table_name = "query_peak_table.csv",
# method = "loess",
# path = "example/rt_correction/")
correct_rt = function(reference_is_table_name,
query_is_table_name,
query_peak_table_name,
method = c("loess", "polyline"),
poly = c(1, 2, 3),
degree = c(1, 2),
path = ".") {
dir.create(path, showWarnings = FALSE)
method <- match.arg(method)
###check data
if (all(dir(path = path) != reference_is_table_name)) {
stop("No ", reference_is_table_name, " in ", path)
} else{
reference_is_table = readTable(file.path(path, reference_is_table_name)) %>%
as.data.frame()
}
if (all(dir(path = path) != query_is_table_name)) {
stop("No ", query_is_table_name, " in ", path)
} else{
query_is_table = readTable(file.path(path, query_is_table_name)) %>%
as.data.frame()
}
if (all(dir(path = path) != query_peak_table_name)) {
stop("No ", query_peak_table_name, " in ", path)
} else{
query_peak_table = readTable(file.path(path, query_peak_table_name)) %>%
as.data.frame()
}
reference_is_table$RT = as.numeric(reference_is_table$RT)
query_is_table$RT = as.numeric(query_is_table$RT)
query_peak_table$rt = as.numeric(query_peak_table$rt)
remain.idx <-
which(!is.na(reference_is_table[, 3]) &
!is.na(query_is_table[, 3]))
reference_is_table <-
reference_is_table[remain.idx, , drop = FALSE]
query_is_table <-
query_is_table[remain.idx, , drop = FALSE]
if (nrow(reference_is_table) < 10) {
poly = c(1, 2)
}
rt_table <- data.frame(
reference = reference_is_table[, 3],
query = query_is_table[, 3],
stringsAsFactors = FALSE
)
rt1 <- rt_table$reference
rt2 <- rt_table$query
if (method == "polyline") {
poly <- poly[which(poly < nrow(rt_table) - 1)]
mse <- best_poly(
x = rt_table$query,
y = rt_table$reference,
poly = poly,
path = path
)
idx <- poly[which.min(mse)]
model <- lm(rt1 ~ poly(rt2, idx))
} else{
result <- best_loess(
x = rt_table$query,
y = rt_table$reference,
span.begin = 0.5,
span.end = 1,
span.step = 0.1,
degree = degree,
path = path
)
idx <- which.min(result[, 3])
model <- loess(rt1 ~ rt2,
span = result[idx, 2],
degree = result[idx, 1])
}
raw_rt <- query_peak_table$rt
predict_rt <- predict(object = model,
newdata = data.frame(rt2 = raw_rt))
predict_rt[raw_rt < min(rt2)] <-
raw_rt[raw_rt < min(rt2)]
predict_rt[raw_rt > max(rt2)] <-
raw_rt[raw_rt > max(rt2)]
temp.data <- data.frame(raw_rt, predict_rt,
stringsAsFactors = FALSE)
plot <-
ggplot2::ggplot(data = temp.data, ggplot2::aes(x = raw_rt, y = predict_rt)) +
ggplot2::geom_point(color = "black") +
# ggplot2::geom_line() +
ggplot2::geom_smooth(method = "loess",
color = "black",
fill = "gray") +
ggplot2::geom_abline(intercept = 0,
slope = 1,
color = "red") +
ggplot2::theme_bw()
ggplot2::ggsave(
filename = file.path(path, "raw_rt vs predict_rt plot.pdf"),
plot = plot,
width = 8,
height = 6
)
query_peak_table <- data.frame(query_peak_table,
"rt_correction" = predict_rt,
stringsAsFactors = FALSE) %>%
dplyr::select(name, mz, rt, rt_correction)
write.csv(
query_peak_table,
file = file.path(path, "Feature_table_with_corrected_rt.csv"),
row.names = FALSE
)
}
###parameters optimization
best_poly = function(x,
y,
poly = c(1, 2, 3, 4),
path = ".") {
pre.all <- list()
for (i in seq_along(poly)) {
# cat(i, " ")
##LOO
pre <- NULL
for (j in seq_along(x)) {
# cat(j, " ")
y1 <- y[-j]
x1 <- x[-j]
model <- lm(y1 ~ poly(x1, poly[i]))
pre[j] <- predict(object = model,
newdata = data.frame(x1 = x[j]))
}
pre.all[[i]] <- pre
}
mse <-
unlist(lapply(pre.all, function(x) {
sum((y - x) ^ 2) / length(y)
}))
# y.lim1 <- 0.8*min(c(y, unlist(lapply(pre.all, min))))
# y.lim2 <- 1.2*max(c(y, unlist(lapply(pre.all, max))))
temp.data1 <- do.call(c, pre.all)
temp.data1 <- data.frame(
query.RT = rep(x, length(pre.all)),
reference.RT = temp.data1,
Poly.MSE = as.character(rep(paste(
poly, round(mse, 3), sep = ": "
), each = length(x))),
stringsAsFactors = FALSE
)
temp.data2 <- data.frame(
query.RT = x,
reference.RT = y,
stringsAsFactors = FALSE
)
plot <-
ggplot2::ggplot(data = temp.data2,
ggplot2::aes(x = query.RT, y = reference.RT)) +
ggplot2::geom_point(color = "black") +
ggplot2::geom_line() +
ggplot2::geom_smooth(method = "loess",
color = "black",
fill = "gray") +
ggplot2::geom_point(
data = temp.data1,
mapping = ggplot2::aes(x = query.RT, y = reference.RT, color = Poly.MSE)
) +
ggplot2::geom_line(
data = temp.data1,
mapping = ggplot2::aes(x = query.RT, y = reference.RT, color = Poly.MSE)
) +
ggplot2::geom_smooth(
data = temp.data1,
mapping = ggplot2::aes(
x = query.RT,
y = reference.RT,
color = Poly.MSE,
fill = Poly.MSE
),
method = "loess"
) +
# ggthemes::theme_pander() +
ggplot2::theme_bw()
ggplot2::ggsave(
filename = file.path(path, "MSE.plot.pdf"),
plot = plot,
width = 8,
height = 6
)
return(mse)
}
best_loess = function(x,
y,
span.begin = 0.5,
span.end = 1,
span.step = 0.1,
degree = c(1, 2),
path = ".") {
span <- seq(span.begin, span.end, span.step)
para <- NULL
for (i in seq_along(degree)) {
para <- rbind(para, cbind(degree[i], span))
}
colnames(para) <- c("degree", "span")
y <- y[order(x)]
x <- sort(x)
pre.all <- list()
for (i in 1:nrow(para)) {
temp.degree <- para[i, 1]
temp.span <- para[i, 2]
pre <- NULL
# for(j in 2:(length(x) - 1)) {
for (j in seq_along(x)) {
y1 <- y
x1 <- x
# y1 <- y[-j]
# x1 <- x[-j]
model <-
loess(y1 ~ x1, span = temp.span, degree = temp.degree)
pre[j] <- predict(object = model,
newdata = data.frame(x1 = x[j]))
}
pre.all[[i]] <- pre[!is.na(pre)]
}
mse <-
unlist(lapply(pre.all, function(x) {
sum((y[2:(length(y) - 1)] - x) ^ 2) / length(y)
}))
result <- data.frame(para, mse, stringsAsFactors = FALSE)
temp.data1 <- do.call(c, pre.all)
para <- paste(para[, 1], para[, 2], sep = ";")
temp.data1 <- data.frame(
query.RT = rep(x, length(pre.all)),
reference.RT = temp.data1,
Degree.Span.MSE = as.character(rep(paste(
para, round(mse, 3), sep = ": "
), each = length(x))),
stringsAsFactors = FALSE
)
temp.data2 <- data.frame(
query.RT = x,
reference.RT = y,
stringsAsFactors = FALSE
)
plot <-
ggplot2::ggplot(data = temp.data2,
ggplot2::aes(x = query.RT, y = reference.RT)) +
ggplot2::geom_point(color = "black") +
ggplot2::geom_line() +
ggplot2::geom_smooth(method = "loess",
color = "black",
fill = "gray") +
ggplot2::geom_point(
data = temp.data1,
mapping = ggplot2::aes(x = query.RT, y = reference.RT, color = Degree.Span.MSE)
) +
ggplot2::geom_line(
data = temp.data1,
mapping = ggplot2::aes(x = query.RT, y = reference.RT, color = Degree.Span.MSE)
) +
ggplot2::geom_smooth(
data = temp.data1,
mapping = ggplot2::aes(
x = query.RT,
y = reference.RT,
color = Degree.Span.MSE,
fill = Degree.Span.MSE
),
method = "loess"
) +
ggplot2::theme_bw()
ggplot2::ggsave(
filename = file.path(path, "MSE.plot.pdf"),
plot = plot,
width = 8,
height = 6
)
return(result)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.