#'@importFrom stats na.omit lm fitted predict median
#'@importFrom matrixStats colMedians
#'@import drc
standard_names <- function(chans, reps, dataType) {
if (dataType == "intensity") {
tempNames <- paste("rep", rep(seq_len(reps), each = chans), "_", rep(paste("C", c("ontrol",
0:(chans - 2)), sep = ""), reps), sep = "")
} else {
tempNames <- paste("rep", rep(seq_len(reps), each = chans), "_", rep(paste("C", 0:(chans -
1), sep = ""), reps), sep = "")
}
return(tempNames)
}
index_matrix <- function(nchans, reps, dataType, dataFrame) {
if (reps <= 1) {
NULL
} else {
if (dataType == "intensity") {
channe <- seq_len(((nchans - 1) * reps))
chans <- nchans - 1
ser <- standard_names(nchans, reps, dataType)
ser <- ser[-seq(1, by = nchans, length.out = reps)]
} else {
ser <- standard_names(nchans, reps, dataType)
channe <- seq_len((nchans * reps))
chans <- nchans
}
combinations <- t(utils::combn(reps, 2))
combmat <- matrix(rep(as.vector(t(combinations)), chans), ncol = 2, byrow = TRUE)
# create factor for repeat ...
name.vec <- seq_len((chans * reps))
repfac <- rep(seq_len(chans), times = reps)
index <- rep(0:(reps - 1), each = chans)
combfac <- rep(seq_len(reps), each = chans)
columnindex <- matrix(0, ncol = 5, nrow = chans * nrow(combinations))
colnames(columnindex) <- c("concentration", "rep1", "rep2", "index1", "index2")
columnindex[, 1] <- rep(seq_len(chans), each = nrow(combinations))
columnindex[, 2:3] <- combmat
columnindex
# create matrix which will be indexed by first 3 columns of column index
index.mat <- matrix(name.vec, ncol = reps)
columnindex[, 4] <- index.mat[columnindex[, c(1, 2)]]
columnindex[, 5] <- index.mat[columnindex[, c(1, 3)]]
create.names <- rep("", nrow(columnindex))
create.names <- paste(ser[columnindex[, 4]], "vs", ser[columnindex[, 5]])
final.mat <- data.frame(names = create.names, columnindex)
final.mat$concentration <- final.mat$concentration - 1
final.mat[!is.na(rowSums(matrix(match(columnindex, channe), ncol = 5))), ]
}
}
peptide_match <- function(dr1, dr2, nchan) {
maxrow <- max(nrow(dr1), nrow(dr2))
minrow <- min(nrow(dr1), nrow(dr2))
adVal <- maxrow - minrow
if (nrow(dr1) == maxrow) {
dr2$addedVals <- adVal
big.pep <- dr1
small.pep <- dr2
} else {
dr1$addedVals <- adVal
big.pep <- dr2
small.pep <- dr1
}
newframe <- big.pep
colnames(newframe) <- colnames(small.pep)
newframe[seq_len(minrow), ] <- small.pep
newframe[(minrow + 1):maxrow, ] <- base::colMeans(small.pep)
if (all.equal(dim(big.pep), dim(dr1)) == TRUE) {
dr2 <- newframe
} else {
dr1 <- newframe
}
list(dr1 = dr1, dr2 = dr2)
}
uniprotGene <- function(organism) {
if (organism == "H.sapiens") {
query = "<query model=\"genomic\" view=\"Protein.primaryAccession Protein.uniprotAccession Protein.genes.symbol\" sortOrder=\"Protein.primaryAccession ASC\" >
<constraint path=\"Protein.organism.shortName\" op=\"=\" value=\"H. sapiens\" code=\"A\" />
</query>"
ret = httr::POST("https://www.humanmine.org/humanmine/service/query/results", body = list(query = query,
format = "json"), encode = "form")
} else if (organism == "D.melanogaster") {
query = "<query model=\"genomic\" view=\"Protein.primaryAccession Protein.uniprotAccession Protein.genes.symbol\" sortOrder=\"Protein.primaryAccession ASC\" >
<constraint path=\"Protein.organism.shortName\" op=\"=\" value=\"D. melanogaster\" code=\"A\" />
</query>"
ret = httr::POST("https://www.flymine.org/flymine/service/query/results", body = list(query = query,
format = "json"), encode = "form")
} else if (organism == "M.musculus") {
query = "<query model=\"genomic\" view=\"Protein.primaryAccession Protein.uniprotAccession Protein.genes.symbol\" sortOrder=\"Protein.primaryAccession ASC\" >
<constraint path=\"Protein.organism.shortName\" op=\"=\" value=\"M. musculus\" code=\"A\" />
</query>"
ret = httr::POST("https://www.mousemine.org/mousemine/service/query/results", body = list(query = query,
format = "json"), encode = "form")
} else if (organism == "R.norvegicus") {
query = "<query model=\"genomic\" view=\"Protein.primaryAccession Protein.uniprotAccession Protein.genes.symbol\" sortOrder=\"Protein.primaryAccession ASC\" >
<constraint path=\"Protein.organism.shortName\" op=\"=\" value=\"R. norvegicus\" code=\"A\" />
</query>"
ret = httr::POST("https://www.ratmine.org/ratmine/service/query/results", body = list(query = query,
format = "json"), encode = "form")
} else {
query = "<query model=\"genomic\" view=\"Protein.primaryAccession Protein.uniprotAccession Protein.genes.symbol\" sortOrder=\"Protein.primaryAccession ASC\" >
<constraint path=\"Protein.organism.shortName\" op=\"=\" value=\"C. elegans\" code=\"A\" />
</query>"
ret = httr::POST("https://www.humanmine.org/humanmine/service/query/results", body = list(query = query,
format = "json"), encode = "form")
}
response = jsonlite::fromJSON(httr::content(ret, as = "text"))
data.prots <- response$results[, c(1, 3)]
colnames(data.prots) <- c("Entry", "Gene.names")
as.data.frame(data.prots[, seq_len(2)], stringsAsFactors = FALSE)
}
panel_shadeNtext <- function(x, y, corr = NULL, col.regions, ...) {
if (is.null(corr))
corr <- stats::cor(x, y, use = "
pair")
ncol <- 14
pal <- col.regions(ncol)
col.ind <- as.numeric(cut(corr, breaks = seq(from = -1, to = 1, length = ncol + 1), include.lowest = TRUE))
usr <- graphics::par("usr")
graphics::rect(usr[1], usr[3], usr[2], usr[4], col = pal[col.ind], border = NA)
graphics::box(col = "lightgray")
on.exit(graphics::par(usr))
graphics::par(usr = c(0, 1, 0, 1))
r <- formatC(corr, digits = 2, format = "f")
cex.cor <- 0.8/graphics::strwidth("-X.xx")
graphics::text(0.5, 0.5, r, cex = cex.cor)
}
shape_for_ggplot_pred <- function(df_ordered, conc, pred.names) {
cols_to_keep_pred <- c(pred.names, "GeneID", "Accession")
forggplot_pred <- vector(mode = "list", length = length(df_ordered$GeneID))
for (i in seq_len(length(df_ordered$GeneID))) {
tmp_pred <- df_ordered[, cols_to_keep_pred]
forggplot_pred[[i]] <- reshape2::melt(tmp_pred[i, ], id = c("GeneID", "Accession"), na.rm = TRUE)
}
forggplot_pred_1 <- do.call(rbind, forggplot_pred)
forggplot_pred_1 <- data.frame(forggplot_pred_1, x = conc)
}
shape_for_ggplot_perc <- function(df_ordered, conc, finalNames) {
cols_to_keep_perc <- c(finalNames, "GeneID", "Accession")
forggplot_perc <- vector(mode = "list", length = length(df_ordered$GeneID))
for (i in seq_len(length(df_ordered$GeneID))) {
tmp_perc <- df_ordered[, cols_to_keep_perc]
forggplot_perc[[i]] <- reshape2::melt(tmp_perc[i, ], id = c("GeneID", "Accession"), na.rm = TRUE)
}
forggplot_perc_1 <- do.call(rbind, forggplot_perc)
forggplot_perc_1 <- data.frame(forggplot_perc_1, x = conc)
}
remove_peptides <- function(dataFrame, chans, reps, accessionID, chanNames, sequenceID, qualityID,
incPDofPD, PDofPD, removePeptides, modelType, incGeneFile, geneFile) {
tempdat <- dataFrame
# make matrix of descriptions and accessions to filter by common proteins
accDesMat <- as.character(tempdat[as.character(tempdat[, accessionID]) == "", c(accessionID)])
if (incPDofPD == TRUE) {
data.merged <- tempdat[, c(chanNames, accessionID, sequenceID, qualityID, "pdofpd")]
} else {
data.merged <- tempdat[, c(chanNames, accessionID, sequenceID, qualityID)]
}
# need to make check if user has run the parameter function
if (chanNames[1] != "Control_rep1") {
channelnames <- standard_names(chans, reps, dataType = "intensity")
}
repindex <- rep(seq_len(reps), each = chans)
totfal <- rep(FALSE, (chans + 3))
if (incPDofPD == TRUE) {
newdf <- cbind(data.merged[, seq_len(chans)], data.merged[, c(accessionID, sequenceID, qualityID,
PDofPD)])
colnames(newdf) <- c(channelnames[repindex == 1], "Accession", "Sequence", "Quality", "Kd")
newdf <- newdf[!is.na(rowSums(newdf[, seq_len(chans)])), ]
newdf <- newdf[newdf$Quality <= 0.05, ]
newdf <- data.frame(newdf, outliers = rep(0, length(newdf[, 1])), uniquePeps = rep(0, length(newdf[,
1])), addedVals = rep(0, length(newdf[, 1])), Kd = newdf$Kd)
} else {
newdf <- cbind(data.merged[, seq_len(chans)], data.merged[, c(accessionID, sequenceID, qualityID)])
colnames(newdf) <- c(channelnames[repindex == 1], "Accession", "Sequence", "Quality")
newdf <- newdf[!is.na(rowSums(newdf[, seq_len(chans)])), ]
newdf <- newdf[newdf$Quality <= 0.05, ]
}
if (reps == 2) {
channelnames <- paste("rep", rep(seq_len(reps), each = chans), "_", rep(paste("C", c("ontrol",
0:(chans - 2)), sep = ""), reps), sep = "")
newdf2 <- cbind(data.merged[, (chans + 1):(2 * chans)], data.merged[, c(accessionID, sequenceID,
qualityID)])
colnames(newdf2) <- c(channelnames[repindex == 2], "Accession", "Sequence", "Quality")
newdf2 <- newdf2[!is.na(rowSums(newdf2[, seq_len(chans)])), ]
newdf2 <- newdf2[newdf2$Quality <= 0.05, ]
newdf <- data.frame(newdf, outliers = rep(0, length(newdf[, 1])), uniquePeps = rep(0, length(newdf[,
1])), addedVals = rep(0, length(newdf[, 1])))
newdf2 <- data.frame(newdf2, outliers = rep(0, length(newdf2[, 1])), uniquePeps = rep(0,
length(newdf2[, 1])), addedVals = rep(0, length(newdf2[, 1])))
common.proteins <- intersect(unique(newdf$Accession), unique(newdf2$Accession))
newdf <- newdf[!is.na(match(newdf$Accession, common.proteins)), ]
newdf2 <- newdf2[!is.na(match(newdf2$Accession, common.proteins)), ]
} else {
common.proteins <- unique(newdf$Accession)
}
if (removePeptides == FALSE) {
if (reps == 1 & modelType == "sigmoid") {
if (incPDofPD == TRUE) {
prot1 <- unique(newdf$Accession)
sumkd <- rep(0, length(prot1))
protdf <- newdf[seq_len(length(prot1)), colnames(newdf) != "Sequence"]
for (i in seq_len(length(prot1))) {
## grep for total intensity () includes non unique peps
protdf[i, seq_len(chans)] <- apply(newdf[grep(prot1[i], newdf$Accession), seq_len(chans)],
2, sum, na.rm = TRUE)
protdf$Kd[i] <- sum(newdf$Kd[grep(prot1[i], newdf$Accession)], na.rm = TRUE)
protdf$Accession[i] <- prot1[i]
## use == to get unique peptides per protein per repeat
protdf$uniquePeps[i] <- length(unique(unique(newdf$Sequence[newdf$Accession ==
prot1[i]])))
}
protdf$uniquePeps[grep(";", protdf$Accession)] <- 0
fc1 <- protdf[, 1]/protdf[, 2:chans]
Kd <- protdf$Kd/protdf[, 1]
fcprotdf <- data.frame(log2(fc1), Accession = protdf$Accession, uniquePepr1 = protdf$uniquePeps,
uniquePepr2 = protdf$uniquePeps, Kd = Kd)
fcprotdf <- fcprotdf[!is.na(rowSums(fcprotdf[seq_len((chans - 1))])), ]
fcprotdf
} else {
prot1 <- unique(newdf$Accession)
protdf <- newdf[seq_len(length(prot1)), colnames(newdf) != "Sequence"]
for (i in seq_len(length(prot1))) {
## grep for total intensity () includes non unique peps
protdf[i, seq_len(chans)] <- apply(newdf[grep(prot1[i], newdf$Accession), seq_len(chans)],
2, sum, na.rm = TRUE)
protdf$Accession[i] <- prot1[i]
## use == to get unique peptides per protein per repeat
protdf$uniquePeps[i] <- length(unique(unique(newdf$Sequence[newdf$Accession ==
prot1[i]])))
}
protdf$uniquePeps[grep(";", protdf$Accession)] <- 0
fc1 <- protdf[, 1]/protdf[, 2:chans]
fcprotdf <- data.frame(log2(fc1), Accession = protdf$Accession, uniquePepr1 = protdf$uniquePeps,
uniquePepr2 = protdf$uniquePeps)
fcprotdf <- fcprotdf[!is.na(rowSums(fcprotdf[seq_len((chans - 1))])), ]
fcprotdf
}
} else {
prot1 <- unique(newdf$Accession)
protdf <- newdf[seq_len(length(prot1)), colnames(newdf) != "Sequence"]
for (i in seq_len(length(prot1))) {
## grep for total intensity () includes non unique peps
protdf[i, seq_len(chans)] <- apply(newdf[grep(prot1[i], newdf$Accession), seq_len(chans)],
2, sum, na.rm = TRUE)
protdf$Accession[i] <- prot1[i]
## use == to get unique peptides per protein per repeat
protdf$uniquePeps[i] <- length(unique(c(unique(newdf$Sequence[newdf$Accession ==
prot1[i]]), unique(newdf2$Sequence[newdf2$Accession == prot1[i]]))))
}
protdf$uniquePeps[grep(";", protdf$Accession)] <- 0
prot2 <- unique(newdf2$Accession)
### second data frame
protdf2 <- newdf2[seq_len(length(prot2)), colnames(newdf2) != "Sequence"]
for (i in seq_len(length(prot2))) {
## grep for total intensity () includes non unique peps
protdf2[i, seq_len(chans)] <- matrixStats::colMedians(as.matrix(newdf2[grep(prot2[i],
newdf2$Accession), seq_len(chans)]), na.rm = TRUE)
protdf2$Accession[i] <- prot2[i]
protdf2$uniquePeps[i] <- length(unique(newdf2$Sequence[newdf2$Accession == prot2[i]]))
}
protdf2$uniquePeps[grep(";", protdf2$Accession)] <- 0
com.prot <- intersect(protdf$Accession, protdf2$Accession)
fc1 <- protdf[, 1]/protdf[, 2:chans]
fc2 <- protdf2[, 1]/protdf2[, 2:chans]
fcprotdf <- data.frame(log2(fc1[match(com.prot, protdf$Accession), ]), log2(fc2[match(com.prot,
protdf2$Accession), ]), Accession = com.prot, uniquePepr1 = protdf$uniquePeps[match(com.prot,
protdf$Accession)], uniquePepr2 = protdf2$uniquePeps[match(com.prot, protdf2$Accession)])
fcprotdf <- fcprotdf[!is.na(rowSums(fcprotdf[seq_len((reps * chans - 2))])), ]
fcprotdf
}
} else {
totpepdf <- NULL
totpepdf2 <- NULL
for (z in seq_len(length(common.proteins))) {
temp <- newdf[newdf$Accession == common.proteins[z], ]
temp2 <- newdf2[newdf2$Accession == common.proteins[z], ]
# first step: check if all peptides are unique ...
if (all.equal(grep(";", temp$Accession), integer(0)) == TRUE) {
uniPeptides1 <- length(unique(temp$Sequence))
} else {
uniPeptides1 <- length(unique(temp$Sequence[-grep(";", temp$Accession)]))
}
if (all.equal(grep(";", temp2$Accession), integer(0)) == TRUE) {
uniPeptides2 <- length(unique(temp2$Sequence))
} else {
uniPeptides2 <- length(unique(temp2[-grep(";", temp2$Accession)]))
}
## add unique peptide column
temp$uniquePeps <- uniPeptides1
temp2$uniquePeps <- uniPeptides2
tempPep <- intersect(unique(temp$Sequence), unique(temp2$Sequence))
if (all.equal(tempPep, character(0)) == TRUE) {
next
}
for (i in seq_len(length(tempPep))) {
if (sum(temp$Sequence == tempPep[i]) != sum(temp2$Sequence == tempPep[i])) {
dr <- peptide_match(temp[temp$Sequence == tempPep[i], ], temp2[temp2$Sequence ==
tempPep[i], ], chans)
dr1 <- dr$dr1
dr2 <- dr$dr2
} else {
dr1 <- temp[temp$Sequence == tempPep[i], ]
dr2 <- temp2[temp2$Sequence == tempPep[i], ]
}
tempoindex <- rep(FALSE, nrow(dr1))
for (j in seq_len(nrow(dr1))) {
percor <- stats::cor.test(log2(as.numeric(dr1[j, seq_len(chans)])), log2(as.numeric(dr2[j,
seq_len(chans)])))
tempoindex[j] <- percor$estimate < 0.4
}
dr1[tempoindex, seq_len(chans)] <- NA
dr2[tempoindex, seq_len(chans)] <- NA
tempReplace <- dr1
tempReplace2 <- dr2
temp <- temp[temp$Sequence != tempPep[i], ]
temp <- rbind(temp, tempReplace)
temp2 <- temp2[temp2$Sequence != tempPep[i], ]
temp2 <- rbind(temp2, tempReplace2)
}
temp <- temp[match(tempPep, temp$Sequence), ]
temp2 <- temp2[match(tempPep, temp2$Sequence), ]
totpepdf <- rbind(totpepdf, temp)
totpepdf2 <- rbind(totpepdf2, temp2)
}
totpepdf <- totpepdf[!is.na(rowSums(totpepdf[, seq_len(chans)])), ]
totpepdf2 <- totpepdf2[!is.na(rowSums(totpepdf2[, seq_len(chans)])), ]
totpepdf <- totpepdf[totpepdf$addedVals == 0, ]
totpepdf <- totpepdf[totpepdf$addedVals == 0, ]
totpepdf$uniquePeps[grep(";", totpepdf$Accession)] <- 0
pepframe <- data.frame(totpepdf[seq_len(length(common.proteins)), seq_len(chans)], totpepdf2[seq_len(length(common.proteins)),
seq_len(chans)], Accession = totpepdf$Accession[seq_len(length(common.proteins))], uniquePeps = totpepdf$uniquePeps[seq_len(length(common.proteins))])
pepsum1 <- pepsum2 <- totpepdf[seq_len(length(common.proteins)), ]
pepsum1 <- pepsum1[, -match(c("Sequence", "addedVals", "Quality", "outliers"), colnames(pepsum1))]
pepsum2 <- pepsum2[, -match(c("Sequence", "addedVals", "Quality", "outliers"), colnames(pepsum2))]
pepsum1$pepNum <- pepsum1$pepNum <- rep(0, length(common.proteins))
colnames(pepsum2)[seq_len(chans)] <- channelnames[(chans + 1):(chans * reps)]
for (i in seq_len(length(common.proteins))) {
pepsum1[i, seq_len(chans)] <- colSums(totpepdf[grep(common.proteins[i], totpepdf$Accession),
seq_len(chans)])
pepsum2[i, seq_len(chans)] <- colSums(totpepdf2[grep(common.proteins[i], totpepdf2$Accession),
seq_len(chans)])
pepsum1$pepNum[i] <- nrow(totpepdf[grep(common.proteins[i], totpepdf$Accession), seq_len(chans)])
pepsum2$pepNum[i] <- nrow(totpepdf2[grep(common.proteins[i], totpepdf2$Accession), seq_len(chans)])
pepsum1$Accession[i] <- pepsum2$Accession[i] <- common.proteins[i]
pepsum1$uniquePeps[i] <- totpepdf$uniquePeps[as.logical(match(totpepdf$Accession, common.proteins[i],
nomatch = FALSE))][1]
}
indexpepsum <- ((rowSums(pepsum1[seq_len(chans)]) != 0) + ((rowSums(pepsum2[seq_len(chans)])) !=
0)) == 2
pepsum1 <- pepsum1[indexpepsum, ]
pepsum2 <- pepsum2[indexpepsum, ]
fc1 <- pepsum1[, 1]/pepsum1[, 2:chans]
fc2 <- pepsum2[, 1]/pepsum2[, 2:chans]
fcprotdf <- data.frame(log2(fc1), log2(fc2), pepsum1$Accession, uniquePepr1 = pepsum1$uniquePeps,
uniquePepr2 = pepsum2$uniquePeps, num1 = pepsum1$pepNum, num2 = pepsum2$pepNum)
fcprotdf
}
fcprotdf
}
normalize_data <- function(dataFrame, chans, reps, PD2, channelNames, incPDofPD, PDofPD, removePeptides,
dataType, modelType, incGeneFile = FALSE, conversionTable = NA, normaliseData = "median", accessionID = NA,
uniquePeptides = NA, organism = "H.sapiens") {
inten <- dataFrame
channelIndex <- seq_len((reps * chans))
if (dataType != "intensity") {
if (modelType == "sigmoid") {
data_orig2 <- dataFrame
## if user has specified accession & Description
if (PD2 == TRUE) {
pattern <- "GN=(\\S+)"
g_fromD1 <- stringr::str_extract(data_orig2$Description, pattern)
gID_D1a <- stringr::str_split_fixed(g_fromD1, "GN=", n = 2)
gID_D1a <- as.vector(gID_D1a[, 2])
gID_D1 <- as.matrix(replace(gID_D1a, gID_D1a == "", "NA"))
# Addition of the gene ID column
data_orig2["geneID"] <- (gID_D1)
} else {
if (incGeneFile == FALSE) {
tempacc <- data_orig2[, accessionID]
data_orig2 <- data_orig2[, (-accessionID)]
data_orig2$Accession <- tempacc
uniGene <- uniprotGene(organism)
uniGene$Gene.names[uniGene$Gene.names == ""] <- NA
GeneID <- uniGene$Gene.names[match(data_orig2$Accession, uniGene$Entry)]
GeneID <- make.names(GeneID, unique = TRUE)
data_orig2$geneID <- GeneID
} else {
tempacc <- data_orig2[, accessionID]
data_orig2 <- data_orig2[, (-accessionID)]
data_orig2$Accession <- tempacc
uniGene <- conversionTable
GeneID <- uniGene$Gene.names[match(data_orig2$Accession, uniGene$Entry)]
GeneID <- make.names(GeneID, unique = TRUE)
data_orig2$geneID <- GeneID
}
}
if (incPDofPD == TRUE) {
if (PD2 == TRUE) {
data.merged <- data.frame(data_orig2[, channelNames], data_orig2$Accession, data_orig2$geneID,
data_orig2$X..Unique.Peptides, data_orig2[, PDofPD])
} else {
data.merged <- data.frame(data_orig2[, channelNames], data_orig2$Accession, data_orig2$geneID,
data_orig2[, uniquePeptides], data_orig2[, PDofPD])
}
final.Names <- standard_names(chans, reps, dataType)
colnames(data.merged) <- c(final.Names, "Accession", "GeneID", "UniquePeps", "Kd")
tmp <- data.merged[, seq_len(chans)]
tf <- as.data.frame(lapply(tmp, function(x) (is.na(x))))
countsNAs <- as.data.frame(apply(tf, 1, function(x) table(x)["TRUE"]))
n_of_miss <- as.data.frame(as.numeric(stringr::str_replace_all(as.list(countsNAs[,
1]), "NA", "0")))
data.merged <- data.frame(data.merged, n_of_miss)
colnames(data.merged) <- c(final.Names, "Accession", "GeneID", "UniquePeps", "Kd",
"MissingVal")
missing_val <- 0
data.merged <- as.data.frame(data.merged[data.merged$MissingVal <= missing_val, ])
# Specify the number of missing points.For zero missing point is ==0 filiter for 2 unique
# peptides
data.merged <- data.merged[data.merged$UniquePeps > 1, ]
if (normaliseData == "loess") {
data.merged <- data.frame((affy::normalize.loess(2^(data.merged[, channelIndex]))),
Accession = data.merged$Accession, GeneID = data.merged$GeneID, UniquePeps = data.merged$UniquePeps,
Kd = 1/data.merged$Kd, MissingVal = data.merged$MissingVal)
} else if (normaliseData == "median") {
data.merged <- data.frame(((2^(data.merged[, channelIndex]))/apply((2^(data.merged[,
channelIndex])), 2, stats::median)), Accession = data.merged$Accession, GeneID = data.merged$GeneID,
UniquePeps = data.merged$UniquePeps, Kd = 1/data.merged$Kd, MissingVal = data.merged$MissingVal)
} else {
data.merged <- data.frame((2^(data.merged[, channelIndex])), Accession = data.merged$Accession,
GeneID = data.merged$GeneID, UniquePeps = data.merged$UniquePeps, Kd = 1/data.merged$Kd,
MissingVal = data.merged$MissingVal)
}
data.merged
} else {
if (PD2 == TRUE) {
data.merged <- data.frame(data_orig2[, channelNames], data_orig2$Accession, data_orig2$geneID,
data_orig2$X..Unique.Peptides)
} else {
data.merged <- data.frame(data_orig2[, channelNames], data_orig2$Accession, data_orig2$geneID,
data_orig2[, uniquePeptides])
}
final.Names <- standard_names(chans, reps, "LFC")
colnames(data.merged) <- c(final.Names, "Accession", "GeneID", "UniquePeps")
tmp <- data.merged[, seq_len(chans)]
tf <- as.data.frame(lapply(tmp, function(x) (is.na(x))))
countsNAs <- as.data.frame(apply(tf, 1, function(x) table(x)["TRUE"]))
n_of_miss <- as.data.frame(as.numeric(stringr::str_replace_all(as.list(countsNAs[,
1]), "NA", "0")))
data.merged <- data.frame(data.merged, n_of_miss)
colnames(data.merged) <- c(final.Names, "Accession", "GeneID", "UniquePeps", "MissingVal")
#
missing_val <- 0
#
data.merged <- as.data.frame(data.merged[data.merged$MissingVal <= missing_val, ])
# Specify the number of missing points.For zero missing point is ==0 filiter for 2 unique
# peptides
data.merged <- data.merged[data.merged$UniquePeps > 1, ]
if (normaliseData == "loess") {
data.merged <- data.frame((affy::normalize.loess(2^(data.merged[, channelIndex]))),
Accession = data.merged$Accession, GeneID = data.merged$GeneID, UniquePeps = data.merged$UniquePeps,
MissingVal = data.merged$MissingVal)
} else if (normaliseData == "median") {
data.merged <- data.frame(((2^(data.merged[, channelIndex]))/apply((2^(data.merged[,
channelIndex])), 2, stats::median)), Accession = data.merged$Accession, GeneID = data.merged$GeneID,
UniquePeps = data.merged$UniquePeps, MissingVal = data.merged$MissingVal)
} else {
data.merged <- data.frame((2^(data.merged[, channelIndex])), Accession = data.merged$Accession,
GeneID = data.merged$GeneID, UniquePeps = data.merged$UniquePeps, MissingVal = data.merged$MissingVal)
}
data.merged
}
} else {
tempdat <- dataFrame
temp <- channelIndex
nvec <- length(temp)
data.merged <- tempdat[, channelNames]
colnames(data.merged) <- standard_names(chans, reps, dataType)
data.names <- c(colnames(data.merged), "Accession", "GeneID", "UniquePeps", "MissingVal")
if (dataType == "FC") {
data.merged[, temp] <- log2(data.merged[, temp])
}
if (PD2 == TRUE) {
pattern <- "GN=(\\S+)"
g_fromD1 <- stringr::str_extract(tempdat$Description, pattern)
gID_D1a <- stringr::str_split_fixed(g_fromD1, "GN=", n = 2)
gID_D1a <- as.vector(gID_D1a[, 2])
gID_D1 <- as.matrix(replace(gID_D1a, gID_D1a == "", "NA"))
Accession <- tempdat$Accession
UniquePeps <- MissingVal <- tempdat[, grep("Unique", colnames(tempdat))]
data.merged <- cbind(data.merged, Accession, GeneID = gID_D1, UniquePeps, MissingVal)
} else {
tempdat$Accession <- tempdat[, accessionID]
if (incGeneFile == FALSE) {
uniGene <- uniprotGene(organism)
} else {
uniGene <- conversionTable
colnames(uniGene)[seq_len(2)] <- c("Entry", "Gene.names")
}
GeneID <- uniGene$Gene.names[match(tempdat$Accession, uniGene$Entry)]
GeneID <- make.names(GeneID, unique = TRUE)
Accession <- tempdat$Accession
UniquePeps <- MissingVal <- tempdat[, uniquePeptides]
data.merged <- cbind(data.merged, Accession, GeneID = GeneID, UniquePeps, MissingVal)
}
channelIndex <- seq_len((reps * chans))
missing <- rowSums(is.na(data.merged[, channelIndex]))
missing_val <- 0
data.merged$MissingVal <- missing
## subset by missing
data.merged <- data.merged[data.merged$MissingVal <= missing_val, ]
data.merged <- as.data.frame(data.merged[data.merged$MissingVal <= missing_val, ])
data.merged <- data.merged[data.merged$UniquePeps > 1, ]
#
if (normaliseData == "loess") {
data.merged <- data.frame(log2(affy::normalize.loess(2^(data.merged[, channelIndex]))),
Accession = data.merged$Accession, GeneID = data.merged$GeneID, UniquePeps = data.merged$UniquePeps,
MissingVal = data.merged$MissingVal)
} else if (normaliseData == "median") {
data.merged <- data.frame(log2((2^(data.merged[, channelIndex]))/apply((2^(data.merged[,
channelIndex])), 2, stats::median)), Accession = data.merged$Accession, GeneID = data.merged$GeneID,
UniquePeps = data.merged$UniquePeps, MissingVal = data.merged$MissingVal)
} else {
data.merged <- data.frame(log2((2^(data.merged[, channelIndex]))), Accession = data.merged$Accession,
GeneID = data.merged$GeneID, UniquePeps = data.merged$UniquePeps, MissingVal = data.merged$MissingVal)
}
data.merged
## filter for 2 unique peptide
}
} else {
### intensities to protein done.. here no description so we need toi use accession and intermine
tempdat <- inten
if (incGeneFile == FALSE) {
uniGene <- uniprotGene(organism)
} else {
uniGene <- conversionTable
colnames(uniGene) <- c("Entry", "Gene.names")
}
if (removePeptides == TRUE) {
Accession <- tempdat$pepsum1.Accession
} else {
Accession <- tempdat$Accession
}
uniGene$Gene.names[uniGene$Gene.names == ""] <- NA
GeneID <- uniGene$Gene.names[match(Accession, uniGene$Entry)]
GeneID <- make.names(GeneID, unique = TRUE)
tempdat$GeneID <- GeneID
UniquePeps <- tempdat$uniquePepr1
if (reps == 1) {
data.merged <- tempdat
final.Names <- paste0("rep1_C", 0:(chans - 2))
if (incPDofPD == TRUE) {
data.merged <- data.frame(data.merged[, seq_len((chans - 1))], Accession = data.merged$Accession,
GeneID = data.merged$GeneID, UniquePeps = data.merged$uniquePepr1, Kd = data.merged$Kd)
colnames(data.merged) <- c(final.Names, "Accession", "GeneID", "UniquePeps", "Kd")
tmp <- data.merged[, seq_len((chans - 1))]
tf <- as.data.frame(lapply(tmp, function(x) (is.na(x))))
countsNAs <- as.data.frame(apply(tf, 1, function(x) table(x)["TRUE"]))
n_of_miss <- as.data.frame(as.numeric(stringr::str_replace_all(as.list(countsNAs[,
1]), "NA", "0")))
data.merged <- data.frame(data.merged, n_of_miss)
colnames(data.merged) <- c(final.Names, "Accession", "GeneID", "UniquePeps", "Kd",
"MissingVal")
#
missing_val <- 0
#
data.merged <- as.data.frame(data.merged[data.merged$MissingVal <= missing_val, ])
# Specify the number of missing points.For zero missing point is ==0 filter for 2 unique peptides
data.merged <- data.merged[data.merged$UniquePeps > 1, ]
#
if (normaliseData == "loess") {
data.merged <- data.frame((affy::normalize.loess(2^(data.merged[, seq_len((chans -
1))]))), Accession = data.merged$Accession, GeneID = data.merged$GeneID, UniquePeps = data.merged$UniquePeps,
Kd = data.merged$Kd)
} else if (normaliseData == "median") {
data.merged <- data.frame(((2^(data.merged[, seq_len((chans - 1))]))/apply((2^(data.merged[,
seq_len((chans - 1))])), 2, stats::median)), Accession = data.merged$Accession,
GeneID = data.merged$GeneID, UniquePeps = data.merged$UniquePeps, Kd = data.merged$Kd)
} else {
data.merged <- data.frame((2^(data.merged[, seq_len((chans - 1))])), Accession = data.merged$Accession,
GeneID = data.merged$GeneID, UniquePeps = data.merged$UniquePeps, Kd = data.merged$Kd)
}
data.merged
} else {
data.merged <- data.frame(data.merged[, seq_len((chans - 1))], Accession = data.merged$Accession,
GeneID = data.merged$GeneID, UniquePeps = data.merged$uniquePepr1)
colnames(data.merged) <- c(final.Names, "Accession", "GeneID", "UniquePeps")
tmp <- data.merged[, seq_len((chans - 1))]
tf <- as.data.frame(lapply(tmp, function(x) (is.na(x))))
countsNAs <- as.data.frame(apply(tf, 1, function(x) table(x)["TRUE"]))
n_of_miss <- as.data.frame(as.numeric(stringr::str_replace_all(as.list(countsNAs[,
1]), "NA", "0")))
data.merged <- data.frame(data.merged, n_of_miss)
colnames(data.merged) <- c(final.Names, "Accession", "GeneID", "UniquePeps", "MissingVal")
missing_val <- 0
data.merged <- as.data.frame(data.merged[data.merged$MissingVal <= missing_val, ])
# #Specify the number of missing points.For zero missing point is ==0 #filiter for 2 unique
# peptides
data.merged <- data.merged[data.merged$UniquePeps > 1, ]
if (normaliseData == "loess") {
data.merged <- data.frame((affy::normalize.loess(2^(data.merged[, seq_len((chans -
1))]))), Accession = data.merged$Accession, GeneID = data.merged$GeneID, UniquePeps = data.merged$UniquePeps)
} else if (normaliseData == "median") {
data.merged <- data.frame(((2^(data.merged[, seq_len((chans - 1))]))/apply((2^(data.merged[,
seq_len((chans - 1))])), 2, stats::median)), Accession = data.merged$Accession,
GeneID = data.merged$GeneID, UniquePeps = data.merged$UniquePeps)
} else {
data.merged <- data.frame((2^(data.merged[, seq_len((chans - 1))])), Accession = data.merged$Accession,
GeneID = data.merged$GeneID, UniquePeps = data.merged$UniquePeps)
}
data.merged
}
} else {
data.merged <- tempdat[, seq_len(((chans - 1) * reps))]
data.merged <- tempdat[, seq_len(((chans - 1) * reps))]
if (removePeptides == FALSE) {
Accession <- tempdat$Accession
MissingVal <- UniquePeps
data.merged <- cbind(data.merged, Accession, GeneID = GeneID, UniquePeps, MissingVal)
data.merged <- data.merged[!is.na(rowSums(data.merged[, seq_len(((chans - 1) * reps))])),
]
data.merged <- data.merged[data.merged$UniquePeps > 1, ]
data.merged <- data.frame((log2(affy::normalize.loess(2^(data.merged[, seq_len(((chans -
1) * reps))])))), Accession = data.merged$Accession, GeneID = data.merged$GeneID,
UniquePeps = data.merged$UniquePeps)
data.merged <- data.merged[!is.na(rowSums(data.merged[, seq_len(((chans - 1) * reps))])),
]
data.merged
} else {
Accession <- tempdat$pepsum1.Accession
data.merged <- cbind(data.merged, Accession, GeneID = GeneID, UniquePeps, num1 = tempdat$num1,
num2 = tempdat$num2)
data.merged <- data.merged[!is.na(rowSums(data.merged[, seq_len(((chans - 1) * reps))])),
]
data.merged <- data.merged[data.merged$UniquePeps > 1, ]
data.merged <- data.frame((log2(affy::normalize.loess(2^(data.merged[, seq_len(((chans -
1) * reps))])))), Accession = data.merged$Accession, GeneID = data.merged$GeneID,
UniquePeps = data.merged$UniquePeps, num1 = data.merged$num1, num2 = data.merged$num2)
data.merged <- data.merged[data.merged$num1 > 1 | data.merged$num2 > 1, ]
data.merged <- data.merged[!is.na(rowSums(data.merged[, seq_len(((chans - 1) * reps))])),
]
data.merged
}
}
}
data.merged
}
fit_model <- function(dataFrame, chans, reps, dataType = "LFC", modelType = "sigmoid", sigmoidConc = NA,
PD2 = TRUE, incPDofPD = FALSE, PDofPD = NA) {
if (modelType == "sigmoid") {
if (dataType == "intensity") {
nvec <- seq_len((chans - 1))
nvec <- length(nvec)
data.merged <- dataFrame
conc <- sigmoidConc
if (incPDofPD == TRUE) {
final.Names <- paste0("rep1_C", 0:(chans - 2))
pred.names <- paste0("predX", seq_len((chans - 1)))
colnames(data.merged[, seq_len((chans - 1))]) <- final.Names
data_merged_positives <- data.merged
data_merged_positives2 <- ((1/data_merged_positives[, seq_len((chans - 1))])) * 100
Reps_FC <- data.frame(data_merged_positives2, Accession = data_merged_positives$Accession,
GeneID = data_merged_positives$GeneID, UniquePeps = data_merged_positives$UniquePeps,
depletionConstant = data_merged_positives$Kd)
ryegrass.m1 <- vector(mode = "list", length = nrow(Reps_FC))
pvals <- vector(mode = "list", length = nrow(Reps_FC))
stderr <- vector(mode = "list", length = nrow(Reps_FC))
model_pred <- vector(mode = "list", length = nrow(Reps_FC))
coeff_predicted <- vector(mode = "list", length = nrow(Reps_FC))
for (i in seq_len(nrow(Reps_FC))) {
# maxIt and relTol to be user defined
ryegrass.m1[[i]] <- try(drc::drm(as.numeric(Reps_FC[i, seq_len((chans - 1))]) ~
as.numeric(conc), na.action = stats::na.omit, control = drc::drmc(constr = FALSE,
errorm = FALSE, noMessage = TRUE, maxIt = 1000, relTol = 1e-06), fct = drc::LL.4(fixed = c(NA,
NA, NA, NA), names = c("Slope", "Lower Limit", "Upper Limit", "RB50"))), silent = TRUE)
}
failed_sigm = 0
for (i in seq_len(length(ryegrass.m1))) {
# checking_val if FALSE the model has failed to calculate the pval
checking_val <- try(is.numeric(vsn::coefficients(ryegrass.m1[[i]])[["Slope:(Intercept)"]]),
silent = TRUE)
if (checking_val == "TRUE") {
pvals[[i]] <- t(as.data.frame(summary(ryegrass.m1[[i]])$coefficients[13:16]))
colnames(pvals[[i]]) <- c("SlopePval", "Lower_LimitPval", "Upper_LimitPval",
"RB50Pval")
coeff_predicted[[i]] <- t(data.frame(vsn::coefficients(ryegrass.m1[[i]])))
colnames(coeff_predicted[[i]]) <- c("SlopeCoef", "Lower_LimitCoef", "Upper_LimitCoef",
"RB50Coef")
stderr[[i]] <- t(as.data.frame(summary(ryegrass.m1[[i]])$coefficients[5:8]))
colnames(stderr[[i]]) <- c("SlopeErr", "Lower_LimitErr", "Upper_LimitErr", "RB50Err")
model_pred[[i]] <- predict(ryegrass.m1[[i]])
} else {
failed_sigm = failed_sigm + 1
fit <- stats::lm(as.numeric(Reps_FC[i, seq_len((chans - 1))]) ~ poly(log10(conc),
2))
# extract the pval
pval <- as.numeric(summary(fit)$coefficients[, 4])
pvals[[i]] <- t(as.data.frame(c(pval, "lm-fit:intercept.slope.quadratic")))
colnames(pvals[[i]]) <- c("SlopePval", "Lower_LimitPval", "Upper_LimitPval",
"RB50Pval")
stderr[[i]] <- data.frame(NA, NA, NA, NA)
colnames(stderr[[i]]) <- c("SlopeErr", "Lower_LimitErr", "Upper_LimitErr", "RB50Err")
coeff_predicted[[i]] <- data.frame(NA, NA, NA, NA)
colnames(coeff_predicted[[i]]) <- c("SlopeCoef", "Lower_LimitCoef", "Upper_LimitCoef",
"RB50Coef")
model_pred[[i]] <- as.numeric(stats::fitted(fit))
}
}
modelsReps <- data.frame(do.call(rbind.data.frame, lapply(model_pred, function(x) as.numeric(x))),
Reps_FC$GeneID, do.call(rbind.data.frame, lapply(pvals, function(x) x)), do.call(rbind.data.frame,
lapply(coeff_predicted, function(x) x)), do.call(rbind.data.frame, lapply(stderr,
function(x) x)))
colnames(modelsReps) <- c(pred.names, "GeneID", "SlopePval", "Lower_LimitPval", "Upper_LimitPval",
"RB50Pval", "SlopeCoef", "Lower_LimitCoef", "Upper_LimitCoef", "RB50Coef", "SlopeErr",
"Lower_LimitErr", "Upper_LimitErr", "RB50Err")
data_merged_2 <- merge.data.frame(modelsReps, Reps_FC, by = "GeneID")
data_merged_2 <- data.frame(data_merged_2, Top_minus_min = data_merged_2$predX1 -
data_merged_2[, paste("predX", (chans - 1), sep = "")])
data_merged_2 <- data.frame(data_merged_2, correctedRB50 = (data_merged_2$RB50Coef *
data_merged_2$depletionConstant))
data_merged_2
} else {
final.Names <- paste0("rep1_C", 0:(chans - 2))
pred.names <- paste0("predX", seq_len((chans - 1)))
colnames(data.merged[, seq_len((chans - 1))]) <- final.Names
data_merged_positives <- data.merged
data_merged_positives2 <- ((1/data_merged_positives[, seq_len((chans - 1))])) * 100
Reps_FC <- data.frame(data_merged_positives2, Accession = data_merged_positives$Accession,
GeneID = data_merged_positives$GeneID, UniquePeps = data_merged_positives$UniquePeps)
ryegrass.m1 <- vector(mode = "list", length = nrow(Reps_FC))
pvals <- vector(mode = "list", length = nrow(Reps_FC))
stderr <- vector(mode = "list", length = nrow(Reps_FC))
model_pred <- vector(mode = "list", length = nrow(Reps_FC))
coeff_predicted <- vector(mode = "list", length = nrow(Reps_FC))
for (i in seq_len(nrow(Reps_FC))) {
# maxIt and relTol to be user defined
ryegrass.m1[[i]] <- try(drc::drm(as.numeric(Reps_FC[i, seq_len((chans - 1))]) ~
as.numeric(conc), na.action = stats::na.omit, control = drc::drmc(constr = FALSE,
errorm = FALSE, noMessage = TRUE, maxIt = 1000, relTol = 1e-06), fct = drc::LL.4(fixed = c(NA,
NA, NA, NA), names = c("Slope", "Lower Limit", "Upper Limit", "RB50"))), silent = TRUE)
}
failed_sigm = 0
for (i in seq_len(length(ryegrass.m1))) {
# checking_val if FALSE the model has failed to calculate the pval
checking_val <- try(is.numeric(vsn::coefficients(ryegrass.m1[[i]])[["Slope:(Intercept)"]]),
silent = TRUE)
if (checking_val == "TRUE") {
pvals[[i]] <- t(as.data.frame(suppressWarnings(summary(ryegrass.m1[[i]])$coefficients[13:16])))
colnames(pvals[[i]]) <- c("SlopePval", "Lower_LimitPval", "Upper_LimitPval",
"RB50Pval")
coeff_predicted[[i]] <- t(data.frame(suppressWarnings(vsn::coefficients(ryegrass.m1[[i]]))))
colnames(coeff_predicted[[i]]) <- c("SlopeCoef", "Lower_LimitCoef", "Upper_LimitCoef",
"RB50Coef")
stderr[[i]] <- t(as.data.frame(suppressWarnings(summary(ryegrass.m1[[i]])$coefficients[5:8])))
colnames(stderr[[i]]) <- c("SlopeErr", "Lower_LimitErr", "Upper_LimitErr", "RB50Err")
model_pred[[i]] <- suppressWarnings(predict(ryegrass.m1[[i]]))
} else {
failed_sigm = failed_sigm + 1
fit <- stats::lm(as.numeric(Reps_FC[i, seq_len((chans - 1))]) ~ poly(log10(conc),
2))
# extract the pval
pval <- as.numeric(summary(fit)$coefficients[, 4])
pvals[[i]] <- t(as.data.frame(c(pval, "lm-fit:intercept.slope.quadratic")))
colnames(pvals[[i]]) <- c("SlopePval", "Lower_LimitPval", "Upper_LimitPval",
"RB50Pval")
stderr[[i]] <- data.frame(NA, NA, NA, NA)
colnames(stderr[[i]]) <- c("SlopeErr", "Lower_LimitErr", "Upper_LimitErr", "RB50Err")
coeff_predicted[[i]] <- data.frame(NA, NA, NA, NA)
colnames(coeff_predicted[[i]]) <- c("SlopeCoef", "Lower_LimitCoef", "Upper_LimitCoef",
"RB50Coef")
model_pred[[i]] <- as.numeric(stats::fitted(fit))
}
}
modelsReps <- data.frame(do.call(rbind.data.frame, lapply(model_pred, function(x) as.numeric(x))),
Reps_FC$GeneID, do.call(rbind.data.frame, lapply(pvals, function(x) x)), do.call(rbind.data.frame,
lapply(coeff_predicted, function(x) x)), do.call(rbind.data.frame, lapply(stderr,
function(x) x)))
colnames(modelsReps) <- c(pred.names, "GeneID", "SlopePval", "Lower_LimitPval", "Upper_LimitPval",
"RB50Pval", "SlopeCoef", "Lower_LimitCoef", "Upper_LimitCoef", "RB50Coef", "SlopeErr",
"Lower_LimitErr", "Upper_LimitErr", "RB50Err")
data_merged_2 <- merge.data.frame(modelsReps, Reps_FC, by = "GeneID")
data_merged_2 <- data.frame(data_merged_2, Top_minus_min = data_merged_2$predX1 -
data_merged_2[, paste("predX", (chans - 1), sep = "")])
data_merged_2
}
} else {
nvec <- seq_len(chans)
nvec <- length(nvec)
data.merged <- dataFrame
conc <- sigmoidConc
final.Names <- standard_names(chans, reps, dataType = "LFC")
pred.names <- paste0("predX", seq_len(chans))
colnames(data.merged[, seq_len(chans)]) <- final.Names
data_merged_positives <- data.merged
data_merged_positives2 <- ((1/data_merged_positives[, seq_len(chans)])) * 100
if (incPDofPD == TRUE) {
Reps_FC <- data.frame(data_merged_positives2, Accession = data_merged_positives$Accession,
GeneID = data_merged_positives$GeneID, UniquePeps = data_merged_positives$UniquePeps,
depletionConstant = data_merged_positives$Kd, MissingVal = data_merged_positives$MissingVal)
} else {
Reps_FC <- data.frame(data_merged_positives2, Accession = data_merged_positives$Accession,
GeneID = data_merged_positives$GeneID, UniquePeps = data_merged_positives$UniquePeps,
MissingVal = data_merged_positives$MissingVal)
}
ryegrass.m1 <- vector(mode = "list", length = dim(data_merged_positives)[1])
pvals <- vector(mode = "list", length = dim(data_merged_positives)[1])
stderr <- vector(mode = "list", length = dim(data_merged_positives)[1])
model_pred <- vector(mode = "list", length = dim(data_merged_positives)[1])
coeff_predicted <- vector(mode = "list", length = dim(data_merged_positives)[1])
for (i in seq_len(nrow(Reps_FC))) {
# maxIt and relTol to be user defined
ryegrass.m1[[i]] <- suppressWarnings(try(drc::drm(as.numeric(Reps_FC[i, seq_len(chans)]) ~
as.numeric(conc), na.action = stats::na.omit, control = drc::drmc(constr = FALSE,
errorm = FALSE, noMessage = TRUE, maxIt = 1000, relTol = 1e-06), fct = drc::LL.4(fixed = c(NA,
NA, NA, NA), names = c("Slope", "Lower Limit", "Upper Limit", "RB50"))), silent = TRUE))
}
failed_sigm = 0
for (i in seq_len(length(ryegrass.m1))) {
# checking_val if FALSE the model has failed to calculate the pval
checking_val <- try(is.numeric(vsn::coefficients(ryegrass.m1[[i]])[["Slope:(Intercept)"]]),
silent = TRUE)
if (checking_val == "TRUE") {
pvals[[i]] <- t(as.data.frame(suppressWarnings(summary(ryegrass.m1[[i]])$coefficients[13:16])))
colnames(pvals[[i]]) <- c("SlopePval", "Lower_LimitPval", "Upper_LimitPval", "RB50Pval")
coeff_predicted[[i]] <- t(data.frame(vsn::coefficients(ryegrass.m1[[i]])))
colnames(coeff_predicted[[i]]) <- c("SlopeCoef", "Lower_LimitCoef", "Upper_LimitCoef",
"RB50Coef")
stderr[[i]] <- t(as.data.frame(suppressWarnings(summary(ryegrass.m1[[i]])$coefficients[5:8])))
colnames(stderr[[i]]) <- c("SlopeErr", "Lower_LimitErr", "Upper_LimitErr", "RB50Err")
model_pred[[i]] <- suppressWarnings(predict(ryegrass.m1[[i]]))
} else {
failed_sigm = failed_sigm + 1
fit <- stats::lm(as.numeric(Reps_FC[i, seq_len(chans)]) ~ poly(log10(conc), 2))
# extract the pval
pval <- as.numeric(summary(fit)$coefficients[, 4])
pvals[[i]] <- t(as.data.frame(c(pval, "lm-fit:intercept.slope.quadratic")))
colnames(pvals[[i]]) <- c("SlopePval", "Lower_LimitPval", "Upper_LimitPval", "RB50Pval")
stderr[[i]] <- data.frame(NA, NA, NA, NA)
colnames(stderr[[i]]) <- c("SlopeErr", "Lower_LimitErr", "Upper_LimitErr", "RB50Err")
coeff_predicted[[i]] <- data.frame(NA, NA, NA, NA)
colnames(coeff_predicted[[i]]) <- c("SlopeCoef", "Lower_LimitCoef", "Upper_LimitCoef",
"RB50Coef")
model_pred[[i]] <- as.numeric(stats::fitted(fit))
}
}
modelsReps <- data.frame(do.call(rbind.data.frame, lapply(model_pred, function(x) as.numeric(x))),
Reps_FC$GeneID, do.call(rbind.data.frame, lapply(pvals, function(x) x)), do.call(rbind.data.frame,
lapply(coeff_predicted, function(x) x)), do.call(rbind.data.frame, lapply(stderr,
function(x) x)))
colnames(modelsReps) <- c(pred.names, "GeneID", "SlopePval", "Lower_LimitPval", "Upper_LimitPval",
"RB50Pval", "SlopeCoef", "Lower_LimitCoef", "Upper_LimitCoef", "RB50Coef", "SlopeErr",
"Lower_LimitErr", "Upper_LimitErr", "RB50Err")
data_merged_2 <- merge.data.frame(modelsReps, Reps_FC, by = "GeneID")
data_merged_2 <- data.frame(data_merged_2, Top_minus_min = data_merged_2$predX1 - data_merged_2[,
paste("predX", chans, sep = "")])
if (incPDofPD == TRUE) {
data_merged_2 <- data.frame(data_merged_2, correctedRB50 = (data_merged_2$RB50Coef *
data_merged_2$depletionConstant))
} else {
data_merged_2 <- data.frame(data_merged_2)
}
data_merged_2
}
} else {
if (dataType == "intensity") {
nvec <- seq_len(((chans - 1) * (reps - 1)))
nvec <- length(nvec)
data.merged <- dataFrame
conc <- rep(0:(chans - 2), times = reps)
} else {
nvec <- seq_len(((chans) * (reps)))
nvec <- length(nvec)
data.merged <- dataFrame
conc <- rep(0:(chans - 1), times = reps)[seq_len((chans * reps))]
}
design <- stats::model.matrix(~poly(conc, 2))
colnames(design) <- c("Intercept", "Slope", "Quadratic")
fit <- limma::lmFit(data.merged[, seq_len(length(conc))], method = "ls", design = design)
fit <- limma::eBayes(fit)
res <- limma::topTable(fit, coef = "Slope", number = nrow(data.merged), adjust = "BH") #pval for the slope
res2 <- limma::topTable(fit, coef = 1, number = nrow(data.merged), adjust = "BH") #pval for the intercept
res3 <- limma::topTable(fit, coef = "Quadratic", number = nrow(data.merged), adjust = "BH") #pval for the quadratic term ()
# add the pvalues to the dataframe
tmp_1 <- cbind(data.merged[rownames(res), ], res)
tmp_2 <- cbind(data.merged[rownames(res2), ], res2)
tmp_3 <- cbind(data.merged[rownames(res3), ], res3)
tobeselected <- merge.data.frame(tmp_1, tmp_2, by = "Accession")
tobeselected <- merge.data.frame(tobeselected, tmp_3, by = "Accession")
selectnames <- c(paste0(colnames(data.merged)[seq_len(length(conc))], ".x"), "logFC.x", "AveExpr.x",
"P.Value", "adj.P.Val", "P.Value.x", "adj.P.Val.x", "P.Value.y", "adj.P.Val.y", "Accession",
"GeneID.x", "UniquePeps")
# can create a replacement names function here ....
data.merged <- tobeselected[, match(selectnames, colnames(tobeselected))]
nam <- standard_names(chans, reps, dataType)
if (dataType == "intensity") {
nam <- nam[-seq(1, reps * chans, by = chans)]
}
colnames(data.merged)[seq_len(length(nam))] <- nam
colnames(data.merged) <- c(nam, "logFC", "AveExpr", "P.Value_slope", "adj.P.Val_slope", "P.Value_intercept",
"adj.P.Val_intercept", "P.Value_quadratic", "adj.P.Val_quadratic", "Accession", "GeneID",
"UniquePeps")
data.merged
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.