## @title Processor
##
## @description Data processing and error checks for all SIGHTS functions
##
## @param m A numeric matrix.
## @param type Type of function.
## @param mr,mc Matrix rows and cols.
## @param pr,pc Plate rows and cols.
## @param bm Bias matrix.
## @param bc Bias cols.
## @param ri Replicate index.
## @param cn Sample names.
## @param tf Trim factor.
## @param ts Test side.
## @param cm Control method.
##
## @details Checks for all errors and converts the data matrices into the required formats. It provides useful error messages.
##
## @keywords internal
## @return Numeric matrix
##
sightsCheck <- function(m, type, mr = NULL, mc = NULL, pr = NULL, pc = NULL, bm = NULL, bc = NULL, ri = NULL,
cn = NULL, tf = NULL, ts = NULL, cm = NULL) {
if (is.null(ncol(m))) {
m <- matrix(m, ncol = 1, dimnames = dimnames(m))
}
if (!is.null(mr)) {
if (is.vector(mr) & all(mr <= nrow(m))) {
m <- m[mr, , drop = FALSE]
} else {
stop(paste(type, "Rows exceed the number of plate wells in the ", type, "Matrix \n Please select well numbers between 1 and ",
nrow(m), sep = ""))
}
}
if (!is.null(pr) & !is.null(pc)) {
if (pr * pc != nrow(m)) {
stop(paste(type, "Rows (", nrow(m), ") is not equal to the product of plateRows (", pr, ") and plateCols (",
pc, ")", sep = ""))
}
}
if (!is.null(mc)) {
if (is.vector(mc) & all(mc <= ncol(m))) {
m <- m[, mc, drop = FALSE]
} else {
stop(paste(type, "Cols exceed the number of plates in the ", type, "Matrix \n Please select plate numbers between 1 and ",
ncol(m), sep = ""))
}
}
if (!is.null(bm)) {
if (nrow(bm) != nrow(m)) {
stop("dataRows is not equal to the number of plate wells in the biasMatrix")
}
}
if (!is.null(bc)) {
if (!(is.vector(bc) & all(bc <= ncol(m)))) {
stop("biasCols exceed the number of plates in the dataMatrix")
}
}
if (is.vector(ri) & length(ri) != ncol(m)) {
stop(paste(type, "Cols is not equal to the number of plates in the repIndex", sep = ""))
} else if (!is.null(ri)) {
if (!any(duplicated(ri))) {
stop("Please provide at least two replicates per sample")
} else if (length(unique(table(ri))) != 1) {
stop("Please provide the same number of replicates per sample")
}
}
if (is.vector(cn) & length(cn) != ncol(m)) {
stop(paste(type, "Cols is not equal to the number of sample names in the colNames", sep = ""))
}
if (!is.null(tf)) {
if (!is.numeric(tf) | tf < 0 | tf > 0.5) {
stop("trimFactor is not a number between 0 and 0.5")
}
}
if (!is.null(ts)) {
if (!any(ts == c("less", "greater", "two.sided"))) {
stop("testSide can only take one of these values: 'less', 'greater' or 'two.sided'")
}
}
if (type == "fdr") {
if (dim(m)[2]%%5 != 0) {
stop("testMatrix should consist of 5 columns each for 1 sample, where the 5th column should contain p-values")
}
if (!(is.character(cm)) | length(cm) != 1) {
stop(paste("Please provide a valid method name. Select one from", dQuote("smoother"), "or", dQuote("bootstrap"),
". Note: names are case-sensitive."))
}
if (cm != "smoother" & cm != "bootstrap") {
stop(paste(dQuote(cm), "is not a valid method name. Select one from", dQuote("smoother"), "or",
dQuote("bootstrap"), ". Note: names are case-sensitive."))
}
}
if (type == "data") {
if (is.data.frame(m) == FALSE) {
m <- as.data.frame(m)
}
} else {
# if plotData is a dataframe, turn it into a matrix
if (is.data.frame(m))
m <- as.matrix(m)
# if plotData is a vector, turn it into a matrix with one column
if (is.null(dim(m)[[2]])) {
m <- matrix(m, ncol = 1)
}
}
return(m)
}
##
## @title Z-score
##
## @description Calculates Z-score
##
## @param x A numeric matrix.
##
## @details Used in normZ.
##
## @keywords internal
## @return Numeric matrix
##
myZ <- function(x) {
return((x - base::mean(x, na.rm = TRUE))/stats::sd(x, na.rm = TRUE))
}
##
## @title Robust Z-score
##
## @description Calculates robust Z-score
##
## @param x A numeric matrix.
##
## @details Used in normRobZ, normLoess and normMedFil.
##
## @keywords internal
## @return Numeric matrix
##
myRobZ <- function(x) {
return((x - stats::median(x, na.rm = TRUE))/stats::mad(x, na.rm = TRUE))
}
##
## @title Loess score
##
## @description Calculates loess score
##
## @param plate A single plate.
## @param pr,pc Plate rows and cols.
##
## @details Used in normLoess.
##
## @keywords internal
## @return Numeric matrix
##
loessMe <- function(plate, pr, pc) {
coll <- 1:pc
roww <- 1:pr
tmp <- matrix(plate, nrow = pr, ncol = pc, byrow = FALSE)
rloess <- cloess <- NULL
# get row loess fit
for (i in 1:pr) {
nas <- rep(NA, pc)
isna <- is.na(tmp[i, ])
naInd <- coll[!is.na(tmp[i, ])]
x <- tmp[i, ]
if (sum(isna) != length(x)) {
tmp1 <- stats::loess(x ~ coll, family = "symmetric")$fitted
nas[naInd] <- tmp1
}
rloess <- rbind(rloess, nas)
}
# get column loess fit
for (i in 1:pc) {
nas <- rep(NA, pr)
isna <- is.na(tmp[, i])
naInd <- roww[!is.na(tmp[, i])]
x <- tmp[, i]
if (sum(isna) != length(x)) {
tmp1 <- stats::loess(x ~ roww, family = "symmetric")$fitted
nas[naInd] <- tmp1
}
cloess <- cbind(cloess, nas)
}
# get row and column averages of smoothers
rmeans <- apply(rloess, 1, base::mean, na.rm = TRUE)
cmeans <- apply(cloess, 2, base::mean, na.rm = TRUE)
# calculate normalized scores in inefficient fashion
outMat <- matrix(NA, nrow = pr, ncol = pc)
for (i in 1:pr) {
for (j in 1:pc) {
adj <- (rmeans[i]/rloess[i, j]) * (cmeans[j]/cloess[i, j])
outMat[i, j] <- tmp[i, j] * adj
}
}
return(c(outMat))
}
##
## @title MedFil score
##
## @description Calculates median filter score
##
## @param plate A single plate.
## @param buds Buddies.
##
## @details Used in normMedFil.
##
## @keywords internal
## @return Numeric matrix
##
applyMedFilter <- function(plate, buds) {
grandMed <- stats::median(plate, na.rm = TRUE)
outVec <- NULL
for (i in 1:length(plate)) {
hood <- buds[i, !is.na(buds[i, ])]
medd <- stats::median(plate[hood], na.rm = TRUE)
outVec <- c(outVec, plate[i] * (grandMed/medd))
}
return(outVec)
}
## @title Create Neighbourhood Matrix
##
## @description Creates neighbourhood list for matrix of data.
##
## @param nrows Number of rows in matrix.
## @param ncols Number of columns in matrix.
## @param wind Window size. Defines max distance of neighbours. Value of 2 will create a 5x5 neighbourhood.
##
## @keywords internal
## @return Numeric matrix
##
## @details Assumes data will be sequentially ordered first by column, then row. Each row defines neighbours for that well. Index values define which data points are neighbours in sequential data. The neighboorhood includes the middle well. Used in normMedFil.
##
createNeighbourhoodMatrix <- function(ncols = 12, nrows = 8, wind = 2) {
# total number of wells in matrix
numWells <- nrows * ncols
# create wasteful output matrix for max neighbourhood
outMat <- matrix(NA, nrow = numWells, ncol = numWells)
# reference matrix to identify sequential index of each well in matrix
indMat <- matrix(1:numWells, nrow = nrows, ncol = ncols)
# keeps track of where we are in sequential vector of matrix
seqWellInd <- 0
# keeps track of maximum number of buddies a well can have
maxBuddies <- 0
# iterate through each well location and find neighbours
for (coll in 1:ncols) {
for (roww in 1:nrows) {
# cat('Finding buddies for', roww, ' ', coll, '\n')
seqWellInd <- seqWellInd + 1
# vector to store neighbours
buddies <- NULL
# iterate through neighbourood , first by row, then column
for (i in (roww - wind):(roww + wind)) {
for (j in (coll - wind):(coll + wind)) {
# only keep valid neighbours (within range of matrix)
if ((i >= 1) & (i <= nrows) & (j >= 1) & (j <= ncols)) {
buddies <- c(buddies, indMat[i, j])
}
}
}
# assign neighbours to storage matrix
outMat[seqWellInd, 1:length(buddies)] <- sort(buddies)
# check if number of neighbours exceeds max neighbours yet encountered
if (length(buddies) > maxBuddies) {
maxBuddies <- length(buddies)
}
}
}
# remove excess columns
outMat <- outMat[, 1:maxBuddies]
return(outMat)
}
## @title Create Sequential Neighbourhood Matrix
##
## @description Creates a row neighbourhood list for matrix of data.
##
## @param nrows Number of rows in matrix.
## @param ncols Number of columns in matrix.
## @param wind Window size. Defines max distance of neighbours. Value of 2 will create a 5x5 neighbourhood.
##
## @keywords internal
## @return Numeric matrix
##
## @details Assumes data will be sequentially ordered first by column, then row. Each row defines neighbours for that well. Index values define which data points are neighbours in sequential data. The neighboorhood includes the middle well. Used in normMedFil.
##
createSequentialNeighbourhoodMatrix <- function(ncols = 12, nrows = 8, wind = 2) {
# createSequentialNeighbourhoodMatrix(namey='E://projects//HTS//Inglese//spatial//data//IngleseConNeighboursRowWindow3.txt',
# ncols=40, nrows=32, wind=3) total number of wells in matrix
numWells <- nrows * ncols
# create wasteful output matrix for max neighbourhood
outMat <- matrix(NA, nrow = numWells, ncol = numWells)
# reference matrix to identify sequential index of each well in matrix
indMat <- matrix(1:numWells, nrow = nrows, ncol = ncols)
# keeps track of where we are in sequential vector of matrix
seqWellInd <- 0
# keeps track of maximum number of buddies a well can have
maxBuddies <- 0
# iterate through each well location and find neighbours
for (coll in 1:ncols) {
for (roww in 1:nrows) {
# cat('Finding buddies for', roww, ' ', coll, '\n')
seqWellInd <- seqWellInd + 1
# vector to store neighbours
buddies <- NULL
# iterate through neighbourood by row
for (j in (coll - wind):(coll + wind)) {
# only keep valid neighbours (within range of matrix)
if ((j >= 1) & (j <= ncols))
{
# cat(roww, ' ', j, '\n')
buddies <- c(buddies, indMat[roww, j])
} # end if i
} # end for j
# assign neighbours to storage matrix
outMat[seqWellInd, 1:length(buddies)] <- sort(buddies)
# check if number of neighbours exceeds max neighbours yet encountered
if (length(buddies) > maxBuddies) {
maxBuddies <- length(buddies)
}
} # end for roww
} # end for coll
# remove excess columns
outMat <- outMat[, 1:maxBuddies]
return(outMat)
}
## @title Median Normalization
## @description Calculates median normalized results
## @param x Data matrix input.
## @details Used in normR.
## @return Numeric matrix
## @keywords internal
##
medMe <- function(x) {
return(x - stats::median(x, na.rm = TRUE))
}
##
## @title R-score
## @description Calculates robust regression score
## @param x A single plate.
## @param rows,cols Plate rows and cols.
## @keywords internal
## @return Numeric matrix
## @details Apply rlm model to x with autoregressive coefficient params rows and cols. Normalize by dividing by variance estimate. Used in normR.
##
rlmRConly <- function(x, rows, cols) {
tmp <- rep(NA, length(x))
naInd <- is.na(x)
ret <- MASS::rlm(x ~ rows + cols)
# tmp <- ret$residuals
tmp[!naInd] <- ret$residuals
return(tmp/ret$s)
}
##
## @title SPAWN score
## @description Calculates spatial and well normalization score
## @param mat Entire data matrix.
## @param pr,pc Plate rows and cols.
## @param tf Trim factor.
## @details Used in normSPAWN.
## @keywords internal
## @return Numeric matrix
##
spawning <- function(mat, pr, pc, tf) {
len <- dim(mat)[[2]]
# calculate trimmed mean polish
mt <- NULL
for (i in 1:len) {
tmp <- c(t(trimPolish(matrix(mat[, i], nrow = pr, ncol = pc, byrow = TRUE), trace.iter = FALSE, trim = tf)$residuals))
mt <- cbind(mt, tmp/stats::mad(tmp, na.rm = TRUE))
}
return(mt)
}
## @title Trimmed polish
## @description Modified from median polish code of B-score
##
## @param x A numeric matrix.
## @param eps Tolerance for convergence. Should be a real number greater than 0.
## @param maxiter The maximum number of iterations.
## @param trace.iter Logical. Should progress in convergence be reported?
## @param na.rm Logical. Should missing values be removed?
## @param trim Trim value when applying trimmed mean.
##
## @details Used in normSPAWN.
##
## @keywords internal
## @return Numeric matrix
##
trimPolish <- function(x, eps = 0.01, maxiter = 10, trace.iter = FALSE, na.rm = TRUE, trim) {
z <- as.matrix(x)
nr <- nrow(z)
nc <- ncol(z)
t <- 0
r <- numeric(nr)
c <- numeric(nc)
oldsum <- 0
for (iter in 1:maxiter) {
rdelta <- apply(z, 1, base::mean, na.rm = na.rm, trim)
z <- z - matrix(rdelta, nrow = nr, ncol = nc, byrow = FALSE)
r <- r + rdelta
delta <- base::mean(c, na.rm = na.rm, trim)
c <- c - delta
t <- t + delta
cdelta <- apply(z, 2, base::mean, na.rm = na.rm, trim)
z <- z - matrix(cdelta, nrow = nr, ncol = nc, byrow = TRUE)
c <- c + cdelta
delta <- mean(r, na.rm = na.rm, trim)
r <- r - delta
t <- t + delta
newsum <- sum(abs(z), na.rm = na.rm)
converged <- newsum == 0 || abs(newsum - oldsum) < eps * newsum
if (converged)
break
oldsum <- newsum
if (trace.iter)
cat(iter, ":", newsum, "\n")
}
if (converged) {
if (trace.iter)
cat("Final:", newsum, "\n")
} else warning(gettextf("medpolish() did not converge in %d iterations", maxiter), domain = NA)
names(r) <- rownames(z)
names(c) <- colnames(z)
ans <- list(overall = t, row = r, col = c, residuals = z, name = deparse(substitute(x)))
class(ans) <- "medpolish"
ans
}
## @title Empirical probability
## @description Generate empirical probability density function for data.
## @param data - vector of data points
## @param q - trimming value.
## @details Remove 1-q points as outliers from greater tail. Used in plotIGFit.
## @keywords internal
## @return Numeric vector
##
myca <- function(data, q = 0.9) {
# x <- seq(0.001,3.5,0.001)
len <- getLen(data, quan = q)
# len <- 1000
maxi <- sort(data)[len]
x <- seq(min(data), maxi, length = len)
# x <- quantile(data[1:len], ppoints(100))
p <- rep(0, len - 1)
lenny <- length(data)
for (i in 1:len) p[i] <- (sum(data < x[i]))/lenny
return(cbind(x, p))
}
## @title Get length
## @description Get rid of the very large data points so graphs scale better.
## @param data - vector of data points
## @param quan - trimming quantile.
## @details Used in plotIGFit (myca).
## @return Numeric vector
## @keywords internal
##
getLen <- function(data, quan = 0.9) {
return(trunc(quan * length(data)))
}
## @title t-test
## @description Do one-sample t-test
## @param x A numeric matrix
## @param testSide Test side
## @param trueMean True mean
## @details Used in statT.
## @keywords internal
## @return Numeric matrix
myT <- function(x, testSide, trueMean) {
# handle all NA situation
if (sum(is.na(x)) == length(x)) {
return(rep(NA, 5))
}
tmp <- stats::t.test(x, alternative = testSide, mu = trueMean)
return(c(tmp$statistic, base::mean(x, na.rm = TRUE) - trueMean, (1/sqrt(tmp$parameter + 1)) * stats::sd(x,
na.rm = TRUE), tmp$parameter, tmp$p.value))
}
## @title checking for NAs
## @param x A numeric matrix
## @return Numeric matrix
## @description set return values to NA if initial data were all NA
## @details Used in statRVM.
## @keywords internal
##
checkNA <- function(x) {
if (is.na(x[1])) {
outie <- rep(NA, 5)
} else {
outie <- x
}
return(outie)
}
## @title RVM test
## @param x A numeric matrix
## @param side Test side
## @return Numeric matrix
## @description apply RVM test and return p-value
## @details Used in statRVM.
## @keywords internal
##
myRVM <- function(x, side = "less") {
if (side == "two.sided") {
res <- RVT1(x)
} else {
res <- RVT1Sided(x, side)
}
return(c(res$v, res$mn, sqrt(1/res$vvr), res$n - 1 + 2 * res$a, res$vp))
# return(list(pval=res$vp, ab=cbind(res$a[1], res$b[1])))
}
## @title RVM two-sided test
##
## @description Apply RVM test to data
##
## @param data Data has genes as rows and arrays as columns.
##
## @return A data frame with a row for each gene and a column for each parameter:
## \item{mn}{Mean value}
## \item{vr}{Unadjusted variance}
## \item{n}{Number of non-missing samples}
## \item{t}{Unadjusted t-statistic}
## \item{tp}{Unadjusted t p-value}
## \item{v}{RVM model statistic}
## \item{vp}{RVM model p-value}
## \item{vvr}{Adjusted variance}
##
## @details used in statRVM (myRVM).
##
## @keywords internal
## @return Numeric matrix
##
RVT1 <- function(data) {
vr <- rowVars(data, na.rm = TRUE)
n <- rowSums(!is.na(data))
mn <- rowMeans(data, na.rm = TRUE)
a <- getab(vr, n - 1)
b <- a[2]
a <- a[1]
cat("alpha=", a, "beta=", b, "\n")
t <- mn/sqrt(vr/n)
tp <- 2 * (1 - stats::pt(abs(t), df = n - 1))
vvr <- n * (1 + 2 * a/(n - 1))/(vr + 2/((n - 1) * b))
v <- mn * sqrt(vvr)
degf = n - 1 + 2 * a
vp <- 2 * (1 - stats::pt(abs(v), df = degf))
return(data.frame(n, mn, vr, t, tp, vvr, v, vp, a, b))
}
## @title RVM one-sided test
##
## @description Apply a one sided one sample test (greater/less than 0)
##
## @param data Data has genes as rows and arrays as columns.
## @param side Optional. Default value is 'less'.
##
## @return A data frame with a row for each gene and a column for each parameter:
## \item{mn}{Mean value}
## \item{vr}{Unadjusted variance}
## \item{n}{Number of non-missing samples}
## \item{t}{Unadjusted t-statistic}
## \item{tp}{Unadjusted t p-value}
## \item{v}{RVM model statistic}
## \item{vp}{RVM model p-value}
## \item{vvr}{Adjusted variance}
##
## @details Used in statRVM (myRVM).
##
## @keywords internal
## @return Numeric matrix
##
RVT1Sided <- function(data, side = "less") {
vr <- rowVars(data, na.rm = TRUE)
n <- rowSums(!is.na(data))
mn <- rowMeans(data, na.rm = TRUE)
a <- getab(vr, n - 1)
b <- a[2]
a <- a[1]
cat("alpha=", a, "beta=", b, "\n")
t <- mn/sqrt(vr/n)
tp <- 1 - stats::pt(t, df = n - 1)
vvr <- n * (1 + 2 * a/(n - 1))/(vr + 2/((n - 1) * b))
v <- mn * sqrt(vvr)
degf = n - 1 + 2 * a
if (side == "less") {
tp <- stats::pt(t, df = n - 1)
vp <- stats::pt(v, df = degf)
} else if (side == "greater") {
tp <- 1 - stats::pt(t, df = n - 1)
vp <- 1 - stats::pt(v, df = degf)
}
return(data.frame(n, mn, vr, t, tp, vvr, v, vp, a, b))
}
## @title Get parameters for RVM
## @description Calculates parameters of prior distribution
## @param sig sig
## @param n n
## @return Estimates of a and b
## @keywords internal
## @details Used in plotIGFit.
## @return Numeric vector
##
getab <- function(sig, n) {
set <- (!is.na(sig) & n > 0 & sig > 0)
sig <- sig[set]
n <- n[set]
set <- n > 4
if (sum(set) > 0) {
m1 <- (n[set] - 2)/((n[set]) * sig[set])
m2 <- (n[set] - 2) * (n[set] - 4)/((n[set]) * sig[set])^2
m1 <- base::mean(m1, na.rm = TRUE)
m2 <- base::mean(m2, na.rm = TRUE)
b <- m2/m1 - m1
a <- m1^2/(m2 - m1^2)
} else {
a <- b <- 1
}
strt <- c(a, b)
g <- function(p, yunq) flik(p, yunq)
a <- stats::nlm(g, strt, yunq = c(sig, n))
a$estimate <- abs(a$estimate)
}
## @title Get log likelihood
## @description Calculates log likelihood for a*b*x from an F distribution with m and 2*a degrees of freedom
## @param p Contains a and b.
## @param y Vector containing data and the m values.
## @keywords internal
## @details Used in plotIGFit (getab).
## @return Numeric vector
##
flik <- function(p, y) {
x <- y[1:(length(y)/2)]
m <- y[(length(y)/2 + 1):length(y)]
p <- abs(p)
a <- p[1]
b <- p[2]
x <- x * (a * b)
n <- 2 * a
out <- log(stats::df(x, m, n)) + log(a * b)
sum(-out)
}
## @title Row Variance
## @description Calculates variance of elements in a row
## @param x Data matrix.
## @keywords internal
## @details Used in plotIGFit.
## @return Numeric vector
##
rowVars <- function(x, ...) {
apply(x, 1, stats::var, ...)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.