Nothing
#'
#' Find the optimal encoding dimension
#'
#' Finds the optimal encoding dimension for a given data set by running a
#' grid search based on the provided parameter set.
#'
#' @param ods An OutriderDataSet
#' @param params,encDimParams Set of possible q values.
#' @param freq Frequency of outlier, defaults to 1E-2
#' @param zScore,zScoreParams Set of possible injection Z-score, defaults to 3.
#' @param sdlog Standard deviation of the sitribution on the log scale.
#' @param lnorm If TRUE, the default, Z-scores are drawn from a log normal
#' distribution with a mean of \code{log(zScore)} in log-scale.
#' @param inj Injection strategy, by default 'both'.
#' @param ... Further arguments passed on to the \code{controlForConfounders}
#' function.
#' @param BPPARAM BPPARAM object by default bpparam().
#'
#' @return The optimal encoding dimension
#'
#' @examples
#' ods <- makeExampleOutriderDataSet()
#' encDimSearchParams <- c(5, 8, 10, 12, 15)
#' zScoreParams <- c(2, 3, 5, 'lnorm')
#' implementation <- 'autoencoder'
#' register(MulticoreParam(4))
#' \dontshow{
#' ods <- ods[1:12,1:12]
#' encDimSearchParams <- c(2)
#' zScoreParams <- c('lnorm')
#' register(SerialParam())
#' implementation <- 'pca'
#' }
#' ods1 <- findEncodingDim(ods, params=encDimSearchParams,
#' implementation=implementation)
#' plotEncDimSearch(ods1)
#'
#' ods2 <- findInjectZscore(ods, zScoreParams=zScoreParams,
#' encDimParams=encDimSearchParams, implementation=implementation)
#' plotEncDimSearch(ods2)
#'
#' @rdname findEncodingDim
#' @aliases findEncodingDim, findInjectZscore
#' @export
findEncodingDim <- function(ods,
params=seq(2, min(100, ncol(ods) - 1, nrow(ods) - 1), 2),
freq=1E-2, zScore=3, sdlog=log(1.6), lnorm=TRUE,
inj='both', ..., BPPARAM=bpparam()){
# compute auto Correction
ods <- estimateSizeFactors(ods)
ods <- injectOutliers(ods, freq=freq, zScore=zScore, inj=inj, lnorm=lnorm,
sdlog=sdlog)
# TODO Please check if this is still correct
# ods <- getLossBounderyCases(ods)
eval <- bplapply(X=params, ..., BPPARAM=BPPARAM,
FUN=function(i, ..., evalAucPRLoss=NA){
evalAutoCorrection(ods, encoding_dim=i, BPPARAM=SerialParam(), ...)}
)
metadata(ods)[['encDimTable']] <- data.table(
encodingDimension= params,
evaluationLoss= unlist(eval),
evalMethod='aucPR')
metadata(ods)[['optimalEncDim']] <- NULL
metadata(ods)[['optimalEncDim']] <- getBestQ(ods)
counts(ods) <- assay(ods, 'trueCounts')
validateOutriderDataSet(ods)
return(ods)
}
getLossBounderyCases <- function(ods){
## Check limits of loss:
# Check min and max of loss by substituting
# counts and true Counts for the normalization Factors.
# Max overfitting loss:
normalizationFactors(ods) <- pmax(counts(ods), 1E-9)
overfit <- evalLoss(ods)
# Best possible loss:
normalizationFactors(ods) <- pmax(assay(ods, 'trueCounts'), 1E-9)
bestloss <- evalLoss(ods)
# Max underfitting loss (only taking the means of the data):
normalizationFactors(ods) <- matrix(
rep(pmax(rowMeans(counts(ods)), 1E-9), ncol(ods)),
ncol=ncol(ods))
underfit <- evalLoss(ods)
metadata(ods)[['boundaryCases']] = c(
'Best possible loss (c=k)' = bestloss,
'Max overfitting loss (c=kcorr)' = overfit,
'Max underfitting loss (c=colMeans(kcorr)' = underfit)
return(ods)
}
#' @rdname findEncodingDim
#' @export
findInjectZscore <- function(ods, freq=1E-2,
zScoreParams=c(seq(1.5, 4, 0.5), 'lnorm'),
encDimParams=c(seq(3, 40, 3), seq(45,70, 5), 100, 130, 160),
inj='both', ..., BPPARAM=bpparam()){
encDimParams <- encDimParams[encDimParams < min(dim(ods), nrow(ods))]
FUN <- function(idx, grid, ods, RNGseed, ...){
z <- grid[idx,"z"]
enc <- grid[idx,"enc"]
lnorm <- FALSE
if(z == 'lnorm'){
z <- 3
lnorm=TRUE
} else {
z <- as.integer(z)
}
message(date(), ": run Z-score: ", z, " and enc: ", enc)
set.seed(RNGseed)
findEncodingDim(ods, lnorm=lnorm, zScore=z, params=enc,
BPPARAM=SerialParam(), ...)
}
RNGseed <- .Random.seed
parGrid <- expand.grid(z=zScoreParams, enc=encDimParams)
odsres <- bplapply(seq_row(parGrid), FUN, ods=ods, freq=freq,
inj=inj, grid=parGrid, RNGseed=RNGseed, BPPARAM=BPPARAM)
res <- rbindlist(lapply(seq_along(odsres), function(i){
metadata(odsres[[i]])$encDimTable[,.(
encodingDimension, zScore=parGrid[i,"z"], evaluationLoss)]
}))
res <- rbindlist(lapply(zScoreParams, function(z){
tmpdt <- res[zScore==z]
tmpdt[,opt:=getBestQDT(tmpdt)]
tmpdt
}))
metadata(ods)[['encDimTable']] <- res
return(ods)
}
## Helper functions called within SearchEncDim ##
#' injectOutliers
#'
#' @param ods OutriderDataSet
#' @param freq frequency of injected counts.
#' @param zScore injection Z-score.
#' @param inj injection strategy.
#'
#' @return and OutriderDataSet with artificially corrupted counts.
#' @noRd
injectOutliers <- function(ods, freq, zScore, inj, lnorm, sdlog){
# copy true counts to be able to acces them in the loss later
assay(ods, 'trueCounts', withDimnames=FALSE) <- counts(ods)
# generate index of injected counts
size <- prod(dim(ods))
index <- sample(c(0,1,-1), size, prob = c(1 - freq, freq/2, freq/2),
replace = TRUE)
index <- matrix(index, nrow = nrow(ods))
switch(inj,
low = { index <- -abs(index) },
high = { index <- abs(index) }
)
tmpzScore <- matrix(0, ncol=ncol(ods), nrow=nrow(ods))
if(isTRUE(lnorm)){
tmpzScore[index!=0] <- rlnorm(sum(index!=0), log(zScore), sdlog=sdlog)
} else {
tmpzScore[index!=0] <- zScore
}
zScore <- tmpzScore
#inject counts
max_out <- min(10*max(counts(ods), na.rm=TRUE), .Machine$integer.max)
# compute size factor normalized counts.
# don't use it on the ods to not influence the later calculation.
sf <- estimateSizeFactorsForMatrix(counts(ods))
normtable <- t(t(counts(ods))/sf)
counts <- counts(ods)
list_index <- which(index != 0, arr.ind = TRUE)
for(i in seq_len(nrow(list_index))){
idxCol <- list_index[i,'col']
idxRow <- list_index[i,'row']
cts <- as.numeric(normtable[idxRow,])
fc <- zScore[idxRow, idxCol] * sd(log2(1 + cts))
clcount <- index[idxRow, idxCol] * fc + log2(1 + cts[idxCol])
#multiply size factor again
art_out <- round(sf[idxCol] * 2^clcount)
# only insert outliers if they are different from before
# and not too large
if(art_out < max_out & counts[idxRow, idxCol] != art_out){
counts[idxRow, idxCol] <- art_out
}else{
index[idxRow, idxCol] <- 0
zScore
}
}
# save coruppted counts and index of corruption into ods
assay(ods, 'counts', withDimnames=FALSE) <- matrix(as.integer(counts),
nrow=nrow(ods))
assay(ods, 'trueCorruptions', withDimnames=FALSE) <- index
assay(ods, 'injectedZscore', withDimnames=FALSE) <- zScore
return(ods)
}
#'
#' evaluate loss = 1/n * Sum(log-likelihood(k | c))
#'
#' @noRd
evalLoss <- function(ods, theta=25){
N_corrupted <- sum(abs(assay(ods, 'trueCorruptions')))
kTrue <- assay(ods, 'trueCounts')[assay(ods, 'trueCorruptions')!=0]
c <- assay(ods, 'normalizationFactors')[assay(ods, 'trueCorruptions')!=0]
eval <- sum(-dnbinom(kTrue, mu=c , size=theta, log=TRUE))/N_corrupted
return(eval)
}
evalAucPRLoss <- function(ods){
scores <- -as.vector(assay(ods, 'pValue'))
labels <- as.vector(assay(ods, 'trueCorruptions') != 0) + 0
if(any(is.na(scores))){
warning(sum(is.na(scores)), " P-values where NAs.")
scores[is.na(scores)] <- min(scores, na.rm=TRUE)-1
}
pr <- pr.curve(scores, weights.class0=labels)
return(max(0, pr$auc.integral, na.rm=TRUE))
}
evalAutoCorrection <- function(ods, encoding_dim, BPPARAM=bpparam(), ...){
ods <- OUTRIDER(ods, controlData=TRUE,q=encoding_dim, BPPARAM=BPPARAM, ...)
eloss <- evalAucPRLoss(ods)
print(paste0('Evaluation loss: ', eloss,' for q=',encoding_dim))
return(eloss)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.