pwOmics_maren: Pathway-based data integration of omics data

Documented in clusterTimeProfiles

#' Clustering of time profiles.
#' 
#' Soft clustering of time series data with Mfuzz R package [1]. Filtering of 
#' genes with low expression changes possible via min.std parameter. Expression 
#' values are standardized and undergo fuzzy c-means clustering based
#' on minimization of weighted square error function (see [2]). Fuzzifier 
#' parameter m is estimated via mestimate function of [1] based on a relation 
#' proposed by Schwaemmle and Jansen [3]. The optimal number of clusters is
#' determined via the minimum distance between cluster centroid using Dmin 
#' function of [3]. Be aware that the cluster number determination might be
#' difficult especially for short time series measurements.
#'  
#' @references 1. L. Kumar and M. Futschik, Mfuzz: a software package for soft 
#' clustering of microarray data, Bioinformation, 2(1) 5-7, 2007.
#' @references 2. Bezdak JC, Pattern Recognition with Fuzzy Objective Function 
#' Algorithms, Plenum Press, New York, 1981.
#' @references 3. Schwaemmle and Jensen, Bioinformatics, Vol. 26 (22), 
#' 2841-2848, 2010.
#'  
#' @param dynConsensusNet result of dynamic analysis: inferred net generated by 
#' consDynamicNet function.
#' @param min.std threshold parameter to exclude genes with a low standard 
#' deviation. All genes with an expression smaller than min.std will be excluded
#' from clustering. Default value is 0.
#' @param ncenters integer specifying the maximum number of centers which should 
#' be tested in minimum distance between cluster centroid test; this number 
#' is used as initial number to determine the data-specific maximal
#' cluster number based on number of distinct data points.
#' @return output dataframe of mfuzz function.
#' @keywords manip
#' @export
#' @examples
#' data(OmicsExampleData)
#' data_omics = readOmics(tp_prots = c(0.25, 1, 4, 8, 13, 18, 24), 
#' tp_genes = c(1, 4, 8, 13, 18, 24), OmicsExampleData,
#' PWdatabase = c("biocarta", "kegg", "nci", "reactome"), 
#' TFtargetdatabase = c("userspec"))
#' data_omics = readPhosphodata(data_omics, 
#' phosphoreg = system.file("extdata", "phospho_reg_table.txt", 
#' package = "pwOmics")) 
#' data_omics = readTFdata(data_omics, 
#' TF_target_path = system.file("extdata", "TF_targets.txt", 
#' package = "pwOmics"))
#' data_omics_plus = readPWdata(data_omics,  
#' loadgenelists = system.file("extdata/Genelists", package = "pwOmics")) 
#' \dontrun{
#' data_omics_plus = identifyPR(data_omics_plus)
#' setwd(system.file("extdata/Genelists", package = "pwOmics"))
#' data_omics = identifyPWs(data_omics_plus)
#' data_omics = identifyTFs(data_omics)
#' data_omics = identifyRsofTFs(data_omics, 
#' noTFs_inPW = 1, order_neighbors = 10)
#' data_omics = identifyPWTFTGs(data_omics)
#' statConsNet = staticConsensusNet(data_omics)
#' consDynNet = consDynamicNet(data_omics, statConsNet)
#' clusterTimeProfiles(consDynNet)
#' }
clusterTimeProfiles <- function(dynConsensusNet, min.std = 0, ncenters = 12) {
    
    if(names(dynConsensusNet[[1]])[[1]] != "APost")
    {stop("dynConsensusNet does not contain an appropriate data result
          for clustering of time profiles.")}
    
    requireNamespace("Mfuzz", quietly = TRUE)
    filter.std = mestimate = Dmin = mfuzz = NULL
    
    matsplines = dynConsensusNet[[2]]
    matsplines = ExpressionSet(matsplines)
    matsplines = filter.std(matsplines, min.std, visu = FALSE)
    m = mestimate(matsplines)
    x = as.matrix(exprs(matsplines))
    xrows <- nrow(x)
    xcols <- ncol(x)
    centers <- x[sample(1:xrows, ncenters), drop = FALSE]
    
    Dminval = Dmin(matsplines, m, crange = 2:ncenters, repeats = 5, visu = TRUE)
    
    ind_cent_num = which(abs(diff(na.omit(Dminval), 2)) == max(abs(diff(na.omit(Dminval), 2))))+2
    fuzzed_matsplines = mfuzz(matsplines, ind_cent_num, m)
    
    return(fuzzed_matsplines)
}