Nothing
#' Create patient networks from full matrix of named measurements
#'
#' @details Creates patient similarity networks when full matrices of
#' data are provided (e.g. gene expression, questionnaire results). To
#' generate networks from sparse data such as CNVs or indels, use
#' \code{makePSN_RangeSets} instead.
#' The rows of the data matrix (xpr) must be named (nm); one network is
#' create for each named set (namedSets). There are two options for the
#' way in which networks are created, depending on the value of
#' \code{writeProfiles}.
#' 1. writeProfiles=TRUE: GeneMANIA is used to generate interaction networks
#' and sparsify networks. This only works if the desired measure of
#' similarity is network-level Pearson correlation; an example is networks
#' at the level of pathways. In this case, the user does not explicitly
#' specify a similarity measure and \code{simMetric} is ignored.
#' 2. writeProfiles=FALSE: GeneMANIA is not used to generate interaction
#' networks. Rather, netDx uses \code{simMetric} to create interaction
#' networks. Networks can be sparsified by excluding weak connections
#' (cutoff).
#' @param xpr (matrix) rows are measurements, columns are samples. Columns
#' must be named (patient ID)
#' @param nm (character) names for measurements corresponding to row order
#' of \code{xpr}. Must match the names in the named sets specified in
#' \code{nameSets}
#' @param namedSets (list) sets of names to be grouped together. keys are
#' set names, and networks will be named as these. values are character
#' vectors corresponding to groups of names (matching those in \code{nm})
#' that are input to network generation
#' @param outDir (char) path to directory where networks are written.
#' If missing, is set to tempdir()
#' @param simMetric (char) measure of similarity. See \code{getSimilarity()}
#' for details. If writeProfiles is set to TRUE, must be one of pearson
#' (Pearson correlation) or MI (correlation by mutual information).
#' @param cutoff (numeric) patients with similarity smaller than this value
#' are not included in the corresponding interaction network
#' @param verbose (logical) print detailed messages
#' @param numCores (integer) number of cores for parallel network generation
#' @param writeProfiles (logical) use GeneMANIA's ProfileToNetworkDriver to
#' create interaction networks. If TRUE, this function writes subsets
#' of the original data corresponding to networks to file (profiles).
#' If FALSE, uses getSimilarity() and writes interaction networks.
#' @param sparsify (logical). If TRUE, sparsifies patient similarity network.
#' See useSparsify2, sparsify_edgeMax and sparsify_maxInt
#' @param useSparsify2 (logical). Cleaner sparsification routine.
#' If FALSE, uses new matrix-based sparsify3
#' @param sparsify_maxInt (numeric) Max num edges per node in sparsified
#' network.
#' @param sparsify_edgeMax (numeric) Max number of edges to include in the
#' final network
#' @param minMembers (integer) min number of measures in a network for
#' the network to be included. Useful when similarity measures require a
#' minimum number of measures to be meaningful (e.g. minimum of 6 for Pearson
#' correlation)
#' @param runSerially (logical) set to TRUE to create nets serially, rather
#' than in parallel
#' @param ... passed to \code{getSimilarity()}
#' @return (char) Basename of files to which networks are written.
#' Side effect of writing interaction networks in \code{outDir}
#' @import doParallel
#' @examples data(xpr,pheno,pathwayList);
#' # you may get a warning message that the output directory already
#' # exists; ignore it
#' out <- makePSN_NamedMatrix(xpr,rownames(xpr),pathwayList,
#' '.',writeProfiles=TRUE)
#' @export
makePSN_NamedMatrix <- function(xpr, nm, namedSets, outDir = tempdir(),
simMetric = "pearson",
verbose = TRUE, numCores = 1L, writeProfiles = TRUE, sparsify = FALSE,
useSparsify2 = FALSE,
cutoff = 0.3, sparsify_edgeMax = Inf, sparsify_maxInt = 50,
minMembers = 1L,
runSerially = FALSE, ...) {
if ((!simMetric %in% c("pearson", "MI")) & writeProfiles == TRUE) {
print(simMetric)
stop(paste("writeProfiles must only be TRUE with simMetric",
" set to pearson or MI. For all other metrics, ",
"set writeProfiles=FALSE", sep = ""))
}
cl <- makeCluster(numCores, outfile = paste(outDir,"makePSN_log.txt", sep=getFileSep()))
if (!runSerially) {
registerDoParallel(cl)
} else {
message("running serially")
}
if (simMetric == "pearson") {
message(paste("Pearson similarity chosen - ",
"enforcing min. 5 patients per net.",
sep = ""))
minMembers <- 5
}
# process pathways in parallel
`%myinfix%` <- ifelse(runSerially, `%do%`, `%dopar%`)
outFiles <- foreach(curSet = names(namedSets)) %myinfix% {
if (verbose)
message(sprintf("%s: ", curSet))
idx <- which(nm %in% namedSets[[curSet]])
if (verbose)
message(sprintf("%i members", length(idx)))
oFile <- NULL
# has sufficient connections to make network
if (length(idx) >= minMembers) {
if (writeProfiles) {
outFile <- paste(outDir,sprintf("%s.profile",curSet),
sep=getFileSep())
write.table(t(xpr[idx, , drop = FALSE]), file = outFile,
sep = "\t",dec=".",
col.names = FALSE, row.names = TRUE, quote = FALSE)
} else {
outFile <- paste(outDir,sprintf("%s_cont.txt", curSet),
sep=getFileSep())
message(sprintf("computing sim for %s", curSet))
sim <- getSimilarity(xpr[idx, , drop = FALSE],
type = simMetric,
...)
if (is.null(sim)) {
stop(sprintf(paste("makePSN_NamedMatrix:%s: ",
"similarity matrix is empty (NULL).\n",
"Check that there isn't a mistake in the ",
"input data or similarity method of choice.\n",
sep = ""), curSet))
}
pat_pairs <- sim
if (sparsify) {
if (useSparsify2) {
tryCatch({
spmat <- sparsify2(pat_pairs, cutoff = cutoff,
EDGE_MAX = sparsify_edgeMax,
outFile = outFile, maxInt = sparsify_maxInt)
}, error = function(ex) {
stop("sparsify2 caught error\n")
})
} else {
message("sparsify3")
tryCatch({
sp_t0 <- Sys.time()
spmat <- sparsify3(pat_pairs, cutoff = cutoff,
EDGE_MAX = sparsify_edgeMax,
outFile = outFile, maxInt = sparsify_maxInt,
verbose = FALSE)
print(Sys.time() - sp_t0)
}, error = function(ex) {
stop("sparsify3 caught error\n")
})
}
} else {
write.table(pat_pairs, file = outFile, sep = "\t",
col.names = FALSE,
row.names = FALSE, quote = FALSE)
print(basename(outFile))
message("done")
}
}
oFile <- basename(outFile)
}
oFile
}
stopCluster(cl)
outFiles
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.