########################################################################/**
# @RdocFunction findFiles
#
# @title "Finds one or several files in multiple directories"
#
# \description{
# @get "title".
# }
#
# @synopsis
#
# \arguments{
# \item{pattern}{A regular expression file name pattern to match.}
# \item{paths}{A @character @vector of paths to be searched.}
# \item{recursive}{If @TRUE, the directory structure is searched
# breadth-first, in lexicographic order.}
# \item{firstOnly}{If @TRUE, the method returns as soon as a matching
# file is found, otherwise not.}
# \item{allFiles}{If @FALSE, files and directories starting with
# a period will be skipped, otherwise not.}
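# \item{...}{Additional arguments. These are only forwarded to the
# recursive calls of this function (when \code{recursive} is @TRUE);
# they are not passed to @see "base::list.files".}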
# }
#
# \value{
# Returns a @vector of the full pathnames of the files found.
# }
#
# \section{Paths}{
# The \code{paths} argument may also contain paths specified as
# semi-colon (\code{";"}) separated paths, e.g.
# \code{"/usr/;usr/bin/;.;"}.
# }
#
# \section{Windows Shortcut links}{
# If package \pkg{R.utils} is available and loaded, Windows Shortcut links (*.lnk)
# are recognized and can be used to imitate links to directories
# elsewhere. For more details, see @see "R.utils::filePath".
# }
#
# @author "HB"
#
# @keyword file
# @keyword IO
# @keyword internal
#**/#######################################################################
findFiles <- function(pattern=NULL, paths=NULL, recursive=FALSE, firstOnly=TRUE, allFiles=TRUE, ...) {
  # Searches one or several directories for files whose basename matches
  # a regular expression.
  #
  # Arguments:
  #   pattern   - Regular expression matched against the basename of each
  #               file; if NULL, all files match.
  #   paths     - Character vector of directories to search.  Each element
  #               may itself hold several ";"-separated paths.  If NULL or
  #               empty, the current directory (".") is searched.
  #   recursive - If TRUE, subdirectories are searched as well; within a
  #               directory, files are considered before subdirectories.
  #   firstOnly - If TRUE, the first matching pathname is returned as soon
  #               as it is found.
  #   allFiles  - Passed as 'all.files' to list.files(); if FALSE, files
  #               and directories starting with a period are skipped.
  #   ...       - Forwarded to the recursive calls of findFiles() only.
  #
  # Returns:
  #   A character vector of pathnames (a single pathname if 'firstOnly'
  #   is TRUE), or NULL if nothing was found.

  # To please R CMD check: 'filePath' is not defined in this file; it is
  # only called when package R.utils is attached (see below).
  filePath <- NULL; rm(list="filePath")

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Local functions
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Splits "path1; path2;path3" style strings into separate paths, trims
  # leading/trailing spaces and tabs, and drops empty entries.
  # Returns NULL if no paths remain.
  splitPaths <- function(paths, ...) {
    if (length(paths) == 0)
      return(NULL)

    paths <- unlist(strsplit(paths, split=";"))
    paths <- gsub("[ \t]*$", "", gsub("^[ \t]*", "", paths))
    paths <- paths[nchar(paths) > 0]
    if (length(paths) == 0)
      return(NULL)

    paths
  } # splitPaths()

  # Checks if a package is attached to the search path or not
  # (cut'n'paste from R.utils)
  isPackageLoaded <- function(package, version=NULL, ...) {
    s <- search()
    if (is.null(version)) {
      # Drop any "_<version>" suffix from the search() entries
      s <- sub("_[0-9.-]*", "", s)
    } else {
      package <- paste(package, version, sep="_")
    }
    pattern <- sprintf("package:%s", package)
    (pattern %in% s)
  } # isPackageLoaded()

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Argument 'paths':
  paths <- splitPaths(paths)
  if (is.null(paths))
    paths <- "."

  # Argument 'pattern':
  if (!is.null(pattern))
    pattern <- as.character(pattern)

  # Argument 'recursive':
  recursive <- as.logical(recursive)

  # Argument 'firstOnly':
  firstOnly <- as.logical(firstOnly)

  # Argument 'allFiles':
  allFiles <- as.logical(allFiles)

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Prepare list of paths to be scanned
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  hasRutilsLoaded <- isPackageLoaded("R.utils")

  # Don't search the same path twice
  paths <- unique(paths)

  # Don't search non-existing paths
  for (kk in seq_along(paths)) {
    path <- paths[kk]

    # Expand any '~':s
    path <- file.path(dirname(path), basename(path))
    path <- gsub("^[.][/\\]", "", path)

    # Follow Windows shortcut links?
    if (hasRutilsLoaded)
      path <- filePath(path, expandLinks="any")

    # Does the path exist and is it a directory?
    # Note, isdir is TRUE for directories, FALSE for files,
    # *and* NA for non-existing files, e.g. broken Unix links.
    isDirectory <- identical(file.info(path)$isdir, TRUE)
    if (!file.exists(path) || !isDirectory)
      path <- NA

    paths[kk] <- path
  }
  paths <- paths[!is.na(paths)]

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Search for files
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  pathnames <- c()
  for (path in paths) {
    files <- list.files(path, all.files=allFiles, full.names=TRUE)

    # Exclude listings that are neither files nor directories
    files <- gsub("^[.][/\\]", "", files)
    files <- files[nchar(files) > 0]
    excl <- (basename(files) %in% c(".", "..", "/", "\\"))
    files <- files[!excl]

    # Nothing to do?
    if (length(files) == 0)
      next

    # Expand Windows shortcut links?  Remember the non-expanded names,
    # because the pattern is matched against those.
    files0 <- files
    if (hasRutilsLoaded) {
      files <- sapply(files, FUN=filePath, expandLinks="any", USE.NAMES=FALSE)
    }

    # Keep only existing and accessible files and directories.
    # Note, isdir is TRUE for directories, FALSE for files,
    # *and* NA for non-existing files, e.g. items found by
    # list.files() but are broken Unix links.
    # Both file.exists() and file.info() are vectorized over 'files'.
    isDir <- file.info(files)$isdir
    ok <- (file.exists(files) & !is.na(isDir))
    files <- files[ok]
    files0 <- files0[ok]
    isDir <- isDir[ok]

    # Nothing to do?
    if (length(files) == 0)
      next

    # First search the files, then the directories
    dirs <- files[isDir]
    files <- files[!isDir]
    files0 <- files0[!isDir]

    # Keep only files that match the filename pattern
    # of the non-expanded filename.
    if (!is.null(pattern)) {
      keep <- grepl(pattern, basename(files0))
      files <- files[keep]
    }

    if (length(files) > 0) {
      # Files in lexicographic order
      files <- sort(files)
      if (firstOnly)
        return(files[1])

      # Store results
      pathnames <- c(pathnames, files)
    }

    # Search directories recursively (in lexicographic order)?
    if (recursive) {
      for (dir in sort(dirs)) {
        # 'allFiles' must be forwarded explicitly; it is a formal argument
        # and is therefore never part of '...'.
        files <- findFiles(pattern=pattern, paths=dir, recursive=recursive,
                           firstOnly=firstOnly, allFiles=allFiles, ...)
        if (length(files) > 0 && firstOnly)
          return(files[1])

        pathnames <- c(pathnames, files)
      }
    }
  } # for (path ...)

  pathnames
} # findFiles()
############################################################################
# HISTORY:
# 2013-03-18 [HB]
# o Internal isPackageLoaded() no longer uses defunct manglePackageName().
# 2008-02-21 [HB]
# o Added an internal generic isPackageLoaded() function.
# 2008-02-20 [KH]
# o Replaced require("R.utils") with a "isLoaded()" feature.
# 2008-02-14
# o Added argument 'allFiles=TRUE' to findFiles().
# 2007-09-17
# o ROBUSTNESS: Now findFiles() are robust against broken Unix links.
# 2007-08-30
# o BUG FIX: Pattern matching was done on expanded filenames, whereas they
# should really be done on the non-expanded ones. This, only applies to
# Windows shortcuts, but it is not the destination file that is of
# interest, but the name of the shortcut file.
# o BUG FIX: The recent update was not grep():ing correctly; forgot to
# extract the basename().
# 2007-08-27
# o Now findFiles(..., recursive=TRUE) does a breadth-first search in
# lexicographic order.
# o Now findFiles() doesn't search replicated directories.
# 2006-11-01
# o Removed usage of R.utils for now.
# 2006-03-14
# o Created from findCdf.R.
############################################################################
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.