# https://tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/tcga4yeo/tumor/ov/cgcc/broad.mit.edu/genome_wide_snp_6/snp/broad.mit.edu_OV.Genome_Wide_SNP_6.12.5.0/MANIFEST.txt
# http://tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/
setConstructorS3("TcgaDccDownloader", function(urlPath=NULL, rootUrl="tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/", protocol=c("auto", "https","http"), username=NULL, password=NULL, ...) {
# Argument 'protocol':
protocol <- match.arg(protocol);
extend(Object(), "TcgaDccDownloader",
.protocol = protocol,
.username = username,
.password = password,
.rootUrl = rootUrl,
.urlPath = urlPath
)
})
setMethodS3("as.character", "TcgaDccDownloader", function(x, ...) {
# To please R CMD check
this <- x;
s <- sprintf("%s:", class(this)[1]);
s <- c(s, sprintf("Protocol: %s", getProtocol(this)));
s <- c(s, sprintf("Root URL: %s", getRootUrl(this)));
s <- c(s, sprintf("URL path: %s", getUrlPath(this)));
s <- c(s, sprintf("Root path: %s", getRootPath(this)));
class(s) <- "GenericSummary";
s;
})
setMethodS3("getRootPath", "TcgaDccDownloader", function(this, ...) {
"tcgaData";
})
setMethodS3("getProtocol", "TcgaDccDownloader", function(this, ...) {
protocol <- this$.protocol;
if (protocol == "auto") {
path <- getUrlPath(this);
path <- strsplit(path, split="/", fixed=TRUE)[[1]];
path <- path[1];
if (path %in% c("tcga4yeo")) {
protocol <- "https";
} else {
protocol <- "http";
}
}
protocol;
})
setMethodS3("getUsername", "TcgaDccDownloader", function(this, ...) {
this$.username;
})
setMethodS3("setUsername", "TcgaDccDownloader", function(this, value, ...) {
this$.username <- value;
this;
})
setMethodS3("getPassword", "TcgaDccDownloader", function(this, ...) {
this$.password;
})
setMethodS3("setPassword", "TcgaDccDownloader", function(this, value, ...) {
this$.password <- value;
this;
})
setMethodS3("setLogin", "TcgaDccDownloader", function(this, username, password, ...) {
setUsername(this, username);
setPassword(this, password);
})
setMethodS3("getRootUrl", "TcgaDccDownloader", function(this, ...) {
this$.rootUrl;
})
setMethodS3("getUrlPath", "TcgaDccDownloader", function(this, ...) {
this$.urlPath;
})
setMethodS3("getUrlPathname", "TcgaDccDownloader", function(this, filename, ...) {
filename <- Arguments$getCharacter(filename);
url <- paste(c(getRootUrl(this), getUrlPath(this), filename), collapse="/");
url <- gsub("//", "/", url);
url;
})
setMethodS3("getUrl", "TcgaDccDownloader", function(this, ...) {
protocol <- getProtocol(this);
pathname <- getUrlPathname(this, ...);
url <- sprintf("%s://%s", protocol, pathname);
url;
})
setMethodS3("downloadUrl", "TcgaDccDownloader", function(this, url, filename=NULL, path=NULL, ..., skip=TRUE, overwrite=!skip, verbose=FALSE) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'url':
url <- Arguments$getCharacter(url);
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose);
if (verbose) {
pushState(verbose);
on.exit(popState(verbose));
}
# Argument 'filename' & 'path':
if (is.null(filename)) {
filename <- basename(url);
}
if (is.null(path)) {
path <- file.path(getRootPath(this), getUrlPath(this));
}
pathname <- Arguments$getWritablePathname(filename, path=path,
mustNotExist=FALSE);
verbose && enter(verbose, "Downloading URL");
verbose && cat(verbose, "URL: ", url);
if (isFile(pathname)) {
fi <- file.info(pathname);
if (!is.na(fi$size) && fi$size == 0) {
file.remove(pathname);
verbose && cat(verbose, "Removed empty preexisting file: ", pathname);
} else if (skip) {
verbose && cat(verbose, "Already downloaded. Skipping.");
verbose && exit(verbose);
return(pathname);
}
}
# Download to a temporary pathname
pathnameT <- sprintf("%s.tmp", pathname);
pathnameT <- Arguments$getWritablePathname(pathnameT, mustNotExist=TRUE);
on.exit({
if (isFile(pathnameT)) {
file.remove(pathnameT);
}
}, add=TRUE);
protocol <- getProtocol(this);
if (protocol == "https") {
usr <- getUsername(this);
pwd <- getPassword(this);
opts <- sprintf("--http-user=%s --http-passwd=%s", usr, pwd);
} else {
opts <- "";
}
fmtstr <- "wget --output-document=\"%s\" --no-check-certificate %s %s";
cmd <- sprintf(fmtstr, pathnameT, opts, url);
# Download
verbose && enter(verbose, "Downloading");
verbose && cat(verbose, "Command: ", cmd);
res <- system(cmd);
verbose && cat(verbose, "Downloading finished\n");
verbose && cat(verbose, "Download result:", res);
verbose && exit(verbose);
# Remove failed or "empty" downloads
fi <- file.info(pathnameT);
verbose && str(verbose, fi);
if (res != 0 || is.na(fi$size) || fi$size == 0) {
file.remove(pathnameT);
verbose && cat(verbose, "Removed downloaded file because it was empty: ", pathnameT);
pathname <- NULL;
} else {
# Rename temporary file
file.rename(pathnameT, pathname);
if (!isFile(pathname)) {
throw("Failed to rename temporary filename: ", pathnameT, " -> ", pathname);
}
if (isFile(pathnameT)) {
throw("Failed to remove temporary filename: ", pathnameT);
}
}
verbose && exit(verbose);
invisible(pathname);
})
setMethodS3("downloadFile", "TcgaDccDownloader", function(this, filename, ..., onError=c("error", "warning", "quiet"), force=FALSE, verbose=FALSE) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'filename':
filename <- Arguments$getCharacter(filename);
# Argument 'onError':
onError <- match.arg(onError);
# Argument 'force':
force <- Arguments$getLogical(force);
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose);
if (verbose) {
pushState(verbose);
on.exit(popState(verbose));
}
verbose && enter(verbose, "Downloading URL");
pathname <- NULL;
if (force || filename == "MANIFEST.txt" || hasFile(this, filename)) {
url <- getUrl(this, filename);
verbose && cat(verbose, "Filename: ", filename);
verbose && cat(verbose, "URL: ", url);
tryCatch({
pathname <- downloadUrl(this, url, ..., verbose=verbose);
}, error = function(ex) {
msg <- ex$message;
verbose && print(verbose, msg);
if (onError == "error") {
throw(ex);
} else if (onError == "warning") {
warning(msg);
} else if (onError == "quiet") {
}
});
}
verbose && cat(verbose, "Downloaded file: ", pathname);
verbose && exit(verbose);
pathname;
})
setMethodS3("downloadFiles", "TcgaDccDownloader", function(this, ...) {
downloadFilesByPattern(this, ...);
}, deprecated=TRUE);
setMethodS3("downloadFilesByPattern", "TcgaDccDownloader", function(this, pattern=NULL, ..., order=c("ascending", "descending", "random"), verbose=FALSE) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'pattern':
if (!is.null(pattern)) {
pattern <- Arguments$getRegularExpression(pattern);
}
# Argument 'order':
order <- match.arg(order);
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose);
if (verbose) {
pushState(verbose);
on.exit(popState(verbose));
}
verbose && enter(verbose, "Downloading files matching pattern");
verbose && enter(verbose, "Identifying filenames to be downloading");
verbose && cat(verbose, "Pattern: ", pattern);
filenames <- listFiles(this, pattern=pattern, ..., verbose=less(verbose, 5));
verbose && str(verbose, filenames);
verbose && exit(verbose);
# Reorder?
if (order == "ascending") {
o <- order(filenames, decreasing=FALSE);
} else if (order == "ascending") {
o <- order(filenames, decreasing=TRUE);
} else if (order == "random") {
o <- sample(length(filenames), replace=FALSE);
}
filenames <- filenames[o];
verbose && str(verbose, filenames);
verbose && enter(verbose, "Downloading files");
verbose && cat(verbose, "Number of files: ", length(filenames));
pathnames <- sapply(filenames, FUN=function(filename) {
verbose && print(verbose, filename);
pathname <- downloadFile(this, filename, ...);
verbose && print(verbose, pathname);
pathname;
});
verbose && str(verbose, pathnames);
verbose && exit(verbose);
verbose && exit(verbose);
invisible(pathnames);
})
setMethodS3("readLines", "TcgaDccDownloader", function(con, ...) {
# To please R CMD check
this <- con;
pathname <- downloadFile(this, ...);
if (length(pathname) == 0)
return(NULL);
lines <- readLines(pathname);
attr(lines, "pathname") <- pathname;
lines;
})
setMethodS3("readTextFile", "TcgaDccDownloader", function(this, filename, ...) {
field <- sprintf(".%s", filename);
value <- this[[field]];
if (is.null(value)) {
value <- readLines(this, filename);
this[[field]] <- value;
}
value;
})
setMethodS3("readManifest", "TcgaDccDownloader", function(this, ...) {
readTextFile(this, "MANIFEST.txt");
})
setMethodS3("readReadme", "TcgaDccDownloader", function(this, ...) {
readTextFile(this, "README.txt");
})
setMethodS3("readIdf", "TcgaDccDownloader", function(this, ...) {
path <- getUrlPath(this);
filename <- listFiles(this, pattern="[.]idf[.]txt", ...);
if (length(filename) == 0) {
throw("Cannot read IDF. Failed to located *.idf.txt: ");
}
if (length(filename) > 2) {
throw("Cannot read IDF. Failed to located a unique *.idf.txt: ",
paste(filename, collapse=", "));
}
readTextFile(this, filename);
})
setMethodS3("readSdrf", "TcgaDccDownloader", function(this, ...) {
path <- getUrlPath(this);
filename <- listFiles(this, pattern="[.]sdrf[.]txt", ...);
if (length(filename) == 0) {
throw("Cannot read IDF. Failed to located *.idf.txt: ");
}
if (length(filename) > 2) {
throw("Cannot read IDF. Failed to located a unique *.idf.txt: ",
paste(filename, collapse=", "));
}
readTextFile(this, filename);
})
# Not required
setMethodS3("readDescription", "TcgaDccDownloader", function(this, ...) {
readTextFile(this, "DESCRIPTION.txt");
})
# Not required
setMethodS3("readSampleInfo", "TcgaDccDownloader", function(this, ...) {
readTextFile(this, "SampleInfo.txt");
})
setMethodS3("downloadCoreTextFile", "TcgaDccDownloader", function(this, ...) {
# Must exists
manifest <- readManifest(this);
readme <- readReadme(this);
# Semi-optional
tryCatch({
idf <- readIdf(this);
}, error = function(ex) {});
tryCatch({
sdrf <- readSdrf(this);
}, error = function(ex) {});
# Optional
si <- readSampleInfo(this);
desc <- readDescription(this);
})
setMethodS3("listFiles", "TcgaDccDownloader", function(this, pattern=NULL, ignore.case=TRUE, ...) {
if (!is.null(pattern)) {
pattern <- Arguments$getRegularExpression(pattern);
}
manifest <- readManifest(this, ...);
# Check if there are checksums
pattern2 <- "^([0-9abcdefABCDEF]+)[ ]+(.*)$";
hasChecksums <- (regexpr(pattern2, manifest) != -1);
# Sanity check
stopifnot(length(unique(hasChecksums)) == 1);
hasChecksums <- hasChecksums[1];
if (hasChecksums) {
filenames <- gsub(pattern2, "\\2", manifest);
} else {
filenames <- manifest;
}
if (!is.null(pattern)) {
filenames <- grep(pattern=pattern, filenames,
ignore.case=ignore.case, value=TRUE);
}
filenames;
})
setMethodS3("hasFiles", "TcgaDccDownloader", function(this, filenames, ...) {
# Argument 'filenames':
filenames <- Arguments$getCharacter(filenames);
availableFilenames <- listFiles(this, ...);
is.element(filenames, availableFilenames);
})
setMethodS3("hasFile", "TcgaDccDownloader", function(this, filename, ...) {
# Argument 'filename':
filename <- Arguments$getCharacter(filename);
hasFiles(this, filenames=filename, ...);
})
setMethodS3("getDataSet", "TcgaDccDownloader", function(this, ...) {
path <- getUrlPath(this);
# path <- getParent(path, depth=0);
name <- basename(path);
name;
})
setMethodS3("getDataSetName", "TcgaDccDownloader", function(this, ...) {
fullname <- getDataSet(this, ...);
pattern <- TcgaDccData$getDataSetPatterns()$dataset;
gsub(pattern, "\\1_\\2.\\3", fullname);
})
setMethodS3("getArchive", "TcgaDccDownloader", function(this, ...) {
fullname <- getDataSet(this, ...);
pattern <- TcgaDccData$getDataSetPatterns()$dataset;
gsub(pattern, "\\4", fullname);
})
setMethodS3("getDataType", "TcgaDccDownloader", function(this, ...) {
path <- getUrlPath(this);
path <- getParent(path, depth=1);
name <- basename(path);
name;
})
setMethodS3("getPlatform", "TcgaDccDownloader", function(this, ...) {
path <- getUrlPath(this);
path <- getParent(path, depth=2);
name <- basename(path);
name;
})
setMethodS3("getCenter", "TcgaDccDownloader", function(this, ...) {
path <- getUrlPath(this);
path <- getParent(path, depth=3);
name <- basename(path);
name;
})
setMethodS3("getCenterType", "TcgaDccDownloader", function(this, ...) {
path <- getUrlPath(this);
path <- getParent(path, depth=4);
name <- basename(path);
name;
})
setMethodS3("getTumorType", "TcgaDccDownloader", function(this, ...) {
path <- getUrlPath(this);
path <- getParent(path, depth=5);
name <- basename(path);
name;
})
setMethodS3("getAccessRoot", "TcgaDccDownloader", function(this, ...) {
path <- getUrlPath(this);
path <- getParent(path, depth=6);
name <- basename(path);
name;
})
setMethodS3("getKnownFilePatterns", "TcgaDccDownloader", function(this, types=c("coreFiles", "level1", "level2", "level3"), ...) {
# Argument 'types':
types <- Arguments$getCharacters(types);
center <- getCenter(this);
platform <- getPlatform(this);
dataType <- getDataType(this);
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Default patterns
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
patterns <- list(
coreFiles = c(
"^MANIFEST.txt$",
"^README.txt$",
"[.]idf[.]txt$",
"[.]sdrf[.]txt$"
),
level1 = c(),
level2 = c(),
level3 = c()
);
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (center == "hudsonalpha.org" &&
is.element(platform, c("humanhap550", "human1mduo"))) {
patterns$level1 <- c(
"[.]idat$",
"[.]XandYintensity[.]txt$"
);
patterns$level2 <- c(
"[.]B_Allele_Freq[.]txt$",
"[.]Delta_B_Allele_Freq[.]txt$",
"[.]Genotypes[.]txt$",
"[.]Normal_LogR[.]txt$",
"[.]Paired_LogR[.]txt$",
"[.]Unpaired_LogR[.]txt$"
);
patterns$level3 <- c(
"[.]seg[.]txt$",
"[.]segnormal[.]txt$",
"[.]loh[.]txt$"
);
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
else if (center == "broad.mit.edu" &&
platform == "genome_wide_snp_6") {
patterns$coreFiles <- c(patterns$coreFiles,
"^DESCRIPTION.txt$",
"^SampleInfo.txt$"
);
patterns$level1 <- c(
"[.](cel|CEL)$"
);
patterns$level2 <- c(
"[.]after_5NN[.]copynumber[.]data[.]txt$",
"[.]copynumber[.]byallele[.]data[.]txt$",
"^[^.]+[.]copynumber[.]data[.]txt$",
"[.]ismpolish[.]data[.]txt$",
"[.]no_outlier[.]copynumber[.]data[.]txt$"
);
patterns$level3 <- c(
"[.]birdseed[.]data[.]txt$",
"[.]seg[.]data[.]txt$"
);
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
else if (center == "hms.harvard.edu" &&
is.element(platform, c("hg-cgh-244a", "hg-cgh-415k_g4124a"))) {
patterns$level1 <- c(
"_BioSizing[.]tsv$",
"_QA[.]tsv$",
"[.]jpg$",
"_lowess_normalized_smoothed[.]png$",
"[.]pdf$",
"^TCGA-.*[.]txt$"
);
if (platform == "hg-cgh-244a") {
patterns$level1 <- c(patterns$level1,
"_Nanodrop[.]tsv$",
"_lowess_normalized[.]tsv$",
"[.]tif$"
);
};
patterns$level2 <- c(
"[.]tsv$",
"[.]data[.]txt$"
);
patterns$level3 <- c(
"_Segment[.]tsv$"
);
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
else if (center == "mskcc.org" &&
is.element(platform, c("hg-cgh-244a", "cgh-1x1m_g4447a"))) {
patterns$level1 <- c(
"^MSK_.*[.]txt$"
);
patterns$level2 <- c(
"[.]data[.]txt$"
);
patterns$level3 <- c(
"[.]CBS[.]txt$"
);
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
else if (center == "jhu-usc.edu" &&
platform == "humanmethylation27") {
patterns$level1 <- c(
"lvl-1[.].*[.]txt$"
);
patterns$level2 <- c(
"lvl-2[.].*[.]txt$"
);
patterns$level3 <- c(
"lvl-3[.].*[.]txt$"
);
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
else if (center == "jhu-usc.edu" &&
is.element(platform, c("illuminadnamethylation_oma002_cpi", "illuminadnamethylation_oma003_cpi"))) {
patterns$level1 <- c(
"cy3-cy5-value[.]txt$"
);
patterns$level2 <- c(
"beta-value[.]txt$",
"detection-p-value[.]txt$"
);
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
else if (center == "unc.edu" &&
is.element(platform, c("agilentg4502a_07_1", "agilentg4502a_07_2", "agilentg4502a_07_3"))) {
patterns$level1 <- c(
"_([a-Z]{3}|[0-9]{2})[0-9]{2}[.]txt$"
);
patterns$level2 <- c(
"lmean[.]out[.]logratio[.]probe[.]tcga_level2[.]data[.]txt$"
);
patterns$level3 <- c(
"lmean[.]out[.]logratio[.]gene[.]tcga_level3[.]data[.]txt$"
);
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
else if (center == "unc.edu" &&
is.element(platform, c("h-mirna_8x15k", "h-mirna_8x15kv2"))) {
patterns$level1 <- c(
"_[a-Z]{3}[0-9]{2}_[0-9]_[0-9][.]txt$"
);
patterns$level2 <- c(
"probe[.]tcga_level2[.]data[.]txt$"
);
patterns$level3 <- c(
"gene[.]tcga_level3[.]data[.]txt$"
);
}
else {
throw("Unknown center & platform: ", center, ", ", platform);
}
patterns <- patterns[types];
patterns;
})
############################################################################
# HISTORY:
# 2010-04-05
# o Added file patterns from jhu-usc.edu and unc.edu to getKnownFilePatterns().
# 2010-01-17
# o Updated listFiles() of TcgaDccDownloader to also handle the new DCC v3
# MANIFEST.txt files that contains MD5 checksums.
# 2010-01-04
# o ROBUSTNESS: Updated getKnownFilePatterns() of TcgaDccDownloader to
# "^[^.]+[.]copynumber[.]data[.]txt$" in order to avoid getting
# "[.]no_outlier[.]copynumber[.]data[.]txt$" files too.
# 2009-11-01
# o Added (mskcc.org, cgh-1x1m_g4447a) to getKnownFilePatterns().
# 2009-10-29
# o Added default argument 'ignore.case=TRUE' to listFiles(...) for
# TcgaDccDownloader.
# 2009-10-22
# o Added argument 'onError' to downloadFile().
# o Added argument 'order' to downloadFilesByPattern().
# o Renamed downloadFiles() to downloadFilesByPattern().
# o Now downloadUrl() downloads to a temporary file and renames it after
# download is complete. This lower the risk for incomplete downloads.
# o Now downloadFile() only download files available in the MANIFEST file.
# o Added hasFiles() and hasFile() to TcgaDccDownloader.
# 2009-04-18
# o BUG FIX: Now the output directory passed to wget is put within double
# quotes.
# o Now downloadUrl() removes pre-existing empty files by default.
# o Added accessor functions for path attributes.
# 2009-04-16
# o Added downloadFiles().
# o Added setLogin().
# 2009-04-13
# o Updated readIdf() and readSdrf() to locate the files via the manifest
# file.
# 2009-04-07
# o Created.
############################################################################
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.