# Function to import pyradiomics csv files --------------------------------------------
#' Import pyradiomics data
#'
#' This functions imports feature values extracted using pyradiomics software
#'
#' @param dir (character) Path to the directory containing tsv files generated by pyradiomics
#'
#' @return A RadAR object (class \code{\link{SummarizedExperiment}})
#' @author Matteo Benelli (\email{matteo.benelli@uslcentro.toscana.it})
#' @export
#'
#' @examples
import_pyradiomics <- function (dir = NULL) {
assertthat::assert_that(length(dir) > 0, msg = paste("[RadAR] Error:", dir, "should be specified"))
assertthat::assert_that(file.exists(dir), msg = paste("[RadAR] Error:", dir, "does not exist"))
extractor <- "pyradiomics"
list_files <- list.files(dir, pattern = "csv$", full.names = T)
assertthat::assert_that(length(list_files) > 0, msg = paste("[RadAR] Error:", dir, "does not contain csv files"))
ix_files <- check_files(list_files)
list_files <- list_files[ix_files]
tmp <- list.files(dir, pattern = "csv$", full.names = F)
filenames <- gsub(".csv", "", tmp[ix_files])
feature_data <- c()
mask_id <- c()
sample_id <- c()
filename_tot <- c()
for (i in 1: length(list_files)) {
message(paste("[RadAR] importing", list_files[i]))
xx <- read.delim(list_files[i], sep = ",", header = T)
# check file, i=1
if (i == 1) {
ncol_file <- ncol(xx)
end_general <- rev(grep("general_", colnames(xx)))[1]
if (is.na(end_general)) {
end_general <- rev(grep("diagnostics_", colnames(xx)))[1]
}
assertthat::assert_that(!is.na(end_general), msg = "[RadAR] Error: Bad or unexpected format")
xx.data <- xx[1, -c(1: end_general), drop = F]
#assertthat::assert_that(all(is.numeric(xx.data)), msg = "[RadAR] Error: Bad or unexpected format (data is not numeric)")
image_types <- gsub("_.*", "", colnames(xx.data))
feature_names <- gsub(".*_", "", colnames(xx.data))
} else {
assertthat::assert_that(ncol(xx) == ncol_file, msg = paste("[RadAR] Error: unexpected number of features in", list_files[i]))
xx.data <- xx[, -c(1: end_general)]
}
mylabels <- unique(xx$Mask)
mask_id <- c(mask_id, mylabels)
sample_id <- c(sample_id, rep(filenames[i], length(mylabels)))
filename_tot <- c(filename_tot, rep(list_files[i], length(mylabels)))
assertthat::assert_that(!any(duplicated(xx$Mask)), msg = paste("[RadAR] duplicated mask names in", list_files[i]))
for (j in 1: length(mylabels)) {
mylabel <- mylabels[j]
# mylabel <- gsub("Segmentation_segment_", "", mylabels[j])
feature_data <- cbind(feature_data,
as.numeric(xx.data[which(xx$Mask == mylabel), ]))
}
}
colnames(feature_data) <- mask_id
if (length(unique(image_types)) > 1) {
feature_ids <- paste0(feature_names, ".", image_types)
} else {
feature_ids <- feature_names
}
rownames(feature_data) <- feature_ids
dict_out <- retrieve_feature_dictionary(features_names = feature_names,
extractor = extractor)
# feature_description <- rep(NA, nrow(feature_data))
rowData <- data.frame(feature_name = feature_names,
image_type = image_types,
feature_description = dict_out$feature_description,
feature_type = dict_out$feature_type
)
#rownames(rowData) <- feature_ids
colData <- data.frame(filename = filename_tot,
sample_id = sample_id,
mask_id = mask_id)
metadata <- list()
metadata$extractor <- extractor
rdr <- new_radar_dataset(features = feature_data, rowData = rowData, colData = colData, metadata = metadata)
return(rdr)
}
# Function to import 3D Slicer tabulated files --------------------------------------------
#' Import 3DSlicer data
#'
#' This functions imports feature values extracted using 3DSlicer software
#'
#' @param dir (character) Path to the directory containing tsv files generated by 3DSlicer
#'
#' @return A RadAR object (class \code{\link{SummarizedExperiment}})
#' @author Matteo Benelli (\email{matteo.benelli@uslcentro.toscana.it})
#' @export
#'
#' @examples
import_3dslicer <- function (dir) {
assertthat::assert_that(length(dir) > 0, msg = paste("[RadAR] Error:", dir, "should be specified"))
assertthat::assert_that(file.exists(dir), msg = paste("[RadAR] Error:", dir, "does not exist."))
extractor <- "3dslicer"
list_files <- list.files(dir, pattern = "tsv$", full.names = T)
assertthat::assert_that(length(list_files) > 0, msg = paste("[RadAR] Error:", dir, "does not contain tsv files"))
ix_files <- check_files(list_files)
list_files <- list_files[ix_files]
tmp <- list.files(dir, pattern = "tsv$", full.names = F)
filenames <- gsub(".tsv", "", tmp[ix_files])
feature_data <- c()
mask_id <- c()
sample_id <- c()
filename_tot <- c()
for (i in 1: length(list_files)) {
message(paste("[RadAR] importing", list_files[i]))
xx <- read.delim(list_files[i], sep = "\t", header = T)
# identify file format
if (colnames(xx)[1] == "Label") {
mylabels <- unique(xx$Label)
if (i == 1) {
assertthat::assert_that(length(xx$Feature.Class) > 0, msg = "[RadAR] Error: Bad or unexpected format")
assertthat::assert_that(length(xx$Label) > 0, msg = "[RadAR] Error: Bad or unexpected format")
assertthat::assert_that(length(xx$Image.type) > 0, msg = "[RadAR] Error: Bad or unexpected format")
assertthat::assert_that(length(xx$Feature.Name) > 0, msg = "[RadAR] Error: Bad or unexpected format")
#assertthat::assert_that(all(is.numeric(xx$Value)), msg = "[RadAR] Error: Bad or unexpected format")
nfeatures_file0 <- table(xx$Feature.Class)/length(unique(xx$Label))
ix_data_tmp <- which(xx$Label == mylabels[1] & xx$Image.type %in% c("general", "diagnostics") == F)
image_types <- xx$Image.type[ix_data_tmp]
feature_names <- xx$Feature.Name[ix_data_tmp]
} else {
nfeatures_file <- table(xx$Feature.Class)/length(unique(xx$Label))
assertthat::assert_that(assertthat::are_equal(nfeatures_file0, nfeatures_file),
msg = paste("[RadAR] Error: unexpected number of features in", list_files[i]))
}
ix_data <- which(xx$Image.type %in% c("general", "diagnostics") == F)
#xx <- xx[which(xx$Image.type %in% image_type),]
mask_id <- c(mask_id, mylabels)
sample_id <- c(sample_id, rep(filenames[i], length(mylabels)))
filename_tot <- c(filename_tot, rep(list_files[i], length(mylabels)))
assertthat::assert_that(!any(duplicated(xx$Mask)), msg = paste("[RadAR] duplicated mask names in", list_files[i]))
for (j in 1: length(mylabels)) {
mylabel <- mylabels[j]
# mylabel <- gsub("Segmentation_segment_", "", mylabels[j])
feature_data <- cbind(feature_data,
as.numeric(xx$Value[intersect(which(xx$Label == mylabel),ix_data)]))
}
}
if (colnames(xx)[1] == "Image.type") {
tmp_header <- read.delim(list_files[i], sep = "\t", header = F, check.names = F, nrows = 1)
mylabels <- gsub(".* ", "", tmp_header[4: length(tmp_header)])
if (i == 1) {
assertthat::assert_that(length(xx$Feature.Class) > 0, msg = "[RadAR] Error: Bad or unexpected format")
assertthat::assert_that(length(xx$Image.type) > 0, msg = "[RadAR] Error: Bad or unexpected format")
assertthat::assert_that(length(xx$Feature.Name) > 0, msg = "[RadAR] Error: Bad or unexpected format")
nfeatures_file0 <- nrow(xx)
ix_data_tmp <- which(xx$Image.type %in% c("general", "diagnostics") == F)
image_types <- xx$Image.type[ix_data_tmp]
feature_names <- xx$Feature.Name[ix_data_tmp]
} else {
nfeatures_file <- nrow(xx)
assertthat::assert_that(assertthat::are_equal(nfeatures_file0, nfeatures_file),
msg = paste("[RadAR] Error: unexpected number of features in", list_files[i]))
}
ix_data <- which(xx$Image.type %in% c("general", "diagnostics") == F)
mask_id <- c(mask_id, mylabels)
sample_id <- c(sample_id, rep(filenames[i], length(mylabels)))
filename_tot <- c(filename_tot, rep(list_files[i], length(mylabels)))
#assertthat::assert_that(!any(duplicated(xx$Mask)), msg = paste("[RadAR] duplicated mask names in", list_files[i]))
for (j in 1: length(mylabels)) {
mylabel <- mylabels[j]
# mylabel <- gsub("Segmentation_segment_", "", mylabels[j])
feature_data <- cbind(feature_data,
as.numeric(xx[ix_data, (3+j)]))
}
} else {
assertthat::assert_that(1 < 0, msg = paste("[RadAR] Error: format unrecognized."))
}
}
colnames(feature_data) <- mask_id
if (length(unique(image_types)) > 1) {
feature_ids <- paste0(feature_names, ".", image_types)
} else {
feature_ids <- feature_names
}
rownames(feature_data) <- feature_ids
dict_out <- retrieve_feature_dictionary(features_names = feature_names,
extractor = extractor)
# feature_description <- rep(NA, nrow(feature_data))
rowData <- data.frame(feature_name = feature_names,
image_type = image_types,
feature_description = dict_out$feature_description,
feature_type = dict_out$feature_type
)
#rownames(rowData) <- feature_ids
colData <- data.frame(filename = filename_tot,
sample_id = sample_id,
mask_id = mask_id)
metadata <- list()
metadata$extractor <- extractor
rdr <- new_radar_dataset(features = feature_data, rowData = rowData, colData = colData, metadata = metadata)
return(rdr)
}
# Function to import LifeX session files --------------------------------------------
#' Import LifeX session data
#'
#' @param session (character) Path to the session file generated by LifeX
#'
#' @return
#' @export
#'
#' @examples
import_lifex_session <- function (session) {
assertthat::assert_that(length(session) > 0, msg = paste("[RadAR] Error:", session, "should be specified"))
extractor <- "lifex"
tmp0 <- scan(file = session, what = "", sep = "\n")
skiplines <- grep("^INFO", tmp0)[1] - 1
xx <- read.delim(file = session, sep = ",", header = T, skip = skiplines)
ncol_file <- ncol(xx)
prefix_data <- gsub("_.*", "", colnames(xx))
ix_data <- which(prefix_data %in% c("HISTO",
"CONVENTIONAL",
"DISCRETIZED",
"SHAPE",
"GLCM",
"GLRLM",
"NGLDM",
"GLZLM" ))
xx.data <- xx[, ix_data, drop = F]
image_types <- rep(NA, ncol(xx.data))
feature_names <- colnames(xx.data)
xx$Mask <- paste0(xx$INFO_PatientID, "_", xx$INFO_NameOfRoi)
mask_id <- xx$Mask
sample_id <- mask_id
filename_tot <- rep(session, nrow(xx))
assertthat::assert_that(!any(duplicated(xx$Mask)), msg = paste("[RadAR] duplicated mask names in", session))
mylabels <- mask_id
feature_data <- c()
for (j in 1: length(mylabels)) {
mylabel <- mylabels[j]
feature_data <- cbind(feature_data,
as.numeric(xx.data[which(xx$Mask == mylabel), ]))
}
colnames(feature_data) <- mask_id
if (length(unique(image_types)) > 1) {
feature_ids <- paste0(feature_names, ".", image_types)
} else {
feature_ids <- feature_names
}
## check all NA features
ix_na <- which(apply(feature_data, 1, allisna))
if (length(ix_na) > 0) {
message(paste("[RadAR]", length(ix_na), "not numeric features were removed"))
feature_data <- feature_data[-ix_na, ]
feature_ids <- feature_ids[-ix_na]
}
rownames(feature_data) <- feature_ids
ix_notfinite <- which(!is.finite(feature_data), arr.ind = T)
if (length(ix_notfinite) > 0) {
feature_data[ix_notfinite] <- NA
}
dict_out <- retrieve_feature_dictionary(features_names = feature_names,
extractor = extractor)
# feature_description <- rep(NA, nrow(feature_data))
rowData <- data.frame(feature_name = feature_names,
image_type = image_types,
feature_description = dict_out$feature_description,
feature_type = dict_out$feature_type
)
if (length(ix_na) > 0) {
rowData <- rowData[-ix_na, ]
}
colData <- data.frame(filename = filename_tot,
sample_id = sample_id,
mask_id = mask_id)
metadata <- list()
metadata$extractor <- extractor
rdr <- new_radar_dataset(features = feature_data, rowData = rowData, colData = colData, metadata = metadata)
return(rdr)
}
# Function to import LifeX files (xls and csv) --------------------------------------------
#' Import LifeX data
#'
#' @param dir (character) Path to the directory containing xls or csv files generated by LifeX
#'
#' @return A RadAR object (class \code{\link{SummarizedExperiment}})
#' @author Matteo Benelli (\email{matteo.benelli@uslcentro.toscana.it})
#' @export
#'
#' @examples
import_lifex <- function (dir) {
assertthat::assert_that(length(dir) > 0, msg = paste("[RadAR] Error:", dir, "should be specified"))
extractor <- "lifex"
list_files <- NULL
list_files_xls <- list.files(dir, pattern = "xls$", full.names = T)
list_files_csv <- list.files(dir, pattern = "csv$", full.names = T)
if (length(list_files_xls) > 0 & length(list_files_csv) == 0) {
list_files <- list_files_xls
filetype <- "xls"
}
if (length(list_files_csv) > 0 & length(list_files_xls) == 0) {
list_files <- list_files_csv
filetype <- "csv"
}
assertthat::assert_that(length(list_files) > 0, msg = paste("[RadAR] Error:", dir, "no LifeX (xls or csv) file found.
For session file exported from LifeX, use import_lifex_session function instead."))
if (filetype == "xls") {
if (!requireNamespace("gdata", quietly = TRUE)) {
stop("[RadAR] Error: package \"gdata\" needed for this function to work. Please install it.",
call. = FALSE)
}
tmp <- list.files(dir, pattern = "xls$", full.names = F)
filenames <- gsub(".xls", "", tmp)
}
if (filetype == "csv") {
tmp <- list.files(dir, pattern = "csv$", full.names = F)
filenames <- gsub(".csv", "", tmp)
}
feature_data <- c()
mask_id <- c()
sample_id <- c()
filename_tot <- c()
for (i in 1: length(list_files)) {
message(paste("[RadAR] importing", list_files[i]))
if (filetype == "xls") {
xx <- gdata::read.xls(list_files[i], header = T)
} else {
xx <- read.delim(list_files[i], header = T, sep = ",", skip = 1)
}
# check file, i=1
if (i == 1) {
ncol_file <- ncol(xx)
prefix_data <- gsub("_.*", "", colnames(xx))
ix_data <- which(prefix_data %in% c("HISTO",
"CONVENTIONAL",
"DISCRETIZED",
"SHAPE",
"GLCM",
"GLRLM",
"NGLDM",
"GLZLM" ))
xx.data <- xx[1, ix_data, drop = F]
image_types <- rep(NA, ncol(xx.data))
feature_names <- colnames(xx.data)
} else {
assertthat::assert_that(ncol(xx) == ncol_file, msg = paste("[RadAR] Error: unexpected number of features in", list_files[i]))
xx.data <- xx[, ix_data]
}
xx$Mask <- paste0(xx$INFO_PatientID, "_", xx$INFO_NameOfRoi)
mylabels <- unique( xx$Mask)
mask_id <- c(mask_id, mylabels)
sample_id <- c(sample_id, rep(filenames[i], length(mylabels)))
filename_tot <- c(filename_tot, rep(list_files[i], length(mylabels)))
assertthat::assert_that(!any(duplicated(xx$Mask)), msg = paste("[RadAR] duplicated mask names in", list_files[i]))
for (j in 1: length(mylabels)) {
mylabel <- mylabels[j]
feature_data <- cbind(feature_data,
as.numeric(xx.data[which(xx$Mask == mylabel), ]))
}
}
colnames(feature_data) <- mask_id
if (length(unique(image_types)) > 1) {
feature_ids <- paste0(feature_names, ".", image_types)
} else {
feature_ids <- feature_names
}
## check all NA features
ix_na <- which(apply(feature_data, 1, allisna))
if (length(ix_na) > 0) {
message(paste("[RadAR]", length(ix_na), "not numeric features were removed"))
feature_data <- feature_data[-ix_na, ]
feature_ids <- feature_ids[-ix_na]
}
rownames(feature_data) <- feature_ids
ix_notfinite <- which(!is.finite(feature_data), arr.ind = T)
if (length(ix_notfinite) > 0) {
feature_data[ix_notfinite] <- NA
}
dict_out <- retrieve_feature_dictionary(features_names = feature_names,
extractor = extractor)
# feature_description <- rep(NA, nrow(feature_data))
rowData <- data.frame(feature_name = feature_names,
image_type = image_types,
feature_description = dict_out$feature_description,
feature_type = dict_out$feature_type
)
if (length(ix_na) > 0) {
rowData <- rowData[-ix_na, ]
}
#rownames(rowData) <- feature_ids
colData <- data.frame(filename = filename_tot,
sample_id = sample_id,
mask_id = mask_id)
metadata <- list()
metadata$extractor <- extractor
rdr <- new_radar_dataset(features = feature_data, rowData = rowData, colData = colData, metadata = metadata)
return(rdr)
}
#' Import features
#'
#' This functions imports a table of radiomic features (features in rows, patients in columns)
#'
#' @param file (character) Path to the table of radiomic features. It should be tab delimited.
#' First row should report sample names. First column should report feature names.
#'
#' @return A RadAR object (class \code{\link{SummarizedExperiment}})
#' @author Matteo Benelli (\email{matteo.benelli@uslcentro.toscana.it})
#' @export
#'
#' @examples
import_radiomic_table <- function (file = NULL) {
assertthat::assert_that(length(file) > 0,
msg = paste("[RadAR] Error: file", file, "doesn't exist"))
extractor <- "unknown / generic table"
feature_data <- read.delim(file = file, sep = "\t", row.names = 1, header = T)
feature_data <- as.matrix(feature_data)
colData <- data.frame (sample_id = colnames(feature_data))
rowData <- data.frame (feature_name = rownames(feature_data))
rownames(colData) <- colnames(feature_data)
rownames(rowData) <- rownames(feature_data)
metadata <- list()
metadata$extractor <- extractor
rdr <- new_radar_dataset(features = feature_data, rowData = rowData, colData = colData, metadata = metadata)
return(rdr)
}
new_radar_dataset <- function (features, rowData, colData, metadata) {
rdr <- SummarizedExperiment::SummarizedExperiment(assays=list(values=features),
rowData=rowData,
colData=colData,
metadata=metadata)
flag_dup <- which(duplicated(rownames(rdr)))
if (length(flag_dup) > 0) {
message(paste("[RadAR]", "Removed", length(flag_dup), "duplicated rows (identical feature names)"))
keep <- which(!duplicated(rownames(rdr)))
rdr <- rdr[keep,]
}
return(rdr)
}
check_files <- function (list_files) {
flag <- rep(T, length(list_files))
for (i in 1: length(list_files)) {
myfile <- file.path(list_files[i])
tmp <- file.size(myfile)
if (tmp < 2000) {
message (paste("[RadAR]", myfile, "is empty or corrupted and will be discarded"))
flag[i] <- F
}
}
return(which(flag))
}
allisna <- function(x) {
all(is.na(x))
}
retrieve_feature_dictionary <- function(features_names, extractor) {
unique.feature_names <- unique(features_names)
feature_type <- rep(NA, length(features_names))
feature_description <- rep(NA, length(features_names))
if (extractor %in% c("3dslicer", "pyradiomics")) {
my_dict <- dict$pyrad
} else {
my_dict <- dict$lifex
}
for (i in 1: length(unique.feature_names)) {
ix <- which(my_dict$feature_name == unique.feature_names[i])
if (length(ix) == 1) {
feature_description[which(features_names == unique.feature_names[i])] <- my_dict$feature_description[ix]
feature_type[which(features_names == unique.feature_names[i])] <- my_dict$feature_type[ix]
}
}
out <- list(feature_description = feature_description,
feature_type = feature_type)
return (out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.