#' Generate a count_set summerized experiment from NanoString data
#'
#' @description Generates a summerized experiment from NanoString count
#' data in an nSolver RCC Collector Tool Format or matrix and (optional)
#' sample annotations including group, batch, and sample pairing.
#'
#' @param rccexp_dir Full path to the csv file generated by nSolver RCC
#' Collector Tool Format Export. Default = NULL
#' @param count_data a matrix of count data; genes as rows, samples as columns.
#' Contains 3 columns called "Code_Class", "Gene_Name" & "Accession". Code_class
#' contains probe annotations "Endogenous", "Positive" and "Negative". Default = NULL
#' @param group Biological groups for each lane of counts defined by a factor
#' or vector. Default = NULL
#' @param batch Batch information such as nanostring chip ID, defined by a
#' factor or vector of integers. Default = NULL
#' @param pair Sample pairing defined by a factor or vector. Default = NULL
#' @param samp_id Sample IDs defined by a factor or vector. This is used to
#' build the replicate matrix. Default = NULL
#' @param output_log File path to directory where the summarized experiment
#' will be saved
#' @param count_se File path to an already generated summarized experiment
#' @param low_reps Force Ccount_set to build if a group contains less than
#' 2 replicates. Default = FALSE
#' @param verbose Verbosity ON/OFF. Default = TRUE
#'
#' @examples
#'
#' # biological groups
#' rnf5_group <- c(rep("WT", 5), rep("KO", 5))
#'
#' # sample ids
#' rnf5_sampleid <- c("GSM3638131", "GSM3638132", "GSM3638133", "GSM3638134",
#' "GSM3638135", "GSM3638136", "GSM3638137", "GSM3638138",
#' "GSM3638139", "GSM3638140")
#'
#' # build count_set
#' rnf5_count_set <- count_set(count_data = Rnf5,
#' group = rnf5_group,
#' samp_id = rnf5_sampleid)
#'
#' @return A summarized experiment
#'
#' @export count_set
#'
#' @importFrom SummarizedExperiment colData colData<- SummarizedExperiment
#' rowData rowData<- SummarizedExperiment
#' @importFrom S4Vectors metadata metadata<- SimpleList
#' @importFrom utils read.csv
count_set <- function(rccexp_dir = NULL,
count_data = NULL,
group = NULL,
batch = NULL,
pair = NULL,
samp_id = NULL,
output_log = NULL,
count_se = NULL,
low_reps = FALSE,
verbose = TRUE) {
######## Check Inputs #######
# check data input
if(is.null(rccexp_dir) & is.null(count_data) & is.null(count_se)){
stop("rccexp_dir or count_data must be specified")}
# check rccexp_dir
if(is.null(rccexp_dir) & is.null(count_data) & is.null(count_se)){
stop("A full path to a csv exported by nSolver RCC Collector Tool Format
must be provided.")}
# check group
if(!is.null(group)) {
group <- as.factor(group)
if(low_reps == FALSE & (min(summary(group)))<2){
stop("The groups provided contain groups with less than two replicates.")
}
}
# check batch
if(!is.null(batch)) {
batch <- as.factor(batch)
}
# check pair
if(!is.null(pair)) {
pair <- as.factor(pair)
if((min(summary(pair)))<2) warning("The pairs provided
contain >=1 pair with less than 2 samples.", "\n", sep="")
}
# check output_log
if(is.null(output_log)){
message("### No output directory provided. The summarized experiment will
not be saved",
"\n", sep=" ")}
# check low_reps
if(!is.null(low_reps)) {
if(low_reps != TRUE & low_reps != FALSE) stop("options for low_reps are
TRUE & FALSE")
}
# check samp_id
if(!is.null(samp_id)) {
samp_id <- as.character(samp_id)
}
####### Finished Input Checks #######
####### Create Count Set #######
# if rccexp_dir is provided and is correct file type
if(!is.null(rccexp_dir) & is.null(count_se)) {
rccexp <- read.csv(rccexp_dir, strip.white = TRUE, check.names = FALSE)
names(rccexp)[1:3] <- c("Code_Class", "Gene_Name", "Accession")
nsamples <- (ncol(rccexp)-3)
if(verbose == TRUE) {
message(paste("###", nsamples, "samples have been loaded", "\n",
sep = " "))
}
if(!("FOV Counted" %in% rccexp[[1]]))
stop("The file provided is not a csv exported by nSolver RCC Collector
Tool Format.")
}
#if rccexp_dir is not provided and count_data is
if(is.null(rccexp_dir) &!is.null(count_data)) {
nsamples <- (ncol(count_data)-3)
if(verbose) {
message(paste("###", nsamples, "samples have been loaded", "\n",
sep = " "))
}
}
#if rccexp_dir & count_data is not provided but count_se is
if(!is.null(rccexp_dir) || !is.null(count_data)){
# check group length
if(is.null(group)) group <- rep(1, nsamples)
if(!is.null(group) & length(group) != nsamples){
stop("The length of group must be equal to the number of samples.")}
# check batch length
if(is.null(batch)) batch = rep(1, nsamples)
if(!is.null(batch) & length(batch) != nsamples){
stop("The length of batch must be equal to the number of samples.")}
# check pair length
if(is.null(pair)) pair = rep(0, nsamples)
if(!is.null(pair) & length(pair) != nsamples){
stop("The length of pairs must be equal to the number of samples.")}
# check samp_id length
if(is.null(samp_id)) samp_id = rep(0, nsamples)
if(!is.null(samp_id) & length(samp_id) != nsamples){
stop("The length of replicates must be equal to the number
of samples.")}
# make a count table
if(!is.null(rccexp_dir)){
count_tab <- subset(rccexp, Code_Class == "Endogenous"|
Code_Class == "Endogenous1"|
Code_Class == "Housekeeping"|
Code_Class == "Positive"|
Code_Class == "Negative"|
Code_Class == "Ligation"|
Code_Class == "SpikeIn",
select = 1:ncol(rccexp))
}
if(!is.null(count_data)){
count_tab <- subset(count_data, Code_Class == "Endogenous"|
Code_Class == "Endogenous1"|
Code_Class == "Housekeeping"|
Code_Class == "Positive"|
Code_Class == "Negative"|
Code_Class == "Ligation"|
Code_Class == "SpikeIn",
select = 1:ncol(count_data))
}
# make annotation table and remove annotations from count table
annot_tab <- count_tab[, c("Code_Class", "Gene_Name", "Accession")]
annot_tab$Code_Class <- as.factor(as.character(annot_tab$Code_Class))
rownames(count_tab) <- count_tab$Gene_Name
rms <- c("Code_Class", "Gene_Name", "Accession")
count_tab <- count_tab[, !names(count_tab) %in% rms]
#ensure counts are numeric
count_num <- apply(count_tab, 2, as.numeric)
rownames(count_num) <- rownames(count_tab)
# make a sample table
samp_tab <- as.data.frame(cbind(as.character(group),
(as.character(batch)),
(as.character(pair))))
samp_tab$samp_id <- samp_id
rownames(samp_tab) <- colnames(count_num)
colnames(samp_tab) <- c("group", "batch", "pair", "samp_id")
# make a summerized experiment
se_build <-
SummarizedExperiment(assays = SimpleList(counts = as.matrix(count_num)))
# add phenotype data to samples
colData(se_build) <- S4Vectors::DataFrame(samp_tab)
rowData(se_build) <- S4Vectors::DataFrame(annot_tab)
se_out <- se_build
#save.se file
if(!is.null(output_log)){
save(se_out, file = paste(output_log, "se.R", sep=""))
if(verbose == TRUE){
message("### summariseExperiment saved to:", paste(output_log, "se.R",
"\n", sep=""))
}
}
}
# if a summarized experiment or file path to a se.R is provided
if(!is.null(count_se)){
#if file path provided
if(is.character(count_se)){
load(count_se)
if(verbose == TRUE){
message("### A summarizedExperiment has been loaded from:", count_se,
"\n", sep="")
}
}
# check the file format
if(count_se@class != "SummarizedExperiment"){
stop(paste("summarized file provided is not a SummarizedExperiment,
Please produce a SummarizedExperiment using count_set.", "\n",
sep=""))
}
se_out <- count_se
# if already a summarized file
if(is.character(count_se) == FALSE &&
count_se@class == "SummarizedExperiment"){
se_out <- count_se
if(verbose == TRUE){
message("### an existing summarized experiment has been provided")
}
}
}
return(se_out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.