Nothing
#' gemini_create_input
#'
#' @description Creates a gemini.input object from a counts matrix with given annotations.
#' @param counts.matrix a matrix of counts with rownames corresponding to features (e.g. guides) and colnames corresponding to samples.
#' @param sample.replicate.annotation a data.frame of annotations for each sample/replicate pair.
#' Note that at least one column in \code{sample.replicate.annotation} must correspond to the colnames of \code{counts.matrix} (see Details) (default = NULL)
#' @param guide.annotation a data.frame of annotations for each guide. Note that at least one column in \code{guide.annotation} must correspond to the rownames of counts.matrix (default = NULL)
#' @param samplesAreColumns a logical indicating if samples are on the columns or rows of counts.matrix. (default = TRUE)
#' @param sample.column.name a character or integer indicating which column of \code{sample.replicate.annotation} describes the samples.
#' @param gene.column.names a character or integer vector of length(2) indicating which columns of \code{guide.annotation} describe the genes being targeted.
#' @param ETP.column a character or integer vector indicating which column(s) of \code{counts.matrix} contain the early time-point(s) of the screen (i.e. pDNA, early sequencing, etc.). Defaults to the first column.
#' @param LTP.column a character or integer vector indicating which column(s) is the later time-point of the screen (i.e. day21, post-treatment, etc.). Defaults to \code{(1:ncol(counts.matrix))[-ETP.column]}, or all other columns except for those specified by \code{ETP.column}.
#' @param verbose Verbosity (default FALSE)
#' @return a gemini.input object
#'
#' @details
#' This function initializes a gemini.input object from a counts matrix. There are a few key assumptions made in the input format.
#' \itemize{
#' \item The counts matrix is regular.
#' \item The counts matrix structure is in accordance with the \code{samplesAreColumns} parameter.
#' \item The first column of \code{sample.replicate.annotation} matches with the existing dimension names of the counts matrix.
#' \item The first column of \code{guide.annotation} matches with the existing dimension names of the counts matrix.
#' \item \code{sample.column.name} must specify a column in \code{sample.replicate.annotation} (either by name or index) that describes unique samples.
#' \item \code{gene.column.names} must specify two columns in \code{guide.annotation} (either by name or index) that describe genes.
#' }
#'
#' @importFrom dplyr mutate
#'
#' @examples
#' data("counts", package = "gemini")
#' data("sample.replicate.annotation", package = "gemini")
#' data("guide.annotation", package = "gemini")
#' Input <- gemini_create_input(
#' counts.matrix = counts,
#' sample.replicate.annotation = sample.replicate.annotation,
#' guide.annotation = guide.annotation,
#' sample.column.name = "samplename",
#' gene.column.names = c("U6.gene", "H1.gene")
#' )
#'
#' @export
gemini_create_input <-
  function(counts.matrix,
           sample.replicate.annotation = NULL,
           guide.annotation = NULL,
           samplesAreColumns = TRUE,
           sample.column.name = "samplename",
           gene.column.names = NULL,
           ETP.column = 1,
           LTP.column = NULL,
           verbose = FALSE) {
    # Build a gemini.input object from a counts matrix:
    #   1. validate required inputs,
    #   2. orient the matrix so that samples are on the columns,
    #   3. resolve the early (ETP) and late (LTP) time-point columns,
    #   4. merge sample and guide annotations onto the matrix dimnames,
    #   5. hand the assembled list to gemini_prepare_input().
    # Returns the prepared list with class "gemini.input" appended.

    # ---- 1. Required inputs -------------------------------------------------
    # All conditions here are scalar, so use the short-circuiting || operator.
    if (is.null(rownames(counts.matrix)) || is.null(colnames(counts.matrix))) {
      stop("No dimnames for counts.matrix - no annotations available.")
    }
    if (is.null(sample.replicate.annotation)) {
      stop("Could not determine samplename. Please add sample/replicate annotation and specify and sample.column.name. See ?gemini_create_input.")
    }
    if (is.null(guide.annotation)) {
      stop("Could not determine gene/guide data. Please add guide annotation and specify and gene.column.names. See ?gemini_create_input.")
    }
    if (is.null(gene.column.names) || is.null(sample.column.name)) {
      stop("Did you provide gene.column.names and/or sample.column.name?")
    }
    if (length(sample.column.name) != 1L) {
      stop("sample.column.name must identify exactly one column of sample.replicate.annotation.")
    }

    # ---- 2. Orientation -----------------------------------------------------
    # Transpose BEFORE resolving ETP/LTP columns: those arguments refer to
    # sample columns, which only exist as columns once the matrix is oriented.
    if (!samplesAreColumns) {
      if (verbose) message("Transposing matrix...")
      # Re-apply the reversed dimnames explicitly so they survive even when
      # counts.matrix is a data.frame with automatic row names.
      dn <- dimnames(counts.matrix)
      counts.matrix <- t(counts.matrix)
      dimnames(counts.matrix) <- rev(dn)
    }
    sample.names <- colnames(counts.matrix)

    # ---- 3. ETP / LTP columns ----------------------------------------------
    # Normalise a character or numeric column specification to integer
    # positions in counts.matrix, failing loudly on anything unresolvable.
    resolve_columns <- function(cols, arg.name) {
      if (is.character(cols)) {
        unknown <- setdiff(cols, sample.names)
        if (length(unknown) > 0L) {
          stop(arg.name, " not found in colnames of counts.matrix: ",
               paste(unknown, collapse = ", "))
        }
        # Positions are returned in matrix column order.
        return(which(sample.names %in% cols))
      }
      if (is.numeric(cols)) {
        if (anyNA(cols) || any(cols < 1) || any(cols > length(sample.names)) ||
            any(cols != round(cols))) {
          stop(arg.name, " must contain column indices between 1 and ",
               length(sample.names), ".")
        }
        return(as.integer(cols))
      }
      stop(arg.name, " must be a character or numeric vector.")
    }

    ETP.column <- resolve_columns(ETP.column, "ETP.column")
    if (length(ETP.column) == 0L) {
      # Guard against x[-integer(0)], which would silently select nothing.
      stop("ETP.column must identify at least one column of counts.matrix.")
    }
    if (is.null(LTP.column)) {
      # Default: every column that is not an early time-point.
      LTP.column <- setdiff(seq_len(ncol(counts.matrix)), ETP.column)
    } else {
      LTP.column <- resolve_columns(LTP.column, "LTP.column")
    }
    ordered.columns <- c(ETP.column, LTP.column)

    # ---- 4. Annotations -----------------------------------------------------
    # Default guide annotations: rownames of the counts matrix.
    gannot <- data.frame(rowname = rownames(counts.matrix),
                         stringsAsFactors = FALSE)
    # Default sample annotations: colnames of the counts matrix, ETP then LTP.
    sannot <- data.frame(
      colname = sample.names[ordered.columns],
      TP = rep(c("ETP", "LTP"),
               times = c(length(ETP.column), length(LTP.column))),
      stringsAsFactors = FALSE
    )

    # Find annotation columns whose values are all present in `keys`.
    # Iterates column-wise instead of apply(), which would coerce the whole
    # data.frame through as.matrix() and pad numeric columns with whitespace.
    find_key_columns <- function(annotation, keys) {
      annotation <- as.data.frame(annotation, stringsAsFactors = FALSE)
      unname(which(vapply(annotation,
                          function(column) all(column %in% keys),
                          logical(1))))
    }

    # Rename the user-designated sample column to "samplename"; the column may
    # be given by name or by position.
    if (is.numeric(sample.column.name)) {
      if (is.na(sample.column.name) || sample.column.name < 1 ||
          sample.column.name > ncol(sample.replicate.annotation)) {
        stop("sample.column.name is not a valid column index of sample.replicate.annotation.")
      }
      colnames(sample.replicate.annotation)[sample.column.name] <- "samplename"
    } else {
      if (!sample.column.name %in% colnames(sample.replicate.annotation)) {
        stop("sample.column.name '", sample.column.name,
             "' not found in sample.replicate.annotation.")
      }
      colnames(sample.replicate.annotation)[colnames(sample.replicate.annotation) == sample.column.name] <- "samplename"
    }

    if (verbose) message("Merging sample annotations with colnames of counts.matrix...")
    sample.key <- find_key_columns(sample.replicate.annotation, sannot[, 1])
    if (length(sample.key) == 0L) {
      stop("No columns found in sample.replicate.annotation which completely match colnames of counts.matrix...")
    }
    sannot <- merge(sannot, sample.replicate.annotation,
                    by.x = 1, by.y = sample.key[1],
                    no.dups = FALSE, all = FALSE, sort = FALSE,
                    suffixes = c("", ".y"))

    if (verbose) message("Merging guide annotations with rownames()...")
    guide.key <- find_key_columns(guide.annotation, gannot[, 1])
    if (length(guide.key) == 0L) {
      stop("No columns found in guide.annotation which completely match rownames()...")
    }
    # Use only the first matching column: merge() requires by.x and by.y to
    # name the same number of columns.
    gannot <- merge(gannot, guide.annotation,
                    by.x = 1, by.y = guide.key[1],
                    no.dups = FALSE, all = FALSE, sort = FALSE,
                    suffixes = c("", ".y"))
    # NOTE(review): merge(all = FALSE, sort = FALSE) may drop or reorder rows
    # relative to counts.matrix; presumably gemini_prepare_input() reconciles
    # this by name - confirm against its implementation.

    # ---- 5. Assemble and prepare -------------------------------------------
    Output <- list(
      # drop = FALSE keeps a matrix (with colnames) even for a single column.
      counts = data.matrix(counts.matrix[, ordered.columns, drop = FALSE]),
      replicate.map = as.data.frame(sannot, optional = TRUE,
                                    row.names = seq_len(nrow(sannot))),
      guide.pair.annot = as.data.frame(gannot, optional = TRUE,
                                       row.names = seq_len(nrow(gannot)))
    )
    Output <- gemini_prepare_input(Output, gene.columns = gene.column.names)
    class(Output) <- union(class(Output), "gemini.input")
    if (verbose) message("Created gemini input object.")
    Output
  }
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.