#' @title tfidf
#'
#' @description Get a normalized TFIDF matrix from peak-by-cell matrix from single cell profiles
#'
#' @param bmat a sparse/dense matrix or dataframe indicating count matrix (peak-by-cell matrix)
#' @param mat_binary a logic value to indicate if input bmat is spare matrix or not. bmat will be converted into a sparse matrix.
#' @param TF a logic value to indicate if term frequency (TF) normalization is performed
#' @param log_TF a logic value to indicate if natural logarithm values of the TF are calculated and used
#' @param scale_factor a scale factor used to multiple the resulting TF-IDF matrix
#'
#' @return a sparse matrix of normalized TFIDF matrix
#' @export
#'
#' @import Matrix
#' @importFrom stats quantile
#'
#' @examples
#' \dontrun{
#' tfidf_mat <- tfidf(bmat=peakbycellmatrix, mat_binary=TRUE, TF=TRUE, log_TF=TRUE)}
#'
tfidf <- function(bmat, mat_binary=TRUE, TF=TRUE, log_TF=TRUE, scale_factor=100000) {
if (mat_binary) {
# "term frequency" method
bmat@x[bmat@x>=1] <- 1
message("[info] binarize matrix")
}
if (TF) {
# "term frequency" method
tf <- t(t(bmat) / Matrix::colSums(bmat))
} else {
# "raw count" method
tf <- bmat
}
message("[info] calculate tf")
# Either TF method can optionally be log scaled
if (log_TF) {
if (TF) {
tf@x <- log1p(tf@x * scale_factor)
} else {
tf@x <- log1p(tf@x * 1)
}
}
message("[info] calculate idf")
# IDF "inverse document frequency smooth" method
idf <- log(1 + ncol(bmat) / Matrix::rowSums(bmat))
# TF-IDF
fast_tfidf = function(tf, idf) {
tf <- t(tf)
tf@x <- tf@x * rep.int(idf, diff(tf@p))
tf <- t(tf)
return(tf)
}
message("[info] fast log tf-idf")
tf_idf_counts <- fast_tfidf(tf, idf)
rownames(tf_idf_counts) <- rownames(bmat)
colnames(tf_idf_counts) <- colnames(bmat)
return(tf_idf_counts)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.