#' @importFrom dplyr select
#' @importFrom MASS lda
#' @keywords internal
LDAeffectsize <- function(datalist, compareclass, classgroup, bootnums=30, LDA=2, ci=0.95){
res <- list()
tmpformula <- as.formula(paste0(classgroup, " ~. "))
for (i in seq_len(bootnums)){
compareres <- list()
df <- datalist[[i]]
for (p in seq_len(nrow(compareclass))){
tmppairs <- compareclass[p,]
z <- suppressWarnings(lda(tmpformula,data=df, tol=1e-6))
w <- z$scaling[,1]
w.unit <- w/sqrt(sum(w^2))
ss <- df %>% select(-c(classgroup)) %>% as.matrix()
LD <- ss %*% w.unit
tmpp1 <- df[[match(classgroup, colnames(df))]]==tmppairs[1]
tmpp2 <- df[[match(classgroup, colnames(df))]]==tmppairs[2]
effect.size <- abs(mean(LD[tmpp1,])-mean(LD[tmpp2,]))
wfinal <- w.unit * effect.size
coeff <- abs(wfinal)
mm <- z$means
gm <- abs(mm[match(tmppairs[1], rownames(mm)),,drop=FALSE]- mm[match(tmppairs[2], rownames(mm)),,drop=FALSE])
tmpres <- (gm+coeff) *0.5
compareres[[p]] <- tmpres
}
compareres <- do.call("rbind", compareres)
res[[i]] <- compareres
}
res <- cal_ci(x=res, classgroup=classgroup, ci=ci, method="lda")
colnames(res) <- c("f", paste0("LDA", colnames(res)[-1]))
res <- res[res$LDAmean>=LDA,]
#res <- Reduce("+", res)
#res <- apply(res/bootnums, 2, function(x)max(x))
#res <- log(1+res,10)
#res <- data.frame(f=names(res), LDA=res)
#res$f <- gsub("`", "", as.vector(res$f))
#res <- res[res$LDA>=LDA,]
if (!nrow(res)>0){stop("No features with significant differences after LDA analysis.")}
#rownames(res) <- NULL
return(res)
}
#' @keywords internal
rfimportance <- function(datalist, classgroup, bootnums, effsize=2, ci=0.95){
rfres <- list()
#tmpformula <- as.formula(paste0(class, " ~. "))
for (i in seq_len(bootnums)){
df <- datalist[[i]]
classindex <- match(classgroup, colnames(df))
X <- df[,-classindex]
X <- apply(X, 2, function(x)(x-min(x))/(max(x)-min(x)))
Y <- df[, classindex, drop = TRUE]
dfres <- randomForest::randomForest(x=X, y=Y, importance=TRUE, proximity=TRUE)
imres <- randomForest::importance(dfres, type=1)
rfres[[i]] <- imres
}
rfres <- cal_ci(x=rfres, classgroup=classgroup, ci=ci, method="rf")
colnames(rfres) <- c("f", paste0("MDA", colnames(rfres)[-1]))
#rfres <- Reduce("+", rfres)
#rfres <- data.frame(rfres/bootnums)
#rfres <- data.frame(f=rownames(rfres), MeanDecreaseAccuracy=rfres$MeanDecreaseAccuracy)
#rfres <- rfres[rfres$MeanDecreaseAccuracy>=effsize,]
return(rfres)
}
#' @title Generate random data list from a original data.
#' @param dalist list, a list contained multi data.frame.
#' @param bootnums integer, the number of bootstrap iteration, default is 30.
#' @param ratio numeric, the ratios of each data.frame to keep.
#' @param makerownames logical, whether build row.names,default is FALSE.
#' @return the list contained the data.frame generated by bootstrap iteration.
#' @author Shuangbin Xu
#' @export
#' @examples
#' \dontrun{
#' data(iris)
#' irislist <- split(iris, iris$Species)
#' set.seed(1024)
#' irislist <- get_sampledflist(irislist)
#' }
get_sampledflist <- function(dalist,
bootnums=30,
ratio=0.7,
makerownames=FALSE){
datalist <- list()
for (i in seq_len(bootnums)){
res <- lapply(dalist,function(x){x[sample(nrow(x), trunc(nrow(x)*ratio)),,drop=FALSE]})
res <- do.call("rbind", c(res, make.row.names=makerownames))
datalist[[i]] <- res
}
return(datalist)
}
#' @importFrom stats var
#' @keywords internal
remove_constant <- function(dflist){
noconstant <- list()
factornames <- colnames(dflist[[1]][!vapply(dflist[[1]],
is.numeric,logical(1))])
for (i in seq_len(length(dflist))){
tmpdata <- dflist[[i]] %>%
dplyr::group_split(!!rlang::sym(factornames)) %>%
as.list() %>%
lapply(., function(x)x[,vapply(x, is.numeric, logical(1))]) %>%
lapply(., function(x)apply(x,2,var,na.rm=T)==0)
tmpdata2 <- tmpdata %>%
dplyr::bind_rows() %>%
colSums()
noconstant[[i]] <- names(tmpdata2[tmpdata2 != length(tmpdata)])
}
noconstant <- Reduce(intersect,noconstant)
for (i in seq_len(length(dflist))){
dflist[[i]] <- dflist[[i]][,match(c(noconstant,factornames),
colnames(dflist[[i]])),drop=FALSE]
}
return(dflist)
}
#' @importFrom tibble rownames_to_column
cal_ci <- function(x, classgroup, ci=0.95, method){
if (method=="rf"){
x <- do.call("cbind", x)
x <- data.frame(t(x), check.names=FALSE)
}else{
x <- do.call("rbind", x)
}
x <- apply(x, 2, CI, ci=ci)
x <- data.frame(t(x), check.names=FALSE)
x <- log(1+x, 10)
rownames(x) <- gsub("`", "", rownames(x))
x <- rownames_to_column(x, var="f")
return(x)
}
#' @title Split Large Vector or DataFrame
#'
#' @description
#' Split large vector or dataframe to list class, which contian subset vectors
#' or dataframe of origin vector or dataframe.
#'
#' @param x vector class or data.frame class.
#' @param nums integer.
#' @param chunks integer. use chunks if nums is missing.
#' Note nums and chunks shouldn't concurrently be NULL,
#' default is NULL.
#' @param random bool, whether split randomly, default is FALSE,
#' if you want to split data randomly, you can set TRUE, and
#' if you want the results are reproducible, you should add
#' seed before.
#' @return the subset of x, vector or data.frame class.
#' @export
#' @author Shuangbin Xu
#' @examples
#' data(iris)
#' irislist <- split_data(iris, 40)
#' dalist <- c(1:100)
#' dalist <- split_data(dalist, 30)
split_data <- function(x,
nums,
chunks=NULL,
random=FALSE){
if (missing(nums)){
nums <- trunc(x/chunks)
}
if (missing(nums) && is.null(chunks)){
stop("The nums and chunks shouldn't concurrently be missing!")
}
if (!is.null(nums) && !is.null(chunks)){
message("We would use nums to split. and the chunk numbers is the length(x) provided by nums.")
}
if (is.vector(x) && length(x)>0){
ind <- seedind(length(x), nums, random=random)
}
if (is.vector(x) && length(x)==0){
stop("The legth of vector should be larger than zero, if the class of x is vector.")
}
if (is.data.frame(x)){
ind <- seedind(nrow(x), nums, random=random)
}
return(split(x, ind))
}
#' @keywords internal
seedind <- function(l, n, random=FALSE, seed=TRUE){
tmpind <- rep(seq_len(trunc(l/n)+1),n)
if (random){
#if ((l%%n)==0){
# tmpind <- tmpind[!tmpind %in% max(tmpind)]
#}
tmpind <- sample(sort(tmpind)[seq_len(l)])
}else{
tmpind <- sort(tmpind)[seq_len(l)]
}
return(tmpind)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.