#' @eval get_description('split_data')
#' @export split_data
#' @examples
#' M = split_data(p_train=0.75)
#'
split_data <- function(p_train, ...) {
    # Construct the 'split_data' object through struct's generic constructor.
    # p_train is passed by name; anything supplied via ... is forwarded
    # untouched to struct::new_struct().
    model_object <- struct::new_struct(
        'split_data',
        p_train = p_train,
        ...
    )
    model_object
}
# S4 class definition for the 'split_data' model.
# The class extends 'model' and declares three entity slots: one input
# parameter (p_train) and two outputs (training, testing). The prototype
# supplies the descriptive metadata and the default entity objects.
.split_data <- setClass(
    "split_data",
    contains = c('model'),
    slots = c(
        p_train = 'entity',
        training = 'entity',
        testing = 'entity'
    ),
    prototype = list(
        name = 'Split data',
        # Each fragment ends with a space so that paste0() produces properly
        # separated sentences.
        description = paste0('The data matrix is divided into two subsets. ',
            'A predefined proportion of the samples are randomly selected for a ',
            'training set, and the remaining samples are used for the test set.'),
        type = 'processing',
        predicted = 'testing',
        # Slot names registered as input parameters and as outputs.
        .params = c('p_train'),
        .outputs = c('training', 'testing'),

        # Input parameter: proportion of samples for the training set
        # (defaults to 0.75).
        p_train = entity(name = 'Proportion in training set',
            description = paste0('The proportion of samples selected for the ',
                'training set.'),
            value = 0.75,
            type = 'numeric'),

        # Output: the training subset; initialised with an empty
        # DatasetExperiment.
        training = entity(name = 'A DatasetExperiment of training data',
            description = 'A DatasetExperiment object containing samples selected for the training set.',
            type = 'DatasetExperiment',
            value = DatasetExperiment()
        ),

        # Output: the testing subset; initialised with an empty
        # DatasetExperiment.
        testing = entity(name = 'A DatasetExperiment of data for testing',
            description = 'A DatasetExperiment object containing samples selected for the testing set.',
            type = 'DatasetExperiment',
            value = DatasetExperiment()
        )
    )
)
#' @export
#' @template model_apply
setMethod(f = "model_apply",
    signature = c("split_data", "DatasetExperiment"),
    definition = function(M, D) {
        # Randomly partition the samples (rows) of D into a training set and a
        # testing set, store both as outputs of M, and return the updated M.
        opt <- param_list(M)

        # total number of samples (rows of the data table)
        n_samples <- nrow(D$data)
        # number of samples assigned to the training set
        n_train <- floor(n_samples * opt$p_train)

        # Draw the training rows at random, without replacement.
        # sample.int() avoids the 1:0 == c(1, 0) pitfall when there are no rows.
        train_idx <- sample.int(n_samples, size = n_train, replace = FALSE)

        # Rows not drawn for training form the test set. The complement is
        # computed explicitly because negative indexing with an empty vector
        # (x[-integer(0), ]) selects zero rows rather than all rows, which
        # would leave the test set empty whenever n_train is 0.
        test_idx <- setdiff(seq_len(n_samples), train_idx)

        # NOTE(review): c() below yields a length-2 character vector for name
        # and description rather than a single pasted string - confirm this
        # is intended.
        training <- DatasetExperiment(
            data = D$data[train_idx, , drop = FALSE],
            sample_meta = D$sample_meta[train_idx, , drop = FALSE],
            variable_meta = D$variable_meta,
            name = c(D$name, '(Training set)'),
            description = c(D$description, 'A subset of the data has been selected as a training set'))

        testing <- DatasetExperiment(
            data = D$data[test_idx, , drop = FALSE],
            sample_meta = D$sample_meta[test_idx, , drop = FALSE],
            variable_meta = D$variable_meta,
            name = c(D$name, '(Testing set)'),
            description = c(D$description, 'A subset of the data has been selected as a test set'))

        output_value(M, 'training') <- training
        output_value(M, 'testing') <- testing

        return(M)
    }
)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.