Nothing
# Implementation registered as the group_by method for GMQLDataset objects.
# It reads the value held by `.data` through value() and forwards it,
# together with the grouping and aggregate arguments, to gmql_group().
# The result of gmql_group() is returned as is.
group_by.GMQLDateset <- function(.data, groupBy_meta = conds(),
                                 groupBy_regions = c(""),
                                 region_aggregates = NULL,
                                 meta_aggregates = NULL) {
  # Extract the dataset value first, so a failure here surfaces before
  # gmql_group() starts checking the other arguments.
  dataset_value <- value(.data)
  gmql_group(
    dataset_value,
    groupBy_meta,
    groupBy_regions,
    region_aggregates,
    meta_aggregates
  )
}
#' Method group_by
#'
#' @description Wrapper to GMQL GROUP operator
#' @description It performs the grouping of samples and/or sample regions of
#' the input dataset based on one specified metadata and/or region attribute.
#' If the metadata attribute is multi-value, i.e., it assumes multiple values
#' for sample (e.g., both <disease, cancer> and <disease, diabetes>),
#' the grouping identifies different groups of samples for each attribute value
#' combination (e.g., group1 for samples that feature the combination
#' <disease, cancer>, group2 for samples that feature the combination
#' <disease, diabetes>, and group3 for samples that feature both combinations
#' <disease, cancer> and <disease, diabetes>). For each obtained group, it is
#' possible to request the evaluation of aggregate functions on metadata
#' attributes; these functions consider the metadata contained in all samples
#' of the group. The regions, their attributes and their values in output are
#' the same as the ones in input for each sample, and the total number of
#' samples does not change. All metadata in the input samples are conserved
#' with their values in the output samples, with the addition of the "_group"
#' attribute, whose value is the identifier of the group to which the specific
#' sample is assigned; other metadata attributes can be added as aggregate
#' functions computed on specified metadata. When used on region attributes,
#' group_by can group regions of each sample individually, based on their
#' coordinates (chr, start, stop, strand) and possibly also on other
#' specified grouping region attributes (when these are present in the schema
#' of the input dataset). In each sample, regions found in the same group
#' (i.e., regions with same coordinates and grouping attribute values),
#' are combined into a single region; this allows merging regions that are
#' duplicated inside the same sample (based on the values of their coordinates
#' and of other possible specified region attributes). For each grouped
#' region, it is possible to request the evaluation of aggregate functions
#' on other region attributes (i.e., which are not coordinates, or grouping
#' region attributes). This use is independent of any grouping
#' performed on metadata. The generated output schema only contains the
#' original region attributes on which the grouping has been based, plus
#' any attributes computed as aggregate functions.
#' If the group_by is applied only on regions, the output metadata and their
#' values are equal to the ones in input. Both when applied on metadata and
#' on regions, the group_by operation returns a number of output samples
#' equal to the number of input ones. Note that the two possible uses of
#' group_by, on metadata and on regions, are perfectly orthogonal,
#' therefore they can be used in combination or independently.
#'
#'
#' @importFrom rJava J .jarray .jnull
#' @importFrom dplyr group_by
#'
#' @param .data GMQLDataset object
#' @param groupBy_meta result of the \code{\link{conds}} function, which
#' specifies the metadata attributes used to group the samples
#'
#' @param groupBy_regions vector of strings made up by region attribute names
#' @param meta_aggregates It accepts a list of aggregate functions on
#' metadata attribute.
#' All the elements in the form \emph{key} = \emph{aggregate}.
#' The \emph{aggregate} is an object of class AGGREGATES.
#' The aggregate functions available are: \code{\link{SUM}},
#' \code{\link{COUNTSAMP}}, \code{\link{MIN}}, \code{\link{MAX}},
#' \code{\link{AVG}}, \code{\link{MEDIAN}}, \code{\link{STD}},
#' \code{\link{BAG}}, \code{\link{BAGD}}, \code{\link{Q1}},
#' \code{\link{Q2}}, \code{\link{Q3}}.
#' Every aggregate accepts a string value, except for COUNTSAMP, which does
#' not have any value.
#' The argument of an aggregate function must exist among the metadata
#' attributes of the input dataset. Two styles are allowed:
#' \itemize{
#' \item list of key-value pairs: e.g. sum = SUM("cell")
#' \item list of values: e.g. SUM("cell")
#' }
#' "mixed style" is not allowed
#'
#' @param region_aggregates It accepts a list of aggregate functions on
#' region attribute.
#' All the elements in the form \emph{key} = \emph{aggregate}.
#' The \emph{aggregate} is an object of class AGGREGATES.
#' The aggregate functions available are: \code{\link{SUM}},
#' \code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}},
#' \code{\link{AVG}}, \code{\link{MEDIAN}}, \code{\link{STD}},
#' \code{\link{BAG}}, \code{\link{BAGD}}, \code{\link{Q1}},
#' \code{\link{Q2}}, \code{\link{Q3}}.
#' Every aggregate accepts a string value, except for COUNT, which does
#' not have any value.
#' Argument of 'aggregate function' must exist in schema, i.e. among region
#' attributes. Two styles are allowed:
#' \itemize{
#' \item list of key-value pairs: e.g. sum = SUM("pvalue")
#' \item list of values: e.g. SUM("pvalue")
#' }
#' "mixed style" is not allowed
#'
#' @return GMQLDataset object. It contains the value to use as input
#' for the subsequent GMQLDataset method
#'
#' @examples
#'
#' ## This statement initializes and runs the GMQL server for local execution
#' ## and creation of results on disk. Then, with system.file() it defines
#' ## the path to the folder "DATASET" in the subdirectory "example"
#' ## of the package "RGMQL" and opens such file as a GMQL dataset named "exp"
#' ## using CustomParser
#'
#' init_gmql()
#' test_path <- system.file("example", "DATASET", package = "RGMQL")
#' exp = read_gmql(test_path)
#'
#' ## This GMQL statement groups samples of the input 'exp' dataset according
#' ## to their value of the metadata attribute 'tumor_type' and computes the
#' ## maximum value that the metadata attribute 'size' takes inside the samples
#' ## belonging to each group. The samples in the output GROUPS_T dataset
#' ## have a new _group metadata attribute which indicates which group they
#' ## belong to, based on the grouping on the metadata attribute tumor_type.
#' ## In addition, they present the new metadata aggregate attribute 'max_size'.
#' ## Note that the samples without metadata attribute 'tumor_type' are
#' ## assigned to a single group with _group value equal to 0
#'
#' GROUPS_T = group_by(exp, conds("tumor_type"),
#' meta_aggregates = list(max_size = MAX("size")))
#'
#' ## This GMQL statement takes as input dataset the same input dataset as
#' ## the previous example. Yet, it calculates new _group values based on the
#' ## grouping attribute 'cell', and adds the metadata aggregate attribute
#' ## 'n_samp', which counts the number of samples belonging to the respective
#' ## group. It has the following output GROUPS_C dataset samples
#' ## (note that now no sample has metadata attribute _group with value
#' ## equal 0 since all input samples include the metadata attribute cell,
#' ## with different values, on which the new grouping is based)
#'
#' GROUPS_C = group_by(exp, conds("cell"),
#' meta_aggregates = list(n_samp = COUNTSAMP()))
#'
#' ## This GMQL statement groups the regions of each 'exp' dataset sample by
#' ## region coordinates chr, left, right, strand (these are implicitly
#' ## considered) and the additional region attribute score (which is
#' ## explicitly specified), and keeps only one region for each group.
#' ## In the output GROUPS dataset schema, the new region attributes
#' ## avg_pvalue and max_qvalue are added, respectively computed as the
#' ## average of the values taken by the pvalue and the maximum of the values
#' ## taken by the qvalue region attributes in the regions grouped together,
#' ## and the computed value is assigned to each region of each output sample.
#' ## Note that the region attributes which are not coordinates or score are
#' ## discarded.
#'
#' GROUPS = group_by(exp, groupBy_regions = "score",
#' region_aggregates = list(avg_pvalue = AVG("pvalue"),
#' max_qvalue = MAX("qvalue")))
#'
#' @name group_by
#' @rdname group_by
#' @aliases group_by,GMQLDataset-method
#' @aliases group_by-method
#' @export
setMethod(
  f = "group_by",
  signature = "GMQLDataset",
  # implementation function; its name is spelled "GMQLDateset" where defined
  definition = group_by.GMQLDateset
)
# Internal worker behind group_by(): validates the grouping arguments,
# converts them to Java objects and invokes the group() method of the
# Java wrapper class "it/polimi/genomics/r/Wrapper".
#
# Arguments:
#   input_data        value forwarded unchanged to the wrapper as the last
#                     argument of group().
#   group_meta        NULL, or an object with a "condition" element among
#                     its names (anything else is rejected with
#                     "use function conds()").
#   group_reg         NULL, or a character vector of region attribute
#                     names; empty strings and duplicates are dropped.
#   region_aggregates NULL / empty, or aggregates handed to .aggregates().
#   meta_aggregates   NULL / empty, or aggregates handed to .aggregates().
#
# Returns GMQLDataset() built from the second element of the wrapper
# response.
#
# Raises an error when an argument is invalid, when the first element of
# the wrapper response is a non-zero status (the error message is then the
# second element), or when that status cannot be parsed as an integer.
gmql_group <- function(input_data, group_meta, group_reg, region_aggregates,
                       meta_aggregates) {
  # Validate everything up front so that bad input is rejected before any
  # Java object is created.
  if (!is.null(group_meta) && !("condition" %in% names(group_meta))) {
    stop("use function conds()")
  }
  if (!is.null(group_reg) && !is.character(group_reg)) {
    # This check concerns the region grouping attributes, not metadata.
    stop("groupBy_regions: no valid input")
  }

  java_null <- function() .jnull("java/lang/String")
  java_array <- function(x) .jarray(x, dispatch = TRUE)

  # Metadata grouping condition.
  cond <- if (is.null(group_meta)) NULL else .join_condition(group_meta)
  join_matrix <- if (is.null(cond)) java_null() else java_array(cond)

  # Region grouping attributes: "" placeholders and duplicates are dropped.
  if (!is.null(group_reg)) {
    group_reg <- group_reg[!group_reg %in% ""]
    group_reg <- group_reg[!duplicated(group_reg)]
  }
  group_reg <- if (length(group_reg) > 0) {
    java_array(group_reg)
  } else {
    java_null()
  }

  # length(NULL) is 0, so a single length test covers NULL and empty input.
  metadata_matrix <- if (length(meta_aggregates) > 0) {
    java_array(.aggregates(meta_aggregates, "AGGREGATES"))
  } else {
    java_null()
  }
  region_matrix <- if (length(region_aggregates) > 0) {
    java_array(.aggregates(region_aggregates, "AGGREGATES"))
  } else {
    java_null()
  }

  WrappeR <- J("it/polimi/genomics/r/Wrapper")
  response <- WrappeR$group(
    join_matrix, metadata_matrix, group_reg, region_matrix, input_data
  )

  # response[1] is a status code, response[2] the payload or error message.
  status <- strtoi(response[1])
  val <- response[2]
  if (is.na(status)) {
    # strtoi() yields NA for unparsable input; fail with a readable message
    # instead of the cryptic error `if (NA)` would raise.
    stop("unexpected response from GMQL wrapper: ", response[1])
  }
  if (status != 0L) {
    stop(val)
  }
  GMQLDataset(val)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.