# == title
# Quickly visualize a data frame
#
# == param
# -df a data frame.
# -overlap how to group numeric columns. If the overlapping rate between the ranges in the
# current column and previous numeric column is larger than this value, the two columns
# are treated as under same measurement and should be grouped.
# -nlevel If the number of levels of a character column is larger than this value, the column will
# be excluded, because it doesn't make any sense to visualize a character vector or matrix
# that contains huge number of unique elements through a heatmap.
# -show_row_names whether to show row names after the last heatmap if there are row names.
# -show_column_names whether to show column names for all heatmaps.
# -group a list of column indices that defines the grouping.
# -group_names names for each group.
# -main_heatmap which group is the main heatmap?
# -km a value larger than 1 means applying k-means clustering on rows for the main heatmap.
# -split one or multiple variables that split the rows.
# -cluster_rows whether to perform clustering on rows of the main heatmap.
# -cluster_columns whether to perform clustering on columns for all heatmaps.
# -row_order order of rows, remember to turn off ``cluster_rows``
# -... pass to `draw,HeatmapList-method` or `make_layout,HeatmapList-method`
#
# == details
# The data frame contains heterogeneous information. The `plotDataFrame` function provides a simple and quick way to
# visualize information that is stored in a data frame.
#
# There are only a few settings in this function, so the heatmaps generated by this function
# may look ugly (most of the time). However, users can customize the style of the heatmaps by manually
# constructing a `HeatmapList` object.
#
# == value
# A `HeatmapList` object.
#
# == author
# Zuguang Gu <z.gu@dkfz.de>
#
plotDataFrame = function(df, overlap = 0.25, nlevel = 30, show_row_names = TRUE,
    show_column_names = TRUE, group = NULL, group_names = names(group),
    main_heatmap = NULL, km = 1, split = NULL, cluster_rows = TRUE,
    cluster_columns = TRUE, row_order = NULL, ...) {

    # Overview: a matrix is drawn as one heatmap; a data frame is cut into
    # groups of columns (given by `group` or detected automatically), one
    # heatmap is built per group, and the heatmaps are concatenated with `+`
    # before being handed to `draw()`, whose value is returned.

    if(is.matrix(df)) {
        # With a single heatmap, that heatmap is the main one, so the settings
        # meant for the main heatmap are applied to it directly.
        ht_list = Heatmap(df, cluster_rows = cluster_rows, cluster_columns = cluster_columns,
            show_row_names = show_row_names, show_column_names = show_column_names,
            km = km, split = split, row_order = row_order)
        # do not forward a NULL `main_heatmap` to `draw()`
        if(is.null(main_heatmap)) {
            main_heatmap = 1
        }
    } else if(is.data.frame(df)) {
        cn = colnames(df)

        if(is.null(group)) {
            # Automatic grouping: consecutive numeric columns are merged into
            # one group when their value ranges overlap enough; every
            # non-numeric column with few enough unique values is its own group.
            group = list()
            current_range = NULL   # 10%-90% quantiles of the column that opened the current numeric group
            current_group = 0

            # fraction of the non-missing values of `x` inside the closed interval `rng`
            frac_within = function(x, rng) {
                mean(x >= rng[1] & x <= rng[2], na.rm = TRUE)
            }

            for(i in seq_len(ncol(df))) {
                x = df[[i]]
                if(is.numeric(x)) {
                    range2 = quantile(x, c(0.1, 0.9), na.rm = TRUE)
                    same_measurement = FALSE
                    if(!is.null(current_range)) {
                        # `current_range` is only non-NULL when the previous column is numeric
                        intersected_range = c(max(current_range[1], range2[1]), min(current_range[2], range2[2]))
                        # isTRUE() turns an undefined rate (e.g. a column that
                        # only contains NA) into "not the same measurement"
                        same_measurement = isTRUE(frac_within(x, intersected_range) > overlap) &&
                            isTRUE(frac_within(df[[i - 1]], intersected_range) > overlap)
                    }
                    if(same_measurement) {
                        group[[ current_group ]] = c(group[[ current_group ]], i)
                    } else {
                        # first numeric column after a non-numeric one, or a
                        # column that is not under the same measurement as the
                        # previous columns: open a new group
                        current_range = range2
                        current_group = current_group + 1
                        group[[ current_group ]] = i
                    }
                } else {
                    current_range = NULL
                    # columns with more than `nlevel` unique values are excluded
                    if(length(unique(x)) <= nlevel) {
                        current_group = current_group + 1
                        group[[ current_group ]] = i
                    }
                }
            }
        }

        n_group = length(group)
        if(n_group == 0) {
            stop("No column in `df` can be visualized, please check `nlevel` or `group`.")
        }

        # `group_names` defaults to `names(group)` and is evaluated here for
        # the first time, i.e. after the automatic grouping above.
        if(is.null(group_names)) {
            for(i in seq_len(n_group)) {
                if(length(group[[i]]) > 1) {
                    group_names[i] = paste0("matrix_", i)
                } else if(length(group[[i]]) == 1) {
                    group_names[i] = cn[ group[[i]] ]
                }
            }
        }

        if(is.null(main_heatmap)) {
            # by default the group with the most columns is the main heatmap
            main_heatmap = which.max(vapply(group, length, integer(1)))
        } else if(!is.numeric(main_heatmap)) {
            main_name = main_heatmap
            main_heatmap = which(group_names == main_name)[1]
            if(is.na(main_heatmap)) {
                stop(paste0("Cannot find a group named '", main_name, "' for `main_heatmap`."))
            }
        }

        for(i in seq_len(n_group)) {
            ci = group[[i]]

            # k-means and row splitting are only applied on the main heatmap
            if(i == main_heatmap) {
                split2 = split
                km2 = km
            } else {
                split2 = NULL
                km2 = 1
            }

            # only groups with more than one column get a column title
            if(length(ci) > 1) {
                column_title = group_names[i]
            } else {
                column_title = character(0)
            }

            # row names are only drawn once, after the last heatmap
            show_row_names2 = show_row_names && i == n_group

            ht = Heatmap(df[, ci, drop = FALSE], name = group_names[i], column_title = column_title,
                cluster_rows = cluster_rows, cluster_columns = cluster_columns,
                show_row_names = show_row_names2, show_column_names = show_column_names,
                km = km2, split = split2, row_order = row_order)

            if(i == 1) {
                ht_list = ht
            } else {
                ht_list = ht_list + ht
            }
        }
    } else {
        stop("`df` can only be a matrix or a data frame.")
    }

    draw(ht_list, main_heatmap = main_heatmap, ...)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.