###########################
## Visualize tissue data ##
###########################
#' Visualize tissue data
#'
#' Visualize the expression of protein of interest in each target tissue by cell
#' types.
#'
#' @param data Input the list object generated by \code{hpa_download()} or
#' \code{hpa_subset()}. Require the \code{normal_tissue} dataset. Use HPA
#' histology data (built-in) by default.
#' @param targetGene Vector of strings of HGNC gene symbols. By default it is
#' set to \code{c('TP53', 'EGFR', 'CD44', 'PTEN')}. You can also mix
#' HGNC gene symbols and ensemnl ids (start with ENSG) and they will be
#' converted to HGNC gene symbols.
#' @param targetTissue Vector of strings of normal tissues. Default to all.
#' @param targetCellType Vector of strings of normal cell types. Default to all.
#' @param color Vector of 4 colors used to depict different expression levels.
#' @param customTheme Logical argument. If \code{TRUE}, the function will return
#' a barebone ggplot2 plot to be customized further.
#'
#' @return This function will return a ggplot2 plot object, which can be further
#' modified if desirable. The tissue data is visualized as a heatmap: x axis
#' contains inquired protein and y axis contains tissue/cells of interest.
#'
#' @family visualization functions
#'
#' @examples
#' data("hpa_histology_data")
#' geneList <- c('TP53', 'EGFR', 'CD44', 'PTEN', 'IDH1', 'IDH2', 'CYCS')
#' tissueList <- c('breast', 'cerebellum', 'skin 1')
#'
#' ## A typical function call
#' hpaVisTissue(data=hpa_histology_data,
#' targetGene=geneList,
#' targetTissue=tissueList)
#'
#' @import dplyr
#' @import ggplot2
#' @export
hpaVisTissue <- function(data = NULL,
targetGene = NULL,
targetTissue = NULL,
targetCellType = NULL,
color = c('#FCFDBF', '#FE9F6D', '#DE4968', '#8C2981'),
customTheme = FALSE) {
# Check if parameters are provided or not
data <- is_null_data(data = data)
target_check <- is_null_target(gene = targetGene,
tissue = targetTissue,
celltype = targetCellType)
targetGene <- target_check$targetGene
# targetTissue <- target_check$targetTissue
# Create plot dataset with target parameters
plotData <- data$normal_tissue %>%
filter(gene %in% targetGene) %>%
{if (!is.null(targetTissue)) filter(., tissue %in% targetTissue) else .} %>%
{if (!is.null(targetCellType)) filter(., cell_type %in% targetCellType) else .}
plotData <- mutate(
plotData,
tissue_cell = paste0(tissue, ' / ', cell_type),
level = factor(level,
levels = c('High', 'Medium', 'Low', 'Not detected'))
)
levelColors <- c(
'Not detected' = color[1],
'Low' = color[2],
'Medium' = color[3],
'High' = color[4]
)
plot <- ggplot(plotData, aes(x = gene, y = tissue_cell)) +
geom_tile(aes(fill = level)) +
scale_x_discrete(limits = targetGene) +
scale_fill_manual(values = levelColors)
if (!customTheme) {
plot <- plot +
ylab('Tissue / Cell') +
xlab('Genes') +
theme_minimal() +
theme(panel.grid = element_blank()) +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
coord_equal()
}
return(plot)
}
##############################
## Visualize pathology data ##
##############################
#' Visualize pathology data
#'
#' Visualize the expression of genes of interest in each cancer.
#'
#' @param data Input the list object generated by \code{hpa_download()} or
#' \code{hpa_subset()}. Require the \code{pathology} dataset. Use HPA
#' histology data (built-in) by default.
#' @param targetGene Vector of strings of HGNC gene symbols. By default it is
#' set to \code{c('TP53', 'EGFR', 'CD44', 'PTEN')}. You can also mix HGNC gene
#' symbols and ensemnl ids (start with ENSG) and they will be converted to
#' HGNC gene symbols.
#' @param targetCancer Vector of strings of normal tissues. The function will
#' plot all available cancer by default.
#' @param facetBy Determine how multiple graphs would be faceted. Either
#' \code{cancer} (default) or \code{gene}.
#' @param color Vector of 4 colors used to depict different expression levels.
#' @param customTheme Logical argument. If \code{TRUE}, the function will return
#' a barebone ggplot2 plot to be customized further.
#'
#' @return This function will return a ggplot2 plot object, which can be further
#' modified if desirable. The pathology data is visualized as multiple bar
#' graphs, one for each type of cancer. For each bar graph, x axis contains
#' the inquired protein and y axis contains the proportion of patients.
#'
#' @family visualization functions
#'
#' @examples
#' data("hpa_histology_data")
#' geneList <- c('TP53', 'EGFR', 'CD44', 'PTEN', 'IDH1', 'IDH2', 'CYCS')
#' cancerList <- c('breast cancer', 'glioma', 'melanoma')
#'
#' ## A typical function call
#' hpaVisPatho(data=hpa_histology_data,
#' targetGene=geneList)
#'
#' @import dplyr
#' @import ggplot2
#' @importFrom stats reshape
#' @export
hpaVisPatho <- function(data = NULL,
targetGene = NULL,
targetCancer = NULL,
facetBy = "cancer",
color = c('#FCFDBF', '#FE9F6D', '#DE4968', '#8C2981'),
customTheme = FALSE) {
# Check if parameters are provided or not
data <- is_null_data(data = data)
target_check <- is_null_target(gene = targetGene,
cancer = targetCancer)
targetGene <- target_check$targetGene
# Prepare data for plotting
plotData <- data$pathology %>%
filter(gene %in% targetGene)
if (!is.null(targetCancer)) {
plotData <- filter(plotData, cancer %in% targetCancer)
}
plotData <- plotData %>%
select(gene, cancer, high, medium, low, not_detected) %>%
rename(
'High' = 'high',
'Medium' = 'medium',
'Low' = 'low',
'Not detected' = 'not_detected'
) %>%
## The old way used tidyr::gather
# gather(key = "level", value = "patient_count", -gene, -cancer)
## The new way uses stats::reshape
as.data.frame() %>%
reshape(
direction = "long",
varying = list(3:6),
v.names = "patient_count",
timevar = "level",
times = c("High", "Medium", "Low", "Not detected")
)
#re-level
plotData$level <- factor(plotData$level,
levels = c("High", "Medium", "Low", "Not detected"))
levelColors <- c(
'Not detected' = color[1],
'Low' = color[2],
'Medium' = color[3],
'High' = color[4]
)
plotBy <- if(facetBy == "cancer") "gene" else "cancer"
plot <-
ggplot(plotData, aes(x = .data[[plotBy]], y = patient_count, fill = level)) +
geom_bar(stat = 'identity', position = 'fill') +
scale_x_discrete(limits = if(facetBy == "cancer") targetGene else targetCancer) +
scale_fill_manual(values = levelColors) +
facet_wrap( ~ .data[[facetBy]])
if (!customTheme) {
plot <- plot +
ylab('Patient proportions') +
xlab(if(facetBy == "cancer") "Genes" else "Cancers") +
theme_minimal() +
theme(panel.grid = element_blank()) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
}
return(plot)
}
############################
## Visualize subcell data ##
############################
#' Visualize subcellular location data
#'
#' Visualize the the confirmed subcellular locations of genes of interest.
#'
#' @param data Input the list object generated by \code{hpa_download()} or
#' \code{hpa_subset()}. Require the \code{subcellular_location} dataset. Use
#' HPA histology data (built-in) by default.
#' @param targetGene Vector of strings of HGNC gene symbols. By default it is
#' set to \code{c('TP53', 'EGFR', 'CD44', 'PTEN')}. You can also mix
#' HGNC gene symbols and ensemnl ids (start with ENSG) and they will be
#' converted to HGNC gene symbols.
#' @param reliability Vector of string indicate which reliability scores you want to plot. The
#' default is everything \code{c("enhanced", "supported", "approved",
#' "uncertain")}.
#' @param color Vector of 2 colors used to depict if the protein expresses in a
#' location or not.
#' @param customTheme Logical argument. If \code{TRUE}, the function will return
#' a barebone ggplot2 plot to be customized further.
#'
#' @return This function will return a ggplot2 plot object, which can be further
#' modified if desirable. The subcellular location data is visualized as a
#' tile graph, in which the x axis includes the inquired proteins and the y
#' axis contain the subcellular locations.
#'
#' @family visualization functions
#'
#' @examples
#' data("hpa_histology_data")
#' geneList <- c('TP53', 'EGFR', 'CD44', 'PTEN', 'IDH1', 'IDH2', 'CYCS')
#'
#' ## A typical function call
#' hpaVisSubcell(data=hpa_histology_data,
#' targetGene=geneList)
#'
#' @import dplyr
#' @import ggplot2
#' @importFrom tibble as_tibble
#' @export
hpaVisSubcell <- function(data = NULL,
targetGene = NULL,
reliability = c("enhanced", "supported", "approved", "uncertain"),
color = c('#FCFDBF', '#8C2981'),
customTheme = FALSE) {
# Check if parameters are provided or not
data <- is_null_data(data = data)
target_check <- is_null_target(gene = targetGene)
targetGene <- target_check$targetGene
# Prepare data for plotting
plotData <- data$subcellular_location %>%
filter(gene %in% targetGene) %>%
mutate(sub_location = NA)
# if ("enhanced" %in% reliability)
# plotData <- mutate(plotData,
# sub_location = paste(sub_location, enhanced, sep = ";"))
# if ("supported" %in% reliability)
# plotData <- mutate(plotData,
# sub_location = paste(sub_location, supported, sep = ";"))
# if ("approved" %in% reliability)
# plotData <- mutate(plotData,
# sub_location = paste(sub_location, approved, sep = ";"))
# if ("uncertain" %in% reliability)
# plotData <- mutate(plotData,
# sub_location = paste(sub_location, uncertain, sep = ";"))
for (i in reliability) {
plotData <- mutate(plotData,
sub_location = paste(sub_location, .data[[i]], sep = ";"))
}
# plotData <- plotData %>%
# mutate(sub_location=strsplit(sub_location, ';')) %>%
# tidyr::unnest(sub_location) %>%
# select(sub_location, gene) %>%
# filter(sub_location != "NA") %>%
# table() %>%
# as_tibble() %>%
# mutate(n=factor(n, levels=c('0', '1')))
## Use apply(as_tibble) %>% bind_rows instead of unnest
plotData <- plotData %>%
mutate(sub_location = strsplit(sub_location, ';')) %>%
apply(MARGIN = 1, FUN = as_tibble) %>% bind_rows() %>%
select(sub_location, gene) %>%
filter(sub_location != "NA") %>%
table() %>%
as_tibble() %>%
mutate(n = factor(n, levels = c('0', '1')))
levelColors <- c('0' = color[1],
'1' = color[length(color)])
plot <- ggplot(plotData, aes(x = gene, y = sub_location)) +
geom_tile(aes(fill = n), colour = "grey50") +
scale_x_discrete(limits = targetGene) +
scale_fill_manual(
values = levelColors,
name = "Detected",
breaks = c(0, 1),
labels = c("No", "Yes")
)
if (!customTheme) {
plot <- plot +
ylab('Subcellular locations') +
xlab('Genes') +
theme_minimal() +
theme(panel.grid = element_blank()) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
coord_equal()
}
return(plot)
}
## hpaVis shared child functions================================================
is_null_target <- function(gene = "",
tissue = "",
celltype = "",
cancer = "") {
out <- list()
infoDisp <- FALSE
if (is.null(gene)) {
message('* WARNING: targetGene variable not specified, use example (TP53, EGFR, CD44, PTEN).')
out$targetGene <- c('TP53', 'EGFR', 'CD44', 'PTEN')
infoDisp <- TRUE
} else out$targetGene <- gene_ensembl_convert(gene, "gene")
# Check if targetTissue is provided
if (is.null(tissue)) {
message('* WARNING: targetTissue variable not specified, visualize all.')
infoDisp <- TRUE
}
# Check if targetCellType is provided
if (is.null(celltype)) {
message('* WARNING: targetCellType variable not specified, visualize all.')
infoDisp <- TRUE
}
# Check if targetCellType is provided
if (is.null(cancer)) {
message('* WARNING: targetCancer variable not specified, visualize all.')
infoDisp <- TRUE
}
# Show a message if any parameter is not defined
if (infoDisp) {
message('>> Use hpaListParam() to list possible values for target variables.')
}
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.