Nothing
###########################
## Visualize tissue data ##
###########################
#' Visualize tissue data
#'
#' Visualize the expression of protein of interest in each target tissue by cell
#' types.
#'
#' @param data Input the list object generated by \code{hpa_download()} or
#' \code{hpa_subset()}. Require the \code{normal_tissue} dataset. Use HPA
#' histology data (built-in) by default.
#' @param targetGene Vector of strings of HGNC gene symbols. By default it is
#' set to \code{c('TP53', 'EGFR', 'CD44', 'PTEN', 'IDH1')}. You can also mix
#' HGNC gene symbols and ensemnl ids (start with ENSG) and they will be
#' converted to HGNC gene symbols.
#' @param targetTissue Vector of strings of normal tissues. Default to breast.
#' @param targetCellType Vector of strings of normal cell types. Default to all.
#' @param color Vector of 4 colors used to depict different expression levels.
#' @param customTheme Logical argument. If \code{TRUE}, the function will return
#' a barebone ggplot2 plot to be customized further.
#'
#' @return This function will return a ggplot2 plot object, which can be further
#' modified if desirable. The tissue data is visualized as a heatmap: x axis
#' contains inquired protein and y axis contains tissue/cells of interest.
#'
#' @family visualization functions
#'
#' @examples
#' data("hpa_histology_data")
#' geneList <- c('TP53', 'EGFR', 'CD44', 'PTEN', 'IDH1', 'IDH2', 'CYCS')
#' tissueList <- c('breast', 'cerebellum', 'skin 1')
#'
#' ## A typical function call
#' hpaVisTissue(data=hpa_histology_data,
#' targetGene=geneList,
#' targetTissue=tissueList)
#'
#' @import dplyr
#' @import ggplot2
#' @export
hpaVisTissue <- function(data=NULL,
targetGene=NULL,
targetTissue=NULL,
targetCellType=NULL,
color=c('#ffffb2', '#fecc5c', '#fd8d3c', '#e31a1c'),
customTheme=FALSE) {
infoDisp <- FALSE
# Check if data is provided or not
if (is.null(data)) {
message(paste0('No data provided. Use version ',
hpa_histology_data$metadata$HPAversion,
"."))
data = HPAanalyze::hpa_histology_data
}
# Check if targetGene is provided
if (is.null(targetGene)) {
message('targetGene variable not specified, default to TP53, EGFR, CD44, PTEN and IDH1.')
targetGene <- c('TP53', 'EGFR', 'CD44', 'PTEN', 'IDH1')
infoDisp <- TRUE
}
# Check if targetTissue is provided
if (is.null(targetTissue)) {
message('targetTissue variable not specified, default to breast.')
targetTissue <- 'breast'
infoDisp <- TRUE
}
# Check if targetCellType is provided
if (is.null(targetCellType)) {
message('targetCellType variable not specified, visualize all.')
targetCellType <- NULL
infoDisp <- TRUE
}
# Show a message if any parameter is not defined
if (infoDisp) {
message('Use hpaListParam() to list possible values for target variables.')
}
targetGene <- gene_ensembl_convert(targetGene, "gene")
plotData <- data$normal_tissue %>%
filter(gene %in% targetGene) %>%
filter(tissue %in% targetTissue)
if(!is.null(targetCellType)) {
plotData <- filter(plotData, cell_type %in% targetCellType)
}
plotData <- mutate(plotData,
tissue_cell=paste0(tissue, ' / ', cell_type),
level=factor(level,
levels=c('High', 'Medium',
'Low', 'Not detected')))
levelColors <- c('Not detected'=color[1],
'Low'=color[2],
'Medium'=color[3],
'High'=color[4])
plot <- ggplot(plotData, aes(x=gene, y=tissue_cell)) +
geom_tile(aes(fill=level)) +
scale_x_discrete(limits=targetGene) +
scale_fill_manual(values=levelColors)
if(!customTheme) {
plot <- plot +
ylab('Tissue / Cell') +
xlab('Genes') +
theme_minimal() +
theme(panel.grid = element_blank()) +
theme(axis.text.x=element_text(angle=90, hjust=1)) +
coord_equal()
}
return(plot)
}
##############################
## Visualize pathology data ##
##############################
#' Visualize pathology data
#'
#' Visualize the expression of genes of interest in each cancer.
#'
#' @param data Input the list object generated by \code{hpa_download()} or
#' \code{hpa_subset()}. Require the \code{pathology} dataset. Use HPA histology
#' data (built-in) by default.
#' @param targetGene Vector of strings of HGNC gene symbols. By default it is
#' set to \code{c('TP53', 'EGFR', 'CD44', 'PTEN', 'IDH1')}. You can also mix
#' HGNC gene symbols and ensemnl ids (start with ENSG) and they will be
#' converted to HGNC gene symbols.
#' @param targetCancer Vector of strings of normal tissues. The function will
#' plot all available cancer by default.
#' @param color Vector of 4 colors used to depict different expression levels.
#' @param customTheme Logical argument. If \code{TRUE}, the function will return
#' a barebone ggplot2 plot to be customized further.
#'
#' @return This function will return a ggplot2 plot object, which can be further
#' modified if desirable. The pathology data is visualized as multiple bar
#' graphs, one for each type of cancer. For each bar graph, x axis contains
#' the inquired protein and y axis contains the proportion of patients.
#'
#' @family visualization functions
#'
#' @examples
#' data("hpa_histology_data")
#' geneList <- c('TP53', 'EGFR', 'CD44', 'PTEN', 'IDH1', 'IDH2', 'CYCS')
#' cancerList <- c('breast cancer', 'glioma', 'melanoma')
#'
#' ## A typical function call
#' hpaVisPatho(data=hpa_histology_data,
#' targetGene=geneList)
#'
#' @import dplyr
#' @import ggplot2
#' @importFrom stats reshape
#' @export
hpaVisPatho <- function(data=NULL,
targetGene=NULL,
targetCancer=NULL,
color=c('#ffffb2', '#fecc5c', '#fd8d3c', '#e31a1c'),
customTheme=FALSE) {
infoDisp <- FALSE
# Check if data is provided or not
if (is.null(data)) {
message(paste0('No data provided. Use version ',
hpa_histology_data$metadata$HPAversion,
"."))
data = HPAanalyze::hpa_histology_data
}
# Check if targetGene is provided
if (is.null(targetGene)) {
message('targetGene variable not specified, default to TP53, EGFR, CD44, PTEN and IDH1.')
targetGene <- c('TP53', 'EGFR', 'CD44', 'PTEN', 'IDH1')
infoDisp <- TRUE
}
# Show a message if any parameter is not defined
if (infoDisp) {
message('Use hpaListParam() to list possible values for target variables.')
}
plotData <- data$pathology %>%
filter(gene %in% targetGene)
if(!is.null(targetCancer)) {
plotData <- filter(plotData, cancer %in% targetCancer)
}
targetGene <- gene_ensembl_convert(targetGene, "gene")
plotData <- plotData %>%
select(gene, cancer, high, medium, low, not_detected) %>%
rename('High'='high', 'Medium'='medium',
'Low'='low', 'Not detected'='not_detected') %>%
## The old way used tidyr::gather
# gather(key = "level", value = "patient_count", -gene, -cancer)
## The new way uses stats::reshape
as.data.frame() %>%
reshape(direction = "long",
varying = list(3:6),
v.names = "patient_count",
timevar = "level",
times = c("High", "Medium", "Low", "Not detected")
)
#re-level
plotData$level <- factor(plotData$level,
levels = c("High", "Medium", "Low", "Not detected"))
levelColors <- c('Not detected'=color[1],
'Low'=color[2],
'Medium'=color[3],
'High'=color[4])
plot <- ggplot(plotData, aes(x=gene, y=patient_count, fill=level)) +
geom_bar(stat='identity', position='fill') +
scale_x_discrete(limits=targetGene) +
scale_fill_manual(values=levelColors) +
facet_wrap(~ cancer)
if(!customTheme) {
plot <- plot +
ylab('Patient proportions') +
xlab('Genes') +
theme_minimal() +
theme(panel.grid = element_blank()) +
theme(axis.text.x=element_text(angle=90, hjust=1))
}
return(plot)
}
############################
## Visualize subcell data ##
############################
#' Visualize subcellular location data
#'
#' Visualize the the confirmed subcellular locations of genes of interest.
#'
#' @param data Input the list object generated by \code{hpa_download()} or
#' \code{hpa_subset()}. Require the \code{subcellular_location} dataset. Use
#' HPA histology data (built-in) by default.
#' @param targetGene Vector of strings of HGNC gene symbols. By default it is
#' set to \code{c('TP53', 'EGFR', 'CD44', 'PTEN', 'IDH1')}. You can also mix
#' HGNC gene symbols and ensemnl ids (start with ENSG) and they will be
#' converted to HGNC gene symbols.
#' @param reliability Vector of string indicate which reliability scores you want to plot. The
#' default is everything \code{c("enhanced", "supported", "approved",
#' "uncertain")}.
#' @param color Vector of 2 colors used to depict if the protein expresses in a
#' location or not.
#' @param customTheme Logical argument. If \code{TRUE}, the function will return
#' a barebone ggplot2 plot to be customized further.
#'
#' @return This function will return a ggplot2 plot object, which can be further
#' modified if desirable. The subcellular location data is visualized as a
#' tile graph, in which the x axis includes the inquired proteins and the y
#' axis contain the subcellular locations.
#'
#' @family visualization functions
#'
#' @examples
#' data("hpa_histology_data")
#' geneList <- c('TP53', 'EGFR', 'CD44', 'PTEN', 'IDH1', 'IDH2', 'CYCS')
#'
#' ## A typical function call
#' hpaVisSubcell(data=hpa_histology_data,
#' targetGene=geneList)
#'
#' @import dplyr
#' @import ggplot2
#' @importFrom tibble as_tibble
#' @export
hpaVisSubcell <- function(data=NULL,
targetGene=NULL,
reliability = c("enhanced", "supported", "approved", "uncertain"),
color=c('#ffffb2', '#e31a1c'),
customTheme=FALSE) {
infoDisp <- FALSE
# Check if data is provided or not
if (is.null(data)) {
message(paste0('No data provided. Use version ',
hpa_histology_data$metadata$HPAversion,
"."))
data = HPAanalyze::hpa_histology_data
}
# Check if targetGene is provided
if (is.null(targetGene)) {
message('targetGene variable not specified, default to TP53, EGFR, CD44, PTEN and IDH1.')
targetGene <- c('TP53', 'EGFR', 'CD44', 'PTEN', 'IDH1')
infoDisp <- TRUE
}
# Show a message if any parameter is not defined
if (infoDisp) {
message('Use hpaListParam() to list possible values for target variables.')
}
targetGene <- gene_ensembl_convert(targetGene, "gene")
plotData <- data$subcellular_location %>%
filter(gene %in% targetGene) %>%
mutate(sub_location = NA)
if ("enhanced" %in% reliability) plotData <-
mutate(plotData,
sub_location = paste(sub_location, enhanced, sep = ";"))
if ("supported" %in% reliability) plotData <-
mutate(plotData,
sub_location = paste(sub_location, supported, sep = ";"))
if ("approved" %in% reliability) plotData <-
mutate(plotData,
sub_location = paste(sub_location, approved, sep = ";"))
if ("uncertain" %in% reliability) plotData <-
mutate(plotData,
sub_location = paste(sub_location, uncertain, sep = ";"))
# plotData <- plotData %>%
# mutate(sub_location=strsplit(sub_location, ';')) %>%
# tidyr::unnest(sub_location) %>%
# select(sub_location, gene) %>%
# filter(sub_location != "NA") %>%
# table() %>%
# as_tibble() %>%
# mutate(n=factor(n, levels=c('0', '1')))
## Use apply(as_tibble) %>% bind_rows instead of unnest
plotData <- plotData %>%
mutate(sub_location=strsplit(sub_location, ';')) %>%
apply(MARGIN = 1, FUN = as_tibble) %>% bind_rows() %>%
select(sub_location, gene) %>%
filter(sub_location != "NA") %>%
table() %>%
as_tibble() %>%
mutate(n=factor(n, levels=c('0', '1')))
levelColors <- c('0'=color[1],
'1'=color[length(color)])
plot <- ggplot(plotData, aes(x=gene, y=sub_location)) +
geom_tile(aes(fill=n), colour="grey50") +
scale_x_discrete(limits=targetGene) +
scale_fill_manual(values=levelColors,
name="Detected",
breaks = c(0, 1),
labels = c("No", "Yes"))
if(!customTheme) {
plot <- plot +
ylab('Subcellular locations') +
xlab('Genes') +
theme_minimal() +
theme(panel.grid = element_blank()) +
theme(axis.text.x=element_text(angle=45, hjust=1)) +
coord_equal()
}
return(plot)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.