#' Construct copy-number cohort plot
#'
#' Given a data frame construct a plot to display copy-number calls for a cohort
#' of samples.
#' @name cnSpec
#' @param x Object of class data frame with rows representing copy-number
#' segment calls. The data frame must contain columns with the following names
#' "chromosome", "start", "end", "segmean", "sample".
#' @param y Object of class data frame with rows representing chromosome
#' boundaries for a genome assembly. The data frame must contain columns with
#' the following names "chromosome", "start", "end" (optional: see details).
#' @param genome Character string specifying a valid UCSC genome (see details).
#' @param plot_title Character string specifying title to display on the plot.
#' @param CN_Loss_colour Character string specifying the colour value of copy
#' number losses.
#' @param CN_Gain_colour Character string specifying the colour value of copy
#' number gains.
#' @param x_title_size Integer specifying the size of the x-axis title.
#' @param y_title_size Integer specifying the size of the y-axis title.
#' @param facet_lab_size Integer specifying the size of the faceted labels
#' plotted.
#' @param plotLayer Valid ggplot2 layer to be added to the plot.
#' @param out Character vector specifying the the object to output, one of
#' "data", "grob", or "plot", defaults to "plot" (see returns).
#' @param CNscale Character string specifying if copy number calls supplied are
#' relative (i.e.copy neutral == 0) or absolute (i.e. copy neutral ==2). One of
#' "relative" or "absolute"
#' @details cnSpec requires the location of chromosome boundaries for a given
#' genome assembly in order to ensure the entire chromosome space is plotted.
#' As a convenience this information is available to cnSpec for
#' the following genomes "hg19", "hg38", "mm9", "mm10", "rn5" and can be
#' retrieved by supplying one of the afore mentioned assemblies via the `genome`
#' parameter. If a genome assembly is supplied to the `genome` parameter and is
#' unrecognized cnSpec will attempt to query the UCSC MySQL database for the
#' required information. If chromosome boundary locations are unavailable for
#' a given assembly or if it is desireable to plot a specific region
#' encapsulating the copy number data these boundaries can be supplied to the
#' `y` paramter which has priority of the parameter `genome`.
#'
#' The `plotLayer` parameter can be used to add an additional layer to the
#' ggplot2 graphic (see vignette).
#' @return One of the following, a list of dataframes containing data to be
#' plotted, a grob object, or a plot.
#' @importFrom data.table dcast
#' @importFrom data.table melt
#' @importFrom stats na.omit
#' @importFrom gtools mixedsort
#' @examples
#' cnSpec(LucCNseg, genome="hg19")
#' @export
cnSpec <- function(x, y=NULL, genome='hg19', plot_title=NULL,
CN_Loss_colour='#002EB8', CN_Gain_colour='#A30000',
x_title_size=12, y_title_size=12, facet_lab_size=10,
plotLayer=NULL, out="plot", CNscale="absolute")
{
# Perform quality check on input data
data <- cnSpec_qual(x, y, genome, CNscale=CNscale)
x <- data[[1]]
y <- data[[2]]
# Get dummy data for genome
# (this is used to set chromosome boundaries in plot)
# Check to see if y is specified if not check if genome is preloaded
# else attempt to query UCSC, if unsuccessful report an error
preloaded <- c("hg38", "hg19", "mm10", "mm9", "rn5")
if(!is.null(y))
{
message("detected value in y, reformating...")
# reformat the input
temp <- y
temp1 <- y
temp2 <- y
temp1$end <- temp$start
temp2$start <- temp$end
UCSC_Chr_pos <- unique(rbind(temp1, temp2))
} else if(is.null(y) && any(genome == preloaded)){
message("genome specified is preloaded, retrieving data...")
UCSC_Chr_pos <- GenVisR::cytoGeno[GenVisR::cytoGeno$genome == genome,]
UCSC_Chr_pos <- multi_chrBound(UCSC_Chr_pos)
} else {
# Obtain data for UCSC genome and extract relevant columns
memo <- paste0("attempting to query UCSC mySQL database for chromosome",
" positions")
message(memo)
cyto_data <- suppressWarnings(multi_cytobandRet(genome))
UCSC_Chr_pos <- multi_chrBound(cyto_data)
}
# Check that dummy data has a size, if not report an error
if(nrow(UCSC_Chr_pos) < 1)
{
memo <- paste0("query to UCSC unsuccessful, please supply data frame",
" to argument y")
stop(memo)
}
# Dcast the input data into a recognizable format this will set as NA
# segment calls in one sample but not the other
CN_data <- data.table::dcast(x, chromosome + start + end ~ sample,
value.var = "segmean")
# Create the dummy data (make sure entire chromosome is plotted)
dummy_data <- lapply(unique(x$sample),
function(sample, chr_pos) cbind(chr_pos, sample),
UCSC_Chr_pos)
dummy_data <- do.call("rbind", dummy_data)
dummy_data$chromosome <- gsub('chr', '', dummy_data$chromosome)
# Format the Copy Number data
CN_data$chromosome <- gsub('chr', '', CN_data$chromosome)
CN_data <- data.table::melt(CN_data, id.vars=c('chromosome', 'start', 'end'))
CN_data <- stats::na.omit(CN_data)
colnames(CN_data) <- c('chromosome', 'start', 'end', 'sample', 'cn')
# Change the order of chromosomes and samples (natural sort order)
chromosome_sorted <- as.vector(unique(dummy_data$chromosome))
chromosome_sorted <- gtools::mixedsort(chromosome_sorted)
dummy_data$chromosome <- factor(dummy_data$chromosome, levels=chromosome_sorted)
CN_data$chromosome <- factor(CN_data$chromosome, levels=chromosome_sorted)
# if x$sample was a factor plot that instead of sorting samples
if(is.factor(x$sample))
{
sample_sorted <- levels(x$sample)
} else {
sample_sorted <- as.vector(unique(CN_data$sample))
sample_sorted <- gtools::mixedsort(sample_sorted)
}
CN_data$sample <- factor(CN_data$sample, levels=sample_sorted)
# build the plot
p1 <- cnSpec_buildMain(CN_data, dummy_data, plot_title=plot_title,
CN_low_colour=CN_Loss_colour,
CN_high_colour=CN_Gain_colour,
x_lab_size=x_title_size,
y_lab_size=y_title_size,
facet_lab_size=facet_lab_size,
layers=plotLayer, CNscale=CNscale)
# Decide what to output
output <- multi_selectOut(data=list(cnData=CN_data, chrBound=dummy_data),
plot=p1, out=out)
return(output)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.