n
#' Plot offtargets aligned to the target sequence
#'
#' @param offTargetFile The path of the file offTargetsInPeakRegions.xls
#' that stores the offtargets to be plotted. This file is the
#' output file from the function GUIDEseqAnalysis.
#' @param sep Field delimiter for the file specified as
#' offTargetFile, default to tab dilimiter
#' @param header Indicates whether there is header in the file
#' specified as offTargetFile, default to TRUE
#' @param gRNA.size Size of the gRNA, default to 20 for SpCas9 system
#' @param input.DNA.bulge.symbol The symbol used to represent DNA bulges
#' in the file specified as offTargetFile, default to "^"
#' @param input.RNA.bulge.symbol The symbol used to represent RNA bulges
#' in the file specified as offTargetFile, default to "-"
#' @param input.match.symbol The symbol used to represent matched bases
#' in the file specified as offTargetFile, default to "."
#' @param plot.DNA.bulge.symbol The symbol used to represent DNA bulges
#' in the figure to be generated, default to DNA.bulge, i.e., the nucleotide
#' in the DNA bulge. Alternatively, you can specify a symbol to represent
#' all DNA bulges such as "I".
#' @param plot.RNA.bulge.symbol The symbol used to represent RNA bulges
#' in the figure to be generated, default to "-"
#' @param plot.match.symbol The symbol used to represent matched bases
#' in the figure to be generated, default to "."
#' @param color.DNA.bulge The color used to represent DNA bulges
#' in the figure to be generated, default to "red"
#' @param size.symbol The size used to plot the bases, and the symbols
#' of DNA/RNA bulges, default to 3
#' @param color.values The color used to represent different bases, DNA
#' bulges, and RNA bulges.
#' @param PAM PAM sequence in the target site, please update it to
#' the exact PAM sequence in the input target site.
#' @param body.tile.height Specifies the height of each plotting
#' tile around each base/symbol for offtargets, default to 2.5
#' @param header.tile.height Specifies the height of each plotting
#' tile around each base/symbol for the target sequence on the very
#' top, default to 3.6
#' @param hline.offset Specifies the offset from the top border to draw
#' the horizontal line below the gRNA sequence, default to 3.8. Increase it
#' to move the line down and decrease it to move the line up.
#' @param plot.top.n Optional. If not specified, all the offtargets
#' in the input file specified as offTargetFile will be included
#' in the plot. With a very large number of offtargets,
#' users can select the top n offtargets to be included in the plot.
#' For example, set plot.top.n = 20 to include only top 20 offtargets
#' in the plot. Please note offtargets are ordered by the n.distinct.UMIs or
#' peak_score from top to bottom.
#' @param insertion.score.column "n.distinct.UMIs" or "peak_score" to be included on
#' @param insertion.score.column.prefix to designate sample name e.g., S1
#' which means that two of columns are named as S1.peak_score
#' and S1.n.distinct.UMIs in the input file. Useful if the input file is
#' generated by the function combineOfftargets
#' the right side of the alignment as Insertion Events. Relative Insertion
#' Rate (RIR) % is calculated as peak_score/n.distinct.UMIs
#' divided by ontarget peak_score/n.distinct.UMIs. For example, RIR for ontarget
#' should be 100
#' @param width.IR For adjusting the width of the IR output
#' @param width.RIR For adjusting the width of the RIR output
#' @param family font family, default to sans (Arial). Other options are
#' serif (Times New Roman) and mono (Courier). It is possible to use custom
#' fonts with the extrafont package with the following commands
#' install.packages("extrafont")
#' library(extrafont)
#' font_import()
#' loadfonts(device = "postscript")
#' @param hjust horizontal alignment
#' @param vjust vertical alignment
#'
#' @return a ggplot object
#' @importFrom ggplot2 ggplot aes geom_text annotate theme geom_tile
#' @importFrom ggplot2 element_blank element_text element_rect
#' @importFrom ggplot2 scale_x_continuous scale_y_continuous theme_bw
#' @importFrom ggplot2 geom_hline geom_vline scale_fill_manual
#' @importFrom purrr map
#' @importFrom dplyr select arrange mutate top_n '%>%'
#' @importFrom tidyr unnest
#' @export plotAlignedOfftargets
#' @author Lihua Julie Zhu
#'
#' @examples
#' offTargetFilePath <- system.file("extdata/forVisualization",
#' "offTargetsInPeakRegions.xls",
#' package = "GUIDEseq")
#' fig1 <- plotAlignedOfftargets(offTargetFile = offTargetFilePath,
#' plot.top.n = 20,
#' plot.match.symbol = ".",
#' plot.RNA.bulge.symbol = "-",
#' insertion.score.column = "peak_score")
#' fig1
#'
#' fig2 <- plotAlignedOfftargets(offTargetFile = offTargetFilePath,
#' plot.top.n = 20,
#' plot.match.symbol = ".",
#' plot.RNA.bulge.symbol = "-",
#' insertion.score.column = "n.distinct.UMIs")
#' fig2
#'
plotAlignedOfftargets <- function(offTargetFile, sep ="\t",
header = TRUE, gRNA.size = 20L,
input.DNA.bulge.symbol = "^",
input.RNA.bulge.symbol = "-",
input.match.symbol = ".",
plot.DNA.bulge.symbol = "DNA.bulge",
plot.RNA.bulge.symbol = "-",
plot.match.symbol = ".",
color.DNA.bulge = "red",
size.symbol = 3,
color.values =
c("A" = "#B5D33D", #green
"T" = "#AE9CD6", #purple
"C" = "#6CA2EA", #blue
"G" = "#FED23F", #orange
"-" = "gray",
"." = "white"),
PAM = "GGG",
body.tile.height = 2.5,
header.tile.height = 3.6,
hline.offset = 3.8,
plot.top.n,
insertion.score.column = c("n.distinct.UMIs",
"peak_score"
),
insertion.score.column.prefix,
width.IR = 2.5,
width.RIR = 2.5,
family = "sans",
hjust = "middle",
vjust = 0.5
)
{
if(missing(offTargetFile) || !file.exists(offTargetFile))
stop("Please specify the offTargetFile as a valid file path!")
x <- read.table(offTargetFile,
sep = sep,
header = header,
stringsAsFactors = FALSE)
insertion.score.column = match.arg(insertion.score.column)
if (!missing(insertion.score.column.prefix))
insertion.score.column <- paste0( insertion.score.column.prefix, insertion.score.column)
insertion.score.column <- sym(insertion.score.column)
on.target.IR <- x %>%
filter(total.mismatch.bulge == 0) %>%
select(insertion.score.column) %>%
as.numeric
x <- x %>% filter(!!insertion.score.column > 0) %>%
mutate(
IR = !!insertion.score.column,
RIR =
round(!!insertion.score.column/on.target.IR * 100,
digits = 2)) %>%
arrange(RIR)
if (!missing(plot.top.n) && class(plot.top.n) == "numeric")
x <- x %>% top_n(plot.top.n, RIR)
x <- rbind(x[x$total.mismatch.bulge > 0,], x[x$total.mismatch.bulge == 0,])
ontarget <- paste0(substr(x$gRNAPlusPAM[1],
1, gRNA.size),
PAM)
aligns <- x %>% select(guideAlignment2OffTarget) %>%
as.vector %>% unlist
PAM.char <- matrix(unlist(map(x$PAM.sequence, strsplit, "")),
nrow = nrow(x), byrow = TRUE)
PAM.ontarget <- unlist(strsplit(PAM, ""))
PAM.char[PAM.char[,1] == PAM.ontarget[1], 1] <- input.match.symbol
PAM.char[PAM.char[,2] == PAM.ontarget[2], 2] <- input.match.symbol
PAM.char[PAM.char[,3] == PAM.ontarget[3], 3] <- input.match.symbol
aligns <- c(paste0(aligns,
paste0(PAM.char[,1],
PAM.char[,2],
PAM.char[,3])),
ontarget)
aligns.char <- unlist(map(aligns, strsplit, ""))
aligns.char <- aligns.char[aligns.char != input.DNA.bulge.symbol]
aligns.char[aligns.char == input.RNA.bulge.symbol] <- plot.RNA.bulge.symbol
aligns.char[aligns.char == input.match.symbol] <- plot.match.symbol
x0 <- data.frame(x = rep(1:(gRNA.size + nchar(PAM)),
length(aligns) ),
y = body.tile.height * sort(rep(1:length(aligns),
gRNA.size + nchar(PAM))),
aligns.char = aligns.char,
h = c(rep(body.tile.height, nrow(x) * (gRNA.size + nchar(PAM))),
rep(header.tile.height, gRNA.size + nchar(PAM))))
# top.gRNA.indexS <- (length(aligns) - 1) * (gRNA.size + nchar(PAM)) + 1
# top.gRNA.indexE <- top.gRNA.indexS + (gRNA.size + nchar(PAM)) - 1
#
# x0[top.gRNA.indexS:top.gRNA.indexE , 2] <-
# x0[top.gRNA.indexS, 2] + body.tile.height/2
#
# top.gRNA.indexS <- (length(aligns)) * (gRNA.size + nchar(PAM)) + 1
# x0[top.gRNA.indexS:nrow(x0) , 2] <-
# x0[top.gRNA.indexS, 2] + body.tile.height/2
#
x0.for.symbol <- x0
#%>% mutate(y = y - 0.6)
x1 <- data.frame(x = rep(gRNA.size + nchar(PAM) + width.IR,
nrow(x)),
y = body.tile.height * 1:nrow(x) - 0.135,
IR = x$IR)
# x1[nrow(x1) , 2] <-
# x1[nrow(x1), 2] + body.tile.height/2
#
x2 <- data.frame(x = rep(gRNA.size + nchar(PAM) + width.IR + width.RIR,
nrow(x)),
y = body.tile.height * 1:nrow(x) - 0.135,
RIR = formatC(x$RIR, format ="f", digits = 2))
# x2[nrow(x2) , 2] <-
# x2[nrow(x2), 2] + body.tile.height/2
#
x.DNA.bulge <- data.frame(x = x$pos.DNA.bulge,
y = body.tile.height * 1:nrow(x),
DNA.bulge = x$DNA.bulge)
x.DNA.bulge <- x.DNA.bulge[!is.na(x.DNA.bulge[,1]) &
x.DNA.bulge[,1] != "",] %>%
mutate(x = strsplit(as.character(x), ","),
DNA.bulge = strsplit(as.character(DNA.bulge), ",")) %>%
unnest(c(x, DNA.bulge)) %>%
mutate(x = as.numeric(x) - 0.5)
max.y <- max(x0$y) + 2
max.x <- max(x0$x)
p1 <- ggplot(x0, aes(x = x, y = y)) +
geom_tile(aes(x, y, fill = aligns.char,
height=h)) +
scale_fill_manual(values = color.values) +
geom_text(data = x0.for.symbol, aes(x=x,
y=y, label = aligns.char),
size = size.symbol, vjust = vjust, hjust = hjust,
family = family) +
geom_text(data = x1, aes(x, y, label = IR), hjust = "right",
vjust = vjust,
family = family, size = size.symbol) +
annotate("text", x = max(x1$x) - width.IR/4, y = max.y - 2,
label = "IR", hjust = hjust, vjust = vjust,
family = family, size = size.symbol, fontface = "bold") +
geom_text(data = x2, aes(x, y, label = RIR), hjust = "right",
vjust = vjust,
family = family, size = size.symbol) +
annotate("text", x = max(x2$x) - width.RIR/4, y = max.y - 2,
label = "RIR (%)", hjust = hjust, vjust = vjust,
family = family, size = size.symbol,fontface = "bold")
#annotate("text", x = x1$x, y = x1$y, label = x1$IR)
if (nrow(x.DNA.bulge) >0 && plot.DNA.bulge.symbol == "DNA.bulge")
p1 <- p1 +
geom_text(data = x.DNA.bulge, aes(x=x,
y=y,
label = DNA.bulge),
size = size.symbol +0.5,
color = color.DNA.bulge,
family = family,
vjust = 0.1) +
geom_text(data = x.DNA.bulge, aes(x=x,
y=y - 0.4,
label = "^"),
size = size.symbol,
color = color.DNA.bulge,
family = family,
vjust = 0.8)
else if (nrow(x.DNA.bulge) >0)
p1 <- p1 +
geom_text(data = x.DNA.bulge, aes(x=x,
y=y,
label = plot.DNA.bulge.symbol),
size = size.symbol + 0.5,
color = color.DNA.bulge, family = family,
vjust = 0.1) +
geom_text(data = x.DNA.bulge, aes(x=x,
y=y - 0.4,
label = "^"),
size = size.symbol,
color = color.DNA.bulge,
family = family,
vjust = 0.8)
p1 +
geom_vline(xintercept = gRNA.size + 0.5) +
geom_vline(xintercept = gRNA.size + nchar(PAM) + 0.5) +
geom_hline(yintercept = max.y - hline.offset) +
theme_bw() +
scale_y_continuous(limits = c(0, max.y),
expand = c(0, 0)) +
scale_x_continuous(limits = c(0.5, max(x2$x) + 1.4),
expand = c(0, 0)) +
#coord_cartesian(xlim = c(0, max(x1$x) + 1), clip = "off") +
theme(text=element_text(size=size.symbol, family= family,
face = "bold"),
axis.text.x=element_blank(),
axis.text.y=element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "black", size = 2),
legend.position= "none",
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
axis.title = element_blank()
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.