In malhamdoosh/abseqR: Reporting and data analysis functionalities for Rep-Seq datasets of antibody libraries

# Buidling rmd using loops: https://github.com/rstudio/flexdashboard/issues/80

library(flexdashboard)
library(plotly)
library(png)
library(grid)
library(gridExtra)

.getFile <- function(path, pattern, .stop = FALSE, .callback = NULL) {
    flist <- list.files(path = path, pattern = pattern, full.names = TRUE)
    n <- length(flist)
    if (n != 1 && is.null(.callback)) {
        if (.stop) {
            stop("Expected to find one file, but found ", n, " from ",
                 paste(flist, collapse = ", "), " instead.")
        } else {
            return(NA)
        }
    } else if (!is.null(.callback)) {
        flist <- flist[lapply(flist, .callback) == TRUE]
        if (length(flist) != 1) {
            if (.stop) {
                stop("Expected to find one file, but found ", n, " from ",
                     paste(flist, collapse = ", "), " instead.")
            } else {
                return(NA)
            }
        }
    }
    return(flist[[1]])
}

.compositionNoGene <- function(x) {
    return(length(grep("-", basename(x), fixed = TRUE)) == 0)
}

r if(!params$hasAnnot) {"\\begin{comment}"}

Summary {data-orientation=rows data-icon="fa-list-ul"}

Row

r paste(knitr::knit(text = knitr::knit_expand(text = paste("### Overview of", params$name))))

sampleNames <- unlist(strsplit(params$name, "_vs_"))
# counts
splitRaw <- unlist(strsplit(params$rawReads, ","))
splitAnnot <- unlist(strsplit(params$annotReads, ","))
splitFiltered <- unlist(strsplit(params$filteredReads, ","))
splitProd <- unlist(strsplit(params$productiveReads, ","))

# filtering
splitBit <- unlist(strsplit(params$bitfilters, ","))
splitAl <- unlist(strsplit(params$alignfilters, ","))
splitSStart <- unlist(strsplit(params$sstartfilters, ","))
splitQStart <- unlist(strsplit(params$qstartfilters, ","))

overviewOut <- lapply(seq_along(sampleNames), function(i) {
    rawR <- as.numeric(splitRaw[i])
    annotR <- as.numeric(splitAnnot[i])
    filtR <- as.numeric(splitFiltered[i])
    # might be NA if productivity analysis wasn't conducted
    prodR <- suppressWarnings(as.numeric(splitProd[i]))
    line1 <- knitr::knit_expand(text = sprintf("#### %s\n\n", sampleNames[i]))
    line2 <- knitr::knit_expand(text = sprintf("* Number of raw reads %s\n", prettyNum(rawR, big.mark = ",")))
    line3 <- knitr::knit_expand(text = sprintf("* Number of annotated reads %s%% (%s/%s)\n", round((annotR/rawR)*100, 2) , prettyNum(annotR, big.mark = ","), prettyNum(rawR, big.mark = ",")))
    line4 <- knitr::knit_expand(text = sprintf("* Number of reads after filtering %s%% (%s/%s)\n",
                                               round((filtR/annotR)*100, 2), prettyNum(filtR, big.mark = ","), prettyNum(annotR, big.mark = ",")))
    line5 <- knitr::knit_expand(text = sprintf("* Number of productive reads %s%% (%s/%s)\n", round((prodR/filtR)*100, 2), prettyNum(prodR, big.mark = ","), prettyNum(filtR, big.mark = ",")))
    line6 <- knitr::knit_expand(text = "\nThe following criteria are applied on the _V gene only_\n")
    line7 <- knitr::knit_expand(text = "\n\n|Filtering criterion|Value|\n")
    line8 <- knitr::knit_expand(text = "|--------------------|:-----:|\n")
    line9 <- knitr::knit_expand(text = sprintf("|Bitscore | %s|\n", splitBit[i]))
    line10 <- knitr::knit_expand(text = sprintf("|V gene alignment length | %s|\n", splitAl[i]))
    line11 <- knitr::knit_expand(text = sprintf("|Subject start position | %s|\n", splitSStart[i]))
    line12 <- knitr::knit_expand(text = sprintf("|Query start position | %s|\n", splitQStart[i]))
    line13 <- knitr::knit_expand(text = "\n\nSequences that do not satisfy the above filtering criteria are _excluded_ from the analysis.\n\n")
    paste0(line1, line2, line3, line4, line5, line6, line7, line8, line9,
          line10, line11, line12, line13)
})

r paste(knitr::knit(text = paste(overviewOut, collapse = "\n")))

Row

Sequence lengths without outlier removal

fname <- .getFile(path = file.path(params$rootDir, "annot"),
                  pattern = ".*_all_clones_len_dist\\.Rdata")
if (!is.na(fname)) {
    load(fname)
    if (params$interactive) {
        ggplotly(plot)
    } else {
        plot
    }
} else {
    ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
}

Row

Sequence lengths with outlier removal

fname <- .getFile(path = file.path(params$rootDir, "annot"),
                  pattern = ".*_all_clones_len_dist_no_outliers\\.Rdata")
if (!is.na(fname)) {
    load(fname)
    if (params$interactive) {
        ggplotly(plot)
    } else {
        plot
    }
} else {
    ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
}

r if(!params$hasAnnot) {"\\end{comment}"}

r if(!params$hasAbun) {"\\begin{comment}"}

Quality {data-orientation=rows data-icon="fa-clipboard-list"}

Row

Indels (Gaps)

fname <- .getFile(path = file.path(params$rootDir, "abundance"),
                  pattern = ".*_igv_gaps_dist\\.Rdata")
if (!is.na(fname)) {
    load(fname)
    if (params$interactive) {
        ggplotly(plot)
    } else {
        plot
    }
} else {
    ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
}

Plot of insertions and deletions found within the V gene

Mismatches

fname <- .getFile(path = file.path(params$rootDir, "abundance"),
                  pattern = ".*_igv_mismatches_dist\\.Rdata")
if (!is.na(fname)) {
    load(fname)
    if (params$interactive) {
        ggplotly(plot)
    } else {
        plot
    }
} else {
    ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
}

Plot of nucleotide base mismatches found within the V gene

r if(!params$hasAbun) {"\\end{comment}"}

r if(!params$hasAbun || !params$single) {"\\begin{comment}"}

Row

.renderHMText <- function(alignQual) {
    fileInteractiveness <- if (params$interactive) "" else "_static"
    alignmentQuality <- lapply(seq_along(alignQual), function(i) {
        line1 <- knitr::knit_expand(text = sprintf("### %s\n", names(alignQual)[i]))
        line2 <- knitr::knit_expand(text = sprintf("\n```r", alignQual[[i]]))
        line3 <- knitr::knit_expand(text = sprintf('\nfname <- .getFile(path = file.path(params$rootDir, "abundance"), pattern = ".*_igv_align_quality_%s_hm%s\\\\.Rdata")', alignQual[[i]], fileInteractiveness))
        line4 <- knitr::knit_expand(text = '\nif(!is.na(fname)) { load(fname); plot } else { ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "404!") }')
        line5 <- knitr::knit_expand(text = "\n```\n")
        paste(line1, line2, line3, line4, line5, collapse = "\n")
    })
    return(alignmentQuality)
}

alignQual <- list("Bitscore vs Alignment Length" = "bitscore",
                  "Identity vs Alignment Length" = "identity")
alignmentQuality <- .renderHMText(alignQual)

r if (params$hasAbun && params$single) { paste(knitr::knit(text = paste(alignmentQuality, collapse = "\n"))) }

Row

alignQual <- list("Gaps vs Alignment Length" = "gaps",
                  "Mismatches vs Alignment Length" = "mismatches")
alignmentQuality.pt2 <- .renderHMText(alignQual)

r if (params$hasAbun && params$single) { paste(knitr::knit(text = paste(alignmentQuality.pt2, collapse = "\n"))) }

Row

alignQual <- list("Subject Start vs Query Start" = "start")
alignmentQuality.pt3 <- .renderHMText(alignQual)

r if (params$hasAbun && params$single) { paste(knitr::knit(text = paste(alignmentQuality.pt3, collapse = "\n"))) }

Placeholder

ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Placeholder")

r if(!params$hasAbun || !params$single) {"\\end{comment}"}

r if(!params$hasAbun) {"\\begin{comment}"}

Abundance {data-orientation=rows data-icon="fa-chart-bar"}

Row

V family

fname <- .getFile(path = file.path(params$rootDir, "abundance"),
                  pattern = ".*igv_dist_family_level\\.Rdata")
if (!is.na(fname)) {
    load(fname)
    if (params$interactive) {
        ggplotly(plot)
    } else {
        plot
    }
} else {
    ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
}

V gene

fname <- .getFile(path = file.path(params$rootDir, "abundance"),
                  pattern = ".*igv_dist_gene_level\\.Rdata")
if (!is.na(fname)) {
    load(fname)
    # TODO: ggplotly is buggy with horizontal plot in tabsets
    if (params$interactive) {
        plot
        #ggplotly(plot)
    } else {
        plot
    }
} else {
    ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
}

Only the top 15 from each sample are shown

r if(!params$hasAbun) {"\\end{comment}"}

r if(!params$inclD || !params$hasAbun) {"\\begin{comment}"}

Row

D family

fname <- .getFile(path = file.path(params$rootDir, "abundance"),
                  pattern = ".*igd_dist_family_level\\.Rdata")
if (!is.na(fname)) {
    load(fname)
    if (params$interactive) {
        ggplotly(plot)
    } else {
        plot
    }
} else {
    ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
}

D gene

fname <- .getFile(path = file.path(params$rootDir, "abundance"),
                  pattern = ".*igd_dist_gene_level\\.Rdata")
if (!is.na(fname)) {
    load(fname)
    # TODO: ggplotly is buggy with horizontal plot in tabsets
    if (params$interactive) {
        plot
        #ggplotly(plot)
    } else {
        plot
    }
} else {
    ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
}

Only the top 15 from each sample are shown

r if(!params$inclD || !params$hasAbun) {"\\end{comment}"}

r if(!params$hasAbun) {"\\begin{comment}"}

Row

J family

fname <- .getFile(path = file.path(params$rootDir, "abundance"),
                  pattern = ".*igj_dist_family_level\\.Rdata")
if (!is.na(fname)) {
    load(fname)
    if (params$interactive) {
        ggplotly(plot)
    } else {
        plot
    }
} else {
    ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
}

J variant

fname <- .getFile(path = file.path(params$rootDir, "abundance"),
                  pattern = ".*igj_dist_variant_level\\.Rdata")
if (!is.na(fname)) {
    load(fname)
    # TODO: ggplotly is buggy with horizontal plot in tabsets
    if (params$interactive) {
        plot
        #ggplotly(plot)
    } else {
        plot
    }
} else {
    ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
}

Only the top 15 from each sample are shown

r if(!params$hasAbun) {"\\end{comment}"}

r if(!params$hasAbun || !params$single) {"\\begin{comment}"}

Row

V-J association

fname <- .getFile(path = file.path(params$rootDir, "abundance"), pattern = ".*_vjassoc\\.png")
if (!is.na(fname)) {
    img <- readPNG(fname)
    grid.raster(img)
} else {
    ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
}

Plot associations between V genes and J genes on the family level

r if(!params$hasAbun || !params$single) {"\\end{comment}"}

r if(!params$hasProd) {"\\begin{comment}"}

Productivity {data-orientation=rows data-icon="fa-check"}

Row

Productivity summary

fname <- .getFile(path = file.path(params$rootDir, "productivity"),
                  pattern = ".*_productivity\\.Rdata")
if (!is.na(fname)) {
    load(fname)
    if (params$interactive) {
        # TODO: buggy facet plot
        plot
        # ggplotly(plot)
    } else {
        plot
    }
} else {
    ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5,
                                                                y = 5, label =
                                                                    "404!")
}

Productivity distribution of sample based on stop codons and frameshifts

Frameshift distribution

fname <- .getFile(path = file.path(params$rootDir, "productivity"),
                  pattern = ".*_vjframe_dist\\.Rdata")

if (!is.na(fname)) {
    load(fname)
    if (params$interactive) {
        ggplotly(plot)
    } else {
        plot
    }
} else {
    ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5,
                                                                y = 5, label =
                                                                    "404!")
}

Distribution of in-frame and out-of-frame sequences

Row {.tabset .tabset-fade}

titles <- c("FR1", "CDR1", "FR2", "CDR2", "FR3", "CDR3")
frcdrMisGaps <- lapply(seq_along(titles), function(i) {
    line1 <- knitr::knit_expand(text = sprintf("### %s\n", titles[i]))
    line2 <- knitr::knit_expand(text = sprintf("\n```r", i))
    line3 <- knitr::knit_expand(text = sprintf('\nfname <- .getFile(file.path(params$rootDir, "productivity"), pattern = ".*_%s_mismatches_dist\\\\.Rdata")', tolower(titles[i])))
    line4 <- knitr::knit_expand(text = sprintf('\nif (!is.na(fname)) { load(fname); if (params$interactive) { mismatch <- ggplotly(plot + labs(title = "Mismatches, gaps and gaps in out-of-frame sequences in %s")) } else { mismatch <- plot } } else { df <- data.frame(); mismatch <- ggplot(df) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable") }', titles[i]))
    line5 <- knitr::knit_expand(text = sprintf('\nfname <- .getFile(file.path(params$rootDir, "productivity"), pattern = ".*_%s_gaps_dist\\\\.Rdata")', tolower(titles[i])))
    line6 <- knitr::knit_expand(text = '\nif (!is.na(fname)) { load(fname); if (params$interactive) { gaps <- ggplotly(plot + labs(title = "")) } else { gaps <- plot } } else { df <- data.frame(); gaps <- ggplot(df) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")}')
    line7 <- knitr::knit_expand(text = sprintf('\nfname <- .getFile(file.path(params$rootDir, "productivity"), pattern = ".*_%s_gaps_dist_out_of_frame\\\\.Rdata")', tolower(titles[i])))
    line8 <- knitr::knit_expand(text = '\nif (!is.na(fname)) { load(fname); if (params$interactive) { gapsOut <- ggplotly(plot + labs(title = "")) } else { gapsOut <- plot } } else { df <- data.frame(); gapsOut <- ggplot(df) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable") }')
    line9 <- knitr::knit_expand(text = "\nif (params$interactive) { subplot(mismatch, gaps, gapsOut, titleX = TRUE, titleY = TRUE) } else { grid.arrange(mismatch, gaps, gapsOut, nrow = 1)}")
    line10 <- knitr::knit_expand(text = "\n```\n")
    line11 <- knitr::knit_expand(text = sprintf("\n> Proportion of mismatches in productive sequences, gaps in productive sequences, and gaps in out-of-frame sequences in %s region\n", titles[i]))
    paste(line1, line2, line3, line4, line5, line6, line7, line8, line9, line10,
          line11, collapse = "\n")
})

if (params$inclD) {
    titles <- c("IGV", "IGD", "IGJ")
} else {
    titles <- c("IGV", "IGJ")
}
vdjMisGaps <- lapply(seq_along(titles), function(i) {
    line1 <- knitr::knit_expand(text = sprintf("### %s\n", titles[i]))
    line2 <- knitr::knit_expand(text = sprintf("\n```r", i))
    line3 <- knitr::knit_expand(text = sprintf('\nfname <- .getFile(file.path(params$rootDir, "productivity"), pattern = ".*_%s_mismatches_dist\\\\.Rdata")', tolower(titles[i])))
    line4 <- knitr::knit_expand(text = sprintf('\nif (!is.na(fname)) { load(fname); if (params$interactive) { mismatch <- ggplotly(plot + labs(title = "Mismatches and gaps in %s")) } else { mismatch <- plot } } else { df <- data.frame(); mismatch <- ggplot(df) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable") }', titles[i]))
    line5 <- knitr::knit_expand(text = sprintf('\nfname <- .getFile(file.path(params$rootDir, "productivity"), pattern = ".*_%s_gaps_dist\\\\.Rdata")', tolower(titles[i])))
    line6 <- knitr::knit_expand(text = '\nif (!is.na(fname)) { load(fname); if (params$interactive) { gaps <- ggplotly(plot + labs(title = "")) } else { gaps <- plot } } else { df <- data.frame(); gaps <- ggplot(df) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable") }')
    line7 <- knitr::knit_expand(text = "\nif (params$interactive) { subplot(mismatch, gaps, titleX = TRUE, titleY = TRUE) } else { grid.arrange(mismatch, gaps, nrow = 1)}")
    line8 <- knitr::knit_expand(text = "\n```\n")
    line9 <- knitr::knit_expand(text = sprintf("\n> Proportion of mismatches and gaps in productive sequences in %s\n", titles[i]))
    paste(line1, line2, line3, line4, line5, line6, line7, line8, line9, collapse = "\n")
})

r paste(knitr::knit(text = paste(frcdrMisGaps, collapse = "\n"))) r paste(knitr::knit(text = paste(vdjMisGaps, collapse = "\n")))

Row {.tabset .tabset-fade}

IGV productive distribution

fname <- .getFile(path = file.path(params$rootDir, "productivity"),
                  pattern = ".*_igv_dist_productive\\.Rdata")

if (!is.na(fname)) {
    load(fname)
    if (params$interactive) {
        ggplotly(plot)
    } else {
        plot
    }
} else {
    ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
}

Distribution of productive sequences from IGV family

IGV inframe, unproductive distribution

fname <- .getFile(path = file.path(params$rootDir, "productivity"),
                  pattern = ".*_igv_dist_inframe_unproductive\\.Rdata")

if (!is.na(fname)) {
    load(fname)
    if (params$interactive) {
        ggplotly(plot)
    } else {
        plot
    }
} else {
    ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
}

Distribution of in-frame but unproductive sequences from IGV family

IGV out-of-frame distribution

fname <- .getFile(path = file.path(params$rootDir, "productivity"),
                  pattern = ".*_igv_dist_out_of_frame\\.Rdata")

if (!is.na(fname)) {
    load(fname)
    if (params$interactive) {
        ggplotly(plot)
    } else {
        plot
    }
} else {
    ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
}

Distribution of out-of-frame sequences from IGV family

Row {.tabset .tabset-fade}

Inframe

fname <- .getFile(path = file.path(params$rootDir, "productivity"),
                  pattern = ".*_stopcodon_region_inframe\\.Rdata", .stop = FALSE)
if (is.na(fname)) {
    ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
} else {
    load(fname)
    if (params$interactive) {
        ggplotly(plot)
    } else {
        plot
    }
}

Distribution of stop codons in CDR/FR regions from in-frame sequences

Out-of-frame

fname <- .getFile(path = file.path(params$rootDir, "productivity"),
                  pattern = ".*_stopcodon_region_outframe\\.Rdata", .stop = FALSE)
if (is.na(fname)) {
    ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
} else {
    load(fname)
    if (params$interactive) {
        ggplotly(plot)
    } else {
        plot
    }
}

Distribution of stop codons in CDR/FR regions from out-of-frame sequences

r if(!params$hasProd) {"\\end{comment}"}

r if(!params$hasDiv) {"\\begin{comment}"}

Diversity {data-orientation=rows data-icon="fa-chart-line"}

Row {.tabset .tabset-fade}

CDR-V

fname <- .getFile(path = file.path(params$rootDir, "diversity"),
                  pattern = ".*_cdr_v_duplication\\.Rdata")

if (!is.na(fname)) {
    load(fname)
    dup <- plot + labs(title = "Sequence duplication level, rarefaction, and recapture")
} else {
    dup <- ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
}

fname <- .getFile(path = file.path(params$rootDir, "diversity"),
                  pattern = ".*_cdr_v_rarefaction\\.Rdata")

if (!is.na(fname)) {
    load(fname)
    rare <- plot + labs(title = "")
} else {
    rare <- ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
}

fname <- .getFile(path = file.path(params$rootDir, "diversity"),
                  pattern = ".*_cdr_v_recapture\\.Rdata")
if (!is.na(fname)) {
    load(fname)
    recap <- plot + labs(title = "")
} else {
    recap <- ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
}

if (params$interactive) {
    dup <- hide_legend(dup)
    rare <- hide_legend(rare)
    p <- subplot(hide_legend(dup), hide_legend(rare), recap,
                 titleX = TRUE, titleY = TRUE)
    .p <- plotly_build(p)
    for (i in seq_along(.p$x$data)) {
        if (length(grep(",\\d+(,NA)?\\)$", .p$x$data[[i]]$name))) {
            .p$x$data[[i]]$showlegend <- FALSE
        }
    }
    .p
} else {
    grid.arrange(dup, rare, recap, nrow = 1)
}

Duplication level, rarefaction and recapture plots for CDR (regions) and (V)ariable domain

CDR

fname <- .getFile(path = file.path(params$rootDir, "diversity"),
                  pattern = ".*_cdr_duplication\\.Rdata")

if (!is.na(fname)) {
    load(fname)
    dup <- plot + labs(title = "Sequence duplication level, rarefaction, and recapture")
} else {
    dup <- ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
}

fname <- .getFile(path = file.path(params$rootDir, "diversity"),
                  pattern = ".*_cdr_rarefaction\\.Rdata")

if (!is.na(fname)) {
    load(fname)
    rare <- plot + labs(title = "")
} else {
    rare <- ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
}

fname <- .getFile(path = file.path(params$rootDir, "diversity"),
                  pattern = ".*_cdr_recapture\\.Rdata")
if (!is.na(fname)) {
    load(fname)
    recap <- plot + labs(title = "")
} else {
    recap <- ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
}

if (params$interactive) {
    dup <- hide_legend(dup)
    rare <- hide_legend(rare)
    p <- subplot(hide_legend(dup), hide_legend(rare), recap,
                 titleX = TRUE, titleY = TRUE)
    .p <- plotly_build(p)
    for (i in seq_along(.p$x$data)) {
        if (length(grep(",\\d+(,NA)?\\)$", .p$x$data[[i]]$name))) {
            .p$x$data[[i]]$showlegend <- FALSE
        }
    }
    .p
} else {
    grid.arrange(dup, rare, recap, nrow = 1)
}

Duplication level, rarefaction and recapture plots for CDR (regions)

FR

fname <- .getFile(path = file.path(params$rootDir, "diversity"),
                  pattern = ".*_fr_duplication\\.Rdata")

if (!is.na(fname)) {
    load(fname)
    dup <- plot + labs(title = "Sequence duplication level, rarefaction, and recapture")
} else {
    dup <- ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
}

fname <- .getFile(path = file.path(params$rootDir, "diversity"),
                  pattern = ".*_fr_rarefaction\\.Rdata")

if (!is.na(fname)) {
    load(fname)
    rare <- plot + labs(title = "")
} else {
    rare <- ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
}

fname <- .getFile(path = file.path(params$rootDir, "diversity"),
                  pattern = ".*_fr_recapture\\.Rdata")
if (!is.na(fname)) {
    load(fname)
    recap <- plot + labs(title = "")
} else {
    recap <- ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
}

if (params$interactive) {
    dup <- hide_legend(dup)
    rare <- hide_legend(rare)
    p <- subplot(hide_legend(dup), hide_legend(rare), recap,
                 titleX = TRUE, titleY = TRUE)
    .p <- plotly_build(p)
    for (i in seq_along(.p$x$data)) {
        if (length(grep(",\\d+(,NA)?\\)$", .p$x$data[[i]]$name))) {
            .p$x$data[[i]]$showlegend <- FALSE
        }
    }
    .p
} else {
    grid.arrange(dup, rare, recap, nrow = 1)
}

Duplication level, rarefaction and recapture plots for FR (regions)

Row {.tabset .tabset-fade}

regions <-
    c(
    "FR1" = "_fr1_spectratype",
    "CDR1" = "_cdr1_spectratype",
    "FR2" = "_fr2_spectratype",
    "CDR2" = "_cdr2_spectratype",
    "FR3" = "_fr3_spectratype",
    "CDR3" = "_cdr3_spectratype",
    "CDR3 without outliers" = "_cdr3_spectratype_no_outliers",
    "FR4" = "_fr4_spectratype",
    "V domain" = "_v_spectratype"
    )
specOut <- lapply(seq_along(regions), function(i) {
    line1 <- knitr::knit_expand(text = sprintf("### %s\n", names(regions)[[i]]))
    line2 <- knitr::knit_expand(text = sprintf("\n```r\n", i))
    line3 <- knitr::knit_expand(text = sprintf('\nfname <- .getFile(path = file.path(params$rootDir, "diversity", "spectratypes"), pattern = ".*%s\\\\.Rdata")\n', regions[[i]]))
    line4 <- knitr::knit_expand(text = '\nif (!is.na(fname)) { load(fname); if (params$interactive) { ggplotly(plot); } else { plot; } } else { ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable"); }')
    line5 <- knitr::knit_expand(text = "\n```\n")
    line6 <- knitr::knit_expand(text = sprintf("\n> Amino acid length distribution for %s region\n", names(regions)[[i]]))
    paste(line1, line2, line3, line4, line5, line6, collapse = "\n")
})

r if (params$hasDiv) { paste(knitr::knit(text = paste(specOut, collapse = '\n'))) }

r if(!params$hasDiv) {"\\end{comment}"}

r if(!params$hasDiv || !params$single) {"\\begin{comment}"}

Row {.tabset .tabset-fade}

regions = c("FR1", "CDR1", "FR2", "CDR2", "FR3", "CDR3", "FR4")
compOut <- lapply(seq_along(regions), function(i) {
    line1 <- knitr::knit_expand(text = sprintf("### %s\n", regions[i]))
    line2 <- knitr::knit_expand(text = sprintf("\n```r\n", i))
    line3 <- knitr::knit_expand(text = sprintf('\nfname <- .getFile(file.path(params$rootDir, "diversity", "composition_logos", "%s"), pattern = ".*_cumulative_logo\\\\.Rdata", .callback = .compositionNoGene)', regions[i]))
    line4 <- knitr::knit_expand(text = '\nremoveLegend <- FALSE; if (!is.na(fname)) { load(fname); unscaled <- plot; } else { unscaled <- ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "404!"); }')
    line5 <- knitr::knit_expand(text = sprintf('\nfname <- .getFile(file.path(params$rootDir, "diversity", "composition_logos", "%s"), pattern = ".*_cumulative_logo_scaled\\\\.Rdata", .callback = .compositionNoGene)', regions[i]))
    line6 <- knitr::knit_expand(text = '\nif (!is.na(fname)) { load(fname); scaled <- plot; removeLegend <- TRUE; } else { scaled <- ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "404!"); }')
    line7 <- knitr::knit_expand(text = '\nif (params$interactive) { if (removeLegend) { unscaled <- hide_legend(ggplotly(unscaled)) }; p <- subplot(unscaled, scaled); .p <- plotly_build(p); for (i in seq_along(.p$x$data)) { if (length(grep("x\\\\d+", .p$x$data[[i]]$xaxis))) { .p$x$data[[i]]$showlegend <- FALSE } }; .p; } else { grid.arrange(unscaled, scaled, nrow = 1) }')
    line8 <- knitr::knit_expand(text = "\n```\n")
    if (regions[i] == "FR3") {
        line9 <- knitr::knit_expand(text = "\n> Composition logo plot was truncated to the first 30 amino acids.\n")
    } else {
        line9 <- knitr::knit_expand(text = sprintf("\n> Amino acid composition logos in %s - unscaled (sum to 1 in each individual position) and scaled (to the max number of sequences in a given position).\n", regions[i]))
    }
    paste(line1, line2, line3, line4, line5, line6, line7, line8, line9, collapse = "\n")
})

r if (params$single && params$hasDiv) { paste(knitr::knit(text = paste(compOut, collapse = '\n'))) }

Row {.tabset .tabset-fade}

regions = c("FR1", "CDR1", "FR2", "CDR2", "FR3", "CDR3", "FR4")
motifOut <- lapply(seq_along(regions), function(i) {
    line1 <- knitr::knit_expand(text = sprintf("### %s (aligned, unaligned)\n", regions[i]))
    line2 <- knitr::knit_expand(text = sprintf("\n```r", i))
    line3 <- knitr::knit_expand(text = sprintf('\nfname <- .getFile(path = file.path(params$rootDir, "diversity", "motifs"), pattern = ".*%s_motif_aligned_logo\\\\.png")', tolower(regions[i])))
    line4 <- knitr::knit_expand(text = '\nif (!is.na(fname)) { imgAl <- rasterGrob(as.raster(readPNG(fname)), interpolate = FALSE) } else { imgAl <- ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable") }')
    line5 <- knitr::knit_expand(text = sprintf('\nfname <- .getFile(path = file.path(params$rootDir, "diversity", "motifs"), pattern = ".*%s_motif_logo\\\\.png")', tolower(regions[i])))
    line6 <- knitr::knit_expand(text = '\nif (!is.na(fname)) { img <- rasterGrob(as.raster(readPNG(fname)), interpolate = FALSE) } else { img <- ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable") }')
    line7 <- knitr::knit_expand(text = "\ngrid.arrange(imgAl, img, nrow = 1)")
    line8 <- knitr::knit_expand(text = "\n```\n")
    line9 <- knitr::knit_expand(text = sprintf("\n> (Left) %s aligned using clustal-omega (Right) unaligned motif discovery\n", regions[i]))
    paste(line1, line2, line3, line4, line5, line6, line7, line8, line9, collapse = "\n")
})

r if (params$single && params$hasDiv) { paste(knitr::knit(text = paste(motifOut, collapse = '\n'))) }

r if(!params$hasDiv || !params$single) {"\\end{comment}"}

r if(!params$hasDiv || params$single) {"\\begin{comment}"}

Clonotypes {data-orientation=rows data-icon="fa-dna"}

Row {.tabset .tabset-fade}

files <- list.files(path = file.path(params$rootDir, "clonotype_analysis"),
                    pattern = ".*_clone_scatter\\.png",
                    full.names = TRUE)
scatterOut <- lapply(seq_along(files), function(i) {
    line1 <- knitr::knit_expand(text = sprintf("### %s\n",
                                               gsub("_", " ",
                                                    stringr::str_extract(basename(files[[i]]),
                                                                         "^.*(?=(_clone_scatter))"))))
    line2 <- knitr::knit_expand(text = sprintf("\n```r\n", i))
    line3 <- knitr::knit_expand(text = sprintf('grid.raster(readPNG("%s"))', files[[i]]))
    line4 <- knitr::knit_expand(text = "\n```\n")
    line5 <- knitr::knit_expand(text = "\n> Clonotype frequencies from 2 samples. The axis values are scaled to log10 of the clone proportion. The marginal density plots show the density of total(grey), non-overlapping(purple), and overlapping(blue) clonotype abundances.\n")
    paste(line1, line2, line3, line4, line5, collapse = "\n")
})

r if(!params$single && params$hasDiv) { paste(knitr::knit(text = paste(scatterOut, collapse = '\n'))) }

Row

Distribution of top 10 clonotypes from each sample

fname <- .getFile(path = file.path(params$rootDir, "clonotype_analysis"), pattern = ".*_top10Clonotypes\\.Rdata")
if (!is.na(fname)) {
    load(fname)
    if (params$interactive) {
        ggplotly(plot)
    } else {
        plot
    }
} else {
    ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
}

Clonotype distributions are scaled to top 10

Number of overlapping and non-overlapping clonotypes

fname <- .getFile(path = file.path(params$rootDir, "clonotype_analysis"), pattern = ".*_cdr3_clonotypeIntersection\\.png")
if (!is.na(fname)) {
    img <- readPNG(fname)
    grid.raster(img)
} else {
    ggplot(data.frame()) +
        xlim(0, 10) +
        ylim(0, 10) +
        annotate("text", x = 5, y = 5, label = "Can't find clonotype venn diagram, you probably have > 5 samples")
}

Counts show number of clonotypes

Row

Pearson correlation

fname <- .getFile(path = file.path(params$rootDir, "clonotype_analysis"), pattern = "pearson\\.Rdata")
if (!is.na(fname)) {
    load(fname)
    if (params$interactive) {
        ggplotly(plot)
    } else {
        plot
    }
} else {
    ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
}

Spearman correlation

fname <- .getFile(path = file.path(params$rootDir, "clonotype_analysis"), pattern = "spearman\\.Rdata")
if (!is.na(fname)) {
    load(fname)
    if (params$interactive) {
        ggplotly(plot)
    } else {
        plot
    }
} else {
    ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
}

Row

Morisita Horn

fname <- .getFile(path = file.path(params$rootDir, "clonotype_analysis"), pattern = "morisita_horn\\.Rdata")
if (!is.na(fname)) {
    load(fname)
    plot
} else {
    ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
}

Bray Curtis

fname <- .getFile(path = file.path(params$rootDir, "clonotype_analysis"), pattern = "bray_curtis\\.Rdata")
if (!is.na(fname)) {
    load(fname)
    plot
} else {
    ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
}

Jaccard

fname <- .getFile(path = file.path(params$rootDir, "clonotype_analysis"), pattern = "jaccard\\.Rdata")
if (!is.na(fname)) {
    load(fname)
    plot
} else {
    ggplot(data.frame()) + xlim(0, 10) + ylim(0, 10) + annotate("text", x = 5, y = 5, label = "Plot unavailable")
}

r if(!params$hasDiv || params$single) {"\\end{comment}"}

malhamdoosh/abseqR documentation built on May 24, 2019, 12:36 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

malhamdoosh/abseqR Reporting and data analysis functionalities for Rep-Seq datasets of antibody libraries

In malhamdoosh/abseqR: Reporting and data analysis functionalities for Rep-Seq datasets of antibody libraries

Summary {data-orientation=rows data-icon="fa-list-ul"}

Row

Row

Sequence lengths without outlier removal

Row

Sequence lengths with outlier removal

Quality {data-orientation=rows data-icon="fa-clipboard-list"}

Row

Indels (Gaps)

Mismatches

Row

Row

Row

Placeholder

Abundance {data-orientation=rows data-icon="fa-chart-bar"}

Row

V family

V gene

Row

D family

D gene

Row

J family

J variant

Row

V-J association

Productivity {data-orientation=rows data-icon="fa-check"}

Row

Productivity summary

Frameshift distribution

Row {.tabset .tabset-fade}

Row {.tabset .tabset-fade}

IGV productive distribution

IGV inframe, unproductive distribution

IGV out-of-frame distribution

Row {.tabset .tabset-fade}

Inframe

Out-of-frame

Diversity {data-orientation=rows data-icon="fa-chart-line"}

Row {.tabset .tabset-fade}

CDR-V

CDR

FR

Row {.tabset .tabset-fade}

Row {.tabset .tabset-fade}

Row {.tabset .tabset-fade}

Clonotypes {data-orientation=rows data-icon="fa-dna"}

Row {.tabset .tabset-fade}

Row

Distribution of top 10 clonotypes from each sample

Number of overlapping and non-overlapping clonotypes

Row

Pearson correlation

Spearman correlation

Row

Morisita Horn

Bray Curtis

Jaccard

R Package Documentation

Browse R Packages

We want your feedback!

malhamdoosh/abseqR
Reporting and data analysis functionalities for Rep-Seq datasets of antibody libraries