Nothing
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE, fig.height = 4.5, fig.width = 8) knitr::opts_knit$set(progress = FALSE, root.dir = params$workdir)
suppressWarnings(suppressMessages(library(RCAS))) # Connect to sqlite database mydb <- RSQLite::dbConnect(RSQLite::SQLite(), params$dbPath) # Read sample table and check format sampleTable <- read.table(params$sampleTablePath, header = TRUE,stringsAsFactors = FALSE) if(sum(c('sampleName', 'sampleGroup') %in% colnames(sampleTable)) != 2) { stop("The sample table must consist of at least two columns (separated by white space(s)) with headers 'sampleName' and 'sampleGroup'. See ",params$sampleTable) } if(nrow(sampleTable) < 2) { stop("The sample table must contain at least one entry. See ",sampleTable) } #set up which chunks to run depending on which tables are available in mydb #1. geneOverlaps table evalGeneOverlaps <- FALSE if(RSQLite::dbExistsTable(mydb, 'geneOverlaps')){ geneOverlaps <- RSQLite::dbReadTable(mydb, 'geneOverlaps') samplesNotFound <- setdiff(sampleTable$sampleName, colnames(geneOverlaps)) if(length(samplesNotFound) > 0){ warning("Samples with names '",paste0(samplesNotFound, collapse = ', '), "' do not exist in 'geneOverlaps' table. Skipping this table") } else { evalGeneOverlaps <- TRUE geneOverlaps <- subset(geneOverlaps, select = sampleTable$sampleName) } } #2. annotationSummaries table evalAnnotationSummaries <- FALSE if(RSQLite::dbExistsTable(mydb, 'annotationSummaries')){ AS <- RSQLite::dbReadTable(mydb, 'annotationSummaries') annotationSummaries <- as.matrix(AS[,c(-1)]) rownames(annotationSummaries) <- AS$sampleName samplesNotFound <- setdiff(sampleTable$sampleName, rownames(annotationSummaries)) if(length(samplesNotFound) > 0) { warning("Samples with names '",paste0(samplesNotFound, collapse = ', '), "' do not exist in 'annotationSummaries' table. Skipping this table") } else { evalAnnotationSummaries <- TRUE annotationSummaries <- annotationSummaries[sampleTable$sampleName,] } } #3. discoveredMotifs table evalMotifDiscovery <- FALSE if(RSQLite::dbExistsTable(mydb, 'discoveredMotifs')) { motifData <- RSQLite::dbReadTable(mydb, 'discoveredMotifs') samplesNotFound <- setdiff(sampleTable$sampleName, motifData$sampleName) if(length(samplesNotFound) > 0) { warning("Samples with names '",paste0(samplesNotFound, collapse = ', '), "' do not exist in 'discoveredMotifs' table. Skipping this table") } else { evalMotifDiscovery <- TRUE motifData <- motifData[motifData$sampleName %in% sampleTable$sampleName,] } } #4. featureBoundaryCoverageProfiles table evalCoverageProfiles <- FALSE if(RSQLite::dbExistsTable(mydb, 'featureBoundaryCoverageProfiles')) { cvg <- RSQLite::dbReadTable(mydb, 'featureBoundaryCoverageProfiles') samplesNotFound <- setdiff(sampleTable$sampleName, cvg$sampleName) if(length(samplesNotFound) > 0) { warning("Samples with names '",paste0(samplesNotFound, collapse = ', '), "' do not exist in 'featureBoundaryCoverageProfiles' table. Skipping this table") } else { evalCoverageProfiles <- TRUE cvg <- cvg[cvg$sampleName %in% sampleTable$sampleName,] cvg$sampleGroup <- sampleTable[match(cvg$sampleName, sampleTable$sampleName),]$sampleGroup } } #initialize figure and table counts for captions figureCount <- 1 tableCount <- 1
cat("# Distance of samples by various metrics\n")
cat('## Jaccard distance based on shared target genes\n') cat("**Figure",figureCount,":** Jaccard distance based on shared target genes. Here we plot the distance matrix between samples based on the Jaccard index computed for each pairwise sample based on how many genes the compared pair of samples co-overlap. \n")
SM <- as.matrix(proxy::dist(x = as.matrix(geneOverlaps), method = 'Jaccard', by_rows = FALSE, diag = TRUE, upper = TRUE)) pheatmap::pheatmap(SM, display_numbers = FALSE) figureCount <- figureCount + 1
cat("## Clustering of samples based on overlaps with transcript features\n") cat("**Figure",figureCount,":** Clustering of samples based on overlaps with transcript features\n")
annDF <- data.frame('sampleGroup' = sampleTable[match(rownames(annotationSummaries), sampleTable$sampleName),]$sampleGroup) rownames(annDF) <- rownames(annotationSummaries) pheatmap::pheatmap(t(annotationSummaries), scale = 'column', annotation_col = annDF) figureCount <- figureCount + 1
cat("# Motif Discovery\n") cat("**Table",tableCount,":** Feature specific motifs discovered for each sample\n")
DT::datatable(motifData, extensions = c('Buttons', 'FixedColumns'), options = list(fixedColumns = TRUE, scrollX = TRUE, dom = 'Bfrtip', buttons = c('copy', 'print', 'csv','excel', 'pdf')), filter = 'bottom' ) tableCount <- tableCount + 1
cat("# Feature Boundary Coverage Profiles\n")
cat("## Smoothed line plots\n") cat("**Figure",figureCount,":** Feature boundary coverage profiles for each sample\n")
#scale meanCoverage values by sampleName and feature cvgScaled <- do.call(rbind, lapply(split(cvg, cvg$sampleName), FUN = function(df) { df$scaledCoverage <- scale(df$meanCoverage) return(df) })) p <- plotly::ggplotly(ggplot2::ggplot(cvgScaled, aes(x = bases, y = scaledCoverage)) + geom_vline(xintercept = 0, color = 'gray') + facet_grid(feature ~ boundary) + geom_smooth(aes(color = sampleGroup, alpha = scaledCoverage)) + theme_minimal()) layout(p) figureCount <- figureCount + 1
plotPCA <- function(cv, title = '') { cvM <- dcast(cv, sampleName ~ boundary + bases, value.var = 'meanCoverage') M <- as.matrix(cvM[,-1]) rownames(M) <- cvM[,1] pca1 <- prcomp(M) scores = as.data.frame(pca1$x) scores$sampleName <- cv[match(rownames(scores), cv$sampleName),]$sampleName scores$sampleGroup <- cv[match(rownames(scores), cv$sampleName),]$sampleGroup p <- ggplot2::ggplot(data = scores, aes(x = PC1, y = PC2, label = sampleName)) + geom_point(aes(color = sampleGroup), size = 4) + geom_hline(yintercept = 0, colour = "gray65") + geom_vline(xintercept = 0, colour = "gray65") + labs(title = title) + theme_minimal(base_size = 18) %+replace% theme(legend.position = 'bottom', legend.title = element_blank()) return(p) } pcaPlots <- lapply(unique(cvg$feature), function(f) { plotPCA(cv = cvg[cvg$feature == f,], title = f) }) names(pcaPlots) <- unique(cvg$feature)
cat("## PCA plots {.tabset}\n") for (i in 1:length(pcaPlots)) { cat("### ",names(pcaPlots)[i],"\n") print(pcaPlots[[i]]) cat('\n\n') } figureCount <- figureCount + 1
RSQLite::dbDisconnect(mydb)
RCAS is developed in the group of Altuna Akalin (head of the Scientific Bioinformatics Platform) by Bora Uyar (Bioinformatics Scientist), Dilmurat Yusuf (Bioinformatics Scientist) and Ricardo Wurmus (System Administrator) at the Berlin Institute of Medical Systems Biology (BIMSB) at the Max-Delbrueck-Center for Molecular Medicine (MDC) in Berlin.
RCAS is developed as a bioinformatics service as part of the RNA Bioinformatics Center, which is one of the eight centers of the German Network for Bioinformatics Infrastructure (de.NBI).
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.