R/plotCNA.R
In CopywriteR: Copy number information from targeted sequencing using off-target reads

Documented in plotCNA

plotCNA <- function(destination.folder, smoothed = TRUE, sample.plot, y.min,
                    y.max, ...) {

    start.time <- Sys.time()
    
    ## Restore work directory upon exit
    wd.orig <- getwd()
    on.exit(setwd(wd.orig))

    ## Make destination folder path absolute
    destination.folder <- tools::file_path_as_absolute(destination.folder)
    destination.folder <- file.path(destination.folder, "CNAprofiles")

    ## Provide output to log
    flog.appender(appender.file(file.path(destination.folder,
                                          "CopywriteR.log")))
    args.ellipsis <- unlist(list(...))
    flog.info(paste0("plotCNA was run using the following commands:", "\n\n",
                     "plotCNA(destination.folder = \"",
                     dirname(destination.folder),
                     "\", smoothed = ", smoothed,
                     if (!missing(sample.plot)) {", sample.plot = sample.plot"},
                     if (!missing(y.min)) {paste0(", y.min = ", y.min)},
                     if (!missing(y.max)) {paste0(", y.max = ", y.max)},
                     if (!missing(...)) {paste0(", ", paste(names(args.ellipsis),
                                                "=", args.ellipsis,
                                                collapse = ", "))},
                     ")"))

    ## Check the existence of folders and files
    if (!file.exists(destination.folder)) {
        stop(.wrap("The destination folder could not be found. Please change",
                   "the path specified in", sQuote(destination.folder)))
    }

    # Pre-define inputStructure variable to avoid it from raising NOTES during
    # R CMD CHECK (variable is loaded from file below)
    inputStructure <- NULL

    # Load inputStructure variable
    load(file.path(destination.folder, "input.Rdata"))

    ## Set variables
    chromosomes <- inputStructure$chromosomes
    nautosomes <- length(grep("[0-9]", chromosomes))
    prefix <- inputStructure$prefix
    bin.size <- inputStructure$bin.size

    ## Set sample.plot
    if (missing(sample.plot)) {
        sample.plot <- apply(inputStructure$sample.control, c(1, 2),
                             function(x) {
            x <- paste0("log2.", basename(x))
        })
        all.samples <- unique(as.vector(sample.plot))
        sample.plot <- rbind(data.frame(sample.plot, stringsAsFactors = FALSE),
                             data.frame(samples = all.samples,
                                        controls = rep(NA, length(all.samples)), 
                                        stringsAsFactors = FALSE))
    } else {
        colnames(sample.plot) <- c("samples", "controls")
        sample.plot[, ] <- apply(sample.plot, c(1, 2), function(x) {
            if (!is.na(x)) {
                x <- paste0("log2.", gsub("_properreads", "", basename(x)))
            } else {
                x <- as.character(x)
            }
        })
    }

    ## Read data
    log2.read.counts <- read.table(file = file.path(destination.folder,
                                                    "log2_read_counts.igv"),
                                   sep = "\t", header = TRUE,
                                   check.names = FALSE)

    ## Remove prefix and convert chromosome names to integers
    log2.read.counts$Chromosome <- gsub(prefix, "", log2.read.counts$Chromosome)
    chromosomes <- gsub(prefix, "", chromosomes)
    
    log2.read.counts$Chromosome <- gsub("X", nautosomes + 1,
                                        log2.read.counts$Chromosome)
    chromosomes <- gsub("X", nautosomes + 1, chromosomes)
    
    log2.read.counts$Chromosome <- gsub("Y", nautosomes + 2,
                                        log2.read.counts$Chromosome)
    chromosomes <- gsub("Y", nautosomes + 2, chromosomes)
    
    log2.read.counts$Chromosome <- as.integer(log2.read.counts$Chromosome)
    chromosomes <- sort(as.integer(chromosomes))

    ## Fix behaviour of DNAcopy with 'outlier' values
    log2.read.counts[, 5:ncol(log2.read.counts)] <-
        apply(log2.read.counts[, 5:ncol(log2.read.counts), drop = FALSE],
              c(1, 2), function(x) {
        if (is.na(x)) {
            x <- NA
        } else if (x < -5) {
            x <- -5
        } else if (x > 10) {
            x <- 10
        } else {
            x <- x
        }
    })

    ## Create table with values to be plotted
    if (all(na.omit(unlist(sample.plot)) %in% colnames(log2.read.counts))) {
        plotting.values <-
            log2.read.counts[, c("Chromosome", "Start", "End", "Feature")]
        for (i in seq_len(nrow(sample.plot))) {
            if (!is.na(sample.plot$controls[i])) {
                plotting.values[, paste0(sample.plot$samples[i], ".vs.",
                                         sample.plot$controls[i])] <- 
                    log2.read.counts[, sample.plot$samples[i]] -
                    log2.read.counts[, sample.plot$controls[i]]
            } else {
                plotting.values[, paste0(sample.plot$samples[i], ".vs.none")] <-
                    log2.read.counts[, sample.plot$samples[i]]
            }
        }
    } else {
        unique.samples <- unique(na.omit(unlist(sample.plot)))
        print(unique.samples)
        print(unique.samples %in% colnames(log2.read.counts))
        missing.samples <- unique.samples[!unique.samples %in% colnames(log2.read.counts)]
        print(missing.samples)
        stop(.wrap("The following samples refer to .bam files that have not",
                   "been processed by CopywriteR:",
                   paste(sQuote(missing.samples), collapse = ", "),
                   ". Please re-analyze all required samples using CopywriteR",
                   "and run the plotCNA function again."))
    }

    ## Apply DNAcopy
    CNA.object <- CNA(plotting.values[, 5:ncol(plotting.values), drop = FALSE],
                      plotting.values$Chromosome,
                      rowMeans(plotting.values[, c("Start", "End")]),
                      data.type = "logratio", 
                      sampleid = colnames(plotting.values)[5:ncol(plotting.values)])
    if (smoothed) {
        CNA.object <- smooth.CNA(CNA.object)
    }
    segment.CNA.object <- segment(CNA.object, verbose = 1, ...)
    save(segment.CNA.object, file = file.path(destination.folder,
                                              "segment.Rdata"))

    ## Calculate the chromosome lengths from the bin.bed file
    chrom.lengths <-
        scanBamHeader(inputStructure$sample.control$samples[1])[[1]]$targets
    names(chrom.lengths) <- gsub(prefix, "", names(chrom.lengths))    
    chrom.lengths <- data.frame(Chromosome = names(chrom.lengths),
                                Length = chrom.lengths, row.names = NULL,
                                stringsAsFactors = FALSE)
    suppressWarnings(chrom.lengths <- chrom.lengths[chrom.lengths$Chromosome %in% c("X", "Y") |
                                                    !is.na(as.integer(as.vector(chrom.lengths$Chromosome))), ])
    chrom.lengths <- chrom.lengths[mixedorder(chrom.lengths$Chromosome), ]
    chrom.lengths$Chromosome <- gsub("X", nautosomes + 1,
                                     chrom.lengths$Chromosome)
    chrom.lengths$Chromosome <- gsub("Y", nautosomes + 2,
                                     chrom.lengths$Chromosome)
    chrom.lengths[, "CumSum"] <- c(0, cumsum(as.numeric(chrom.lengths$Length))[seq_len(nrow(chrom.lengths) - 1)])
    
    ## Create plots
    segment.CNA.object$output[, "start.position.chrom"] <-
        chrom.lengths$CumSum[match(as.integer(segment.CNA.object$output$chrom),
                                   chrom.lengths$Chromosome)]
    segment.CNA.object$output$loc.start <- segment.CNA.object$output$loc.start + segment.CNA.object$output$start.position.chrom
    segment.CNA.object$output$loc.end <- segment.CNA.object$output$loc.end + segment.CNA.object$output$start.position.chrom
    segment.CNA.object$output$start.position.chrom <- NULL
    
    segment.CNA.object$data[, "start.position.chrom"] <-
        chrom.lengths$CumSum[match(as.integer(segment.CNA.object$data$chrom),
                                   chrom.lengths$Chromosome)]
    segment.CNA.object$data$maploc <- segment.CNA.object$data$maploc + segment.CNA.object$data$start.position.chrom
    segment.CNA.object$data$start.position.chrom <- NULL

    # Get sample names
    samples <- colnames(segment.CNA.object$data)
    samples <- samples[3:length(samples)]

    # Create plots folder
    plot.folder <- file.path(destination.folder, "plots")
    dir.create(plot.folder)
    setwd(plot.folder)

    # Set boundaries plots
    if (missing(y.min)) {
        y.min <- -2
    }
    if (missing(y.max)) {
        y.max <- 5
    }

    # Loop through samples using lapply
    invisible(lapply(samples, function(x) {
        # Select sample
        select.sample <- x
        current.sample <- segment.CNA.object
        current.sample$data <-
            current.sample$data[, c("chrom", "maploc", select.sample)]
        current.sample$data <-
            current.sample$data[!is.na(current.sample$data[, select.sample]), ]
        current.sample$output <-
            current.sample$output[current.sample$output$ID == select.sample, ]

        # Print MAD-values in log file
        flog.info(paste("The MAD-value for sample", select.sample, "is:",
                        round(madDiff(current.sample$data[, select.sample]), 3)))
                            
        # Create and set new directory
        dir.create(file.path(plot.folder, select.sample))
        setwd(file.path(plot.folder, select.sample))

        # Loop through chromosomes using lapply
        invisible(lapply(c(as.list(chromosomes), list(chromosomes)),
                         function(x) {
            # Select chromosome
            select.chrom <- x
            current.sample$data <- current.sample$data[current.sample$data$chrom %in% select.chrom, ]
            current.sample$output <- current.sample$output[current.sample$output$chrom %in% select.chrom, ]

            # Set variables
            genome.position.min <-
                chrom.lengths$CumSum[match(min(select.chrom),
                                           chrom.lengths$Chromosome)]
            genome.position.max <-
                chrom.lengths$CumSum[match(max(select.chrom),
                                           chrom.lengths$Chromosome)] +
                                     chrom.lengths$Length[match(max(select.chrom), chrom.lengths$Chromosome)]

            # Plot data
            if (length(x) > 1) {
                name.chrom <- "all_chrom"
            } else {
                name.chrom <- paste0("chrom_", x)
            }
            pdf(file = paste0(name.chrom, ".pdf"), width = 14, height = 7)
            plot(current.sample$data[, "maploc"],
                 current.sample$data[, select.sample], ylim = c(y.min, y.max),
                 pch = ".", cex = 1, xlim = c(genome.position.min, 
                                              genome.position.max),
                 xaxs = "i", xlab = "", ylab = "log2 value", xaxt = "n",
                 main = select.sample)

            points(current.sample$data$maploc[current.sample$data[, select.sample] > y.max],
                   rep.int(y.max, sum(current.sample$data[, select.sample] > y.max)),
                   cex = 0.25, col = "green", pch = 2)
            points(current.sample$data$maploc[current.sample$data[, select.sample] < y.min],
                   rep.int(y.min, sum(current.sample$data[, select.sample] < y.min)),
                   cex = 0.25, col = "red", pch = 6)

            invisible(apply(current.sample$output, 1, function(x) {
                segments(x0 = as.numeric(x["loc.start"]),
                         y0 = as.numeric(x["seg.mean"]),
                         x1 = as.numeric(x["loc.end"]),
                         col = "red", pch = ".", cex = 0.2)
            }))
            par(xpd = TRUE)
            text(x = 0.96 * (genome.position.max - genome.position.min) + genome.position.min,
                 y = 0.075 * (y.max - y.min) + y.max,
                 labels = paste0("mad = ", round(madDiff(current.sample$data[, select.sample]), 3)))
            text(x = 0.035 * (genome.position.max - genome.position.min) + genome.position.min,
                 y = 0.075 * (y.max - y.min) + y.max,
                 labels = paste0(bin.size / 1000, " kb bins"))
            par(xpd = FALSE)
            ticks <- (chrom.lengths$CumSum + chrom.lengths$Length / 2)[select.chrom]
            axis(1, at = ticks, labels = select.chrom)
            if (length(select.chrom) > 1) {
                abline(v = chrom.lengths$CumSum[2:nrow(chrom.lengths)],
                       lty = "dotted")
            }
            dev.off()
        }))
    }))
    flog.info(paste("Total calculation time of plotCNA was",
                    round(difftime(Sys.time(), start.time, units = "mins"), 2),
                    "minutes"))
    cat("Total calculation time of CopywriteR was: ",
        Sys.time() - start.time, "\n\n")
}
Any scripts or data that you put into this service are public.
CopywriteR documentation built on Nov. 8, 2020, 7:50 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
CopywriteR
Copy number information from targeted sequencing using off-target reads

R/plotCNA.R
In CopywriteR: Copy number information from targeted sequencing using off-target reads

Defines functions plotCNA

Documented in plotCNA

Try the CopywriteR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

CopywriteR Copy number information from targeted sequencing using off-target reads

R/plotCNA.R In CopywriteR: Copy number information from targeted sequencing using off-target reads

Defines functions plotCNA

Documented in plotCNA

Try the CopywriteR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

CopywriteR
Copy number information from targeted sequencing using off-target reads

R/plotCNA.R
In CopywriteR: Copy number information from targeted sequencing using off-target reads