R/riboseq.R
In systemPipeR: systemPipeR: NGS workflow and report generation environment

Documented in featureCoverage featuretypeCounts genFeatures plotfeatureCoverage plotfeaturetypeCounts predORF scaleRanges

######################################################
## Generate various feature types from TxDb objects ##
######################################################
genFeatures <- function(txdb, featuretype="all", reduce_ranges, upstream=1000, downstream=0, verbose=TRUE) {
    ## Check validity of inputs
    supported_features <- c("tx_type", "promoter", "intron", "exon", "cds", "fiveUTR", "threeUTR", "intergenic")
    if(tolower(featuretype[1])=="all") featuretype <- supported_features
    if(class(txdb)!="TxDb") stop("Argument 'txdb' needs to be assigned object of class TxDb!")
    if(any(!featuretype %in% supported_features)) stop("featuretype supports any of the following values: ", paste(supported_features, collapse=", "))
	
    ## Empty GRangesList to store results
    featuresGRl <- GRangesList()
    seqinfo(featuresGRl) <- seqinfo(txdb)
    
    ## Empty GRanges object for feature types that don't exist
    gr_empty <- GRanges()
    mcols(gr_empty) <- DataFrame(feature_by=character(), featuretype_id=character(), featuretype=character())
    seqinfo(gr_empty) <- seqinfo(txdb)

    ## Gene/transcript id mappings (required for some features)
    ids <- mcols(transcripts(txdb,  columns=c("tx_id", "tx_name", "gene_id")))
    ge_id <- unstrsplit(ids$gene_id)
    names(ge_id) <- ids$tx_id
	
    ## Transcript ranges: each 'tx_type' as separate GRanges object (reduced by gene)
    if("tx_type" %in% featuretype) {
        tx <- transcripts(txdb, columns=c("tx_name", "gene_id", "tx_type"))
        if(length(tx)==0) { # If range set is empty, append 'gr_empty'.
            featuresGRl <- c(featuresGRl, GRangesList("transcript"=gr_empty))
        } else {
            mcols(tx)$tx_type <- ifelse(is.na(mcols(tx)$tx_type), "transcript", mcols(tx)$tx_type)
            tx_type <- unique(mcols(tx)$tx_type)
            for(i in seq_along(tx_type)) {
                tmp <- tx[mcols(tx)$tx_type==tx_type[i]]
                if(reduce_ranges==TRUE) {
                    tmp <- reduce(split(tmp, as.character(mcols(tmp)$gene_id))) 
                    tmp <- unlist(tmp)
                    feature_id <- paste(names(tmp), ":", tx_type[i], "_red", sep="")
                    mcols(tmp) <- DataFrame(feature_by=names(tmp), featuretype_id=feature_id, featuretype=paste0(tx_type[i], "_red"))
                    names(tmp) <- seq_along(tmp)
                    featuresGRl <- c(featuresGRl, GRangesList(tmp=tmp))
                    names(featuresGRl)[length(featuresGRl)] <- paste0(tx_type[i], "_red")
                } else {
                    mcols(tmp) <- DataFrame(feature_by=mcols(tmp)$gene_id, featuretype_id=mcols(tmp)$tx_name, featuretype=tx_type[i])
                    featuresGRl <- c(featuresGRl, GRangesList(tmp=tmp))
                    names(tmp) <- seq_along(tmp)
                    names(featuresGRl)[length(featuresGRl)] <- tx_type[i]
                }
            }
            if(verbose==TRUE) cat("Created feature ranges:", paste(tx_type, collapse=", "), "\n")
        }
    }

    ## Create promoter ranges reduced by gene
	if("promoter" %in% featuretype) {
        mycheck <- suppressWarnings(promoters(transcriptsBy(txdb, "gene"), upstream, downstream))
        if(length(mycheck)==0) { # If range set is empty, append 'gr_empty'.
            featuresGRl <- c(featuresGRl, GRangesList("promoter"=gr_empty))
        } else {
            if(reduce_ranges==TRUE) {
                mypromoters <- suppressWarnings(unlist(reduce(promoters(transcriptsBy(txdb, "gene"), upstream, downstream))))
                mypromoters <- trim(mypromoters)
                feature_id <- paste0(names(mypromoters), ":P_red")
                mcols(mypromoters) <- DataFrame(feature_by=names(mypromoters), featuretype_id=feature_id, featuretype="promoter_red")
                names(mypromoters) <- seq_along(mypromoters)
                featuresGRl <- c(featuresGRl, GRangesList("promoter_red"=mypromoters))
            } else {
                mypromoters <- suppressWarnings(promoters(txdb, upstream, downstream, columns=c("tx_name", "gene_id", "tx_type")))
                mypromoters <- trim(mypromoters)
                mcols(mypromoters) <- DataFrame(feature_by=as(mcols(mypromoters)$gene_id, "CharacterList"), featuretype_id=mcols(mypromoters)$tx_name, featuretype="promoter")
                featuresGRl <- c(featuresGRl, GRangesList("promoter"=mypromoters))
            }
            if(verbose==TRUE) cat("Created feature ranges: promoter", "\n")
        }
    }
    
    ## Create intron ranges reduced by gene
	if("intron" %in% featuretype) {
        ## Introns by transcript
		myintrons <- intronsByTranscript(txdb)
        if(length(myintrons)==0) { # If range set is empty, append 'gr_empty'.
            featuresGRl <- c(featuresGRl, GRangesList("intron"=gr_empty))
        } else {
            ## Convert to introns by genes
            intron_count <- sapply(width(myintrons), length) 
            names(intron_count) <- ge_id[names(intron_count)]
            intron_count <- intron_count[intron_count!=0]
            ge_id_intron <- rep(names(intron_count), intron_count)
            myintrons <- unlist(myintrons)
            myintrons <- split(myintrons, ge_id_intron) # Introns by gene
            if(reduce_ranges==TRUE) {
                ## Reduce introns by gene, use gene ID for naming and append counter
                myintrons <- reduce(myintrons) 
                intron_red_count <- sapply(width(myintrons), length) 
                ge_id_red_intron <- rep(names(intron_red_count), intron_red_count)
                feature_id <- paste(ge_id_red_intron, sprintf("I%03d_red", unlist(sapply(as.integer(intron_red_count), function(x) seq(from=1, to=x)))), sep=":")
                myintrons <- unlist(myintrons) 
                mcols(myintrons) <- DataFrame(feature_by=ge_id_red_intron, featuretype_id=feature_id, featuretype="intron_red")
                names(myintrons) <- seq_along(myintrons)
                featuresGRl <- c(featuresGRl, GRangesList("intron_red"=myintrons))
            } else {
                intron_red_count <- sapply(width(myintrons), length) 
                ge_id_red_intron <- rep(names(intron_red_count), intron_red_count)
                feature_id <- paste(ge_id_red_intron, sprintf("I%03d", unlist(sapply(as.integer(intron_red_count), function(x) seq(from=1, to=x)))), sep=":")
                myintrons <- unlist(myintrons) 
                mcols(myintrons) <- DataFrame(feature_by=as(ge_id_red_intron, "CharacterList"), featuretype_id=feature_id, featuretype="intron")
                names(myintrons) <- seq_along(myintrons)
                featuresGRl <- c(featuresGRl, GRangesList("intron"=myintrons))
            }
            if(verbose==TRUE) cat("Created feature ranges: intron", "\n")
	    }
    }
    
    ## Create exon ranges reduced by gene 
	if("exon" %in% featuretype) {
        ## exons by gene
		myexons <- exonsBy(txdb, "gene")
        if(length(myexons)==0) { # If range set is empty, append 'gr_empty'.
            featuresGRl <- c(featuresGRl, GRangesList("exon"=gr_empty))
        } else {
            if(reduce_ranges==TRUE) {
                myexons <- reduce(myexons)
                exon_red_count <- sapply(width(myexons), length) 
                ge_id_red_exon <- rep(names(exon_red_count), exon_red_count)
                feature_id <- paste(ge_id_red_exon, sprintf("E%03d_red", unlist(sapply(as.integer(exon_red_count), function(x) seq(from=1, to=x)))), sep=":")
                myexons <- unlist(myexons) 
                mcols(myexons) <- DataFrame(feature_by=ge_id_red_exon, featuretype_id=feature_id, featuretype="exon_red")
                names(myexons) <- seq_along(myexons)
                featuresGRl <- c(featuresGRl, GRangesList("exon_red"=myexons))
            } else {
                exon_red_count <- sapply(width(myexons), length) 
                ge_id_red_exon <- rep(names(exon_red_count), exon_red_count)
                feature_id <- paste(ge_id_red_exon, sprintf("E%03d", unlist(sapply(as.integer(exon_red_count), function(x) seq(from=1, to=x)))), sep=":")
                myexons <- unlist(myexons) 
                mcols(myexons) <- DataFrame(feature_by=as(ge_id_red_exon, "CharacterList"), featuretype_id=feature_id, featuretype="exon")
                names(myexons) <- seq_along(myexons)
                featuresGRl <- c(featuresGRl, GRangesList("exon"=myexons))
            
            }
            if(verbose==TRUE) cat("Created feature ranges: exon", "\n")
        }
    }
    
    ## Create CDS ranges reduced by gene 
	if("cds" %in% featuretype) {
        ## CDS by gene
		mycds <- cdsBy(txdb, "gene")
        if(length(mycds)==0) { # If range set is empty, append 'gr_empty'.
            featuresGRl <- c(featuresGRl, GRangesList("cds"=gr_empty))
        } else {
            if(reduce_ranges==TRUE) {
                mycds <- reduce(mycds)
                cds_red_count <- sapply(width(mycds), length) 
                ge_id_red_cds <- rep(names(cds_red_count), cds_red_count)
                feature_id <- paste(ge_id_red_cds, sprintf("CDS%03d_red", unlist(sapply(as.integer(cds_red_count), function(x) seq(from=1, to=x)))), sep=":")
                mycds <- unlist(mycds) 
                mcols(mycds) <- DataFrame(feature_by=ge_id_red_cds, featuretype_id=feature_id, featuretype="cds_red")
                names(mycds) <- seq_along(mycds)
                featuresGRl <- c(featuresGRl, GRangesList("cds_red"=mycds))
            } else {
                cds_red_count <- sapply(width(mycds), length) 
                ge_id_red_cds <- rep(names(cds_red_count), cds_red_count)
                feature_id <- paste(ge_id_red_cds, sprintf("CDS%03d", unlist(sapply(as.integer(cds_red_count), function(x) seq(from=1, to=x)))), sep=":")
                mycds <- unlist(mycds) 
                mcols(mycds) <- DataFrame(feature_by=as(ge_id_red_cds, "CharacterList"), featuretype_id=feature_id, featuretype="cds")
                names(mycds) <- seq_along(mycds)
                featuresGRl <- c(featuresGRl, GRangesList("cds"=mycds))
            }
            if(verbose==TRUE) cat("Created feature ranges: cds", "\n")
        }
    }

    ## Create 5'UTR ranges reduced by gene
	if("fiveUTR" %in% featuretype) {
        ## 5'UTRs by transcript
		myfiveutr <- fiveUTRsByTranscript(txdb)
        if(length(myfiveutr)==0) { # If range set is empty, append 'gr_empty'.
            featuresGRl <- c(featuresGRl, GRangesList("fiveUTR"=gr_empty))
        } else {
            ## Convert to 5'UTRs by genes
            utr_count <- sapply(width(myfiveutr), length) 
            names(utr_count) <- ge_id[names(utr_count)]
            utr_count <- utr_count[utr_count!=0]
            ge_id_utr <- rep(names(utr_count), utr_count)
            myfiveutr <- unlist(myfiveutr)
            myfiveutr <- split(myfiveutr, ge_id_utr) # UTRs by gene
            if(reduce_ranges==TRUE) {
                ## Reduce introns by gene, use gene ID for naming and append counter
                myfiveutr <- reduce(myfiveutr) 
                utr_red_count <- sapply(width(myfiveutr), length) 
                ge_id_red_utr <- rep(names(utr_red_count), utr_red_count)
                feature_id <- paste(ge_id_red_utr, sprintf("fiveUTR%03d_red", unlist(sapply(as.integer(utr_red_count), function(x) seq(from=1, to=x)))), sep=":")
                myfiveutr <- unlist(myfiveutr) 
                mcols(myfiveutr) <- DataFrame(feature_by=ge_id_red_utr, featuretype_id=feature_id, featuretype="fiveUTR_red")
                names(myfiveutr) <- seq_along(myfiveutr)
                featuresGRl <- c(featuresGRl, GRangesList("fiveUTR_red"=myfiveutr))
            } else {
                utr_red_count <- sapply(width(myfiveutr), length) 
                ge_id_red_utr <- rep(names(utr_red_count), utr_red_count)
                feature_id <- paste(ge_id_red_utr, sprintf("fiveUTR%03d", unlist(sapply(as.integer(utr_red_count), function(x) seq(from=1, to=x)))), sep=":")
                myfiveutr <- unlist(myfiveutr) 
                mcols(myfiveutr) <- DataFrame(feature_by=as(ge_id_red_utr, "CharacterList"), featuretype_id=feature_id, featuretype="fiveUTR")
                names(myfiveutr) <- seq_along(myfiveutr)
                featuresGRl <- c(featuresGRl, GRangesList("fiveUTR"=myfiveutr))
            }    
            if(verbose==TRUE) cat("Created feature ranges: fiveUTR", "\n")
	    }
    }
    
    ## Create 3'UTR ranges reduced by gene
	if("threeUTR" %in% featuretype) {
        ## 3'UTRs by transcript
		mythreeutr <- threeUTRsByTranscript(txdb)
        if(length(mythreeutr)==0) { # If range set is empty, append 'gr_empty'.
            featuresGRl <- c(featuresGRl, GRangesList("threeUTR"=gr_empty))
        } else {
            ## Convert to 3'UTRs by genes
            utr_count <- sapply(width(mythreeutr), length) 
            names(utr_count) <- ge_id[names(utr_count)]
            utr_count <- utr_count[utr_count!=0]
            ge_id_utr <- rep(names(utr_count), utr_count)
            mythreeutr <- unlist(mythreeutr)
            mythreeutr <- split(mythreeutr, ge_id_utr) # UTRs by gene
            if(reduce_ranges==TRUE) {
                ## Reduce introns by gene, use gene ID for naming and append counter
                mythreeutr <- reduce(mythreeutr) 
                utr_red_count <- sapply(width(mythreeutr), length) 
                ge_id_red_utr <- rep(names(utr_red_count), utr_red_count)
                feature_id <- paste(ge_id_red_utr, sprintf("threeUTR%03d_red", unlist(sapply(as.integer(utr_red_count), function(x) seq(from=1, to=x)))), sep=":")
                mythreeutr <- unlist(mythreeutr) 
                mcols(mythreeutr) <- DataFrame(feature_by=ge_id_red_utr, featuretype_id=feature_id, featuretype="threeUTR_red")
                names(mythreeutr) <- seq_along(mythreeutr)
                featuresGRl <- c(featuresGRl, GRangesList("threeUTR_red"=mythreeutr))
            } else {
                utr_red_count <- sapply(width(mythreeutr), length) 
                ge_id_red_utr <- rep(names(utr_red_count), utr_red_count)
                feature_id <- paste(ge_id_red_utr, sprintf("threeUTR%03d", unlist(sapply(as.integer(utr_red_count), function(x) seq(from=1, to=x)))), sep=":")
                mythreeutr <- unlist(mythreeutr) 
                mcols(mythreeutr) <- DataFrame(feature_by=as(ge_id_red_utr, "CharacterList"), featuretype_id=feature_id, featuretype="threeUTR")
                names(mythreeutr) <- seq_along(mythreeutr)
                featuresGRl <- c(featuresGRl, GRangesList("threeUTR"=mythreeutr))
            }
            if(verbose==TRUE) cat("Created feature ranges: threeUTR", "\n")
	    }
    }

    ## Create intergenic ranges
	if("intergenic" %in% featuretype) {
        # if(verbose==TRUE & any(is.na(seqlengths(txdb)))) warning("seqlengths missing for: ", paste(head(names(seqlengths(txdb))[is.na(seqlengths(txdb))]), collapse=", "), ". Thus, corresponding chromosome end ranges will be missing in intergenic results.")
        ge <- genes(txdb)
        if(length(ge)==0) { # If range set is empty, append 'gr_empty'.
            featuresGRl <- c(featuresGRl, GRangesList("intergenic"=gr_empty))
        } else {
            myseqinfo <- seqinfo(ge)
            seqlengths(ge) <- NA # Note: the following ignores chromosome end ranges on the right end
            mynames <- names(ge)
            strand(ge) <- "*"
            ge <- reduce(ge, with.revmap=TRUE)
            myids <- character(length = length(ge))
            revmaplist <- as.list(mcols(ge)$revmap)
            for(i in seq(along=ge)) myids[i] <- paste(mynames[revmaplist[[i]]], collapse="_")
            myintergenics <- gaps(ge)
            myintergenics <- myintergenics[strand(myintergenics)=="*"] # Removes chromosome ranges created by gaps when seqlengths available
            # index <- findOverlaps(ge, myintergenics, minoverlap=0L) 
            index <- findOverlaps(ge, myintergenics, maxgap=0L, minoverlap=0L) # change necessary since default in IRanges changed from maxgap=0L to maxgap=-1L
            myids <- tapply(myids[as.matrix(index)[,1]], as.matrix(index)[,2], paste, collapse="__")
            if(reduce_ranges==TRUE) {
                mcols(myintergenics) <- DataFrame(feature_by=sprintf("INTER%08d", seq_along(myids)), featuretype_id=as.character(myids), featuretype="intergenic")
            } else {
                mcols(myintergenics) <- DataFrame(feature_by=as(sprintf("INTER%08d", seq_along(myids)), "CharacterList"), featuretype_id=as.character(myids), featuretype="intergenic")
            }
            names(myintergenics) <- seq_along(myintergenics)
            seqinfo(myintergenics) <- myseqinfo
            featuresGRl <- c(featuresGRl, GRangesList("intergenic"=myintergenics))
            if(verbose==TRUE) cat("Created feature ranges: intergenic", "\n")
	    }
    }

    ## Return results
    return(featuresGRl)
}
## Usage:
# file <- system.file("extdata/annotation", "tair10.gff", package="systemPipeRdata")
# txdb <- makeTxDbFromGFF(file=file, format="gff3", organism="Arabidopsis")
# feat <- genFeatures(txdb, featuretype="all", reduce_ranges=TRUE, upstream=1000, downstream=0, verbose=TRUE)

#############################################################
## Compute and Plot Read Distribution Across Feature Types ##
#############################################################
## Compute distribution of reads across feature types
featuretypeCounts <- function(bfl, grl, singleEnd=TRUE, readlength=NULL, type="data.frame") {
    ## Check for valid inputs
    if(!(is.null(readlength[1]) | is.integer(readlength))) stop("readlength needs to be assigned NULL or integer vector")
    if(length(type) != 1) stop("Argument needs to be character vector of length 1.")
    if(!any(c("list", "data.frame") %in% type)) stop("Argument 'type' needs to be assigned one of 'list' or 'data.frame'")
    
    ## List container for storing results
    resultlist <- sapply(names(bfl), function(x) list(NULL))
    
    ## Vector to store counts of total aligned reads
    totalcounts <- numeric(length(bfl)); names(totalcounts) <- names(bfl)

    ## Total length (in bps) of each reduced feature type
    feattypelength <- sapply(names(grl), function(x) sum(width(reduce(grl[[x]]))))

    ## Read count for every feature given in gr (coming from gff)
    for(i in names(grl)) strand(grl[[i]][strand(grl[[i]])=="*"]) <- "+" # Sets "*" to "+" to avoid double-counting of reads in this strand-specific method
    feature <- names(grl)
    featurewithtotal <- c(names(grl), "N_total_aligned")
    for(j in seq(along=bfl)) {
        if(is.null(readlength[1])) {
            myMA <- matrix(rep(0, length(featurewithtotal)*1), nrow=length(featurewithtotal), ncol=1, dimnames=list(featurewithtotal, "anyreadlength"))
        } else {
            myMA <- matrix(rep(0, length(featurewithtotal)*length(readlength)), nrow=length(featurewithtotal), ncol=length(readlength), dimnames=list(featurewithtotal, readlength))
        }
        myMAsense <- myMA
        bf <- open(bfl[[j]])
        while(isIncomplete(bf)) {
            if(singleEnd==TRUE) {
                aligns <- readGAlignments(bf, use.names=FALSE)
            } else if(singleEnd==FALSE) {
                aligns <- readGAlignmentPairs(bf, use.names=FALSE)
            } else {
                stop("singleEnd needs to be TRUE or FALSE")
            }
            for(i in feature) {
                gr <- grl[[i]]
                counter <- which(feature==i)
                ## Non-strand-specific counts
                alignssub <- subsetByOverlaps(aligns, gr, ignore.strand=TRUE)
                if(singleEnd==TRUE) {
                    if(is.null(readlength[1])) {
                        counts <- length(alignssub)
                        if(counter==1) totalcounts <- length(aligns)
                    } else {
                        counts <- table(qwidth(alignssub))[colnames(myMA)]
                        if(counter==1) totalcounts <- table(qwidth(aligns))[colnames(myMA)]
                    } 
                } else { # The following reports for PE reads the mean read length for each pair
                    if(is.null(readlength[1])) {
                        counts <- length(alignssub)
                        if(counter==1) totalcounts <- length(aligns)
                    } else {
                        mywidth <- round((qwidth(last(alignssub)) + qwidth(first(alignssub)))/2)
                        counts <- table(mywidth)[colnames(myMA)]
                        if(counter==1) { 
                            totalmywidth <- round((qwidth(last(aligns)) + qwidth(first(aligns)))/2)
                            totalcounts <- table(totalmywidth)[colnames(myMA)]
                        }
                    } 
                }
                counts[is.na(counts)] <- 0
                myMA[i,] <- myMA[i,] + counts
                if(counter==1) {
                    totalcounts[is.na(totalcounts)] <- 0
                    myMA["N_total_aligned",] <- myMA["N_total_aligned",] + totalcounts
                }
                ## Strand-specific counts
                alignssub <- subsetByOverlaps(aligns, gr, ignore.strand=FALSE)
                if(singleEnd==TRUE) {
                    if(is.null(readlength[1])) {
                        sensecounts <- length(alignssub)
                        if(counter==1) totalsensecounts <- length(aligns[strand(aligns)=="+"])
                    } else {
                        sensecounts <- table(qwidth(alignssub))[colnames(myMAsense)]
                        if(counter==1) totalsensecounts <- table(qwidth(aligns[strand(aligns)=="+"]))[colnames(myMAsense)]
                    }
                } else { # The following reports for PE reads the mean read length for each pair
                    if(is.null(readlength[1])) {
                        sensecounts <- length(alignssub)
                        if(counter==1) totalsensecounts <- length(aligns[strand(aligns)=="+"])
                    } else {
                        mywidth <- round((qwidth(last(alignssub)) + qwidth(first(alignssub)))/2)
                        sensecounts <- table(mywidth)[colnames(myMA)]
                        if(counter==1) { 
                            totalmywidth <- round((qwidth(last(aligns)) + qwidth(first(aligns)))/2)
                            totalsensecounts <- table(totalmywidth)[colnames(myMA)]
                        }
                    } 
                }
                sensecounts[is.na(sensecounts)] <- 0
                myMAsense[i,] <- myMAsense[i,] + sensecounts
                if(counter==1) {
                    totalsensecounts[is.na(totalsensecounts)] <- 0
                    myMAsense["N_total_aligned",] <- myMAsense["N_total_aligned",] + totalsensecounts
                }
            }
        }
        close(bf)
        myMAantisense <- myMA-myMAsense
        myMAantisense[myMAantisense < 0] <- 0; myMAsense[myMAsense <- 0] <- 0
        myMAsense <- data.frame(SampleName=names(bfl)[j], Strand="Sense", Featuretype=row.names(myMAsense), Featuretypelength=as.numeric(feattypelength[row.names(myMAsense)]), myMAsense, check.names=FALSE)
        myMAantisense <- data.frame(SampleName=names(bfl)[j], Strand="Antisense", Featuretype=row.names(myMAantisense), Featuretypelength=as.numeric(feattypelength[row.names(myMAantisense)]), myMAantisense, check.names=FALSE)
        resultlist[[j]] <- list(Sense=myMAsense, Antisense=myMAantisense)
        cat(paste0("Processed sample ", j, ": ", names(bfl)[j]), "\n")
    }
    
    ## Object types to return
    if(type=="list") {
        return(resultlist)
    } else if(type=="data.frame") {
        resultdf <- do.call("rbind", unlist(resultlist, recursive=FALSE))
        row.names(resultdf) <- NULL
        return(resultdf)
    } else {
        stop("Argument 'type' needs to be assigned one of 'list' or 'data.frame'")
    }
}

## Usage: 
# featureCounts <- featuretypeCounts(bfl=BamFileList(outpaths(args), yieldSize=50000), grl=feat, singleEnd=TRUE, readlength=c(74:76,99:102), type="data.frame")
# write.table(featureCounts, "results/featureCounts.xls", quote=FALSE, row.names=FALSE, sep="\t")
# featureCounts2 <- featuretypeCounts(bfl=BamFileList(outpaths(args), yieldSize=50000), grl=feat, singleEnd=TRUE, readlength=NA, type="data.frame")
# write.table(featureCounts2, "results/featureCounts2.xls", quote=FALSE, row.names=FALSE, sep="\t")

## Plot distribution of reads across feature types
plotfeaturetypeCounts <- function(x, graphicsfile, graphicsformat="pdf", scales="fixed", anyreadlength=FALSE, drop_N_total_aligned=TRUE, scale_count_val=10^6, scale_length_val=NULL) { 
    ## Input validity checks
    if(class(x)!="data.frame") stop("x needs to be object of class 'data.frame'")
    if(any(colnames(x)[1:3]!=c("SampleName", "Strand", "Featuretype"))) stop("First three column names need to be: 'SampleName', 'Strand', 'Featuretype'")
    if(!is.numeric(as.matrix(x[,-c(1:3)]))) stop("Following columns (after 3rd one) need to be numeric.")
    
    ## Store Featuretypelength and remove it from x
    featuretypelength <- tapply(x$Featuretypelength, x$Featuretype, unique)
    featuretypelength <- featuretypelength[!is.na(featuretypelength)]
    x <- x[, !colnames(x) %in% "Featuretypelength"]
        
    ## Get numbers of total aligned reads per sample
    N_total_aligned_DF <- x[x$Featuretype=="N_total_aligned",]
    if(colnames(N_total_aligned_DF)[4]!="anyreadlength") N_total_aligned_DF <- data.frame(N_total_aligned_DF[,1:3], anyreadlength=rowSums(N_total_aligned_DF[,-c(1:3)]))
    N_total_aligned <- tapply(as.numeric(as.vector(N_total_aligned_DF[,4])), N_total_aligned_DF$SampleName, sum)

    ## Eliminate total read counts to not show them in plots
    if(drop_N_total_aligned==TRUE) x <- x[x$Featuretype!="N_total_aligned",]
    
    ## Sum up read length counts if 'anyreadlength=TRUE'  
    if(colnames(x)[4]!="anyreadlength" & anyreadlength==TRUE) x <- data.frame(x[,1:3], anyreadlength=rowSums(x[,-c(1:3)]))
    
    ## Scale per x reads (e.g. per million reads)
    if(is.numeric(scale_count_val[1])) {
        if(is.numeric(scale_length_val[1])) {
            correctv <- scale_length_val/featuretypelength[as.character(x$Featuretype)]
            for(i in seq_along(colnames(x))[-c(1:3)]) x[,i] <- as.numeric(as.vector(x[,i])) * (scale_count_val/N_total_aligned[x$SampleName]) * correctv
        } else if(length(scale_length_val) == 0) {
            for(i in seq_along(colnames(x))[-c(1:3)]) x[,i] <- as.numeric(as.vector(x[,i])) * (scale_count_val/N_total_aligned[x$SampleName])
        } else {
            stop("'scale_length_val' needs to be assinged NULL or numeric value.")
        }
    } else if(is.null(scale_count_val[1])) {
        if(is.numeric(scale_length_val[1])) {
            correctv <- scale_length_val/featuretypelength[as.character(x$Featuretype)]
            for(i in seq_along(colnames(x))[-c(1:3)]) x[,i] <- as.numeric(as.vector(x[,i])) * correctv
        } else if(length(scale_length_val) == 0) {
            x <- x
        } else {
            stop("'scale_length_val' needs to be assinged NULL or numeric value.")
        }
    } else {
        stop("'scale_count_val' needs to be assinged NULL or numeric value.")
    }

    ## Define plotting order of samples
    x[,1] <- factor(x[,1], levels=unique(x[,1]), ordered=TRUE) 
    
    ## Label for count axis
    if(length(scale_count_val)==0 & length(scale_length_val)==0) axis_label <- "Raw counts"
    if(length(scale_count_val)>0 & length(scale_length_val)>0) axis_label <- paste("Counts normalized per", scale_count_val, "reads", "and", scale_length_val, "bp feature length")
    if(length(scale_count_val)>0 & length(scale_length_val)==0) axis_label <- paste("Counts normalized per", scale_count_val, "reads") 
    if(length(scale_count_val)==0 & length(scale_length_val)>0) axis_label <- paste("Counts normalized per", scale_length_val, "bp feature length")

    ## Plot without read length resolution
    if(colnames(x)[4]=="anyreadlength") {
        x[,3] <- factor(x[,3], levels=sort(as.character(unique(x[,3])), decreasing=TRUE), ordered=TRUE) # Defines plotting order of bars!!!
        colnames(x)[3:4] <- c("Feature", "Counts") # Assign nicer names for plot
        myplot <- ggplot(x, aes(x=Feature, y=Counts)) + 
                        geom_bar(aes(fill=Strand), position="stack", stat="identity") + 
                        facet_wrap(~SampleName) + 
                        scale_y_continuous(axis_label) +
                        theme(axis.text.x=element_text(angle = -90, hjust = 0, vjust=0.4)) +
                        coord_flip()
                        
        get(graphicsformat)(graphicsfile)
            print(myplot)
        dev.off()
        cat("Generated graphics file", graphicsfile, "\n")
        return(myplot)
    ## Plot with read length resolution
    } else {
    ## Split data on samples (SampleName column) and convert to format expected by ggplot2
        .convert2DFlist <- function(x) {
            featureCountslist <- split(x, x$SampleName, drop=TRUE)
            featureCountslist <- featureCountslist[unique(as.character(x$SampleName))]
            for(j in seq_along(featureCountslist)) {
                df <- featureCountslist[[j]]
                tmpDF <- data.frame()
                for(i in seq_along(df[,4:length(df[1,])])) {
                    tmpDF <- rbind(tmpDF, data.frame(df[,1:3], Counts=df[,i+3], Length=colnames(df[,i+3,drop=FALSE])))
                }
                featureCountslist[[j]] <- tmpDF
            }
            return(featureCountslist)
        }
        featureCountslist <- .convert2DFlist(x)
    
        ## Get maximum read counts in data set after summing up strand spec counts
        tmp <- data.frame(paste(x[,1], x[,3], sep="_"), x[,-c(1:3)])
        mymax <- max(t(sapply(unique(tmp[,1]), function(x) colSums(tmp[tmp[,1]==x,-1]))))
        ## Generate plotting instructions
        myplotlist <- sapply(names(featureCountslist), function(x) NULL, simplify=FALSE)
        for(i in seq(along=featureCountslist)) {
            myplot <- ggplot(featureCountslist[[i]], aes(x=Length, y=Counts)) + 
                             geom_bar(aes(fill=Strand), position="stack", stat="identity") + 
                             facet_wrap(~Featuretype, ncol=1, scales=scales) + 
                             theme(legend.position="bottom") +
                             theme(axis.text.x=element_text(angle=-90, hjust=0, vjust=0.4)) +
                             ggtitle(featureCountslist[[i]][1,1])
            ## Assure same scale for all panels if scales="fixed"
            if(scales=="fixed") {
                myplot <- myplot + scale_y_continuous(axis_label, limits=c(0, mymax)) 
           } else {
                myplot <- myplot + scale_y_continuous(axis_label) 
           }
            myplotlist[[i]] <- myplot
        }
    
        ## Generate graphics and write to file in pdf, png or jpeg format.
        if(tolower(graphicsformat)=="pdf") mydim <- c(height=20, width=6*length(myplotlist))
        if(tolower(graphicsformat) %in% c("png", "jpeg")) mydim <- c(height=20*96, width=6*length(myplotlist)*96)
        get(graphicsformat)(graphicsfile, width=mydim["width"], height=mydim["height"])
            grid::grid.newpage() # Open a new page on grid device
            grid::pushViewport(grid::viewport(layout = grid::grid.layout(1, length(myplotlist)))) # Assign to device viewport with 1 by 2 grid layout 
            for(i in seq(along=myplotlist)) print(myplotlist[[i]], vp = grid::viewport(layout.pos.row = 1, layout.pos.col = i))
        dev.off()
        cat("Generated graphics file", graphicsfile, "\n")
        return(myplotlist)
    }
}

## Usage: 
# library(ggplot2); library(grid)
# featureCounts <- read.delim("results/featureCounts.xls", check.names=FALSE)
# myplots <- plotfeaturetypeCounts(x=featureCounts, graphicsfile="results/featureCounts.pdf", graphicsformat="pdf", scales="fixed", anyreadlength=FALSE, drop_N_total_aligned=TRUE, scale_count_val=10^6, scale_length_val=10^3) 
# featureCounts2 <- read.delim("results/featureCounts2.xls", check.names=FALSE)
# myplots <- plotfeaturetypeCounts(x=featureCounts2, graphicsfile="results/featureCounts2.pdf", graphicsformat="pdf", scales="fixed", anyreadlength=TRUE, drop_N_total_aligned=TRUE, scale_count_val=10^6, scale_length_val=10^3) 

##############################################
## Compute and plot coverage along features ## 
##############################################
## Computes coverage along single and multi component features, such as exons/cds by transcripts. 
featureCoverage <- function(bfl, grl, resizereads=NULL, readlengthrange=NULL, Nbins=20, method=mean, fixedmatrix, resizefeatures, upstream, downstream, outfile, overwrite=FALSE) {
    ## Input validity checks
    if(!is.null(outfile)) {
        if(file.exists(outfile) & overwrite==FALSE) stop(paste("File", outfile, "exists. Delete it or set 'overwrite=TRUE'"))
        if(file.exists(outfile) & overwrite==TRUE) unlink(outfile)
    }
    if(class(bfl)!="BamFileList") stop("'bfl' needs to be of class 'BamFileList'")
    if(!is(grl, "GRangesList")) stop("'grl' needs to be a GRangesList object.")
    if(!is.null(readlengthrange)) {
        if(is.numeric(readlengthrange) & length(readlengthrange)!=2) stop("'readlengthrange' needs to be assigned NULL or numeric vector of length 2")
        if(!is.numeric(readlengthrange)) stop("'readlengthrange' needs to be assigned NULL or numeric vector of length 2")
        if((is.numeric(readlengthrange) & length(readlengthrange)==2) & readlengthrange[1] > readlengthrange[2]) stop("Second value in read length range needs to be equal or larger then first one.") 
    }  
    if(fixedmatrix==TRUE & resizefeatures==FALSE) stop("If 'fixedmatrix=TRUE' then 'resizefeatures' needs to be set to 'TRUE' too.")
    
    ## If Nbins is not NULL, then remove ranges with width < Nbins
    if(!is.null(Nbins)) grl <- grl[sum(width(reduce(grl))) >= Nbins]

    ## Resize features by upstream and downstream value
    if(resizefeatures==TRUE) {
        grl <- .resizeFeature(grl=grl, upstream=upstream, downstream=upstream, component_resort=TRUE)
    }

    ## Following codes operates mostly on GRanges object
    gr <- unlist(grl)
    
    ## Define empty buckets for collcecting results generated in master loop
    binDFresult <- data.frame()
    fixedMAresult <- data.frame()
    fixedbinMAresult <- data.frame()
    rle_result <- list()

    for(mysample in seq_along(bfl)) {
        ## Compute coverage for ranges in stored in gr from BAM file. Note, coverage() works 
        ## directly on BAM files but for resizing read lengths and strand specific analysis 
        ## one needs to import the corresponding alignment sections first.
        
        ## Import alignment chunk
        aligns <- readGAlignments(bfl[[mysample]], param=ScanBamParam(what=scanBamWhat(), which=gr))
        ## Remove duplicated mappings generated by readGAlignment for nearby/overlapping ranges; reduce() doesn't fix that
        aligns <- aligns[!duplicated(paste(as.character(seqnames(aligns)), start(aligns), end(aligns), as.character(strand(aligns)), mcols(aligns)$qname, sep="_"))]   
     
        ## Subset to specific read length range specified under readlengthrange
        if(is.numeric(readlengthrange)) {
                aligns <- aligns[qwidth(aligns) >= readlengthrange[1] & qwidth(aligns) <= readlengthrange[2]]
        } 
       
        ## Compute coverage 
        if(is.numeric(resizereads[1])) {
            cov <- coverage(resize(granges(aligns), resizereads[1]))
            cov_pos <- coverage(resize(granges(subsetByOverlaps(aligns, gr, ignore.strand=FALSE)), resizereads[1]))
        } else if(is.null(resizereads)) {
            cov <- coverage(aligns)
            cov_pos <- coverage(subsetByOverlaps(aligns, gr, ignore.strand=FALSE))
        } else {
            stop("'resizereads' needs to be assigned NULL or positive integer of length 1.")   
        }
        cov_neg <- cov - cov_pos    

        ## Extract coverage for gr components. 
        ## Sense coverage
        # cov_posreg <- suppressWarnings(Views(cov_pos, as(gr, "IntegerRangesList"))) # delete 
        cov_posreg <- suppressWarnings(Views(cov_pos[names(as(gr, "IntegerRangesList"))], as(gr, "IntegerRangesList"))) # Update 22-Nov-15: cov_pos needs to be subsetted by seqnames in IntegerRangesList. This is relevant if txdb/grl was created from gff with scaffolds not containing any genes
        cov_posreg <- cov_posreg[sapply(cov_posreg, length) > 0] # Removes empty components (chr) 
        ## Antisense coverage
        # cov_negreg <- suppressWarnings(Views(cov_neg, as(gr, "IntegerRangesList"))) # delete
        cov_negreg <- suppressWarnings(Views(cov_neg[names(as(gr, "IntegerRangesList"))], as(gr, "IntegerRangesList"))) # Update 22-Nov-15: cov_neg needs to be subsetted by seqnames in IntegerRangesList. This is relevant if txdb/grl was created from gff with scaffolds not containing any genes
        cov_negreg <- cov_negreg[sapply(cov_negreg, length) > 0] # Removes empty components (chr) 
        
        ## Collapse (splice) multicomponent coverage ranges, e.g. cds exons to full cds   
        ## Sense coverage
        mystrand <- strand(grl); mystrand <- sapply(names(mystrand), function(x) as.character(mystrand[[x]])[1])
        cov_posList <- as.list(mystrand)
        ## Features on "+" strand
        for(i in seq_along(cov_posreg)) {
            for(j in names(mystrand[mystrand=="+"])) {
                myview <- cov_posreg[[i]][names(cov_posreg[[i]]) %in% j]
                if(length(myview)!=0) {
                    cov_posList[[j]] <- Rle(as.numeric(na.omit(as.vector(t(as.matrix(myview))))))
                }
            }
        }
        ## Features on "-" strand: note coverage of component features needs to be reversed here prior to collapsing to single feature!
        for(i in seq_along(cov_posreg)) {
            for(j in names(mystrand[mystrand=="-"])) {
                myview <- cov_posreg[[i]][names(cov_posreg[[i]]) %in% j]
                if(length(myview)!=0) {
                    cov_posList[[j]] <- Rle(as.numeric(na.omit(as.vector(t(as.matrix(reverse(myview)))))))
                }
            }
        }
        ## Antisense coverage
        cov_negList <- as.list(mystrand)
        ## Features on "+" strand
        for(i in seq_along(cov_negreg)) {
            for(j in names(mystrand[mystrand=="+"])) {
                myview <- cov_negreg[[i]][names(cov_negreg[[i]]) %in% j]
                if(length(myview)!=0) {
                    cov_negList[[j]] <- Rle(as.numeric(na.omit(as.vector(t(as.matrix(myview))))))
                }
            }
        }
        ## Features on "-" strand: note coverage of component features needs to be reversed here prior to collapsing to single feature!
        for(i in seq_along(cov_negreg)) {
            for(j in names(mystrand[mystrand=="-"])) {
                myview <- cov_negreg[[i]][names(cov_negreg[[i]]) %in% j]
                if(length(myview)!=0) {
                    cov_negList[[j]] <- Rle(as.numeric(na.omit(as.vector(t(as.matrix(reverse(myview)))))))
                }
            }
        }
        
        ## Get total number of aligned reads from BAM file and add to output for normalization
        param <- ScanBamParam(flag=scanBamFlag(isSecondaryAlignment=FALSE, isUnmappedQuery=FALSE))
        aligned_reads <- countBam(bfl[mysample], param=param)
        
        ## If Nbins is integer, compute relative bin coverage for fixed number of bins (intervals) of each feature
        if(!is.null(Nbins)) {
            ## Function to bin coverage
            .interComp <- function(x, slice, method) {
                interval <- cut(1:length(x), slice, labels=FALSE)
                resultvec <- tapply(x, as.factor(interval), method)
                return(resultvec)
            }
            ## Sense coverage
            myMApos <- t(sapply(cov_posList, function(y) .interComp(x=y, slice=Nbins, method=method)))
            ## Antisense coverage
            myMAneg <- t(sapply(cov_negList, function(y) .interComp(x=y, slice=Nbins, method=method)))
            ## Combine results
            myMAneg <- myMAneg[rownames(myMApos),, drop=FALSE] # Assure proper sorting
            myDFpos <- data.frame(SampleName=names(bfl[mysample]), N_total_aligned=aligned_reads$records, IDs=rownames(myMApos), Strand="Sense", myMApos, check.names=FALSE)
            myDFneg <- data.frame(SampleName=names(bfl[mysample]), N_total_aligned=aligned_reads$records, IDs=rownames(myMAneg), Strand="Antisense", myMAneg, check.names=FALSE)
            myDFbins <- rbind(myDFpos, myDFneg, make.row.names=FALSE)
            row.names(myDFbins) <- NULL                  
            binDFresult <- rbind(binDFresult, myDFbins) 
            exportDF <- myDFbins
        } 
        ## Generate fixed matrix output
        if(fixedmatrix==TRUE) {
            mycolnames <- c(-upstream:-1, 0, 1:downstream)
            fixedMAstartpos <- t(sapply(names(cov_posList), function(x) as.numeric(cov_posList[[x]])[1:(upstream+downstream+1)]))
            colnames(fixedMAstartpos) <- mycolnames 
            fixedMAendpos <- t(sapply(names(cov_posList), function(x) as.numeric(cov_posList[[x]])[(length(as.numeric(cov_posList[[x]]))-(upstream+downstream)):length(as.numeric(cov_posList[[x]]))]))
            colnames(fixedMAendpos) <- mycolnames 
            fixedMAstartneg <- t(sapply(names(cov_negList), function(x) as.numeric(cov_negList[[x]])[1:(upstream+downstream+1)]))
            colnames(fixedMAstartneg) <- mycolnames 
            fixedMAendneg <- t(sapply(names(cov_negList), function(x) as.numeric(cov_negList[[x]])[(length(as.numeric(cov_negList[[x]]))-(upstream+downstream)):length(as.numeric(cov_negList[[x]]))]))
            colnames(fixedMAendneg) <- mycolnames 
            fixedMA <- rbind(data.frame(SampleName=names(bfl[mysample]), N_total_aligned=aligned_reads$records, IDs=rownames(fixedMAstartpos), Strand="Sense", fixedMAstartpos, check.names=FALSE),
                             data.frame(SampleName=names(bfl[mysample]), N_total_aligned=aligned_reads$records, IDs=rownames(fixedMAstartneg), Strand="Antisense", fixedMAstartneg, check.names=FALSE))
            tmpMA <- rbind(data.frame(fixedMAendpos[rownames(fixedMAstartpos),, drop=FALSE], check.names=FALSE),
                             data.frame(fixedMAendneg[row.names(fixedMAstartneg),,drop=FALSE], check.names=FALSE))
            fixedMAout <- cbind(fixedMA, "<S|E>"="|", tmpMA)
            row.names(fixedMAout) <- NULL                  
            fixedMAresult <- rbind(fixedMAresult, fixedMAout)
            exportDF <- fixedMAout
        }
        ## Combine fixed and binned coverage matrix
        if(!is.null(Nbins) & fixedmatrix==TRUE) {
            fixedbinMAout <- data.frame(fixedMA, "<S|B>"="|", myDFbins[,-c(1:4), drop=FALSE], "<B|E>"="|", tmpMA, check.names=FALSE)
            row.names(fixedbinMAout) <- NULL 
            fixedbinMAresult <- rbind(fixedbinMAresult, fixedbinMAout)
            exportDF <- fixedbinMAout
        } 
        ## Organize rle results in list
        if(fixedmatrix==FALSE & is.null(Nbins)) {
            rle_out <- list(list(N_total_aligned=aligned_reads$records, sense=as(cov_posList, "RleList"), antisense=as(cov_negList, "RleList")))
            names(rle_out)  <- names(bfl[mysample])
            rle_result <- c(rle_result, rle_out)
        } 
        
        ## Write tabular data to file in append mode
        if(!is.null(outfile) & (fixedmatrix==TRUE | !is.null(Nbins))) {
            if(!file.exists(outfile)) {    
                write.table(exportDF, outfile, append=FALSE, quote=FALSE, row.names=FALSE, col.names=TRUE, sep="\t")
                cat("Processed sample:", names(bfl[mysample]), "and wrote results to file", outfile, "\n")
            } else {
                write.table(exportDF, outfile, append=TRUE, quote=FALSE, row.names=FALSE, col.names=FALSE, sep="\t")
                cat("Processed sample:", names(bfl[mysample]), "and appended results to file", outfile, "\n")
            }
        } else {
            cat("Processed sample:", names(bfl[mysample]), "\n")
        }
    }
    
    ## Return proper final result 
    if(!is.null(Nbins) & fixedmatrix==FALSE) {
        return(binDFresult)
    } else if(is.null(Nbins) & fixedmatrix==TRUE) {
        return(fixedMAresult)
    } else if(!is.null(Nbins) & fixedmatrix==TRUE) {
        return(fixedbinMAresult)
    } else {
        return(rle_result)    
    }
}
## Usage:
# grl <- cdsBy(txdb, "tx", use.names=TRUE)
# fcov <- featureCoverage(bfl=BamFileList(outpaths(args)[1]), grl=grl[1:4], resizereads=NULL, readlengthrange=NULL, Nbins=NULL, method=mean, fixedmatrix=TRUE, resizefeatures=TRUE, upstream=20, downstream=20, outfile="results/zzz.xls", overwrite=TRUE)

## Helper function to extend single and multi component ranges
## Extends featureBy GRangeList objects containing component features 
## such as exons in CDSs or transcripts so that only the first and last components 
## get extended. Single component features will be extended the same way.
.resizeFeature <- function(grl, upstream, downstream, component_resort=TRUE) { 
    if(!is(grl, "GRangesList")) stop("'grl' needs to be a GRangesList object.")
    gr <- unlist(grl)    
    if(!all(names(grl) %in% unique(names(gr)))) stop("None or not all components in grl are named.")
    
    ## Add sort_index
    mcols(gr) <- DataFrame(mcols(gr), sort_index=seq_along(gr))

    ## Ranges on + strand
    gr_pos <- gr[strand(gr)=="+" | strand(gr)=="*"] # Note: treats unstranded * the same as pos +
    if(component_resort==TRUE) {
        gr_pos <- gr_pos[order(as.character(seqnames(gr_pos)), names(gr_pos), start(gr_pos))] # Assures proper sorting
    }
    index_pos_first <- !duplicated(names(gr_pos), fromLast=FALSE)
    suppressWarnings(start(gr_pos[index_pos_first]) <- start(gr_pos[index_pos_first]) - upstream)
    gr_pos <- trim(gr_pos)
    index_pos_last <- !duplicated(names(gr_pos), fromLast=TRUE)
    suppressWarnings(end(gr_pos[index_pos_last]) <- end(gr_pos[index_pos_last]) + downstream) 
    gr_pos <- trim(gr_pos)

    ## Ranges on - strand
    gr_neg <- gr[strand(gr)=="-"]
    if(component_resort==TRUE) {
        gr_neg <- gr_neg[order(as.character(seqnames(gr_neg)), names(gr_neg), -start(gr_neg))]
    } 
    index_neg_first <- !duplicated(names(gr_neg), fromLast=FALSE)
    suppressWarnings(end(gr_neg[index_neg_first]) <- end(gr_neg[index_neg_first]) + upstream)
    gr_neg <- trim(gr_neg)
    index_neg_last <- !duplicated(names(gr_neg), fromLast=TRUE)
    suppressWarnings(start(gr_neg[index_neg_last]) <- start(gr_neg[index_neg_last]) - downstream)
    gr_neg <- trim(gr_neg)
   
    ## Return output in same format as input 
    gr_mod <- c(gr_pos, gr_neg)
    gr_mod <- gr_mod[order(mcols(gr_mod)$sort_index)]
    gr_mod <- gr_mod[, seq_along(colnames(mcols(grl[[1]])))]
    myfactor <- names(gr_mod)
    if(is.null(names(grl[[1]]))) names(gr_mod) <- NULL
    grl_mod <- split(gr_mod, myfactor)
    grl_mod <- grl_mod[names(grl)]
    return(grl_mod)
}
## Usage:
# grl_mod <- .resizeFeature(grl=grl, upstream=20, downstream=20, component_resort=TRUE)

###########################
## Plot feature coverage ##
###########################
## Plots tabular coverage data generated by featureCoverage()
plotfeatureCoverage <- function(covMA, method=mean, scales="fixed", extendylim=2, scale_count_val=10^6) {
    ## Some input validity checks
    if(class(covMA) != "data.frame") stop("'covMA' needs to be assigned an object of class 'data.frame'.")
    expectedcol <- c("SampleName", "N_total_aligned", "IDs", "Strand")
    if(any(!colnames(covMA)[1:4] %in% expectedcol)) stop(paste("The first 4 columns in 'covMA' need to be named:", paste(expectedcol, collapse=", ")))
    
    ## Determine split type required for provided input 
    splitloc <- which(grepl("(<S\\|B>)|(<B\\|E>)|(<S\\|E>)", colnames(covMA)))
    
    ## Get numbers of total aligned reads per sample
    N_total_aligned <- covMA$N_total_aligned; names(N_total_aligned) <- covMA$SampleName
    N_total_aligned <- N_total_aligned[!duplicated(names(N_total_aligned))]    
    
    ## Define function to convert tabular input into ggplot2 friendly format
    .convertDFlist <- function(malist) {
        for(j in seq_along(malist)[-1]) {
            df <- malist[[j]]
            tmpDF <- data.frame() 
            for(i in seq_along(colnames(df))[-c(1,2)]) {
                tmpDF <- rbind(tmpDF, data.frame(df[,1:2], Position=colnames(df)[i], Coverage=df[,i]))
            }
            malist[[j]] <- tmpDF
        }
        return(malist)
    }
   
    ## Construct informative title: print ID if only one or their count if more than 1 
    ti <- as.character(unique(covMA[,"IDs"]))
    if(length(ti) > 1) ti <- paste(length(ti), "features")
     
    ## Split and compute summary stats in format expected by ggplot2
    ## For binned matrix 
    if(length(splitloc)==0) {
        malist <- sapply(c("title", "bin"), function(x) NULL)
        malist[["title"]] <- c(titlebin=paste("CDS of", ti), x="Bins", y="Coverage")
        bin <- covMA
        malist[["bin"]] <- aggregate(bin[,-c(1:4)], by=list(SampleName=bin$SampleName, Strand=bin$Strand), FUN=method)
        malist <- .convertDFlist(malist)
    } 
    ## For fixed matrix around start and stop codons 
    if(length(splitloc)==1) {
        malist <- sapply(c("title", "start", "stop"), function(x) NULL)
        malist[["title"]] <- c(titlestart=paste("Start of", ti), titlestop=paste("Stop of", ti), x="Bins", y="Coverage")
        start <- covMA[ , 1:(splitloc[1] - 1)]
        malist[["start"]] <- aggregate(start[,-c(1:4)], by=list(SampleName=start$SampleName, Strand=start$Strand), FUN=method)
        stop <- covMA[ , c(1:4, (splitloc[1] + 1):length(colnames(covMA)))]  
        malist[["stop"]] <- aggregate(stop[,-c(1:4)], by=list(SampleName=stop$SampleName, Strand=stop$Strand), FUN=method)
        malist <- .convertDFlist(malist)
    } 
    ## For fixed matrix and binned matrix 
    if(length(splitloc)==2) {
        malist <- sapply(c("title","start","bin","stop"), function(x) NULL)
        malist[["title"]] <- c(titlestart=paste("Start of", ti), titlebin=paste("CDS of", ti), titlestop=paste("Stop of", ti), x="Bins", y="Coverage")
        start <- covMA[ , 1:(splitloc[1] - 1)]
        malist[["start"]] <- aggregate(start[,-c(1:4)], by=list(SampleName=start$SampleName, Strand=start$Strand), FUN=method)
        bin <- covMA[ , c(1:4, (splitloc[1] + 1):(splitloc[2] - 1))]
        malist[["bin"]] <- aggregate(bin[,-c(1:4)], by=list(SampleName=bin$SampleName, Strand=bin$Strand), FUN=method)
        stop <- covMA[ , c(1:4, (splitloc[2] + 1):length(colnames(covMA)))]  
        malist[["stop"]] <- aggregate(stop[,-c(1:4)], by=list(SampleName=stop$SampleName, Strand=stop$Strand), FUN=method)
        malist <- .convertDFlist(malist)
    }
    
    ## Scale per x reads (e.g. per million reads)
    if(is.numeric(scale_count_val[1])) {
        for(i in seq_along(malist)[-1]) malist[[i]][,"Coverage"] <- malist[[i]][,"Coverage"] * (scale_count_val/N_total_aligned[malist[[i]][,"SampleName"]])
    } else if(length(scale_count_val) == 0) {
            malist <- malist
    } else {
            stop("'scale_count_val' needs to be assinged NULL or numeric value.")
    }
    
    ## Label for coverage axis
    if(length(scale_count_val)==0) axis_label <- "Raw Coverage"
    if(length(scale_count_val)>0) axis_label <- paste("Coverage normalized per", scale_count_val, "reads")
    
    ## Get maximum coverage in data sets after summing up strand spec counts
    mymax <- numeric()
    for(i in seq_along(malist)[-1]) {
        myfactor <- paste(malist[[i]][,1], malist[[i]][,2], malist[[i]][,3], sep="_")
        mymax <- max(c(mymax, max(tapply(malist[[i]][,"Coverage"], myfactor, sum))))
    }
    mymax <- mymax * extendylim
    
    ## Generate plotting instructions
    myplotlist <- sapply(names(malist[-1]), function(x) NULL, simplify=FALSE)
    for(i in seq_along(myplotlist)) {
        myplot <- ggplot(malist[[names(myplotlist)[i]]], aes(x=Position, y=Coverage)) + 
                         geom_bar(aes(fill=Strand), position="stack", stat="identity") + 
                         facet_wrap(~SampleName, ncol=1, scales=scales) +
                         theme(legend.position="bottom") + 
                         ggtitle(malist[["title"]][i])
        ## Assure same scale for all panels if scales="fixed"
        if(scales=="fixed") {
            myplot <- myplot + scale_y_continuous(axis_label, limits=c(0, mymax)) 
        } else {
            myplot <- myplot + scale_y_continuous(axis_label) 
        }
        myplotlist[[i]] <- myplot
    }
    
    ## Generate graphics 
    grid::grid.newpage() # Open a new page on grid device
    grid::pushViewport(viewport(layout = grid::grid.layout(1, length(myplotlist)))) # Assign to device viewport with 1 by 2 grid layout 
    for(i in seq(along=myplotlist)) print(myplotlist[[i]], vp = grid::viewport(layout.pos.row = 1, layout.pos.col = i))
}

## Usage:
# grl <- cdsBy(txdb, "tx", use.names=TRUE)
# fcov <- featureCoverage(bfl=BamFileList(outpaths(args)[1:2]), grl=grl[1:4], resizereads=NULL, readlengthrange=NULL, Nbins=20, method=mean, fixedmatrix=TRUE, resizefeatures=TRUE, upstream=20, downstream=20, outfile="results/zzz.xls", overwrite=TRUE)
# plotfeatureCoverage(covMA=fcov, method=mean, scales="fixed", extendylim=2, scale_count_val=10^6)

##################
## Predict ORFs ##
##################
## Function to predict ORFs in DNA sequences provided as DNAString/DNAStringSet objects
predORF <- function(x, n=1, type="grl", mode="orf", strand="sense", longest_disjoint=FALSE, startcodon="ATG", stopcodon=c("TAA", "TAG", "TGA")) {
	## Check input validity 
    if(any(nchar(c(startcodon, stopcodon))!=3)) stop("startcodon and stopcodons can only contain 3-letter strings.")
    if(!toupper(mode) %in% c("ORF", "CDS")) stop("'mode' can only be assigned one of: 'orf' or 'cds'")
    if(length(names(x))==0 | any(duplicated(names(x)))) stop("Sequence name slot of x need be populated with unique names.")
    
    ## Remove sequence with less than 6 nucleotides in length
    x <- x[width(x)>=6]

    ## Function for predicting ORFs/CDSs on single sequence
    .predORF <- function(x, n, mode, strand, ...) {
        ## start/stop codon assignment
        if(tolower(strand)=="sense") {
		    mystrand <- 1 # for +
            startcodon <- startcodon
            stopcodon <- stopcodon
		} else if(tolower(strand)=="antisense") {
            ## Reverse and complement start/stop plus swap their assignment
		    mystrand <- 2 # for -
		    stopcodon_temp <- as.character(reverseComplement(DNAStringSet(stopcodon)))
            startcodon_temp <- as.character(reverseComplement(DNAStringSet(startcodon)))
		    stopcodon <- startcodon_temp 
            startcodon <- stopcodon_temp
        
        } else {
            stop("strand can only be assigned 'sense', 'antisense' or 'both'")
        }
        
        ## Sequences containing N are not processed
        if(alphabetFrequency(x)["N"] > 0) {
            orfRanges <- cbind(subject_id=numeric(), start=numeric(), end=numeric(), width=numeric(), strand=numeric(), inframe2end=numeric())
            warning("Skipped sequence containing Ns.")
            return(orfRanges)
        }

        ## Tripletize x for each frame 
        c1 <- as.character(suppressWarnings(codons(x)))
		c2 <- as.character(suppressWarnings(codons(x[2:length(x)])))
		c3 <- as.character(suppressWarnings(codons(x[3:length(x)])))
        
        ## Identify position of start/stop in tripletized x
		startpos1 <- which(c1 %in% startcodon)
		stoppos1 <- which(c1 %in% stopcodon)
		startpos2 <- which(c2 %in% startcodon)
		stoppos2 <- which(c2 %in% stopcodon)
		startpos3 <- which(c3 %in% startcodon)
		stoppos3 <- which(c3 %in% stopcodon)
		
		## Make sure subsequent code also finds coding sequence frames (CDS) rather than just strict open reading frames (ORFs) 
		if(mode=="cds") {
			stoppos1 <- unique(c(0, stoppos1, length(c1)+1)); stoppos2 <- unique(c(0, stoppos2, length(c2)+1)); stoppos3 <- unique(c(0, stoppos3, length(c3)+1))
			startpos1 <- stoppos1; startpos2 <- stoppos2; startpos3 <- stoppos3
		}
        
        ## Map tripletized matches back to sequence of x 
        if(tolower(strand)=="sense") {
		    orfpos1 <- t(sapply(seq(along=startpos1), function(x) c((startpos1[x] * 3) -2, stoppos1[stoppos1 > startpos1[x]][1] * 3)))
		    if(length(orfpos1)==0) orfpos1 <- matrix(nrow = 0, ncol = 2)
		    orfpos2 <- t(sapply(seq(along=startpos2), function(x) c((startpos2[x] * 3) -1, (stoppos2[stoppos2 > startpos2[x]][1] * 3) + 1)))
		    if(length(orfpos2)==0) orfpos2 <- matrix(nrow = 0, ncol = 2)
		    orfpos3 <- t(sapply(seq(along=startpos3), function(x) c((startpos3[x] * 3) -0, (stoppos3[stoppos3 > startpos3[x]][1] * 3) + 2)))
		    if(length(orfpos3)==0) orfpos3 <- matrix(nrow = 0, ncol = 2)
		}
        if(tolower(strand)=="antisense") { # Note - for antisense predictions startpos contain stop codons and stoppos contain start codons
		    startpos1 <- sort(startpos1, decreasing = TRUE); startpos2 <- sort(startpos2, decreasing = TRUE); startpos3 <- sort(startpos3, decreasing = TRUE)
		    stoppos1 <- sort(stoppos1, decreasing = TRUE); stoppos2 <- sort(stoppos2, decreasing = TRUE); stoppos3 <- sort(stoppos3, decreasing = TRUE)
		    orfpos1 <- t(sapply(seq(along=stoppos1), function(x) c((startpos1[startpos1 < stoppos1[x]][1] * 3) -2, stoppos1[x] * 3)))
		    if(length(orfpos1)==0) orfpos1 <- matrix(nrow = 0, ncol = 2)
		    orfpos2 <- t(sapply(seq(along=stoppos2), function(x) c((startpos2[startpos2 < stoppos2[x]][1] * 3) -1, (stoppos2[x] * 3) + 1)))
		    if(length(orfpos2)==0) orfpos2 <- matrix(nrow = 0, ncol = 2)
		    orfpos3 <- t(sapply(seq(along=stoppos3), function(x) c((startpos3[startpos3 < stoppos3[x]][1] * 3) -0, (stoppos3[x] * 3) + 2)))
		    if(length(orfpos3)==0) orfpos3 <- matrix(nrow = 0, ncol = 2)
		}
        orfRanges <- rbind(orfpos1, orfpos2, orfpos3)
		orfRanges <- na.omit(orfRanges)
		if(mode=="cds") { # Modifications required for mode=="cds"
			orfRanges[,1] <- orfRanges[,1] + 3
			orfRanges[orfRanges[,2] > length(x), 2] <- length(x)
		}
		orfRanges <- IRanges::IRanges(start=orfRanges[,1], end=orfRanges[,2])
		orfRanges <- orfRanges[rev(order(width(orfRanges)))]
		
        ## Organize results in data.frame and also add info about frame of predicted ORF to downstream ORF e.g. prediction is uORF of 5'-UTR
	    orfRanges <- as.data.frame(orfRanges)
	 	inframe <- (length(x) - orfRanges$end) / 3; inframe2 <- inframe
		inframe2[abs((inframe - as.integer(inframe))) == 0] <- 1
		inframe2[abs(round((inframe - as.integer(inframe)),2)) == round(1/3, 2)] <- 2
		inframe2[abs(round((inframe - as.integer(inframe)),2)) == round(2/3, 2)] <- 3
		if(nrow(orfRanges)>0) {
            orfRanges <- cbind(subject_id=1:length(orfRanges[,1]), orfRanges, strand=mystrand, inframe2end=inframe2)
        } else {
            orfRanges <- cbind(subject_id=numeric(), start=numeric(), end=numeric(), width=numeric(), strand=numeric(), inframe2end=numeric())
        }
        ## Return all ORFs 
        if(n=="all") {
			## Subset to non-overlapping ORF set containing longest ORF
            if(nrow(orfRanges) > 0 & longest_disjoint==TRUE) {
                tmpgr <- GRanges(seqnames="dummy", IRanges::IRanges(orfRanges[,2], orfRanges[,3]), strand="+")
                orfRanges <- orfRanges[disjointBins(tmpgr)==1, , drop=FALSE]
			    orfRanges[,1] <- 1:nrow(orfRanges)
            }
            return(orfRanges)
        ## Return only as many ORFs as specified under n sorted decreasingly by length
		} else if(is.numeric(n)) {
            ## Make sure subsetting does not exceed number of records in orfRanges
            if(nrow(orfRanges) < n & nrow(orfRanges) != 0) { 
                upperlimit <- length(orfRanges[,1]) 
            } else if(nrow(orfRanges) > 0) { 
                upperlimit <- n
            } else {
                upperlimit <- NULL
            }
            if(is.null(upperlimit)) {
                return(orfRanges[upperlimit, , drop=FALSE])
		    } else {
                return(orfRanges[1:upperlimit, , drop=FALSE])
            }
        } else {
            stop("n needs to be assigned positive integer or 'all'")
        }
	}
	
    ## Run .predORF
    if(class(x)=="DNAString") {
        if(tolower(strand) == "sense" | tolower(strand) == "antisense") {
		    tmp <- .predORF(x, n, mode, strand, longest_disjoint, startcodon, stopcodon)
		    tmp[,"strand"] <- ifelse(as.numeric(tmp[,"strand"])==1, "+", "-")
        } else if(tolower(strand) == "both") {
		    tmp_pos <- .predORF(x, n, mode, strand="sense", longest_disjoint, startcodon, stopcodon)
		    tmp_neg <- .predORF(x, n, mode, strand="antisense", longest_disjoint, startcodon, stopcodon)
            tmp <- rbind(tmp_pos, tmp_neg)
		    tmp[,"strand"] <- ifelse(as.numeric(tmp[,"strand"])==1, "+", "-")
        } else {
            stop("strand can only be assigned 'sense', 'antisense' or 'both'")
        }
	    if(type=="df") return(tmp)
	    if(type=="gr") return(makeGRangesFromDataFrame(data.frame(seqnames="unknown", tmp), keep.extra.columns=TRUE))
    }
	if(class(x)=="DNAStringSet") {
        if(tolower(strand) == "sense" | tolower(strand) == "antisense") {
		    tmp <- lapply(x, function(y) .predORF(y, n=n, mode=mode, strand, longest_disjoint, startcodon, stopcodon))
		    names(tmp) <- names(x)
	        tmpdf <- do.call("rbind", tmp)
            rownames(tmpdf) <- NULL
            tmpdf <- data.frame(seqnames=rep(names(tmp), sapply(tmp, nrow)), tmpdf)
            tmpdf[,"strand"] <- ifelse(as.numeric(tmpdf[,"strand"])==1, "+", "-")
        } else if(tolower(strand) == "both") {
		    ## for sense strand
            tmp_pos <- lapply(x, function(y) .predORF(y, n=n, mode=mode, strand="sense", longest_disjoint, startcodon, stopcodon))
		    names(tmp_pos) <- names(x)
	        tmpdf_pos <- do.call("rbind", tmp_pos)
            rownames(tmpdf_pos) <- NULL
		    ## for antisense strand
            tmpdf_pos <- data.frame(seqnames=rep(names(tmp_pos), sapply(tmp_pos, nrow)), tmpdf_pos)
		    tmp_neg <- lapply(x, function(y) .predORF(y, n=n, mode=mode, strand="antisense", longest_disjoint, startcodon, stopcodon))
		    names(tmp_neg) <- names(x)
	        tmpdf_neg <- do.call("rbind", tmp_neg)
            rownames(tmpdf_neg) <- NULL
            tmpdf_neg <- data.frame(seqnames=rep(names(tmp_neg), sapply(tmp_neg, nrow)), tmpdf_neg)
            ## combine and sort results
            tmpdf <- rbind(tmpdf_pos, tmpdf_neg)
            tmpdf[,"strand"] <- ifelse(as.numeric(tmpdf[,"strand"])==1, "+", "-")
            tmpdf <- tmpdf[order(tmpdf$seqnames, tmpdf$subject_id), ]
        } else {
            stop("strand can only be assigned 'sense', 'antisense' or 'both'")
        }
	    
        ## Return results in format defined by type
        if(tolower(type)=="df") {
                rownames(tmpdf) <- NULL
                return(tmpdf)
        } else if(tolower(type)=="gr") {
                tmpdf <- makeGRangesFromDataFrame(tmpdf, keep.extra.columns=TRUE)
		        return(tmpdf)
        } else if(tolower(type)=="grl") {
                tmpdf <- makeGRangesFromDataFrame(tmpdf, keep.extra.columns=TRUE)
		        tmpdf <- split(tmpdf, as.character(seqnames(tmpdf)))
                return(tmpdf)
        } else {
            stop("type can only be assigned 'df' of 'gr'")
        }
	}
}
# library(Biostrings)
# file <- system.file("extdata", "someORF.fa", package="Biostrings")
# dna <- readDNAStringSet(file)
# orf <- predORF(dna[1:4], n=1, type="df", mode="orf", strand="antisense", startcodon="ATG", stopcodon=c("TAA", "TAG", "TGA"))


######################################################
## Scale feature level mappings to genome positions ##
######################################################
## Function to scale ranges within genomic features (query)
## to corresponding genome positions (subject). The function
## also accounts for intron insertions of compound features 
## (e.g. between exons of transcribed regions) that are absent in 
## the query ranges, but present in the corresponding subject ranges.
scaleRanges <- function(subject, query, type="custom", verbose=TRUE) {
    ## Both input objects need to be of class GRangesList
    if(!is(subject, "GRangesList") | !is(query, "GRangesList")) stop("Both subject and query need to be GRangesList objects.")
    ## All names(query) need to be present in names(subject)
    if(any(!names(query) %in% names(subject))) stop("All 'names(query)' need to be present in 'names(subject)'.")

    ## Perform scaling on single subject/query pair each containing on entry
    .scaleRanges <- function(subject, query, returntype="df") {
        ## Check for validity of query
        if(length(query)>1) warning("Only the first range in 'query' will be used.")
        query <- query[1]
        if(sum(width(subject)) < width(query)) stop("Sum of width of subject ranges cannot be smaller than width of query range.")
    
        ## Check for validity of subject
        subjectstrand <- unique(as.character(strand(subject)))
        if(length(subjectstrand) != 1) stop("More than one orientation detected. There can only be one.")
        querystrand <- unique(as.character(strand(query)))
    
        ## Check for validity of seqnames
        myseqname <- unique(as.character(seqnames(subject)))
        if(length(myseqname) != 1) stop("More than one seqname detected. There can only be one.")
    
        ## Scale query range to range of subject ranges (of genomic feature) using interval trees from IRanges 
        subject <- subject[order(start(subject))]
        rangev <- paste("c(", paste(paste(start(subject), end(subject), sep=":"), collapse=", "),")", sep="")
        rangev <- eval(parse(text=rangev)) 
        names(rangev) <- seq_along(rangev)
        if(subjectstrand=="+") rangev <- rangev[start(query):end(query)]
        if(subjectstrand=="-") rangev <- rev(rangev)[start(query):end(query)]
        ir <- reduce(IRanges::IRanges(rangev, rangev))
    
        ## Set orientation properly
        if(querystrand=="-" & subjectstrand=="-") mystrand <- "+"
        if(querystrand=="+" & subjectstrand=="+") mystrand <- "+"
        if(querystrand=="+" & subjectstrand=="-") mystrand <- "-"
        if(querystrand=="-" & subjectstrand=="+") mystrand <- "-"
    
        ## Organize result as GRanges 
        if(mystrand=="+") { # Proper exon ranking for exons on +/- strand
            gr <- GRanges(myseqname, ir[order(start(ir), decreasing=FALSE)], mystrand)
        } else {
            gr <- GRanges(myseqname, ir[order(start(ir), decreasing=TRUE)], mystrand)  
        }
        mcols(gr) <- data.frame(type=type)
        
        ## Return results as data.frame or GRanges
        if(returntype=="gr") return(gr)
        if(returntype=="df") return(as.data.frame(gr))
    }

    ## Run .scaleRanges on two GRangesLists as input
    subject <- subject[names(query)] # Subset subject to entries in query
    mygrl <- unlist(query)
    myids1 <- rep(names(query), sapply(start(query), length))
    myids2 <- paste(myids1, ":", start(mygrl), "_", end(mygrl), sep="")
    mcols(mygrl) <- data.frame(type=type)
    mygrl <- split(mygrl, seq_along(mygrl))
    names(mygrl) <- myids2
    mylist <- as.list(character(length=length(mygrl))); names(mylist) <- names(mygrl) 
    for(i in seq_along(mygrl)) {
        myentryid <- myids1[i]
        suppressWarnings(mylist[[i]] <- .scaleRanges(subject[[myentryid]], mygrl[[i]]))
        if(verbose==TRUE) {
            cat(paste0("Scaled range ", i, " of ", length(mygrl), " ranges: ", names(mygrl[i])), "\n")
        }
    }
    ## If loop gets interrupted, output only completed results!
    mylist <- mylist[1:i] 
    gr <- makeGRangesFromDataFrame(do.call("rbind", mylist), keep.extra.columns=FALSE)
    mcols(gr) <- DataFrame(feature_by=as(rep(names(mylist), sapply(mylist, nrow)), "CharacterList"), featuretype_id=names(gr), featuretype=type)
    names(gr) <- NULL
    grl <- split(gr, as.character(mcols(gr)$feature_by))
    return(grl)
}
## Usage for simple example
# subject <- GRanges(seqnames="Chr1", IRanges(c(5,15,30),c(10,25,40)), strand="+") 
# query <- GRanges(seqnames="myseq", IRanges(1, 9), strand="+")
# scaleRanges(GRangesList(myid1=subject), GRangesList(myid1=query), type="test")

## Usage for more complex example
# library(GenomicFeatures)
# gff <- system.file("extdata/annotation", "tair10.gff", package="systemPipeRdata")
# txdb <- makeTxDbFromGFF(file=gff, format="gff3", organism="Arabidopsis")
# futr <- fiveUTRsByTranscript(txdb, use.names=TRUE)
# genome <- system.file("extdata/annotation", "tair10.fasta", package="systemPipeRdata")
# dna <- extractTranscriptSeqs(FaFile(genome), futr)
# uorf <- predORF(dna, n="all", mode="orf", longest_disjoint=TRUE, strand="sense")
# grl_scaled <- scaleRanges(subject=futr, query=uorf, type="uORF", verbose=TRUE)
# export.gff3(unlist(grl_scaled), "uorf.gff")

#######################################
## Translational Efficiency Analysis ##
#######################################
## Currently, this step is covered entire by the code given in the Ribo-Seq
## workflow vignette using DESeq2 out of the box. The approach ended up being
## very similar to some of time course analysis steps given in the DESeq2
## vignette as well as some community discussions:
##      https://support.bioconductor.org/p/61509/ also here
##      https://support.bioconductor.org/p/67455/
## To-dos: add convenience function similar to run_DESeq2 (or extend it)
## to handle this analysis step for many experiment sets in a more convenient manner.
Any scripts or data that you put into this service are public.
systemPipeR documentation built on Jan. 26, 2021, 2 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
systemPipeR
systemPipeR: NGS workflow and report generation environment

R/riboseq.R
In systemPipeR: systemPipeR: NGS workflow and report generation environment

Defines functions scaleRanges predORF plotfeatureCoverage .resizeFeature featureCoverage plotfeaturetypeCounts featuretypeCounts genFeatures

Documented in featureCoverage featuretypeCounts genFeatures plotfeatureCoverage plotfeaturetypeCounts predORF scaleRanges

Try the systemPipeR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

systemPipeR systemPipeR: NGS workflow and report generation environment

R/riboseq.R In systemPipeR: systemPipeR: NGS workflow and report generation environment

Defines functions scaleRanges predORF plotfeatureCoverage .resizeFeature featureCoverage plotfeaturetypeCounts featuretypeCounts genFeatures

Documented in featureCoverage featuretypeCounts genFeatures plotfeatureCoverage plotfeaturetypeCounts predORF scaleRanges

Try the systemPipeR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

systemPipeR
systemPipeR: NGS workflow and report generation environment

R/riboseq.R
In systemPipeR: systemPipeR: NGS workflow and report generation environment