#' Enrichment analysis using GREAT package
#' to identify putative pathways of interest for further
#' investigation
#' @param peaks list, output of categAltrePeaks() function
#' #@param peaktype character, "Experiment Specific", "Reference Specific",
#' # "Ambiguous", "Shared", or "All" (All is default)
#' @param species default hg19
#' @param rule character, "basalPlusExt", "twoClosest", "oneClosest" rule that associates
#' genomic regions to genes (default is "basalPlusExt").
#' See https://bioconductor.org/packages/release/bioc/html/chipenrich.html for more detail.
#' @param adv_upstream kb, extension to upstream (if rule is basalPlusExt), default 5
#' @param adv_downstream kb, extension to downstream (if rule is basalPlusExt), default 1.0
#' @param adv_span kb, max extension (if rule is basalPlusExt), default 1000.0
#' @param adv_twoDistance kb, max extension (if rule is twoClosest), default 1000.0
#' @param adv_oneDistance kb, max extension (if rule is oneClosest), default 1000.0
#' @param pathway_category character, "GO", "Pathway Data", "Regulatory Motifs",
#' "Phenotype Data and Human Disease", "Gene Expression", "Gene Families"
#' (default is "GO")
#' @examples
#' \dontrun{
#' csvfile <- loadCSVFile("DNaseEncodeExample.csv")
#' samplePeaks <- loadBedFiles(csvfile)
#' consensusPeaks <- getConsensusPeaks(samplepeaks = samplePeaks, minreps = 2)
#' TSSannot <- getTSS()
#' consensusPeaksAnnotated <- combineAnnotatePeaks(conspeaks = consensusPeaks,
#' TSS = TSSannot,
#' merge = TRUE,
#' regionspecific = TRUE,
#' distancefromTSSdist = 1500,
#' distancefromTSSprox = 1000)
#' consensusPeaksCounts <- getCounts(annotpeaks = consensusPeaksAnnotated,
#' reference = 'SAEC',
#' sampleinfo = csvfile,
#' chrom = 'chr21')
#' alteredPeaks <- countanalysis(counts=consensusPeaksCounts,
#' pval=0.01,
#' lfcvalue=1)
#' alteredPeaksCategorized <- categAltrePeaks(alteredPeaks,
#' lfctypespecific = 1.5,
#' lfcshared = 1.2,
#' pvaltypespecific = 0.01,
#' pvalshared = 0.05)
#' callPaths <- runGREAT(peaks = alteredPeaksCategorized)
#' }
#' @return ways --
#' pathways also annotated with additional information
# run with categaltre_peaks
#' @export
#'
runGREAT <- function(peaks,
# peaktype="All",
species = "hg19",
rule = "basalPlusExt",
adv_upstream = 5.0,
adv_downstream = 1.0,
adv_span = 1000.0,
adv_twoDistance = 1000.0,
adv_oneDistance = 1000.0,
pathway_category = "GO") {
# Check that peaktype entry is allowable and grab peaks for analysis
# if (is.na(match(peaktype,c("All","Experiment Specific", "Shared", "Ambiguous",
# "Reference Specific")))) {
# stop("peaktype should be either 'All', 'Experiment Specific', 'Shared',
# 'Ambiguous', or 'Reference Specific'")
# }
# if (peaktype == "All") {
# mypeaks = as.data.frame(peaks$analysisresults)[,c("chr","start","stop")] }
# else {
# mypeaks = as.data.frame(peaks$analysisresults)[which(peaks$analysisresults==peaktype),
# c("chr","start","stop")]}
if (is.na(match(rule, c(
"basalPlusExt", "twoClosest", "oneClosest"
)))) {
stop("rule must be either 'basalPlusExt', 'twoClosest', 'oneClosest'")
}
mygreat <- list()
for (i in c("ExperimentSpecificByIntensity", "ReferenceSpecificByIntensity", "Shared")) {
#print(paste("Running", i))
mypeaks <- as.data.frame(peaks$analysisresults)[
which(peaks$analysisresults$REaltrecateg == i),
c("chr", "start", "stop")]
ilabel <- gsub(" ", "_", i)
# Run GREAT
if (rule == "basalPlusExt") {
mygreat[[ilabel]] <- rGREAT::submitGreatJob(
mypeaks,
species = species,
adv_span = adv_span,
rule = "basalPlusExt",
adv_upstream = adv_upstream,
adv_downstream = adv_downstream,
request_interval = 20
)
}
if (rule == "twoClosest") {
mygreat[[ilabel]] <- rGREAT::submitGreatJob(
mypeaks,
species = species,
rule = "twoClosest",
adv_twoDistance = adv_twoDistance,
request_interval = 20
)
}
if (rule == "oneClosest") {
mygreat[[ilabel]] <- rGREAT::submitGreatJob(
mypeaks,
species = species,
rule = "oneClosest",
adv_twoDistance = adv_oneDistance,
request_interval = 20
)
}
} # end looping through peak types
return(mygreat)
} # end function
#' Enrichment analysis using GREAT package
#' to identify putative pathways of interest for further
#' investigation
#'
#' @param GREATpath output of runGREAT()
#' @param pathway_category character, "GO", "Pathway Data", "Regulatory Motifs",
#' "Phenotype Data and Human Disease", "Gene Expression", "Gene Families"
#' (default is "GO")
#' @param enrichcutoff numeric, fold change enrichment cutoff to determine enriched pathways,
#' default is 2
#' @param adjpvalcutoff numeric, Bonferroni adjusted p-value cutoff to determine enriched pathways,
#' default is 0.05
#' @param adjustby character, "fdr" or "bonferroni", default is "bonferroni"
#' @param test character, "Both" denotes hypergeometric and binomical tests are used to
#' determine enriched pathways, "Binom" denotes binomial tests used, "Hyper" denotes
#' hypergeometric tests are used. Default is "Binom"
#'
#' @return list of dataframes for enriched pathways - each dataframe in the list
#' represents one pathway type (e.g. "GO Molecular Function")
#' @examples
#' \dontrun{
#' csvfile <- loadCSVFile("DNaseEncodeExample.csv")
#' samplePeaks <- loadBedFiles(csvfile)
#' consensusPeaks <- getConsensusPeaks(samplepeaks = samplePeaks, minreps = 2)
#' TSSannot <- getTSS()
#' consensusPeaksAnnotated <- combineAnnotatePeaks(conspeaks = consensusPeaks,
#' TSS = TSSannot,
#' merge = TRUE,
#' regionspecific = TRUE,
#' distancefromTSSdist = 1500,
#' distancefromTSSprox = 1000)
#' consensusPeaksCounts <- getCounts(annotpeaks = consensusPeaksAnnotated,
#' reference = 'SAEC',
#' sampleinfo = csvfile,
#' chrom = 'chr21')
#' alteredPeaks <- countanalysis(counts=consensusPeaksCounts,
#' pval=0.01,
#' lfcvalue=1)
#' alteredPeaksCategorized <- categAltrePeaks(alteredPeaks,
#' lfctypespecific = 1.5,
#' lfcshared = 1.2,
#' pvaltypespecific = 0.01,
#' pvalshared = 0.05)
#' callPaths <- runGREAT(peaks = alteredPeaksCategorized)
#' pathResults <- processPathways(callPaths, pathway_category = "GO",
#' enrichcutoff = 2, adjpvalcutoff = 0.05)
#' }
#' @export
#'
processPathways <- function(GREATpath,
pathway_category = "GO",
adjustby = "bonferroni",
test = "Binom",
enrichcutoff = 2,
adjpvalcutoff = 0.05) {
finaloutput = list()
for (job in names(GREATpath)) {
if (!is(GREATpath[[job]], "GreatJob")) {
stop(
"GREATpath is not a list of 'GreatJob' objects.
Input should be the output of runGREAT()"
)
}
output <- rGREAT::getEnrichmentTables(GREATpath[[job]], category = pathway_category)
names(output) <- gsub(" ", "_", names(output))
stats <- data.frame(Pathway = names(output),
NumSig = rep(NA, length(names(output))))
for (i in names(output)) {
output[[i]]$Binom_adj_PValue <- stats::p.adjust(
output[[i]]$"Binom_Raw_PValue",
method = adjustby)
output[[i]]$Hyper_adj_PValue <- stats::p.adjust(
output[[i]]$"Hyper_Raw_PValue",
method = adjustby)
if (test == "Both") {
keepers <- base::Reduce(intersect, list(
which(output[[i]]$Binom_Fold_Enrichment > enrichcutoff),
which(output[[i]]$Hyper_Fold_Enrichment >
enrichcutoff),
which(output[[i]]$Binom_adj_PValue <=
adjpvalcutoff),
which(output[[i]]$Hyper_adj_PValue <=
adjpvalcutoff)
))
}
else if (test == "Binom") {
keepers <- base::Reduce(intersect,
list(
which(output[[i]]$Binom_Fold_Enrichment
> enrichcutoff),
which(output[[i]]$Binom_adj_PValue
<= adjpvalcutoff)
)
)
}
else if (test == "Hyper") {
keepers <- base::Reduce(intersect, list(
which(output[[i]]$Hyper_Fold_Enrichment > enrichcutoff),
which(output[[i]]$Hyper_adj_PValue <=
adjpvalcutoff)
))
}
else {
stop("test should be 'Both', 'Binom', or 'Hyper'")
}
#print(length(keepers))
output[[i]] <- output[[i]][keepers, ]
output[[i]] <- output[[i]][base::order(output[[i]]$Binom_adj_PValue), ]
stats$NumSig[which(stats$Pathway == i)] <-
length(which(output[[i]]$Binom_adj_PValue <= adjpvalcutoff))
}
finaloutput[[job]] <- list(Sig_Pathways = output, stats = stats)
} # end looping through each GREAT job
return(finaloutput)
} # end function
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.