###################################################
### edgeR_GLM parameters: to be modified by the user
###################################################
# All settings below are plain top-level variables read by the rest of this
# script: edit the values, not the variable names.
rm(list=ls()) # remove all the objects of the R session
workspace <- "." # workspace for the R session
projectName <- "project" # name of the project (cannot contain any ".")
analysisVersion <- "vN" # name of the analysis version (cannot contain any ".")
author <- "(Biomics Platform - Institut Pasteur)" # author of the statistical report
researcher <- "Charles Friedel" # name of the researcher
chief <- "Louis Pasteur" # name of the head of unit
targetFile <- "../target.txt" # path to the design/target file
infoFile <- NULL # path to the annotation file (needed if 0 counts not in counts files)
rawDir <- "../raw" # path to the directory containing raw counts files
varInt1 <- "varInt1" # first factor of interest
varInt2 <- "varInt2" # second factor of interest
condRef1 <- "condRef1" # reference biological condition for varInt1
condRef2 <- "condRef2" # reference biological condition for varInt2
design <- ~ varInt1 + varInt2 + varInt1:varInt2 # design formula of the statistical model
outfile <- TRUE # TRUE to export figures, FALSE to display them in R
colors <- c("#f3c300", "#875692", "#f38400", "#a1caf1", "#be0032", # vector of colors of each group on the plots
"#c2b280", "#848482", "#008856", "#e68fac", "#0067a5",
"#f99379", "#604e97", "#f6a600", "#b3446c", "#dcd300",
"#882d17", "#8db600", "#654522", "#e25822", "#2b3d26")
alpha <- 0.05 # threshold of statistical significance
adjMethod <- "BH" # p-value adjustment method: "BH" (default) or "BY"
gene.selection <- "pairwise" # type of gene selection used in MDSPlot
geneLengthFile <- NULL # path to the gene lengths file (default is NULL)
featuresToRemove <- c("alignment_not_unique", # names of the features to be removed (default is the HTSeq-count specific lines)
"ambiguous", "no_feature",
"not_aligned", "too_low_aQual")
###################################################
### code chunk number 1: other parameters and miscellaneous loading
###################################################
# Move into the analysis workspace, then attach the packages used below.
setwd(workspace)
library(RNADiff)
library(knitr)
# "<projectName>-<analysisVersion>": passed as versionName to the calls below
# and used as the base name of the saved .RData file.
versionName <- paste0(projectName, "-", analysisVersion)
ncol <- NULL # width of the tables in the report
cat("Creation des dossiers d'exports\n")
# Create the export directories; showWarnings=FALSE silences the warning
# emitted when a directory already exists.
invisible(lapply(c("figures", "tables"), dir.create, showWarnings=FALSE))
###################################################
### code chunk number 2: loadData
###################################################
cat("Chargement des annotations et longueurs des genes si besoin\n")
# Optional annotation table: read it and preview the first rows, else keep NULL.
if (is.null(infoFile)) {
  info <- NULL
} else {
  info <- read.delim(infoFile, sep="\t", header=TRUE, stringsAsFactors=FALSE)
  print(head(info))
}
# Optional gene length table: same logic as for the annotations.
if (is.null(geneLengthFile)) {
  glength <- NULL
} else {
  glength <- read.table(geneLengthFile, sep="\t", header=TRUE, stringsAsFactors=FALSE)
  print(head(glength))
}
cat("Chargement du target file\n")
# Target (design) table, loaded with both factors of interest and their
# reference conditions.
target <- loadTargetFile(targetFile,
                         varInt=c(varInt1, varInt2),
                         condRef=c(condRef1, condRef2))
print(target)
cat("Chargement des donnees\n")
counts <- loadCountData(target,
                        rawDir=rawDir,
                        versionName=versionName,
                        featuresToRemove=featuresToRemove)
cat("Verifier que les echantillons de counts sont dans le meme ordre que le target\n")
# Side-by-side display of the first target column and the count column names,
# for a visual check that samples are in the same order.
print(cbind(target=as.character(target[, 1]),
            counts=colnames(counts)))
cat("Verifier que les identifiants dans info et glength sont les memes que dans les comptages\n")
checkInfoGlength(counts=counts, info=info, glength=glength)
####################################################
### code chunk number 3: description of raw data
####################################################
# Descriptive figures and tables computed on the raw counts. Samples are
# grouped by the two factors of interest (columns varInt1 and varInt2 of the
# target table). Figures are exported or displayed according to `outfile`.
# Figure: number of reads per sample
cat("\nFigure : nombre de reads par echantillon\n")
barplotTC(counts=counts, group=target[,c(varInt1,varInt2)], col=colors, out=outfile, versionName=versionName)
# Figure: number of null counts per sample
cat("Figure : nombre de comptages nuls par echantillon\n")
barplotNul(counts=counts, group=target[,c(varInt1,varInt2)], col=colors, out=outfile, versionName=versionName)
# N = number of rows of `counts` dropped by removeNul(); reported below as the
# number of genes having only null counts
N <- nrow(counts) - nrow(removeNul(counts))
cat("\nNombre de genes avec que des comptages nuls :", N,"\n")
# Figure: density estimate of the counts of each sample
cat("\nFigure : estimation de la densite des comptages de chaque echantillon\n")
densityPlot(counts=counts, group=target[,c(varInt1,varInt2)], col=colors, out=outfile, versionName=versionName)
# Figure + table: most represented sequences for each sample
cat("\nFigure + tableau : sequences majoritaires pour chaque echantillon\n")
# NOTE: the result is stored under the same name as the function that produced it
majSequences <- majSequences(counts=counts, group=target[,c(varInt1,varInt2)], versionName=versionName, col=colors, out=outfile)
# Pairwise SERE values between samples, stored in `sere` and printed
cat("\nCalcul des SERE\n")
print(sere <- pairwiseSERE(counts, versionName=versionName))
cat("\nFigure : pairwise scatterplots of samples\n")
pairwiseScatterPlots(counts=counts, group=target[,c(varInt1,varInt2)], out=outfile, versionName=versionName)
####################################################
### code chunk number 4: creating DGEList object, normalization and estimateDispersion
####################################################
# Build the edgeR DGEList from the raw counts; remove.zeros=TRUE asks edgeR to
# drop the rows whose counts are all zero (see the edgeR DGEList documentation)
dge <- DGEList(counts=counts, remove.zeros=TRUE)
# Design matrix built from the `design` formula and the target table, stored
# inside the DGEList so that the following steps can reuse it
dge$design <- model.matrix(object=design, data=target)
# Estimation of the effective library sizes
cat("Estimation des Effective Library Sizes\n")
dge <- calcNormFactors(dge)
# Per-sample normalization factors, printed below as the TMM coefficients
tmm <- dge$samples$norm.factors
cat("TMM coefficients:\n")
print(tmm)
# Dispersion estimation: common, then trended, then tagwise, each call being
# given the design matrix
cat("\nCalcul des dispersions\n")
dge <- estimateGLMCommonDisp(dge, dge$design)
dge <- estimateGLMTrendedDisp(dge, dge$design)
dge <- estimateGLMTagwiseDisp(dge, dge$design)
# Figure: mean-dispersion relationship
cat("\nFigure : relation mean-dispersion\n")
BCVPlot(dge=dge, out=outfile, versionName=versionName)
####################################################
### code chunk number 5: boxplots before and after normalization
####################################################
# Figure: boxplots on raw and normalized counts
cat("Figure : boxplots sur comptages bruts et normalises\n")
# First call: counts held in the DGEList (dge$counts)
boxplotCounts(counts=dge$counts, group=target[,c(varInt1,varInt2)], col=colors, out=outfile, versionName=versionName)
# Second call: counts returned by normCounts.edgeR(dge), flagged with type="norm"
boxplotCounts(counts=normCounts.edgeR(dge), group=target[,c(varInt1,varInt2)], col=colors, type="norm", out=outfile, versionName=versionName)
###################################################
### code chunk number 6: clustering + MDS of samples
###################################################
# Figure: dendrogram of the clustering on transformed counts
# (log-scale CPM computed with prior.count=2)
cat("Figure : dendrogramme de la classification sur comptages transformes\n")
clusterPlot(counts=cpm(dge, prior.count=2, log=TRUE), out=outfile, versionName=versionName)
# Figure: first plane of the Multi Dimensional Scaling on transformed counts;
# the gene selection mode comes from the user parameter `gene.selection`
cat("Figure : premier plan de Multi Dimensional Scaling sur les comptages transformes\n")
MDSPlot(dge=dge, group=target[,c(varInt1,varInt2)], gene.selection=gene.selection, col=colors, out=outfile, versionName=versionName)
####################################################
### code chunk number 7: differential analysis
####################################################
# Fit the GLM on the DGEList with the design matrix stored in it.
fit <- glmFit(dge, dge$design)
# Display the coefficient names of the fitted model. Printed explicitly so the
# names are also shown when the script is run through source(), where bare
# top-level expressions are not auto-printed. The contrast vectors below must
# follow this coefficient order.
print(colnames(fit$design))
# Contrasts to test: one named numeric vector per comparison, holding one
# weight per model coefficient. The element name is used as the comparison name
# in `res`.
# NOTE(review): these vectors are project-specific examples written for a
# 6-coefficient design; adapt names and weights to your own design.
to_test <- list("B6-NI_vs_SEG-NI"=c(0,0,0,1,0,0),
                "B6-Imoins_vs_SEG-Imoins"=c(0,0,0,1,1,0),
                "B6-Iplus_vs_SEG-Iplus"=c(0,0,0,1,0,1),
                "SEG-Imoins_vs_SEG-NI"=c(0,1,0,0,0,0),
                "SEG-Iplus_vs_SEG-NI"=c(0,0,1,0,0,0),
                "SEG-Iplus_vs_SEG-Imoins"=c(0,-1,1,0,0,0),
                "B6-Imoins_vs_B6-NI"=c(0,1,0,0,1,0),
                "B6-Iplus_vs_B6-NI"=c(0,0,1,0,0,1),
                "B6-Iplus_vs_B6-Imoins"=c(0,-1,1,0,-1,1),
                "(SEG-Imoins_vs_SEG-NI)_vs_(B6-Imoins_vs_B6-NI)"=c(0,0,0,0,1,0),
                "(SEG-Iplus_vs_SEG-NI)_vs_(B6-Iplus_vs_B6-NI)"=c(0,0,0,0,0,1),
                "(SEG-Iplus_vs_SEG-Imoins)_vs_(B6-Iplus_vs_B6-Imoins)"=c(0,0,0,0,-1,1))
# Check the contrasts against the coefficient names before testing.
checkContrasts(coefs=colnames(fit$design), contrasts=to_test, versionName=versionName)
# One result table per contrast, pre-allocated and named after the contrasts.
res <- vector("list", length(to_test))
names(res) <- names(to_test)
for (name in names(to_test)){
  # Likelihood ratio test for this contrast
  lrt <- glmLRT(fit, contrast=to_test[[name]])
  # Keep every gene (n = number of rows of dge$counts) in the original order
  # (sort.by="none"), with p-values adjusted using adjMethod
  res[[name]] <- topTags(lrt, n=nrow(dge$counts), adjust.method=adjMethod, sort.by="none")$table
}
###################################################
### code chunk number 8: export tables
###################################################
cat("Export des resultats\n")
# Each sample is labelled "<varInt1 level>-<varInt2 level>"; `group` holds the
# label of every sample and `conds` the distinct labels.
complete <- exportComplete.edgeR(
  dge=dge,
  res=res,
  alpha=alpha,
  group=paste(target[,varInt1], target[,varInt2], sep="-"),
  conds=unique(paste(target[,varInt1], target[,varInt2], sep="-")),
  versionName=versionName,
  info=info,
  export=TRUE
)
cat("# genes up, down et total par comparaison\n")
# The result deliberately reuses the name of the function that produced it.
nDiffTotal <- nDiffTotal(complete, alpha=alpha, versionName=versionName)
print(nDiffTotal, quote=FALSE)
cat("Figure : nb de genes DE selon seuil FDR\n")
nbDiffSeuil(complete=complete, out=outfile, versionName=versionName)
# Gene-length based outputs are produced only when a gene length file was given.
if (!is.null(geneLengthFile)) {
  cat("Export : comptages normalises par la longueur des genes\n")
  normGeneLength(counts=normCounts.edgeR(dge), glength=glength, versionName=versionName)
  geneLengthEffect(counts, complete, glength, out=outfile, versionName=versionName)
}
###################################################
### code chunk number 9: distribution of raw p-values and MA-plot
###################################################
# Diagnostic figures built from the `complete` results; `alpha` is used as the
# p-value cutoff / significance threshold where one is required.
# Figure: distribution of the log2(Fold-Changes)
cat("Figure : distribution des log2(Fold-Changes)\n")
diagLogFC(complete=complete, out=outfile, versionName=versionName)
# Figure: histogram of the raw p-values
cat("Figure : histogramme des p-valeurs brutes\n")
histoRawp(complete=complete, out=outfile, versionName=versionName)
cat("\nFigure : MA-plot\n")
MAplotDE(complete=complete, pvalCutoff=alpha, out=outfile, versionName=versionName)
cat("\nFigure : volcano-plot\n")
volcanoPlotDE(complete=complete, pvalCutoff=alpha, out=outfile, versionName=versionName)
cat("\nFigure : Venn diagram\n")
vennDiagramDE(complete=complete, alpha=alpha, out=outfile, versionName=versionName)
cat("\nFigure : heatmap\n")
# The heatmap is given log-scale CPM values (prior.count=2) as transformed counts
heatmapDE(counts.trans=cpm(dge, prior.count=2, log=TRUE), complete=complete,
alpha=alpha, out=outfile, key.xlab="logCPM-centered data", versionName=versionName)
###################################################
### code chunk number 10: sessionInfo and saving
###################################################
# Saving the results
cat("Sauvegarde des resultats\n")
# Session description, stored under the same name as the function that returns it
sessionInfo <- sessionInfo()
# Versions of the RNADiff and edgeR packages in use
pckVersionRNADiff <- packageVersion("RNADiff")
pckVersionedgeR <- packageVersion("edgeR")
# Save the whole workspace (every object created above) to "<versionName>.RData"
save.image(file=paste0(versionName, ".RData"))
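# NOTE: the two lines below are web-page boilerplate that was captured together
# with this script; they are not R code and are kept only as comments so that
# the file can be parsed.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.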