Nothing
#############################################
## groupComparisonPlots
#############################################
#' @export
#' @importFrom gplots heatmap.2
#' @importFrom stats hclust
#' @importFrom ggrepel geom_text_repel
#' @importFrom marray maPalette
groupComparisonPlots <- function(data=data,
type=type,
sig=0.05,
FCcutoff=FALSE,
logBase.pvalue=10,
ylimUp=FALSE,
ylimDown=FALSE,
xlimUp=FALSE,
x.axis.size=10,
y.axis.size=10,
dot.size=3,
text.size=4,
text.angle=0,
legend.size=13,
ProteinName=TRUE,
colorkey=TRUE,
numProtein=100,
clustering="both",
width=10,
height=10,
which.Comparison="all",
which.Protein="all",
address="") {
## save process output in each step
allfiles <- list.files()
filenaming <- "msstats"
if (length(grep(filenaming,allfiles)) == 0) {
finalfile <- "msstats.log"
processout <- NULL
} else {
num <- 0
finalfile <- "msstats.log"
while (is.element(finalfile, allfiles)) {
num <- num + 1
lastfilename <- finalfile ## in order to rea
finalfile <- paste0(paste(filenaming, num, sep="-"), ".log")
}
finalfile <- lastfilename
processout <- as.matrix(read.table(finalfile, header=TRUE, sep="\t"))
}
processout <- rbind(processout,
as.matrix(c(" ", " ", "MSstats - groupComparisonPlots function", " "), ncol=1))
## make upper letter
type <- toupper(type)
if (length(setdiff(type, c("HEATMAP", "VOLCANOPLOT", "COMPARISONPLOT"))) != 0) {
processout <- rbind(processout,
c(paste0("Input for type=", type,
". However,'type' should be one of Heatmap, VolcanoPlot, ComparisonPlot.")))
write.table(processout, file=finalfile, row.names=FALSE)
stop(paste0("Input for type=", type,
". However,'type' should be one of Heatmap, VolcanoPlot, ComparisonPlot."))
}
## check logBase.pvalue is 2,10 or not
if (logBase.pvalue != 2 & logBase.pvalue != 10) {
processout <- rbind(processout,
"ERROR : (-) Logarithm transformation for adjusted p-values : log2 or log10 only - stop")
write.table(processout, file=finalfile, row.names=FALSE)
stop("Only -log2 or -log10 for logarithm transformation for adjusted p-values are posssible.\n")
}
if (which.Comparison != "all") {
## check which.comparison is name of comparison
if (is.character(which.Comparison)) {
temp.name <- which.Comparison
## message if name of comparison is wrong.
if (length(setdiff(temp.name, unique(data$Label))) > 0) {
processout <- rbind(processout,
paste0("Please check labels of comparions. Result does not have this comparison. - ",
toString(temp.name)))
write.table(processout, file=finalfile, row.names=FALSE)
stop(paste0("Please check labels of comparions. Result does not have this comparison. - ",
toString(temp.name)))
}
}
## check which.comparison is order number of comparison
if (is.numeric(which.Comparison)) {
temp.name <- levels(data$Label)[which.Comparison]
## message if name of comparison is wrong.
if (length(levels(data$Label))<max(which.Comparison)) {
stop(paste0("Please check your selection of comparisons. There are ",
length(levels(data$Label)), " comparisons in this result."))
}
}
## use only assigned proteins
data <- data[which(data$Label %in% temp.name), ]
data$Protein <- factor(data$Protein)
data$Label <- factor(data$Label)
} else {
data$Protein <- factor(data$Protein)
data$Label <- factor(data$Label)
}
#######################
## Heatmap
#######################
if (type == "HEATMAP") {
## check whether there is only one protein or not,
if (length(unique(data$Protein)) <= 1) {
stop("At least two proteins are needed for heatmaps.")
}
## check whether there is only one comparison or not,
if (length(unique(data$Label)) <= 1) {
stop("At least two comparisons are needed for heatmaps.")
}
if (logBase.pvalue == 2) {
y.limUp <- 30
}
if (logBase.pvalue == 10) {
y.limUp <- 10
}
if (is.numeric(ylimUp)) {
y.limUp <- ylimUp
}
## when NA, change it
## data$adj.pvalue[is.na(data$adj.pvalue)] <- 1 ## missing will be grey
if (logBase.pvalue == 2) {
data$adj.pvalue[data$adj.pvalue < 2^(-y.limUp)] <- 2^(-y.limUp)
}
if (logBase.pvalue == 10) {
data$adj.pvalue[data$adj.pvalue < 10^(-y.limUp)] <- 10^(-y.limUp)
}
## if FCcutoff is assigned, make p-value insignificant.
if (is.numeric(FCcutoff)) {
if (colnames(data)[3] == "log2FC") {
data$adj.pvalue[data[, 3] < log2(FCcutoff) & data[, 3] > (-log2(FCcutoff))] <- 1
}
if (colnames(data)[3] == "log10FC") {
data$adj.pvalue[data[, 3] < log10(FCcutoff) & data[, 3] > (-log10(FCcutoff))] <- 1
}
}
final <- NULL
## based on p-value
for (i in 1:nlevels(data$Label)) {
sub <- data[data$Label == levels(data$Label)[i], ]
if (logBase.pvalue == 2) {
temp <- -log2(sub$adj.pvalue) * sign(sub[, 3])
}
if (logBase.pvalue == 10) {
temp <- -log10(sub$adj.pvalue) * sign(sub[, 3])
}
final <- data.frame(cbind(final, temp))
}
obj <- final
data$Protein <- factor(data$Protein)
rownames(obj) <- levels(data$Protein)
colnames(obj) <- levels(data$Label)
## remove if whole rows or columns are NA
obj <- obj[rowSums(!is.na(obj)) != 0, colSums(!is.na(obj)) != 0]
## clustering for order
tempobj <- obj
tempobj[is.na(tempobj)] <- 50
if (toupper(clustering) == 'PROTEIN') {
obj <- obj[hclust(dist(tempobj), method="ward.D")$order, ]
}
if (toupper(clustering) == 'COMPARISON') {
obj <- obj[, hclust(dist(t(tempobj)), method="ward.D")$order]
}
if (toupper(clustering) == 'BOTH') {
obj <- obj[hclust(dist(tempobj), method="ward.D")$order,
hclust(dist(t(tempobj)), method="ward.D")$order]
}
if (toupper(clustering) == 'NONE') {
obj <- obj
}
rm(tempobj)
## change the order
##obj$id <- seq(1:nrow(obj))
##obj <- obj[order(obj$id,decreasing=TRUE),]
##obj <- subset(obj, select=-c(id))
## color scale
blue.red.18 <- maPalette(low="blue", high="red", mid="black", k=12)
my.colors <- blue.red.18
#my.colors[my.colors=="#FFFFFF"] <- "gold"
my.colors <- c(my.colors, "grey") ## for NA
## color scale is fixed with log 10 based. then change break for log 2 based
up <- 10
temp <- 10^(-sort(ceiling(seq(2, up, length=10)[c(1, 2, 3, 5, 10)]), decreasing=TRUE))
breaks <- c(temp, sig)
if (logBase.pvalue == 10) {
neg.breaks <- log(breaks, 10)
my.breaks <- c(neg.breaks, 0, -neg.breaks[6:1], 101)
} else if (logBase.pvalue == 2) {
neg.breaks <- log(breaks, 2)
my.breaks <- c(neg.breaks, 0, -neg.breaks[6:1], 101)
}
## draw color key
blocks <- c(-breaks, 1, breaks[6:1])
x.at <- seq(-0.05, 1.05, length.out=13)
## maximum number of proteins per heatmap
namepro <- rownames(obj)
totalpro <- length(namepro)
numheatmap <- totalpro %/% numProtein + 1
## If there are the file with the same name, add next numbering at the end of file name
if (address != FALSE) {
allfiles <- list.files()
num <- 0
filenaming <- paste0(address, "Heatmap")
finalfile <- paste0(address, "Heatmap.pdf")
while (is.element(finalfile, allfiles)) {
num <- num + 1
finalfile <- paste0(paste(filenaming, num, sep="-"), ".pdf")
}
pdf(finalfile, width=width, height=height)
}
if (colorkey) {
par(mar=c(3, 3, 3, 3), mfrow=c(3, 1), oma=c(3, 0, 3, 0))
plot.new()
image(z = matrix(seq(1:(length(my.colors) - 1)), ncol = 1),
col=my.colors[-length(my.colors)],
xaxt = "n",
yaxt = "n")
mtext("Color Key", side=3,line=1, cex=3)
mtext("(sign) Adjusted p-value", side=1, line=3, at=0.5, cex=1.7)
mtext(blocks, side=1, line=1, at=x.at, cex=1)
}
## draw heatmap
## loop for numProtein
for (j in 1:numheatmap) {
## get the number proteins needed
if (j != numheatmap) {
tempobj <- obj[((j - 1) * numProtein + 1):(j * numProtein), ]
} else {
tempobj <- obj[((j - 1) * numProtein + 1):nrow(obj), ]
}
par(oma=c(3, 0, 0, 4))
heatmap.2(as.matrix(tempobj),
col=my.colors,
Rowv=FALSE,
Colv=FALSE,
dendrogram="none",
breaks=my.breaks,
trace="none",
na.color="grey", ## missing value will be grey
cexCol=(x.axis.size / 10), cexRow=(y.axis.size / 10), # assign text.size as option
key=FALSE,
lhei=c(0.1, 0.9),
lwid=c(0.1, 0.9))
}
## end loop for heatmap
if (address != FALSE) {
dev.off()
}
}
#######################
## VolcanoPlot
#######################
if (type == "VOLCANOPLOT") {
## choose comparison to draw plots
if (address == FALSE) { ## here I used == FALSE, instead of !address. Because address can be logical or characters.
if( which.Comparison == 'all') {
if(length(unique(data$Label)) > 1) {
stop('** Cannnot generate all volcano plots in a screen. Please set one comparison at a time.')
}
} else if (length(which.Comparison) > 1) {
stop( '** Cannnot generate multiple volcano plots in a screen. Please set one comparison at a time.' )
}
}
## If there are the file with the same name, add next numbering at the end of file name
if (address != FALSE) {
allfiles <- list.files()
num <- 0
filenaming <- paste0(address, "VolcanoPlot")
finalfile <- paste0(address, "VolcanoPlot.pdf")
while (is.element(finalfile, allfiles)) {
num <- num + 1
finalfile <- paste0(paste(filenaming, num, sep="-"), ".pdf")
}
pdf(finalfile, width=width, height=height)
}
if (logBase.pvalue == 2) {
y.limUp <- 30
}
if (logBase.pvalue == 10) {
y.limUp <- 10
}
if (is.numeric(ylimUp)) {
y.limUp <- ylimUp
}
## remove the result, NA
data <- data[!is.na(data$adj.pvalue), ]
## group for coloring dots
if (!FCcutoff) {
data[data$adj.pvalue >= sig, "colgroup"] <- "black"
data[data$adj.pvalue < sig & data[, 3] > 0, "colgroup"] <- "red"
data[data$adj.pvalue < sig & data[, 3] < 0, "colgroup"] <- "blue"
}
if (is.numeric(FCcutoff)) {
data$colgroup <- "black"
if (colnames(data)[3] == "log2FC") {
data[data$adj.pvalue < sig & data[, 3] > log2(FCcutoff), "colgroup"] <- "red"
data[data$adj.pvalue < sig & data[, 3] < (-log2(FCcutoff)), "colgroup"] <- "blue"
}
if (colnames(data)[3] == "log10FC") {
data[data$adj.pvalue < sig & data[, 3] > log10(FCcutoff), "colgroup"] <- "red"
data[data$adj.pvalue < sig & data[, 3] < (-log10(FCcutoff)), "colgroup"] <- "blue"
}
}
data$colgroup <- factor(data$colgroup, levels=c("black", "blue", "red"))
## for multiple volcano plots,
for (i in 1:nlevels(data$Label)) {
sub <- data[data$Label == levels(data$Label)[i], ]
if (logBase.pvalue == 2) {
sub$adj.pvalue[sub$adj.pvalue < 2^(-y.limUp)] <- 2^(-y.limUp)
}
if (logBase.pvalue == 10) {
sub$adj.pvalue[sub$adj.pvalue < 10^(-y.limUp)] <- 10^(-y.limUp)
}
sub <- as.data.frame(sub)
## ylimUp
if (logBase.pvalue == 2) {
y.limup <- ceiling(max(-log2(sub[!is.na(sub$adj.pvalue), "adj.pvalue"])))
if (y.limup < (-log2(sig))) {
y.limup <- (-log2(sig) + 1) ## for too small y.lim
}
}
if (logBase.pvalue == 10) {
y.limup <- ceiling(max(-log10(sub[!is.na(sub$adj.pvalue), "adj.pvalue"])))
if (y.limup < (-log10(sig))) {
y.limup <- (-log10(sig) + 1) ## for too small y.lim
}
}
## ylimDown
y.limdown <- 0 ## default is zero
if (is.numeric(ylimDown)) {
y.limdown <- ylimDown
}
## x.lim
x.lim <- ceiling(max(abs(sub[!is.na(sub[, 3]) & abs(sub[, 3]) != Inf, 3]))) ## log2FC or log10FC
if (x.lim < 3) {
x.lim <- 3
}
if (is.numeric(xlimUp)) {
x.lim <- xlimUp
}
## for assigning x in ggplot2
subtemp <- sub
colnames(subtemp)[3] <- "logFC"
if (logBase.pvalue == 2) {
subtemp$log2adjp <- (-log2(subtemp$adj.pvalue))
}
if (logBase.pvalue == 10) {
subtemp$log10adjp <- (-log10(subtemp$adj.pvalue))
}
## for x limit for inf or -inf
subtemp$newlogFC <- subtemp$logFC
subtemp[!is.na(subtemp$issue) &
subtemp$issue == "oneConditionMissing" &
subtemp$logFC == Inf, "newlogFC"] <- (x.lim - 0.2)
subtemp[!is.na(subtemp$issue) &
subtemp$issue == "oneConditionMissing" &
subtemp$logFC == (-Inf), "newlogFC"] <- (x.lim - 0.2) * (-1)
## add (*) in Protein name for Inf or -Inf
subtemp$Protein <- as.character(subtemp$Protein)
subtemp[!is.na(subtemp$issue) &
subtemp$issue == "oneConditionMissing", "Protein"] <- paste0("*",
subtemp[!is.na(subtemp$issue) &
subtemp$issue == "oneConditionMissing", "Protein"])
## Plotting
if (logBase.pvalue == 2) {
ptemp <- ggplot(aes_string(x='logFC', y='log2adjp',
color='colgroup',
label='Protein'),
data=subtemp) +
geom_point(size=dot.size) +
scale_colour_manual(values=c("gray65", "blue", "red"),
limits=c("black", "blue", "red"),
breaks=c("black", "blue", "red"),
labels=c("No regulation", "Down-regulated", "Up-regulated")) +
scale_y_continuous('-Log2 (adjusted p-value)',
limits=c(y.limdown, y.limup)) +
labs(title=unique(sub$Label))
}
if (logBase.pvalue == 10) {
ptemp <- ggplot(aes_string(x='logFC', y='log10adjp',
color='colgroup',
label='Protein'),
data=subtemp) +
geom_point(size=dot.size) +
scale_colour_manual(values=c("gray65", "blue", "red"),
limits=c("black", "blue", "red"),
breaks=c("black", "blue", "red"),
labels=c("No regulation", "Down-regulated", "Up-regulated")) +
scale_y_continuous('-Log10 (adjusted p-value)',
limits=c(y.limdown, y.limup)) +
labs(title=unique(sub$Label))
}
## x-axis labeling
if (colnames(sub)[3] == "log2FC") {
ptemp <- ptemp +
scale_x_continuous('Log2 fold change',
limits=c(-x.lim, x.lim))
}
if (colnames(sub)[3] == "log10FC") {
ptemp <- ptemp +
scale_x_continuous('Log10 fold change',
limits=c(-x.lim, x.lim))
}
## add protein name
if (ProteinName) {
if (length(unique(subtemp$colgroup)) == 1 & any(unique(subtemp$colgroup) == 'black')) {
message(paste0("The volcano plot for ", unique(subtemp$Label),
" does not show the protein names because none of them is significant."))
} else {
ptemp <- ptemp +
geom_text_repel(data=subtemp[subtemp$colgroup != "black", ],
aes(label=Protein),
size=text.size,
col='black')
}
}
## For legend of linetype for cutoffs
## first assign line type
ltypes <- c("type1"="twodash", "type2"="dotted")
## cutoff lines, FDR only
if (!FCcutoff) {
if (logBase.pvalue == 2) {
sigcut <- data.frame("Protein"='sigline',
"logFC"=seq(-x.lim, x.lim, length.out=20),
"log2adjp"=(-log2(sig)),
"line"='twodash')
pfinal <- ptemp +
geom_line(data=sigcut,
aes_string(x='logFC', y='log2adjp', linetype='line'),
colour="darkgrey",
size=0.6,
show.legend=TRUE) +
scale_linetype_manual(values=c('twodash'=6),
labels=c(paste0("Adj p-value cutoff (", sig, ")"))) +
guides(colour=guide_legend(override.aes=list(linetype=0)),
linetype=guide_legend())
}
if (logBase.pvalue == 10) {
sigcut <- data.frame("Protein"='sigline',
"logFC"=seq(-x.lim, x.lim, length.out=20),
"log10adjp"=(-log10(sig)),
"line"='twodash')
pfinal <- ptemp +
geom_line(data=sigcut,
aes_string(x='logFC', y='log10adjp', linetype='line'),
colour="darkgrey",
size=0.6,
show.legend=TRUE) +
scale_linetype_manual(values=c('twodash'=6),
labels=c(paste0("Adj p-value cutoff (", sig, ")"))) +
guides(colour=guide_legend(override.aes=list(linetype=0)),
linetype=guide_legend())
}
}
## cutoff lines, FDR and Fold change cutoff
if (is.numeric(FCcutoff)) {
if (colnames(sub)[3] == "log2FC") {
if (logBase.pvalue == 2) {
## three different lines
sigcut <- data.frame("Protein"='sigline',
"logFC"=seq(-x.lim, x.lim, length.out=10),
"log2adjp"=(-log2(sig)),
"line"='twodash')
FCcutpos <- data.frame("Protein"='sigline',
"logFC"=log2(FCcutoff),
"log2adjp"=seq(y.limdown, y.limup, length.out=10),
"line"='dotted')
FCcutneg <- data.frame("Protein"='sigline',
"logFC"=(-log2(FCcutoff)),
"log2adjp"=seq(y.limdown, y.limup, length.out=10),
"line"='dotted')
## three lines, with order color first and then assign linetype manual
pfinal <- ptemp +
geom_line(data=sigcut,
aes_string(x='logFC', y='log2adjp', linetype='line'),
colour="darkgrey",
size=0.6,
show.legend=TRUE) +
geom_line(data=FCcutpos,
aes_string(x='logFC', y='log2adjp', linetype='line'),
colour="darkgrey",
size=0.6,
show.legend=TRUE) +
geom_line(data=FCcutneg,
aes_string(x='logFC', y='log2adjp', linetype='line'),
colour="darkgrey",
size=0.6) +
scale_linetype_manual(values=c('dotted'=3, 'twodash'=6),
labels=c(paste0("Fold change cutoff (", FCcutoff, ")"),
paste0("Adj p-value cutoff (", sig, ")"))) +
guides(colour=guide_legend(override.aes=list(linetype=0)),
linetype=guide_legend())
}
if (logBase.pvalue == 10) {
## three different lines
sigcut <- data.frame("Protein"='sigline',
"logFC"=seq(-x.lim, x.lim, length.out=10),
"log10adjp"=(-log10(sig)),
"line"='twodash')
FCcutpos <- data.frame("Protein"='sigline',
"logFC"=log2(FCcutoff),
"log10adjp"=seq(y.limdown, y.limup, length.out=10),
"line"='dotted')
FCcutneg <- data.frame("Protein"='sigline',
"logFC"=(-log2(FCcutoff)),
"log10adjp"=seq(y.limdown, y.limup, length.out=10),
"line"='dotted')
## three lines, with order color first and then assign linetype manual
pfinal <- ptemp +
geom_line(data=sigcut,
aes_string(x='logFC', y='log10adjp', linetype='line'),
colour="darkgrey",
size=0.6,
show.legend=TRUE) +
geom_line(data=FCcutpos,
aes_string(x='logFC', y='log10adjp', linetype='line'),
colour="darkgrey",
size=0.6,
show.legend=TRUE) +
geom_line(data=FCcutneg,
aes_string(x='logFC', y='log10adjp', linetype='line'),
colour="darkgrey",
size=0.6) +
scale_linetype_manual(values=c('dotted'=3, 'twodash'=6),
labels=c(paste0("Fold change cutoff (", FCcutoff, ")"),
paste0("Adj p-value cutoff (", sig, ")"))) +
guides(colour=guide_legend(override.aes=list(linetype=0)),
linetype=guide_legend())
}
}
if (colnames(sub)[3] == "log10FC") {
if (logBase.pvalue == 2) {
## three different lines
sigcut <- data.frame("Protein"='sigline',
"logFC"=seq(-x.lim, x.lim, length.out=10),
"log2adjp"=(-log2(sig)),
"line"='twodash')
FCcutpos <- data.frame("Protein"='sigline',
"logFC"=log10(FCcutoff),
"log2adjp"=seq(y.limdown, y.limup, length.out=10),
"line"='dotted')
FCcutneg <- data.frame("Protein"='sigline',
"logFC"=(-log10(FCcutoff)),
"log2adjp"=seq(y.limdown, y.limup, length.out=10),
"line"='dotted')
## three lines, with order color first and then assign linetype manual
pfinal <- ptemp +
geom_line(data=sigcut,
aes_string(x='logFC', y='log2adjp', linetype='line'),
colour="darkgrey",
size=0.6,
show.legend=TRUE) +
geom_line(data=FCcutpos,
aes_string(x='logFC', y='log2adjp', linetype='line'),
colour="darkgrey",
size=0.6,
show.legend=TRUE) +
geom_line(data=FCcutneg,
aes_string(x='logFC', y='log2adjp', linetype='line'),
colour="darkgrey",
size=0.6) +
scale_linetype_manual(values=c('dotted'=3, 'twodash'=6),
labels=c(paste0("Fold change cutoff (", FCcutoff, ")"),
paste0("Adj p-value cutoff (", sig, ")"))) +
guides(colour=guide_legend(override.aes=list(linetype=0)),
linetype=guide_legend())
}
if (logBase.pvalue == 10) {
## three different lines
sigcut <- data.frame("Protein"='sigline',
"logFC"=seq(-x.lim, x.lim, length.out=10),
"log10adjp"=(-log10(sig)),
"line"='twodash')
FCcutpos <- data.frame("Protein"='sigline',
"logFC"=log10(FCcutoff),
"log10adjp"=seq(y.limdown, y.limup, length.out=10),
"line"='dotted')
FCcutneg <- data.frame("Protein"='sigline',
"logFC"=(-log10(FCcutoff)),
"log10adjp"=seq(y.limdown, y.limup, length.out=10),
"line"='dotted')
## three lines, with order color first and then assign linetype manual
pfinal <- ptemp +
geom_line(data=sigcut,
aes_string(x='logFC', y='log10adjp', linetype='line'),
colour="darkgrey",
size=0.6,
show.legend=TRUE) +
geom_line(data=FCcutpos,
aes_string(x='logFC', y='log10adjp', linetype='line'),
colour="darkgrey",
size=0.6,
show.legend=TRUE) +
geom_line(data=FCcutneg,
aes_string(x='logFC', y='log10adjp', linetype='line'),
colour="darkgrey",
size=0.6) +
scale_linetype_manual(values=c('dotted'=3, 'twodash'=6),
labels=c(paste0("Fold change cutoff (", FCcutoff, ")"),
paste0("Adj p-value cutoff (", sig, ")"))) +
guides(colour=guide_legend(override.aes=list(linetype=0)),
linetype=guide_legend())
}
}
}
pfinal <- pfinal +
theme(
panel.background = element_rect(fill='white', colour="black"),
panel.grid.minor = element_blank(),
axis.text.x = element_text(size=x.axis.size, colour="black"),
axis.text.y = element_text(size=y.axis.size, colour="black"),
axis.ticks = element_line(colour="black"),
axis.title.x = element_text(size=x.axis.size+5, vjust=-0.4),
axis.title.y = element_text(size=y.axis.size+5, vjust=0.3),
title = element_text(size=x.axis.size+8, vjust=1.5),
legend.position="bottom",
legend.key = element_rect(fill='white', colour='white'),
legend.text = element_text(size=legend.size),
legend.title = element_blank()
)
print(pfinal)
} ## end-loop
if (address!=FALSE) {
dev.off()
}
}
#######################
## Comparison Plot
#######################
if (type == "COMPARISONPLOT") {
datatemp <- data[!is.na(data$adj.pvalue), ]
datatemp$Protein <- factor(datatemp$Protein)
## choose comparison to draw plots
if (address == FALSE){ ## here I used != FALSE, instead of !address. Because address can be logical or characters.
if (which.Protein == 'all') {
stop('** Cannnot generate all comparison plots in a screen. Please set one protein at a time.')
} else if (length(which.Protein) > 1) {
stop('** Cannnot generate multiple comparison plots in a screen. Please set one protein at a time.')
}
}
## Then address should not be FALASE.
## choose Proteins or not
if (which.Protein != "all") {
## check which.Protein is name of Protein
if (is.character(which.Protein)) {
temp.name <- which.Protein
## message if name of Protein is wrong.
if (length(setdiff(temp.name,unique(datatemp$Protein))) > 0) {
stop(paste0("Please check protein name. Data set does not have this protein. - ",
toString(temp.name)))
}
}
## check which.Protein is order number of Protein
if (is.numeric(which.Protein)) {
temp.name <- levels(datatemp$Protein)[which.Protein]
## message if name of Protein is wrong.
if (length(levels(datatemp$Protein)) < max(which.Protein)) {
stop(paste0("Please check your selection of proteins. There are ",
length(levels(datatemp$Protein))," proteins in this dataset."))
}
}
## use only assigned proteins
datatemp <- datatemp[which(datatemp$Protein %in% temp.name), ]
datatemp$Protein <- factor(datatemp$Protein)
}
## If there are the file with the same name, add next numbering at the end of file name
if (address!=FALSE) {
allfiles <- list.files()
num <- 0
filenaming <- paste0(address, "ComparisonPlot")
finalfile <- paste0(address, "ComparisonPlot.pdf")
while (is.element(finalfile, allfiles)) {
num <- num+1
finalfile <- paste0(paste(filenaming, num, sep="-"), ".pdf")
}
pdf(finalfile, width=width, height=height)
}
for (i in 1:nlevels(datatemp$Protein)) {
sub <- datatemp[datatemp$Protein == levels(datatemp$Protein)[i], ]
#sub$ciw <- qt(1-sig/2,sub$DF)*sub$SE
## adjust for multiple comparison within protein
sub$ciw <- qt(1 - sig / (2 * nrow(sub)), sub$DF) * sub$SE
sub <- as.data.frame(sub)
## for assigning x in ggplot2
colnames(sub)[3] <- "logFC"
## ylimUp
y.limup <- ceiling(max(sub$logFC + sub$ciw))
if (is.numeric(ylimUp)) {
y.limup <- ylimUp
}
## ylimDown
y.limdown <- floor(min(sub$logFC - sub$ciw))
if (is.numeric(ylimDown)) {
y.limdown <- ylimDown
}
## adjust xthe location for x-axis label
if(text.angle != 0){
hjust <- 1
vjust <- 1
} else {
hjust <- 0.5
vjust <- 0.5
}
ptemp <- ggplot(aes_string(x='Label', y='logFC'), data=sub) +
geom_errorbar(aes(ymax = logFC + ciw, ymin=logFC - ciw),
data=sub,
width=0.1,
colour="red") +
geom_point(size=dot.size,
colour="darkred") +
scale_x_discrete('Comparison') +
geom_hline(yintercept=0,
linetype="twodash",
colour="darkgrey",
size=0.6) +
labs(title=levels(datatemp$Protein)[i]) +
theme(
panel.background = element_rect(fill='white', colour="black"),
panel.grid.major.y = element_line(colour="grey95"),
panel.grid.minor.y = element_blank(),
axis.text.x = element_text(size=x.axis.size, colour="black",
angle=text.angle, hjust=hjust, vjust=vjust),
axis.text.y = element_text(size=y.axis.size, colour="black"),
axis.ticks = element_line(colour="black"),
axis.title.x = element_text(size=x.axis.size+5, vjust=-0.4),
axis.title.y = element_text(size=y.axis.size+5, vjust=0.3),
title = element_text(size=x.axis.size+8, vjust=1.5)
)
if (colnames(data)[3] == "log2FC") {
ptemp <- ptemp +
scale_y_continuous("Log2-Fold Change",
limits=c(y.limdown, y.limup))
}
if (colnames(data)[3] == "log10FC") {
ptemp <- ptemp +
scale_y_continuous("Log10-Fold Change",
limits=c(y.limdown, y.limup))
}
print(ptemp)
message(paste0("Drew compasison plot for ", unique(sub$PROTEIN),
"(", i, " of ", length(unique(datatemp$Protein)), ")"))
} ## end-loop
if (address!=FALSE) {
dev.off()
}
} ## end Comparison plot
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.