Nothing
GSCAeda <- function(genedata,pattern,chipdata,SearchOutput,scaledata=F,Pval.co=0.05,Ordering="Average",Title=NULL,outputdir=NULL) {
if(nrow(SearchOutput)==0) stop("No SearchOutput specified.")
###presettings
genedata[,1] <- as.character(genedata[,1])
pattern[,1] <- as.character(pattern[,1])
if (!is.null(outputdir)) {
if (!grepl("/$",outputdir))
outputdir <- paste0(outputdir,"/")
plotfilepath <- paste0(outputdir,"GSCAeda_plot.pdf")
datafilepath <- paste0(outputdir,"GSCAeda_data.csv")
resfilepath <- paste0(outputdir,"GSCAeda_result.csv")
}
#setting path of data package
path <- system.file("extdata",package=paste0("Affy",chipdata,"Expr"))
#read in gene names
load(paste0(path,"/geneid.rda"))
#read in reference tables for given compendium
if(chipdata == "hgu133a") {
if (!require(Affyhgu133aExpr)) {
stop("Affyhgu133aExpr Package is not found")
} else {
data(Affyhgu133aExprtab)
tab <- Affyhgu133aExprtab
}
} else if(chipdata == "moe4302"){
if (!require(Affymoe4302Expr)) {
stop("Affymoe4302Expr Package is not found")
} else {
data(Affymoe4302Exprtab)
tab <- Affymoe4302Exprtab
}
} else if(chipdata == "hgu133A2"){
if (!require(Affyhgu133A2Expr)) {
stop("Affyhgu133A2Expr Package is not found")
} else {
data(Affyhgu133A2Exprtab)
tab <- Affyhgu133A2Exprtab
}
} else if(chipdata == "hgu133Plus2"){
if (!require(Affyhgu133Plus2Expr)) {
stop("Affyhgu133Plus2Expr Package is not found")
} else {
data(Affyhgu133Plus2Exprtab)
tab <- Affyhgu133Plus2Exprtab
}
} else {
stop("Please enter valid name for chipdata. Current Supported chipdata: 'hgu133a', 'moe4302', 'hgu133Plus2', 'hgu133A2'")
}
tabsamplename <- tab$SampleID
tab$SampleType <- substr(tab$SampleType,1,25)
SearchOutput$SampleType <- substr(SearchOutput$SampleType,1,25)
#check whether each geneset has at least one gene on the compendium
genesetname <- NULL
for (tmpgenesetname in unique(genedata[,1])) {
if(sum(geneid %in% genedata[genedata[,1]==tmpgenesetname,2]) == 0) {
warning(paste("No matching target genes found on the compendium for gene set",tmpgenesetname))
} else {
genesetname <- c(genesetname, tmpgenesetname)
}
}
if (length(genesetname) == 0)
stop("No matching target genes found on the compendium for all gene sets")
selectsample <- 1:nrow(tab)
activity <- matrix(0,nrow = length(genesetname),ncol = nrow(tab))
rownames(activity) <- genesetname
genesetcutoff <- genesettotalgenenum <- genesetmissinggene <- rep(0,length(genesetname))
names(genesetcutoff) <- names(genesettotalgenenum) <- names(genesetmissinggene) <- genesetname
for (genesetid in 1:length(genesetname)) {
###Scoring geneset activity
singlegeneset <- genesetname[genesetid]
currentgeneset <- genedata[genedata[,1] == singlegeneset & genedata[,2] %in% geneid,]
tmpgeneexpr <- t(h5read(paste0(path,"/data.h5"),"expr",index=list(NULL,match(currentgeneset[,2],geneid))))/1000
if (scaledata)
tmpgeneexpr <- t(apply(tmpgeneexpr,1,scale))
tmpgeneexpr <- sweep(tmpgeneexpr,1,currentgeneset[,3],"*")
score <- colMeans(tmpgeneexpr)
activity[genesetid,] <- score
}
for (genesetid in 1:length(genesetname)) {
###Scoring geneset activity
score <- activity[genesetid,]
singlegeneset <- genesetname[genesetid]
missinggene <- setdiff(genedata[singlegeneset == genedata[,1],2],geneid)
genesetmissinggene[genesetid] <- length(missinggene)
genesettotalgenenum[genesetid] <- nrow(genedata)
###Find samples matching the given pattern
singlepattern <- pattern[pattern[,1]==singlegeneset,]
if (singlepattern[,3] == "Norm") {
if (singlepattern[,2] == "High") {
cutoff <- qnorm(1-singlepattern[,4],mean(score),sd(score))
selectsample <- intersect(selectsample,which(score >= cutoff))
} else if (singlepattern[,2] == "Low") {
cutoff <- qnorm(singlepattern[,4],mean(score),sd(score))
selectsample <- intersect(selectsample,which(score < cutoff))
} else {
stop(paste("Second Column of pattern in",singlegeneset,"is not correctly given"))
}
} else if (singlepattern[,3] == "Quantile") {
if (singlepattern[,2] == "High") {
cutoff <- quantile(1-singlepattern[,4],mean(score),sd(score))
selectsample <- intersect(selectsample,which(score >= cutoff))
} else if (singlepattern[,2] == "Low") {
cutoff <- quantile(singlepattern[,4],mean(score),sd(score))
selectsample <- intersect(selectsample,which(score < cutoff))
} else {
stop(paste("Second Column of pattern in",singlegeneset,"is not correctly given"))
}
} else if (singlepattern[,3] == "Exprs") {
if (singlepattern[,2] == "High") {
cutoff <- singlepattern[,4]
selectsample <- intersect(selectsample,which(score >= cutoff))
} else if (singlepattern[,2] == "Low") {
cutoff <- singlepattern[,4]
selectsample <- intersect(selectsample,which(score < cutoff))
} else {
stop(paste("Second Column of pattern in",singlegeneset,"is not correctly given"))
}
} else {
stop(paste("Cutoff type pattern of geneset",singlegeneset,"is not correctly given"))
}
genesetcutoff[genesetid] <- cutoff
}
colnames(activity) <- tabsamplename
ExpID <- tab[selectsample,"ExperimentID"]
tmpTypes <- tab[selectsample,"SampleType"]
#### Plotting
if (!is.null(outputdir)) pdf(plotfilepath)
GSE <- unique(SearchOutput[,"ExperimentID"])
GSE2 <- unique(unlist(strsplit(SearchOutput[,"ExperimentID"],";")))
SampleNames <- unique(SearchOutput[,"SampleType"])
#### Expression Plotting
if(sum(tab$ExperimentID %in% GSE |
tab$ExperimentID %in% GSE2 |
tab$SampleType %in% SampleNames)==0)
stop("No matching samples found.")
tabdex <- tab[(tab$ExperimentID %in% GSE |
tab$ExperimentID %in% GSE2) &
tab$SampleType %in% SampleNames,]
eact <- activity[,(tab$ExperimentID %in% GSE |
tab$ExperimentID %in% GSE2) &
tab$SampleType %in% SampleNames]
ExpressionTab <- NULL
ExpressionTab <- tab[tab$SampleID %in% colnames(eact),]
ExpressionTab <- data.frame(ExpressionTab,t(eact))
#### Choice of ordering (Default is average rank)
if (length(unique(ExpressionTab$SampleType)) == 1) {
Expressionorder <- unique(ExpressionTab$SampleType)
} else {
if(Ordering %in% genesetname) {
Expressionorder <- names(sort(tapply(ExpressionTab[,Ordering],ExpressionTab$SampleType,mean),decreasing=T))
} else if(Ordering=="Average") {
Expressionorder <- rep(0,length(unique(ExpressionTab$SampleType)))
for (i in genesetname)
Expressionorder <- Expressionorder + scale(tapply(ExpressionTab[,i],ExpressionTab$SampleType,mean))
Expressionorder <- names(sort(Expressionorder[,1],decreasing=T))
} else {
stop("No ordering specified. Must be one of the geneset name or 'Average'.")
}
}
print(str(ExpressionTab))
Expressionmelt <- melt(ExpressionTab[,-(1:2)],id.vars="SampleType")
Expressionmelt$SampleType <- factor(Expressionmelt$SampleType,levels=Expressionorder)
gboxplot <- ggplot(data = Expressionmelt, aes(x=variable, y=value)) +
geom_boxplot(aes(fill=SampleType))+facet_grid(.~SampleType) +
labs(title=Title) +
theme_bw() + theme(axis.text.x=element_text(angle=90)) +
theme(axis.title.x = element_blank(), axis.title.y=element_blank())
print(gboxplot)
#### Make Heatmap Plot
if(is.null(outputdir)) par(ask = TRUE)
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
alltmat <- NULL
allpmat <- NULL
for (geneset in genesetname) {
pmat <- tmat <- matrix(0,length(Expressionorder),length(Expressionorder))
colnames(pmat) <- colnames(tmat) <- rownames(pmat) <- rownames(tmat) <- Expressionorder
for (i in 1:length(Expressionorder))
for (j in 1:length(Expressionorder)) {
if(length(ExpressionTab[ExpressionTab$SampleType==Expressionorder[i],geneset])==1|
length(ExpressionTab[ExpressionTab$SampleType==Expressionorder[j],geneset])==1) {
tmat[i,j] <- NA
pmat[i,j] <- NA
} else {
ttestres <- t.test(ExpressionTab[ExpressionTab$SampleType==Expressionorder[i],geneset],ExpressionTab[ExpressionTab$SampleType==Expressionorder[j],geneset])
tmat[i,j] <- ttestres$statistic
pmat[i,j] <- ttestres$p.value
}
}
eval(parse(text=paste0("alltmat$",geneset, "<- tmat")))
eval(parse(text=paste0("allpmat$",geneset, "<- pmat")))
#T test score heatmap
if (!all(is.na(tmat))) {
tmatdata <- melt(tmat,value.name="t.stat")
tmatdata$Var1 <- factor(tmatdata$Var1,levels=Expressionorder)
tmatdata$Var2 <- factor(tmatdata$Var2,levels=Expressionorder)
tplot <- ggplot(tmatdata,aes(x = Var1, y = Var2, fill = t.stat)) +
geom_tile() + geom_abline(intercept=0,slope=1,size=1.25) +
scale_fill_gradientn(colours = myPalette(10)) +
scale_x_discrete(expand = c(0, 0)) + scale_y_discrete(expand = c(0, 0)) +
coord_equal() + labs(title=paste(geneset,"t.stat heatmap")) +
theme_bw() + theme(axis.text.x=element_text(angle=90)) +
theme(axis.title.x = element_blank(), axis.title.y=element_blank())
print(tplot)
#P value Heatmap
pmatdata <- melt(pmat,value.name="P.value")
pmatdata$Var1 <- factor(pmatdata$Var1,levels=Expressionorder)
pmatdata$Var2 <- factor(pmatdata$Var2,levels=Expressionorder)
pplot <- ggplot(pmatdata,aes(x = Var1, y = Var2, fill = P.value)) +
geom_tile() + geom_abline(intercept=0,slope=1,size=1.25) +
scale_fill_gradientn(colours = myPalette(10),breaks=c(0.001,0.01,0.05,0.1,0.25,0.5,0.75,1)) +
guides(fill = guide_legend(title.position = "top")) +
scale_x_discrete(expand = c(0, 0)) + scale_y_discrete(expand = c(0, 0)) +
coord_equal() + labs(title=paste(geneset,"p.value heatmap")) +
theme_bw() + theme(axis.text.x=element_text(angle=90)) +
theme(axis.title.x = element_blank(), axis.title.y=element_blank())
print(pplot)
}
}
#### GSCA Results with given contexts
ttT <- table(tmpTypes)
sT <- sum(ttT)
bgT <- table(tab$SampleType)
bgT <- bgT[names(ttT)]
SCORE <- matrix(0, nrow = length(Expressionorder), ncol = 4)
ExperimentID <- NULL; FIN <- NULL
pvaldex <- NULL; fold.change <- NULL; active <- NULL; total <- NULL
for (i in Expressionorder) {
if(sum(names(ttT) %in% i) > 0 &
sum(tab$SampleType==i) > 2){
r1c1 <- ttT[names(ttT) %in% i]
r1c2 <- sT - ttT[names(ttT) %in% i]
r2c1 <- bgT[names(ttT) %in% i] - ttT[names(ttT) %in% i]
r2c2 <- length(tab$SampleType) - r1c1 - r1c2 - r2c1
tmpmat <- matrix(c(r1c1, r2c1, r1c2, r2c2), ncol = 2)
pval <- fisher.test(tmpmat, alternative = "greater")$p.value
adj.pval <- min(pval * sum(table(tab$SampleType) > 2),1)
adj.pval <- signif(adj.pval,3)
active <- c(active,as.numeric(ttT)[names(ttT) %in% i])
total <- c(total,as.numeric(bgT)[names(bgT) %in% i])
fold.change <- c(fold.change,round(
((as.numeric(ttT)[names(ttT) %in% i]+
sT/length(tab$SampleType)) /
(as.numeric(bgT)[names(bgT) %in% i]+1)) /
(sT/length(tab$SampleType)),3)
)
pvaldex <- c(pvaldex,adj.pval)
ExperimentID <- c(ExperimentID,
paste(unique(unlist(strsplit(paste(
tab$ExperimentID[tab$SampleType %in%
i], collapse = ";"), ";"))),
collapse = ";"))
} else {
active <- c(active,0)
total <- c(total,sum(tab$SampleType==i))
if(sum(tab$SampleType==i) > 2){
pvaldex <- c(pvaldex,1)
} else { pvaldex <- c(pvaldex,NA) }
fold.change <- c(fold.change,1)
ExperimentID <- c(ExperimentID,paste(unique(
unlist(strsplit(
paste(tab$ExperimentID[tab$SampleType %in%
i], collapse = ";"), ";"))),
collapse = ";"))
}
}
if(!is.null(outputdir)) dev.off()
if(is.null(outputdir)) par(ask = FALSE)
FIN <- data.frame(active,total,fold.change,pvaldex,Expressionorder,ExperimentID)
colnames(FIN) <- c("Active","Total","FoldChange","Adj.Pvalue","SampleType","ExperimentID")
ExpressionSum <- NULL
for(i in Expressionorder){
tmpdat <- ExpressionTab[ExpressionTab$SampleType==i,]
tmpsummary <- sum(ExpressionTab$SampleType==i)
for (j in genedata[,1]) {
if (j %in% colnames(tmpdat))
tmpsummary <- c(tmpsummary,mean(tmpdat[,j]),sd(tmpdat[,j]))
}
ExpressionSum <- rbind(ExpressionSum,tmpsummary)
}
rownames(ExpressionSum) <- Expressionorder
expplotcolname <- c("Sample Count")
for (j in genedata[,1]) {
if (j %in% genesetname)
expplotcolname <- c(expplotcolname,paste0("Mean_",j),paste0("Sd_",j))
}
colnames(ExpressionSum) <- expplotcolname
if(!is.null(outputdir)) {
write.csv(ExpressionTab,file=datafilepath,row.names=F)
write.table(FIN,file=resfilepath,row.names=F,sep=",")
cat("\n Summary", file = resfilepath, append = TRUE)
write.table(rbind(colnames(ExpressionSum),ExpressionSum),file=resfilepath,row.names=T,sep=",",col.names=F,append=T)
for (i in genesetname) {
cat(paste(i,"T statistics"), file = resfilepath, append = TRUE)
tmpout <- eval(parse(text=paste0("alltmat$",i)))
write.table(rbind(colnames(tmpout),tmpout),file=resfilepath,row.names=T,sep=",",col.names=F,append=T)
cat(paste(i,"P-values"), file = resfilepath, append = TRUE)
tmpout <- eval(parse(text=paste0("allpmat$",i)))
write.table(rbind(colnames(tmpout),tmpout),file=resfilepath,row.names=T,sep=",",col.names=F,append=T)
}
} else {
return(list(Ranking=FIN,Data=ExpressionTab,
Summary=ExpressionSum,Tstats=alltmat,
Pval=allpmat))
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.