#function 1
addDescription <-
function(genome=c("hg38","hg19", "mm10", "mm9"),genevector){
require(biomaRt)
if(missing(genome)){stop("Error: Please input genome type: 'mm10', 'mm9', 'hg38', or 'hg19'")}
if(genome == "mm10"){
ensembl = biomaRt::useMart("ensembl",dataset="mmusculus_gene_ensembl")
anno <- biomaRt::getBM(attributes=c("mgi_symbol", "mgi_description"), filters = "mgi_symbol",
values = as.vector(genevector), mart = ensembl)
anno <- anno[match(genevector, anno$mgi_symbol),]
anno<-anno[is.na(anno$mgi_symbol)==FALSE,]
}else if(genome == "mm9"){
ensembl = biomaRt::useMart('ENSEMBL_MART_ENSEMBL',dataset='mmusculus_gene_ensembl',
host="may2012.archive.ensembl.org")
anno <- biomaRt::getBM(attributes=c("mgi_symbol", "mgi_description"),
filters = "mgi_symbol", values = as.vector(genevector),
mart = ensembl)
anno <- anno[match(genevector, anno$mgi_symbol),]
anno<-anno[is.na(anno$mgi_symbol)==FALSE,]
}else if(genome == "hg38"){
ensembl = biomaRt::useMart("ensembl",dataset="hsapiens_gene_ensembl")
anno <- biomaRt::getBM(attributes=c("hgnc_symbol", "description"),
filters = "hgnc_symbol", values = as.vector(genevector),
mart = ensembl)
anno <- anno[match(genevector, anno$hgnc_symbol),]
anno<-anno[is.na(anno$hgnc_symbol)==FALSE,]
}else if(genome == "hg19") {
ensembl = biomaRt::useMart(biomart="ensembl", host="grch37.ensembl.org",
dataset="hsapiens_gene_ensembl")
anno <- biomaRt::getBM(attributes=c("hgnc_symbol", "description"),
filters = "hgnc_symbol", values = as.vector(genevector),
mart = ensembl)
anno <- anno[match(genevector, anno$hgnc_symbol),]
anno<-anno[is.na(anno$hgnc_symbol)==FALSE,]
} else{
stop("Error: Please input genome type: 'mm10', 'mm9', 'hg38', or 'hg19'")
}
anno<-anno[is.na(anno[,1])==FALSE,]
return(anno)
}
###function 2
cumulativerank <-
function(sampleExp, GeneID, GeneSet,logCheck,na.rm)
{
if(class(sampleExp)!="numeric"){sampleExp <-as.numeric(levels(sampleExp))[sampleExp]}
if(logCheck){if(max(sampleExp, na.rm=TRUE) > 20) {sampleExp <- log2(sampleExp)}}
if(length(sampleExp)!=length(GeneID)){
stop("Error: GeneID information is missing or not correct!")
}
N <- length(GeneID)
GeneID_NaInSet <- GeneID[which(!GeneID %in% GeneSet)]
# Step 1
rankedExp <- rank(sampleExp, na.last="keep")
rankscore <- rankedExp
# Step 2:
x1 <- which(GeneID %in% GeneSet)
x2 <- which(GeneID %in% GeneID_NaInSet)
ST <- sum(rankscore[x1], na.rm=TRUE)/length(x1)
SN <- sum(rankscore[x2], na.rm=TRUE)/length(x2)
y <- sum(ST, -SN, na.rm=na.rm)
return(y)
}
###function3
cumulativerank_EmpiricalP <-
function(sampleExp, GeneID, GeneSet, logCheck,cumulativerank,B,na.rm)
{
data(gencode_coding,package="seq2pathway.data")
if(class(sampleExp)!="numeric"){
sampleExp <-as.numeric(levels(sampleExp))[sampleExp]
}
if(logCheck){if(max(sampleExp, na.rm=TRUE) > 20) {
sampleExp <- log2(sampleExp)}
}
if(length(sampleExp)!=length(GeneID)){
stop("Error: GeneID information is missing or not correct!")
}
N <- length(GeneID)
count<-0
if(is.na(cumulativerank)==FALSE){
#Step 1: Calculation of weighted rank of gene expression
rankedExp <- rank(sampleExp, na.last="keep")
rankscore <- rankedExp
for(test in 1:B){
GeneID<-sample(gencode_coding,N)
GeneID_NaInSet <- GeneID[which(!GeneID %in% GeneSet)]
# Step 2:
x1 <- which(GeneID %in% GeneSet)
x2 <- which(GeneID %in% GeneID_NaInSet)
ST <- sum(rankscore[x1], na.rm=TRUE)/length(x1)
SN <- sum(rankscore[x2], na.rm=TRUE)/length(x2)
y <- sum(ST, -SN, na.rm=na.rm)
if(y>=cumulativerank&is.na(y)==FALSE){count=count+1}
}#B's sampling loop ends
Pvalue<-count/B
}#end of if cumulativerank is not equal to NA
else{Pvalue=NA}
return(Pvalue)
}
#Function 4
FAIME <-
function(sampleExp, GeneID, GeneSet, alpha, logCheck,na.rm)
{
if(class(sampleExp)!="numeric"){
sampleExp <-as.numeric(levels(sampleExp))[sampleExp]
}
if(logCheck){
if(max(sampleExp, na.rm=TRUE) > 20) {sampleExp <- log2(sampleExp)}
}
if(length(sampleExp)!=length(GeneID)){
stop("Error: GeneID information is missing or not correct!")
}
N <- length(GeneID)
GeneID_NaInSet <- GeneID[which(!GeneID %in% GeneSet)]
#Step 1: Calculation of weighted rank of gene expression
rankedExp <- rank(sampleExp, na.last="keep")
rankscore <- rankedExp*exp((rankedExp/N*alpha)-alpha)
# Step 2:
x1 <- which(GeneID %in% GeneSet)
x2 <- which(GeneID %in% GeneID_NaInSet)
ST <- sum(rankscore[x1], na.rm=TRUE)/length(x1)
SN <- sum(rankscore[x2], na.rm=TRUE)/length(x2)
y <- sum(ST, -SN, na.rm=na.rm)
return(y)
}
#function 5
FAIME_EmpiricalP <-
function(sampleExp, GeneID, GeneSet, alpha, logCheck,FAIME,B,na.rm)
{
data(gencode_coding,package="seq2pathway.data")
if(class(sampleExp)!="numeric"){
sampleExp <-as.numeric(levels(sampleExp))[sampleExp]
}
if(logCheck){
if(max(sampleExp, na.rm=TRUE) > 20) {sampleExp <- log2(sampleExp)}
}
if(length(sampleExp)!=length(GeneID)){
stop("Error: GeneID information is missing or not correct!")
}
N <- length(GeneID)
count<-0
if(is.na(FAIME)==FALSE){
#Step 1: Calculation of weighted rank of gene expression
rankedExp <- rank(sampleExp, na.last="keep")
rankscore <- rankedExp*exp((rankedExp/N*alpha)-alpha)
for(test in 1:B){
GeneID<-sample(as.vector(gencode_coding),N)
GeneID_NaInSet <- GeneID[which(!GeneID %in% GeneSet)]
# Step 2:
x1 <- which(GeneID %in% GeneSet)
x2 <- which(GeneID %in% GeneID_NaInSet)
ST <- sum(rankscore[x1], na.rm=TRUE)/length(x1)
SN <- sum(rankscore[x2], na.rm=TRUE)/length(x2)
y <- sum(ST, -SN, na.rm=na.rm)
if(y>=FAIME&is.na(y)==FALSE){count=count+1}
}#B's sampling loop ends
Pvalue<-count/B
}#end of if FAIME is not equal to NA
else{Pvalue=NA}
return(Pvalue)
}
#Function 6
FisherTest_GO_BP_MF_CC <-function(gs,
genome=c("hg38","hg19","mm10","mm9"),min_Intersect_Count=5,
Ontology=c("GOterm","BP","MF","CC", "newOntology"), newOntology=NULL)
{
if(missing(min_Intersect_Count)) {min_Intersect_Count=5}
if(!Ontology %in% c("GOterm","BP","MF","CC"))
{ warning ("Ontology other than 'GOterm','BP','MF' and 'CC' is test")
if(length(newOntology)<2) {
stop ("newOntology should be a list of two lists with the same list names.")}
if(length(newOntology[[1]])!=length(newOntology[[2]])){
stop ("newOntology should be a list of two equal-length lists.")}
if(!identical(names(newOntology[[1]]),names(newOntology[[2]]))){ ##changed from names(newOntology[[1]]) != names(newOntology[[2]]) 4/2/2019
stop ("the two lists of newOntology should have the same ontology IDs.")}
}
if(is.null(newOntology))
{
####load GP pathway information
if(Ontology =="GOterm") {
data(GO_BP_list,package="seq2pathway.data")
data(GO_MF_list,package="seq2pathway.data")
data(GO_CC_list,package="seq2pathway.data")
data(Des_BP_list,package="seq2pathway.data")
data(Des_MF_list,package="seq2pathway.data")
data(Des_CC_list,package="seq2pathway.data")
} else if(Ontology =="BP") {
data(GO_BP_list,package="seq2pathway.data")
data(Des_BP_list,package="seq2pathway.data")
} else if(Ontology =="MF") {
data(GO_MF_list,package="seq2pathway.data")
data(Des_MF_list,package="seq2pathway.data")
} else if(Ontology =="CC") {
data(GO_CC_list,package="seq2pathway.data")
data(Des_CC_list,package="seq2pathway.data")
}
}
#######check genome
if(missing(genome)){genome="hg19"}
#####load GO_GENCODE_databse_intersect_gene information
if(genome=="hg38"){
data(GO_GENCODE_df_hg_v36,package="seq2pathway.data")
GO_GENCODE_df<-GO_GENCODE_df_hg_v36
}else if(genome=="hg19"){
data(GO_GENCODE_df_hg_v19,package="seq2pathway.data")
GO_GENCODE_df<-GO_GENCODE_df_hg_v19
}else if(genome=="mm10"){
data(GO_GENCODE_df_mm_vM25,package="seq2pathway.data")
GO_GENCODE_df<-GO_GENCODE_df_mm_vM25
}else if(genome=="mm9"){
data(GO_GENCODE_df_mm_vM1,package="seq2pathway.data")
GO_GENCODE_df<-GO_GENCODE_df_mm_vM1
}
GO_FisherTest_result<-list()
n.list=0
############## newOntology
if(!is.null(newOntology))
{
n.list <- n.list+1
# newOntology <- list(GO_BP_list[1:200], Des_BP_list[1:200])
gene_list <- newOntology[[1]]
Des_list <- newOntology[[2]]
###background counts
Para_GenesetsGene<-length(intersect(unique(toupper(unlist(gene_list))),
union(unique(toupper(GO_GENCODE_df$gene_name)),toupper(unlist(gs)))))
###gene vector annotated from GENCODE intersect with gene_list
Para_PeakGene<-length(intersect(unique(toupper(unlist(gene_list))),
unique(toupper(unlist(gs)))))
mdat <- matrix(, nrow = length(gene_list), ncol = 9, byrow = TRUE,
dimnames = list(c(1:length(gene_list)),
c("GOID","Description","Fisher_Pvalue", "Fisher_odds","FDR",
"Intersect_Count","GO_gene_inBackground","GO_gene_raw_Count",
"Intersect_gene")))
mdat<-as.data.frame(mdat)
for(i in 1:length(gene_list)){
pathwaygene<-length(intersect(toupper(gene_list[[i]]),
union(unique(toupper(GO_GENCODE_df$gene_name)),toupper(unlist(gs)))))
a<-length(intersect(toupper(gene_list[[i]]),unique(toupper(unlist(gs)))))
b<-Para_PeakGene-a
c<-pathwaygene-a
d<-Para_GenesetsGene-a-b-c
Con<-matrix(c(a, b, c, d),nrow = 2, byrow =TRUE)
mdat[i,1]<-names(gene_list)[i]
mdat[i,2]<-as.character(Des_list[which(names(Des_list)==names(gene_list)[i])])
mdat[i,3]<-fisher.test(Con)$p.value
mdat[i,4]<-fisher.test(Con)$estimate
mdat[i,6]<-a
mdat[i,7]<-pathwaygene
mdat[i,8]<-length(gene_list[[i]])
mdat[i,9]<-paste(intersect(toupper(gene_list[[i]]),
unique(toupper(unlist(gs)))),collapse=" ")
}
mdat<-mdat[mdat$Intersect_Count>=min_Intersect_Count,]
mdat$FDR <- p.adjust(mdat[,c("Fisher_Pvalue")],method="BH")
mdat<-mdat[order(mdat$FDR),]
row.names(mdat)<-NULL
GO_FisherTest_result[[n.list]]<-mdat
rm(mdat)
names(GO_FisherTest_result)[n.list]<-c("newOntology")
} else
{
####################GO:BP
if(Ontology %in% c("GOterm","BP"))
{
n.list=n.list+1
###background counts
Para_GenesetsGene<-length(intersect(unique(toupper(unlist(GO_BP_list))),
union(unique(toupper(GO_GENCODE_df$gene_name)),toupper(unlist(gs)))))
###gene vector annotated from GENCODE intersect with GO_BP_list
Para_PeakGene<-length(intersect(unique(toupper(unlist(GO_BP_list))),
unique(toupper(unlist(gs)))))
mdat <- matrix(, nrow = length(GO_BP_list), ncol = 9, byrow = TRUE,
dimnames = list(c(1:length(GO_BP_list)),
c("GOID","Description","Fisher_Pvalue", "Fisher_odds","FDR",
"Intersect_Count","GO_gene_inBackground","GO_gene_raw_Count","Intersect_gene")))
mdat<-as.data.frame(mdat)
for(i in 1:length(GO_BP_list)){
pathwaygene<-length(intersect(toupper(GO_BP_list[[i]]),
union(unique(toupper(GO_GENCODE_df$gene_name)),toupper(unlist(gs)))))
a<-length(intersect(toupper(GO_BP_list[[i]]),unique(toupper(unlist(gs)))))
b<-Para_PeakGene-a
c<-pathwaygene-a
d<-Para_GenesetsGene-a-b-c
Con<-matrix(c(a, b, c, d),nrow = 2, byrow =TRUE)
mdat[i,1]<-names(GO_BP_list)[i]
mdat[i,2]<-as.character(Des_BP_list[which(names(Des_BP_list)==names(GO_BP_list)[i])])
mdat[i,3]<-fisher.test(Con)$p.value
mdat[i,4]<-fisher.test(Con)$estimate
mdat[i,6]<-a
mdat[i,7]<-pathwaygene
mdat[i,8]<-length(GO_BP_list[[i]])
mdat[i,9]<-paste(intersect(toupper(GO_BP_list[[i]]),
unique(toupper(unlist(gs)))),collapse=" ")
}
mdat<-mdat[mdat$Intersect_Count>=min_Intersect_Count,]
mdat$FDR <- p.adjust(mdat[,c("Fisher_Pvalue")],method="BH")
mdat<-mdat[order(mdat$FDR),]
row.names(mdat)<-NULL
GO_FisherTest_result[[n.list]]<-mdat
rm(mdat)
names(GO_FisherTest_result)[n.list]<-c("GO_BP")
}
####################GO:CC
if(Ontology %in% c("GOterm","CC"))
{
n.list=n.list+1
###background counts
Para_GenesetsGene<-length(intersect(unique(toupper(unlist(GO_CC_list))),
union(unique(toupper(GO_GENCODE_df$gene_name)),toupper(unlist(gs)))))
###gene vector annotated from GENCODE intersect with GO_CC_list
Para_PeakGene<-length(intersect(unique(toupper(unlist(GO_CC_list))),
unique(toupper(unlist(gs)))))
mdat <- matrix(, nrow = length(GO_CC_list), ncol = 9, byrow = TRUE,
dimnames = list(c(1:length(GO_CC_list)),
c("GOID","Description","Fisher_Pvalue",
"Fisher_odds","FDR","Intersect_Count",
"GO_gene_inBackground","GO_gene_raw_Count","Intersect_gene")))
mdat<-as.data.frame(mdat)
for(i in 1:length(GO_CC_list)){
pathwaygene<-length(intersect(toupper(GO_CC_list[[i]]),
union(unique(toupper(GO_GENCODE_df$gene_name)),toupper(unlist(gs)))))
a<-length(intersect(toupper(GO_CC_list[[i]]),unique(toupper(unlist(gs)))))
b<-Para_PeakGene-a
c<-pathwaygene-a
d<-Para_GenesetsGene-a-b-c
Con<-matrix(c(a, b, c, d),nrow = 2, byrow =TRUE)
mdat[i,1]<-names(GO_CC_list)[i]
mdat[i,2]<-Des_CC_list[which(names(Des_CC_list)==names(GO_CC_list)[i])]
mdat[i,3]<-fisher.test(Con)$p.value
mdat[i,4]<-fisher.test(Con)$estimate
mdat[i,6]<-a
mdat[i,7]<-pathwaygene
mdat[i,8]<-length(GO_CC_list[[i]])
mdat[i,9]<-paste(intersect(toupper(GO_CC_list[[i]]),unique(toupper(unlist(gs)))),collapse=" ")
}
mdat<-mdat[mdat$Intersect_Count>=min_Intersect_Count,]
mdat$FDR <- p.adjust(mdat[,c("Fisher_Pvalue")],method="BH")
mdat<-mdat[order(mdat$FDR),]
row.names(mdat)<-NULL
GO_FisherTest_result[[n.list]]<-mdat
rm(mdat)
names(GO_FisherTest_result)[n.list]<-c("GO_CC")
}
####################GO:MF
if(Ontology %in% c("GOterm","MF"))
{
n.list=n.list+1
###background counts
Para_GenesetsGene<-length(intersect(unique(toupper(unlist(GO_MF_list))),
union(unique(toupper(GO_GENCODE_df$gene_name)),toupper(unlist(gs)))))
###gene vector annotated from GENCODE intersect with GO_MF_list
Para_PeakGene<-length(intersect(unique(toupper(unlist(GO_MF_list))),
unique(toupper(unlist(gs)))))
mdat <- matrix(, nrow = length(GO_MF_list), ncol = 9, byrow = TRUE,
dimnames = list(c(1:length(GO_MF_list)),
c("GOID","Description","Fisher_Pvalue",
"Fisher_odds","FDR","Intersect_Count",
"GO_gene_inBackground","GO_gene_raw_Count","Intersect_gene")))
mdat<-as.data.frame(mdat)
for(i in 1:length(GO_MF_list)){
pathwaygene<-length(intersect(toupper(GO_MF_list[[i]]),
union(unique(toupper(GO_GENCODE_df$gene_name)),toupper(unlist(gs)))))
a<-length(intersect(toupper(GO_MF_list[[i]]),unique(toupper(unlist(gs)))))
b<-Para_PeakGene-a
c<-pathwaygene-a
d<-Para_GenesetsGene-a-b-c
Con<-matrix(c(a, b, c, d),nrow = 2, byrow =TRUE)
mdat[i,1]<-names(GO_MF_list)[i]
mdat[i,2]<-Des_MF_list[which(names(Des_MF_list)==names(GO_MF_list)[i])]
mdat[i,3]<-fisher.test(Con)$p.value
mdat[i,4]<-fisher.test(Con)$estimate
mdat[i,6]<-a
mdat[i,7]<-pathwaygene
mdat[i,8]<-length(GO_MF_list[[i]])
mdat[i,9]<-paste(intersect(toupper(GO_MF_list[[i]]),
unique(toupper(unlist(gs)))),collapse=" ")
}
mdat<-mdat[mdat$Intersect_Count>=min_Intersect_Count,]
mdat$FDR <- p.adjust(mdat[,c("Fisher_Pvalue")],method="BH")
mdat<-mdat[order(mdat$FDR),]
row.names(mdat)<-NULL
GO_FisherTest_result[[n.list]]<-mdat
rm(mdat)
names(GO_FisherTest_result)[n.list]<-c("GO_MF")
}
}
print("Fisher's exact test done")
return(GO_FisherTest_result)
}
#function 7
FisherTest_MsigDB <-
function(gsmap,gs,genome=c("hg38","hg19","mm10","mm9"),min_Intersect_Count=5){
if(missing(min_Intersect_Count)){min_Intersect_Count=5}
if(class(gsmap)!="GSA.genesets"){stop("Error: gsmap should be GAS.genesets object")}
#######check genome
if(missing(genome)){genome="hg19"}
if(genome=="hg38"){
data(Msig_GENCODE_df_hg_v36,package="seq2pathway.data")
Msig_GENCODE_df<-Msig_GENCODE_df_hg_v36
}else if(genome=="hg19"){
data(Msig_GENCODE_df_hg_v19,package="seq2pathway.data")
Msig_GENCODE_df<-Msig_GENCODE_df_hg_v19
}else if(genome=="mm10"){
data(Msig_GENCODE_df_mm_vM25,package="seq2pathway.data")
Msig_GENCODE_df<-Msig_GENCODE_df_mm_vM25
}else if(genome=="mm9"){
data(Msig_GENCODE_df_mm_vM1,package="seq2pathway.data")
Msig_GENCODE_df<-Msig_GENCODE_df_mm_vM1
}
###background counts
Para_GenesetsGene<-length(intersect(unique(toupper(unlist(gsmap$genesets))),
union(unique(toupper(Msig_GENCODE_df$gene_name)),unique(toupper(unlist(gs))))))
###gene vector in Msigdb
Para_PeakGene<-length(intersect(unique(toupper(unlist(gsmap$genesets))),
unique(toupper(unlist(gs)))))
mdat <- matrix(, nrow = length(gsmap$geneset.names), ncol = 9, byrow = TRUE,
dimnames = list(c(1:length(gsmap$geneset.names)),
c("GeneSet","Description","Fisher_Pvalue","Fisher_odds",
"FDR","Intersect_Count",
"MsigDB_gene_inBackground","MsigDB_gene_raw_Count","Intersect_gene")))
mdat<-as.data.frame(mdat)
for(i in 1:length(gsmap$geneset.names)){
pathwaygene<-length(intersect(toupper(gsmap$genesets[[i]]),
union(unique(toupper(Msig_GENCODE_df$gene_name)),toupper(unlist(gs)))))
a<-length(intersect(toupper(gsmap$genesets[[i]]),unique(toupper(unlist(gs)))))
b<-Para_PeakGene-a
c<-pathwaygene-a
d<-Para_GenesetsGene-a-b-c
Con<-matrix(c(a, b, c, d),nrow = 2, byrow =TRUE)
mdat[i,1]<-gsmap$geneset.names[i]
mdat[i,2]<-gsmap$geneset.descriptions[i]
mdat[i,3]<-fisher.test(Con)$p.value
mdat[i,4]<-fisher.test(Con)$estimate
mdat[i,6]<-a
mdat[i,7]<-pathwaygene
mdat[i,8]<-length(gsmap$genesets[[i]])
mdat[i,9]<-paste(intersect(toupper(gsmap$genesets[[i]]),
unique(toupper(unlist(gs)))),collapse=" ")
}
mdat<-mdat[mdat$Intersect_Count>=min_Intersect_Count,]
mdat$FDR <- p.adjust(mdat[,c("Fisher_Pvalue")],method="BH")
mdat<-mdat[order(mdat$FDR),]
row.names(mdat)<-NULL
print("Fisher's exact test done")
return(mdat)
}
#function 8
KSrank <-
function(sampleExp, GeneID, GeneSet, alpha, logCheck)
{
if(class(sampleExp)!="numeric"){sampleExp <-as.numeric(levels(sampleExp))[sampleExp]}
if(logCheck){if(max(sampleExp, na.rm=TRUE) > 20) {sampleExp <- log2(sampleExp)}}
if(length(sampleExp)!=length(GeneID)){
stop("Error: GeneID information is missing or not correct!")}
N <- length(GeneID)
GeneID_NaInSet <- GeneID[which(!GeneID %in% GeneSet)]
# Step 1 (Supporting Figure 2 in the Text S1): Calculation of weighted rank of gene expression -----
## Ranked by score, the lowest to highest. Therefore, the up-regulated genes
## get the higher weighted score Only dir="high"
rankedExp <- rank(sampleExp, na.last="keep")
rankscore <- rankedExp*exp((rankedExp/N*alpha)-alpha)
# Step 2:
x1 <- which(GeneID %in% GeneSet)
x2 <- which(GeneID %in% GeneID_NaInSet)
## WFA algorithm, KS test is emploied
if(length(x1)>0 & length(x2)>0) {
tmp <- ks.test(rankscore[x1], rankscore[x2], alternative="two.sided")
y <- tmp$statistic
}else{y=NA}
return(y)
}
#function 9
KSrank_EmpiricalP <-
function(sampleExp, GeneID, GeneSet, alpha, logCheck,KSrank,B)
{
data(gencode_coding,package="seq2pathway.data")
if(class(sampleExp)!="numeric"){
sampleExp <-as.numeric(levels(sampleExp))[sampleExp]}
if(logCheck){if(max(sampleExp, na.rm=TRUE) > 20) {
sampleExp <- log2(sampleExp)}}
if(length(sampleExp)!=length(GeneID)){
stop("Error: GeneID information is missing or not correct!")}
N <- length(GeneID)
count<-0
if(is.na(KSrank)==FALSE){
#Step 1: Calculation of weighted rank of gene expression
rankedExp <- rank(sampleExp, na.last="keep")
rankscore <- rankedExp*exp((rankedExp/N*alpha)-alpha)
for(test in 1:B){
GeneID<-sample(gencode_coding,N)
GeneID_NaInSet <- GeneID[which(!GeneID %in% GeneSet)]
# Step 2:
x1 <- which(GeneID %in% GeneSet)
x2 <- which(GeneID %in% GeneID_NaInSet)
## WFA algorithm, KS test is emploied
if(length(x1)>0 & length(x2)>0) {
tmp <- ks.test(rankscore[x1], rankscore[x2], alternative="two.sided")
y <- tmp$statistic
}else{y=NA}
if(y>=KSrank&is.na(y)==FALSE){count=count+1}
}#B's sampling loop ends
Pvalue<-count/B
}#end of if KSrank is not equal to NA
else{Pvalue=NA}
return(Pvalue)
}
#function 10
Normalize_F <-
function(input){
Y.high1 <- apply(input, 2, scale)
rownames(Y.high1)<-rownames(input)
colnames(Y.high1)<-c(paste(colnames(input),"_Normalized",sep=""))
dim(Y.high1)
head(Y.high1)
cp2_FAIME<-as.data.frame(Y.high1)
print("Normalization.........done")
return(cp2_FAIME)
}
#function 11
Peak_Gene_Collapse <-
function(input,collapsemethod){
options(warn=-1)
#gene symbol or entrezID
GeneGroup<-as.vector(toupper(input[,2]))
#rowID must be distint,rename rownames of input
rownames(input)<-1:nrow(input)
rowID<-rownames(input)
#just for single sample file, otherwise, collpaseRows function could not work
input$index=0
#drop the first two columns, peak id and gene information
dataforCollapse<-input[,3:dim(input)[2]]
#main function
Collapse_Gene<-collapseRows(dataforCollapse, GeneGroup, rowID, method=collapsemethod)
#main output of collapsed matrix, the rownames are gene symbol/entrezID
PeakGene_MaxMean<-Collapse_Gene$datETcollapsed
##get the reserved rownumber
rw<-as.numeric(Collapse_Gene$group2row[,2])
#remove the dummy index column, and the gene symbol column
selected_Peak_Info<-input[rw,c(1,3:(dim(input)[2]-1))]#remove the dummy index column
##gene symbol or entrezID as rownames
rownames(selected_Peak_Info)<-rownames(PeakGene_MaxMean)
###dummmy yummy
selected_Peak_Info1<-data.frame(selected_Peak_Info)
rownames(selected_Peak_Info1)<-rownames(selected_Peak_Info)
names(selected_Peak_Info1)<-names(selected_Peak_Info)
print("Peak_Gene_Collapse....... done")
return(selected_Peak_Info1)
}
#function 12
rungene2pathway <-
function(dat, gsmap, alpha=5, logCheck=FALSE,
method=c("FAIME","KS-rank","cumulative-rank"),na.rm=FALSE)
{
if(missing(method)){method="FAIME"}
if(missing(alpha)){alpha=5}
if(missing(na.rm)){na.rm=FALSE}
if(missing(logCheck)){logCheck=FALSE}
if(! method %in% c("FAIME", "KS-rank", "cumulative-rank")) {
stop("Method should be a value of 'FAIME', 'KS-rank', or 'cumulative-rank'!")}
if(is.data.frame(dat)==FALSE & is.matrix(dat)==FALSE){
stop("Error: input should be a data frame or a matrix")}
if(class(gsmap)=="GSA.genesets"){
seeds <- gsmap$geneset.names
res <- matrix(nrow=length(seeds), ncol=ncol(dat))
for(i in 1:length(seeds)){
for(j in 1:ncol(dat)){
if(method=="FAIME"){res[i,j] <- FAIME(sampleExp=dat[,j],
GeneID=toupper(rownames(dat)),
GeneSet=toupper(gsmap$genesets[[i]]),
alpha=alpha, logCheck=logCheck,na.rm=na.rm)
}else if(method=="KS-rank"){
res[i,j] <- KSrank(sampleExp=dat[,j], GeneID=toupper(rownames(dat)),
GeneSet=toupper(gsmap$genesets[[i]]),
alpha=alpha, logCheck=logCheck)
}else if(method=="cumulative-rank"){
res[i,j] <- cumulativerank(sampleExp=dat[,j], GeneID=toupper(rownames(dat)),
GeneSet=toupper(gsmap$genesets[[i]]),
logCheck=logCheck,na.rm=na.rm)
}
}#j loop
}#i loop
}else if(class(gsmap)=="list"){
if(is.null(names(gsmap))){stop ("please give the names of gsmap as a list")}
seeds <- names(gsmap)
res <- matrix(nrow=length(seeds), ncol=ncol(dat))
for(i in 1:length(seeds)){
for(j in 1:ncol(dat)){
if(method=="FAIME"){
res[i,j] <- FAIME(sampleExp=dat[,j],GeneID=toupper(rownames(dat)),
GeneSet= toupper(gsmap[[i]]), alpha=alpha,
logCheck=logCheck,na.rm=na.rm)
}else if(method=="KS-rank"){
res[i,j] <- KSrank(sampleExp=dat[,j], GeneID=toupper(rownames(dat)),
GeneSet=toupper(gsmap[[i]]), alpha=alpha, logCheck=logCheck)
}else if(method=="cumulative-rank"){
res[i,j] <- cumulativerank(sampleExp=dat[,j], GeneID=toupper(rownames(dat)),
GeneSet=toupper(gsmap[[i]]), logCheck=logCheck,na.rm=na.rm)
}
}#j loop
}#i loop
}#class loop
rownames(res) <- seeds
colnames(res) <- c(paste(colnames(dat),"2pathscore",sep=""))
print("gene2pathway calculates score....... done")
return(res)
}
#function 13
rungene2pathway_EmpiricalP <-
function(dat, gsmap, alpha=5, logCheck=FALSE,
method=c("FAIME","KS-rank","cumulative-rank"),B=100,na.rm=FALSE)
{
if(missing(method)){method="FAIME"}
if(missing(alpha)){alpha=5}
if(missing(na.rm)){na.rm=FALSE}
if(missing(B)){B=100}
if(missing(logCheck)){logCheck=FALSE}
if(! method %in% c("FAIME", "KS-rank", "cumulative-rank")) {
stop("Error: method should be a value of 'FAIME', 'KS-rank', or 'cumulative-rank'!")}
if(is.data.frame(dat)==FALSE & is.matrix(dat)==FALSE){
stop("Error: input should be a data frame or a matrix")}
if(class(gsmap)=="GSA.genesets"){
seeds <- gsmap$geneset.names
res <- matrix(nrow=length(seeds), ncol=ncol(dat))
for(i in 1:length(seeds)){
for(j in 1:ncol(dat)){
if(method=="FAIME"){
res[i,j] <- FAIME(sampleExp=dat[,j], GeneID=toupper(rownames(dat)),
GeneSet=toupper(gsmap$genesets[[i]]),
alpha=alpha,logCheck=logCheck,na.rm=na.rm)
}else if(method=="KS-rank"){
res[i,j] <- KSrank(sampleExp=dat[,j], GeneID=toupper(rownames(dat)),
GeneSet=toupper(gsmap$genesets[[i]]),
alpha=alpha, logCheck=logCheck)
}else if(method=="cumulative-rank"){
res[i,j] <- cumulativerank(sampleExp=dat[,j], GeneID=toupper(rownames(dat)),
GeneSet=toupper(gsmap$genesets[[i]]),
logCheck=logCheck,na.rm=na.rm)
}
}#j loop
}#i loop
}else if(class(gsmap)=="list"){
if(is.null(names(gsmap))){stop ("please give the names of gsmap as a list")}
seeds <- names(gsmap)
res <- matrix(nrow=length(seeds), ncol=ncol(dat))
for(i in 1:length(seeds)){
for(j in 1:ncol(dat)){
if(method=="FAIME"){
res[i,j] <- FAIME(sampleExp=dat[,j],GeneID=toupper(rownames(dat)),
GeneSet= toupper(gsmap[[i]]), alpha=alpha,
logCheck=logCheck,na.rm=na.rm)
}else if(method=="KS-rank"){
res[i,j] <- KSrank(sampleExp=dat[,j], GeneID=toupper(rownames(dat)),
GeneSet=toupper(gsmap[[i]]), alpha=alpha, logCheck=logCheck)
}else if(method=="cumulative-rank"){
res[i,j] <- cumulativerank(sampleExp=dat[,j], GeneID=toupper(rownames(dat)),
GeneSet=toupper(gsmap[[i]]), logCheck=logCheck,na.rm=na.rm)
}
}#j loop
}#i loop
}#class loop
rownames(res) <- seeds
colnames(res) <- c(paste(colnames(dat),"2pathscore",sep=""))
#######Just progress bar
pb <- txtProgressBar(min = 0, max = length(seeds), style = 3)
#######Just progress bar
if(class(gsmap)=="GSA.genesets"){
seeds <- gsmap$geneset.names
res_p <- matrix(nrow=length(seeds), ncol=ncol(dat))
for(i in 1:length(seeds)){
#if(i%%100 ==0){print(paste("still working..........",Sys.time())) }
setTxtProgressBar(pb, i)
for(j in 1:ncol(dat)){
if(method=="FAIME"){
res_p[i,j] <- FAIME_EmpiricalP(sampleExp=dat[,j], GeneID=toupper(rownames(dat)),
GeneSet=toupper(gsmap$genesets[[i]]), alpha=alpha,
logCheck=logCheck,FAIME=res[i,j],B=B,na.rm=na.rm)
}else if(method=="KS-rank"){
res_p[i,j] <- KSrank_EmpiricalP(sampleExp=dat[,j], GeneID=toupper(rownames(dat)),
GeneSet=toupper(gsmap$genesets[[i]]), alpha=alpha,
logCheck=logCheck,KSrank=res[i,j],B=B)
}else if(method=="cumulative-rank"){
res_p[i,j] <-cumulativerank_EmpiricalP(sampleExp=dat[,j],
GeneID=toupper(rownames(dat)),
GeneSet=toupper(gsmap$genesets[[i]]),
logCheck=logCheck,cumulativerank=res[i,j],B=B,na.rm=na.rm)
}
}#j loop
}#i loop
}else if(class(gsmap)=="list"){
if(is.null(names(gsmap))){stop ("please give the names of gsmap as a list")}
seeds <- names(gsmap)
res_p <- matrix(nrow=length(seeds), ncol=ncol(dat))
for(i in 1:length(seeds)){
#if(i%%100 ==0){print(paste("still working..........",Sys.time()))}
setTxtProgressBar(pb, i)
for(j in 1:ncol(dat)){
if(method=="FAIME"){
res_p[i,j]<- FAIME_EmpiricalP(sampleExp=dat[,j],GeneID=toupper(rownames(dat)),
GeneSet= toupper(gsmap[[i]]), alpha=alpha,
logCheck=logCheck,FAIME=res[i,j],B=B,na.rm=na.rm)
}else if(method=="KS-rank"){
res_p[i,j] <- KSrank_EmpiricalP(sampleExp=dat[,j],GeneID=toupper(rownames(dat)),
GeneSet=toupper(gsmap[[i]]), alpha=alpha,
logCheck=logCheck,KSrank=res[i,j],B=B)
}else if(method=="cumulative-rank"){
res_p[i,j] <-cumulativerank_EmpiricalP(sampleExp=dat[,j],
GeneID=toupper(rownames(dat)),
GeneSet=toupper(gsmap[[i]]), logCheck=logCheck,
cumulativerank=res[i,j],B=B,na.rm=na.rm)
}
}#j loop
}#i loop
}#class loop
rownames(res_p) <- seeds
colnames(res_p) <- c(paste(colnames(dat),"2pathscore_Pvalue",sep=""))
#######Just progress bar
close(pb)
print("pathwayscore Empirical Pvalue calculation..........done")
return(res_p)
}
#Sys.which2: funtion from Herve Pages, Bioconductor Maintainance Team, Oct 9 2020
### A version of Sys.which() that is guaranteed to return "" when the
### command is not found (Sys.which() does not guarantee that).
Sys.which2 <- function(cmdname)
{
stopifnot(length(cmdname) == 1L)
cmdpath <- Sys.which(cmdname)
## Unfortunately we can't rely on .Platform$file.sep (it's broken on
## some Windows versions) so we try with both, a forward slash and a
## backward slash.
pattern1 <- paste0("/", cmdname)
pattern2 <- paste0("\\", cmdname)
success <- grepl(pattern1, cmdpath, fixed=TRUE) ||
grepl(pattern2, cmdpath, fixed=TRUE)
if (success) cmdpath else ""
}
#get_python3_command_path: funtion from Herve Pages, Bioconductor Maintainance Team, Oct 9 2020
### First try 'python3', then try 'python'.
get_python3_command_path <- function()
{
# python3_command_path <- Sys.which2("python3") #3/3/2021 by Holly
python3_command_path <- Sys.which2("python")
if (python3_command_path != "")
temp <- system("python -V", intern = TRUE)
if(exists("temp")){
if(length(temp) > 0){
return(python3_command_path)}
else{
print("system python outdated, checking directly")
}}
else{
print("system python outdated, checking directly")
}
## On some systems (e.g. Windows) the name of the Python 3 command
## might be 'python3', not 'python'.
# python3_command_path <- Sys.which2("python")
python3_command_path <- Sys.which2("python3") #3/3/2021 by Holly
if (python3_command_path != ""){
print(paste0("python3 found: ",python3_command_path))
return(python3_command_path)}
stop("Couldn't find command 'python3' (or 'python') on your system.\n",
" If Python 3 is installed on your system, make sure that the\n",
" 'python3' (or 'python') executable is in your PATH.")
}
#function 14
runseq2gene <-
function(inputfile,search_radius=150000,promoter_radius=200,
promoter_radius2=100,genome=c("hg38","hg19","mm10","mm9"),
adjacent=FALSE,SNP= FALSE,PromoterStop=FALSE,
NearestTwoDirection=TRUE,UTR3=FALSE){
options(scipen=999)
if(missing(inputfile)){stop("please give the input file")}
if(! class(inputfile) %in% c("data.frame","GRanges")){
stop("please check the format of input file")}
###default parameters if missing
if(missing(genome)){genome= "hg19"}
if(missing(search_radius)){search_radius = 150000}
if(missing(promoter_radius)){promoter_radius = 200}
if(missing(promoter_radius2)){promoter_radius2 = 100}
if(missing(SNP)){SNP = "False"}
if(missing(adjacent)){adjacent= "False"}
if(missing(PromoterStop)){PromoterStop= "False"}
if(missing(NearestTwoDirection)){NearestTwoDirection= "True"}
if(missing(UTR3)){UTR3= "False"}
if(SNP %in% c("T","TRUE","True",TRUE)){SNP= "True"}
if(SNP %in% c("F","FALSE","False",FALSE)){SNP= "False"}
if(PromoterStop %in% c("T","TRUE","True",TRUE)){PromoterStop= "True"}
if(PromoterStop %in% c("F","FALSE","False",FALSE)){PromoterStop= "False"}
if(NearestTwoDirection %in% c("T","TRUE","True",TRUE)){NearestTwoDirection= "True"}
if(NearestTwoDirection %in% c("F","FALSE","False",FALSE)){NearestTwoDirection= "False"}
if(UTR3 %in% c("T","TRUE","True",TRUE)){UTR3= "True"}
if(UTR3 %in% c("F","FALSE","False",FALSE)){UTR3= "False"}
if(adjacent %in% c("T","TRUE","True",TRUE)){adjacent= "True"}
if(adjacent %in% c("F","FALSE","False",FALSE)){adjacent= "False"}
if(adjacent== "True"){search_radius =0}
if(length(genome>1)){genome=genome[1]}
### assign the path of main function
### file AJ_1 for hg19 and mm9
### file AJ_2 for hg38 and mm10
if(genome %in% c("hg19","mm9")){
path <-paste(system.file(package="seq2pathway"),
##### "/Function_PeakMutationAnnotation_GENCODE_05182015.py",sep="/") # This script works for Python 2.7
"/Function_PeakMutationAnnotation_GENCODE_05182015_AJ_1.py",sep="/") # This script works for Python 3.8
} else {
path <-paste(system.file(package="seq2pathway"),
##### "/Function_PeakMutationAnnotation_GENCODE_05182015.py",sep="/") # This script works for Python 2.7
"/Function_PeakMutationAnnotation_GENCODE_01142021_AJ_2.py",sep="/") # This script works for Python 3.8
}
## decide the platform
mySys <- ifelse(length(grep("Windows",Sys.info()))==0, "L","W")
###wrap invoke file
name<-paste("inputfile",
gsub(":","_",gsub("-","",gsub(" ","_",Sys.time()))),
"seq2gene_log.py",sep="_")
tmpinfile = tempfile()
if(class(inputfile)=="data.frame"){
if(ncol(inputfile)<4){
stop("please check the format of the input data, some column is missing")}
write.table(inputfile, file=tmpinfile,sep="\t",quote=FALSE,row.names = FALSE)}
if(class(inputfile)=="GRanges"){
test<-as.data.frame(inputfile)
if(ncol(test)<6){stop("please check the format of the input data, some column is missing")}
if(ncol(test)==6){write.table(test[,c(6,1,2,3)],
file=tmpinfile,sep="\t",quote=FALSE,row.names = FALSE)}
if(ncol(test)>6){write.table(test[,c(6,1,2,3,7)],
file=tmpinfile,sep="\t",quote=FALSE,row.names = FALSE)}
}
tmpinfile = gsub("\\","/",tmpinfile,fixed =TRUE)
tmpinfile = gsub("//","/",tmpinfile,fixed=TRUE)
tmpoutfile = tempfile()
write.table(NULL, file=tmpoutfile,sep="\t",quote=FALSE,row.names = FALSE)
tmpoutfile = gsub("\\","/",tmpoutfile,fixed =TRUE)
tmpoutfile = gsub("//","/",tmpoutfile,fixed=TRUE)
tmpoutfile_UTR3 = tempfile()
write.table(NULL, file=tmpoutfile_UTR3,sep="\t",quote=FALSE,row.names = FALSE)
tmpoutfile_UTR3 = gsub("\\","/",tmpoutfile_UTR3,fixed =TRUE)
tmpoutfile_UTR3 = gsub("//","/",tmpoutfile_UTR3,fixed=TRUE)
tmp_ref_file = tempfile()
write.table(NULL, file=tmp_ref_file,sep="\t",quote=FALSE,row.names = FALSE)
tmp_ref_file = gsub("\\","/",tmp_ref_file,fixed =TRUE)
tmp_ref_file = gsub("//","/",tmp_ref_file,fixed=TRUE)
if (mySys=="W") sink(paste(tempdir(),"\\",name,sep="")) else {
sink(file.path(tempdir(),name,fsep = .Platform$file.sep))}
###fixed headers import modules
cat("import sys, string, math, shutil, math, os, gzip, time, glob, multiprocessing",sep="\n")
cat("from shutil import rmtree",sep="\n")
cat("from datetime import datetime",sep="\n")
cat("from bisect import *",sep="\n")
cat("",sep="\n")
###import our function module
##cat("import importlib",sep="\n") ## update from import imp at 8092020 to import importlib
cat("from importlib.machinery import SourceFileLoader", sep="\n")
##cat("imp.load_source('Function_PeakMutationAnnotation_GENCODE_08182020',")
# cat("SourceFileLoader('Function_PeakMutationAnnotation_GENCODE_08182020',")
if(genome %in% c("hg19","mm9")){
cat("SourceFileLoader('Function_PeakMutationAnnotation_GENCODE_05182015_AJ_1',")
}
if(genome %in% c("hg38","mm10")){
cat("SourceFileLoader('Function_PeakMutationAnnotation_GENCODE_01142021_AJ_2',")
}
cat("'", path, "').load_module()",sep="")
cat("",sep="\n")
# cat("from Function_PeakMutationAnnotation_GENCODE_08182020 import FindPeakMutation",sep="\n")
if(genome %in% c("hg19","mm9")){
cat("from Function_PeakMutationAnnotation_GENCODE_05182015_AJ_1 import FindPeakMutation",sep="\n")
}
if(genome %in% c("hg38","mm10")){
cat("from Function_PeakMutationAnnotation_GENCODE_01142021_AJ_2 import FindPeakMutation",sep="\n")
}
cat("",sep="\n")
####write parameters
#cat(paste("inputpath=","'",inputpath,"/'",sep=""),sep="\n")
cat(paste("inputfile=","'",tmpinfile,"'",sep=""),sep="\n")
#cat(paste("outputpath=","'",outputpath,"/'",sep=""),sep="\n")
cat(paste("outputfile=","'",tmpoutfile,"'",sep=""),sep="\n")
cat(paste("outputfileUTR3=","'",tmpoutfile_UTR3,"'",sep=""),sep="\n")
cat(paste("tmp_ref_file=","'",tmp_ref_file,"'",sep=""),sep="\n")
cat(paste("search_radius=",search_radius,sep=""),sep="\n")
cat(paste("promoter_radius=",promoter_radius,sep=""),sep="\n")
cat(paste("promoter_radius2=",promoter_radius2,sep=""),sep="\n")
cat(paste("genome=","'",genome,"'",sep=""),sep="\n")
cat(paste("adjacent=",adjacent,sep=""),sep="\n")
cat(paste("pwd=","'",system.file(package="seq2pathway.data"),"/extdata/'",sep=""),sep="\n")
cat(paste("SNP=",SNP,sep=""),sep="\n")
cat(paste("PromoterStop=",PromoterStop,sep=""),sep="\n")
cat(paste("NearestTwoDirection=",NearestTwoDirection,sep=""),sep="\n")
cat(paste("UTR3=",UTR3,sep=""),sep="\n")
cat("FindPeakMutation(inputfile,outputfile,outputfileUTR3,tmp_ref_file,
search_radius,promoter_radius,promoter_radius2,genome,adjacent,
pwd,SNP,PromoterStop,NearestTwoDirection,UTR3)")
sink()
# from Herve Pages, Bioconductor Maintainance Group, Oct 9 2020
script_path <- file.path(tempdir(), name)
if (!file.exists(script_path))
stop("the python script hasn't been generated correctly")
mypython <- get_python3_command_path()
## system2() is recommended over system().
response <- system2(mypython, args=script_path,
stdout=TRUE, stderr=TRUE)
# end
anno_result<-read.table(file=tmpoutfile_UTR3,header=TRUE,sep="\t")
seq2gene_result=list()
seq2gene_result[[1]]<-anno_result
names(seq2gene_result)[1]<-"seq2gene_FullResult"
seq2gene_result[[2]]<-anno_result[anno_result$source=="protein_coding",]
names(seq2gene_result)[2]<-"seq2gene_CodingGeneOnlyResult"
print(response)
return(seq2gene_result)
}
#function 15
runseq2pathway<-function(inputfile,
search_radius=150000,promoter_radius=200,promoter_radius2=100,
genome=c("hg38","hg19","mm10","mm9"),adjacent=FALSE,SNP= FALSE,
PromoterStop=FALSE,NearestTwoDirection=TRUE,UTR3=FALSE,
DataBase=c("GOterm"),FAIMETest=FALSE,FisherTest=TRUE,
collapsemethod=c("MaxMean","function","ME","maxRowVariance","MinMean","absMinMean","absMaxMean","Average"),
alpha=5,logCheck=FALSE,B=100,na.rm=FALSE,min_Intersect_Count=5)
{
options(warn=-1)
if(missing(inputfile)){stop("please give the input file")}
if(! class(inputfile) %in% c("data.frame","GRanges")){stop("please check the format of input file")}
###default parameters if missing
if(missing(DataBase)){DataBase="GOterm"}
if(missing(FAIMETest)){FAIMETest=FALSE}
if(missing(FisherTest)){FisherTest=TRUE}
if(missing(genome)){genome= "hg19"}
if(missing(search_radius)){search_radius = 150000}
if(missing(promoter_radius)){promoter_radius = 200}
if(missing(promoter_radius2)){promoter_radius2 = 100}
if(missing(SNP)){SNP = "False"}
if(missing(adjacent)){adjacent= "False"}
if(missing(PromoterStop)){PromoterStop= "False"}
if(missing(NearestTwoDirection)){NearestTwoDirection= "True"}
if(missing(UTR3)){UTR3= "False"}
if(missing(collapsemethod)){collapsemethod= "MaxMean"}
if(missing(B)){B= 100}
if(missing(alpha)){alpha= 5}
if(missing(logCheck)){logCheck= FALSE}
if(missing(na.rm)){na.rm= FALSE}
if(missing(min_Intersect_Count)){min_Intersect_Count=5}
if(SNP %in% c("T","TRUE","True",TRUE)){SNP= "True"}
if(SNP %in% c("F","FALSE","False",FALSE)){SNP= "False"}
if(PromoterStop %in% c("T","TRUE","True",TRUE)){PromoterStop= "True"}
if(PromoterStop %in% c("F","FALSE","False",FALSE)){PromoterStop= "False"}
if(NearestTwoDirection %in% c("T","TRUE","True",TRUE)){NearestTwoDirection= "True"}
if(NearestTwoDirection %in% c("F","FALSE","False",FALSE)){NearestTwoDirection= "False"}
if(UTR3 %in% c("T","TRUE","True",TRUE)){UTR3= "True"}
if(UTR3 %in% c("F","FALSE","False",FALSE)){UTR3= "False"}
if(adjacent %in% c("T","TRUE","True",TRUE)){adjacent= "True"}
if(adjacent %in% c("F","FALSE","False",FALSE)){adjacent= "False"}
if(adjacent== "True"){search_radius =0}
if(length(genome>1)) genome=genome[1]
if(!collapsemethod %in% c("MaxMean","function","ME","maxRowVariance","MinMean","absMinMean","absMaxMean","Average")){
stop("please check the collapsemethod")}
data(GO_BP_list,package="seq2pathway.data")
data(GO_MF_list,package="seq2pathway.data")
data(GO_CC_list,package="seq2pathway.data")
data(Des_BP_list,package="seq2pathway.data")
data(Des_CC_list,package="seq2pathway.data")
data(Des_MF_list,package="seq2pathway.data")
#######seq2gene function
seq2gene_result<-runseq2gene(inputfile=inputfile,
search_radius=search_radius,promoter_radius=promoter_radius,
promoter_radius2=promoter_radius2,genome=genome,
adjacent=adjacent,SNP=SNP,PromoterStop=PromoterStop,
NearestTwoDirection=NearestTwoDirection,UTR3=UTR3)
seq2gene_result_fornext<-seq2gene_result[[2]]
seq2gene_result_fornext<-seq2gene_result_fornext[,c(1,13)]
genename<-unique(seq2gene_result_fornext[,2])
print("Start test..............")
############Fisher test
if(FisherTest==TRUE){
if(length(DataBase)==1 & all(DataBase %in% c("GOterm","BP","MF","CC"))){
FS_test<-FisherTest_GO_BP_MF_CC(gs=as.vector(genename),genome=genome,
min_Intersect_Count=min_Intersect_Count,Ontology=DataBase)
}else{
FS_test<-FisherTest_MsigDB(gsmap=DataBase,gs=as.vector(genename),
genome=genome,min_Intersect_Count=min_Intersect_Count)
}
#print("Fisher's exact test is done")
}
#############################rungene2pathway,normalization,empiricalP,summary table
if(FAIMETest==TRUE){
#Preprocess peak score with gene
tmpinfile = tempfile()
if(class(inputfile)=="data.frame"){
if(ncol(inputfile)<5){
stop("please check the format of the input data, some column is missing")}
write.table(inputfile, file=tmpinfile,sep="\t",quote=FALSE,row.names = FALSE)
}
if(class(inputfile)=="GRanges"){
test<-as.data.frame(inputfile)
if(ncol(test)<7){stop("please check the format of the input data, some column is missing")}
if(ncol(test)>=7){write.table(test[,c(6,1,2,3,7)],
file=tmpinfile,sep="\t",quote=FALSE,row.names = FALSE)}
}
peak_fornext<-read.table(file=tmpinfile,header=TRUE,sep="\t")
if(ncol(peak_fornext)<5) stop("Please check input file format, some required column is missing.")
peak_fornext<-peak_fornext[,c(1,5)]
peak_anno_score<-merge(seq2gene_result_fornext,peak_fornext,
by=names(seq2gene_result_fornext)[1],all=TRUE)
############collapse function
dat_collapsed<-Peak_Gene_Collapse(input=peak_anno_score,collapsemethod=collapsemethod)
dat_CP<-data.frame(dat_collapsed[,c(2:ncol(dat_collapsed))])
rownames(dat_CP)<-rownames(dat_collapsed)
colnames(dat_CP)<-colnames(dat_collapsed)[2:ncol(dat_collapsed)]
dat_collapsed$gene<-as.vector(toupper(rownames(dat_collapsed)))
gene2pathway_result<-list()
n.list=0
if(length(DataBase)==1 & all(DataBase%in% c("GOterm","BP","CC","MF"))){
if(length(DataBase)==1 & all(DataBase %in% c("GOterm","BP"))){
#FAIME
GO_BP_FAIME<-rungene2pathway(dat=dat_CP,gsmap=GO_BP_list,alpha=alpha,logCheck=logCheck,
method="FAIME",na.rm=na.rm)
#Normalization
GO_BP_FAIME_N<-Normalize_F(input=GO_BP_FAIME)
#EmpiricalP
GO_BP_FAIME_Pvalue<-rungene2pathway_EmpiricalP(dat=dat_CP,gsmap=GO_BP_list,
alpha=alpha,logCheck=logCheck,
method="FAIME",B=B,na.rm=na.rm)
########gene2pathway table
GO_BP_N_P<-merge(GO_BP_FAIME_N,GO_BP_FAIME_Pvalue,by="row.names",all=TRUE)
rownames(GO_BP_N_P)<-GO_BP_N_P$Row.names
GO_BP_N_P<-GO_BP_N_P[,-1]
for(i in 1:nrow(GO_BP_N_P)){
intsect<-intersect(toupper(rownames(dat_CP)),
toupper(unlist(GO_BP_list[names(GO_BP_list)==rownames(GO_BP_N_P)[i]])))
GO_BP_N_P$Intersect_Count[i]<-length(intsect)
GO_BP_N_P$Intersect_gene[i]<-paste(intsect,collapse=" ")
GO_BP_N_P$Intersect_element[i]<-paste(as.vector(dat_collapsed[dat_collapsed$gene %in% intsect,c(1)]),collapse=" ")
rm(intsect)
GO_BP_N_P$Des[i]<-as.character(Des_BP_list[which(names(Des_BP_list)==rownames(GO_BP_N_P)[i])])
}
GO_BP_N_P<-GO_BP_N_P[GO_BP_N_P$Intersect_Count>=min_Intersect_Count,]
GO_BP_N_P<-GO_BP_N_P[,c(ncol(GO_BP_N_P),1:(ncol(GO_BP_N_P)-1))]
n.list=n.list+1
gene2pathway_result[[n.list]]<-GO_BP_N_P
names(gene2pathway_result)[n.list]<-c("GO_BP")
}
if(length(DataBase)==1 & all(DataBase %in% c("GOterm","MF"))){
GO_MF_FAIME<-rungene2pathway(dat=dat_CP,gsmap=GO_MF_list,alpha=alpha,logCheck=logCheck,
method="FAIME",na.rm=na.rm)
GO_MF_FAIME_N<-Normalize_F(input=GO_MF_FAIME)
GO_MF_FAIME_Pvalue<-rungene2pathway_EmpiricalP(dat=dat_CP,gsmap=GO_MF_list,
alpha=alpha,logCheck=logCheck,
method="FAIME",B=B,na.rm=na.rm)
GO_MF_N_P<-merge(GO_MF_FAIME_N,GO_MF_FAIME_Pvalue,by="row.names",all=TRUE)
rownames(GO_MF_N_P)<-GO_MF_N_P$Row.names
GO_MF_N_P<-GO_MF_N_P[,-1]
for(i in 1:nrow(GO_MF_N_P)){
intsect<-intersect(toupper(rownames(dat_CP)),toupper(unlist(GO_MF_list[names(GO_MF_list)==rownames(GO_MF_N_P)[i]])))
GO_MF_N_P$Intersect_Count[i]<-length(intsect)
GO_MF_N_P$Intersect_gene[i]<-paste(intsect,collapse=" ")
GO_MF_N_P$Intersect_element[i]<-paste(as.vector(dat_collapsed[dat_collapsed$gene %in% intsect,c(1)]),collapse=" ")
rm(intsect)
GO_MF_N_P$Des[i]<-as.character(Des_MF_list[which(names(Des_MF_list)==rownames(GO_MF_N_P)[i])])
}
GO_MF_N_P<-GO_MF_N_P[GO_MF_N_P$Intersect_Count>=min_Intersect_Count,]
GO_MF_N_P<-GO_MF_N_P[,c(ncol(GO_MF_N_P),1:(ncol(GO_MF_N_P)-1))]
n.list=n.list+1
gene2pathway_result[[n.list]]<-GO_MF_N_P
names(gene2pathway_result)[n.list]<-c("GO_MF")
}
if(length(DataBase)==1 & all(DataBase %in% c("GOterm","CC"))){
GO_CC_FAIME<-rungene2pathway(dat=dat_CP,gsmap=GO_CC_list,alpha=alpha,logCheck=logCheck,
method="FAIME",na.rm=na.rm)
GO_CC_FAIME_N<-Normalize_F(input=GO_CC_FAIME)
GO_CC_FAIME_Pvalue<-rungene2pathway_EmpiricalP(dat=dat_CP,gsmap=GO_CC_list,
alpha=alpha,logCheck=logCheck,
method="FAIME",B=B,na.rm=na.rm)
GO_CC_N_P<-merge(GO_CC_FAIME_N,GO_CC_FAIME_Pvalue,by="row.names",all=TRUE)
rownames(GO_CC_N_P)<-GO_CC_N_P$Row.names
GO_CC_N_P<-GO_CC_N_P[,-1]
for(i in 1:nrow(GO_CC_N_P)){
intsect<-intersect(toupper(rownames(dat_CP)),
toupper(unlist(GO_CC_list[names(GO_CC_list)==rownames(GO_CC_N_P)[i]])))
GO_CC_N_P$Intersect_Count[i]<-length(intsect)
GO_CC_N_P$Intersect_gene[i]<-paste(intsect,collapse=" ")
GO_CC_N_P$Intersect_element[i]<-paste(as.vector(dat_collapsed[dat_collapsed$gene %in% intsect,c(1)]),collapse=" ")
rm(intsect)
GO_CC_N_P$Des[i]<-as.character(Des_CC_list[which(names(Des_CC_list)==rownames(GO_CC_N_P)[i])])
}
GO_CC_N_P<-GO_CC_N_P[GO_CC_N_P$Intersect_Count>=min_Intersect_Count,]
GO_CC_N_P<-GO_CC_N_P[,c(ncol(GO_CC_N_P),1:(ncol(GO_CC_N_P)-1))]
n.list=n.list+1
gene2pathway_result[[n.list]]<-GO_CC_N_P
names(gene2pathway_result)[n.list]<-c("GO_CC")
}
} else {
dat_FAIME<-rungene2pathway(dat=dat_CP,gsmap=DataBase,alpha=alpha,logCheck=logCheck,
method="FAIME",na.rm=na.rm)
N_FAIME<-Normalize_F(input=dat_FAIME)
dat_FAIME_Pvalue<-rungene2pathway_EmpiricalP(dat=dat_CP,gsmap=DataBase,
alpha=alpha,logCheck=logCheck,
method="FAIME",B=B,na.rm=na.rm)
N_FAIME<-as.matrix(N_FAIME)
dat_FAIME_Pvalue<-as.matrix(dat_FAIME_Pvalue)
DB_N_P<-cbind(N_FAIME,as.matrix(dat_FAIME_Pvalue[match(rownames(N_FAIME),
rownames(dat_FAIME_Pvalue)),]))
colnames(DB_N_P)<-c("score2pathscore_Normalized","score2pathscore_Pvalue")
DB_N_P<-as.data.frame(DB_N_P)
for(i in 1:nrow(DB_N_P)){
if(class(DataBase)=="GSA.genesets"){
intsect<-intersect(toupper(rownames(dat_CP)),
toupper(unlist(DataBase$genesets[which(DataBase$geneset.names==rownames(DB_N_P)[i])])))
}else if(class(DataBase)=="list"){
intsect<-intersect(toupper(rownames(dat_CP)),
toupper(unlist(DataBase[names(DataBase)==rownames(DB_N_P)[i]])))
}
DB_N_P$Intersect_Count[i]<-length(intsect)
DB_N_P$Intersect_gene[i]<-paste(intsect,collapse=" ")
DB_N_P$Intersect_element[i]<-paste(as.vector(dat_collapsed[dat_collapsed$gene %in% intsect,c(1)]),collapse=" ")
rm(intsect)
if(class(DataBase)=="GSA.genesets"){
DB_N_P$Des[i]<-as.character(DataBase$geneset.descriptions[which(DataBase$geneset.names==rownames(DB_N_P)[i])])}
}
DB_N_P<-DB_N_P[DB_N_P$Intersect_Count>=min_Intersect_Count,]
gene2pathway_result<-DB_N_P[,c(ncol(DB_N_P),1:(ncol(DB_N_P)-1))]
}
print("gene2pathway analysis is done")
}######ends:FAIMETest==TRUE
if(exists("gene2pathway_result")&exists("FS_test")){
TotalResult<-list()
TotalResult[[1]]<-seq2gene_result
names(TotalResult)[1]<-"seq2gene_result"
TotalResult[[2]]<-gene2pathway_result
names(TotalResult)[2]<-"gene2pathway_result.FAIME"
TotalResult[[3]]<-FS_test
names(TotalResult)[3]<-"gene2pathway_result.FET"
TotalResult[[4]]<-dat_CP
names(TotalResult)[4]<-"gene_collapse"
}else if(exists("gene2pathway_result")&exists("FS_test")==FALSE){
TotalResult<-list()
TotalResult[[1]]<-seq2gene_result
names(TotalResult)[1]<-"seq2gene_result"
TotalResult[[2]]<-gene2pathway_result
names(TotalResult)[2]<-"gene2pathway_result.FAIME"
TotalResult[[3]]<-dat_CP
names(TotalResult)[3]<-"gene_collapse"
}
else if(exists("gene2pathway_result")==FALSE&exists("FS_test")){
TotalResult<-list()
TotalResult[[1]]<-seq2gene_result
names(TotalResult)[1]<-"seq2gene_result"
TotalResult[[2]]<-FS_test
names(TotalResult)[2]<-"gene2pathway_result.FET"
}
return(TotalResult)
}
#function 16
gene2pathway_test<-function(dat,DataBase="GOterm",FisherTest=TRUE,
EmpiricalTest=FALSE,method=c("FAIME","KS-rank","cumulative-rank"),
genome=c("hg38","hg19","mm10","mm9"),alpha=5, logCheck=FALSE,na.rm=FALSE,B=100,min_Intersect_Count=5){
options(warn=-1)
if(missing(FisherTest)){FisherTest=TRUE}
if(missing(DataBase)){DataBase="GOterm"}
if(missing(genome)){genome= "hg19"}
if(length(genome>1)){genome=genome[1]}
if(missing(method)){method="FAIME"}
if(missing(alpha)){alpha=5}
if(missing(na.rm)){na.rm=FALSE}
if(missing(logCheck)){logCheck=FALSE}
if(missing(EmpiricalTest)){EmpiricalTest=FALSE}
if(missing(B)){B=100}
if(missing(min_Intersect_Count)){min_Intersect_Count=5}
data(GO_BP_list,package="seq2pathway.data")
data(GO_MF_list,package="seq2pathway.data")
data(GO_CC_list,package="seq2pathway.data")
data(Des_BP_list,package="seq2pathway.data")
data(Des_CC_list,package="seq2pathway.data")
data(Des_MF_list,package="seq2pathway.data")
############Fisher test
if(FisherTest==TRUE){
if(length(DataBase)==1 & all(DataBase %in% c("GOterm","BP","CC","MF"))){
FS_test<-FisherTest_GO_BP_MF_CC(gs=as.vector(rownames(dat)),genome=genome,
min_Intersect_Count=min_Intersect_Count,Ontology=DataBase)
}else{
FS_test<-FisherTest_MsigDB(gsmap=DataBase,gs=as.vector(rownames(dat)),genome=genome,
min_Intersect_Count=min_Intersect_Count)
}
#print("Fisher's exact test is done")
}
gene2pathway_result<-list()
n.list=0
#############################rungene2pathway,normalization,empiricalP,summary table
if(length(DataBase)==1 & all(DataBase %in% c("GOterm","BP","MF","CC"))){
gene2pathway_result<-list()
n.list=0
if(length(DataBase)==1 & all(DataBase %in% c("GOterm","BP"))){
#method
GO_BP_method<-rungene2pathway(dat=dat,gsmap=GO_BP_list,alpha=alpha,logCheck=logCheck,
method=method,na.rm=na.rm)
#Normalization
GO_BP_method_N<-Normalize_F(input=GO_BP_method)
#EmpiricalP
if(EmpiricalTest==TRUE){
GO_BP_method_Pvalue<-rungene2pathway_EmpiricalP(dat=dat,gsmap=GO_BP_list,alpha=alpha,
logCheck=logCheck,method=method,B=B,na.rm=na.rm)
GO_BP_N_P<-merge(GO_BP_method_N,GO_BP_method_Pvalue,by="row.names",all=TRUE)
rownames(GO_BP_N_P)<-GO_BP_N_P$Row.names
GO_BP_N_P<-GO_BP_N_P[,-1]
}else {GO_BP_N_P<-GO_BP_method_N}
########gene2pathway table
for(i in 1:nrow(GO_BP_N_P)){
intsect<-intersect(toupper(rownames(dat)),
toupper(unlist(GO_BP_list[names(GO_BP_list)==rownames(GO_BP_N_P)[i]])))
GO_BP_N_P$Intersect_Count[i]<-length(intsect)
GO_BP_N_P$Intersect_gene[i]<-paste(intsect,collapse=" ")
rm(intsect)
GO_BP_N_P$Des[i]<-as.character(Des_BP_list[which(names(Des_BP_list)==rownames(GO_BP_N_P)[i])])
}
GO_BP_N_P<-GO_BP_N_P[GO_BP_N_P$Intersect_Count>=min_Intersect_Count,]
GO_BP_N_P<-GO_BP_N_P[,c(ncol(GO_BP_N_P),1:(ncol(GO_BP_N_P)-1))]
n.list=n.list+1
gene2pathway_result[[n.list]]<-GO_BP_N_P
names(gene2pathway_result)[n.list]<-c("GO_BP")
}
if(length(DataBase)==1 & all(DataBase %in% c("GOterm","MF"))){
GO_MF_method<-rungene2pathway(dat=dat,gsmap=GO_MF_list,alpha=alpha,logCheck=logCheck,
method=method,na.rm=na.rm)
GO_MF_method_N<-Normalize_F(input=GO_MF_method)
if(EmpiricalTest==TRUE){
GO_MF_method_Pvalue<-rungene2pathway_EmpiricalP(dat=dat,gsmap=GO_MF_list,alpha=alpha,
logCheck=logCheck,method=method,
B=B,na.rm=na.rm)
GO_MF_N_P<-merge(GO_MF_method_N,GO_MF_method_Pvalue,by="row.names",all=TRUE)
rownames(GO_MF_N_P)<-GO_MF_N_P$Row.names
GO_MF_N_P<-GO_MF_N_P[,-1]
}else {GO_MF_N_P<-GO_MF_method_N}
for(i in 1:nrow(GO_MF_N_P)){
intsect<-intersect(toupper(rownames(dat)),
toupper(unlist(GO_MF_list[names(GO_MF_list)==rownames(GO_MF_N_P)[i]])))
GO_MF_N_P$Intersect_Count[i]<-length(intsect)
GO_MF_N_P$Intersect_gene[i]<-paste(intsect,collapse=" ")
rm(intsect)
GO_MF_N_P$Des[i]<-as.character(Des_MF_list[which(names(Des_MF_list)==rownames(GO_MF_N_P)[i])])
}
GO_MF_N_P<-GO_MF_N_P[GO_MF_N_P$Intersect_Count>=min_Intersect_Count,]
GO_MF_N_P<-GO_MF_N_P[,c(ncol(GO_MF_N_P),1:(ncol(GO_MF_N_P)-1))]
n.list=n.list+1
gene2pathway_result[[n.list]]<-GO_MF_N_P
names(gene2pathway_result)[n.list]<-c("GO_MF")
}
if(length(DataBase)==1 & all(DataBase %in% c("GOterm","CC"))){
GO_CC_method<-rungene2pathway(dat=dat,gsmap=GO_CC_list,alpha=alpha,logCheck=logCheck,
method=method,na.rm=na.rm)
GO_CC_method_N<-Normalize_F(input=GO_CC_method)
if(EmpiricalTest==TRUE){
GO_CC_method_Pvalue<-rungene2pathway_EmpiricalP(dat=dat,gsmap=GO_CC_list,alpha=alpha,
logCheck=logCheck,method=method,B=B,na.rm=na.rm)
GO_CC_N_P<-merge(GO_CC_method_N,GO_CC_method_Pvalue,by="row.names",all=TRUE)
rownames(GO_CC_N_P)<-GO_CC_N_P$Row.names
GO_CC_N_P<-GO_CC_N_P[,-1]
}else {GO_CC_N_P<-GO_CC_method_N}
for(i in 1:nrow(GO_CC_N_P)){
intsect<-intersect(toupper(rownames(dat)),
toupper(unlist(GO_CC_list[names(GO_CC_list)==rownames(GO_CC_N_P)[i]])))
GO_CC_N_P$Intersect_Count[i]<-length(intsect)
GO_CC_N_P$Intersect_gene[i]<-paste(intsect,collapse=" ")
rm(intsect)
GO_CC_N_P$Des[i]<-as.character(Des_CC_list[which(names(Des_CC_list)==rownames(GO_CC_N_P)[i])])
}
GO_CC_N_P<-GO_CC_N_P[GO_CC_N_P$Intersect_Count>=min_Intersect_Count,]
GO_CC_N_P<-GO_CC_N_P[,c(ncol(GO_CC_N_P),1:(ncol(GO_CC_N_P)-1))]
n.list=n.list+1
gene2pathway_result[[n.list]]<-GO_CC_N_P
names(gene2pathway_result)[n.list]<-c("GO_CC")
}
}else{
dat_method<-rungene2pathway(dat=dat,gsmap=DataBase,alpha=alpha,logCheck=logCheck,
method=method,na.rm=na.rm)
N_method<-Normalize_F(input=dat_method)
if(EmpiricalTest==TRUE){
dat_method_Pvalue<-rungene2pathway_EmpiricalP(dat=dat,gsmap=DataBase,alpha=alpha,
logCheck=logCheck,method=method,B=B,na.rm=na.rm)
N_method<-as.matrix(N_method)
dat_method_Pvalue<-as.matrix(dat_method_Pvalue)
if(ncol(dat_method_Pvalue)>1){
DB_N_P<-cbind(N_method,dat_method_Pvalue[match(rownames(N_method), rownames(dat_method_Pvalue)),]) }
if(ncol(dat_method_Pvalue)==1){
DB_N_P<-cbind(N_method,as.matrix(dat_method_Pvalue[match(rownames(N_method), rownames(dat_method_Pvalue)),]))
colnames(DB_N_P)<-c("score2pathscore_Normalized","score2pathscore_Pvalue")
}
DB_N_P<-as.data.frame(DB_N_P)
}else{
DB_N_P=N_method}
for(i in 1:nrow(DB_N_P)){
if(class(DataBase)=="GSA.genesets"){
intsect<-intersect(toupper(rownames(dat)),
toupper(unlist(DataBase$genesets[which(DataBase$geneset.names==rownames(DB_N_P)[i])])))
}else if(class(DataBase)=="list"){
intsect<-intersect(toupper(rownames(dat)),
toupper(unlist(DataBase[names(DataBase)==rownames(DB_N_P)[i]])))
}
DB_N_P$Intersect_Count[i]<-length(intsect)
DB_N_P$Intersect_gene[i]<-paste(intsect,collapse=" ")
rm(intsect)
if(class(DataBase)=="GSA.genesets"){
DB_N_P$Des[i]<-as.character(DataBase$geneset.descriptions[which(DataBase$geneset.names==rownames(DB_N_P)[i])])}
}
DB_N_P<-DB_N_P[DB_N_P$Intersect_Count>=min_Intersect_Count,]
gene2pathway_result<-DB_N_P[,c(ncol(DB_N_P),1:(ncol(DB_N_P)-1))]
}
print("gene2pathway analysis is done")
if(exists("gene2pathway_result")&exists("FS_test")){
TResult<-list()
TResult[[1]]<-gene2pathway_result
names(TResult)[1]<-"gene2pathway_result.2"
TResult[[2]]<-FS_test
names(TResult)[2]<-"gene2pathway_result.FET"
}else if(exists("gene2pathway_result")&exists("FS_test")==FALSE){
TResult<-gene2pathway_result
}
else if(exists("gene2pathway_result")==FALSE&exists("FS_test")){
TResult<-FS_test
}
return(TResult)
}
#function 17
plotTop10 = function(res,fdr=0.05,or=2,myfileID=NULL){
if(nrow(res)>0)
{ res = subset(res,Fisher_odds>or & FDR<fdr)
if(nrow(res)>10) {res <- res[1:10,]}
# because barplot() plots from bottom up !! need to re-order the inputs
res <- res[order(res$FDR,decreasing=T),]
tmp <- barplot(res$Fisher_odds,
main=paste0(myfileID,' n=',nrow(res)),
horiz=TRUE,
xlab="grey bar: Fisher_odds / red line: -log(FDR)",
xlim =c(0,max(c(-log10(res$FDR), res$Fisher_odds))))
lines( y=tmp, -log10(res$FDR), col="red", type="o",pch=19,lwd=2)
text(x = fdr,y=tmp, res$GOID, pos=4)
abline(v=-log10(fdr), lty=2)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.