##################
#methods-CuffSet.R
#
#Introduces the CuffSet Class for analysis, manipulation, and plotting of Cufflinks data
#
#Author: Loyal A. Goff
#
##################
#Initialize
setMethod("initialize","CuffSet",
function(.Object,
DB,
#runInfo=data.frame(),
phenoData=data.frame(),
conditions=data.frame(),
genes,
isoforms,
TSS,
CDS,
promoters,
splicing,
relCDS,
...){
.Object<-callNextMethod(.Object,
DB = DB,
#runInfo=runInfo,
phenoData=phenoData,
conditions = conditions,
genes = genes,
isoforms = isoforms,
TSS = TSS,
CDS = CDS,
promoters = promoters,
splicing = splicing,
relCDS = relCDS,
...)
}
)
##################
#Class Methods
##################
setMethod("show","CuffSet",
function(object){
cat(class(object), "instance with:\n\t",
dim(object@genes)[2],"samples\n\t",
dim(object@genes)[1],"genes\n\t",
dim(object@isoforms)[1],"isoforms\n\t",
dim(object@TSS)[1],"TSS\n\t",
dim(object@CDS)[1],"CDS\n\t",
dim(object@promoters)[1],"promoters\n\t",
dim(object@splicing)[1],"splicing\n\t",
dim(object@relCDS)[1],"relCDS\n"
)
}
)
#This does not subset appropriately yet
#TODO: Fix for multiple values of i
#
#Solution is to test i to determine if it is of type 'numeric' (index), list (multi-index), or 'character' (gene_ids)
#
#TODO: - Add 'j' to select on sampleNames as well
# - Add ability to search on gene_short_name(s) or featureIDs
#setMethod("[",signature(x="CuffSet"),function(x, i, ...){
# featureIDs<-featureNames(x@genes)[i]
# if(length(featureIDs)==1){
# res<-getGene(x,featureID)
# }else{
# res<-getGenes(x,featureIDs)
# }
# res
# }
#)
setValidity("CuffSet",
function(object){
TRUE
}
)
############
#Accessors
############
.samples<-function(object){
sampleQuery<-"SELECT * FROM samples s LEFT JOIN phenoData p on s.sample_name = p.sample_name"
dbGetQuery(object@DB,sampleQuery)
}
setMethod("samples",signature(object="CuffSet"),.samples)
.replicates<-function(object){
replicateQuery<-"SELECT * FROM replicates r"
dbGetQuery(object@DB,replicateQuery)
}
setMethod("replicates",signature(object="CuffSet"),.replicates)
setMethod("DB","CuffSet",function(object){
return(object@DB)
})
.runInfo<-function(object){
runInfoQuery<-"SELECT * FROM runInfo"
dbGetQuery(object@DB,runInfoQuery)
}
setMethod("runInfo","CuffSet",.runInfo)
.varModel<-function(object){
varModelQuery<-"SELECT * from varModel"
dbGetQuery(object@DB,varModelQuery)
}
setMethod("varModel","CuffSet",.varModel)
#setMethod("phenoData","CuffSet",function(object){
# return(object@phenoData)
# })
setMethod("conditions","CuffSet",function(object){
return(object@conditions)
})
setMethod("genes","CuffSet",function(object){
return(object@genes)
})
setMethod("isoforms","CuffSet",function(object){
return(object@isoforms)
})
setMethod("TSS","CuffSet",function(object){
return(object@TSS)
})
setMethod("CDS","CuffSet",function(object){
return(object@CDS)
})
setMethod("promoters","CuffSet",function(object){
return(object@promoters)
})
setMethod("splicing","CuffSet",function(object){
return(object@splicing)
})
setMethod("relCDS","CuffSet",function(object){
return(object@relCDS)
})
.getGenome<-function(object){
genomeQuery<-"SELECT value FROM runInfo WHERE param='genome'"
genome<-dbGetQuery(object@DB,genomeQuery)
genome<-unique(genome[,1])
genome
}
#make CuffGene objects from a gene_ids
.getGene<-function(object,geneId,sampleIdList=NULL){
#Get gene_id from geneId (can use any identifier now to select gene)
geneId<-getGeneId(object,geneId)
if(length(geneId)>1){
stop("More than one gene_id found for given query. Please use getGenes() instead.")
}
#Sample subsetting
if(!is.null(sampleIdList)){
if(.checkSamples(object@DB,sampleIdList)){
myLevels<-sampleIdList
}else{
stop("Sample does not exist!")
}
}else{
myLevels<-getLevels(object)
}
#Sample Search String (SQL)
sampleString<-'('
for (i in myLevels){
sampleString<-paste(sampleString,"'",i,"',",sep="")
}
sampleString<-substr(sampleString,1,nchar(sampleString)-1)
sampleString<-paste(sampleString,')',sep="")
whereString = paste("WHERE (x.gene_id ='",geneId,"' OR x.gene_short_name = '",geneId,"')",sep="")
whereStringFPKM = paste("WHERE (x.gene_id ='",geneId,"' OR x.gene_short_name = '",geneId,"')",' AND (y.sample_name IN ',sampleString,')',sep="")
whereStringDiff = paste("WHERE (x.gene_id ='",geneId,"' OR x.gene_short_name = '",geneId,"')",' AND (y.sample_1 IN ',sampleString,' AND y.sample_2 IN ',sampleString,')',sep="")
whereStringRep = paste("JOIN replicates r ON y.rep_name=r.rep_name WHERE (x.gene_id ='",geneId,"' OR x.gene_short_name = '",geneId,"')",' AND (r.sample_name IN ',sampleString,')',sep="")
#dbQueries
geneAnnotationQuery<-paste("SELECT * from genes x LEFT JOIN geneFeatures xf USING(gene_id) ",whereString,sep="")
geneFPKMQuery<-paste("SELECT y.* from genes x JOIN geneData y ON x.gene_id=y.gene_id ",whereStringFPKM,sep="")
#print(geneFPKMQuery)
geneDiffQuery<-paste("SELECT y.* from genes x JOIN geneExpDiffData y ON x.gene_id=y.gene_id ",whereStringDiff,sep="")
#print(geneDiffQuery)
geneRepFPKMQuery<-paste("SELECT y.* from genes x JOIN geneReplicateData y ON x.gene_id=y.gene_id ",whereStringRep,sep="")
geneCountQuery<-paste("SELECT y.* from genes x JOIN geneCount y ON x.gene_id=y.gene_id ",whereStringFPKM,sep="")
geneFeatureQuery<-paste("SELECT y.* FROM features y JOIN genes x on y.gene_id = x.gene_id ",whereString,sep="")
isoformAnnotationQuery<-paste("SELECT * from isoforms i JOIN genes x ON i.gene_id = x.gene_id ",whereString,sep="")
isoformFPKMQuery<-paste("SELECT y.* from isoforms i JOIN isoformData y ON i.isoform_id = y.isoform_id JOIN genes x ON i.gene_id = x.gene_id ",whereStringFPKM,sep="")
isoformDiffQuery<-paste("SELECT y.* from isoforms i JOIN isoformExpDiffData y ON i.isoform_id = y.isoform_id JOIN genes x ON i.gene_id = x.gene_id ",whereStringDiff,sep="")
isoformRepFPKMQuery<-paste("SELECT y.* from isoforms i JOIN isoformReplicateData y ON i.isoform_id = y.isoform_id JOIN genes x ON i.gene_id = x.gene_id ",whereStringRep,sep="")
isoformCountQuery<-paste("SELECT y.* from isoforms i JOIN isoformCount y ON i.isoform_id = y.isoform_id JOIN genes x ON i.gene_id = x.gene_id ",whereStringFPKM,sep="")
TSSAnnotationQuery<-paste("SELECT * from TSS t JOIN genes x ON t.gene_id = x.gene_id ",whereString,sep="")
TSSFPKMQuery<-paste("SELECT y.* from TSS t JOIN TSSData y ON t.TSS_group_id=y.TSS_group_id JOIN genes x ON t.gene_id = x.gene_id ",whereStringFPKM,sep="")
TSSDiffQuery<-paste("SELECT y.* from TSS t JOIN TSSExpDiffData y ON t.TSS_group_id=y.TSS_group_id JOIN genes x ON t.gene_id = x.gene_id ",whereStringDiff,sep="")
TSSRepFPKMQuery<-paste("SELECT y.* from TSS t JOIN TSSReplicateData y ON t.TSS_group_id=y.TSS_group_id JOIN genes x ON t.gene_id = x.gene_id ",whereStringRep,sep="")
TSSCountQuery<-paste("SELECT y.* from TSS t JOIN TSSCount y ON t.TSS_group_id=y.TSS_group_id JOIN genes x ON t.gene_id = x.gene_id ",whereStringFPKM,sep="")
CDSAnnotationQuery<-paste("SELECT * from CDS c JOIN genes x ON c.gene_id = x.gene_id ",whereString,sep="")
CDSFPKMQuery<-paste("SELECT y.* from CDS c JOIN CDSData y ON c.CDS_id = y.CDS_id JOIN genes x ON c.gene_id = x.gene_id ",whereStringFPKM,sep="")
CDSDiffQuery<-paste("SELECT y.* from CDS c JOIN CDSExpDiffData y ON c.CDS_id = y.CDS_id JOIN genes x ON c.gene_id = x.gene_id ",whereStringDiff,sep="")
CDSRepFPKMQuery<-paste("SELECT y.* from CDS c JOIN CDSReplicateData y ON c.CDS_id = y.CDS_id JOIN genes x ON c.gene_id = x.gene_id ",whereStringRep,sep="")
CDSCountQuery<-paste("SELECT y.* from CDS c JOIN CDSCount y ON c.CDS_id = y.CDS_id JOIN genes x ON c.gene_id = x.gene_id ",whereStringFPKM,sep="")
begin<-dbSendQuery(object@DB,"BEGIN;")
#fetch records
#genes
genes.fpkm<-dbGetQuery(object@DB,geneFPKMQuery)
genes.fpkm$sample_name<-factor(genes.fpkm$sample_name,levels=myLevels)
genes.diff<-dbGetQuery(object@DB,geneDiffQuery)
genes.diff$sample_1<-factor(genes.diff$sample_1,levels=myLevels)
genes.diff$sample_2<-factor(genes.diff$sample_2,levels=myLevels)
genes.repFpkm<-dbGetQuery(object@DB,geneRepFPKMQuery)
genes.count<-dbGetQuery(object@DB,geneCountQuery)
genes.annotation<-dbGetQuery(object@DB,geneAnnotationQuery)
genes.features<-dbGetQuery(object@DB,geneFeatureQuery)
#isoforms
isoform.fpkm<-dbGetQuery(object@DB,isoformFPKMQuery)
isoform.fpkm$sample_name<-factor(isoform.fpkm$sample_name,levels=myLevels)
isoform.diff<-dbGetQuery(object@DB,isoformDiffQuery)
isoform.diff$sample_1<-factor(isoform.diff$sample_1,levels=myLevels)
isoform.diff$sample_2<-factor(isoform.diff$sample_2,levels=myLevels)
isoform.repFpkm<-dbGetQuery(object@DB,isoformRepFPKMQuery)
isoform.count<-dbGetQuery(object@DB,isoformCountQuery)
isoform.annotation<-dbGetQuery(object@DB,isoformAnnotationQuery)
#CDS
CDS.fpkm<-dbGetQuery(object@DB,CDSFPKMQuery)
CDS.fpkm$sample_name<-factor(CDS.fpkm$sample_name,levels=myLevels)
CDS.diff<-dbGetQuery(object@DB,CDSDiffQuery)
CDS.diff$sample_1<-factor(CDS.diff$sample_1,levels=myLevels)
CDS.diff$sample_2<-factor(CDS.diff$sample_2,levels=myLevels)
CDS.repFpkm<-dbGetQuery(object@DB,CDSRepFPKMQuery)
CDS.count<-dbGetQuery(object@DB,CDSCountQuery)
CDS.annotation<-dbGetQuery(object@DB,CDSAnnotationQuery)
#TSS
TSS.fpkm<-dbGetQuery(object@DB,TSSFPKMQuery)
TSS.fpkm$sample_name<-factor(TSS.fpkm$sample_name,levels=myLevels)
TSS.diff<-dbGetQuery(object@DB,TSSDiffQuery)
TSS.diff$sample_1<-factor(TSS.diff$sample_1,levels=myLevels)
TSS.diff$sample_2<-factor(TSS.diff$sample_2,levels=myLevels)
TSS.repFpkm<-dbGetQuery(object@DB,TSSRepFPKMQuery)
TSS.count<-dbGetQuery(object@DB,TSSCountQuery)
TSS.annotation<-dbGetQuery(object@DB,TSSAnnotationQuery)
end<-dbSendQuery(object@DB,"END;")
#write(.getGenome(object),stderr())
res<-new("CuffGene",
id=geneId,
features=genes.features,
annotation=genes.annotation,
fpkm=genes.fpkm,
diff=genes.diff,
repFpkm=genes.repFpkm,
count=genes.count,
genome=.getGenome(object),
isoforms=new("CuffFeature",
annotation=isoform.annotation,
fpkm=isoform.fpkm,
diff=isoform.diff,
repFpkm=isoform.repFpkm,
count=isoform.count
),
TSS=new("CuffFeature",
annotation=TSS.annotation,
fpkm=TSS.fpkm,
diff=TSS.diff,
repFpkm=TSS.repFpkm,
count=TSS.count
),
CDS=new("CuffFeature",
annotation=CDS.annotation,
fpkm=CDS.fpkm,
diff=CDS.diff,
repFpkm=CDS.repFpkm,
count=CDS.count
)
)
res
}
setMethod("getGene",signature(object="CuffSet"),.getGene)
.getGenes<-function(object,geneIdList,sampleIdList=NULL){
#Determine gene_id from geneIdList
#This is useful so that people can pass, for example, isoform_id to geneIdList and getGenes will return full genes
geneIdList<-getGeneId(object=object,geneIdList)
#Sample subsetting
if(!is.null(sampleIdList)){
if(.checkSamples(object@DB,sampleIdList)){
myLevels<-sampleIdList
}else{
stop("Sample does not exist!")
}
}else{
myLevels<-getLevels(object)
}
#Sample Search String (SQL)
sampleString<-'('
for (i in myLevels){
sampleString<-paste(sampleString,"'",i,"',",sep="")
}
sampleString<-substr(sampleString,1,nchar(sampleString)-1)
sampleString<-paste(sampleString,")",sep="")
#idQuery<-paste('SELECT DISTINCT x.gene_id from genes x WHERE (x.gene_id IN ',origIdString,' OR x.gene_short_name IN ',origIdString,')',sep="")
idString<-'('
for (i in geneIdList){
idString<-paste(idString,"'",i,"',",sep="")
}
idString<-substr(idString,1,nchar(idString)-1)
idString<-paste(idString,")",sep="")
#write(idString,stderr())
whereStringGene<-paste('WHERE x.gene_id IN ',idString,sep="")
whereStringGeneFPKM<-paste('WHERE x.gene_id IN ',idString,sep="")
whereStringGeneDiff<-paste('WHERE x.gene_id IN ',idString,sep="")
whereString<-paste('WHERE x.gene_id IN ',idString,sep="")
whereStringFPKM<-paste('WHERE x.gene_id IN ',idString,sep="")
whereStringDiff<-paste('WHERE x.gene_id IN ',idString,sep="")
if(!is.null(sampleIdList)){
whereStringGene<-whereStringGene
whereStringGeneFPKM<-paste(whereStringGeneFPKM,' AND y.sample_name IN ',sampleString,sep="")
whereStringGeneDiff<-paste(whereStringGeneDiff,' AND (y.sample_1 IN ',sampleString,' AND y.sample_2 IN ',sampleString,')',sep="")
whereString<-whereString
whereStringFPKM<-paste(whereStringFPKM, ' AND y.sample_name IN ',sampleString,sep="")
whereStringDiff<-paste(whereStringDiff,' AND (y.sample_1 IN ',sampleString,' AND y.sample_2 IN ',sampleString,')',sep="")
}
#geneAnnotationQuery<-paste("SELECT * from genes x LEFT JOIN geneFeatures xf ON x.gene_id=xf.gene_id ", whereStringGene,sep="")
geneAnnotationQuery<-paste("SELECT * from genes x LEFT JOIN geneFeatures xf USING (gene_id) ", whereStringGene,sep="")
geneFPKMQuery<-paste("SELECT y.* from genes x JOIN geneData y ON x.gene_id=y.gene_id ", whereStringGeneFPKM,sep="")
geneDiffQuery<-paste("SELECT y.* from genes x JOIN geneExpDiffData y ON x.gene_id=y.gene_id ", whereStringGeneDiff,sep="")
geneRepFPKMQuery<-paste("SELECT y.* from genes x JOIN geneReplicateData y on x.gene_id=y.gene_id ", whereStringGeneFPKM,sep="")
geneCountQuery<-paste("SELECT y.* from genes x JOIN geneCount y on x.gene_id=y.gene_id ", whereStringGeneFPKM,sep="")
isoformAnnotationQuery<-paste("SELECT x.* from isoforms x LEFT JOIN isoformFeatures xf ON x.isoform_id=xf.isoform_id JOIN genes g on x.gene_id=g.gene_id ", whereString,sep="")
isoformFPKMQuery<-paste("SELECT y.* from isoforms x JOIN isoformData y ON x.isoform_id = y.isoform_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
isoformDiffQuery<-paste("SELECT y.* from isoforms x JOIN isoformExpDiffData y ON x.isoform_id = y.isoform_id JOIN genes g on x.gene_id=g.gene_id ", whereStringDiff,sep="")
isoformRepFPKMQuery<-paste("SELECT y.* from isoforms x JOIN isoformReplicateData y on x.isoform_id=y.isoform_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
isoformCountQuery<-paste("SELECT y.* from isoforms x JOIN isoformCount y on x.isoform_id=y.isoform_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
TSSAnnotationQuery<-paste("SELECT x.* from TSS x LEFT JOIN TSSFeatures xf ON x.TSS_group_id=xf.TSS_group_id JOIN genes g on x.gene_id=g.gene_id ", whereString,sep="")
TSSFPKMQuery<-paste("SELECT y.* from TSS x JOIN TSSData y ON x.TSS_group_id=y.TSS_group_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
TSSDiffQuery<-paste("SELECT y.* from TSS x JOIN TSSExpDiffData y ON x.TSS_group_id=y.TSS_group_id JOIN genes g on x.gene_id=g.gene_id ", whereStringDiff,sep="")
TSSRepFPKMQuery<-paste("SELECT y.* from TSS x JOIN TSSReplicateData y on x.TSS_group_id=y.TSS_group_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
TSSCountQuery<-paste("SELECT y.* from TSS x JOIN TSSCount y on x.TSS_group_id=y.TSS_group_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
CDSAnnotationQuery<-paste("SELECT x.* from CDS x LEFT JOIN CDSFeatures xf ON x.CDS_id=xf.CDS_id JOIN genes g on x.gene_id=g.gene_id ", whereString,sep="")
CDSFPKMQuery<-paste("SELECT y.* from CDS x JOIN CDSData y ON x.CDS_id = y.CDS_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
CDSDiffQuery<-paste("SELECT y.* from CDS x JOIN CDSExpDiffData y ON x.CDS_id = y.CDS_id JOIN genes g on x.gene_id=g.gene_id ", whereStringDiff,sep="")
CDSRepFPKMQuery<-paste("SELECT y.* from CDS x JOIN CDSReplicateData y on x.CDS_id=y.CDS_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
CDSCountQuery<-paste("SELECT y.* from CDS x JOIN CDSCount y on x.CDS_id=y.CDS_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
promotersDistQuery<-paste("SELECT x.* FROM promoterDiffData x LEFT JOIN genes g ON x.gene_id=g.gene_id ", whereString,sep="")
splicingDistQuery<-paste("SELECT x.* FROM splicingDiffData x LEFT JOIN genes g ON x.gene_id=g.gene_id ", whereString,sep="")
CDSDistQuery<-paste("SELECT x.* FROM CDSDiffData x LEFT JOIN genes g ON x.gene_id=g.gene_id ", whereString,sep="")
begin<-dbSendQuery(object@DB,"BEGIN;")
#fetch records
#genes
write("Getting gene information:",stderr())
write("\tFPKM",stderr())
genes.fpkm<-dbGetQuery(object@DB,geneFPKMQuery)
genes.fpkm$sample_name<-factor(genes.fpkm$sample_name,levels=myLevels)
write("\tDifferential Expression Data",stderr())
genes.diff<-dbGetQuery(object@DB,geneDiffQuery)
genes.diff$sample_1<-factor(genes.diff$sample_1,levels=myLevels)
genes.diff$sample_2<-factor(genes.diff$sample_2,levels=myLevels)
write("\tAnnotation Data",stderr())
genes.annot<-dbGetQuery(object@DB,geneAnnotationQuery)
write("\tReplicate FPKMs",stderr())
genes.repFpkm<-dbGetQuery(object@DB,geneRepFPKMQuery)
write("\tCounts",stderr())
genes.count<-dbGetQuery(object@DB,geneCountQuery)
#isoforms
write("Getting isoforms information:",stderr())
write("\tFPKM",stderr())
isoform.fpkm<-dbGetQuery(object@DB,isoformFPKMQuery)
isoform.fpkm$sample_name<-factor(isoform.fpkm$sample_name,levels=myLevels)
write("\tDifferential Expression Data",stderr())
isoform.diff<-dbGetQuery(object@DB,isoformDiffQuery)
isoform.diff$sample_1<-factor(isoform.diff$sample_1,levels=myLevels)
isoform.diff$sample_2<-factor(isoform.diff$sample_2,levels=myLevels)
write("\tAnnotation Data",stderr())
isoform.annot<-dbGetQuery(object@DB,isoformAnnotationQuery)
write("\tReplicate FPKMs",stderr())
isoform.repFpkm<-dbGetQuery(object@DB,isoformRepFPKMQuery)
write("\tCounts",stderr())
isoform.count<-dbGetQuery(object@DB,isoformCountQuery)
#CDS
write("Getting CDS information:",stderr())
write("\tFPKM",stderr())
CDS.fpkm<-dbGetQuery(object@DB,CDSFPKMQuery)
CDS.fpkm$sample_name<-factor(CDS.fpkm$sample_name,levels=myLevels)
write("\tDifferential Expression Data",stderr())
CDS.diff<-dbGetQuery(object@DB,CDSDiffQuery)
CDS.diff$sample_1<-factor(CDS.diff$sample_1,levels=myLevels)
CDS.diff$sample_2<-factor(CDS.diff$sample_2,levels=myLevels)
write("\tAnnotation Data",stderr())
CDS.annot<-dbGetQuery(object@DB,CDSAnnotationQuery)
write("\tReplicate FPKMs",stderr())
CDS.repFpkm<-dbGetQuery(object@DB,CDSRepFPKMQuery)
write("\tCounts",stderr())
CDS.count<-dbGetQuery(object@DB,CDSCountQuery)
#TSS
write("Getting TSS information:",stderr())
write("\tFPKM",stderr())
TSS.fpkm<-dbGetQuery(object@DB,TSSFPKMQuery)
TSS.fpkm$sample_name<-factor(TSS.fpkm$sample_name,levels=myLevels)
write("\tDifferential Expression Data",stderr())
TSS.diff<-dbGetQuery(object@DB,TSSDiffQuery)
TSS.diff$sample_1<-factor(TSS.diff$sample_1,levels=myLevels)
TSS.diff$sample_2<-factor(TSS.diff$sample_2,levels=myLevels)
write("\tAnnotation Data",stderr())
TSS.annot<-dbGetQuery(object@DB,TSSAnnotationQuery)
write("\tReplicate FPKMs",stderr())
TSS.repFpkm<-dbGetQuery(object@DB,TSSRepFPKMQuery)
write("\tCounts",stderr())
TSS.count<-dbGetQuery(object@DB,TSSCountQuery)
end<-dbSendQuery(object@DB,"END;")
#Promoters
write("Getting promoter information:", stderr())
write("\tdistData",stderr())
promoters.distData<-dbGetQuery(object@DB,promotersDistQuery)
promoters.distData$sample_1<-factor(promoters.distData$sample_1,levels=myLevels)
promoters.distData$sample_2<-factor(promoters.distData$sample_2,levels=myLevels)
#Splicing
write("Getting splicing information:", stderr())
write("\tdistData",stderr())
splicing.distData<-dbGetQuery(object@DB,splicingDistQuery)
splicing.distData$sample_1<-factor(splicing.distData$sample_1,levels=myLevels)
splicing.distData$sample_2<-factor(splicing.distData$sample_2,levels=myLevels)
#relCDS
write("Getting relCDS information:", stderr())
write("\tdistData",stderr())
CDS.distData<-dbGetQuery(object@DB,CDSDistQuery)
CDS.distData$sample_1<-factor(CDS.distData$sample_1,levels=myLevels)
CDS.distData$sample_2<-factor(CDS.distData$sample_2,levels=myLevels)
res<-new("CuffGeneSet",
#TODO: Fix ids so that it only displays those genes in CuffGeneSet
ids=geneIdList,
annotation=genes.annot,
fpkm=genes.fpkm,
diff=genes.diff,
repFpkm=genes.repFpkm,
count=genes.count,
genome=.getGenome(object),
isoforms=new("CuffFeatureSet",
annotation=isoform.annot,
fpkm=isoform.fpkm,
diff=isoform.diff,
repFpkm=isoform.repFpkm,
count=isoform.count
),
TSS=new("CuffFeatureSet",
annotation=TSS.annot,
fpkm=TSS.fpkm,
diff=TSS.diff,
repFpkm=TSS.repFpkm,
count=TSS.count
),
CDS=new("CuffFeatureSet",
annotation=CDS.annot,
fpkm=CDS.fpkm,
diff=CDS.diff,
repFpkm=CDS.repFpkm,
count=CDS.count
),
promoters=new("CuffFeatureSet",
annotation=genes.annot,
fpkm=genes.fpkm,
diff=promoters.distData
),
splicing=new("CuffFeatureSet",
annotation=TSS.annot,
fpkm=TSS.fpkm,
diff=splicing.distData
),
relCDS=new("CuffFeatureSet",
annotation=genes.annot,
fpkm=genes.fpkm,
diff=CDS.distData
)
)
res
}
setMethod("getGenes",signature(object="CuffSet"),.getGenes)
.getGeneId<-function(object,idList){
#Query that takes list of any identifier and retrieves gene_id values from db (does not report missing finds)
searchString<-"("
for(i in idList){
searchString<-paste(searchString,"'",i,"',",sep="")
}
searchString<-substr(searchString,1,nchar(searchString)-1)
searchString<-paste(searchString,")",sep="")
geneIdQuery<-paste("SELECT DISTINCT g.gene_id FROM genes g LEFT JOIN isoforms i on g.gene_id=i.gene_id LEFT JOIN TSS t on g.gene_id=t.gene_id LEFT JOIN CDS c ON g.gene_id=c.gene_id WHERE (g.gene_id IN ",searchString," OR g.gene_short_name IN ",searchString," OR i.isoform_id IN ",searchString," OR t.tss_group_id IN ",searchString," OR c.CDS_id IN ",searchString,")",sep="")
#print(geneIdQuery)
res<-dbGetQuery(object@DB,geneIdQuery)
as.vector(res[,1])
}
setMethod("getGeneId",signature(object="CuffSet"),.getGeneId)
.findGene<-function(object,query){
#Utility to search for gene_id and gene_short_name given a single 'query' string (e.g. query='pink1' will return all genes with 'pink1' (case-insensitive) in the gene_short_name field.
geneQuery<-paste("SELECT DISTINCT g.gene_id,g.gene_short_name FROM genes g LEFT JOIN isoforms i on g.gene_id=i.gene_id LEFT JOIN TSS t on g.gene_id=t.gene_id LEFT JOIN CDS c ON g.gene_id=c.gene_id WHERE (g.gene_id = '",query,"' OR g.gene_short_name = '",query,"' OR i.isoform_id = '",query,"' OR t.tss_group_id = '",query,"' OR c.CDS_id ='",query,"') OR g.gene_short_name LIKE '%",query,"%'",sep="")
res<-dbGetQuery(object@DB,geneQuery)
res
}
setMethod("findGene",signature(object="CuffSet"),.findGene)
.getFeatures<-function(object,featureIdList,sampleIdList=NULL,level='isoforms'){
#Sample subsetting
if(!is.null(sampleIdList)){
if(.checkSamples(object@DB,sampleIdList)){
myLevels<-sampleIdList
}else{
stop("Sample does not exist!")
}
}else{
myLevels<-getLevels(object)
}
sampleString<-'('
for (i in myLevels){
sampleString<-paste(sampleString,"'",i,"',",sep="")
}
sampleString<-substr(sampleString,1,nchar(sampleString)-1)
sampleString<-paste(sampleString,")",sep="")
#ID Search String (SQL)
idString<-'('
for (i in featureIdList){
idString<-paste(idString,"'",i,"',",sep="")
}
idString<-substr(idString,1,nchar(idString)-1)
idString<-paste(idString,")",sep="")
whereString<-paste(' WHERE (x.',slot(object,level)@idField,' IN ',idString,')',sep="")
whereStringFPKM<-paste(' WHERE (x.',slot(object,level)@idField,' IN ',idString,')',sep="")
whereStringDiff<-paste(' WHERE (x.',slot(object,level)@idField,' IN ',idString,')',sep="")
if(!is.null(sampleIdList)){
whereString<-whereString
whereStringFPKM<-paste(whereStringFPKM, ' AND y.sample_name IN ',sampleString,sep="")
whereStringDiff<-paste(whereStringDiff,' AND (y.sample_1 IN ',sampleString,' AND y.sample_2 IN ',sampleString,')',sep="")
}
AnnotationQuery<-paste("SELECT x.* from ",slot(object,level)@tables$mainTable," x LEFT JOIN ",slot(object,level)@tables$featureTable," xf ON x.",slot(object,level)@idField,"=xf.",slot(object,level)@idField, whereString,sep="")
FPKMQuery<-paste("SELECT y.* from ",slot(object,level)@tables$mainTable," x JOIN ",slot(object,level)@tables$dataTable," y ON x.",slot(object,level)@idField," = y.",slot(object,level)@idField,whereStringFPKM,sep="")
DiffQuery<-paste("SELECT y.* from ",slot(object,level)@tables$mainTable," x JOIN ",slot(object,level)@tables$expDiffTable," y ON x.",slot(object,level)@idField," = y.",slot(object,level)@idField,whereStringDiff,sep="")
repFPKMQuery<-paste("SELECT y.* from ",slot(object,level)@tables$mainTable," x JOIN ",slot(object,level)@tables$replicateTable," y ON x.",slot(object,level)@idField," = y.",slot(object,level)@idField,whereStringFPKM,sep="")
countQuery<-paste("SELECT y.* from ",slot(object,level)@tables$mainTable," x JOIN ",slot(object,level)@tables$countTable," y ON x.",slot(object,level)@idField," = y.",slot(object,level)@idField,whereStringFPKM,sep="")
#print(AnnotationQuery)
#print(FPKMQuery)
#print(DiffQuery)
#print(repFPKMQuery)
#print(countQuery)
begin<-dbSendQuery(object@DB,"BEGIN;")
res<-new("CuffFeatureSet",
annotation=dbGetQuery(object@DB,AnnotationQuery),
fpkm=dbGetQuery(object@DB,FPKMQuery),
diff=dbGetQuery(object@DB,DiffQuery),
repFpkm=dbGetQuery(object@DB,repFPKMQuery),
count=dbGetQuery(object@DB,countQuery),
genome=.getGenome(object)
)
end<-dbSendQuery(object@DB,"END;")
res
}
setMethod("getFeatures",signature(object="CuffSet"),.getFeatures)
#getGeneIds from featureIds
#SELECT DISTINCT g.gene_id from genes g LEFT JOIN isoforms i on g.gene_id=i.gene_id LEFT JOIN TSS t on g.gene_id=t.gene_id LEFT JOIN CDS c on g.gene_id=c.gene_id WHERE (g.gene_id IN ('$VAL') OR i.isoform_id IN ('$VAL') OR t.tss_group_id IN ('$VAL') OR c.CDS_id IN ('$VAL') OR g.gene_short_name IN ('$VAL'));
#getSig() returns a list vectors of significant features by pairwise comparisons
#Depricated in favor of .getSig2
#.getSig<-function(object,x,y,level="genes",testTable=FALSE){
# mySamp<-samples(slot(object,level))
# sigGenes<-list()
# if(level %in% c('promoters','splicing','relCDS')){
# diffTable<-slot(object,level)@table
# }else{
# diffTable<-slot(object,level)@tables$expDiffTable
# }
#
# #Restrict samples to those provided as x and y
# if(!missing(x) && !missing(y)){
# mySamp<-c(x,y)
# if(!all(mySamp %in% samples(slot(object,level)))){
# stop("One or more values of 'x' or 'y' are not valid sample names!")
# }
# }
#
# for (ihat in c(1:(length(mySamp)-1))){
# for(jhat in c((ihat+1):length(mySamp))){
# i<-mySamp[ihat]
# j<-mySamp[jhat]
# testName<-paste(i,j,sep="vs")
# queryString<-paste("('",i,"','",j,"')",sep="")
# sql<-paste("SELECT ",slot(object,level)@idField," from ", diffTable," WHERE sample_1 IN ",queryString," AND sample_2 IN ",queryString, " AND significant='yes'",sep="")
# sig<-dbGetQuery(object@DB,sql)
# sigGenes[[testName]]<-sig[,1]
# }
# }
# #TODO: Add conditional return for if x & y are not null, to just return that test...
# if(testTable){
# tmp<-reshape2:::melt.list(sigGenes)
# return(cast(tmp,value~...,length))
# }else{
# return(sigGenes)
# }
#
#}
#Depricated in favor of .getSig
#.getSig2<-function(object,x,y,level="genes",testTable=FALSE,alpha=0.05){
# mySamp<-samples(slot(object,level))
# sigGenes<-list()
# if(level %in% c('promoters','splicing','relCDS')){
# diffTable<-slot(object,level)@table
# }else{
# diffTable<-slot(object,level)@tables$expDiffTable
# }
#
# #Restrict samples to those provided as x and y
# if(!missing(x) && !missing(y)){
# mySamp<-c(x,y)
# if(!all(mySamp %in% samples(slot(object,level)))){
# stop("One or more values of 'x' or 'y' are not valid sample names!")
# }
# }
#
# for (ihat in c(1:(length(mySamp)-1))){
# for(jhat in c((ihat+1):length(mySamp))){
# i<-mySamp[ihat]
# j<-mySamp[jhat]
# testName<-paste(i,j,sep="vs")
# queryString<-paste("('",i,"','",j,"')",sep="")
# sql<-paste("SELECT ",slot(object,level)@idField,",p_value,q_value from ", diffTable," WHERE sample_1 IN ",queryString," AND sample_2 IN ",queryString, " AND STATUS='OK'",sep="")
# sig<-dbGetQuery(object@DB,sql)
#
# #recalculate q-values for all tests in single pairwise comparison
# if(!missing(x) && !(missing(y))) {
# sig$q_value<-p.adjust(sig$p_value,method="BH")
# }
# #Filter on alpha
# sig<-sig[sig$q_value<=alpha,]
# sigGenes[[testName]]<-sig[,1]
# }
# }
#
# if(testTable){
# tmp<-reshape2:::melt.list(sigGenes)
# return(cast(tmp,value~...,length))
# }else{
# return(sigGenes)
# }
#
#}
.getSig<-function(object,x,y,alpha=0.05,level='genes',method="BH",useCuffMTC=FALSE){
mySamp<-samples(slot(object,level))
if(level %in% c('promoters','splicing','relCDS')){
diffTable<-slot(object,level)@table
}else{
diffTable<-slot(object,level)@tables$expDiffTable
}
#Restrict samples to those provided as x and y
if(!missing(x) && !missing(y)){
mySamp<-c(x,y)
if(!all(mySamp %in% samples(slot(object,level)))){
stop("One or more values of 'x' or 'y' are not valid sample names!")
}
}
queryString<-paste("(",paste(mySamp,collapse="','",sep=""),")",sep="'")
sql<-paste("SELECT ",slot(object,level)@idField,",p_value,q_value from ", diffTable," WHERE sample_1 IN ",queryString," AND sample_2 IN ",queryString, " AND STATUS='OK'",sep="")
#print(sql)
sig<-dbGetQuery(object@DB,sql)
if(!missing(x) && !missing(y) && !useCuffMTC){
sig$q_value<-p.adjust(sig$p_value,method=method)
#print(sig[order(sig$q_value,decreasing=T),])
}
sig<-sig[sig$q_value<=alpha,]
sigGenes<-unique(sig[[slot(object,level)@idField]])
sigGenes
}
setMethod("getSig",signature(object="CuffSet"),.getSig)
.getSigTable<-function(object,alpha=0.05,level='genes'){
if(level %in% c('promoters','splicing','relCDS')){
diffTable<-slot(object,level)@table
}else{
diffTable<-slot(object,level)@tables$expDiffTable
}
sql<-paste("SELECT ",slot(object,level)@idField,", sample_1, sample_2, p_value from ", diffTable," WHERE status='OK';")
sig<-dbGetQuery(object@DB,sql)
sig$q_value<-p.adjust(sig$p_value,method='BH')
sig$testName<-paste(sig$sample_1,"vs",sig$sample_2,sep="")
#filter on alpha and clean table
sig$testResult<-0
sig$testResult[sig$q_value<=alpha]<-1
fieldsNeeded<-c(slot(object,level)@idField,'testName','testResult')
sig<-sig[,fieldsNeeded]
#recast
sig.table<-acast(sig,as.formula(paste(slot(object,level)@idField,"~testName")),value='testResult')
#remove genes that do not reject null in any test
sig.table<-sig.table[rowSums(sig.table,na.rm=T)>0,]
sig.table
}
setMethod("getSigTable",signature(object="CuffSet"),.getSigTable)
####################
#QC Visualizations on entire dataset
#####################
.sigMatrix<-function(object,alpha=0.05,level='genes',orderByDist=FALSE){
if(level %in% c('promoters','splicing','relCDS')){
diffTable<-slot(object,level)@table
}else{
diffTable<-slot(object,level)@tables$expDiffTable
}
sql<-paste("SELECT ",slot(object,level)@idField,", sample_1, sample_2, p_value from ", diffTable," WHERE status='OK';")
sig<-dbGetQuery(object@DB,sql)
sig$q_value<-p.adjust(sig$p_value,method='BH')
sig<-sig[sig$q_value<=alpha,]
fieldsNeeded<-c('sample_1','sample_2')
sig<-sig[,fieldsNeeded]
if(orderByDist){
#This does not work yet...
mySamples<-colnames(fpkmMatrix(slot(object,level)))
sampleOrder<-mySamples[order.dendrogram(as.dendrogram(hclust(JSdist(makeprobs(log10(fpkmMatrix(slot(object,level))))))))]
}
else {
sampleOrder<-rev(samples(object)$sample_name)
}
sig$sample_1<-factor(sig$sample_1,levels=sampleOrder)
sig$sample_2<-factor(sig$sample_2,levels=sampleOrder)
p<-ggplot(sig,aes(x=sample_1,y=sample_2))
p<- p + stat_sum(aes(fill=..n..),color="black",size=0.3, geom="tile") + scale_fill_continuous(low="white",high="green") + expand_limits(fill=0)
p<- p + stat_sum(aes(label=..n..),geom="text",size=6,show_guide=FALSE)
#p <- p + geom_tile(aes(fill=..n..))
p + theme_bw() + labs(title=paste("Significant ",slot(object,level)@tables$mainTable,"\n at alpha ",alpha,sep="")) +theme(axis.text.x=element_text(angle=-90, hjust=0)) + coord_equal(1)
}
setMethod("sigMatrix",signature(object="CuffSet"),.sigMatrix)
#dispersionPlot on cuffSet objects actually draws from the varModel table and is the preferred way to visualize the dispersion and model fitting from cuffdiff 2.1 or greater
.dispersionPlot<-function(object){
dat<-varModel(object)
p<-ggplot(dat)
p<-p + geom_point(aes(x=compatible_count_mean,y=compatible_count_var,color=condition),alpha=0.3,size=0.8) + geom_line(aes(x=compatible_count_mean,y=fitted_var),lwd=0.5,color="black") + facet_wrap('condition') +scale_y_log10() + scale_x_log10() + theme_bw() + guides(color=FALSE)
p
}
setMethod("dispersionPlot",signature(object="CuffSet"),.dispersionPlot)
#Find similar genes
.findSimilar<-function(object,x,n,distThresh,returnGeneSet=TRUE,...){
#x can be either a gene_id, gene_short_name or a vector of FPKM values (fake gene expression profile)
#TODO: make findSimilar work with all levels
#TODO: Possibly add FPKM thresholding
if(is.character(x)){
myGene<-getGene(object,x)
sig<-makeprobsvec(fpkmMatrix(myGene,...)[1,])
}else if(is.vector(x)){
sig<-makeprobsvec(x)
}
allGenes<-fpkmMatrix(object@genes,...)
allGenes<-t(makeprobs(t(allGenes)))
compare<-function(q){
JSdistVec(sig,q)
}
myDist<-apply(allGenes,MARGIN=1,compare)
if(!missing(distThresh)){
myDist<-myDist[myDist<=distThresh]
}
myDist<-sort(myDist)
if(!missing(n)){
myDist<-myDist[1:n]
}
mySimilarIds<-names(myDist)
if(returnGeneSet){
mySimilarGenes<-getGenes(object,mySimilarIds,...)
return(mySimilarGenes)
}else{
res<-as.data.frame(myDist)
colnames(res)<-c("distance")
return(res)
}
}
setMethod("findSimilar",signature(object="CuffSet"),.findSimilar)
############
#SQL access
############
################
#Misc Utilities
################
.getLevels<-function(object){
levelsQuery<-'SELECT s.sample_name FROM samples s ORDER BY s.sample_index ASC'
levels<-dbGetQuery(object@DB,levelsQuery)$sample_name
levels
}
setMethod("getLevels",signature(object="CuffSet"),.getLevels)
.getRepLevels<-function(object){
levelsQuery<-'SELECT r.rep_name FROM replicates r LEFT JOIN samples s ON r.sample_name=s.sample_name ORDER BY s.sample_index ASC'
levels<-dbGetQuery(object@DB,levelsQuery)$rep_name
levels
}
setMethod("getRepLevels",signature(object="CuffSet"),.getRepLevels)
.checkSamples<-function(dbConn,sampleIdList){
dbSamples<-dbReadTable(dbConn,"samples")
if (all(sampleIdList %in% dbSamples$sample_name)){
return(TRUE)
}else{
return(FALSE)
}
}
#####################
#GenomicRanges
#####################
#.makeGRanges<-function(dbConn,id,idField='transcript_id'){
# txQuery<-paste("SELECT * FROM features WHERE ",idField,"='",id,"'",sep="")
# print(txQuery)
# features<-dbGetQuery(dbConn,txQuery)
# #res<-GRanges(seqnames=features$seqnames,ranges=IRanges(start=features$start,end=features$end,names=features$exon_number),strand=features$strand)
# res<-GRanges(features[,-1])
# res
#}
.makeGRanges<-function(object,id,idField='transcript_id'){
txQuery<-paste("SELECT * from features WHERE ",idField,"='",id,"'",sep="")
myFeat<-dbGetQuery(object@DB,txQuery)
#res<-GRanges(myFeat[,-1])
res<-GRanges(seqnames=myFeat$seqnames,ranges=IRanges(start=myFeat$start,end=myFeat$end,names=myFeat$exon_number),strand=myFeat$strand)
res
}
setMethod("makeGRanges",signature(object="CuffSet"),.makeGRanges)
#.makeGRangesList<-function(object,id,idField="gene_id"){
# #use .makeGRanges for each sub-feature of id to create GRangesList
#}
#
#setMethod("makeGRangesList",signature(object="CuffSet"),.makeGRangesList)
#####################
#Add FeatureData #
#####################
.addFeatures<-function(object,features,level="genes",...){
if(!is.data.frame(features)){
stop("features must be a data.frame")
}
colnames(features)[1]<-slot(object,level)@idField
colnames(features)<-dbQuoteIdentifier(object@DB,colnames(features),unique=T)
dbWriteTable(object@DB,slot(object,level)@tables$featureTable,features,row.names=F,overwrite=T)
indexQuery<-paste("CREATE INDEX ",slot(object,level)@idField," ON ", slot(object,level)@tables$featureTable," (",slot(object,level)@idField,")",sep="")
res<-dbGetQuery(object@DB,indexQuery)
}
setMethod("addFeatures",signature(object="CuffSet"),.addFeatures)
#TODO: Add method to purge existing feature data table to allow 'refresh' of feature level data
##############
#Reporting
##############
#runReport<-function(){
# if(!file.exists(".output")){
# dir.create(".output")
# }
# file.copy(system.file("reports/runReport.Rnw", package="cummeRbund"),paste(".output/","runReport.Rnw",sep=""),overwrite=T)
# myWD<-getwd()
# setwd(".output")
# Sweave("runReport.Rnw")
# tools::texi2dvi("runReport.tex",pdf=TRUE)
# setwd(myWD)
#}
###################
# Coersion methods
###################
#As ExpressionSet
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.