R/methods-CuffSet.R

Defines functions .samples .replicates .runInfo .varModel .getGenome .getGene .getGenes .getGeneId .findGene .getFeatures .getSig .getSigTable .sigMatrix .dispersionPlot .findSimilar .getLevels .getRepLevels .checkSamples .makeGRanges .addFeatures

##################
#methods-CuffSet.R
#
#Introduces the CuffSet Class for analysis, manipulation, and plotting of Cufflinks data
#
#Author: Loyal A. Goff
#
##################

#Initialize
setMethod("initialize","CuffSet",
		function(.Object,
				DB,
				#runInfo=data.frame(),
				phenoData=data.frame(),
				conditions=data.frame(),
				genes,
				isoforms,
				TSS,
				CDS,
				promoters,
				splicing,
				relCDS,
				...){
			.Object<-callNextMethod(.Object,
					DB = DB,
					#runInfo=runInfo,
					phenoData=phenoData,
					conditions = conditions,
					genes = genes,
					isoforms = isoforms,
					TSS = TSS,
					CDS = CDS,
					promoters = promoters,
					splicing = splicing,
					relCDS = relCDS,
					...)				
		}
)

##################
#Class Methods
##################
setMethod("show","CuffSet",
		function(object){
			cat(class(object), "instance with:\n\t",
					dim(object@genes)[2],"samples\n\t",
					dim(object@genes)[1],"genes\n\t",
					dim(object@isoforms)[1],"isoforms\n\t",
					dim(object@TSS)[1],"TSS\n\t",
					dim(object@CDS)[1],"CDS\n\t",
					dim(object@promoters)[1],"promoters\n\t",
					dim(object@splicing)[1],"splicing\n\t",
					dim(object@relCDS)[1],"relCDS\n"
					)
		}
)

#This does not subset appropriately yet
#TODO: Fix for multiple values of i
#
#Solution is to test i to determine if it is of type 'numeric' (index), list (multi-index), or 'character' (gene_ids)
#
#TODO: 	- Add 'j' to select on sampleNames as well
#		- Add ability to search on gene_short_name(s) or featureIDs
		
#setMethod("[",signature(x="CuffSet"),function(x, i, ...){
#			featureIDs<-featureNames(x@genes)[i]
#			if(length(featureIDs)==1){
#				res<-getGene(x,featureID)
#			}else{
#				res<-getGenes(x,featureIDs)
#			}
#			res
#		}
#)


setValidity("CuffSet",
		function(object){
		TRUE	
		}
)

############
#Accessors
############
.samples<-function(object){
	sampleQuery<-"SELECT * FROM samples s LEFT JOIN phenoData p on s.sample_name = p.sample_name"
	dbGetQuery(object@DB,sampleQuery)
}

setMethod("samples",signature(object="CuffSet"),.samples)

.replicates<-function(object){
	replicateQuery<-"SELECT * FROM replicates r"
	dbGetQuery(object@DB,replicateQuery)
}

setMethod("replicates",signature(object="CuffSet"),.replicates)

setMethod("DB","CuffSet",function(object){
		return(object@DB)
		})

.runInfo<-function(object){
	runInfoQuery<-"SELECT * FROM runInfo"
	dbGetQuery(object@DB,runInfoQuery)
}

setMethod("runInfo","CuffSet",.runInfo)

.varModel<-function(object){
	varModelQuery<-"SELECT * from varModel"
	dbGetQuery(object@DB,varModelQuery)
}

setMethod("varModel","CuffSet",.varModel)

#setMethod("phenoData","CuffSet",function(object){
#			return(object@phenoData)
#		})

setMethod("conditions","CuffSet",function(object){
		return(object@conditions)
		})

setMethod("genes","CuffSet",function(object){
		return(object@genes)
		})

setMethod("isoforms","CuffSet",function(object){
		return(object@isoforms)
		})

setMethod("TSS","CuffSet",function(object){
		return(object@TSS)
		})

setMethod("CDS","CuffSet",function(object){
		return(object@CDS)
		})

setMethod("promoters","CuffSet",function(object){
		return(object@promoters)
		})

setMethod("splicing","CuffSet",function(object){
		return(object@splicing)
		})

setMethod("relCDS","CuffSet",function(object){
		return(object@relCDS)
		})

.getGenome<-function(object){
	genomeQuery<-"SELECT value FROM runInfo WHERE param='genome'"
	genome<-dbGetQuery(object@DB,genomeQuery)
	genome<-unique(genome[,1])
	genome
}

#make CuffGene objects from a gene_ids
.getGene<-function(object,geneId,sampleIdList=NULL){
	
	#Get gene_id from geneId (can use any identifier now to select gene)
	geneId<-getGeneId(object,geneId)
	
	if(length(geneId)>1){
		stop("More than one gene_id found for given query. Please use getGenes() instead.")
	}
	
	#Sample subsetting
	if(!is.null(sampleIdList)){
		if(.checkSamples(object@DB,sampleIdList)){
			myLevels<-sampleIdList
		}else{
			stop("Sample does not exist!")
		}
	}else{
		myLevels<-getLevels(object)
	}
	
	#Sample Search String (SQL)
	sampleString<-'('
	for (i in myLevels){
		sampleString<-paste(sampleString,"'",i,"',",sep="")
	}
	sampleString<-substr(sampleString,1,nchar(sampleString)-1)
	sampleString<-paste(sampleString,')',sep="")
	
	whereString = paste("WHERE (x.gene_id ='",geneId,"' OR x.gene_short_name = '",geneId,"')",sep="")
	whereStringFPKM = paste("WHERE (x.gene_id ='",geneId,"' OR x.gene_short_name = '",geneId,"')",' AND (y.sample_name IN ',sampleString,')',sep="")
	whereStringDiff = paste("WHERE (x.gene_id ='",geneId,"' OR x.gene_short_name = '",geneId,"')",' AND (y.sample_1 IN ',sampleString,' AND y.sample_2 IN ',sampleString,')',sep="")
	whereStringRep = paste("JOIN replicates r ON y.rep_name=r.rep_name WHERE (x.gene_id ='",geneId,"' OR x.gene_short_name = '",geneId,"')",' AND (r.sample_name IN ',sampleString,')',sep="")
	
	#dbQueries
	geneAnnotationQuery<-paste("SELECT * from genes x LEFT JOIN geneFeatures xf USING(gene_id) ",whereString,sep="")
	geneFPKMQuery<-paste("SELECT y.* from genes x JOIN geneData y ON x.gene_id=y.gene_id ",whereStringFPKM,sep="")
	#print(geneFPKMQuery)
	geneDiffQuery<-paste("SELECT y.* from genes x JOIN geneExpDiffData y ON x.gene_id=y.gene_id ",whereStringDiff,sep="")
	#print(geneDiffQuery)
	geneRepFPKMQuery<-paste("SELECT y.* from genes x JOIN geneReplicateData y ON x.gene_id=y.gene_id ",whereStringRep,sep="")
	geneCountQuery<-paste("SELECT y.* from genes x JOIN geneCount y ON x.gene_id=y.gene_id ",whereStringFPKM,sep="")
	geneFeatureQuery<-paste("SELECT y.* FROM features y JOIN genes x on y.gene_id = x.gene_id ",whereString,sep="")
	
	isoformAnnotationQuery<-paste("SELECT * from isoforms i JOIN genes x ON i.gene_id = x.gene_id ",whereString,sep="")
	isoformFPKMQuery<-paste("SELECT y.* from isoforms i JOIN isoformData y ON i.isoform_id = y.isoform_id JOIN genes x ON i.gene_id = x.gene_id ",whereStringFPKM,sep="")
	isoformDiffQuery<-paste("SELECT y.* from isoforms i JOIN isoformExpDiffData y ON i.isoform_id = y.isoform_id JOIN genes x ON i.gene_id = x.gene_id ",whereStringDiff,sep="")
	isoformRepFPKMQuery<-paste("SELECT y.* from isoforms i JOIN isoformReplicateData y ON i.isoform_id = y.isoform_id JOIN genes x ON i.gene_id = x.gene_id ",whereStringRep,sep="")
	isoformCountQuery<-paste("SELECT y.* from isoforms i JOIN isoformCount y ON i.isoform_id = y.isoform_id JOIN genes x ON i.gene_id = x.gene_id ",whereStringFPKM,sep="")
	
	TSSAnnotationQuery<-paste("SELECT * from TSS t JOIN genes x ON t.gene_id = x.gene_id ",whereString,sep="")
	TSSFPKMQuery<-paste("SELECT y.* from TSS t JOIN TSSData y ON t.TSS_group_id=y.TSS_group_id JOIN genes x ON t.gene_id = x.gene_id ",whereStringFPKM,sep="")
	TSSDiffQuery<-paste("SELECT y.* from TSS t JOIN TSSExpDiffData y ON t.TSS_group_id=y.TSS_group_id JOIN genes x ON t.gene_id = x.gene_id ",whereStringDiff,sep="")
	TSSRepFPKMQuery<-paste("SELECT y.* from TSS t JOIN TSSReplicateData y ON t.TSS_group_id=y.TSS_group_id JOIN genes x ON t.gene_id = x.gene_id ",whereStringRep,sep="")
	TSSCountQuery<-paste("SELECT y.* from TSS t JOIN TSSCount y ON t.TSS_group_id=y.TSS_group_id JOIN genes x ON t.gene_id = x.gene_id ",whereStringFPKM,sep="")
	
	
	CDSAnnotationQuery<-paste("SELECT * from CDS c JOIN genes x ON c.gene_id = x.gene_id ",whereString,sep="")
	CDSFPKMQuery<-paste("SELECT y.* from CDS c JOIN CDSData y ON c.CDS_id = y.CDS_id JOIN genes x ON c.gene_id = x.gene_id ",whereStringFPKM,sep="")
	CDSDiffQuery<-paste("SELECT y.* from CDS c JOIN CDSExpDiffData y ON c.CDS_id = y.CDS_id JOIN genes x ON c.gene_id = x.gene_id ",whereStringDiff,sep="")
	CDSRepFPKMQuery<-paste("SELECT y.* from CDS c JOIN CDSReplicateData y ON c.CDS_id = y.CDS_id JOIN genes x ON c.gene_id = x.gene_id ",whereStringRep,sep="")
	CDSCountQuery<-paste("SELECT y.* from CDS c JOIN CDSCount y ON c.CDS_id = y.CDS_id JOIN genes x ON c.gene_id = x.gene_id ",whereStringFPKM,sep="")
	
	begin<-dbSendQuery(object@DB,"BEGIN;")
	
	#fetch records
	#genes
	genes.fpkm<-dbGetQuery(object@DB,geneFPKMQuery)
	genes.fpkm$sample_name<-factor(genes.fpkm$sample_name,levels=myLevels)
	genes.diff<-dbGetQuery(object@DB,geneDiffQuery)
	genes.diff$sample_1<-factor(genes.diff$sample_1,levels=myLevels)
	genes.diff$sample_2<-factor(genes.diff$sample_2,levels=myLevels)
	genes.repFpkm<-dbGetQuery(object@DB,geneRepFPKMQuery)
	genes.count<-dbGetQuery(object@DB,geneCountQuery)
	genes.annotation<-dbGetQuery(object@DB,geneAnnotationQuery)
	genes.features<-dbGetQuery(object@DB,geneFeatureQuery)
	
	#isoforms
	isoform.fpkm<-dbGetQuery(object@DB,isoformFPKMQuery)
	isoform.fpkm$sample_name<-factor(isoform.fpkm$sample_name,levels=myLevels)
	isoform.diff<-dbGetQuery(object@DB,isoformDiffQuery)
	isoform.diff$sample_1<-factor(isoform.diff$sample_1,levels=myLevels)
	isoform.diff$sample_2<-factor(isoform.diff$sample_2,levels=myLevels)
	isoform.repFpkm<-dbGetQuery(object@DB,isoformRepFPKMQuery)
	isoform.count<-dbGetQuery(object@DB,isoformCountQuery)
	isoform.annotation<-dbGetQuery(object@DB,isoformAnnotationQuery)
	
	#CDS
	CDS.fpkm<-dbGetQuery(object@DB,CDSFPKMQuery)
	CDS.fpkm$sample_name<-factor(CDS.fpkm$sample_name,levels=myLevels)
	CDS.diff<-dbGetQuery(object@DB,CDSDiffQuery)
	CDS.diff$sample_1<-factor(CDS.diff$sample_1,levels=myLevels)
	CDS.diff$sample_2<-factor(CDS.diff$sample_2,levels=myLevels)
	CDS.repFpkm<-dbGetQuery(object@DB,CDSRepFPKMQuery)
	CDS.count<-dbGetQuery(object@DB,CDSCountQuery)
	CDS.annotation<-dbGetQuery(object@DB,CDSAnnotationQuery)
	
	#TSS
	TSS.fpkm<-dbGetQuery(object@DB,TSSFPKMQuery)
	TSS.fpkm$sample_name<-factor(TSS.fpkm$sample_name,levels=myLevels)
	TSS.diff<-dbGetQuery(object@DB,TSSDiffQuery)
	TSS.diff$sample_1<-factor(TSS.diff$sample_1,levels=myLevels)
	TSS.diff$sample_2<-factor(TSS.diff$sample_2,levels=myLevels)
	TSS.repFpkm<-dbGetQuery(object@DB,TSSRepFPKMQuery)
	TSS.count<-dbGetQuery(object@DB,TSSCountQuery)
	TSS.annotation<-dbGetQuery(object@DB,TSSAnnotationQuery)
	
	end<-dbSendQuery(object@DB,"END;")
	
	#write(.getGenome(object),stderr())
	res<-new("CuffGene",
			id=geneId,
			features=genes.features,
			annotation=genes.annotation,
			fpkm=genes.fpkm,
			diff=genes.diff,
			repFpkm=genes.repFpkm,
			count=genes.count,
			genome=.getGenome(object),
			isoforms=new("CuffFeature",
					annotation=isoform.annotation,
					fpkm=isoform.fpkm,
					diff=isoform.diff,
					repFpkm=isoform.repFpkm,
					count=isoform.count
					),
			TSS=new("CuffFeature",
					annotation=TSS.annotation,
					fpkm=TSS.fpkm,
					diff=TSS.diff,
					repFpkm=TSS.repFpkm,
					count=TSS.count
					),
			CDS=new("CuffFeature",
					annotation=CDS.annotation,
					fpkm=CDS.fpkm,
					diff=CDS.diff,
					repFpkm=CDS.repFpkm,
					count=CDS.count
					)
			
			)
		res
}

setMethod("getGene",signature(object="CuffSet"),.getGene)
	
.getGenes<-function(object,geneIdList,sampleIdList=NULL){
	
	#Determine gene_id from geneIdList
	#This is useful so that people can pass, for example, isoform_id to geneIdList and getGenes will return full genes
	geneIdList<-getGeneId(object=object,geneIdList)
	
	#Sample subsetting
	if(!is.null(sampleIdList)){
		if(.checkSamples(object@DB,sampleIdList)){
			myLevels<-sampleIdList
		}else{
			stop("Sample does not exist!")
		}
	}else{
		myLevels<-getLevels(object)
	}
	
	#Sample Search String (SQL)
	sampleString<-'('
	for (i in myLevels){
		sampleString<-paste(sampleString,"'",i,"',",sep="")
	}
	sampleString<-substr(sampleString,1,nchar(sampleString)-1)
	sampleString<-paste(sampleString,")",sep="")

	
	#idQuery<-paste('SELECT DISTINCT x.gene_id from genes x WHERE (x.gene_id IN ',origIdString,' OR x.gene_short_name IN ',origIdString,')',sep="")

	idString<-'('
	for (i in geneIdList){
		idString<-paste(idString,"'",i,"',",sep="")
	}
	idString<-substr(idString,1,nchar(idString)-1)
	idString<-paste(idString,")",sep="")
	
	#write(idString,stderr())
	
	whereStringGene<-paste('WHERE x.gene_id IN ',idString,sep="")
	whereStringGeneFPKM<-paste('WHERE x.gene_id IN ',idString,sep="")
	whereStringGeneDiff<-paste('WHERE x.gene_id IN ',idString,sep="")
	whereString<-paste('WHERE x.gene_id IN ',idString,sep="")
	whereStringFPKM<-paste('WHERE x.gene_id IN ',idString,sep="")
	whereStringDiff<-paste('WHERE x.gene_id IN ',idString,sep="")
	
	if(!is.null(sampleIdList)){
		whereStringGene<-whereStringGene
		whereStringGeneFPKM<-paste(whereStringGeneFPKM,' AND y.sample_name IN ',sampleString,sep="")
		whereStringGeneDiff<-paste(whereStringGeneDiff,' AND (y.sample_1 IN ',sampleString,' AND y.sample_2 IN ',sampleString,')',sep="")
		whereString<-whereString
		whereStringFPKM<-paste(whereStringFPKM, ' AND y.sample_name IN ',sampleString,sep="")
		whereStringDiff<-paste(whereStringDiff,' AND (y.sample_1 IN ',sampleString,' AND y.sample_2 IN ',sampleString,')',sep="")
		
	}
	
	#geneAnnotationQuery<-paste("SELECT * from genes x LEFT JOIN geneFeatures xf ON x.gene_id=xf.gene_id ", whereStringGene,sep="")
	geneAnnotationQuery<-paste("SELECT * from genes x LEFT JOIN geneFeatures xf USING (gene_id) ", whereStringGene,sep="")
	geneFPKMQuery<-paste("SELECT y.* from genes x JOIN geneData y ON x.gene_id=y.gene_id ", whereStringGeneFPKM,sep="")
	geneDiffQuery<-paste("SELECT y.* from genes x JOIN geneExpDiffData y ON x.gene_id=y.gene_id ", whereStringGeneDiff,sep="")
	geneRepFPKMQuery<-paste("SELECT y.* from genes x JOIN geneReplicateData y on x.gene_id=y.gene_id ", whereStringGeneFPKM,sep="")
	geneCountQuery<-paste("SELECT y.* from genes x JOIN geneCount y on x.gene_id=y.gene_id ", whereStringGeneFPKM,sep="")
	
	isoformAnnotationQuery<-paste("SELECT x.* from isoforms x LEFT JOIN isoformFeatures xf ON x.isoform_id=xf.isoform_id JOIN genes g on x.gene_id=g.gene_id ", whereString,sep="")
	isoformFPKMQuery<-paste("SELECT y.* from isoforms x JOIN isoformData y ON x.isoform_id = y.isoform_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
	isoformDiffQuery<-paste("SELECT y.* from isoforms x JOIN isoformExpDiffData y ON x.isoform_id = y.isoform_id JOIN genes g on x.gene_id=g.gene_id ", whereStringDiff,sep="")
	isoformRepFPKMQuery<-paste("SELECT y.* from isoforms x JOIN isoformReplicateData y on x.isoform_id=y.isoform_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
	isoformCountQuery<-paste("SELECT y.* from isoforms x JOIN isoformCount y on x.isoform_id=y.isoform_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
	
	TSSAnnotationQuery<-paste("SELECT x.* from TSS x LEFT JOIN TSSFeatures xf ON x.TSS_group_id=xf.TSS_group_id JOIN genes g on x.gene_id=g.gene_id ", whereString,sep="")
	TSSFPKMQuery<-paste("SELECT y.* from TSS x JOIN TSSData y ON x.TSS_group_id=y.TSS_group_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
	TSSDiffQuery<-paste("SELECT y.* from TSS x JOIN TSSExpDiffData y ON x.TSS_group_id=y.TSS_group_id JOIN genes g on x.gene_id=g.gene_id ", whereStringDiff,sep="")
	TSSRepFPKMQuery<-paste("SELECT y.* from TSS x JOIN TSSReplicateData y on x.TSS_group_id=y.TSS_group_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
	TSSCountQuery<-paste("SELECT y.* from TSS x JOIN TSSCount y on x.TSS_group_id=y.TSS_group_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
	
	CDSAnnotationQuery<-paste("SELECT x.* from CDS x LEFT JOIN CDSFeatures xf ON x.CDS_id=xf.CDS_id JOIN genes g on x.gene_id=g.gene_id ", whereString,sep="")
	CDSFPKMQuery<-paste("SELECT y.* from CDS x JOIN CDSData y ON x.CDS_id = y.CDS_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
	CDSDiffQuery<-paste("SELECT y.* from CDS x JOIN CDSExpDiffData y ON x.CDS_id = y.CDS_id JOIN genes g on x.gene_id=g.gene_id ", whereStringDiff,sep="")
	CDSRepFPKMQuery<-paste("SELECT y.* from CDS x JOIN CDSReplicateData y on x.CDS_id=y.CDS_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
	CDSCountQuery<-paste("SELECT y.* from CDS x JOIN CDSCount y on x.CDS_id=y.CDS_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
	
	promotersDistQuery<-paste("SELECT x.* FROM promoterDiffData x LEFT JOIN genes g ON x.gene_id=g.gene_id ", whereString,sep="")
	splicingDistQuery<-paste("SELECT x.* FROM splicingDiffData x LEFT JOIN genes g ON x.gene_id=g.gene_id ", whereString,sep="")
	CDSDistQuery<-paste("SELECT x.* FROM CDSDiffData x LEFT JOIN genes g ON x.gene_id=g.gene_id ", whereString,sep="")
	
	begin<-dbSendQuery(object@DB,"BEGIN;")
	
	#fetch records
	#genes
	write("Getting gene information:",stderr())
	write("\tFPKM",stderr())
	genes.fpkm<-dbGetQuery(object@DB,geneFPKMQuery)
	genes.fpkm$sample_name<-factor(genes.fpkm$sample_name,levels=myLevels)
	write("\tDifferential Expression Data",stderr())
	genes.diff<-dbGetQuery(object@DB,geneDiffQuery)
	genes.diff$sample_1<-factor(genes.diff$sample_1,levels=myLevels)
	genes.diff$sample_2<-factor(genes.diff$sample_2,levels=myLevels)
	write("\tAnnotation Data",stderr())
	genes.annot<-dbGetQuery(object@DB,geneAnnotationQuery)
	write("\tReplicate FPKMs",stderr())
	genes.repFpkm<-dbGetQuery(object@DB,geneRepFPKMQuery)
	write("\tCounts",stderr())
	genes.count<-dbGetQuery(object@DB,geneCountQuery)
	
	#isoforms
	write("Getting isoforms information:",stderr())
	write("\tFPKM",stderr())
	isoform.fpkm<-dbGetQuery(object@DB,isoformFPKMQuery)
	isoform.fpkm$sample_name<-factor(isoform.fpkm$sample_name,levels=myLevels)
	write("\tDifferential Expression Data",stderr())
	isoform.diff<-dbGetQuery(object@DB,isoformDiffQuery)
	isoform.diff$sample_1<-factor(isoform.diff$sample_1,levels=myLevels)
	isoform.diff$sample_2<-factor(isoform.diff$sample_2,levels=myLevels)
	write("\tAnnotation Data",stderr())
	isoform.annot<-dbGetQuery(object@DB,isoformAnnotationQuery)
	write("\tReplicate FPKMs",stderr())
	isoform.repFpkm<-dbGetQuery(object@DB,isoformRepFPKMQuery)
	write("\tCounts",stderr())
	isoform.count<-dbGetQuery(object@DB,isoformCountQuery)
	
	#CDS
	write("Getting CDS information:",stderr())
	write("\tFPKM",stderr())
	CDS.fpkm<-dbGetQuery(object@DB,CDSFPKMQuery)
	CDS.fpkm$sample_name<-factor(CDS.fpkm$sample_name,levels=myLevels)
	write("\tDifferential Expression Data",stderr())
	CDS.diff<-dbGetQuery(object@DB,CDSDiffQuery)
	CDS.diff$sample_1<-factor(CDS.diff$sample_1,levels=myLevels)
	CDS.diff$sample_2<-factor(CDS.diff$sample_2,levels=myLevels)
	write("\tAnnotation Data",stderr())
	CDS.annot<-dbGetQuery(object@DB,CDSAnnotationQuery)
	write("\tReplicate FPKMs",stderr())
	CDS.repFpkm<-dbGetQuery(object@DB,CDSRepFPKMQuery)
	write("\tCounts",stderr())
	CDS.count<-dbGetQuery(object@DB,CDSCountQuery)
	
	#TSS
	write("Getting TSS information:",stderr())
	write("\tFPKM",stderr())
	TSS.fpkm<-dbGetQuery(object@DB,TSSFPKMQuery)
	TSS.fpkm$sample_name<-factor(TSS.fpkm$sample_name,levels=myLevels)
	write("\tDifferential Expression Data",stderr())
	TSS.diff<-dbGetQuery(object@DB,TSSDiffQuery)
	TSS.diff$sample_1<-factor(TSS.diff$sample_1,levels=myLevels)
	TSS.diff$sample_2<-factor(TSS.diff$sample_2,levels=myLevels)
	write("\tAnnotation Data",stderr())
	TSS.annot<-dbGetQuery(object@DB,TSSAnnotationQuery)
	write("\tReplicate FPKMs",stderr())
	TSS.repFpkm<-dbGetQuery(object@DB,TSSRepFPKMQuery)
	write("\tCounts",stderr())
	TSS.count<-dbGetQuery(object@DB,TSSCountQuery)
	
	end<-dbSendQuery(object@DB,"END;")
	
	#Promoters
	write("Getting promoter information:", stderr())
	write("\tdistData",stderr())
	promoters.distData<-dbGetQuery(object@DB,promotersDistQuery)
	promoters.distData$sample_1<-factor(promoters.distData$sample_1,levels=myLevels)
	promoters.distData$sample_2<-factor(promoters.distData$sample_2,levels=myLevels)
	
	#Splicing
	write("Getting splicing information:", stderr())
	write("\tdistData",stderr())
	splicing.distData<-dbGetQuery(object@DB,splicingDistQuery)
	splicing.distData$sample_1<-factor(splicing.distData$sample_1,levels=myLevels)
	splicing.distData$sample_2<-factor(splicing.distData$sample_2,levels=myLevels)

	#relCDS
	write("Getting relCDS information:", stderr())
	write("\tdistData",stderr())
	CDS.distData<-dbGetQuery(object@DB,CDSDistQuery)
	CDS.distData$sample_1<-factor(CDS.distData$sample_1,levels=myLevels)
	CDS.distData$sample_2<-factor(CDS.distData$sample_2,levels=myLevels)
	
	res<-new("CuffGeneSet",
			#TODO: Fix ids so that it only displays those genes in CuffGeneSet
			ids=geneIdList,
			annotation=genes.annot,
			fpkm=genes.fpkm,
			diff=genes.diff,
			repFpkm=genes.repFpkm,
			count=genes.count,
			genome=.getGenome(object),
			isoforms=new("CuffFeatureSet",
					annotation=isoform.annot,
					fpkm=isoform.fpkm,
					diff=isoform.diff,
					repFpkm=isoform.repFpkm,
					count=isoform.count
					),
			TSS=new("CuffFeatureSet",
					annotation=TSS.annot,
					fpkm=TSS.fpkm,
					diff=TSS.diff,
					repFpkm=TSS.repFpkm,
					count=TSS.count
					),
			CDS=new("CuffFeatureSet",
					annotation=CDS.annot,
					fpkm=CDS.fpkm,
					diff=CDS.diff,
					repFpkm=CDS.repFpkm,
					count=CDS.count
					),
			promoters=new("CuffFeatureSet",
					annotation=genes.annot,
					fpkm=genes.fpkm,
					diff=promoters.distData
					),
			splicing=new("CuffFeatureSet",
					annotation=TSS.annot,
					fpkm=TSS.fpkm,
					diff=splicing.distData
					),
			relCDS=new("CuffFeatureSet",
					annotation=genes.annot,
					fpkm=genes.fpkm,
					diff=CDS.distData
					)
			)	
	res
}

setMethod("getGenes",signature(object="CuffSet"),.getGenes)

.getGeneId<-function(object,idList){
	#Query that takes list of any identifier and retrieves gene_id values from db (does not report missing finds)
	searchString<-"("
	for(i in idList){
		searchString<-paste(searchString,"'",i,"',",sep="")
	}
	searchString<-substr(searchString,1,nchar(searchString)-1)
	searchString<-paste(searchString,")",sep="")
	
	geneIdQuery<-paste("SELECT DISTINCT g.gene_id FROM genes g LEFT JOIN isoforms i on g.gene_id=i.gene_id LEFT JOIN TSS t on g.gene_id=t.gene_id LEFT JOIN CDS c ON g.gene_id=c.gene_id WHERE (g.gene_id IN ",searchString," OR g.gene_short_name IN ",searchString," OR i.isoform_id IN ",searchString," OR t.tss_group_id IN ",searchString," OR c.CDS_id IN ",searchString,")",sep="")
	#print(geneIdQuery)
	res<-dbGetQuery(object@DB,geneIdQuery)
	as.vector(res[,1])
}

setMethod("getGeneId",signature(object="CuffSet"),.getGeneId)

.findGene<-function(object,query){
	#Utility to search for gene_id and gene_short_name given a single 'query' string (e.g. query='pink1' will return all genes with 'pink1' (case-insensitive) in the gene_short_name field.
	geneQuery<-paste("SELECT DISTINCT g.gene_id,g.gene_short_name FROM genes g LEFT JOIN isoforms i on g.gene_id=i.gene_id LEFT JOIN TSS t on g.gene_id=t.gene_id LEFT JOIN CDS c ON g.gene_id=c.gene_id WHERE (g.gene_id = '",query,"' OR g.gene_short_name = '",query,"' OR i.isoform_id = '",query,"' OR t.tss_group_id = '",query,"' OR c.CDS_id ='",query,"') OR g.gene_short_name LIKE '%",query,"%'",sep="")
	res<-dbGetQuery(object@DB,geneQuery)
	res
}

setMethod("findGene",signature(object="CuffSet"),.findGene)


.getFeatures<-function(object,featureIdList,sampleIdList=NULL,level='isoforms'){
	#Sample subsetting
	if(!is.null(sampleIdList)){
		if(.checkSamples(object@DB,sampleIdList)){
			myLevels<-sampleIdList
		}else{
			stop("Sample does not exist!")
		}
	}else{
		myLevels<-getLevels(object)
	}
	
	sampleString<-'('
	for (i in myLevels){
		sampleString<-paste(sampleString,"'",i,"',",sep="")
	}
	sampleString<-substr(sampleString,1,nchar(sampleString)-1)
	sampleString<-paste(sampleString,")",sep="")
	
	#ID Search String (SQL)
	idString<-'('
	for (i in featureIdList){
		idString<-paste(idString,"'",i,"',",sep="")
	}
	idString<-substr(idString,1,nchar(idString)-1)
	idString<-paste(idString,")",sep="")
	
	whereString<-paste(' WHERE (x.',slot(object,level)@idField,' IN ',idString,')',sep="")
	whereStringFPKM<-paste(' WHERE (x.',slot(object,level)@idField,' IN ',idString,')',sep="")
	whereStringDiff<-paste(' WHERE (x.',slot(object,level)@idField,' IN ',idString,')',sep="")
	
	if(!is.null(sampleIdList)){
		whereString<-whereString
		whereStringFPKM<-paste(whereStringFPKM, ' AND y.sample_name IN ',sampleString,sep="")
		whereStringDiff<-paste(whereStringDiff,' AND (y.sample_1 IN ',sampleString,' AND y.sample_2 IN ',sampleString,')',sep="")
	}
	
	
	AnnotationQuery<-paste("SELECT x.* from ",slot(object,level)@tables$mainTable," x LEFT JOIN ",slot(object,level)@tables$featureTable," xf ON x.",slot(object,level)@idField,"=xf.",slot(object,level)@idField, whereString,sep="")
	FPKMQuery<-paste("SELECT y.* from ",slot(object,level)@tables$mainTable," x JOIN ",slot(object,level)@tables$dataTable," y ON x.",slot(object,level)@idField," = y.",slot(object,level)@idField,whereStringFPKM,sep="")
	DiffQuery<-paste("SELECT y.* from ",slot(object,level)@tables$mainTable," x JOIN ",slot(object,level)@tables$expDiffTable," y ON x.",slot(object,level)@idField," = y.",slot(object,level)@idField,whereStringDiff,sep="")
	repFPKMQuery<-paste("SELECT y.* from ",slot(object,level)@tables$mainTable," x JOIN ",slot(object,level)@tables$replicateTable," y ON x.",slot(object,level)@idField," = y.",slot(object,level)@idField,whereStringFPKM,sep="")
	countQuery<-paste("SELECT y.* from ",slot(object,level)@tables$mainTable," x JOIN ",slot(object,level)@tables$countTable," y ON x.",slot(object,level)@idField," = y.",slot(object,level)@idField,whereStringFPKM,sep="")
	
	#print(AnnotationQuery)
	#print(FPKMQuery)
	#print(DiffQuery)
	#print(repFPKMQuery)
	#print(countQuery)
	
	begin<-dbSendQuery(object@DB,"BEGIN;")	
	res<-new("CuffFeatureSet",
			annotation=dbGetQuery(object@DB,AnnotationQuery),
			fpkm=dbGetQuery(object@DB,FPKMQuery),
			diff=dbGetQuery(object@DB,DiffQuery),
			repFpkm=dbGetQuery(object@DB,repFPKMQuery),
			count=dbGetQuery(object@DB,countQuery),
			genome=.getGenome(object)
		)
	end<-dbSendQuery(object@DB,"END;")		
	res
	
}

setMethod("getFeatures",signature(object="CuffSet"),.getFeatures)


#getGeneIds from featureIds
#SELECT DISTINCT g.gene_id from genes g LEFT JOIN isoforms i on g.gene_id=i.gene_id LEFT JOIN TSS t on g.gene_id=t.gene_id LEFT JOIN CDS c on g.gene_id=c.gene_id WHERE (g.gene_id IN ('$VAL') OR i.isoform_id IN ('$VAL') OR t.tss_group_id IN ('$VAL') OR c.CDS_id IN ('$VAL') OR g.gene_short_name IN ('$VAL'));


#getSig() returns a list vectors of significant features by pairwise comparisons
#Depricated in favor of .getSig2
#.getSig<-function(object,x,y,level="genes",testTable=FALSE){
#	mySamp<-samples(slot(object,level))
#	sigGenes<-list()
#	if(level %in% c('promoters','splicing','relCDS')){
#		diffTable<-slot(object,level)@table
#	}else{
#		diffTable<-slot(object,level)@tables$expDiffTable
#	}
#	
#	#Restrict samples to those provided as x and y
#	if(!missing(x) && !missing(y)){
#		mySamp<-c(x,y)
#		if(!all(mySamp %in% samples(slot(object,level)))){
#			stop("One or more values of 'x' or 'y' are not valid sample names!")
#		}
#	}
#	
#	for (ihat in c(1:(length(mySamp)-1))){
#		for(jhat in c((ihat+1):length(mySamp))){
#			i<-mySamp[ihat]
#			j<-mySamp[jhat]
#			testName<-paste(i,j,sep="vs")
#			queryString<-paste("('",i,"','",j,"')",sep="")
#			sql<-paste("SELECT ",slot(object,level)@idField," from ", diffTable," WHERE sample_1 IN ",queryString," AND sample_2 IN ",queryString, " AND significant='yes'",sep="")
#			sig<-dbGetQuery(object@DB,sql)
#			sigGenes[[testName]]<-sig[,1]
#		}
#	}
#	#TODO: Add conditional return for if x & y are not null, to just return that test...
#	if(testTable){
#		tmp<-reshape2:::melt.list(sigGenes)
#		return(cast(tmp,value~...,length))
#	}else{
#		return(sigGenes)
#	}
#
#}

#Depricated in favor of .getSig
#.getSig2<-function(object,x,y,level="genes",testTable=FALSE,alpha=0.05){
#	mySamp<-samples(slot(object,level))
#	sigGenes<-list()
#	if(level %in% c('promoters','splicing','relCDS')){
#		diffTable<-slot(object,level)@table
#	}else{
#		diffTable<-slot(object,level)@tables$expDiffTable
#	}
#	
#	#Restrict samples to those provided as x and y
#	if(!missing(x) && !missing(y)){
#		mySamp<-c(x,y)
#		if(!all(mySamp %in% samples(slot(object,level)))){
#			stop("One or more values of 'x' or 'y' are not valid sample names!")
#		}
#	}
#	
#	for (ihat in c(1:(length(mySamp)-1))){
#		for(jhat in c((ihat+1):length(mySamp))){
#			i<-mySamp[ihat]
#			j<-mySamp[jhat]
#			testName<-paste(i,j,sep="vs")
#			queryString<-paste("('",i,"','",j,"')",sep="")
#			sql<-paste("SELECT ",slot(object,level)@idField,",p_value,q_value from ", diffTable," WHERE sample_1 IN ",queryString," AND sample_2 IN ",queryString, " AND STATUS='OK'",sep="")
#			sig<-dbGetQuery(object@DB,sql)
#			
#			#recalculate q-values for all tests in single pairwise comparison
#			if(!missing(x) && !(missing(y))) {
#				sig$q_value<-p.adjust(sig$p_value,method="BH")
#			}
#			#Filter on alpha
#			sig<-sig[sig$q_value<=alpha,]
#			sigGenes[[testName]]<-sig[,1]
#		}
#	}
#
#	if(testTable){
#		tmp<-reshape2:::melt.list(sigGenes)
#		return(cast(tmp,value~...,length))
#	}else{
#		return(sigGenes)
#	}
#	
#}

.getSig<-function(object,x,y,alpha=0.05,level='genes',method="BH",useCuffMTC=FALSE){
	mySamp<-samples(slot(object,level))
	
	if(level %in% c('promoters','splicing','relCDS')){
		diffTable<-slot(object,level)@table
	}else{
		diffTable<-slot(object,level)@tables$expDiffTable
	}
	
	#Restrict samples to those provided as x and y
	if(!missing(x) && !missing(y)){
		mySamp<-c(x,y)
		if(!all(mySamp %in% samples(slot(object,level)))){
			stop("One or more values of 'x' or 'y' are not valid sample names!")
		}
	}
	queryString<-paste("(",paste(mySamp,collapse="','",sep=""),")",sep="'")
	sql<-paste("SELECT ",slot(object,level)@idField,",p_value,q_value from ", diffTable," WHERE sample_1 IN ",queryString," AND sample_2 IN ",queryString, " AND STATUS='OK'",sep="")
	#print(sql)
	sig<-dbGetQuery(object@DB,sql)
	if(!missing(x) && !missing(y) && !useCuffMTC){
		sig$q_value<-p.adjust(sig$p_value,method=method)
		#print(sig[order(sig$q_value,decreasing=T),])
	}
	sig<-sig[sig$q_value<=alpha,]
	sigGenes<-unique(sig[[slot(object,level)@idField]])
	sigGenes
}

setMethod("getSig",signature(object="CuffSet"),.getSig)


.getSigTable<-function(object,alpha=0.05,level='genes'){
	
	if(level %in% c('promoters','splicing','relCDS')){
		diffTable<-slot(object,level)@table
	}else{
		diffTable<-slot(object,level)@tables$expDiffTable
	}
	
	sql<-paste("SELECT ",slot(object,level)@idField,", sample_1, sample_2, p_value from ", diffTable," WHERE status='OK';")
	sig<-dbGetQuery(object@DB,sql)
	sig$q_value<-p.adjust(sig$p_value,method='BH')
	sig$testName<-paste(sig$sample_1,"vs",sig$sample_2,sep="")
	
	#filter on alpha and clean table
	sig$testResult<-0
	sig$testResult[sig$q_value<=alpha]<-1
	
	fieldsNeeded<-c(slot(object,level)@idField,'testName','testResult')
	sig<-sig[,fieldsNeeded]
	
	#recast
	sig.table<-acast(sig,as.formula(paste(slot(object,level)@idField,"~testName")),value='testResult')
	
	#remove genes that do not reject null in any test
	sig.table<-sig.table[rowSums(sig.table,na.rm=T)>0,]
	
	sig.table
}

setMethod("getSigTable",signature(object="CuffSet"),.getSigTable)

####################
#QC Visualizations on entire dataset
#####################

.sigMatrix<-function(object,alpha=0.05,level='genes',orderByDist=FALSE){
	if(level %in% c('promoters','splicing','relCDS')){
		diffTable<-slot(object,level)@table
	}else{
		diffTable<-slot(object,level)@tables$expDiffTable
	}
	
	sql<-paste("SELECT ",slot(object,level)@idField,", sample_1, sample_2, p_value from ", diffTable," WHERE status='OK';")
	sig<-dbGetQuery(object@DB,sql)
	sig$q_value<-p.adjust(sig$p_value,method='BH')
	
	sig<-sig[sig$q_value<=alpha,]
	
	fieldsNeeded<-c('sample_1','sample_2')
	sig<-sig[,fieldsNeeded]
	
	if(orderByDist){
		#This does not work yet...
		mySamples<-colnames(fpkmMatrix(slot(object,level)))
		sampleOrder<-mySamples[order.dendrogram(as.dendrogram(hclust(JSdist(makeprobs(log10(fpkmMatrix(slot(object,level))))))))]
	}
	else {
		sampleOrder<-rev(samples(object)$sample_name)
	}
	sig$sample_1<-factor(sig$sample_1,levels=sampleOrder)
	sig$sample_2<-factor(sig$sample_2,levels=sampleOrder)
	
	p<-ggplot(sig,aes(x=sample_1,y=sample_2))
	
	p<- p + stat_sum(aes(fill=..n..),color="black",size=0.3, geom="tile") + scale_fill_continuous(low="white",high="green") + expand_limits(fill=0)
	
	p<- p + stat_sum(aes(label=..n..),geom="text",size=6,show_guide=FALSE)
	
	#p <- p + geom_tile(aes(fill=..n..))
	
	p + theme_bw() + labs(title=paste("Significant ",slot(object,level)@tables$mainTable,"\n at alpha ",alpha,sep="")) +theme(axis.text.x=element_text(angle=-90, hjust=0)) + coord_equal(1)
	
}

setMethod("sigMatrix",signature(object="CuffSet"),.sigMatrix)

#dispersionPlot on cuffSet objects actually draws from the varModel table and is the preferred way to visualize the dispersion and model fitting from cuffdiff 2.1 or greater
.dispersionPlot<-function(object){
	dat<-varModel(object)
	p<-ggplot(dat)
	p<-p + geom_point(aes(x=compatible_count_mean,y=compatible_count_var,color=condition),alpha=0.3,size=0.8) + geom_line(aes(x=compatible_count_mean,y=fitted_var),lwd=0.5,color="black") + facet_wrap('condition') +scale_y_log10() + scale_x_log10() + theme_bw() + guides(color=FALSE)
	p
}

setMethod("dispersionPlot",signature(object="CuffSet"),.dispersionPlot)


#Find similar genes
.findSimilar<-function(object,x,n,distThresh,returnGeneSet=TRUE,...){
	#x can be either a gene_id, gene_short_name or a vector of FPKM values (fake gene expression profile)
	#TODO: make findSimilar work with all levels
	#TODO: Possibly add FPKM thresholding
	
	if(is.character(x)){
		myGene<-getGene(object,x)
		sig<-makeprobsvec(fpkmMatrix(myGene,...)[1,])
	}else if(is.vector(x)){
		sig<-makeprobsvec(x)
	}
	allGenes<-fpkmMatrix(object@genes,...)
	allGenes<-t(makeprobs(t(allGenes)))
	compare<-function(q){
		JSdistVec(sig,q)
	}
	myDist<-apply(allGenes,MARGIN=1,compare)
	
	if(!missing(distThresh)){
		myDist<-myDist[myDist<=distThresh]
	}
	myDist<-sort(myDist)
	
	if(!missing(n)){
		myDist<-myDist[1:n]
	}
	
	mySimilarIds<-names(myDist)
	
	if(returnGeneSet){
		mySimilarGenes<-getGenes(object,mySimilarIds,...)
		return(mySimilarGenes)
	}else{
		res<-as.data.frame(myDist)
		colnames(res)<-c("distance")
		return(res)
	}
	
}
setMethod("findSimilar",signature(object="CuffSet"),.findSimilar)

############
#SQL access
############


################
#Misc Utilities
################
.getLevels<-function(object){
	levelsQuery<-'SELECT s.sample_name FROM samples s ORDER BY s.sample_index ASC'
	levels<-dbGetQuery(object@DB,levelsQuery)$sample_name
	levels
}

setMethod("getLevels",signature(object="CuffSet"),.getLevels)

.getRepLevels<-function(object){
	levelsQuery<-'SELECT r.rep_name FROM replicates r LEFT JOIN samples s ON r.sample_name=s.sample_name ORDER BY s.sample_index ASC'
	levels<-dbGetQuery(object@DB,levelsQuery)$rep_name
	levels
}

setMethod("getRepLevels",signature(object="CuffSet"),.getRepLevels)

.checkSamples<-function(dbConn,sampleIdList){
	dbSamples<-dbReadTable(dbConn,"samples")
	if (all(sampleIdList %in% dbSamples$sample_name)){
		return(TRUE)
	}else{
		return(FALSE)
	}
}

#####################
#GenomicRanges
#####################
#.makeGRanges<-function(dbConn,id,idField='transcript_id'){
#	txQuery<-paste("SELECT * FROM features WHERE ",idField,"='",id,"'",sep="")
#	print(txQuery)
#	features<-dbGetQuery(dbConn,txQuery)
#	#res<-GRanges(seqnames=features$seqnames,ranges=IRanges(start=features$start,end=features$end,names=features$exon_number),strand=features$strand)
#	res<-GRanges(features[,-1])
#	res
#}

.makeGRanges<-function(object,id,idField='transcript_id'){
	txQuery<-paste("SELECT * from features WHERE ",idField,"='",id,"'",sep="")
	myFeat<-dbGetQuery(object@DB,txQuery)
	#res<-GRanges(myFeat[,-1])
	res<-GRanges(seqnames=myFeat$seqnames,ranges=IRanges(start=myFeat$start,end=myFeat$end,names=myFeat$exon_number),strand=myFeat$strand)
	res
}

setMethod("makeGRanges",signature(object="CuffSet"),.makeGRanges)

#.makeGRangesList<-function(object,id,idField="gene_id"){
#	#use .makeGRanges for each sub-feature of id to create GRangesList
#}
#
#setMethod("makeGRangesList",signature(object="CuffSet"),.makeGRangesList)

#####################
#Add FeatureData    #
#####################
.addFeatures<-function(object,features,level="genes",...){
	if(!is.data.frame(features)){
		stop("features must be a data.frame")
	}
	colnames(features)[1]<-slot(object,level)@idField
	colnames(features)<-dbQuoteIdentifier(object@DB,colnames(features),unique=T)
	dbWriteTable(object@DB,slot(object,level)@tables$featureTable,features,row.names=F,overwrite=T)
	indexQuery<-paste("CREATE INDEX ",slot(object,level)@idField," ON ", slot(object,level)@tables$featureTable," (",slot(object,level)@idField,")",sep="")
	res<-dbGetQuery(object@DB,indexQuery)
}

setMethod("addFeatures",signature(object="CuffSet"),.addFeatures)

#TODO: Add method to purge existing feature data table to allow 'refresh' of feature level data

##############
#Reporting
##############
#runReport<-function(){
#	if(!file.exists(".output")){
#		dir.create(".output")
#	}
#	file.copy(system.file("reports/runReport.Rnw", package="cummeRbund"),paste(".output/","runReport.Rnw",sep=""),overwrite=T)
#	myWD<-getwd()
#	setwd(".output")
#	Sweave("runReport.Rnw")
#	tools::texi2dvi("runReport.tex",pdf=TRUE)
#	setwd(myWD)
#}

###################
# Coersion methods
###################
#As ExpressionSet
shiauck/cummeRbund_on_RSQLite2.0 documentation built on May 5, 2019, 12:33 p.m.