cummeRbund_on_RSQLite2.0: Analysis, exploration, manipulation, and visualization of Cufflinks high-throughput sequencing data.

#methods-CuffData.R
#
#Author: Loyal A. Goff
#
#
####################

##################
#Initialize
##################
setMethod("initialize","CuffData",
			function(.Object,
					DB,
					tables=list(mainTable = "",dataTable = "",expDiffTable = "",featureTable = "",countTable = "", replicateTable = ""),
					filters=list(),
					type = c("genes","isoforms","TSS","CDS"),
					idField,
					... ){
				.Object<-callNextMethod(.Object,
						DB = DB,
						tables = tables,
						filters = filters,
						type = type,
						idField = idField,
						...)				
		}
)

setValidity("CuffData",function(object){
		TRUE
		}
)			

################
#Class Methods
################
setMethod("show","CuffData",
		function(object){
			size<-dim(object)
			cat(class(object), "instance with:\n\t",size[1],"features and",size[2],"samples\n")
		}
)

setMethod("dim","CuffData",
		function(x){
			countQuery<-paste("SELECT COUNT(",x@idField,") as n FROM ",x@tables$mainTable)
			nIds<-dbGetQuery(x@DB,countQuery)
			sampleQuery<-("SELECT COUNT(sample_name) as n FROM samples")
			nSamples<-dbGetQuery(x@DB,sampleQuery)
			c(nIds$n,nSamples$n)
		}
)

#####################
#Feature Table
#####################

.addFeatures<-function(object,features,...){
	if(!is.data.frame(features)){
		stop("features must be a data.frame")
	}
	colnames(features)[1]<-object@idField
	colnames(features)<-dbQuoteIdentifier(object@DB,colnames(features),unique=T)
	dbWriteTable(object@DB,object@tables$featureTable,features,row.names=F,overwrite=T)
	indexQuery<-paste("CREATE INDEX ",object@idField," ON ", object@tables$featureTable," (",object@idField,")",sep="")
	res<-dbGetQuery(object@DB,indexQuery)
}

setMethod("addFeatures",signature="CuffData",.addFeatures)

###################
#Accessors
###################
.annotation<-function(object){
	featureQuery<-paste("SELECT * FROM ",object@tables$mainTable," x LEFT JOIN features xf USING (",object@idField,")",sep="")
	dbGetQuery(object@DB, featureQuery)
}

setMethod(BiocGenerics::annotation,signature(object="CuffData"),.annotation)

.featureNames<-function(object){
	featureQuery<-paste("SELECT ",object@idField," FROM ",object@tables$mainTable, sep="")
	res<-dbGetQuery(object@DB,featureQuery)
	res[,1]
}

setMethod("featureNames","CuffData",.featureNames)

.samples<-function(object){
	res<-dbReadTable(object@DB,'samples')
	res<-res$sample_name
	res
}

setMethod("samples","CuffData",.samples)

.replicates<-function(object){
	res<-dbReadTable(object@DB,'replicates')
	res<-res$rep_name
	res
}

setMethod("replicates","CuffData",.replicates)

.fpkm<-function(object,features=FALSE,sampleIdList){
	#Sample subsetting
	if(!missing(sampleIdList)){
		if(.checkSamples(object@DB,sampleIdList)){
			myLevels<-sampleIdList
		}else{
			stop("Sample does not exist!")
		}
	}else{
		myLevels<-getLevels(object)
	}
	#Sample Search String (SQL)
	sampleString<-'('
	for (i in myLevels){
		sampleString<-paste(sampleString,"'",i,"',",sep="")
	}
	sampleString<-substr(sampleString,1,nchar(sampleString)-1)
	sampleString<-paste(sampleString,")",sep="")
	
	if(!features){
		FPKMQuery<-paste("SELECT * FROM ",object@tables$dataTable," WHERE sample_name IN ",sampleString,sep="")
	}else{
		FPKMQuery<-paste("SELECT xf.*,xm.*,x.sample_name,x.fpkm,x.conf_hi,x.conf_lo FROM ",object@tables$dataTable," x LEFT JOIN features xf ON x.",object@idField,"=xf.",object@idField," LEFT JOIN ",object@tables$mainTable," xm ON x.",object@idField,"=xm.",object@idField," WHERE x.sample_name IN ",sampleString,sep="")
		#print(FPKMQuery)
	}
	res<-dbGetQuery(object@DB,FPKMQuery)
	res$sample_name<-factor(res$sample_name,levels=getLevels(object))
	res$stdev<-(res$conf_hi-res$fpkm)/2
	res
}

setMethod("fpkm","CuffData",.fpkm)

.repFpkm<-function(object,features=FALSE,repIdList){
	#Sample subsetting
	if(!missing(repIdList)){
		if(.checkReps(object@DB,repIdList)){
			myLevels<-repIdList
		}else{
			stop("Replicate does not exist!")
		}
	}else{
		myLevels<-getRepLevels(object)
	}
	#Sample Search String (SQL)
	sampleString<-'('
	for (i in myLevels){
		sampleString<-paste(sampleString,"'",i,"',",sep="")
	}
	sampleString<-substr(sampleString,1,nchar(sampleString)-1)
	sampleString<-paste(sampleString,")",sep="")
	
	if(!features){
		FPKMQuery<-paste("SELECT * FROM ",object@tables$replicateTable," WHERE rep_name IN ",sampleString,sep="")
	}else{
		FPKMQuery<-paste("SELECT xf.*,xm.*,x.rep_name,x.raw_frags,x.internal_scaled_frags,x.external_scaled_frags,x.fpkm,x.effective_length,x.status FROM ",object@tables$replicateTable," x LEFT JOIN features xf on x.",object@idField,"=xf.",object@idField," LEFT JOIN ",object@tables$mainTable," xm ON x.",object@idField,"=xm.",object@idField," WHERE x.rep_name IN ",sampleString,sep="")
	}
	#print(FPKMQuery)
	res<-dbGetQuery(object@DB,FPKMQuery)
	res$rep_name<-factor(res$rep_name,levels=getRepLevels(object))
	#res$stdev<-(res$conf_hi-res$fpkm)/2 #Not really available yet since conf_hi and conf_lo are not 
	res
}

setMethod("repFpkm","CuffData",.repFpkm)

.count<-function(object,sampleIdList){
	#Sample subsetting
	if(!missing(sampleIdList)){
		if(.checkSamples(object@DB,sampleIdList)){
			myLevels<-sampleIdList
		}else{
			stop("Sample does not exist!")
		}
	}else{
		myLevels<-getLevels(object)
	}
	#Sample Search String (SQL)
	sampleString<-'('
	for (i in myLevels){
		sampleString<-paste(sampleString,"'",i,"',",sep="")
	}
	sampleString<-substr(sampleString,1,nchar(sampleString)-1)
	sampleString<-paste(sampleString,")",sep="")
	
	CountQuery<-FPKMQuery<-paste("SELECT * FROM ",object@tables$countTable," WHERE sample_name IN ",sampleString,sep="")
	
	res<-dbGetQuery(object@DB,CountQuery)
	res$sample_name<-factor(res$sample_name,levels=getLevels(object))
	res
	
}

setMethod("count","CuffData",.count)

.fpkmMatrix<-function(object,fullnames=FALSE,sampleIdList){
	#TODO: fix fullnames for CuffData::fpkmMatrix
	#Sample subsetting
	if(!missing(sampleIdList)){
		if(.checkSamples(object@DB,sampleIdList)){
			myLevels<-sampleIdList
		}else{
			stop("Sample does not exist!")
		}
	}else{
		myLevels<-getLevels(object)
	}
	
	samp<-samples(object)
	FPKMMatQuery<-paste("select x.",object@idField,", x.gene_short_name, ",sep="")
	for (i in samp){
		FPKMMatQuery<-paste(FPKMMatQuery,"sum(case when xd.sample_name ='",i,"' then fpkm end) as ",i,",",sep="")
	}
	FPKMMatQuery<-substr(FPKMMatQuery, 1, nchar(FPKMMatQuery)-1)
	FPKMMatQuery<-paste(FPKMMatQuery," from ",object@tables$mainTable," x LEFT JOIN ",object@tables$dataTable," xd on x.",object@idField," = xd.",object@idField," group by x.",object@idField,sep="")
	res<-dbGetQuery(object@DB,FPKMMatQuery)
	if(fullnames){
		res<-data.frame(res[,-c(1,2)],row.names=paste(res[,2],res[,1],sep="|"))
	}else{
		res<-data.frame(res[,-c(1,2)],row.names=res[,1])	
	}
	
	if(!missing(sampleIdList)){
		res<-data.frame(res[,sampleIdList],row.names=rownames(res))
		colnames(res)<-sampleIdList
	}
	res
}

setMethod("fpkmMatrix","CuffData",.fpkmMatrix)

.repFpkmMatrix<-function(object,fullnames=FALSE,repIdList){
	#Sample subsetting
	if(!missing(repIdList)){
		if(.checkReps(object@DB,repIdList)){
			myLevels<-repIdList
		}else{
			stop("Replicate does not exist!")
		}
	}else{
		myLevels<-getRepLevels(object)
	}
	
	samp<-replicates(object)
	FPKMMatQuery<-paste("select x.",object@idField,", x.gene_short_name, ",sep="")
	for (i in samp){
		FPKMMatQuery<-paste(FPKMMatQuery,"sum(case when xd.rep_name ='",i,"' then fpkm end) as ",i,",",sep="")
	}
	FPKMMatQuery<-substr(FPKMMatQuery, 1, nchar(FPKMMatQuery)-1)
	FPKMMatQuery<-paste(FPKMMatQuery," from ",object@tables$mainTable," x LEFT JOIN ",object@tables$replicateTable," xd on x.",object@idField," = xd.",object@idField," group by x.",object@idField,sep="")
	res<-dbGetQuery(object@DB,FPKMMatQuery)

	if(fullnames){
		res<-data.frame(res[,-c(1,2)],row.names=paste(res[,2],res[,1],sep="|"))
	}else{
		res<-data.frame(res[,-c(1,2)],row.names=res[,1])	
	}
	if(!missing(repIdList)){
		res<-data.frame(res[,repIdList],row.names=rownames(res))
		colnames(res)<-repIdList
	}
	res
}

setMethod("repFpkmMatrix","CuffData",.repFpkmMatrix)

.countMatrix<-function(object,fullnames=FALSE,sampleIdList){
	#Sample subsetting
	if(!missing(sampleIdList)){
		if(.checkSamples(object@DB,sampleIdList)){
			myLevels<-sampleIdList
		}else{
			stop("Sample does not exist!")
		}
	}else{
		myLevels<-getLevels(object)
	}
	
	samp<-samples(object)
	CountMatQuery<-paste("select x.",object@idField,", x.gene_short_name, ",sep="")
	for (i in samp){
		CountMatQuery<-paste(CountMatQuery,"sum(case when xd.sample_name ='",i,"' then count end) as ",i,",",sep="")
	}
	CountMatQuery<-substr(CountMatQuery, 1, nchar(CountMatQuery)-1)
	CountMatQuery<-paste(CountMatQuery," from ",object@tables$mainTable," x LEFT JOIN ",object@tables$countTable," xd on x.",object@idField," = xd.",object@idField," group by x.",object@idField,sep="")
	res<-dbGetQuery(object@DB,CountMatQuery)
	if(fullnames){
		res<-data.frame(res[,-c(1,2)],row.names=paste(res[,2],res[,1],sep="|"))
	}else{
		res<-data.frame(res[,-c(1,2)],row.names=res[,1])	
	}
	if(!missing(sampleIdList)){
		res<-data.frame(res[,sampleIdList],row.names=rownames(res))
		colnames(res)<-sampleIdList
	}
	res
}

setMethod("countMatrix","CuffData",.countMatrix)

.repCountMatrix<-function(object,fullnames=FALSE,repIdList){
	#Sample subsetting
	if(!missing(repIdList)){
		if(.checkReps(object@DB,repIdList)){
			myLevels<-repIdList
		}else{
			stop("Replicate does not exist!")
		}
	}else{
		myLevels<-getRepLevels(object)
	}
	reps<-replicates(object)
	repCountMatQuery<-paste("select x.",object@idField,", x.gene_short_name, ",sep="")
	for (i in reps){
		repCountMatQuery<-paste(repCountMatQuery,"sum(case when xr.rep_name ='",i,"' then external_scaled_frags end) as ",i,",",sep="")
	}
	repCountMatQuery<-substr(repCountMatQuery, 1, nchar(repCountMatQuery)-1)
	repCountMatQuery<-paste(repCountMatQuery," from ",object@tables$mainTable," x LEFT JOIN ",object@tables$replicateTable," xr on x.",object@idField," = xr.",object@idField," group by x.",object@idField,sep="")
	res<-dbGetQuery(object@DB,repCountMatQuery)
	if(fullnames){
		res<-data.frame(res[,-c(1,2)],row.names=paste(res[,2],res[,1],sep="|"))
	}else{
		res<-data.frame(res[,-c(1,2)],row.names=res[,1])	
	}
	if(!missing(repIdList)){
		res<-data.frame(res[,repIdList],row.names=rownames(res))
		colnames(res)<-repIdList
	}
	res
}

setMethod("repCountMatrix","CuffData",.repCountMatrix)

.statsMatrix<-function(object){
	statsQuery<-paste("SELECT xd.*, xc.count, xc.variance as count_variance , xc.uncertainty as count_uncertainty, xc.dispersion as count_dispersion, (xd.conf_hi-xd.fpkm)/2 as fpkm_stdev,((xd.conf_hi-xd.fpkm)/2)/xd.fpkm AS 'CV' FROM ",object@tables$dataTable," xd LEFT JOIN ",object@tables$countTable," xc ON xd.",object@idField,"=xc.",object@idField," AND xd.sample_name=xc.sample_name",sep="")
	res<-dbGetQuery(object@DB,statsQuery)
	res$sample_name<-factor(res$sample_name,levels=samples(object))
	res
}

#This needs a lot of work...
#TODO: Change this to remove lnFcCutoff but make sure that functions that rely on diffData have their own FC cutoff so that plotting doesn't suffer
.diffData<-function(object,x,y,features=FALSE){
	if(missing(x) && missing(y)){
		if(!features){
			diffQuery<-paste("SELECT * FROM ",object@tables$expDiffTable,sep="")
		}else{
			diffQuery<-paste("SELECT xm.*, xed.*, xf.* FROM ",object@tables$mainTable," xm LEFT JOIN ",object@tables$expDiffTable," xed ON xm.",object@idField,"=xed.",object@idField," LEFT JOIN features xf ON xm.",object@idField,"=xf.",object@idField,sep="")
		}
	}else if (missing(x) || missing(y)){
		stop("You must supply both x and y or neither.")
	}else{
		if(!features){
			diffQuery<-paste("SELECT x.",object@idField,", xed.* FROM ",object@tables$mainTable," x LEFT JOIN ",object@tables$expDiffTable," xed on x.",object@idField," = xed.",object@idField," WHERE ((sample_1 = '",x,"' AND sample_2 = '",y,"') OR (sample_1 = '",y,"' AND sample_2 = '",x,"'))",sep="")
		}else{
			diffQuery<-paste("SELECT xm.*, xed.*, xf.* FROM ",object@tables$mainTable," xm LEFT JOIN ",object@tables$expDiffTable," xed on xm.",object@idField," = xed.",object@idField," LEFT JOIN features xf ON xm.",object@idField,"=xf.",object@idField," WHERE ((sample_1 = '",x,"' AND sample_2 = '",y,"') OR (sample_1 = '",y,"' AND sample_2 = '",x,"'))",sep="")
		}
	}
	dat<-dbGetQuery(object@DB,diffQuery)
	#diffQuery
	dat
}

setMethod("diffData",signature(object="CuffData"),.diffData)

.diffTable<-function(object,logCutoffValue=99999){
	measureVars<-c('status','value_1','value_2','log2_fold_change','test_stat','p_value','q_value','significant')
	all.diff<-diffData(object,features=TRUE)
	all.diff$log2_fold_change[all.diff$log2_fold_change>=logCutoffValue]<-Inf
	all.diff$log2_fold_change[all.diff$log2_fold_change<=-logCutoffValue]<--Inf
	all.diff.melt<-melt(all.diff,measure.vars=measureVars)
	#all.diff.melt<-all.diff.melt[!grepl("^value_",all.diff.melt$variable),]
	all.diff.cast<-dcast(all.diff.melt,formula=...~sample_2+sample_1+variable)
	all.diff.cast
}

setMethod("diffTable",signature(object="CuffData"),.diffTable)

.QCTable<-function(object){
	qcSQL<-paste("SELECT d.*, (d.conf_hi-d.fpkm)/2 as stdev, c.count, c.variance, c.uncertainty, c.dispersion, (c.variance/d.fpkm) AS 'IOD', ((d.conf_hi-d.fpkm)/2)/d.fpkm AS 'CV' FROM ",object@tables$dataTable," d LEFT JOIN ",object@tables$countTable," c ON d.gene_id=c.gene_id AND d.sample_name=c.sample_name",sep="")
	res<-dbGetQuery(object@DB,qcSQL)
	res
}

.getMA<-function(object,x,y,logMode=T,pseudocount=1){
	if (missing(x) || missing(y)){
		stop("You must supply both x and y.")
	}else{
		sql<-paste("SELECT x.",object@idField,", sum(case when x.sample_name = '",x,"' then x.fpkm end) AS 'x', sum(case when x.sample_name = '",y,"' then x.fpkm end) AS 'y' FROM ",object@tables$dataTable," x GROUP BY x.",object@idField,";",sep="")
		#print(sql)
		dat<-dbGetQuery(object@DB,sql)
		
		if(logMode){
			dat$x<-log10(dat$x+pseudocount)
			dat$y<-log10(dat$y+pseudocount)
		}
		dat$A<-(dat$x+dat$y)/2
		dat$M<-dat$x/dat$y
		res<-dat[,c(1,4:5)]
		res
	}
}

.getCountMA<-function(object,x,y,logMode=T,pseudocount=1){
	if (missing(x) || missing(y)){
		stop("You must supply both x and y.")
	}else{
		sql<-paste("SELECT x.",object@idField,", sum(case when x.sample_name = '",x,"' then x.count end) AS 'x', sum(case when x.sample_name = '",y,"' then x.count end) AS 'y' FROM ",object@tables$countTable," x GROUP BY x.",object@idField,";",sep="")
		dat<-dbGetQuery(object@DB,sql)
		
		if(logMode){
			dat$x<-log10(dat$x+pseudocount)
			dat$y<-log10(dat$y+pseudocount)
		}
		dat$A<-(dat$x+dat$y)/2
		dat$M<-dat$x/dat$y
		res<-dat[,c(1,4:5)]
		res
	}
}

#.getRankOrder<-function(object,x,y,logMode=TRUE,pseudocount=1,ratio=TRUE){
#	if (missing(x) || missing(y)){
#		stop("You must supply both x and y.")
#	}else{
#		
#	}
#}

setMethod("DB","CuffData",function(object){
		return(object@DB)
		})

setMethod("tables","CuffData",function(object){
		return(object@tables)
		})

setMethod("filters","CuffData",function(object){
		return(object@filters)
		})

setMethod("type","CuffData",function(object){
		return(object@type)
		})

setMethod("idField","CuffData",function(object){
		return(object@idField)
		})

##################
#Setters
##################


##################
#Subsetting
##################
#Example query
#"SELECT * FROM genes WHERE gene_id in ('XLOC_000005','XLOC_000015','XLOC_000055','XLOC_000595','XLOC_005998','ucscCodingXLOC_018816')"
.getLevels<-function(object){
	levelsQuery<-'SELECT s.sample_name FROM samples s ORDER BY s.sample_index ASC'
	levels<-dbGetQuery(object@DB,levelsQuery)$sample_name
	levels
}

setMethod("getLevels",signature(object="CuffData"),.getLevels)

.getRepLevels<-function(object){
	levelsQuery<-'SELECT r.rep_name FROM replicates r JOIN samples s ON r.sample_name=s.sample_name ORDER BY s.sample_index ASC'
	levels<-dbGetQuery(object@DB,levelsQuery)$rep_name
	levels
}

setMethod("getRepLevels",signature(object="CuffData"),.getRepLevels)

.getRepConditionLevels<-function(object){
	levelsQuery<-'SELECT r.sample_name FROM replicates r JOIN samples s ON r.sample_name=s.sample_name ORDER BY s.sample_index ASC'
	levels<-dbGetQuery(object@DB,levelsQuery)$sample_name
	levels
}

#Useful SQL commands

#SELECT g.gene_id, g.class_code, g.nearest_ref_id, g.gene_short_name, g.locus, g.length, g.coverage, g.status, gd.sample_name, gd.fpkm, gd.conf_hi, gd.conf_lo FROM genes g LEFT JOIN geneData gd ON g.gene_id = gd.gene_id WHERE (g.gene_id = 'XLOC_000001');

#SELECT g.gene_id, ged.* FROM genes g LEFT JOIN geneExpDiffData ged on g.gene_id = ged.gene_id WHERE ((sample_1 = 'H1_hESC' AND sample_2 = 'Fibroblasts') OR (sample_1 = 'Fibroblasts' AND sample_2 = 'H1_hESC')) AND ged.log2_fold_change>-20 AND ged.log2_fold_change<20 ;

#Pivot table SQL for scatterplots
#select g.*, sum(case when gd.sample_name = 'Fibroblasts' then fpkm end) as Fibroblasts, sum(case when gd.sample_name = 'H1_hESC' then fpkm end) as H1_hESC from genes g LEFT JOIN geneData gd on g.gene_id = gd.gene_id group by g.gene_id;


##################
#Plotting
##################

.density<-function(object, logMode = TRUE, pseudocount=0.0, labels, features=FALSE, replicates=FALSE,...){
	if(is(object,'CuffData')) {
		if(replicates){
			dat<-repFpkm(object,features=features)
			colnames(dat)[colnames(dat)=="rep_name"]<-"condition"
		}else{
			dat<-fpkm(object,features=features)
			colnames(dat)[colnames(dat)=="sample_name"]<-"condition"
		}
	} else {
		stop('Un-supported class of object.')
	}
	if(logMode) dat$fpkm<-dat$fpkm+pseudocount
	p<-ggplot(dat)
		if(logMode) {
			p<-p+geom_density(aes(x= log10(fpkm),group=condition,color=condition,fill=condition),alpha=I(1/3))
		}else{
			p<-p+geom_density(aes(x=fpkm,group=condition,color=condition,fill=condition),alpha=I(1/3))
		}
	
	p<-p + labs(title=object@tables$mainTable)
	
	#Default cummeRbund colorscheme
	p<-p + scale_fill_hue(l=50,h.start=200) + scale_color_hue(l=50,h.start=200)
	
	#TODO: Add label callout
	p
}

setMethod("csDensity",signature(object="CuffData"),.density)

.scatter<-function(object,x,y,logMode=TRUE,pseudocount=1.0,labels,smooth=FALSE,colorByStatus=FALSE, drawRug=TRUE, ...){
	dat<-fpkmMatrix(object,fullnames=TRUE)
	samp<-samples(object)
	
	#check to make sure x and y are in samples
	if (!all(c(x,y) %in% samp)){
		stop("One or more values of 'x' or 'y' are not valid sample names!")
	}
	
	#add pseudocount if necessary
	if(logMode){
		for (i in samp){
			dat[[i]]<-dat[[i]]+pseudocount
		}
	}
	
	#Attach tracking_id and gene_short_name
	if(!missing(labels)){
		require(stringr)
		tracking<-str_split_fixed(rownames(dat),"\\|",2)
		dat$gene_short_name<-tracking[,1]
		dat$tracking_id<-tracking[,2]
		
		labeled.dat<-dat[dat$gene_short_name %in% labels,]
	}
	
	#make plot object
	p<-ggplot(dat)
	p<- p + aes_string(x=x,y=y)
	
	#Right now, this does nothing, because 'significant' is not returned from fpkmMatrix object so I don't have this as a feature to draw
	if(colorByStatus){
		p<- p + geom_point(size=1.2,alpha=I(1/3))
	}else{
		p<- p + geom_point(size=1.2,alpha=I(1/3))
	}
	#Add symmetry line
	p<- p + geom_abline(intercept=0,slope=1,linetype=2) 
	
	#Add rug
	if(drawRug){
		p<- p + geom_rug(size=0.8,alpha=0.01)
	}
	
	#add smoother
	if(smooth){
		p <- p + stat_smooth(method="lm",fill="blue",alpha=0.2)
	}
	
	#Add highlights from labels
	if(!missing(labels)){
		p <- p + geom_point(data=labeled.dat,aes_string(x=x,y=y),size=1.3,color="red")
		p <- p + geom_text(data=labeled.dat,aes_string(x=x,y=y,label='gene_short_name'),color="red",hjust=0,vjust=0,angle=0,size=4)
	}
	
	#logMode
	if(logMode){
		p <- p + scale_y_log10() + scale_x_log10()
	}
	
	#Add title & Return value
	p<- p + labs(title=object@tables$mainTable)
	p
}

setMethod("csScatter",signature(object="CuffData"), .scatter)

.scatter2<-function(object,x,y,logMode=TRUE,pseudocount=1.0,labels,smooth=FALSE,alpha=0.05,colorByStatus=FALSE, drawRug=TRUE, ...){
	samp<-samples(object)
	
	#check to make sure x and y are in samples
	if (!all(c(x,y) %in% samp)){
		stop("One or more values of 'x' or 'y' are not valid sample names!")
	}
	
	#Setup query string
	sampleList<-paste("('",x,"','",y,"')",sep="")
	
	scatterQuery<-paste("SELECT m.gene_short_name,d.",object@idField,",sum(CASE WHEN d.sample_name='",x,"' THEN d.fpkm END) as x,sum(CASE WHEN d.sample_name='",y,"' THEN d.fpkm END) as y, edd.status, edd.p_value, edd.q_value FROM ",object@tables$mainTable," m LEFT JOIN ",object@tables$dataTable," d ON m.",object@idField,"=d.",object@idField," LEFT JOIN ",object@tables$expDiffTable," edd ON m.",object@idField," = edd.",object@idField," WHERE (edd.sample_1 in ",sampleList," AND (edd.sample_2 in ",sampleList,")) GROUP BY d.",object@idField,";",sep="")
	#write(scatterQuery,stderr())
	
	#Retrieve data
	dat<-dbGetQuery(object@DB,scatterQuery)
	
	#add pseudocount if necessary
	if(logMode){
		for (i in samp){
			dat$x<-dat$x+pseudocount
			dat$y<-dat$y+pseudocount
		}
	}
	
	#Flag significant genes
	dat$significant<-"no"
	dat$significant[dat$q_value<=alpha]<-"yes"
	dat$significant<-factor(dat$significant,levels=c("no","yes"))
	
	#Attach tracking_id and gene_short_name
	if(!missing(labels)){
		labeled.dat<-dat[dat$gene_short_name %in% labels,]
	}
	
	#make plot object
	p<-ggplot(dat) + theme_bw()
	p<- p + aes_string(x='x',y='y')
	
	#Right now, this does nothing, because 'significant' is not returned from fpkmMatrix object so I don't have this as a feature to draw
	if(colorByStatus){
		p<- p + geom_point(aes(color=significant),size=1.2) + scale_color_manual(values=c("black","red"))
	}else{
		p<- p + geom_point(size=1.2,alpha=I(1/3))
	}
	#Add symmetry line
	p<- p + geom_abline(intercept=0,slope=1,linetype=2) 
	
	#Add rug
	if(drawRug){
		p<- p + geom_rug(size=0.8,alpha=0.01)
	}
	
	#add smoother
	if(smooth){
		p <- p + stat_smooth(method="lm",fill="blue",alpha=0.2)
	}
	
	#Add highlights from labels
	if(!missing(labels)){
		p <- p + geom_point(data=labeled.dat,aes_string(x='x',y='y'),size=1.3,color="red")
		p <- p + geom_text(data=labeled.dat,aes_string(x='x',y='y',label='gene_short_name'),color="red",hjust=0,vjust=0,angle=0,size=4)
	}
	
	#logMode
	if(logMode){
		p <- p + scale_y_log10() + scale_x_log10()
	}
	
	#Add title & Return value
	p<- p + labs(title=object@tables$mainTable,x=x,y=y)
	p
}

.scatterMat<-function(object,replicates=FALSE,logMode=TRUE,pseudocount=1.0,hexbin=FALSE,useCounts=FALSE,...){
	if(replicates){
		if(useCounts){
			dat<-repCountMatrix(object)
		}else{
			dat<-repFpkmMatrix(object)
		}
	}else{
		if(useCounts){
			dat<-countMatrix(object)
		}else{
			dat<-fpkmMatrix(object)
		}
	}

	if(logMode)
	{
		dat<-dat+pseudocount
	}
	
	if(useCounts){
		myLab = "Normalized Counts"
	}else{
		myLab = "FPKM"
	}
	if(logMode){
		myLab = paste("log10 ",myLab,sep="")
		p <- .plotmatrix(log10(dat),hexbin=hexbin,...)
	}else{
		p <- .plotmatrix(dat,hexbin=hexbin,...)
	}
	
	p<- p + geom_abline(intercept=0,slope=1,linetype=2)
	
	p <- p + theme_bw() + ylab(myLab) + xlab(myLab)
	
	#p<- p + aes(alpha=0.01)
	
	p
	
}

setMethod("csScatterMatrix",signature(object="CuffData"),.scatterMat)

.volcano<-function(object,x,y,alpha=0.05,showSignificant=TRUE,features=FALSE,xlimits=c(-20,20),...){
	samp<-samples(object)
	
	#check to make sure x and y are in samples
	if (!all(c(x,y) %in% samp)){
		stop("One or more values of 'x' or 'y' are not valid sample names!")
	}
	
	dat<-diffData(object=object,features=features)
	
	#subset dat for samples of interest
	mySamples<-c(x,y)
	dat<-dat[(dat$sample_1 %in% mySamples & dat$sample_2 %in% mySamples),]
	dat$significant <- 'no'
	dat$significant[dat$q_value<=alpha]<-'yes'
	s1<-unique(dat$sample_1)
	s2<-unique(dat$sample_2)
	
	p<-ggplot(dat)
	if(showSignificant==FALSE){
		p<- p + geom_point(aes(x=log2_fold_change,y=-log10(p_value)),size=1.2)
	}else{
		p<- p + geom_point(aes(x=log2_fold_change,y=-log10(p_value),color=significant),size=1.2)
	}
	#Add title and return
	p<- p + theme_bw()
	p<- p + labs(title=paste(object@tables$mainTable,": ",s2,"/",s1,sep=""))
	p<- p + scale_colour_manual(values = c("black","red"))
	
	#Set axis limits
	p<- p + scale_x_continuous(limits=xlimits)
	
	p <- p + xlab(bquote(paste(log[2],"(fold change)",sep=""))) + 
	    ylab(bquote(paste(-log[10],"(p value)",sep="")))
	p
	
}

setMethod("csVolcano",signature(object="CuffData"), .volcano)

.volcanoMatrix<-function(object,alpha=0.05,xlimits=c(-20,20),mapping=aes(),...){
	dat<-diffData(object)
	part1<-dat[,c('sample_1','sample_2','value_1','value_2','test_stat','p_value','q_value')]
	part2<-data.frame(sample_1=part1$sample_2,sample_2=part1$sample_1,value_1=part1$value_2,value_2=part1$value_1,test_stat=-part1$test_stat,p_value=part1$p_value,q_value=part1$q_value)
	dat<-rbind(part1,part2)
	
	myLevels<-union(dat$sample_1,dat$sample_2)
	dat$sample_1<-factor(dat$sample_1,levels=myLevels)
	dat$sample_2<-factor(dat$sample_2,levels=myLevels)
	dat$log2_fold_change<-log2(dat$value_2/dat$value_1)
	
	#Set significance value
	dat$significant<-"no"
	dat$significant[dat$q_value<=alpha]<-"yes"
	
	#May need to expand filler for time-series data (when there aren't always all pairwise comparisons on which to facet
	filler<-data.frame(sample_1=factor(myLevels,levels=myLevels),sample_2=factor(myLevels,levels=myLevels),label="")
	filler$label<-as.character(filler$label)
	mapping <- defaults(mapping, aes_string(x = "log2_fold_change", y = "-log10(p_value)", color="significant"))
	class(mapping) <- "uneval"
	
	p <-ggplot(dat) + geom_point(mapping,na.rm=TRUE,size=0.8) + scale_colour_manual(values = c("black","red")) + facet_grid(sample_1~sample_2)
	
	p<- p + geom_vline(aes(xintercept=0),linetype=2)

	
	p <- p + theme_bw() + xlab(bquote(paste(log[2],"(fold change)",sep=""))) + ylab(bquote(paste(-log[10],"(p value)",sep="")))

	p
	
}

setMethod("csVolcanoMatrix",signature(object="CuffData"),.volcanoMatrix)

.distheat<-function(object, replicates=F, samples.not.genes=T, logMode=T, pseudocount=1.0, heatscale=c(low='lightyellow',mid='orange',high='darkred'), heatMidpoint=NULL, sigDigits=3, ...) {
	# get expression from a sample or gene perspective
	if(replicates){
		obj.fpkm<-repFpkmMatrix(object,fullnames=T)
	}else{
		obj.fpkm<-fpkmMatrix(object,fullnames=T)
	}
	
	if(samples.not.genes) {
		obj.fpkm.pos = obj.fpkm[rowSums(obj.fpkm)>0,]
	} else {
		obj.fpkm = t(obj.fpkm)
		obj.fpkm.pos = obj.fpkm[,colSums(obj.fpkm)>0]
	}
	
	if(logMode) {
		obj.fpkm.pos = log10(obj.fpkm.pos+pseudocount)
	}
	
	# compute distances
	obj.dists = JSdist(makeprobs(obj.fpkm.pos))
	
	# cluster to order
	obj.hc = hclust(obj.dists)
	
	# make data frame
	dist.df = melt(as.matrix(obj.dists),varnames=c("X1","X2"))
	dist.df$value<-as.numeric(format(dist.df$value,digits=sigDigits))
	# initialize
	g = ggplot(dist.df, aes(x=X1, y=X2, fill=value))
	
	# draw
	labels = labels(obj.dists)
	g = g + geom_tile(color="black") + scale_x_discrete("", limits=labels[obj.hc$order]) + scale_y_discrete("", limits=labels[obj.hc$order])
	
	# roll labels
	g = g + theme(axis.text.x=element_text(angle=-90, hjust=0), axis.text.y=element_text(angle=0, hjust=1))
	
	# drop grey panel background and gridlines
	g = g + theme(panel.grid.minor=element_line(colour=NA), panel.grid.major=element_line(colour=NA), panel.background=element_rect(fill=NA, colour=NA))
	
	# adjust heat scale
	if (length(heatscale) == 2) {
		g = g + scale_fill_gradient(low=heatscale[1], high=heatscale[3], name="JS Distance")
	}
	else if (length(heatscale) == 3) {
		if (is.null(heatMidpoint)) {
			heatMidpoint = max(obj.dists) / 2.0
		}
		g = g + scale_fill_gradient2(low=heatscale[1], mid=heatscale[2], high=heatscale[3], midpoint=heatMidpoint, name="JS Distance")
	}
	if(samples.not.genes){
		g <- g + geom_text(aes(label=value))
	}
	# return
	g
}

setMethod("csDistHeat", signature("CuffData"), .distheat)


.boxplot<-function(object,logMode=TRUE,pseudocount=0.0001,replicates=FALSE,...){
	if(replicates){
		dat<-repFpkm(object)
		colnames(dat)[colnames(dat)=="rep_name"]<-"condition"
	}else{
		dat<-fpkm(object)
		colnames(dat)[colnames(dat)=="sample_name"]<-"condition"
	}
	if(logMode) {
		dat$fpkm<-dat$fpkm+pseudocount
		p<-ggplot(dat)
		p<-p+geom_boxplot(aes(x=condition,y=log10(fpkm),fill=condition),size=0.3,alpha=I(1/3))
	} else {
		p <- ggplot(dat)
		p<-p+geom_boxplot(aes(x=condition,y=fpkm,fill=condition),alpha=I(1/3),size=0.3)
	}
	p<- p + theme(axis.text.x=element_text(angle=-90, hjust=0))
	
	#Default cummeRbund colorscheme
	p<-p + scale_fill_hue(l=50,h.start=200)
	
	p
}

setMethod("csBoxplot",signature(object="CuffData"),.boxplot)

.dendro<-function(object,logMode=T,pseudocount=1,replicates=FALSE,...){
	if(replicates){
		fpkmMat<-repFpkmMatrix(object)
	}else{
		fpkmMat<-fpkmMatrix(object)
	}
	if(logMode){
		fpkmMat<-log10(fpkmMat+pseudocount)
	}
	res<-JSdist(makeprobs(fpkmMat))
	#colnames(res)<-colnames(fpkmMat)
	
	#res<-as.dist(res)
	res<-as.dendrogram(hclust(res))
	plot(res,main=paste("All",deparse(substitute(object)),sep=" "),...)
	res
}

setMethod("csDendro",signature(object="CuffData"),.dendro)

.MAplot<-function(object,x,y,logMode=T,pseudocount=1,smooth=FALSE,useCount=FALSE){
	if(useCount){
		dat<-.getCountMA(object,x,y,logMode=logMode,pseudocount=pseudocount)
	}else{
		dat<-.getMA(object,x,y,logMode=logMode,pseudocount=pseudocount)
	}
	p<-ggplot(dat)
	p<-p+geom_point(aes(x=A,y=log2(M)),size=0.8)
	
	#add smoother
	if(smooth){
		p <- p + stat_smooth(aes(x=A,y=log2(M)),fill="blue")
	}
	#add baseline
	p <- p + geom_hline(yintercept=0)
	
	p
}

setMethod("MAplot",signature(object="CuffData"),.MAplot)

.dispersionPlot<-function(object){
	dat<-count(object)
	p<-ggplot(dat)
	p<-p+geom_point(aes(x=count,y=dispersion,color=sample_name)) + facet_wrap('sample_name') + scale_x_log10() + scale_y_log10()
	p
}

setMethod("dispersionPlot",signature(object="CuffData"),.dispersionPlot)

.MDSplot<-function(object,replicates=FALSE,logMode=TRUE,pseudocount=1.0){
	if(replicates){
		dat<-repFpkmMatrix(object)
		#repData<-sapply(replicates(object),function(x){strsplit(x,"_")[[1]][1]}) This is to color by condition and not replicate...
	}else{
		dat<-fpkmMatrix(object)
	}
	
	if(logMode){
		dat<-log10(dat+pseudocount)
	}

	d<-JSdist(makeprobs(dat))
	fit <- cmdscale(d,eig=TRUE, k=2)
	res<-data.frame(names=rownames(fit$points),M1=fit$points[,1],M2=fit$points[,2])
	p <- ggplot(res)
	p <- p + geom_point(aes(x=M1,y=M2,color=names)) + geom_text(aes(x=M1,y=M2,label=names,color=names)) + theme_bw()
	p
}

setMethod("MDSplot",signature(object="CuffData"),.MDSplot)

#Not sure if I want to include this or not..
.PCAplot<-function(object,x="PC1", y="PC2",replicates=FALSE,pseudocount=1.0,scale=TRUE,showPoints=TRUE,...){
	if(replicates){
		fpkms<-repFpkmMatrix(object)
	}else{
		fpkms<-fpkmMatrix(object)
	}
	fpkms<-log10(fpkms+pseudocount)
	PC<-prcomp(fpkms,scale=scale,...)
	dat <- data.frame(obsnames=row.names(PC$x), PC$x)
	#dat$shoutout<-""
	#dat$shoutout[matchpt(PC$rotation,PC$x)$index]<-rownames(pca$x[matchpt(pca$rotation,pca$x)$index,])
	plot <- ggplot(dat, aes_string(x=x, y=y)) 
	if(showPoints){
		plot<- plot + geom_point(alpha=.4, size=0.8, aes(label=obsnames))
	}
	
	plot <- plot + geom_hline(aes(yintercept=0), size=.2) + geom_vline(aes(xintercept=0), size=.2) #+ geom_text(aes(label=shoutout),size=2,color="red")

	datapc <- data.frame(varnames=rownames(PC$rotation), PC$rotation)
	mult <- min(
			(max(dat[,y]) - min(dat[,y])/(max(datapc[,y])-min(datapc[,y]))),
			(max(dat[,x]) - min(dat[,x])/(max(datapc[,x])-min(datapc[,x])))
	)
	datapc <- transform(datapc,
			v1 = .7 * mult * (get(x)),
			v2 = .7 * mult * (get(y))
	)
	plot <- plot + 
			#coord_equal() + 
			geom_text(data=datapc, aes(x=v1, y=v2, label=varnames,color=varnames), vjust=1)
	plot <- plot + geom_segment(data=datapc, aes(x=0, y=0, xend=v1, yend=v2,color=varnames), arrow=arrow(length=unit(0.2,"cm")), alpha=0.75) + theme_bw()
	plot
}

setMethod('PCAplot',signature(object="CuffData"),.PCAplot)

#This is most definitely a work in progress
.confidencePlot<-function(object,percentCutoff=20){
	res<-.statsMatrix(object)
	#res$CV[is.na(res$CV)]<-0.0
	res$CV<-as.numeric(res$CV)
	p<-ggplot(res)
	p<-p + 	geom_point(aes(x=log10(fpkm),y=log10(CV)),size=1.5,alpha=0.3)
	p<-p + 	geom_smooth(aes(x=log10(fpkm),y=log10(CV),color=sample_name)) +
	#p<-p + 	geom_point(aes(x=log10(fpkm),y=log10(count),color=-log10(CV))) +
	#p<-p + 	geom_point(aes(x=log10(fpkm),y=log2(fpkm/count),color=-log10(CV))) + geom_hline(aes(0),linetype=2) +
			#facet_wrap('sample_name') + 
			geom_abline(intercept=0,slope=1,linetype=2,size=0.3) + 
			scale_color_gradient(name="%CV",low="darkblue",high="white",limits=c(0,percentCutoff), na.value = "white") +
			labs(title=object@type)
	p
}

.CVdensity<-function(object){
	dat<-.statsMatrix(object)
	p<-ggplot(dat)
	p <- p + geom_density(aes(x=CV,fill=sample_name),alpha=0.3,position="dodge") + scale_x_log10()
	p
}

.fpkmSCVPlot<-function(object,FPKMLowerBound=1,showPool=FALSE){
	if(!showPool){
		showPool=any(table(.getRepConditionLevels(object))<2) # Counts number of replicates per condition and requires a minimum of 2.  If any n<2, values are determined from pooling across samples.
		if(showPool){
			warning("At least one of your conditions does not have enough replicates to estimate variance. Estimating variance across all conditions instead.")
		}
	}
	dat<-repFpkm(object)
	colnames(dat)[1]<-"tracking_id"
	dat<-dat[,c('tracking_id','sample_name','fpkm')]
	dat<-dat[dat$fpkm>0,]
	
	if(showPool){
		subsetCols<-c('tracking_id')
	}else{
		subsetCols<-c('tracking_id','sample_name')
	}
	
	#Option 3 (tapply on log10(replicateFPKM) values)
	dat.means<-tapply(dat$fpkm,dat[,subsetCols],function(x){mean(x,na.rm=T)})
	dat.sd<-tapply(dat$fpkm,dat[,subsetCols],function(x){sd(x,na.rm=T)})
	#write("Calculating replicate fpkm mean...",stderr())
	dat.means<-melt(dat.means)
	#write("Calculating replicate fpkm stdev...",stderr())
	dat.sd<-melt(dat.sd)
	colnames(dat.means)[colnames(dat.means)=="value"]<-'fpkm'
	colnames(dat.sd)[colnames(dat.sd)=="value"]<-'stdev'
	dat<-cbind(dat.means,dat.sd$stdev)
	colnames(dat)[colnames(dat)=="dat.sd$stdev"]<-'stdev'
	dat<-dat[!is.na(dat$stdev) & !is.na(dat$fpkm),]
	dat<-dat[dat$fpkm>0 & dat$stdev>0,]
	if(!showPool){
		dat$sample_name<-factor(dat$sample_name,levels=samples(object))
	}
	
	p <-ggplot(dat,aes(x=fpkm,y=(stdev/fpkm)^2),na.rm=T)
	#p <-ggplot(dat,aes(x=log10(fpkm+1),y=log10(stdev)),na.rm=T)
	if(showPool){
		p <- p + stat_smooth(na.rm=T,method='auto',fullrange=T)
	}else{
		p <- p + #geom_point(aes(color=sample_name),size=1,na.rm=T) +
				stat_smooth(aes(color=sample_name,fill=sample_name),na.rm=T,method='auto',fullrange=T)
	}
	p <- p + scale_x_log10() +
		 scale_y_continuous(name=bquote(CV^2)) +
		 xlab(bquote(paste(log[10],"FPKM",sep=" "))) +
		 theme_bw() + xlim(c(log10(FPKMLowerBound),max(log10(dat$fpkm)))) + labs(title=object@type)
	p
	
}

setMethod("fpkmSCVPlot",signature(object="CuffData"),.fpkmSCVPlot)

.sensitivityPlot<-function(object){
	return
}

#TODO:Log2FC vs Test-statistic

#TODO:log2FPKM vs log2(stdev) (color by sample)

#TODO: Index of dispersion alpha (FPKM vs ?)
#SELECT gd.*, (gd.conf_hi-gd.fpkm)/2 as fpkm_stdev, gc.count, gc.variance as count_variance , gc.uncertainty, gc.dispersion, ((gd.conf_hi-gd.fpkm)/2)/gd.fpkm AS 'CV' FROM geneData gd LEFT JOIN geneCount gc ON gd.gene_id=gc.gene_id AND gd.sample_name=gc.sample_name;



#############
# Other Methods
#############
.specificity<-function(object,logMode=T,pseudocount=1,relative=FALSE,...){
	fpkms<-fpkmMatrix(object,...)
	if(logMode){
		fpkms<-log10(fpkms+pseudocount)
	}
	fpkms<-t(makeprobs(t(fpkms)))
	d<-diag(ncol(fpkms))
	res<-apply(d,MARGIN=1,function(q){
				JSdistFromP(fpkms,q)
			})
	colnames(res)<-paste(colnames(fpkms),"_spec",sep="")
	
	if(relative){
		res<-res/max(res)
	}
	1-res
}

setMethod("csSpecificity",signature(object="CuffData"),.specificity)

######################
# Exploratory Analysis
######################

.nmf<-function(object,k,logMode=T,pseudocount=1,maxiter=1000,replicates=FALSE,fullnames=FALSE){
	require(NMFN)
	if(missing(k)) stop("Please provide a rank value for factorization (arg=k)")
	
	if(replicates){
		m=repFpkmMatrix(object,fullnames=fullnames)
	}else{
		m=fpkmMatrix(object,fullnames=fullnames)
	}
	
	if(logMode) 
	{
		m = log10(m+pseudocount)
	}
	
	myNMF<-nnmf(m,k=k,maxiter=maxiter)
	return (myNMF)
}

setMethod("csNMF",signature(object="CuffData"),.nmf)

#############
# GSEA helper methods
#############

.makeRnk<-function(object,x,y,filename,...){
	#Creates a log2-fold change .rnk file for all genes given an x/y comparison.
	#While this method will work for any cuffData level, it really only makes sense for 'genes' as this is what is required for GSEA...
	#Must provide 'x' and 'y' so as to provide a log2 fold change.
	samp<-samples(object)
	#check to make sure x and y are in samples
	if (!all(c(x,y) %in% samp)){
		stop("One or more values of 'x' or 'y' are not valid sample names!")
	}
	query<-paste("SELECT gd.gene_id,g.gene_short_name,(sum(CASE WHEN gd.sample_name='",x,"' THEN gd.fpkm+1 END))/(sum(CASE WHEN gd.sample_name='",y,"' THEN gd.fpkm+1 END)) as 'ratio' FROM genes g LEFT JOIN geneData gd ON g.gene_id=gd.gene_id GROUP BY g.gene_id ORDER BY ratio DESC",sep="")
	res<-dbGetQuery(object@DB,query)
	res$ratio<-log2(res$ratio)
	#Remove gene_id field
	res<-res[,-1]
	#Remove rows with "NA" for gene_short_name
	res<-res[!is.na(res$gene_short_name),]
	#Write to file
	write.table(res,file=filename,sep="\t",quote=F,...,row.names=F,col.names=F)
	#res
}

.makeRnkTest<-function(object,x,y,filename,...){
	samp<-samples(object)
	#check to make sure x and y are in samples
	if (!all(c(x,y) %in% samp)){
		stop("One or more values of 'x' or 'y' are not valid sample names!")
	}
	query<-paste("SELECT gd.gene_id,g.gene_short_name,gd.test_stat FROM genes g LEFT JOIN geneExpDiffData gd ON g.gene_id=gd.gene_id WHERE ((gd.sample_1='",x,"' AND gd.sample_2='",y,"') OR (gd.sample_2='",x,"' AND gd.sample_1='",y,"')) ORDER BY gd.test_stat DESC",sep="")
	#print(query)
	res<-dbGetQuery(object@DB,query)
	#Remove gene_id field
	res<-res[,-1]
	#Remove rows with "NA" for gene_short_name
	res<-res[!is.na(res$gene_short_name),]
	#Write to file
	write.table(res,file=filename,sep="\t",quote=F,...,row.names=F,col.names=F)	
}

.makeRnkTestStat<-function(object,x,y,filename,...){
	samp<-samples(object)
	#check to make sure x and y are in samples
	if (!all(c(x,y) %in% samp)){
		stop("One or more values of 'x' or 'y' are not valid sample names!")
	}
	query<-paste("SELECT gd.gene_id,g.gene_short_name,gd.sample_1, gd.sample_2,gd.test_stat FROM genes g LEFT JOIN geneExpDiffData gd ON g.gene_id=gd.gene_id WHERE ((gd.sample_1 ='",x,"' AND gd.sample_2='",y,"') OR (gd.sample_2='",x,"' AND gd.sample_1='",y,"')) GROUP BY gd.gene_id",sep="")
	res<-dbGetQuery(object@DB,query)
	if(unique(res$sample_2)==x){
		res2<-res
		res2$test_stat<--(res$test_stat)
		res2$sample_1<-res$sample_2
		res2$sample_2<-res$sample_1
		res<-res2
	}
	#Order by test_stat
	res<-res[order(res$test_stat,decreasing=F),]
	#Remove gene_id field
	res<-res[,c('gene_short_name','test_stat')]
	#Remove rows with "NA" for gene_short_name
	res<-res[!is.na(res$gene_short_name),]
	#Write to file
	write.table(res,file=filename,sep="\t",quote=F,...,row.names=F,col.names=F)
	#head(res)
}

setMethod("makeRnk",signature(object="CuffData"),.makeRnkTestStat)


#############
#Utility functions
#############
.checkSamples<-function(dbConn,sampleIdList){
	dbSamples<-dbReadTable(dbConn,"samples")
	if (all(sampleIdList %in% dbSamples$sample_name)){
		return(TRUE)
	}else{
		return(FALSE)
	}
}

.checkReps<-function(dbConn,repIdList){
	dbReps<-dbReadTable(dbConn,"replicates")
	if (all(repIdList %in% dbReps$rep_name)){
		return(TRUE)
	}else{
		return(FALSE)
	}
}

###################
# Coersion methods
###################
#As ExpressionSet

###################
#Utility functions
###################

#.volcanoMatrix <- function(data){
#	densities <- do.call('rbind',lapply(1:))
#	p <-ggplot(data) + facet_grid(sample1~sample2,scales="free") + geom_point(aes(x=log2_fold_change,y=-log10(p_value))) + stat_density(aes(x = log2_fold_change, 
#					y = ..scaled.. * diff(range(log2_fold_change)) + min(log2_fold_change)), data = densities, 
#			position = "identity", colour = "grey20", geom = "line")
#	
#}
shiauck/cummeRbund_on_RSQLite2.0 documentation built on May 5, 2019, 12:33 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
shiauck/cummeRbund_on_RSQLite2.0
Analysis, exploration, manipulation, and visualization of Cufflinks high-throughput sequencing data.

R/methods-CuffData.R
In shiauck/cummeRbund_on_RSQLite2.0: Analysis, exploration, manipulation, and visualization of Cufflinks high-throughput sequencing data.

R Package Documentation

Browse R Packages

We want your feedback!

shiauck/cummeRbund_on_RSQLite2.0 Analysis, exploration, manipulation, and visualization of Cufflinks high-throughput sequencing data.

R/methods-CuffData.R In shiauck/cummeRbund_on_RSQLite2.0: Analysis, exploration, manipulation, and visualization of Cufflinks high-throughput sequencing data.

R Package Documentation

Browse R Packages

We want your feedback!

shiauck/cummeRbund_on_RSQLite2.0
Analysis, exploration, manipulation, and visualization of Cufflinks high-throughput sequencing data.

R/methods-CuffData.R
In shiauck/cummeRbund_on_RSQLite2.0: Analysis, exploration, manipulation, and visualization of Cufflinks high-throughput sequencing data.