######################################################################
## quantity build ##
######################################################################
input_quantity <- function(object){
peaks <- as.data.frame(object@peaks)
lst <- by(peaks,peaks[,"sample"],function(peaks){
tapply(peaks[,"maxf"],peaks[,"comp"],sum)
})
quantity <- unlist(lst)
object@comps <- cbind(object@comps,quantity=quantity)
object
}
######################################################################
## TIC ##
######################################################################
cplotTIC <- function(object,xintercept=NULL,xscale=NULL,color=NULL){
if(is.null(color)){
color <- c("red","blue","green")
}
if(!("quantity" %in% names(object@comps))){
object <- input_quantity(object)
}
comps <- as.data.frame(object@comps)
if(is.null(xscale)){
xscale=range(comps[,'rt'])
}
p <- ggplot(comps,aes(x=rt,y=quantity,xend=rt,yend=0,group=sample))
p <- p+geom_segment()+facet_grid(sample~.)
if(is.null(xintercept)){
print(p)
}else{
for(i in 1:length(xintercept)){
p <- p+geom_vline(xintercept=xintercept[[i]],colour=color[i],alpha=0.5)
cat(color[i]," : ",xintercept[[i]],"\n")
}
print(p)
}
}
######################################################################
## cplotResult ##
######################################################################
## FIX ME: HOW TO FIND STH IN COMMON MORE PRECISELY
cplotResult <- function(object,id.diff=NULL,id.common=NULL,group.name=F){
final <- as(object,"ExpressionSet")
mxt <- exprs(final)
cl <- ncol(mxt)
if(group.name==F){
if(!is.null(id.diff)){
mx <- mxt[rownames(mxt)%in%id.diff,]
mx <- cbind(mx,diff=apply(mx,1,function(x){
mean(x[1:cl/2])-mean(x[(cl/2+1):cl])
})
)
mx1 <- mx[mx[,'diff']>0,]
mx2 <- mx[mx[,'diff']<0,]
mx1 <- mx1[,-(ncol(mx))]
df1 <- data.frame(level=as.numeric(mx1))
df1$sample <- rep(colnames(mx1),each=nrow(mx1))
df1$id <- rep(rownames(mx1),ncol(mx1))
df1$group <- 'A.High'
mx2 <- mx2[,-(ncol(mx))]
df2 <- data.frame(level=as.numeric(mx2))
df2$sample <- rep(colnames(mx2),each=nrow(mx2))
df2$id <- rep(rownames(mx2),ncol(mx2))
df2$group <- 'B.High'
df <- rbind(df1,df2)
if(!is.null(id.common)){
mx <- mxt[rownames(mxt)%in%id.common,]
df3 <- data.frame(level=as.numeric(mx))
df3$sample <- rep(colnames(mx),each=nrow(mx))
df3$id <- rep(rownames(mx),ncol(mx))
df3$group <- rep('Same level',nrow(mx))
df <- rbind(df,df3)
}
p <- qplot(sample,level,data=df,colour=group,geom="line",group=id)
p
}
}
if(group.name==T){
id <- c(id.diff,id.common)
id <- id[!is.null(id)]
mx <- mxt[rownames(mxt)%in%id,]
if(length(id)==1) mx <- t(mx)
df <- data.frame(level=as.numeric(mx))
df$sample <- rep(colnames(mx),each=nrow(mx))
df$group <- rep(id,ncol(mx))
if(length(id)>1) p <- qplot(sample,level,data=df,colour=factor(group),geom="line",group=group)
if(length(id)==1) p <- qplot(sample,level,data=df,geom="line",group=group)
}
p
}
######################################################################
## view aninmation based on parameter ##
######################################################################
finalObj <- function(object,tic.cutoff,npeaks.cutoff,dist.cutoff,rt_window){
xset_comps_filt <- findComps(object,"sigma_filt",tic.cutoff=tic.cutoff,npeaks.cutoff=npeaks.cutoff)
xset_groups <- groupComps(xset_comps_filt, "angle",dist.cutoff=dist.cutoff,rt_window=rt_window)
xset_sum <- summarize(xset_groups, "common")
xset_norm <- normalize(xset_sum, "scale")
xset_norm
}
## getPvalue <- function(object){
## require(limma)
## final <- as(object,"ExpressionSet")
## design <- final@phenoData@data
## df <- exprs(final)
## design <- model.matrix(~class,data=design)
## fit <- lmFit(df,design)
## fit <- eBayes(fit)
## df <- cbind(df,pvalue=fit$p.value[,2])
## df <- as.data.frame(df)
## df
## }
## dir <- "~/Desktop/data/"
## cplotPvalue(dir,xlim=c(1,20))
## cplotPvalue <- function(dir,xlim=NA,output=NA){
## require(animation)
## if(is.na(output)){
## ani.options(nmax = length(list.files(dir))+1)
## }else{
## ani.options(nmax = length(list.files(dir))+1,outdir=output)
## }
## ani.start()
## for(nm in list.files(dir)){
## file <- file.path(dir,nm)
## df <- read.csv(file,header=T)
## df <- df[order(df$pvalue),]
## df2 <- apply(df[,1:8],1,sum)
## summ <- sort(df2,decreasing=T)
## if(is.na(xlim)) xlim=c(1,nrow(df))
## xrange=c(min(xlim),min(nrow(df),max(xlim)))
## par(mfrow=c(2,1),mar=c(4,2,1,1))
## plot(df$pvalue,type='l',col="blue",xlim=xlim,ylab="p-value",
## ylim=range(df$pvalue[min(xrange):max(xrange)]))
## abline(v=seq(min(xlim),max(xlim),by=round(diff(xlim)/10)),
## lty="dotted",col="lightgray")
## mtext(nm,side=3,line=-6,cex=3,col="gray")
## plot(summ,type="l",col="red",xlim=xlim,ylab="Sum for each row",
## ylim=range(summ[min(xrange):max(xrange)]))
## abline(v=seq(min(xlim),max(xlim),by=round(diff(xlim)/10)),lty="dotted",col="lightgray")
## mtext(nm,side=3,line=-6,cex=3,col="gray")
## }
## ani.stop()
## }
cplotHist <- function(dir){
t <- numeric()
for(nm in list.files(dir)){
file <- file.path(dir,nm)
df <- read.csv(file,header=T)
top <- nrow(subset(df,df$pvalue<0.0001))
t <- c(t,top)
}
d <- data.frame(t=t)
qplot(data=d,x=t,xlab="Number of metabolites[pvlaue<0.00001]",ylab="count",geom="histogram")
# geom_text(x=max(t),y=max(table(t),label=names(table(t))[table(t)==as.numeric(max(table(t)))]))
}
## plot sigma_d vs sigma_t
cplotDt <- function(object){
group <- as.data.frame(object@groups)
p <- ggplot(data=group,aes(x=sigma_t,y=sigma_d))+geom_point()
p
}
getFeatures <- function(object){
final <- as(object,"ExpressionSet")
df <- getPvalue(object)
design <- object@phenoData
f <- factor(design[,'class'])
p <- df$pvalue
mx <- exprs(final)
diff <- apply(mx,1,function(mx){
sp <- split(mx,f)
diff <- mean(sp[[1]])-mean(sp[[2]])
}
)
feat <- final@featureData@data
feat$diff <- diff
feat$pvalue <- p
feat
}
## find top different metabolite
## make it return group id
topDiff <- function(object,scale=NULL,pvalue=NULL){
ob <- getFeatures(object)
if(is.null(scale)) scale <- 1:nrow(ob)
ob <- ob[order(ob$pvalue,decreasing=F),][scale,]
if(!is.null(pvalue))
ob <- ob[ob$pvalue<pvalue,]
ob
}
## object from this could be used for dump2msp
## find top common metabolite
topCommon <- function(object,scale=NULL,method="diff"){
final <- as(object,"ExpressionSet")
design <- object@phenoData
ob <- getFeatures(object)
if(is.null(scale)) scale=1:nrow(ob)
ob$absdiff <- abs(ob$diff)
mx <- exprs(final)
ob$sum <- apply(mx,1,sum)
if(method=="diff") idx <- order(ob$absdiff,decreasing=F)
if(method=="sum") idx <- order(ob$sum,decreasing=F)
ob <- ob[idx,][scale,]
ob
}
cplotTotalP <- function(dir,method="diff",xscale=NA){
final <- data.frame()
if(method=="diff"){
for(nm in list.files(dir)){
file <- file.path(dir,nm)
df <- read.csv(file,header=T)
p <- df$pvalue
this <- data.frame(pvalue= sort(p),
id=seq_len(nrow(df)),
group = rep(nm,nrow(df)))
final <- rbind(final,this)
}
p <- qplot(x=id,y=pvalue,data=final,geom="line",colour=group,group=group)+
opts(legend.position="none")
if(!is.na(xscale)) {
sub <- subset(final,id<max(xscale))
yscale <- range(sub$pvalue)
p <- p+scale_x_continuous(limits=xscale)+scale_y_continuous(limits=yscale)
}}
if(method=="sum"){
for(nm in list.files(dir)){
file <- file.path(dir,nm)
df <- read.csv(file,header=T)
df <- df[,1:8]
df$sum <- apply(df,1,sum)
this <- data.frame(sum =sort(df$sum,decreasing=T),
id=seq_len(nrow(df)),
group=rep(nm,nrow(df)))
final <- rbind(final,this)
}
p <- qplot(x=id,y=sum,data=final,geom="line",colour=group,group=group)+
opts(legend.position="none")
if(!is.na(xscale)) {
sub <- subset(final,id<max(xscale))
yscale <- range(sub$sum)
p <- p+scale_x_continuous(limits=xscale)+scale_y_continuous(limits=yscale)
}}
print(p)
}
######################################################################
## retention time ##
######################################################################
# function rtmatch
rtmatch=function(old,new,mode,noise,range.min=-0.1,range.max=0.1,plot.match=T){
cat("Retention time unit: minute\n\n")
old=sort(old)
n=0
x=numeric()
y=numeric()
for(i in 1:length(old)){
z=0
cat("old rt(",old[i],") matches: ")
for(k in 1:length(new)){
if(new[k]<=(old[i]+range.max)&new[k]>=(old[i]+range.min)){
x=old[i]
y=c(y,new[k])
cat(new[k]," ")
z=1
}
}
if(z==1)n=n+1
cat("\n")
}
cat("\n")
percent=n/length(old)*100
cat("Total match:",percent,"%(",n,"in",length(old),") of top",length(old),"\n")
d.old=old
d.new=new
if(plot.match){
par(mfrow=c(1,1),mar=c(4,2,1,1))
ddd <- c(d.old,d.new)
d.min <- min(ddd)
d.max <- max(ddd)
plot(c(d.min,d.max),c(1,2),xlim=c(d.min,d.max),xlab="RT",ylim=c(0.5,2.5),ylab="",
type="n",axes=F,main=paste("match range: [",range.min,",",range.max,"]","Total match:",percent,"%(",n,"in",length(old),") of top",length(old)))
rect(0,0,d.max+5,3,col="grey90")
abline(v=seq(0,d.max+5,0.25),col="white")
abline(v=seq(0,d.max+5,1),col="white",lwd=2)
abline(h=1:2,col="white")
points(d.old,pch=16,jitter(rep(1,length(d.old)),4))
points(d.new,pch=16,jitter(rep(2,length(d.new)),4))
axis(side=1,at=seq(0,d.max+5,5),labels=seq(0,d.max+5,5))
axis(side=2,at=1:2,labels=c("old","new"))
box(col="grey80")
if(plot.match==T){
abline(v=mode,col="red")
abline(v=noise,col="blue")
}}
return(percent)
}
## Directly read raw dir, and compare tic, used to compare RT
cplotTIC3 <- function(dir,xscale=NULL,facets=FALSE){
lst <- list.files(dir,full.names=T)
prof.lst <- list()
rt.lst <- list()
for(i in lst){
x <- loadSample(i)
x <- genProfile(x)
prof.lst[[i]] <- x@env$profile
rt.lst[[i]] <- x@scantime
}
lst <- lapply(prof.lst,function(x){
apply(x,2,sum)
})
df <- cbind(rt=unlist(rt.lst),int=unlist(lst))
df <- cbind(df,sample=rep(1:length(lst),each=length(rt.lst[[1]])))
df <- as.data.frame(df)
p <- qplot(rt,int,data=df,group=factor(sample),geom='line',colour=factor(sample))+ylab('intensity')
if(facets) p <- p+facet_grid(sample~.)
if(is.null(xscale)) {print(p)} else {
p <- p+scale_x_continuous(limits=xscale)
print(p)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.