getPValueMean <- function(expression_values,param_p,null_p){
G=ncol(param_p)
if(G==1){
pv=sapply(1:length(expression_values), function(i) min(pnorm(expression_values[i],param_p[2,1],param_p[3,1]),(1-pnorm(expression_values[i],param_p[2,1],param_p[3,1]))))
null_mean=param_p[2,1]
}
if(G==2){
if(sum(null_p)==2){
pv=sapply(1:length(expression_values), function(i) min((param_p[1,1]*pnorm(expression_values[i],param_p[2,1],param_p[3,1]) + param_p[1,2] * pnorm(expression_values[i],param_p[2,2],param_p[3,2])), (1-(param_p[1,1]*pnorm(expression_values[i],param_p[2,1],param_p[3,1])+ param_p[1,2] * pnorm(expression_values[i],param_p[2,2],param_p[3,2])))))
null_mean =(param_p[1,1]* param_p[2,1]) + (param_p[1,2] *param_p[2,2])
}
else{
## Use the normal distribution that has the largest proportion and is well separated from the other one
n=which(null_p==1)
pv=sapply(1:length(expression_values), function(i) min(pnorm(expression_values[i],param_p[2,n],param_p[3,n]), (1-pnorm(expression_values[i],param_p[2,n],param_p[3,n]))))
null_mean=param_p[2,n]
}
}
if(G==3){
if(sum(null_p)==3){
## Combination of the three normal distributions
pv=sapply(1:length(expression_values), function(i) min((param_p[1,1]*pnorm(expression_values[i],param_p[2,1],param_p[3,1]) + param_p[1,2] * pnorm(expression_values[i],param_p[2,2],param_p[3,2])+ param_p[1,3]*pnorm(expression_values[i],param_p[2,3],param_p[3,3])),(1- (param_p[1,1]*pnorm(expression_values[i],param_p[2,1],param_p[3,1]) + param_p[1,2] * pnorm(expression_values[i],param_p[2,2],param_p[3,2])+ param_p[1,3]*pnorm(expression_values[i],param_p[2,3],param_p[3,3])))))
null_mean=(param_p[1,1]* param_p[2,1]) + (param_p[1,2] *param_p[2,2]) + (param_p[1,3] *param_p[2,3])
}
else{
if(sum(null_p)==2){
## Combination of the two normal distributions that have the largest proportions
n=which(null_p==1)
W1=param_p[1,n[1]]/(param_p[1,n[1]]+param_p[1,n[2]])
W2=param_p[1,n[2]]/(param_p[1,n[1]]+param_p[1,n[2]])
pv=sapply(1:length(expression_values), function(i) min((W1*pnorm(expression_values[i],param_p[2,n[1]],param_p[3,n[1]]) + W2* pnorm(expression_values[i],param_p[2,n[2]],param_p[3,n[2]])), (1-(W1*pnorm(expression_values[i],param_p[2,n[1]],param_p[3,n[1]])+ W2*pnorm(expression_values[i],param_p[2,n[2]],param_p[3,n[2]])))))
null_mean=(W1* param_p[2,n[1]]) + (W2 *param_p[2,n[2]])
}
else{
## Only one of the three possible distributions is kept
n=which(null_p==1)
pv=sapply(1:length(expression_values), function(i) min(pnorm(expression_values[i],param_p[2,n],param_p[3,n]),(1-pnorm(expression_values[i],param_p[2,n],param_p[3,n]))))
null_mean=param_p[2,n]
}
}
}
if(G>2){
## Combination of the null normal distributions (general case, G > 2)
n=which(null_p==1)
W_sum=sum(param_p[1,n])
W=sapply(1:length(n), function(i) param_p[1,n[i]]/W_sum)
pv1=sapply(1:length(expression_values), function(i) sum(W*pnorm(expression_values[i],param_p[2,n],param_p[3,n])))
pv2=sapply(1:length(expression_values), function(i) 1-sum(W*pnorm(expression_values[i],param_p[2,n],param_p[3,n])))
pv=sapply(1:length(pv1),function(i) min(pv1[i],pv2[i]))
null_mean=sum(W* param_p[2,n])
}
mean_pv=c(null_mean,pv)
return(mean_pv)
}
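## Usage sketch for getPValueMean (hypothetical parameters): with a two-component
## null (null_p = c(1,1)) the p-value is the smaller tail of the mixture CDF and
## the null mean is the proportion-weighted mean of the component means.
## param <- matrix(c(0.7, 2.0, 0.5,    # proportion, mean, sd of Normal 1
##                   0.3, 6.0, 1.0),   # proportion, mean, sd of Normal 2
##                 nrow = 3, ncol = 2)
## res <- getPValueMean(expression_values = c(1.8, 6.5, 10), param_p = param, null_p = c(1, 1))
## res[1]   # null mean: 0.7*2.0 + 0.3*6.0 = 3.2
## res[-1]  # one two-sided p-value per expression value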
getLoglikelihoodFromBIC <- function(v_BIC,nb_condition){
v_nb_param=c(2,5,8,11,15)
v_nb_param=v_nb_param[1:length(v_BIC)]
v_lk=(v_BIC+(v_nb_param*log(nb_condition)))/2
return(v_lk)
}
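## Sketch of the relation used above: mclust reports BIC = 2*loglik - npar*log(n),
## so loglik = (BIC + npar*log(n))/2. The BIC values below are hypothetical.
## bic_values <- c(-210.4, -198.7, -202.1)   # BIC for G = 1, 2, 3 ("V" model)
## getLoglikelihoodFromBIC(bic_values, nb_condition = 30)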
getLambdaBIC <- function(lambda,v_loglik,nb_condition){
v_nb_param=c(2,5,8,11,15)
v_nb_param=v_nb_param[1:length(v_loglik)]
v_bic=(2*v_loglik)-(lambda*v_nb_param*log(nb_condition))
G=which(v_bic==max(v_bic,na.rm=TRUE))
return(list(G=G,v_bic=v_bic))
}
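## Usage sketch (hypothetical log-likelihoods): a lambda > 1 strengthens the
## complexity penalty and therefore favours fewer components.
## loglik <- c(-90.2, -80.6, -79.9)   # log-likelihoods for G = 1, 2, 3
## sel <- getLambdaBIC(lambda = 2, v_loglik = loglik, nb_condition = 30)
## sel$G       # number of components selected under the penalised BIC
## sel$v_bic   # the penalised BIC values themselves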
getScaleMAD <- function(var_param,values,G,d){
mad_scale=var_param*(sqrt(mad(values))/G^(2/d))
return(mad_scale)
}
getMinLoglikelihoodNull <- function(expression_values,p_param,min_loglike,id_f1,id_f2){
l=abs(log(dnorm(expression_values,mean=p_param[2,id_f1],sd=p_param[3,id_f1]))-log(dnorm(expression_values,mean=p_param[2,id_f2],sd=p_param[3,id_f2])))
min_l=min(l)
min_l[is.na(min_l)]=0
in_null=0
if(min_l<min_loglike){
in_null=1
}
lk_r=c(in_null,min_l)
return(lk_r)
}
getRsdNull<- function(p_param,min_rsd,id_prop_max){
if(ncol(p_param)==1){
rsd_separated=0
}
else{
rsd_separated=matrix(nrow=0,ncol=3)
for(i in 1:ncol(p_param)){
if(id_prop_max!=i){
rsd_separated=rbind(rsd_separated,c(id_prop_max,i,(p_param[3,id_prop_max]/p_param[3,i])))
}
}
in_null_boolean=rsd_separated[,3]>min_rsd
in_null=rep(0,nrow(rsd_separated))
in_null[in_null_boolean] <- 1
rsd_separated=cbind(rsd_separated,in_null)
colnames(rsd_separated)=c("position_normal_max_proportion","position_normal2","ratio_sd","in_null")
}
return(rsd_separated)
}
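## Sketch for getRsdNull (hypothetical parameters): each secondary component is
## compared to the component with the largest proportion through the ratio of
## their standard deviations; a ratio above min_rsd keeps it in the null.
## param <- matrix(c(0.8, 2.0, 0.6,
##                   0.2, 7.0, 0.2),
##                 nrow = 3, ncol = 2,
##                 dimnames = list(c("proportion", "mean", "sd"), NULL))
## getRsdNull(param, min_rsd = 0.1, id_prop_max = 1)
## ## one row per comparison: main normal, other normal, sd ratio, in_null flag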
getNullLoglikelihoodRsdMd<- function(expression_values,p_param,diff_median_p,min_loglike,min_rsd,percent_selective_threshold,nb_s_Step1){
## Initially, all normal distributions are combined in the null
null=rep(1,ncol(p_param))
min_lk_values=NULL
lk_names=vector()
percent_Step1=(nb_s_Step1)/length(expression_values)
percent_selective=percent_selective_threshold-percent_Step1
small_percent=p_param[1,]<percent_selective
max_p_id=which(p_param[1,]==max(p_param[1,]))
if(sum(small_percent)>0){
p_sort=order(p_param[2,],decreasing=FALSE)
if(which(p_sort==max_p_id)<length(p_sort)){
id_outliers=p_sort[which(p_sort==max_p_id)+1]
lk_result=getMinLoglikelihoodNull(expression_values,p_param,min_loglike,max_p_id,id_outliers)
in_null=lk_result[1]
min_lk_values=c(min_lk_values,round(lk_result[2],3))
lk_names=c(lk_names,paste("Normal",max_p_id,"vs Normal",id_outliers))
null[id_outliers]=in_null
id_out=id_outliers
while(which(p_sort==id_outliers)<length(p_sort)){
id_outliers=p_sort[which(p_sort==id_outliers)+1]
lk_result=getMinLoglikelihoodNull(expression_values,p_param,min_loglike,max_p_id,id_outliers)
in_null=lk_result[1]
min_lk_values=c(min_lk_values,round(lk_result[2],3))
lk_names=c(lk_names,paste("Normal",max_p_id,"vs Normal",id_outliers))
null[id_outliers]=in_null
}
}
if(which(p_sort==max_p_id)!=1){
id_outliers=p_sort[which(p_sort==max_p_id)-1]
lk_result=getMinLoglikelihoodNull(expression_values,p_param,min_loglike,max_p_id,id_outliers)
in_null=lk_result[1]
min_lk_values=c(min_lk_values,round(lk_result[2],3))
lk_names=c(lk_names,paste("Normal",max_p_id,"vs Normal",id_outliers))
null[id_outliers]=in_null
id_out=id_outliers
while(which(p_sort==id_outliers)>1){
id_outliers=p_sort[which(p_sort==id_outliers)-1]
lk_result=getMinLoglikelihoodNull(expression_values,p_param,min_loglike,max_p_id,id_outliers)
in_null=lk_result[1]
lk_names=c(lk_names,paste("Normal",max_p_id,"vs Normal",id_outliers))
min_lk_values=c(min_lk_values,round(lk_result[2],3))
null[id_outliers]=in_null
}
}
}
## Do not put null=0 to normals with a proportion larger than the percent_selective_threshold
null[!small_percent]=1
## //////////////////////////////////////////////////////////////////////////////////
## Remove normal if ratio(sd)<min_rsd and small percentage
rsd_values=NULL
if(length(null)>1 && sum(small_percent)>=1){
rsd_separated=getRsdNull(p_param,min_rsd,max_p_id)
rsd_values=round(as.vector(rsd_separated[,3]),3)
names(rsd_values)=paste("sd(Normal",rsd_separated[,1],")/sd(Normal",rsd_separated[,2],")",sep="")
rsd_small_id=intersect(which(small_percent==1),rsd_separated[which(rsd_separated[,4]!=1),2])
if(length(rsd_small_id)>1){
rsd_percent=sum(p_param[1,rsd_small_id])
while(rsd_percent>percent_selective){
s=which(p_param[2,rsd_small_id]==min(p_param[2,rsd_small_id]))
rsd_small_id=rsd_small_id[-s]
rsd_percent=sum(p_param[1,rsd_small_id])
}
}
null[rsd_small_id]=0
}
## //////////////////////////////////////////////////////////////////////////////
## Put back into the null the normals that are not separated from the main one (diff_median<=0.75)
id_m=which(diff_median_p[,3]<=0.75)
for(i in id_m){
if(max_p_id %in% diff_median_p[i,1:2]){
null[diff_median_p[i,1]]=1
null[diff_median_p[i,2]]=1
}
}
## //////////////////////////////////////////////////////////////////////////////////
if(!is.null(min_lk_values)){
names(min_lk_values)=lk_names
}
l_null_min_lk_values_rsd=list(null,min_lk_values,rsd_values)
names(l_null_min_lk_values_rsd)=c("null","min_lk_values","sd_ratio")
return(l_null_min_lk_values_rsd)
}
getDifferenceMedian <- function(expression_values,fit2_p){
diff_median=matrix(nrow=0,ncol=3)
if(ncol(fit2_p$NorMixParam)>1){
for(i in 1:(ncol(fit2_p$NorMixParam)-1)){
for(j in (i+1):ncol(fit2_p$NorMixParam)){
diff_median=rbind(diff_median,c(i,j,abs(median(expression_values[which(fit2_p$classification==i)])-median(expression_values[which(fit2_p$classification==j)]))))
}
}
}
else{
diff_median=matrix(c(1,1,100),1,3)
}
return(diff_median)
}
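## Sketch for getDifferenceMedian (toy fit object): for every pair of components
## the absolute difference between the medians of the values assigned to each
## component is recorded; only the number of columns of NorMixParam and the
## classification vector are used here.
## values <- c(1.1, 0.9, 1.3, 5.2, 5.0, 4.8)
## toy_fit <- list(NorMixParam = matrix(0, nrow = 3, ncol = 2),
##                 classification = c(1, 1, 1, 2, 2, 2))
## getDifferenceMedian(values, toy_fit)
## ## one row: components 1 and 2, |median(1.1,0.9,1.3) - median(5.2,5.0,4.8)| = 3.9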
detectSpecificFromPV <- function(M_pv,M_signe,pv_threshold){
M_s=matrix(0,nrow(M_pv),ncol(M_pv))
M_s[M_pv<pv_threshold] <- 1 ## entries become 0, 1 or -1 after multiplying by M_signe below
M_s_sum_row=apply(abs(M_s),1,sum)
M_s_sum_column=apply(abs(M_s),2,sum)
M_s=M_s*M_signe
## Sort the p-values below the threshold for each row
L_pv_s=lapply(1:nrow(M_pv), function(i) cbind(sort(M_pv[i,M_pv[i,]<pv_threshold])))
names(L_pv_s)=rownames(M_pv)
L_s_id_full=lapply(1:nrow(M_s), function(i) as.vector(which(!M_s[i,]==0)))
names(L_s_id_full)=rownames(M_pv)
s_id_length=sapply(1:length(L_s_id_full), function(i) length(L_s_id_full[[i]]))
L_s_id=L_s_id_full[which(s_id_length!=0)]
## Retrieve the name of the specific condition when only one condition is detected as specific
length_pv_s=sapply(1:length(L_pv_s), function(i) length(L_pv_s[[i]]))
l1=which(length_pv_s==1)
if(length(l1)>0){
l1_names=sapply(1:length(l1), function(i) colnames(M_pv)[M_pv[l1[i],]<pv_threshold])
for(j in 1:length(l1)){
row.names(L_pv_s[[l1[j]]])=l1_names[j]
}
}
## --
selective=rep("Not specific",nrow(M_pv))
s=sapply(1:length(length_pv_s), function(p) length_pv_s[[p]]>0)
selective[s] <- "Specific"
row.names(M_s)=row.names(M_pv)
names(M_s_sum_row)=row.names(M_pv)
names(M_s_sum_column)=colnames(M_pv)
colnames(M_s)=colnames(M_pv)
L_s=list(M_s,M_s_sum_row,M_s_sum_column,L_pv_s,selective,L_s_id)
names(L_s)=c("M_s","M_s_sum_row","M_s_sum_column","L_pv_s","selective","L_s_id")
return(L_s)
}
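## Sketch for detectSpecificFromPV (hypothetical p-value and sign matrices):
## entries below pv_threshold are flagged and signed by M_signe
## (+1 above the null mean, -1 below it).
## M_pv <- matrix(c(0.001, 0.2, 0.8,
##                  0.4,   0.3, 0.01),
##                nrow = 2, byrow = TRUE,
##                dimnames = list(c("gene1", "gene2"), c("liver", "brain", "lung")))
## M_signe <- matrix(c(1, 1, -1,
##                     1, -1, -1), nrow = 2, byrow = TRUE)
## res <- detectSpecificFromPV(M_pv, M_signe, pv_threshold = 0.05)
## res$M_s        # +1 / -1 flags per gene and condition
## res$selective  # "Specific" / "Not specific" per gene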
getListSelectiveResult <- function(M_s,M_s_sum_row){
## M_selective contains only the probe sets detected as specific (+ or -)
selective_probeset=rownames(M_s[M_s_sum_row>0,,drop=FALSE])
if(length(selective_probeset)==0){
print("No probe set is selective")
return(NULL)
}
else{
M_selective=M_s[rownames(M_s) %in% selective_probeset,,drop=FALSE]
rownames(M_selective)=rownames(M_s)[rownames(M_s) %in% selective_probeset]
colnames(M_selective)=colnames(M_s)
M_selective_sum_row=apply(abs(M_selective),1,sum)
## M_selective_sum_column=apply(abs(M_selective),2,sum)
M_selective_positive=(M_selective==1)
M_selective_positive[M_selective_positive] <- 1
M_selective_positive_sum=apply(M_selective_positive,2,sum)
M_selective_negative=(M_selective==-1)
M_selective_negative[M_selective_negative] <- 1
M_selective_negative_sum=apply(M_selective_negative,2,sum)
M_selective_sum_column=rbind(M_selective_positive_sum,M_selective_negative_sum,(M_selective_positive_sum+M_selective_negative_sum))
rownames(M_selective_sum_column)=c("M_specific_positive_sum","M_specific_negative_sum","Both")
L_selective=list(M_selective,M_selective_sum_row,M_selective_sum_column)
names(L_selective)=c("M_selective","M_selective_sum_row","M_selective_sum_column")
return(L_selective)
}
print("fin getListSelectiveResult")
}
mclust_step2 <- function(probeset_name,expression_values,G_initial,fit_p){
G_initial=G_initial
G=fit_p$G
classification=fit_p$classification
if(fit_p$G==1){
NorMixParam=matrix(c(1,fit_p$parameters$mean,sqrt(fit_p$parameters$variance$sigmasq)),nrow=3,ncol=fit_p$G,byrow=TRUE)
colnames(NorMixParam)=c("Normal 1")
}
else{
NorMixParam=matrix(c(fit_p$parameters$pro,fit_p$parameters$mean,sqrt(fit_p$parameters$variance$sigmasq)),nrow=3,ncol=fit_p$G,byrow=TRUE)
}
colnames_NorMix=c("Normal 1","Normal 2","Normal 3","Normal 4","Normal 5")
colnames(NorMixParam)=colnames_NorMix[1:G]
rownames(NorMixParam)=c("proportion","mean","sd")
L_p=list(G_initial,G,NorMixParam,classification)
names(L_p)=c("G_initial","G","NorMixParam","classification")
return(L_p)
}
callMclustInStep2<- function(probeset_name,colnames,expression_values,G_initial,fit_p,beta_value=0,specific_outlier_step1){
G_initial=G_initial
classification=rep(10,length(expression_values))
names(classification)=colnames
specific_outlier_step1=specific_outlier_step1
if(is.null(specific_outlier_step1)){
G=fit_p$G
classification=fit_p$classification
if(fit_p$G==1){
NorMixParam=matrix(c(1,fit_p$parameters$mean,sqrt(fit_p$parameters$variance$sigmasq)),nrow=3,ncol=fit_p$G,byrow=TRUE)
colnames(NorMixParam)=c("Normal 1")
}
else{
NorMixParam=matrix(c(fit_p$parameters$pro,fit_p$parameters$mean,sqrt(fit_p$parameters$variance$sigmasq)),nrow=3,ncol=fit_p$G,byrow=TRUE)
}
}
else{
if(beta_value==0){
mclust2=Mclust(expression_values[-specific_outlier_step1], G = 1:3,modelNames = "V" )
}
else{
print("beta!=0 strep2!!!!!!")
mclust2=Mclust(expression_values[-specific_outlier_step1],G = 1:3, prior=priorControl(shrinkage=0,scale=getScaleMAD(beta_value,expression_values[-specific_outlier_step1],G=1:2,1)),modelNames = "V" )
}
if(mclust2$G==1){
NorMixParam=matrix(c(1,mclust2$parameters$mean,sqrt(mclust2$parameters$variance$sigmasq)),nrow=3,ncol=mclust2$G,byrow=TRUE)
colnames(NorMixParam)=c("Normal 1")
classification[-specific_outlier_step1]=1
}
else{
NorMixParam=matrix(c(mclust2$parameters$pro,mclust2$parameters$mean,sqrt(mclust2$parameters$variance$sigmasq)),nrow=3,ncol=mclust2$G,byrow=TRUE)
}
classification[names(mclust2$classification)]=mclust2$classification
G=mclust2$G
}
colnames_NorMix=c("Normal 1","Normal 2","Normal 3","Normal 4","Normal 5")
colnames(NorMixParam)=colnames_NorMix[1:G]
rownames(NorMixParam)=c("proportion","mean","sd")
L_p=list(G_initial,G,NorMixParam,classification,specific_outlier_step1)
names(L_p)=c("G_initial","G","NorMixParam","classification","specific_outlier_step1")
return(L_p)
}
get_normal_separated <- function(param){
L_result=list(param)
names(L_result)=c("param")
return(L_result)
}
changeColorClassification <- function(M_col){
M_col_class=M_col
M_col_class[M_col==1] <- 4
M_col_class[M_col==2] <- 3
M_col_class[M_col==3] <- "orange"
M_col_class[M_col==4] <- "purple"
M_col_class[M_col==5] <- "cyan"
M_col_class[M_col==10] <- "black"
return(M_col_class)
}
##Creation of the outdir directory
createOutdir <- function(outdir = getwd()){
if(file.exists(outdir)){
if(!file.info(outdir)$isdir)
stop(sprintf("'%s' must be a directory.", outdir))
outdirContents = dir(outdir, all.files = TRUE)
if(length(outdirContents)>0){
print(paste(outdir,"is not empty."))
}
setwd(outdir)
}
else {
dir.create(outdir, recursive=TRUE)
message(sprintf("The directory '%s' has been created.", outdir))
## will work in the new created directory
setwd(outdir)
}
}
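## Usage sketch (illustrative directory name): create or reuse a sub-directory
## and move the working directory into it.
## createOutdir("SpeCond_results")
## getwd()   # now ends with /SpeCond_results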
getSelectiveResultTable <- function(M_selective,M_selective_sum_row,names_probeset=NULL){
## M_selective contains only the probe sets with at least one condition-specific detection
tissue_up=sapply(1:nrow(M_selective), function(i) paste(names(which(M_selective[i,]==1)),collapse=','))
tissue_up[tissue_up==""] <- "-"
names(tissue_up)=rownames(M_selective)
tissue_up_nb=sapply(1:nrow(M_selective), function(i) length(names(which(M_selective[i,]==1))))
tissue_down=sapply(1:nrow(M_selective), function(i) paste(names(which(M_selective[i,]==-1)),collapse=','))
tissue_down[tissue_down==""] <- "-"
names(tissue_down)=rownames(M_selective)
tissue_down_nb=sapply(1:nrow(M_selective), function(i) length(names(which(M_selective[i,]==-1))))
M_result_probeset=cbind(rownames(M_selective),rep("S",nrow(M_selective)),M_selective_sum_row,tissue_up_nb,tissue_down_nb,tissue_up,tissue_down)
colnames(M_result_probeset)=c("gene","type","Nb_condition_specific","Nb_condition_up","Nb_condition_down","Condition_up","Condition_down")
M_not_selective=c("N","0","0","0","-","-")
if(!is.null(names_probeset)){
M_result_selection_probeset=t(sapply(1:length(names_probeset), function(i) if(names_probeset[i]%in% rownames(M_selective)){ M_result_probeset[which(rownames(M_selective)==names_probeset[i]),]} else{c(names_probeset[i],M_not_selective)}))
colnames(M_result_selection_probeset)=c("gene","type","Nb_condition_specific","Nb_condition_up","Nb_condition_down","Condition_up","Condition_down")
return(M_result_selection_probeset)
}
else{
return(M_result_probeset)
}
}
getProfile <- function(M.specific){
M.specific.profile=matrix(0,nrow(M.specific),2)
rownames(M.specific.profile)=rownames(M.specific)
colnames(M.specific.profile)=c("profile","sum")
profile=sapply(1:nrow(M.specific), function(i) as.character(paste(M.specific[i,],collapse=",")))
sum.row=apply(abs(M.specific),1,sum)
M.specific.profile=cbind(profile,sum.row)
table.profile=table(M.specific.profile[,1])
M.specific.profile.table=cbind(names(table.profile),as.vector(table.profile))
colnames(M.specific.profile.table)=c("profile","nb.gene")
M.specific.profile.unique=matrix(as.numeric(unlist(strsplit(M.specific.profile.table[,1],','))),nrow=nrow(M.specific.profile.table),ncol=ncol(M.specific),byrow=TRUE)
colnames(M.specific.profile.unique)=colnames(M.specific)
L.specific=list(M.specific.profile,M.specific.profile.unique,M.specific.profile.table)
names(L.specific)=c("M.specific.profile","M.specific.profile.unique","M.specific.profile.table")
## the rows in M.specific.profile.unique and M.specific.profile.table should correspond
return(L.specific)
}
getGeneHtmlPage <- function(expressionMatrix,specificResult,name.index.html="index.html",prefix.file=NULL, outdir="Single_result_pages",gene.html=NULL,gene.html.ids=c(1:10)){
## specificResult contains:
## names(specificResult)=c("prfix.file","fit","param.detection","L.specific.result","L.null","L.mlk","L.rsd","identic.row.ids")
olddir=getwd()
on.exit(setwd(olddir))
if(is.null(prefix.file)){
if(is(specificResult,"sp_list")){
prefix.file=specificResult$prefix.file
}
else{
stop("Error you need to enter a prefix.file value or to use a specificResult attribute of classe sp_list containing a prefix.file value (object inside the SpeCond() result value or the result of the getSpecificResult() function)")
}
}
name.index.html=paste(prefix.file,"_",name.index.html,sep="")
message(sprintf("The index html page '%s' will be created in the current directory: '%s'",name.index.html,olddir))
outdir=paste(prefix.file,outdir,sep="_")
if(nrow(expressionMatrix)<10 && !is.null(gene.html.ids)){
gene.html.ids=c(1:nrow(expressionMatrix))
}
## Deal with the rows that have not been considered because they contain only identical values
if(!is.null(specificResult$identic.row.ids)){
p_identic_row_names=row.names(expressionMatrix)[specificResult$identic.row.ids]
if(is.null(gene.html) && is.null(gene.html.ids)){
gene.html=row.names(expressionMatrix)
}
if(!is.null(gene.html.ids)){
gene.html=row.names(expressionMatrix)[gene.html.ids]
}
probeset_html_identic=gene.html[gene.html %in% p_identic_row_names]
gene.html=gene.html[!gene.html %in% p_identic_row_names]
expressionMatrix=expressionMatrix[-(specificResult$identic.row.ids),]
gene.html.ids=which(rownames(expressionMatrix) %in% gene.html)
}
M_class_col=t(sapply(1:length(specificResult$fit), function(i) changeColorClassification(specificResult$fit[[i]]$classification)))
if(is.null(gene.html.ids) && is.null(gene.html)){
## ----------------------------------------
## Generate html page results for all genes
n_page=sapply(1:nrow(expressionMatrix), function(i) paste(i,"- ID ",row.names(expressionMatrix)[i],sep=""))
n_link=sapply(1:nrow(expressionMatrix), function(i) paste(prefix.file,"_",row.names(expressionMatrix)[i],".html",sep=""))
p=openPage(name.index.html)
h_p=hwrite(n_page,link=paste(outdir,"/",n_link,sep=""),dim=c(length(n_page),1),border=0)
h_s=hwrite(specificResult$L.specific.result$specific,dim=c(length(specificResult$L.specific.result$specific),1),border=0)
hwrite(c(h_p,h_s),p,dim=c(1,2),border=0)
rversion = sessionInfo()$R.version$version.string
hwrite(paste("Page created by the ", hwrite("SpeCond",link="http://www.bioconductor.org/packages/release/bioc/")," package using hwriter under ",rversion,collapse=""), p, style='font-size:8pt')
closePage(p)
index.html.link=paste(getwd(),"/",name.index.html,sep="")
createOutdir(outdir)
print(paste("The gene html page(s) will be created in the ",outdir," directory",sep=""))
l=sapply(1:nrow(expressionMatrix), function(i) createSingleGeneHtmlPage(index.html.link,prefix.file,i,row.names(expressionMatrix)[i],expressionMatrix[i,],specificResult$param.detection,specificResult$fit[[i]],specificResult$L.specific.result$specific[i],specificResult$fit[[i]]$NorMixParam,M_class_col[i,],specificResult$L.specific.result$M.specific.all[i,],specificResult$L.specific.result$L.pv[[i]],specificResult$L.null[[i]],specificResult$L.mlk[[i]],specificResult$L.rsd[[i]],n_link=n_link))
}
else{
if(!is.null(gene.html)){
gene.html.ids=which(rownames(expressionMatrix)%in% gene.html)
}
n_page=sapply(1:length(gene.html.ids), function(i) paste(i,"- ID ",row.names(expressionMatrix)[gene.html.ids[i]],sep=""))
n_link=sapply(1:length(gene.html.ids), function(i) paste(prefix.file,"_",row.names(expressionMatrix)[gene.html.ids[i]],".html",sep=""))
p=openPage(name.index.html)
h_p=hwrite(n_page,link=paste(outdir,"/",n_link,sep=""),dim=c(length(n_page),1),border=0)
h_s=hwrite(specificResult$L.specific.result$specific[gene.html.ids],dim=c(length(gene.html.ids),1),border=0)
hwrite(c(h_p,h_s),p,dim=c(1,2),border=0)
rversion = sessionInfo()$R.version$version.string
hwrite(paste("Page created by the ", hwrite("SpeCond",link="http://www.bioconductor.org/packages/release/bioc/")," package using hwriter under ",rversion,collapse=""), p, style='font-size:8pt')
closePage(p)
index.html.link=paste(getwd(),"/",name.index.html,sep="")
createOutdir(outdir)
print(paste("The gene html page(s) will be created in the ",outdir," directory",sep=""))
l=sapply(1:length(gene.html.ids), function(i) createSingleGeneHtmlPage(index.html.link,prefix.file,i,row.names(expressionMatrix)[gene.html.ids[i]],expressionMatrix[gene.html.ids[i],],specificResult$param.detection,specificResult$fit[[gene.html.ids[i]]],specificResult$L.specific.result$specific[gene.html.ids[i]],specificResult$fit[[gene.html.ids[i]]]$NorMixParam,M_class_col[gene.html.ids[i],],specificResult$L.specific.result$M.specific.all[gene.html.ids[i],],specificResult$L.specific.result$L.pv[[gene.html.ids[i]]],specificResult$L.null[[gene.html.ids[i]]],specificResult$L.mlk[[gene.html.ids[i]]],specificResult$L.rsd[[gene.html.ids[i]]],n_link=n_link))
## To do:
## html page for genes with identical values
}
currentdir=getwd()
geneLink=cbind(rownames(expressionMatrix)[gene.html.ids],n_link)
genePageLink=list(currentdir, geneLink)
names(genePageLink)=c("genePageDirectory","geneLink")
setwd("../.")
return(genePageLink)
}
getIdenticRow <- function(expressionMatrix){
## Detect rows which have identical values for each condition
M_identic=t(sapply(1:nrow(expressionMatrix), function(i) expressionMatrix[i,]==expressionMatrix[i,1]))
M_identic_sum=apply(M_identic,1,sum)
identic_ids=which(M_identic_sum==ncol(expressionMatrix))
if(length(identic_ids)>0){
return(identic_ids)
}
else{
return(NULL)
}
}
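## Sketch for getIdenticRow: rows whose values are all identical carry no
## information for the mixture fit and are removed before fitting.
## m <- rbind(gene1 = c(1, 2, 3),
##            gene2 = c(5, 5, 5))
## getIdenticRow(m)   # returns 2: only gene2 is constant across conditions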
getDefaultParameter <- function(){
param.detection=matrix(0,nrow=2,ncol=7)
rownames(param.detection)=c("Step 1","Step 2")
colnames(param.detection)=c("beta","lambda","per","md","mlk","rsd","pv")
param.detection[1,]=c(6,1,0.1,0.75,5,0.1,0.05)
param.detection[2,]=c(0,1,0.3,0.75,25,0.1,0.05)
return(param.detection)
}
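## The call below returns the default detection parameters, one row per step:
## getDefaultParameter()
## ##        beta lambda per   md mlk rsd   pv
## ## Step 1    6      1 0.1 0.75   5 0.1 0.05
## ## Step 2    0      1 0.3 0.75  25 0.1 0.05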
createParameterMatrix <- function(param.detection=NULL,beta.1=NULL,beta.2=NULL,lambda.1=NULL,lambda.2=NULL,per.1=NULL,per.2=NULL,md.1=NULL,md.2=NULL,mlk.1=NULL,mlk.2=NULL,rsd.1=NULL,rsd.2=NULL,pv.1=NULL,pv.2=NULL){
if(is.null(param.detection)){
Mp=getDefaultParameter()
}
else{
Mp=param.detection
}
if(!is.null(beta.1)){
Mp[1,"beta"]=beta.1
}
if(!is.null(beta.2)){
if(!beta.2==0){print("Attention: beta 2 should be equal to 0")}
Mp[2,"beta"]=beta.2
}
if(!is.null(lambda.1)){
Mp[1,"lambda"]=lambda.1
}
if(!is.null(lambda.2)){
Mp[2,"lambda"]=lambda.2
}
if(!is.null(per.1)){
Mp[1,"per"]=per.1
}
if(!is.null(per.2)){
Mp[2,"per"]=per.2
}
if(!is.null(md.1)){
Mp[1,"md"]=md.1
}
if(!is.null(md.2)){
Mp[2,"md"]=md.2
}
if(!is.null(mlk.1)){
Mp[1,"mlk"]=mlk.1
}
if(!is.null(mlk.2)){
Mp[2,"mlk"]=mlk.2
}
if(!is.null(rsd.1)){
Mp[1,"rsd"]=rsd.1
}
if(!is.null(rsd.2)){
Mp[2,"rsd"]=rsd.2
}
if(!is.null(pv.1)){
Mp[1,"pv"]=pv.1
}
if(!is.null(pv.2)){
Mp[2,"pv"]=pv.2
}
if(!Mp[1,"md"]==Mp[2,"md"]){print("Attention: the two md values are different")}
if(Mp[1,"per"]>=Mp[2,"per"]){print("Attention: per in step1 should not be larger than per in step2 must be values are different")
print("per.1 per.2")
print(c(Mp[1,"per"],Mp[2,"per"]))
}
if(!Mp[1,"pv"]==Mp[2,"pv"]){print("Attention: the two p-value thresholds are different")}
return(Mp)
}
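## Usage sketch: start from the defaults and override only the thresholds of
## interest; the chosen values below are purely illustrative.
## param <- createParameterMatrix(per.1 = 0.2, mlk.2 = 10, pv.1 = 0.01, pv.2 = 0.01)
## param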
fitPrior <- function(expressionMatrix,param.detection=NULL,lambda=1,beta=6, evaluation.lambda.beta=FALSE){
print("Step1, fitting")
## Remove the identical rows
identic_row_ids=getIdenticRow(expressionMatrix)
if(!is.null(identic_row_ids)){
expressionMatrix=expressionMatrix[-(identic_row_ids),]
print(dim(expressionMatrix))
}
if(!is.null(param.detection)){
lambda=param.detection[1,"lambda"]
beta=param.detection[1,"beta"]
}
if(beta<0){
print("Error, beta cannot be negative!!!!!!")
}
else{
fit_beta_0=lapply(1:nrow(expressionMatrix), function(i) Mclust( expressionMatrix[i,], G = 1:3,modelNames = "V" ))
if(beta==0){
## No prior:
fit=lapply(1:nrow(expressionMatrix), function(i) Mclust( expressionMatrix[i,], G = 1:3,modelNames = "V" ))
}
else{
if(beta>0){
## no mean prior, prior on variance
fit=lapply(1:nrow(expressionMatrix), function(i) Mclust( expressionMatrix[i,], G = 1:3, prior=priorControl(shrinkage=0,scale=getScaleMAD(beta,expressionMatrix[i,],G=1:3,1)),modelNames = "V" ))
}
}
if(lambda==1){
fitlambda=fit
}
else{
if(lambda<0){
print("Error, lambda cannot be negative!!!!!!")
fitlambda=fit
}
else{
M_loglikelihood=t(sapply(1:length(fit), function(i) getLoglikelihoodFromBIC(fit[[i]]$BIC,ncol(expressionMatrix))))
L_G_lambdaBIC=lapply(1:nrow(M_loglikelihood), function(i) getLambdaBIC(lambda,M_loglikelihood[i,],ncol(expressionMatrix)))
if(beta==0){
## no prior on variance
fitlambda=lapply(1:nrow(expressionMatrix), function(i) Mclust(expressionMatrix[i,],G=L_G_lambdaBIC[[i]]$G,modelNames="V"))
}
else{
fitlambda=lapply(1:nrow(expressionMatrix), function(i) Mclust(expressionMatrix[i,],G=L_G_lambdaBIC[[i]]$G,prior=priorControl(shrinkage=0,scale=getScaleMAD(beta,expressionMatrix[i,],G=L_G_lambdaBIC[[i]]$G,1)),modelNames="V"))
}
}
}
## In Step1:
fit2=lapply(1:nrow(expressionMatrix), function(i) mclust_step2(rownames(expressionMatrix)[i],expressionMatrix[i,],G_initial=fit[[i]]$G,fitlambda[[i]]))
names(fit2)=row.names(expressionMatrix)
G_lambda_change=table(sapply(1:length(fit), function(i) fit[[i]]$G==fit2[[i]]$G))
## print("nb of G value that did not changed from initial fit to the second one witout values from first fit with b!=0 or lambda!=1 (previous if lambda=1 it should be all true!)")
## print(G_lambda_change)
lambda_effect=G_lambda_change["FALSE"]
lambda_effect[is.na(lambda_effect)]<-0
G_beta_change=table(sapply(1:length(fit), function(i) fit_beta_0[[i]]$G==fit[[i]]$G))
## print("nb of G value that did not changed from an initial fit with beta=0")
## print(G_beta_change)
beta_effect=G_beta_change["FALSE"]
beta_effect[is.na(beta_effect)]<-0
lambda_beta_effect=matrix(c(lambda_effect,beta_effect),1,2)
colnames(lambda_beta_effect)=c("lambda","beta")
rownames(lambda_beta_effect)="nb of genes with a change in G"
## Create first step parameter html page
if(evaluation.lambda.beta){
fitResult=list(fit2,lambda_beta_effect)
names(fitResult)=c("fit1","G.lambda.beta.effect")
}
else{
fitResult=list(fit2)
names(fitResult)=c("fit1")
}
return(fitResult)
}
}
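## Usage sketch for the Step 1 fit (the matrix 'expressionMatrix' is assumed to
## exist, genes in rows and conditions in columns); requires the mclust package.
## library(mclust)
## step1 <- fitPrior(expressionMatrix, lambda = 1, beta = 6)
## fit1 <- step1$fit1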
fitNoPriorWithExclusion <- function(expressionMatrix,specificOutlierStep1=FALSE,param.detection=NULL,lambda=1,beta=0){
## TODO: rename fit2 to fit?
identic_row_ids=getIdenticRow(expressionMatrix)
if(!is.null(identic_row_ids)){
expressionMatrix=expressionMatrix[-(identic_row_ids),]
}
if(!is.null(param.detection)){
lambda=param.detection[2,"lambda"]
beta=param.detection[2,"beta"]
}
print("Step2, fitting")
if(beta==0){
## No prior:
fit=lapply(1:nrow(expressionMatrix), function(i) Mclust( expressionMatrix[i,], G = 1:3,modelNames = "V" ))
}
## else{
##   if(beta>0){
##     ## no mean prior, prior on variance
##     fit=lapply(1:nrow(expressionMatrix), function(i) Mclust( expressionMatrix[i,], G = 1:3, prior=priorControl(shrinkage=0,scale=getScaleMAD(beta,expressionMatrix[i,],G=1:3,1)),modelNames = "V" ))
##   }
## }
fit2=lapply(1:nrow(expressionMatrix), function(i)
if(length(which(names(specificOutlierStep1)==rownames(expressionMatrix)[i]))>0){
callMclustInStep2(rownames(expressionMatrix)[i],colnames(expressionMatrix),expressionMatrix[i,],G_initial=fit[[i]]$G,fit[[i]],beta_value=beta,specificOutlierStep1[[which(names(specificOutlierStep1)==rownames(expressionMatrix)[i])]])
}
else{
callMclustInStep2(rownames(expressionMatrix)[i],colnames(expressionMatrix),expressionMatrix[i,],G_initial=fit[[i]]$G,fit[[i]],beta_value=beta, NULL)
}
)
names(fit2)=row.names(expressionMatrix)
return(fit2)
}
getSpecificOutliersStep1 <- function(expressionMatrix,fit1=NULL,param.detection=NULL, multitest.correction.method="BY", prefix.file=NULL, print.hist.pv=FALSE){
if(is.null(prefix.file)){
stop("Error: no prefix is link to this analysis")
}
## min_loglike=1,min_rsd=6,median_diff=0.75,percent_selective_threshold=0.3,pv_threshold=0.05,lambda=1,beta=0,param_Step1=NULL
## param.detection[1,]=c(beta,lambda,percent_selective_threshold,median_diff,min_loglike,min_rsd,pv_threshold)
if(is.null(fit1)){
print("fit1 is NULL, you nust use the function fitPrior() or fitNoPriorWithExlusion() to fit the expression values before using this function")
}
else{
if(is.null(param.detection)){
param.detection=matrix(0,nrow=2,ncol=7)
rownames(param.detection)=c("Step 1","Step 2")
colnames(param.detection)=c("beta","lambda","per","md","mlk","rsd","pv")
param.detection[1,]=c(6,1,0.1,0.75,5,0.1,0.05)
}
if(nrow(param.detection)==2){
param_d=param.detection[1,]
}
resultSpecific=getSpecific(expressionMatrix,fit=fit1,param_d,specificOutlierStep1=NULL,multitest.correction.method="BY",prefix.file=paste(prefix.file,"_step1",sep=""),print.hist.pv=FALSE)
L_specific_Step1=lapply(1:nrow(expressionMatrix), function(i) if(length(which(names(resultSpecific$L.specific.result$L.condition.specific.id)==rownames(expressionMatrix)[i]))>0) {as.vector(unlist(resultSpecific$L.specific.result$L.condition.specific.id[rownames(expressionMatrix)[i]]))}
else{NULL}
)
names(L_specific_Step1)=rownames(expressionMatrix)
return(L_specific_Step1)
}
}
getSpecificResult<- function(expressionMatrix,fit2=NULL,param.detection=NULL,specificOutlierStep1=NULL,multitest.correction.method="BY",prefix.file=NULL,print.hist.pv=FALSE){
if(is.null(prefix.file)){
stop("Error: no prefix is associated to this analysis")
}
if(is.null(fit2)){
print("fit2 is NULL, you nust use the function fitPrior() or fitNoPriorWithExlusion() to fit the expression values before using this function")
}
else{
if(is.null(param.detection)){
param.detection=getDefaultParameter()
}
if(nrow(param.detection)==2){
param_d=param.detection[2,]
}
resultSpecific=getSpecific(expressionMatrix,fit=fit2,param.detection=param_d,specificOutlierStep1=specificOutlierStep1,multitest.correction.method="BY",prefix.file=prefix.file,print.hist.pv=print.hist.pv)
resultSpecific$param.detection=param.detection
return(resultSpecific)
}
}
getSpecific <- function(expressionMatrix,fit,param.detection,specificOutlierStep1=NULL,multitest.correction.method="BY",prefix.file=NULL,print.hist.pv=FALSE,identic_row_ids=NULL){
if(is.null(prefix.file)){
stop("Error: no prefix is associated to this analysis:")
}
percent_selective_threshold=param.detection["per"]
median_diff=param.detection["md"]
min_loglike=param.detection["mlk"]
min_rsd=param.detection["rsd"]
pv_threshold=param.detection["pv"]
identic_row_ids=getIdenticRow(expressionMatrix)
if(!is.null(identic_row_ids)){
expressionMatrix=expressionMatrix[-(identic_row_ids),]
}
L_diff_median=lapply(1:nrow(expressionMatrix), function(i) getDifferenceMedian(expressionMatrix[i,],fit[[i]]))
print("start: get null distributions")
L_null_min_lk_values_rsd=lapply(1:nrow(expressionMatrix), function(p) if(length(which(names(specificOutlierStep1)==rownames(expressionMatrix)[p]))>0){
getNullLoglikelihoodRsdMd(expressionMatrix[p,],fit[[p]]$NorMixParam,L_diff_median[[p]],min_loglike,min_rsd,percent_selective_threshold,length(specificOutlierStep1[[which(names(specificOutlierStep1)==rownames(expressionMatrix)[p])]]))
}
else{
getNullLoglikelihoodRsdMd(expressionMatrix[p,],fit[[p]]$NorMixParam,L_diff_median[[p]],min_loglike,min_rsd,percent_selective_threshold,0)
}
)
print("end: get null distributions")
L_null=lapply(1:length(L_null_min_lk_values_rsd), function(p) unlist(L_null_min_lk_values_rsd[[p]]$null))
names(L_null)=rownames(expressionMatrix)
L_min_lk_values=lapply(1:length(L_null_min_lk_values_rsd), function(p) unlist(L_null_min_lk_values_rsd[[p]]$min_lk_values))
names(L_min_lk_values)=rownames(expressionMatrix)
L_rsd=lapply(1:length(L_null_min_lk_values_rsd), function(p) unlist(L_null_min_lk_values_rsd[[p]]$sd_ratio))
names(L_rsd)=rownames(expressionMatrix)
M_null_mean_pv=t(sapply(1:nrow(expressionMatrix), function(i) getPValueMean(expressionMatrix[i,],fit[[i]]$NorMixParam,L_null[[i]])))
M_null_mean=M_null_mean_pv[,1]
M_signe=t(sapply(1:nrow(expressionMatrix), function(i) expressionMatrix[i,]<M_null_mean[i]))
M_signe[M_signe] <- (-1)
M_signe[!M_signe] <- 1
M_pv_i=M_null_mean_pv[,-1]
## ///////////////////// Histogram of the p-values ////////////////////////
if(print.hist.pv){
pdf(paste(prefix.file,"_hist_pv_notadjust.pdf",sep=""))
hist(M_pv_i)
dev.off()
}
## ///////////////////// ////////////////////////
## Need to correct for multiple testing
## //
## previous code (from when p.adjust accepted a matrix as input)
## M_pv=p.adjust(M_pv_i,method=multitest.correction.method)
## row.names(M_pv)=row.names(expressionMatrix)
## colnames(M_pv)=colnames(expressionMatrix)
## end before
## //
M_pv1=p.adjust(as.vector(M_pv_i),method=multitest.correction.method)
M_pv=matrix(M_pv1,ncol=ncol(expressionMatrix),nrow=nrow(expressionMatrix),byrow = FALSE)
row.names(M_pv)=row.names(expressionMatrix)
colnames(M_pv)=colnames(expressionMatrix)
print("start: specific detection from p-values")
L_NorMix_s=detectSpecificFromPV(M_pv,M_signe,pv_threshold)
print("end: specific detection from p-values")
L_selective_result=getListSelectiveResult(L_NorMix_s$M_s,L_NorMix_s$M_s_sum_row)
if(is.null(L_selective_result)){
L_specific_result=list(L_NorMix_s$M_s,NULL,L_NorMix_s$M_s_sum_row, L_NorMix_s$M_s_sum_column,L_NorMix_s$L_pv_s,L_NorMix_s$selective,L_NorMix_s$L_s_id)
names(L_specific_result)=c("M.specific.all","M.specific","M.specific.sum.row","M.specific.sum.column","L.pv","specific","L.condition.specific.id")
}
else{
L_specific_result=list(L_NorMix_s$M_s,L_selective_result$M_selective,L_NorMix_s$M_s_sum_row, L_selective_result$M_selective_sum_column,L_NorMix_s$L_pv_s,L_NorMix_s$selective,L_NorMix_s$L_s_id)
names(L_specific_result)=c("M.specific.all","M.specific","M.specific.sum.row","M.specific.sum.column","L.pv","specific","L.condition.specific.id")
}
specificResult=list(prefix.file,fit,param.detection,L_specific_result,L_null,L_min_lk_values,L_rsd,identic_row_ids)
names(specificResult)=c("prefix.file","fit","param.detection","L.specific.result","L.null","L.mlk","L.rsd","identic.row.ids")
specificResult=new("sp_list",specificResult)
return(specificResult)
}
## Transform an ExpressionSet object into a matrix, summarising the values with the chosen method (mean, median or max) for each level of the condition factor
getMatrixFromExpressionSet <- function(expSet,condition.factor=NULL,condition.method=c("mean","median","max")){
if(!is(expSet,"ExpressionSet"))
{
stop("the expSet argument must be of type expressionSet")
}
else{
Mexp=exprs(expSet)
if(is.null(condition.factor)){
message(sprintf("WARNING, if you have several samples for the same condition, you should consider to obtain one value by conditions (mean or maximum of the samples for example) using the argument factor.condition and method.condition to perform the SpeCond analysis"))
print("The expressionMatrix argument that you entered has been coverted to a matrix using the exprs() function of the Biobase package")
}
else{
M=Mexp
if(!is.factor(condition.factor)){
stop("condition must be of class factor")
}
else{
if(!length(condition.factor)==ncol(M)){
stop("the condition.factor vector size is not equal to the number of conditions in your expressionSet")
}
else{
Mexp=sapply(levels(condition.factor), function(l) if(length(which(condition.factor==l))>1){
apply(M[,which(condition.factor==l)],1,condition.method)} else{M[,which(condition.factor==l)]})
rownames(Mexp)=rownames(M)
}
}
}
}
return(Mexp)
}
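## Usage sketch (the ExpressionSet 'eset' and its 'tissue' phenoData column are
## assumptions): one column per condition level, samples summarised by the
## chosen method.
## library(Biobase)
## tissue <- factor(pData(eset)$tissue)
## Mexp <- getMatrixFromExpressionSet(eset, condition.factor = tissue,
##                                    condition.method = "mean")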
SpeCond<- function(expressionMatrix,param.detection=NULL,multitest.correction.method="BY",prefix.file="A",print.hist.pv=FALSE,fit1=NULL,fit2=NULL,specificOutlierStep1=NULL,condition.factor=NULL,condition.method=c("mean","max")){
if(is(expressionMatrix,"ExpressionSet"))
{
## expressionMatrix=exprs(expressionMatrix)
expressionMatrix=getMatrixFromExpressionSet(expressionMatrix,condition.factor,condition.method)
print("The expressionMatrix argument that you entered has been coverted to a matrix using the getMatrixFromExpressionSet() function")
}
else{
if(!is(expressionMatrix,"matrix")){
stop("The expressionMatrix argument used must be a matrix or an ExpressionSet object")
}
}
if(ncol(expressionMatrix)<8){
message(sprintf("WARNING, you are studying less than 8 conditions, this condition-specific detection tool may not be appropiate for your analysis"))
}
print("Step1")
if(is.null(param.detection)){
## Default parameters
param.detection=getDefaultParameter()
}
if(is.null(fit1)){
## With a prior beta>0 to catch the single outliers
L_b_fit=fitPrior(expressionMatrix,lambda=param.detection[1,"lambda"],beta=param.detection[1,"beta"],evaluation.lambda.beta=FALSE)
fit1=L_b_fit$fit1
}
if(is.null(specificOutlierStep1)){
specificOutlierStep1=getSpecificOutliersStep1(expressionMatrix,fit=fit1,param.detection=param.detection,multitest.correction.method="BY",prefix.file=prefix.file,print.hist.pv=FALSE)
}
else{
print("Use specificOutlierStep1 as result of step1")
}
print("Step2")
## Step2
## Without prior, to obtain normal distributions that fit the data closely
if(is.null(fit2)){
fit2=fitNoPriorWithExclusion(expressionMatrix,specificOutlierStep1=specificOutlierStep1,lambda=param.detection[2,"lambda"],beta=param.detection[2,"beta"])
}
specificResult=getSpecificResult(expressionMatrix,fit=fit2,specificOutlierStep1=specificOutlierStep1,param.detection=param.detection,multitest.correction.method=multitest.correction.method,prefix.file=prefix.file,print.hist.pv=print.hist.pv)
generalResult=list(prefix.file,fit1,fit2,specificOutlierStep1,specificResult)
names(generalResult)=c("prefix.file","fit1","fit2","specificOutliersStep1","specificResult")
generalResult=new("sp_list",generalResult)
return(generalResult)
}
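## Minimal end-to-end sketch (the matrix 'expressionMatrix' is an assumption):
## SpeCond() runs both fitting steps, the Step 1 outlier detection and the
## final condition-specific detection.
## library(mclust)
## param <- createParameterMatrix(per.1 = 0.2)
## result <- SpeCond(expressionMatrix, param.detection = param,
##                   multitest.correction.method = "BY", prefix.file = "E",
##                   print.hist.pv = TRUE)
## result$specificResult$L.specific.result$M.specific   # specific genes x conditions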