custom.mean = function(arr)
{
return(mean(as.numeric(arr), na.rm = T))
}#end def custom.mean
custom.cor = function(arr, var1)
{
if(length(arr[!is.na(arr)]) >= 0.5 * length(arr)){
return(cor(as.numeric(arr), var1, use="complete.obs"))
}else{
return(NA)
}
}#end def custom.cor
ttest2 = function(arr, grp1, grp2)
{
group1 = as.numeric(arr[grp1])
group2 = as.numeric(arr[grp2])
#print(grp1)
#print(grp2)
#stop()
#require at least 2 replicates
if((length(group1[!is.na(group1)]) >=2) & (length(group2[!is.na(group2)]) >=2))
{
result = t.test(group1, group2)
if(is.na(result$p.value)){
return(1)
}else{
return(result$p.value)
}
}
else
{
return(1)
}
}#end def ttest2
anova.pvalue = function(arr, grp.levels)
{
#print(arr)
#print(grp.levels)
grp.no.na = as.factor(as.character(grp.levels[!is.na(arr)]))
if(length(levels(grp.no.na)) >= 2)
{
test = aov(arr ~ grp.levels)
result = summary(test)[[1]][['Pr(>F)']][1]
if(is.null(result))
{
return(1)
}
else
{
return(result)
}
}
else
{
return(1)
}
}#end def anova.pvalue
lm.pvalue = function(arr, var1)
{
if(length(arr[!is.na(arr)]) >= 0.5 * length(arr)){
fit = lm(as.numeric(arr)~var1)
result = summary(fit)
pvalue = result$coefficients[2,4]
return(pvalue)
}else{
return(NA)
}
}#end def lm.pvalue
lm.pvalue2 = function(arr, var1, var2)
{
if(typeof(var2) == "character"){
var2 = as.factor(var2)
}
if(length(arr[!is.na(arr)]) >= 0.5 * length(arr)){
fit = lm(as.numeric(arr)~var1 + as.numeric(var2))
result = summary(fit)
pvalue = result$coefficients[2,4]
return(pvalue)
}else{
return(NA)
}
}#end def lm.pvalue
annova.2way.pvalue = function(arr, grp.levels, pairing.levels)
{
#print(arr)
#print(grp.levels)
#print(pairing.levels)
grp.no.na = as.factor(as.character(grp.levels[!is.na(arr)]))
min.reps = min(table(grp.no.na))
if((length(levels(grp.no.na)) >= 2) && (min.reps >= 2))
{
test = aov(arr ~ grp.levels + pairing.levels)
result = summary(test)[[1]][['Pr(>F)']][1]
if(is.null(result))
{
return(1)
}
else
{
return(result)
}
}
else
{
return(1)
}
}#end def anova.pvalue
count.observed = function(arr){
return(length(arr[!is.na(arr)]))
}#end def count.observed
cor.dist = function(mat){
cor.mat = cor(as.matrix(t(mat)), use="pairwise.complete.obs")
dis.mat = 1 - cor.mat
dis.mat[is.na(dis.mat)]=2
return(as.dist(dis.mat))
}#end def cor.dist
cpp.ANOVA.1way.wrapper = function(arr, grp.levels, ref){
full_beta = arr[!is.na(arr)]
betaT = arr[(grp.levels != ref)&!is.na(arr)]
betaR = arr[(grp.levels == ref)&!is.na(arr)]
result = .Call('_COHCAP_ANOVA_cpp_2group', PACKAGE = 'COHCAP', full_beta, betaT, betaR)
return(result)
}#end def cpp.ANOVA.1way.wrapper
cpp.annova.2way.wrapper = function(arr, var1, var2, ref){
full_beta = arr[!is.na(arr)]
max.df = length(full_beta)-length(levels(as.factor(as.character(var1[!is.na(arr)]))))*length(levels(as.factor(as.character(var2[!is.na(arr)]))))
if(max.df < 1){
return(NA)
}else{
betaT = arr[(var1 != ref)&!is.na(arr)]
betaR = arr[(var1 == ref)&!is.na(arr)]
interaction.var = paste(var1, var2, sep="-")
interaction.var = interaction.var[!is.na(arr)]
#Rcpp code uses numeric array
interaction.var = as.numeric(as.factor(interaction.var))
if(min(table(interaction.var)) < 2){
return(NA)
}else{
result = .Call('_COHCAP_ANOVA_cpp_2group_2way', PACKAGE = 'COHCAP',full_beta, betaT, betaR, interaction.var)
return(result)
}
}#end else
}#end def cpp.annova.2way.wrapper
cpp.ttest.wrapper = function(arr, grp.levels, ref){
betaT = arr[(grp.levels != ref)&!is.na(arr)]
betaR = arr[(grp.levels == ref)&!is.na(arr)]
result = .Call('_COHCAP_ttest_cpp', PACKAGE = 'COHCAP', betaT, betaR)
return(result)
}#end def cpp.ttest.wrapper
cpp.paired.ttest.wrapper = function(arr, iTrt, iRef){
pairedT = arr[iTrt]
pairedR = arr[iRef]
groupT=pairedT[!is.na(pairedT)&!is.na(pairedR)]
groupR=pairedR[!is.na(pairedT)&!is.na(pairedR)]
paired_diff = groupT - groupR
result = .Call('_COHCAP_ttest_cpp_paired', PACKAGE = 'COHCAP', paired_diff)
return(result)
}#end def cpp.paired.ttest.wrapper
cpp.lm.1var.wrapper = function(arr, var1){
full_continuous= var1[!is.na(arr)]
full_beta= arr[!is.na(arr)]
result = .Call('_COHCAP_lmResidual_cpp_1var', PACKAGE = 'COHCAP', full_beta, full_continuous)
return(result)
}#end def cpp.lm.1var.wrapper
fastLm_wrapper = function(arr, var1){
var1= var1[!is.na(arr)]
arr= arr[!is.na(arr)]
fit_stats = fastLmPure(as.matrix(var1), arr)
t_stat = fit_stats$coefficients / fit_stats$stderr
return(pt(-abs(t_stat), fit_stats$df.residual))
}#end def fastLm_wrapper
fastLm_wrapper2 = function(arr, independent.mat){
independent.mat= independent.mat[!is.na(arr),]
arr= arr[!is.na(arr)]
fit_stats = fastLmPure(independent.mat, arr)
t_stat = fit_stats$coefficients[1] / fit_stats$stderr[1]
return(pt(-abs(t_stat), fit_stats$df.residual))
}#end def fastLm_wrapper2
`COHCAP.site` = function (sample.file, beta.table, project.name, project.folder,
methyl.cutoff=0.7, unmethyl.cutoff = 0.3, paired=FALSE,
delta.beta.cutoff = 0.2, pvalue.cutoff=0.05, fdr.cutoff=0.05,
ref="none", num.groups=2,lower.cont.quantile=0, upper.cont.quantile=1,
create.wig = "avg", alt.pvalue="none", plot.heatmap=TRUE, output.format='txt',
heatmap.dist.fun="Euclidian")
{
fixed.color.palatte = c("green","orange","purple","cyan","pink","maroon","yellow","grey","red","blue","black",colors())
if(heatmap.dist.fun =="Euclidian"){
heatmap.dist.fun=dist
}else if(heatmap.dist.fun =="Pearson Dissimilarity"){
heatmap.dist.fun=cor.dist
}else{
stop("'heatmap.dist.fun' must be either 'Euclidian' or 'Pearson Dissimilarity'")
}
site.folder=file.path(project.folder,"CpG_Site")
dir.create(site.folder, showWarnings=FALSE)
data.folder=file.path(project.folder,"Raw_Data")
dir.create(data.folder, showWarnings=FALSE)
print("Reading Sample Description File....")
sample.table = read.table(sample.file, header=F, sep = "\t", stringsAsFactors=TRUE)
samples = as.character(sample.table[[1]])
for (i in 1:length(samples))
{
if(length(grep("^[0-9]",samples[i])) > 0)
{
samples[i] = paste("X",samples[i],sep="")
}#end if(length(grep("^[0-9]",samples[i])) > 0)
}#end def for (i in 1:length(samples))
sample.group = sample.table[[2]]
sample.group = as.factor(gsub(" ",".",sample.group))
pairing.group = NA
if(paired == "continuous"){
print("using continuous covariate")
pairing.group = sample.table[[3]]
pairing.group = as.numeric(pairing.group)
}else if(paired){
print("using pairing ID")
pairing.group = sample.table[[3]]
pairing.group = as.factor(gsub(" ",".",pairing.group))
}
ref = gsub(" ",".",ref)
sample.names = names(beta.table)[6:ncol(beta.table)]
beta.values = beta.table[,6:ncol(beta.table)]
annotation.names = names(beta.table)[1:5]
annotations = beta.table[,1:5]
print(dim(beta.values))
#print(samples)
#print(sample.names)
if(length(samples) != length(sample.names[match(samples, sample.names, nomatch=0)]))
{
warning("Some samples in sample description file are not present in the beta file!")
warning(paste(length(samples),"items in sample description file",sep=" "))
warning(paste(length(sample.names),"items in gene beta file",sep=" "))
warning(paste(length(sample.names[match(samples, sample.names, nomatch=0)]),"matching items in gene beta file",sep=" "))
#warning(sample.names[match(samples, sample.names, nomatch=0)])
stop()
}
if(length(samples)>1)
{
beta.values = beta.values[,match(samples, sample.names, nomatch=0)]
colnames(beta.values)=samples
}
print(dim(beta.values))
#print(samples)
#print(sample.names)
#print(colnames(beta.values))
#print(beta.values[1,])
groups = levels(sample.group)
if((length(groups) != num.groups) & (ref != "continuous"))
{
print(paste("Group:",groups, sep=" "))
warning(paste("There are ",length(groups)," but user specified algorithm for ",num.groups," groups.",sep=""))
warning("Please restart algorithm and specify the correct number of groups")
stop()
}
if (ref == "continuous"){
print("Continous Variable :")
print(sample.table[[2]])
}
stat.table = annotations
rm(annotations)
if((create.wig == "sample")|(create.wig == "avg.and.sample")){
#create .wig files for each sample
wig.folder=file.path(site.folder,paste(project.name,"wig",sep="_"))
dir.create(wig.folder, showWarnings=FALSE)
temp.stat.file = file.path(wig.folder, "temp.txt")
Perl.Path = file.path(path.package("COHCAP"), "Perl")
perl.script = file.path(Perl.Path , "create_wig_files.pl")
sample.beta = beta.table
sample.beta = sample.beta[order(sample.beta$Chr, sample.beta$Loc), ]
colnames(sample.beta) = paste(colnames(beta.table),"beta",sep=".")
write.table(sample.beta, temp.stat.file, quote=F, row.names=F, sep="\t")
cmd = paste("perl \"",perl.script,"\" \"", temp.stat.file,"\" \"", wig.folder,"\"", sep="")
res = system(cmd, intern=TRUE, wait=TRUE)
message(res)
shortcut.beta.file = file.path(wig.folder, "SiteID.beta.wig")
unlink(shortcut.beta.file)
shortcut.beta.file = file.path(wig.folder, "Chr.beta.wig")
unlink(shortcut.beta.file)
shortcut.beta.file = file.path(wig.folder, "Gene.beta.wig")
unlink(shortcut.beta.file)
shortcut.beta.file = file.path(wig.folder, "Loc.beta.wig")
unlink(shortcut.beta.file)
shortcut.beta.file = file.path(wig.folder, "Island.beta.wig")
unlink(shortcut.beta.file)
}#end if(create.wig)
rm(beta.table)
if(ref == "continuous"){
continous.var = sample.table[[2]]
if ((paired == TRUE) | (paired == "continuous")){
if(alt.pvalue == "RcppArmadillo.fastLmPure"){
print("Using fastLm and pt() instead of lm(), with 2nd variable")
independent.mat = cbind(continous.var, pairing.group)
lm.pvalue = apply(beta.values, 1, fastLm_wrapper2, independent.mat)
}else{
lm.pvalue = apply(beta.values, 1, lm.pvalue2, continous.var, pairing.group)
}
} else{
if(alt.pvalue == "cppLmResidual.1var"){
print("Using Rcpp residual t-test instead of lm()")
lm.pvalue = apply(beta.values, 1, cpp.lm.1var.wrapper, continous.var)
}else if(alt.pvalue == "RcppArmadillo.fastLmPure"){
print("Using fastLm and pt() instead of lm()")
lm.pvalue = apply(beta.values, 1, fastLm_wrapper, continous.var)
}else{
lm.pvalue = apply(beta.values, 1, lm.pvalue, continous.var)
}
}
lm.fdr = p.adjust(lm.pvalue, "fdr")
beta.cor = apply(beta.values, 1, custom.cor, var1=continous.var)
#upper.beta is max if positive correlation, min if negative correlation
#lower.beta is min if positive correlation, max if negative correlation
#in both cases, delta.beta is upper.beta - lower.beta
beta.min= apply(beta.values, 1, quantile, na.rm=TRUE, probs=c(lower.cont.quantile))
beta.max= apply(beta.values, 1, quantile, na.rm=TRUE, probs=c(upper.cont.quantile))
upper.beta = beta.max
upper.beta[!is.na(beta.cor) & (beta.cor < 0)] = beta.min[!is.na(beta.cor) & (beta.cor < 0)]
lower.beta = beta.min
lower.beta[!is.na(beta.cor) & (beta.cor < 0)] = beta.max[!is.na(beta.cor) & (beta.cor < 0)]
rm(beta.min)
rm(beta.max)
delta.beta = upper.beta - lower.beta
#make format compatible with 2-group code
stat.table = data.frame(stat.table, upper.beta = upper.beta, lower.beta = lower.beta,
delta.beta = delta.beta, pvalue = lm.pvalue, fdr = lm.fdr,
cor.coef = beta.cor)
} else if(length(groups) == 1) {
col.names = c(annotation.names)
print("Averaging Beta Values for All Samples")
#print(beta.values[1:3,])
if(length(samples) > 1)
{
avg.beta = apply(beta.values, 1, custom.mean)
}
else
{
avg.beta= beta.values
}
#print(avg.beta)
stat.table = data.frame(stat.table, avg.beta=avg.beta)
} else if(length(groups) == 2) {
print("Differential Methylation Stats for 2 Groups with Reference")
trt = groups[groups != ref]
#print(trt)
#print(ref)
trt.samples = samples[which(sample.group == trt)]
#print(trt.samples)
ref.samples = samples[which(sample.group == ref)]
#print(ref.samples)
all.indices = 1:length(samples)
trt.indices = all.indices[which(sample.group == trt)]
ref.indices = all.indices[which(sample.group == ref)]
if (length(trt.indices) == 1){
trt.avg.beta = beta.values[, trt.indices]
} else {
trt.beta.values = beta.values[, trt.indices]
trt.avg.beta = apply(trt.beta.values, 1, custom.mean)
}
if (length(ref.indices) == 1){
ref.avg.beta = beta.values[, ref.indices]
} else {
ref.beta.values = beta.values[, ref.indices]
ref.avg.beta = apply(ref.beta.values, 1, custom.mean)
}
delta.beta = trt.avg.beta - ref.avg.beta
if(paired == "continuous"){
print("Analysis of two numeric variables")
beta.pvalue = unlist(apply(beta.values, 1, annova.2way.pvalue, grp.levels=as.numeric(sample.group), pairing.levels=pairing.group))
}else if(paired){
print("Factor in Paired Samples")
#print(sample.group)
#print(pairing.group)
if (alt.pvalue == "cppPairedTtest"){
print("Using Rcpp/Boost Paired t-test instead of 2-way ANOVA")
pair.table = table(sample.table[,2], sample.table[,3])
pairIDs=colnames(pair.table)[(as.numeric(pair.table[1,]) == 1)&(as.numeric(pair.table[2,]) == 1)]
if(length(pairIDs) != length(levels(as.factor(pairing.group)))){
print("Only pairings with 2 samples are used:")
print("While incomplete pairs will be removed for missing values,")
print("please only provide a sample description table with pairs that you would like to compare.")
stop()
}#end if(length(pair.table) != length(levels(as.factor(pairing.group)))
pairedT = c()
pairedR = c()
for(i in 1:length(pairIDs)){
pairID = pairIDs[i]
pairedT[i] = samples[(pairing.group == pairID)&(sample.group != ref)]
pairedR[i] = samples[(pairing.group == pairID)&(sample.group == ref)]
}#end for(pairID in names(pair.table))
pairTi = match(pairedT, samples)
pairRi = match(pairedR, samples)
beta.pvalue = apply(beta.values, 1, anova.pvalue, grp.levels=sample.group)
}else if(alt.pvalue == "cppANOVA.2way"){
print("Using Rcpp/Boost 2-way ANOVA")
max.df = length(samples)-length(levels(as.factor(sample.group)))*length(levels(as.factor(pairing.group)))
if(max.df < 1){
print("Not enough degrees of freedom for this implementation. Consider using 'alt.pvalue' = 'cppPairedTtest'")
stop()
}
beta.pvalue = unlist(apply(beta.values, 1, cpp.annova.2way.wrapper, sample.group, pairing.group, ref))
}else{
beta.pvalue = unlist(apply(beta.values, 1, annova.2way.pvalue, grp.levels=sample.group, pairing.levels=pairing.group))
}
}else{
if(pvalue.cutoff == 1){
print("Skip p-value calculation to save time. May help with Average-by-Site workflow, but generally not recommended.")
beta.pvalue = rep(1, nrow(beta.values))
}else if (alt.pvalue == "rANOVA.1way"){
print("Using R-based ANOVA instead of t-test")
beta.pvalue = apply(beta.values, 1, anova.pvalue, grp.levels=sample.group)
}else if (alt.pvalue == "cppANOVA.1way"){
print("Using Rcpp/Boost ANOVA instead of t-test")
beta.pvalue = apply(beta.values, 1, cpp.ANOVA.1way.wrapper, grp.levels=sample.group, ref=ref)
}else if (alt.pvalue == "cppWelshTtest"){
print("Using Rcpp/Boost Welsh t-test instead of t.test()")
beta.pvalue = apply(beta.values, 1, cpp.ttest.wrapper, grp.levels=sample.group, ref=ref)
}else{
beta.pvalue = apply(beta.values, 1, ttest2, grp1=trt.indices, grp2=ref.indices)
}
}#end else
#print(beta.pvalue)
beta.fdr = p.adjust(beta.pvalue, method="fdr")
#print(names(beta.values))
print(dim(stat.table))
#print(length(annotation.names))
#print(length(trt.avg.beta))
#print(length(ref.avg.beta))
#print(length(delta.beta))
#print(length(beta.pvalue))
#print(length(beta.fdr))
stat.table = data.frame(stat.table, trt.beta = trt.avg.beta, ref.beta = ref.avg.beta, delta.beta = delta.beta, pvalue = beta.pvalue, fdr = beta.fdr)
print(dim(stat.table))
#print(names(stat.table))
#test = c(annotation.names,
# paste(trt,"avg.beta",sep="."),
# paste(ref,"avg.beta",sep="."),
# paste(trt,"vs",ref,"delta.beta",sep="."),
# paste(trt,"vs",ref,"pvalue",sep="."),
# paste(trt,"vs",ref,"fdr",sep="."))
#print(test)
#print(names(stat.table))
colnames(stat.table) = c(annotation.names,
paste(trt,"avg.beta",sep="."),
paste(ref,"avg.beta",sep="."),
paste(trt,"vs",ref,"delta.beta",sep="."),
paste(trt,"vs",ref,"pvalue",sep="."),
paste(trt,"vs",ref,"fdr",sep="."))
#print(names(stat.table))
} else if(length(groups) > 2) {
print("Calculating Average Beta and ANOVA p-values")
col.names = c(annotation.names)
for (i in 1:length(groups))
{
temp.grp = groups[i]
grp.beta = beta.values[,which(sample.group == temp.grp)]
avg.beta = apply(grp.beta, 1, custom.mean)
col.names = col.names = c(col.names, paste(temp.grp,"avg.beta",sep="."))
stat.table = data.frame(stat.table, avg.beta=avg.beta)
colnames(stat.table) = col.names
}#end for (i in 1:length(groups)
if(paired == "continuous"){
print("Analysis of two numeric variables")
beta.pvalue = unlist(apply(beta.values, 1, annova.2way.pvalue, grp.levels=as.numeric(sample.group), pairing.levels=pairing.group))
}else if(paired){
print("Factor in Paired Samples")
beta.pvalue = unlist(apply(beta.values, 1, annova.2way.pvalue, grp.levels=sample.group, pairing.levels=pairing.group))
}else{
pvalue = apply(beta.values, 1, anova.pvalue, grp.levels = sample.group)
}#end else
anova.fdr = p.adjust(pvalue, method="fdr")
col.names = c(col.names, "anova.pvalue", "annova.fdr")
stat.table = data.frame(stat.table, anova.pvalue = pvalue, anova.fdr = anova.fdr)
colnames(stat.table) = col.names
} else {
print("Invalid groups specified in sample description file!")
}
stat.table = stat.table[order(stat.table$Chr, stat.table$Loc), ]
if(output.format == 'xls'){
xlsfile = file.path(data.folder, paste(project.name,"_CpG_site_stats.xlsx",sep=""))
WriteXLS("stat.table", ExcelFileName = xlsfile)
} else if(output.format == 'csv') {
txtfile = file.path(data.folder, paste(project.name,"_CpG_site_stats.csv",sep=""))
write.table(stat.table, file=txtfile, quote=F, row.names=F, sep=",")
} else if(output.format == 'txt') {
txtfile = file.path(data.folder, paste(project.name,"_CpG_site_stats.txt",sep=""))
write.table(stat.table, file=txtfile, quote=F, row.names=F, sep="\t")
} else {
print(paste(output.format," is not a valid output format! Please use 'txt' or 'xls'.",sep=""))
}
if((create.wig == "avg")|(create.wig == "avg.and.sample")){
#create .wig file based upon average methylation values
wig.folder=file.path(site.folder,paste(project.name,"wig",sep="_"))
dir.create(wig.folder, showWarnings=FALSE)
temp.stat.file = file.path(wig.folder, "temp.txt")
Perl.Path = file.path(path.package("COHCAP"), "Perl")
perl.script = file.path(Perl.Path , "create_wig_files.pl")
write.table(stat.table, temp.stat.file, quote=F, row.names=F, sep="\t")
cmd = paste("perl \"",perl.script,"\" \"", temp.stat.file,"\" \"", wig.folder,"\"", sep="")
res = system(cmd, intern=TRUE, wait=TRUE)
message(res)
}#end if(create.wig)
#filter sites
print(dim(stat.table))
filter.table = stat.table
if((length(groups) == 2)|(ref == "continuous")) {
temp.trt.beta = stat.table[[6]]
temp.ref.beta = stat.table[[7]]
temp.delta.beta = abs(stat.table[[8]])
temp.pvalue = stat.table[[9]]
temp.fdr = stat.table[[10]]
if(unmethyl.cutoff > methyl.cutoff)
{
print("unmethyl.cutoff > methyl.cutoff ...")
print("so, delta-beta decides which group should be subject to which cutoff")
temp.delta.beta = stat.table[[8]]
temp.methyl.up = filter.table[(temp.trt.beta >= methyl.cutoff) & (temp.ref.beta<=unmethyl.cutoff) & !is.na(temp.pvalue) & (temp.pvalue <= pvalue.cutoff) & (temp.fdr <= fdr.cutoff) & (temp.delta.beta >= delta.beta.cutoff),]
print(dim(temp.methyl.up))
temp.methyl.down = filter.table[(temp.ref.beta >= methyl.cutoff) & (temp.trt.beta<=unmethyl.cutoff) & !is.na(temp.pvalue) & (temp.pvalue <= pvalue.cutoff) & (temp.fdr <= fdr.cutoff) & (temp.delta.beta <= -delta.beta.cutoff),]
print(dim(temp.methyl.down))
filter.table = rbind(temp.methyl.up, temp.methyl.down)
}
else
{
temp.methyl.up = filter.table[(temp.trt.beta >= methyl.cutoff) & (temp.ref.beta<=unmethyl.cutoff) & !is.na(temp.pvalue) & (temp.pvalue <= pvalue.cutoff) & (temp.fdr <= fdr.cutoff) & (temp.delta.beta >= delta.beta.cutoff),]
temp.methyl.down = filter.table[(temp.ref.beta >= methyl.cutoff) & (temp.trt.beta<=unmethyl.cutoff) & !is.na(temp.pvalue) & (temp.pvalue <= pvalue.cutoff) & (temp.fdr <= fdr.cutoff) & (temp.delta.beta >= delta.beta.cutoff),]
filter.table = rbind(temp.methyl.up, temp.methyl.down)
}
}else if(length(groups) == 1) {
temp.avg.beta = stat.table$avg.beta
filter.table = filter.table[(temp.avg.beta >= methyl.cutoff) | (temp.avg.beta <=unmethyl.cutoff),]
} else if(length(groups) > 2) {
temp.pvalue = stat.table[[ncol(stat.table) -1]]
temp.fdr = stat.table[[ncol(stat.table)]]
filter.table = filter.table[(temp.pvalue <= pvalue.cutoff) & (temp.fdr <= fdr.cutoff),]
} else {
print("Invalid groups specified in sample description file!")
}
print(dim(filter.table))
filter.table = filter.table[!is.na(filter.table$SiteID),]
print(dim(filter.table))
if(output.format == 'xls'){
xlsfile = file.path(site.folder, paste(project.name,"_CpG_site_filter.xlsx",sep=""))
WriteXLS("filter.table", ExcelFileName = xlsfile)
} else if(output.format == 'csv') {
txtfile = file.path(site.folder, paste(project.name,"_CpG_site_filter.csv",sep=""))
write.table(filter.table, file=txtfile, quote=F, row.names=F, sep=",")
} else if(output.format == 'txt') {
txtfile = file.path(site.folder, paste(project.name,"_CpG_site_filter.txt",sep=""))
write.table(filter.table, file=txtfile, quote=F, row.names=F, sep="\t")
} else {
warning(paste(output.format," is not a valid output format! Please use 'txt' or 'xls'.",sep=""))
}
if((plot.heatmap)& (nrow(filter.table) > 1)& (nrow(filter.table) < 10000)){
temp.beta.mat = apply(beta.values[match(as.character(filter.table$SiteID),as.character(stat.table$SiteID)),], 1, as.numeric)
probe.count.obs= apply(temp.beta.mat, 2, count.observed)
if(length(probe.count.obs[probe.count.obs < 0.5 * nrow(temp.beta.mat)]) > 0){
print(paste("filtering NA probes from heatmap: ",paste(filter.table$SiteID[probe.count.obs < 0.5 * nrow(temp.beta.mat)],collapse=","),sep=""))
print(dim(temp.beta.mat))
filter.table = filter.table[probe.count.obs >= 0.5 * nrow(temp.beta.mat), ]
temp.beta.mat = temp.beta.mat[,probe.count.obs >= 0.5 * nrow(temp.beta.mat)]
print(dim(temp.beta.mat))
}#end if(length(probe.count.obs[probe.count.obs < 0.5 * nrow(temp.beta.mat)]) > 0)
if(nrow(filter.table) < 25){
colnames(temp.beta.mat) = filter.table$SiteID
} else {
colnames(temp.beta.mat) = rep("", nrow(filter.table))
}
rownames(temp.beta.mat) = samples
labelColors = rep("black",times=nrow(temp.beta.mat))
if(ref == "continuous"){
continuous.color.breaks = 10
plot.var = as.numeric(continous.var)
plot.var.min = min(plot.var, na.rm=T)
plot.var.max = max(plot.var, na.rm=T)
plot.var.range = plot.var.max - plot.var.min
plot.var.interval = plot.var.range / continuous.color.breaks
color.range = colorRampPalette(c("green","black","orange"))(n = continuous.color.breaks)
plot.var.breaks = plot.var.min + plot.var.interval*(0:continuous.color.breaks)
for (j in 1:continuous.color.breaks){
labelColors[(plot.var >= plot.var.breaks[j]) &(plot.var <= plot.var.breaks[j+1])] = color.range[j]
}#end for (j in 1:continuous.color.breaks)
}else{
color.palette = fixed.color.palatte[1:length(groups)]
for (j in 1:length(groups)){
labelColors[sample.group == as.character(groups[j])] = color.palette[j]
}#end for (j in 1:length(groups))
}
beta.breaks = c(0:40*2.5)
if(max(temp.beta.mat, na.rm=T) < 2){
beta.breaks = beta.breaks / 100
}else{
print("Adjusting heatmap scale, assuming percent methylation between 0 and 100")
}
heatmap.file = file.path(site.folder, paste(project.name,"_CpG_site_heatmap.pdf",sep=""))
pdf(file = heatmap.file)
heatmap.2(temp.beta.mat,
col=colorpanel(40, low="blue", mid="black", high="red"), breaks=beta.breaks,
density.info="none", key=TRUE, distfun= heatmap.dist.fun,
RowSideColors=labelColors, trace="none", margins = c(5,15), cexCol=0.8, cexRow=0.8)
if(ref == "continuous"){
legend("topright",legend=c(round(plot.var.max,digits=1),rep("",length(color.range)-2),round(plot.var.min,digits=1)),
col=rev(color.range), pch=15, y.intersp = 0.4, cex=0.8, pt.cex=1.5)
}else{
legend("topright",legend=groups,col=color.palette, pch=15)
}
dev.off()
}#end if((plot.heatmap)& (nrow(island.avg.table) > 0))
return(filter.table)
}#end def COHCAP.site
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.