## LINC_METHODS
## getter methods
setGeneric("results", function(x) standardGeneric("results"))
setMethod("results", "LINCmatrix", function(x) x@results)
setGeneric("assignment", function(x) standardGeneric("assignment"))
setMethod("assignment", "LINCmatrix", function(x) x@assignment)
setGeneric("correlation", function(x) standardGeneric("correlation"))
setMethod("correlation", "LINCmatrix", function(x) x@correlation)
setGeneric("express", function(x) standardGeneric("express"))
setMethod("express", "LINCmatrix", function(x) x@expression)
setGeneric("history", function(x) standardGeneric("history"))
setMethod("history", "LINCmatrix", function(x) x@history)
setGeneric("linCenvir", function(x) standardGeneric("linCenvir"))
setMethod("linCenvir", "LINCmatrix", function(x) x@linCenvir)
## setter methods
setGeneric("results<-", function(x, value) standardGeneric("results<-"))
setReplaceMethod("results", "LINCmatrix",
function(x, value) {x@results <- value; x})
setGeneric("assignment<-", function(x, value) standardGeneric("assignment<-"))
setReplaceMethod("assignment", "LINCmatrix",
function(x, value) {x@assignment <- value; x})
setGeneric("correlation<-", function(x, value) standardGeneric("correlation<-"))
setReplaceMethod("correlation", "LINCmatrix",
function(x, value) {x@correlation <- value; x})
setGeneric("express<-", function(x, value) standardGeneric("express<-"))
setReplaceMethod("express", "LINCmatrix",
function(x, value) {x@expression <- value; x})
setGeneric("history<-", function(x, value) standardGeneric("history<-"))
setReplaceMethod("history", "LINCmatrix",
function(x, value) {x@history <- value; x})
setGeneric("linCenvir<-", function(x, value) standardGeneric("linCenvir<-"))
setReplaceMethod("linCenvir", "LINCmatrix",
function(x, value) {x@linCenvir <- value; x})
## GENERIC FUNCTION "justlinc"
setGeneric(name = "justlinc",
def = function(object,
targetGenes = "lincRNA",
rmPC = TRUE
){
standardGeneric("justlinc")
})
setMethod(f = "justlinc",
signature = c("matrix", "ANY", "ANY"),
definition = function(object,
targetGenes = "lincRNA",
rmPC = TRUE
){
## method for a matrix as input
## PREDEFINITIONs
#data(ENSG_BIO, package = "LINC")
#data(ENTREZ_BIO, package = "LINC")
#data(ENSG_PC, package = "LINC")
if(!exists("ENSG_BIO")) stop("Gene Annotation for 'justlinc' not found")
#ENSG_BIO_DIR <- system.file("extdata", "ENSG_BIO.RData", package = "LINC")
#load(ENSG_BIO_DIR)
#ENTREZ_BIO_DIR <- system.file("extdata", "ENTREZ_BIO.RData", package = "LINC")
#load(ENTREZ_BIO_DIR)
#ENSG_PC_DIR <- system.file("extdata", "ENSG_PC.RData", package = "LINC")
#load(ENSG_PC_DIR)
errorm00 <- "'justlinc' failed, please use 'linc'"
errorm01 <- "no match for 'targetGenes', valid targets:"
errorm03 <- "Only one gene biotype is allowed"
errorm05 <- "Gene names as 'rownames(object)' required"
errorm06 <- "No Ensembl or Entrez ids, method failed"
errorm07 <- "'targetGenes' supplied as gene ids not found"
warnim00 <- paste("correlation matrix contains infinite",
"or missing values; converted to 0")
warnim01 <- "This method is intended for Ensembl gene ids"
warnim02 <- paste("long computation times expected for",
"> 100 'targetGenes'")
exit_print <- names(table(ENSG_BIO$gene_biotype))
on.exit(print(exit_print))
## SECTION0: INPUT CHECK
# targetGenes
tg_promise <- try(is.element(targetGenes,
c(ENSG_BIO$gene_biotype, rownames(object))),
silent = TRUE)
if(class(tg_promise) == "try-error" |
!all(tg_promise)) stop(errorm01)
# suggest gene systems
gN_promise <- rownames(object)
if(is.null(gN_promise)) stop(errorm05)
gD_promise <- try(identifyGenes(gN_promise),
silent = TRUE)
if(class(gD_promise) == "try-error" |
length(gD_promise) == 0) stop(errorm00)
if(!any(is.element(gD_promise, "ENSEMBL")))
warning(warnim01)
if(!any(is.element(gD_promise, c("ENSEMBL",
"ENTREZ")))) stop(errorm06)
# removal of PC
rm_promise <- inlogical(rmPC, direct = TRUE)
## SECTION1: MATRIX SEPARATION
# remove PCs (> 30% of main variance)
if(rm_promise){
pca_object <- prcomp(object, center = FALSE,
scale. = FALSE)
mvar30 <- sum(pca_object$sdev) * 0.3
n = 1; mvar_sum = 0
while(mvar_sum < mvar30){
mvar_sum <- mvar_sum + pca_object$sdev[n]
n = n + 1
}
object <- pca_object$x[,n:ncol(object)] %*% t(
pca_object$rotation[,n:ncol(object)])
}
# separation
if(any(is.element(gD_promise, "ENSEMBL"))){
in_match <- match(ENSG_PC$ENSG, rownames(object))
pc_matrix <- object[in_match[!is.na(in_match)], ]
rownames(pc_matrix) <- ENSG_PC$ENTREZ
} else {
pc_matrix <- object[is.element(
rownames(object), ENSG_PC$ENTREZ), ]
}
# reduce samples and compute the correlation matrix
if(ncol(object) > 30){
samples <- seq_len(30)
} else {
samples <- seq_len(ncol(object))
}
# no queries are supplied
if(any(is.element(targetGenes, ENSG_BIO$gene_biotype))){
if(length(targetGenes) != 1) stop(errorm03)
if(any(is.element(gD_promise, "ENSEMBL"))){
nc_genes <- ENSG_BIO$ensembl_gene_id[is.element(
ENSG_BIO$gene_biotype, targetGenes)]
} else {
e_index <- is.element(ENTREZ_BIO$entrez_gene_id,
rownames(object))
t_index <- is.element(ENTREZ_BIO$gene_biotype,
targetGenes)
et_index <- ((e_index + t_index) == 2)
nc_genes <- as.character(ENTREZ_BIO$entrez_gene_id[
et_index])
}
nc_matrix <- object[nc_genes, ]
if(nrow(pc_matrix) < 5000) stop(errorm00)
if(nrow(nc_matrix) < 500) stop(errorm00)
# select for median expression and variance
pc_median <- median(pc_matrix, na.rm = TRUE)
nc_median <- median(nc_matrix, na.rm = TRUE)
pc_rw_median <- rowMedians(pc_matrix, na.rm = TRUE)
nc_rw_median <- rowMedians(nc_matrix, na.rm = TRUE)
pc_matrix <- pc_matrix[(pc_rw_median > 0.25*pc_median), ]
if(nrow(pc_matrix) < 5000) stop(errorm00)
pc_var <- apply(pc_matrix, 1, var)
pc_matrix <- pc_matrix[order(pc_var,
decreasing = TRUE)[1:5000], ]
if(nrow(nc_matrix) < 10) stop(errorm00)
nc_matrix <- nc_matrix[(nc_rw_median > 0.25*nc_median), ]
nc_var <- apply(nc_matrix, 1, var)
nc_matrix <- nc_matrix[order(nc_var,
decreasing = TRUE)[1:100], ]
cormatrix <- try(Cppspear(pc_matrix[, samples ],
nc_matrix[, samples ]), silent = TRUE)
if(class(cormatrix) == "try-error") stop(errorm00)
rownames(cormatrix) <- rownames(pc_matrix)
colnames(cormatrix) <- rownames(nc_matrix)
if(!all(is.finite(cormatrix))){
warning(warnim00)
cormatrix[is.infinite(cormatrix) |
is.na(cormatrix) ] <- 0
}
# select 10 genes with best correlations
max_cor <- apply(cormatrix, 2, max)
q10_genes <- order(max_cor, decreasing = TRUE)[1:10]
th_matrix <- (cormatrix[,q10_genes] > 0.75)
pc_list <- list()
q10_list <- list()
for(q in 1:10){
i_partners <- cormatrix[th_matrix[,q], q10_genes[q]]
i_partners <- i_partners[order(i_partners,
decreasing = TRUE)][1:50]
i_partners <- i_partners[!is.na(i_partners)]
q10_list[[q]] <- i_partners
if(length(i_partners) > 25){
pc_list[[q]] <- names(i_partners)
} else {
pc_list[[q]] <- NULL
}
}
names(pc_list) <- colnames(th_matrix)
names(q10_list) <- colnames(th_matrix)
null_index <- vapply(pc_list, is.null, TRUE)
if(length(which(!null_index)) < 10) stop(errorm00)
pc_list <- pc_list[!null_index]
pc_path <- try(compareCluster(pc_list, fun =
"enrichGO", OrgDb = 'org.Hs.eg.db')
, silent = TRUE)
if(class(pc_path) == "try-error") stop(errorm00)
## SECTION 3: PLOTTING
# plot expression and correlation of best
for(pp in 1:10){
subject <- pc_matrix[pc_list[[pp]][1], ]
query <- nc_matrix[names(pc_list)[pp], ]
s_df <- data.frame(lincRNA = query, protein_coding
= subject, SAMPLES = seq_len(ncol(pc_matrix)))
s_cor <- cor(subject, query, method = "spearman")
splot <- ggplot(s_df, environment = environment()) + geom_point(aes(x =
protein_coding, y = lincRNA), colour = "firebrick4",
size = 2) + ggtitle(paste(names(pc_list)[pp], "vs",
pc_list[[pp]][1],"| CORRELATION:", round(s_cor, 2))) +
theme(title = element_text(size = 12, face = "bold"))
assign(paste("splot", pp, sep = '_'), splot )
}
grid.arrange( splot_1, splot_6,
splot_2, splot_7,
splot_3, splot_8,
splot_4, splot_9,
splot_5, splot_10,
ncol =2, nrow = 5, top = textGrob(
"PLOT 1: EXPRESSION & CORRELATION (DETAILS)",
gp = gpar(fontsize = 30, font = 2, face = "bold")))
# Plot for enriched pathways
cplot <- plot(pc_path, showCategory = 10)
grid.arrange(cplot, top = textGrob(
"PLOT 2: ENRICHED PATHWAYS",
gp = gpar(fontsize = 30, font = 2, face = "bold")))
final_list <- list(REACTOMEPA = pc_path,
INTERACTION_PARTNERS = q10_list)
} else {
sf_targets <- is.element(rownames(object), targetGenes)
if(!any(sf_targets)) stop(errorm07)
if(length(targetGenes) > 100) warning(warnim02)
if(length(targetGenes) > 1){
nc_matrix <- object[targetGenes, ]
} else {
nc_matrix <- rbind(object[targetGenes, , drop = FALSE],
object[targetGenes, , drop = FALSE])
rownames(nc_matrix) <- c(targetGenes, paste(targetGenes,
"1", sep = '_'))
}
# select for median expression and variance
pc_median <- median(pc_matrix, na.rm = TRUE)
pc_rw_median <- rowMedians(pc_matrix, na.rm = TRUE)
pc_matrix <- pc_matrix[(pc_rw_median > 0.25*pc_median), ]
if(nrow(pc_matrix) < 5000) stop(errorm00)
pc_var <- apply(pc_matrix, 1, var)
pc_matrix <- pc_matrix[order(pc_var,
decreasing = TRUE)[1:5000], ]
c_matrix <- rbind(pc_matrix[ ,samples],
nc_matrix[ ,samples])
l_matrix <- linc(object = c_matrix, codingGenes =
is.element(rownames(c_matrix),
rownames(pc_matrix)), verbose = FALSE)
if(length(targetGenes) > 1){
l_cluster <- clusterlinc(l_matrix,
pvalCutOff = 0.0005,
verbose = FALSE)
final_list <- getbio(l_cluster, enrichFun =
'enrichGO')
plotlinc(final_list)
} else {
final_list <- singlelinc(l_matrix, query = targetGenes,
underth = TRUE,
threshold = 0.0005,
ont = 'BP',
verbose = FALSE)
plotlinc(final_list)
}
}
print <- function(x = final_list){ return(x) }
})
## GENERIC FUNCTION "linc"
## create a LINCmatrix instance
setGeneric(name = "linc",
def = function(object,
codingGenes = NULL,
corMethod = "spearman",
batchGroups = NULL,
nsv = 1,
rmPC = NULL,
outlier = NULL,
userFun = NULL,
verbose = TRUE){
standardGeneric("linc")
})
setMethod(f = "linc",
signature = c("matrix"),
definition = function(object,
codingGenes = NULL,
corMethod = "spearman",
batchGroups = NULL,
nsv = 1,
rmPC = NULL,
outlier = NULL,
userFun = NULL,
verbose = TRUE){
## method for a matrix as input
## PREDEFINITIONs
# errors, warnings and messages
errorm00 <- paste("Assignment of protein-coding genes",
"in 'codingGenes' is required")
errorm01 <- paste("'codingGenes' must have the same",
"length as 'nrow(object)'")
errorm02 <- paste("'corMethod' needs to be 'pearson',",
"'kendall' or 'spearman'")
errorm03 <- "A numeric matrix is required as input"
errorm04 <- "Size or content of matrix insufficient"
errorm05 <- "Gene names as 'rownames(object)' required"
errorm06 <- paste("'batchGroups' need to be of the",
"same length as the columns")
errorm07 <- paste("Not allowed to use the same name",
"for every entry in 'batchGroups'")
errorm08 <- paste("unable to use 'rmPC' as an index",
"vector for the removal of pcs")
errorm09 <- paste("'outlier' needs to be 'zscore',",
"or 'esd'")
errorm10 <- paste("'codingGenes' needs to be a gene",
"annotation or a logical vector")
errorm11 <- paste("Error in argument 'codingGenes',",
"not enough protein-coding genes")
errorm12 <- paste("unable to compute correlation matrix:",
"1. check input for infinite values / NAs",
"2. check user-defined correlation function", sep = '\n')
errorm13 <- "computation of cor. matrix lnc vs lnc failed"
warnim01 <- "Input 'object' contains infinite values"
warnim02 <- "'linc' was unable to identify a gene system"
warnim03 <- paste(
"single outliers and high sample variance were detected",
"by ESD and ANOVA; statistical correction is recommended",
sep = '\n')
warnim04 <- paste("Subsequent use of sva and removal of",
"principle components is not intended")
warnim05 <- paste("correlation matrix contains infinite",
"or missing values; converted to 0")
inform01 <- quote(paste("linc: removed ", infrm,
"rows contaning only infinite values"))
inform02 <- quote(paste("removed", length(obvar[obvar
== 0]), "zero variance genes from input"))
inform22 <- "removed genes with duplicated names"
inform03 <- "linc: gene system(s) assumed:"
inform04 <- "linc: correction by sva was called"
inform05 <- "linc: remove principle components"
inform06 <- quote(paste("linc: The outlier method '",
ol_promise, "' was called"))
inform07 <- quote(paste("linc: Correlation function",
" with '", cor_use, "' called", sep = ''))
inform08 <- paste("linc: Computation of correlation",
"matrix started")
# environments and object
store <- new.env(parent = emptyenv())
out_history <- new.env(parent = emptyenv())
## SECTION0: INPUT CONTROL
# use of the 'codingGenes' argument
if(is.null(codingGenes)) stop(errorm00)
if(length(codingGenes) != nrow(object)) stop(errorm01)
pc_promise <- codingGenes
# interpretation of'verbose'
if(class(verbose) != "logical" ){
verbose <- TRUE
} else {
if(!any(verbose)) verbose <- FALSE
if(any(verbose)) verbose <- TRUE
}
if(!verbose) message <- function(x) x
# interpretation of the 'corMethod' argument
cM_promise <- try(match.arg(corMethod,
c("pearson", "kendall", "spearman")),
silent = TRUE)
if(class(cM_promise) == "try-error") stop(errorm02)
if(!is.null(userFun)) cor_Method <- "user-defined"
# evaluation of 'object' and its gene ids
# numeric matrix
if(!is.numeric(object)) stop(errorm03)
# only infinite values
if(!all(is.finite(object))){
warning(warnim01)
mobject <- object[apply(object, 1, function(x){
any(is.finite(x)) }), ]
pcobject <- object; rownames(pcobject) <- pc_promise
pcobject <- pcobject[apply(pcobject, 1, function(x){
any(is.finite(x)) }), ]
infrm <- nrow(object) - nrow(mobject)
if(infrm != 0){ message(inform01); object <- mobject
pc_promise <- rownames(pcobject)
}
}
# 0 variance rows
obvar <- apply(object, 1, var)
if(is.element(0, obvar)){
object <- object[obvar != 0, ]
pc_promise <- pc_promise[obvar != 0]
message(eval(inform02))
}
# rows duplicated
if(any(duplicated(rownames(object)))){
pc_promise <- pc_promise[!duplicated(rownames(object))]
object <- object[(!duplicated(rownames(object))), ]
message(inform22)
}
out_object <- object
object <- object[!is.na(rownames(object)),]
pc_promise <- pc_promise[!is.na(pc_promise)]
# is the matrix to small
if(!all(dim(object) > 5)) stop(errorm04)
colnum <- ncol(object)
# suggest gene systems
gN_promise <- rownames(object)
if(is.null(gN_promise)) stop(errorm05)
gD_promise <- try(identifyGenes(gN_promise),
silent = TRUE)
if(class(gD_promise) == "try-error" |
length(gD_promise) == 0){
warning(warnim02)
out_history$gene_system <- NA
}else{
out_history$gene_system <- gD_promise
message(inform03); sapply(gD_promise,
function(x) message(x))
}
# if 'batchGroups' should be used
if(!is.null(batchGroups)){
if(length(batchGroups) != colnum) stop(errorm06)
if(1 == length(unique(batchGroups))) stop(errorm07)
store$SVA <- TRUE; message(inform04)
if(length(nsv) == 1 && is.numeric(nsv) &&
is.vector(nsv)){
bn_promise <- nsv
} else {
bn_promise <- 1
}
}
# if 'rmPC' should be used
if(!is.null(rmPC)){
col_sel <- try(seq_len(colnum)[-rmPC], silent = TRUE)
if(class(col_sel) == "try-error" ) stop(errorm08)
if(length(col_sel) == 0 |
anyNA(col_sel)) stop(errorm08)
rm_promise <- seq_len(colnum)[-rmPC]
store$PCA <- TRUE; message(inform05)
}
# interpretation of the 'outlier' argument
if(!is.null(outlier)){
ol_promise <- try(match.arg(outlier,
c("zscore", "esd")), silent = TRUE)
if(class(ol_promise) == "try-error") stop(errorm09)
store$outlier <- TRUE; message(eval(inform06))
}
## SECTION1: STATISTICS
# test samples using ANOVA
av_promise <- suppressMessages(reshape2::melt(data.frame(object)))
colnames(av_promise) <- c("group", "y")
anova_test <- anova( lm(y~group, data = av_promise ))
f_sample <- anova_test$`F value`[1]; f_df <- anova_test$Df
f_critical <- df(0.95, df1 = f_df[1] , df2 = f_df[2])
anova_passed <- (f_sample <= f_critical)
out_history$F_critical <- round(f_critical, 2)
out_history$F_sample <- round(f_sample, 2)
out_history$F_anova <- anova_passed
# test genes using esd
out_genes <- apply(object, 1, detectesd,
alpha = 0.05, rmax = 4)
outlier_det <- (100 * sum(out_genes,
na.rm = TRUE))/nrow(object)
out_history$outlier_detected <- round(outlier_det, 1)
# does the input fail for both tests
stats_fail <- all((outlier_det > 10) && !anova_passed)
# give a warning for no correction and failed tests
if(!exists("SVA", store) &
!exists("PCA", store) &
!exists("outlier", store)){
out_sobject <- object; sobject <- out_sobject
stats_applied <- "none"
if(stats_fail) warning(warnim03)
} else {
stats_applied <- paste(ls(store), collapse = ",")
}
if(exists("SVA", store) &
exists("PCA", store)) warning(warnim04)
if(exists("outlier", store)){
if(ol_promise == "esd"){
sobject <- t(apply(object, 1, correctESD,
alpha = 0.05, rmax = 4))
}
if(ol_promise == "zscore"){
sobject <- t(apply(object, 1, modZscore))
}
out_sobject <- sobject
} else {
sobject <- object; out_sobject <- object
}
if(exists("PCA", store)){
pca_object <- prcomp(sobject, center = FALSE,
scale. = FALSE)
out_sobject <- pca_object$x[,rm_promise] %*% t(
pca_object$rotation[,rm_promise])
sobject <- out_sobject
}
if(exists("SVA", store)){
#suppressPackageStartupMessages(require(sva))
exbatch <- as.factor(batchGroups)
mod1 <- model.matrix(~exbatch)
mod0 <- cbind(mod1[,1])
svse <- svaseq(sobject, mod1, mod0,
n.sv = bn_promise)$sv
out_sobject <- svaSolv(sobject, mod1, svse)
sobject <- out_sobject
# detach(pos = 2L, force = TRUE)
# detach(pos = 2L, force = TRUE)
# detach(pos = 2L, force = TRUE)
}
## pairwise for correlation
if(anyNA(sobject)){
cor_use <- "pairwise"
} else {
cor_use <- "everything"
}
## SECTION2: MATRIX SEPARATION AND CORRELATION
# index for coding genes
if(is.vector(pc_promise) && is.logical(pc_promise)){
store$pc_index <- pc_promise
out_assignment <- gN_promise[store$pc_index]
}
if(is.vector(pc_promise) && is.character(pc_promise)){
store$pc_index <- is.element(pc_promise,
c('protein_coding',
'coding',
'protein',
'protein-coding',
'protein coding'))
out_assignment <- gN_promise[store$pc_index]
}
if(!exists("pc_index", store)) stop(errorm10)
if(length(which(store$pc_index)) < 5) stop(errorm11)
pc_matrix <- sobject[store$pc_index, ]
nc_matrix <- sobject[!store$pc_index, ]
# to calculate the correlation
message(eval(inform07)); message(inform08)
out_cormatrix <- try(callCor(corMethod, userFun, cor_use)(
pc_matrix, nc_matrix), silent = TRUE)
if(class(out_cormatrix) == "try-error") stop(errorm12)
rownames(out_cormatrix) <- rownames(pc_matrix)
colnames(out_cormatrix) <- rownames(nc_matrix)
if(!all(is.finite(out_cormatrix))){
warning(warnim05)
out_cormatrix[is.infinite(out_cormatrix) |
is.na(out_cormatrix) ] <- 0
}
out_ltlmatrix <- try(callCor(corMethod, userFun, cor_use)(
nc_matrix, nc_matrix), silent = TRUE)
if(class(out_ltlmatrix) == "try-error") stop(errorm13)
rownames(out_ltlmatrix) <- rownames(nc_matrix)
colnames(out_ltlmatrix) <- rownames(nc_matrix)
if(!all(is.finite(out_ltlmatrix))){
out_ltlmatrix[is.infinite(out_ltlmatrix) |
is.na(out_ltlmatrix) ] <- 0
}
#out_history$outlier_detected <-
#c("corMethod", "cor_use", "cor_max", "F_critical", "F_sample", "", "outlier_detected")
## SECTION2: PREPARATION OF OUTPUT
out_history$cor_max <- round(max(out_cormatrix,
na.rm = TRUE), 2)
out_history$corMethod <- corMethod
out_history$cor_use <- cor_use
out_history$pc_matrix <- pc_matrix
out_history$nc_matrix <- nc_matrix
out_history$stats_use <- stats_applied
#out_linc <- LINCmatrix()
out_linc <- new("LINCmatrix")
results(out_linc) <- list(statscorr = out_sobject)
assignment(out_linc) <- out_assignment
correlation(out_linc) <- list(cormatrix = out_cormatrix,
lnctolnc = out_ltlmatrix)
express(out_linc) <- out_object
history(out_linc) <- out_history
out_linCenvir <- NULL
out_linCenvir <- new.env(parent = emptyenv())
out_linCenvir$linc <- out_linc
#lockEnvironment(out_linCenvir, bindings = TRUE)
linCenvir(out_linc) <- out_linCenvir
return(out_linc)
}) # method end
setMethod(f = "linc",
signature = c("data.frame"),
definition = function(object,
codingGenes = NULL,
corMethod = "spearman",
batchGroups = NULL,
nsv = 1,
rmPC = NULL,
outlier = NULL,
userFun = NULL,
verbose = TRUE){
## method for a data.frame as input
## PREDEFINITIONs
object <- as.matrix(object)
linc(object,
codingGenes,
corMethod,
batchGroups,
rmPC,
outlier,
userFun,
verbose)
}) # method end
setMethod(f = "linc",
signature = c("ExpressionSet"),
definition = function(object,
codingGenes = NULL,
corMethod = "spearman",
batchGroups = NULL,
nsv = 1,
rmPC = NULL,
outlier = NULL,
userFun = NULL,
verbose = TRUE){
## method for an ExpressionSet as input
## PREDEFINITIONs
#require(Biobase)
object <- Biobase::exprs(object)
linc(object,
codingGenes,
corMethod,
batchGroups,
rmPC,
outlier,
userFun,
verbose)
}) # method end
setMethod(f = "linc",
signature = c("LINCmatrix", "missing"),
definition = function(object,
codingGenes = NULL,
corMethod = "spearman",
batchGroups = NULL,
nsv = 1,
rmPC = NULL,
outlier = NULL,
userFun = NULL,
verbose = TRUE){
## method for a LINCmatrix as input
## PREDEFINITIONs
linc(object = linCenvir(object)$expression,
codingGenes = linCenvir(object)$assignment,
corMethod,
batchGroups,
rmPC,
outlier,
userFun,
verbose)
}) # method end
## getter methods
setMethod("results", "LINCcluster", function(x) x@results)
setMethod("assignment", "LINCcluster", function(x) x@assignment)
setMethod("correlation", "LINCcluster", function(x) x@correlation)
setMethod("express", "LINCcluster", function(x) x@expression)
setMethod("history", "LINCcluster", function(x) x@history)
setMethod("linCenvir", "LINCcluster", function(x) x@linCenvir)
## setter methods
setReplaceMethod("results", "LINCcluster",
function(x, value) {x@results <- value; x})
setReplaceMethod("assignment", "LINCcluster",
function(x, value) {x@assignment <- value; x})
setReplaceMethod("correlation", "LINCcluster",
function(x, value) {x@correlation <- value; x})
setReplaceMethod("express", "LINCcluster",
function(x, value) {x@expression <- value; x})
setReplaceMethod("history", "LINCcluster",
function(x, value) {x@history <- value; x})
setReplaceMethod("linCenvir", "LINCcluster",
function(x, value) {x@linCenvir <- value; x})
## create a 'LINCcluster' instance
setGeneric(name = "clusterlinc",
def = function(linc,
distMethod = "dicedist",
clustMethod = "average",
pvalCutOff = 0.05,
padjust = "none",
coExprCut = NULL,
cddCutOff = 0.05,
verbose = TRUE){
standardGeneric("clusterlinc")
})
setMethod(f = "clusterlinc",
signature = c("LINCmatrix"),
def = function(linc,
distMethod = "dicedist",
clustMethod = "average",
pvalCutOff = 0.05,
padjust = "none",
coExprCut = NULL,
cddCutOff = 0.05,
verbose = TRUE){
## method for a LINCmatrix
## PREDEFINITIONs
validObject(linc)
out_history <- history(linc)
out_history$type <- "cluster"
# errors, warnings and messages
errorm00 <- "LINCmatrix is empty, input corrupted"
errorm01 <- paste("'distMethod' needs to be ",
"'correlation', pvalue' or 'dicedist'")
errorm02 <- paste("'ward.D', 'ward.D2', 'single',",
"'complete', 'average', 'mcquitty',",
"'median', 'centroid'")
errorm03 <- "incorrect 'pvalCutOff' supplied"
errorm04 <- "incorrect 'coExprCut' supplied"
errorm05 <- "incorrect 'cddCutOff' supplied"
errorm06 <- paste("clustering failed, use 'none'",
"in 'padjust' or 'dicedist",
"in 'distMethod'")
warnim00 <- "call of hidden arguments"
warnim01 <- paste("cor. test matrix contains infinite",
"or missing values; converted to 0")
warnim02 <- paste("'corMethod' changed to 'spearman';",
"use 'singlelinc' to apply a user-defined",
"correlation test function")
warnim03 <- paste("unable to convert 'hclust' object",
"output will be corrupted")
inform01 <- paste("clusterlinc: computation for the",
"correlation test started")
inform02 <- quote(paste("clusterlinc: distance matrix",
"called with the method", dM_promise))
inform03 <- paste("clusterlinc: co-expressed",
"genes selected based on 'coExprCut'")
inform04 <- paste("clusterlinc: co-expressed",
"genes selected based on 'pvalCutOff'")
## SECTION0: INPUT CONTROL
if(!exists("linc", linCenvir(linc))) stop(errorm00)
# interpretation of'verbose'
if(class(verbose) != "logical" ){
verbose <- TRUE
} else {
if(!any(verbose)) verbose <- FALSE
if(any(verbose)) verbose <- TRUE
}
if(!verbose) message <- function(x) x
# interpretation of the 'distMethod' argument
dM_promise <- try(match.arg(distMethod,
c("correlation", "pvalue", "dicedist")),
silent = TRUE)
if(class(dM_promise) == "try-error") stop(errorm01)
out_history$distMethod <- dM_promise
# interpretation of the 'clustMethod' argument
cM_promise <- try(match.arg(clustMethod, c("ward.D",
"ward.D2", "single", "complete", "average",
"mcquitty", "median", "centroid")),
silent = TRUE)
if(class(cM_promise) == "try-error") stop(errorm02)
out_history$clustMethod <- cM_promise
# interpretation of the 'pvalCutOff' argument
co_promise <- pvalCutOff
if(length(co_promise) != 1) stop(errorm03)
if(!is.numeric(co_promise)) stop(errorm03)
if(co_promise >= 1 | co_promise < 0) stop(errorm03)
out_history$pvalCutOff <- co_promise
#interpretation of 'coExprCut'
if(!is.null(coExprCut)){
ct_promise <- try(as.integer(coExprCut), silent = TRUE)
if(class(ct_promise) == "try-error") stop(errorm04)
if(length(ct_promise) != 1) stop(errorm04)
if(!is.element(ct_promise, seq_len(nrow(
correlation(linCenvir(linc)$linc)[[1]])))) stop(errorm04)
out_history$pvalCutOff <- NULL
}
# interpretation of 'cddCutOff'
if(length(cddCutOff) != 1 | !is.numeric(cddCutOff) |
!is.vector(cddCutOff)) stop(errorm05)
if(cddCutOff > 1 | cddCutOff < 0) stop(errorm05)
## SECTION1: DISTANCE MATRIX AND CLUSTERING
# do the correlation test
message(inform01)
pc_matrix <- history(linc)$pc_matrix
nc_matrix <- history(linc)$nc_matrix
cor_use <- history(linc)$cor_use
corMethod <- history(linc)$corMethod
if(corMethod == "user"){
corMethod <- "spearman"; warning(warnim02)
}
cortest_matrix <- suppressWarnings(doCorTest(
corMethod, cor_use)(
pc_matrix, nc_matrix))
m_adjust <- p.adjust(cortest_matrix, method =
padjust)
cortest_matrix <- matrix(m_adjust, ncol = ncol(
cortest_matrix))
colnames(cortest_matrix) <- rownames(nc_matrix)
rownames(cortest_matrix) <- rownames(pc_matrix)
if(!all(is.finite(cortest_matrix))){
warning(warnim01)
cortest_matrix[is.infinite(cortest_matrix) |
is.na(cortest_matrix) ] <- 1
}
cortest_matrix[cortest_matrix < 1e-26] <- 1e-26
out_history$pval_min <- min(cortest_matrix, na.rm = TRUE)
if(min(cortest_matrix, na.rm = TRUE) < 1e-26){
out_history$pval_min <- 1e-26
}
# compute a distance matrix
message(eval(inform02))
if(dM_promise == "correlation"){
cormat <- as.dist(correlation(linCenvir(linc)$linc)[[2]])
distmat <- (1 - cormat)
}
if(dM_promise == "dicedist"){
msize <- nrow(nc_matrix)
for_cdd_test <- -log10(cortest_matrix)
blanc_matrix <- matrix(rep(-1, msize^2), ncol = msize)
cdd_result <- docdd(for_cdd_test, blanc_matrix,
(-log10(cddCutOff)))
distmat <- as.dist(cdd_result)
}
if(dM_promise == "pvalue"){
cortest_lnctolnc <- suppressWarnings(doCorTest(
corMethod = corMethod, cor_use)(
nc_matrix, nc_matrix))
l_adjust <- p.adjust(cortest_lnctolnc, method =
padjust)
cortest_lnctolnc <- matrix(l_adjust, ncol = ncol(
cortest_lnctolnc ))
cormat <- as.dist(cortest_lnctolnc)
distmat <- (1 - cormat)
}
# perform clustering
out_clust <- try(hclust(distmat, method = cM_promise), silent = TRUE)
if(class(out_clust) == "try-error") stop(errorm06)
#suppressPackageStartupMessages(require("ape"))
if(is.function(as.phylo)){
linc_phylo <- as.phylo(out_clust)
linc_phylo$tip.label <- rownames(nc_matrix)
out_clust <- linc_phylo
} else {
warning(warnim03)
}
# select neighbour genes
if(!is.null(coExprCut)){
neighbours <- deriveCluster2(cortest_matrix,
n = ct_promise)
message(inform03)
} else {
neighbours <- deriveCluster(cortest_matrix,
alpha = co_promise)
message(inform04)
}
out_clust$neighbours <- neighbours
## SECTION4: PREPARATION OF OUTPUT
out_linc <- new("LINCcluster")
results(out_linc) <- list(cluster = out_clust)
assignment(out_linc) <- assignment(linc)
correlation(out_linc) <- list(correlation(linc),
cortest = cortest_matrix)
express(out_linc) <- express(linc)
history(out_linc) <- out_history
out_linCenvir <- new.env(parent = emptyenv())
out_linCenvir$linc <- linc
out_linCenvir$cluster <- out_linc
#lockEnvironment(out_linCenvir, bindings = TRUE)
linCenvir(out_linc) <- out_linCenvir
return(out_linc)
}) # method end
setMethod(f = "clusterlinc",
signature = c("LINCcluster"),
def = function(linc,
distMethod = "dicedist",
clustMethod = "average",
pvalCutOff = 0.05,
coExprCut = NULL,
cddCutOff = 0.05,
verbose = TRUE){
## method for a LINCcluster
## PREDEFINITIONs
clusterlinc(linc = linCenvir(linc)$linc,
distMethod = distMethod,
clustMethod = clustMethod,
pvalCutOff = pvalCutOff,
coExprCut = coExprCut,
cddCutOff = cddCutOff,
verbose = verbose)
}) # method end
## getter methods
setMethod("results", "LINCbio", function(x) x@results)
setMethod("assignment", "LINCbio", function(x) x@assignment)
setMethod("correlation", "LINCbio", function(x) x@correlation)
setMethod("express", "LINCbio", function(x) x@expression)
setMethod("history", "LINCbio", function(x) x@history)
setMethod("linCenvir", "LINCbio", function(x) x@linCenvir)
## setter methods
setReplaceMethod("results", "LINCbio",
function(x, value) {x@results <- value; x})
setReplaceMethod("assignment", "LINCbio",
function(x, value) {x@assignment <- value; x})
setReplaceMethod("correlation", "LINCbio",
function(x, value) {x@correlation <- value; x})
setReplaceMethod("express", "LINCbio",
function(x, value) {x@expression <- value; x})
setReplaceMethod("history", "LINCbio",
function(x, value) {x@history <- value; x})
setReplaceMethod("linCenvir", "LINCbio",
function(x, value) {x@linCenvir <- value; x})
## create a LINCbio instance
setGeneric(name = "getbio",
def = function(cluster,
enrichFun = 'enrichGO',
ont = "BP",
...){
standardGeneric("getbio")
})
setMethod(f = "getbio",
signature = c("LINCcluster"),
def = function(cluster,
enrichFun = 'enrichGO',
ont = "BP",
...){
## method for a LINCcluster
## PREDEFINITIONs
# environments and object
validObject(cluster)
store <- new.env(parent = emptyenv())
out_history <- history(cluster)
# errors, warnings and messages
errorm01 <- "The supplied 'enrichFun' is not supported!"
warnim01 <- paste("This enrichment function is not",
"directly supported. Please,",
"consider using on of c(enrichGO,",
"enrichPathway, enrichDO)")
inform01 <- quote(paste("getbio: The function", enrichFun,
"will be called."))
inform02 <- quote(paste("getbio: Gene ids will be translated from",
kt_promise, "to entrez identifiers"))
## SECTION0: INPUT CONTROL
# interpretation of enrichFun
eF_promise <- try(match.arg(enrichFun,
c("enrichGO",
"enrichPathway",
"enrichDO")),
silent = TRUE)
if(class(eF_promise) == "try-error") warning(warnim01)
message(eval(inform01))
if(is.element("OrgDb", ls(history(cluster)))){
OrgDb <- get("OrgDb", envir = history(cluster))
} else {
OrgDb <- 'org.Hs.eg.db'
}
ot_promise <- match.arg(ont, c("MF", "BP", "CC"))
## SECTION1: HANDLE KEYTYPES
ll_promise <- results(cluster)[[1]]$neighbours
kt_promise <- identifyGenes(unlist(ll_promise))
if(kt_promise == "ENTREZID"){
cc_list <- ll_promise
} else {
message(eval(inform02))
cc_list <- lapply(ll_promise, function(x){
x <- bitr(x, fromType = kt_promise,
OrgDb = OrgDb, toType = "ENTREZID")
return(x$ENTREZID)
})
}
## SECTION2: CALL TO GENE ANNOTATION
if(is.element("OrgDb", names(formals(eF_promise)))){
bio <- compareCluster(cc_list, fun = eF_promise,
OrgDb = OrgDb, ont = ot_promise, ...)
}
if(is.element("organism", names(formals(eF_promise)))){
if(OrgDb == 'org.Hs.eg.db'){
OrgDb <- "human"
}
bio <- compareCluster(cc_list, fun = eF_promise, organism = OrgDb, ...)
}
if(!any(is.element(c("OrgDb", "organism"), names(formals(eF_promise))))){
bio <- compareCluster(cc_list, fun = eF_promise, ...)
}
if(!exists("bio")) stop(errorm01)
## SECTION3: PREPARATION OF OUTPUT
#out_linc <- LINCbio()
out_linc <- new("LINCbio")
results(out_linc) <- list(bio)
assignment(out_linc) <- assignment(cluster)
correlation(out_linc) <- correlation(cluster)
express(out_linc) <- express(cluster)
history(out_linc) <- out_history
out_linCenvir <- NULL
out_linCenvir <- linCenvir(cluster)
out_linCenvir$bio <- out_linc
#lockEnvironment(out_linCenvir, bindings = TRUE)
linCenvir(out_linc) <- out_linCenvir
return(out_linc)
}) # method end
## PLOTTING METHOD
setGeneric(name = "plotlinc",
def = function(
input,
showCor,
returnDat = FALSE){
standardGeneric("plotlinc")
})
setMethod(f = "plotlinc",
signature = c("LINCmatrix",
"missing"),
def = function(
input,
showCor,
returnDat = FALSE){
## method for a LINCbio object
## PREDEFINITIONs
# on.exit(options(stringsAsFactors = TRUE))
validObject(input)
em_promise <- results(linCenvir(input)$linc)[[1]]
ac_promise <- correlation(linCenvir(input)$linc)[[1]]
hs_promise <- history(linCenvir(input)$linc)
ep_promise <- express(linCenvir(input)$linc)
# suppressPackageStartupMessages(require(reshape))
# create a box plot
df_boxplot <- suppressMessages(reshape2::melt(em_promise))
names(df_boxplot) <- c(NA, "SAMPLES", "VALUE")
gg_box <- ggplot(df_boxplot,
aes(SAMPLES, VALUE), environment = environment()) + geom_boxplot(outlier.color =
"firebrick", colour = "dodgerblue3") + theme(
panel.border = element_rect(color = "grey", fill = NA),
panel.background = element_blank()) +
ggtitle("BOXPLOT OF EXPRESSION VALUES") +
theme(plot.title = element_text(face = "bold",
color = "steelblue4"))
# create a frequency plot
gg_freq <- ggplot(df_boxplot, aes(VALUE), environment = environment()) +
geom_histogram(bins = 30, fill = "gray95" ) +
geom_freqpoly(colour = "firebrick", linetype = "dashed", size = 0.7) +
theme(
panel.border = element_rect(color = "grey", fill = NA),
panel.background = element_blank()) +
ggtitle("FREQUENCY OF EXPRESSION VALUES") +
theme(plot.title = element_text(face = "bold",
color = "gray47"))
# create a histogram of correlation values
df_cor <- data.frame(CORRELATION = as.vector(ac_promise))
gg_cor <- ggplot(df_cor, aes(CORRELATION), environment = environment()) + geom_histogram(binwidth = 0.02, #bins = 100,
size = 1, fill = c(rep("grey", 95), rep("dodgerblue3", 6))) + theme(
panel.border = element_rect(color = "grey", fill = NA),
panel.background = element_blank()) +
xlim(-1,1) + #scale_x_continuous(breaks = 0.01 ) +
geom_vline(xintercept = 0, colour = "firebrick", linetype = 'dashed') +
geom_vline(xintercept = 0.9, colour = "dodgerblue3") +
ggtitle("HISTOGRAM OF CORRELATION VALUES") +
theme(plot.title = element_text(face = "bold",
color = "violetred4"))
# plot PCA analysis
pca_object <- prcomp(ep_promise, center = FALSE,
scale. = FALSE)
df_pca <- data.frame(PC = seq_len(length(pca_object$sdev)),
VARIANCE = (pca_object$sdev/sum(pca_object$sdev) * 100))
gg_pca <- ggplot(df_pca, aes(PC, VARIANCE), environment = environment()) + geom_point(
size = 4, colour = "dodgerblue3") + theme(
panel.border = element_rect(color = "grey", fill = NA),
panel.background = element_blank()) +
ylim(0,100) + scale_x_continuous(breaks=seq(1,
length(pca_object$sdev),1)) +
ggtitle("PRINCIPLE COMPONENT ANALYSIS") +
theme(plot.title = element_text(face = "bold",
color = "grey25"))
# get and plot the statistical parameters
get_this <- c("corMethod", "cor_use", "cor_max",
"F_critical", "F_sample", "F_anova",
"outlier_detected", "stats_use")
par_description <- c("Method for correlation: ",
"Usage of observations: ",
"Maximum cor. observed: ",
"Critical F-value: ",
"F-value of sample: ",
"ANOVA passed: ",
"Genes with outliers [%]: ",
"Correction applied: ")
stat_par <- mget(get_this, envir = hs_promise)
par_described <- mapply(paste, par_description, stat_par)
df_stat <- data.frame(value = c(" ", par_described, " "),
y = -c(1:10), x = rep(0,10), group =
c(rep("cor", 4), rep("samples", 4), rep("expr", 2)))
pty_pl <- (ggplot(df_stat, aes(x,y), environment = environment()) +
geom_point(color = "white") + xlim(0, 1) +
theme(axis.line = element_line(colour =
"white"), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.title.y = element_text(color ="white"),
axis.title.x = element_text(color ="white"),
axis.text.x = element_text(color = "white"),
axis.text.y = element_text(color = "white")))
linc_stats_plot <- pty_pl + geom_text(aes(label =
df_stat$value, colour = df_stat$group) ,hjust = 0, vjust = 0,
size = 5, fontface = "bold") + ggtitle("STATISTICAL PARAMETERS") +
scale_colour_manual(values = c(
"violetred4", "gray47", "steelblue4"), guide = "none") +
theme(plot.title = element_text(face = "bold"))
#suppressPackageStartupMessages(require(grid))
# suppressPackageStartupMessages(require(png))
stats_img <- readPNG(system.file("extdata", "statslinc_img.png",
package ="LINC"))
#stats_img <- readPNG("statslinc_img.png")
stats_plot <- rasterGrob(stats_img, interpolate = TRUE)
# suppressPackageStartupMessages(require(gridExtra))
customid <- ""
if(exists("customID", envir = history(input))){
customid <- history(input)$customID
}
left_side <- suppressMessages(suppressWarnings(arrangeGrob(
gg_cor, gg_box , gg_pca, ncol = 1)))
right_side <- arrangeGrob(stats_plot, linc_stats_plot, ncol = 1, bottom = customid)
grid.arrange(left_side, right_side, nrow = 1 )
})
## plot a cluster without bio_terms
setMethod(f = "plotlinc",
signature = c("LINCcluster",
"missing"),
def = function(
input,
showCor,
returnDat = FALSE){
## method for a LINCcluster object
## PREDEFINITIONs
validObject(input)
hs_promise <- history(linCenvir(input)$cluster)
cluster <- results(linCenvir(input)$cluster)[[1]]
## SECTION0: INPUT CONTROL
# plot the cluster
tree <- ggtree(cluster, colour = "firebrick") +
coord_flip() + scale_x_reverse()
tree <- tree + geom_tiplab(size = 4, angle = -90,
colour = "deeppink4",
hjust = 0, vjust = 0)
plot_matrix <- as.matrix(unlist(lapply(
cluster$neighbours, length)))
plot_df <- as.data.frame(plot_matrix)
clust_heat <- gheatmap(tree, plot_df, offset = 0.1,
width = 0.2, colnames = FALSE,
colnames_position = "top",
low = "white", high = "white") +
guides(fill = FALSE) +
ggtitle("DENDROGRAM OF QUERIES") +
theme(plot.title = element_text(
face = "bold", color = "firebrick4"))
# plot p-value distributions of correlations
cortested <- correlation(input)[2]$cortest
log10pval <- -log10(as.vector(cortested))
df_cor <- data.frame(PVALUE = log10pval)
gg_cor <- ggplot(df_cor, aes(PVALUE), environment = environment()) +
geom_histogram(binwidth = 0.02 ) + #bins = 100,
#size = 1, fill = c(rep("grey", 95), rep("dodgerblue3", 6)))
theme(
panel.border = element_rect(color = "grey", fill = NA),
panel.background = element_blank()) +
xlim(-0.2, 16) + #scale_x_continuous(breaks = 0.01 ) +
geom_vline(xintercept = 0, colour = "firebrick", linetype = 'dashed') +
geom_vline(xintercept = 1.3, colour = "dodgerblue3") +
ggtitle("P-VALUES FROM CORRELATION TEST (units of -log10(p-value))") +
theme(plot.title = element_text(face = "bold",
color = "violetred4"))
# get and plot the statistical parameters
get_this <- c("distMethod", "clustMethod",
"corMethod", "cor_use", "pvalCutOff",
"pval_min" , "stats_use", "gene_system")
par_description <- c("Distance measure: ",
"Clustering algorithm: ",
"Method for correlation: ",
"Usage of observations: ",
"p-value cut-off: ",
"Best p-value observed: ",
"Statistical correction: ",
"Gene system identified: ")
stat_par <- mget(get_this, envir = hs_promise)
par_described <- mapply(paste, par_description, stat_par)
df_stat <- data.frame(value = c(" ", par_described, " "),
y = -c(1:10), x = rep(0,10))
pty_pl <- (ggplot(df_stat, aes(x,y), environment = environment()) +
geom_point(color = "white") + xlim(0, 1) +
theme(axis.line = element_line(colour =
"white"), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.title.y = element_text(color ="white"),
axis.title.x = element_text(color ="white"),
axis.text.x = element_text(color = "white"),
axis.text.y = element_text(color = "white")))
clust_para_plot <- pty_pl + geom_text(aes(label =
df_stat$value) ,hjust = 0, vjust = 0,
size = 5, fontface = "bold",
colour = "gray47") +
ggtitle(paste("PARAMETERS OF CLUSTER",
"AND COEXPRESSED GENES")) +
theme(plot.title = element_text(face = "bold"))
stclust_img <- readPNG(system.file("extdata", "stclust_img.png",
package ="LINC"))
stclust_plot <- rasterGrob(stclust_img, interpolate = TRUE)
customid <- ""
if(exists("customID", envir = history(input))){
customid <- history(input)$customID
}
if(returnDat){
return(results(input))
} else {
final_plot <- grid.arrange(gg_cor, stclust_plot,
clust_heat, clust_para_plot,
nrow = 2)
return(invisible(final_plot))
}
}) # end of method
setMethod(f = "plotlinc",
signature = c("LINCsingle",
"missing"),
def = function(
input,
showCor,
returnDat = FALSE){
## method for a LINCsingle object
## PREDEFINITIONs
# on.exit(options(stringsAsFactors = TRUE))
validObject(input)
query <- results(linCenvir(input)$single)$query
bio <- results(linCenvir(input)$single)$bio
corl <- results(linCenvir(input)$single)$cor
pval <- results(linCenvir(input)$single)$pval
pval10 <- -log10(unlist(pval))
if(all(is.na(pval))) pval[1] <- 0 # ggplot rescue
# limit the terms to plot
size <- length(bio[[2]])
priority <- paste("[", 1:9, "]", sep = '')
if(size >= 9){
bio[[2]] <- bio[[2]][1:9]
bio[[1]] <- bio[[1]][1:9]
} else {
bio[[2]][(size + 1):9] <- "NA"
bio[[1]][(size + 1):9] <- 1
}
# make the data.frame for plotting
priority <- paste("[", 1:9, "]", sep = '')
term_assign <- mapply(function(x, y) { paste(x, y) }, x = priority, y = bio[[2]])
annotation_df <- data.frame(priority, -log10(bio[[1]]), term_assign)
names(annotation_df) <- c("TERMS", "pvalue", "ASSIGNMENT")
############################################################
# plot the annotation
annotation_plot <- ggplot(annotation_df, aes(TERMS,
y = pvalue, fill = ASSIGNMENT), environment = environment()) + geom_bar( stat =
"identity", width = 0.8) +
theme( panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank()) +
theme(axis.text.x = element_text(size = 15,
hjust = 0, vjust = 0, colour = "violetred4")) +
scale_fill_manual(values= c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", "#E31A1C", "#FDBF6F", "#FF7F00", "#CAB2D6"),
name="ANNOTATION",
breaks= term_assign,
labels= term_assign) +
theme(legend.text = element_text(size = 15, colour = "violetred4"),
legend.title = element_text(size = 17, colour = "#023858")) +
ggtitle(paste("QUERY:", query, ";", length(corl), "CO-EXPRESSED GENES" )) + theme(plot.title = element_text(size = 17, face = "bold", vjust = -3, hjust = 1)) +
theme(axis.title.x = element_blank(), axis.title.y = element_text(size = 16))
#'-log10(p-value)'
# plot histograms of correlation and p-values
############################################################
# create a histogram of correlation values
df_cor <- data.frame(CORRELATION = unlist(corl))
gg_cor <- ggplot(df_cor, aes(CORRELATION), environment = environment()) +
geom_histogram(binwidth = 0.02, size = 1, fill = "grey") +
theme(panel.border = element_rect(color = "grey",#
fill = NA), panel.background = element_blank()) +
xlim(-1,1) + geom_vline(xintercept = 0, colour =
"firebrick", linetype = 'dashed') + geom_vline(
xintercept = 0.75, colour = "dodgerblue3") +
ggtitle("CO-EXPRESSED GENES: CORRELATION VALUES") +
theme(plot.title = element_text(face = "bold",
color = "ivory4"))
df_pval <- data.frame(pvalue = pval10)
gg_pval <- ggplot(df_pval, aes(pvalue), environment = environment()) +
geom_histogram(binwidth = 1, size = 1, fill = "grey") +
theme(panel.border = element_rect(color = "grey",
fill = NA), panel.background = element_blank()) +
xlim(0,100) + geom_vline(xintercept = -log10(0.05),
colour = "firebrick", linetype = 'dashed') + geom_vline(
xintercept = 16, colour = "dodgerblue3") +
ggtitle("CO-EXPRESSED GENES: P-VALUES (COR.TEST)") +
theme(plot.title = element_text(face = "bold",
color = "lightpink4"))
# suppressPackageStartupMessages(require(grid))
#suppressPackageStartupMessages(require(png))
single_img <- readPNG(system.file("extdata", "singlelinc_img.png",
package ="LINC"))
#single_img <- readPNG("singlelinc_img.png")
single_plot <- rasterGrob(single_img, interpolate = TRUE)
customid <- ""
if(exists("customID", envir = history(input))){
customid <- history(input)$customID
}
# suppressPackageStartupMessages(require(gridExtra))
right_bottom <- suppressMessages(suppressWarnings(arrangeGrob(
gg_cor, gg_pval, ncol = 1)))
right_side <- arrangeGrob(single_plot, right_bottom, ncol = 1, bottom = customid)
grid.arrange( annotation_plot, right_side, nrow = 1 )
})
setMethod(f = "plotlinc",
signature = c("LINCbio",
"missing"),
def = function(
input,
showCor,
returnDat = FALSE){
## method for a LINCbio object
## PREDEFINITIONs
validObject(input)
cluster <- results(linCenvir(input)$cluster)[[1]]
bio_list <- results(linCenvir(input)$bio)[[1]]
ep_promise <- express(linCenvir(input)$cluster)
## SECTION0: INPUT CONTROL
returnDat <- inlogical(returnDat, direct = FALSE)
q_list <- getlinc(input, subject = "queries")
q_express <- ep_promise[q_list, ]
# create a box plot
df_boxplot <- suppressMessages(reshape2::melt(q_express))
df_boxplot$Var1 <- as.factor(df_boxplot$Var1)
names(df_boxplot) <- c("QUERIES", NA, "GENE_EXPRESSION")
gg_box <- ggplot(df_boxplot,
aes(QUERIES, GENE_EXPRESSION),
environment = environment()) +
geom_boxplot(outlier.color = "firebrick",
colour = "dodgerblue3") +
theme(panel.border = element_rect(color = "grey",
fill = NA), panel.background = element_blank()) +
ggtitle("EXPRESSION OF QUERIES") +
theme(plot.title = element_text(face = "bold",
color = "steelblue4"),
axis.text.x = element_text(color = "deeppink4",
angle = -90, hjust = 0,
vjust = 0, size = 12),
axis.title.x = element_blank())
# plot the cluster
tree <- ggtree(cluster, colour = "firebrick") +
coord_flip() + scale_x_reverse()
tree <- tree + geom_tiplab(size = 4, angle = -90,
colour = "deeppink4",
hjust = 0, vjust = 0)
plot_matrix <- as.matrix(unlist(lapply(
cluster$neighbours, length)))
plot_df <- as.data.frame(plot_matrix)
clust_heat <- gheatmap(tree, plot_df, offset = 0.1,
width = 0.2, colnames = FALSE,
colnames_position = "top",
low = "white", high = "white") +
guides(fill = FALSE) +
ggtitle("DENDROGRAM OF QUERIES") +
theme(plot.title = element_text(
face = "bold", color = "firebrick4"))
clust_img <- readPNG(system.file("extdata", "clust_img.png",
package ="LINC"))
clust_plot <- rasterGrob(clust_img, interpolate = TRUE)
# plot the biological terms
term_cluster <- clusterProfiler::dotplot(bio_list,
showCategory = 4) +
theme(axis.text.x = element_text(angle = -90, hjust = 0,
vjust = 0, size = 12,
color = "deeppink4"))
# assembly of the final plot
customid <- ""
if(exists("customID", envir = history(input))){
customid <- history(input)$customID
}
if(returnDat){
return(bio_list)
} else {
top_panel <- arrangeGrob(clust_plot, clust_heat, gg_box,
ncol = 3, bottom = customid)
final_plot <- grid.arrange(top_panel, term_cluster,
nrow = 2)
return(invisible(final_plot))
}
}) # method end
# method for showCor
setMethod(f = "plotlinc",
signature = c("LINCmatrix",
"character"),
def = function(
input,
showCor,
returnDat = FALSE){
validObject(input)
input <- (input + feature(setLevel = "LINCmatrix"))
returnDat <- inlogical(returnDat, direct = FALSE)
if(returnDat) warning("'returnDat' unused in this method")
errorm01 <- paste("'showCor' must be a vector",
"supplying 2 up to 6 gene ids")
errorm02 <- "unable to find all gene ids in input"
# check showCor
spg_promise <- showCor
if(length(spg_promise) < 2 | length(spg_promise) > 6 |
is.numeric(spg_promise))
stop(errorm01)
select_try <- try(is.element(showCor,
rownames(express(input))), silent = TRUE)
if(class(select_try) == "try-error") stop(errorm01)
if(!all(select_try)) stop(errorm02)
# predefine empty vectors
for(n in 2:6){
assign(paste("SUBJECT", n, sep = '_'),
rep(NA, ncol(express(input))))
}
# define input
QUERY <- results(input)[[1]][spg_promise[1], ]
for(n in (c(2:length(spg_promise))) - 1){
assign(paste("SUBJECT", n, sep = '_'),
results(input)[[1]][spg_promise[n + 1], ])
}
expr_df <- data.frame(EXPRESSION = QUERY,
SAMPLES = seq_len(ncol(express(input))), SUBJECT_1,
SUBJECT_2, SUBJECT_3, SUBJECT_4, SUBJECT_5,
NO_SUBJECT = rep(0, ncol(express(input))))
qy_gg <- ggplot(expr_df, environment = environment())
qy_gg_ns <- ggplot(expr_df, environment = environment()) + geom_bar(aes(x = SAMPLES,
y = EXPRESSION), stat = "identity", colour =
"darkblue", fill = "blue", alpha = 0.1 ) +
theme(panel.background = element_blank(),
panel.border = element_rect(color = "grey",
fill = NA))
cor1 <- try(correlation(input)[[1]][spg_promise[2],
spg_promise[1]], silent = TRUE)
if(class(cor1) == "try-error") cor1 <- NA
plot1 <- qy_gg + geom_point(aes(x = QUERY, y = SUBJECT_1),
stat = "identity", colour = "firebrick4", alpha = 0.9) + ggtitle(paste(
spg_promise[1], "vs", spg_promise[2],
"(correlation =", round(cor1, 3), ")" )) +
theme(title = element_text(face = "bold",
size = 15))
if(length(spg_promise) > 2){
cor2 <- try(correlation(input)[[1]][spg_promise[3],
spg_promise[1]], silent = TRUE)
if(class(cor2) == "try-error") cor2 <- NA
plot2 <- qy_gg + geom_point(aes(x =QUERY, y = SUBJECT_2),
stat = "identity", colour = "firebrick4", alpha = 0.9) + ggtitle(paste(
spg_promise[1], "vs", spg_promise[3],
"(correlation =", round(cor2, 3), ")" )) +
theme(title = element_text(face = "bold",
size = 15))
} else {
plot2 <- qy_gg_ns + geom_bar(aes(x = SAMPLES, y = NO_SUBJECT),
stat = "identity") + ggtitle(spg_promise[1]) +
theme(title = element_text(face = "bold", size = 15))
}
if(length(spg_promise) > 3){
cor3 <- try(correlation(input)[[1]][spg_promise[4],
spg_promise[1]], silent = TRUE)
if(class(cor3) == "try-error") cor3 <- NA
plot3 <- qy_gg + geom_point(aes(x =QUERY, y = SUBJECT_3),
stat = "identity", colour = "firebrick4", alpha = 0.9) + ggtitle(paste(
spg_promise[1], "vs", spg_promise[4],
"(correlation =", round(cor3, 3), ")" )) +
theme(title = element_text(face = "bold",
size = 15))
} else {
plot3 <- qy_gg_ns + geom_bar(aes(x = SAMPLES, y = NO_SUBJECT),
stat = "identity") + ggtitle(spg_promise[1]) +
theme(title = element_text(face = "bold", size = 15))
}
if(length(spg_promise) > 4){
cor4 <- try(correlation(input)[[1]][spg_promise[5],
spg_promise[1]], silent = TRUE)
if(class(cor4) == "try-error") cor4 <- NA
plot4 <- qy_gg + geom_point(aes(x =QUERY, y = SUBJECT_4),
stat = "identity", colour = "firebrick4", alpha = 0.9) + ggtitle(paste(
spg_promise[1], "vs", spg_promise[5],
"(correlation =", round(cor4, 3), ")" )) +
theme(title = element_text(face = "bold",
size = 15))
} else {
plot4 <- qy_gg_ns + geom_bar(aes(x = SAMPLES, y = NO_SUBJECT),
stat = "identity") + ggtitle(spg_promise[1]) +
theme(title = element_text(face = "bold", size = 15))
}
if(length(spg_promise) > 5){
cor5 <- try(correlation(input)[[1]][spg_promise[6],
spg_promise[1]], silent = TRUE)
if(class(cor5) == "try-error") cor5 <- NA
plot5 <- qy_gg + geom_point(aes(x = QUERY, y = SUBJECT_5),
stat = "identity", colour = "firebrick4", alpha = 0.9) + ggtitle(paste(
spg_promise[1], "vs", spg_promise[6],
"(correlation =", round(cor5, 3), ")" )) +
theme(title = element_text(face = "bold",
size = 15))
} else {
plot5 <- qy_gg_ns + geom_bar(aes(x = SAMPLES, y = NO_SUBJECT),
stat = "identity") + ggtitle(spg_promise[1]) +
theme(title = element_text(face = "bold", size = 15))
}
# arrange these plots
# additional plot for title and explanations
# suppressPackageStartupMessages(require(grid))
#suppressPackageStartupMessages(require(png))
barplot_img <- readPNG(system.file("extdata", "barplot_img.png",
package ="LINC"))
#clust_img <- readPNG("protcl_img.png")
bar_plot <- rasterGrob(barplot_img, interpolate = TRUE)
# assembly of the final plot
customid <- ""
if(exists("customID", envir = history(input))){
customid <- history(input)$customID
}
bar_plot <- arrangeGrob(bar_plot)
final_plot <- grid.arrange(plot1, bar_plot, plot2,
plot3, plot4, plot5,
nrow = 3, ncol = 2)
return(invisible(final_plot))
})
setMethod(f = "plotlinc",
signature = c("LINCcluster",
"character"),
def = function(
input,
showCor,
returnDat = FALSE){
callNextMethod()
})
setMethod(f = "plotlinc",
signature = c("LINCbio",
"character"),
def = function(
input,
showCor,
returnDat = FALSE){
callNextMethod()
})
## getter methods
setMethod("results", "LINCsingle", function(x) x@results)
setMethod("assignment", "LINCsingle", function(x) x@assignment)
setMethod("correlation", "LINCsingle", function(x) x@correlation)
setMethod("express", "LINCsingle", function(x) x@expression)
setMethod("history", "LINCsingle", function(x) x@history)
setMethod("linCenvir", "LINCsingle", function(x) x@linCenvir)
## setter methods
setReplaceMethod("results", "LINCsingle",
function(x, value) {x@results <- value; x})
setReplaceMethod("assignment", "LINCsingle",
function(x, value) {x@assignment <- value; x})
setReplaceMethod("correlation", "LINCsingle",
function(x, value) {x@correlation <- value; x})
setReplaceMethod("express", "LINCsingle",
function(x, value) {x@expression <- value; x})
setReplaceMethod("history", "LINCsingle",
function(x, value) {x@history <- value; x})
setReplaceMethod("linCenvir", "LINCsingle",
function(x, value) {x@linCenvir <- value; x})
setMethod(f = "plotlinc",
signature = c("LINCsingle",
"character"),
def = function(
input,
showCor,
returnDat = FALSE){
callNextMethod()
})
## create a LINCsingle instance
setGeneric(name = "singlelinc",
def = function(input,
query = NULL,
onlycor = FALSE,
testFun = cor.test,
alternative = "greater",
threshold = 0.05,
underth = FALSE,
coExprCut = NULL,
enrichFun = 'enrichGO',
ont = 'BP',
verbose = TRUE, ...){
standardGeneric("singlelinc")
})
setMethod(f = "singlelinc",
signature = c("LINCmatrix"),
definition = function(input,
query = NULL,
onlycor = FALSE,
testFun = cor.test,
alternative = "greater",
threshold = 0.05,
underth = FALSE,
coExprCut = NULL,
enrichFun = 'enrichGO',
ont = 'BP',
verbose = TRUE, ...){
## method for a LINCmatrix as input
## PREDEFINITIONs
# errors, warnings and messages
#errorm00 <- "Input object is empty"
errorm01 <- "'testFun' is not a valid function"
errorm02 <- paste("'testFun' does not work; its",
"output must be available by '$pvalue'")
errorm03 <- "'coExprCut' has to be a single integer"
errorm04 <- "'threshold' has to be a single numeric value"
errorm05 <- "this function was called without a 'query'"
errorm06 <- "input 'query' not found; possible queries:"
errorm07 <- "no co-expressed genes for this 'threshold'"
warnim01 <- paste("'threshold' usually between 0 and 1; ",
"for a user-defined 'testFun' it could be different")
warnim02 <- paste("'testFun' was supplied and 'onlycor'",
"equals 'TRUE', here 'onlycor' has the higher priority")
warnim03 <- paste("'testFun' is not 'stats::cor.test'",
"and does not have formal arguments",
"'x', 'y', 'use' and 'method'")
warnim04 <- paste("This enrichment function is not",
"directly supported. Please,",
"consider using on of c(enrichGO,",
"enrichPathway, enrichDO)")
inform01 <- paste("singlelinc: no test conducted, genes",
"selected based on correlation values")
inform02 <- paste("singlelinc: the number of neighbour",
"genes was reduced by 'coExprCut'")
inform03 <- quote(paste("singlelinc: co-expression",
"analysis yielded", ql_promise, "genes"))
inform04 <- quote(paste("singlelinc: The function", enrichFun,
"will be called."))
inform05 <- quote(paste("singlelinc: Gene ids will be translated from",
kt_promise, "to entrez identifiers"))
# get information from 'linc'
validObject(input)
cm_promise <- correlation(linCenvir(input)$linc)[[1]]
out_history <- history(linCenvir(input)$linc)
aq_promise <- colnames(cm_promise)
corMethod <- out_history$corMethod
cor_use <- out_history$cor_use
store <- new.env(parent = emptyenv())
# on.exit(print("Possible queries are:"),
# print(paste( aq_promise)))
## SECTION0: INPUT CHECK
# interpretation of 'onlycor', 'underth, 'handleGeneIds'
# and 'verbose'
onlycor <- inlogical(onlycor, direct = FALSE)
underth <- inlogical(underth, direct = TRUE)
verbose <- inlogical(verbose, direct = TRUE)
if(!verbose) message <- function(x) x
# interpretation of 'testFun'
if(!is.null(testFun)){
tF_promise <- testFun
if(!is.function(match.fun(
tF_promise))) stop(errorm01)
fF_promise <- names(formals(tF_promise))
# formals x, y, method and use
x_promise <- any(is.element(fF_promise, "x"))
y_promise <- any(is.element(fF_promise, "y"))
m_promise <- any(is.element(fF_promise, "method"))
u_promise <- any(is.element(fF_promise, "use"))
if(!all(x_promise, y_promise, u_promise,
m_promise) && !identical(tF_promise, stats::cor.test)) warning(warnim03)
ff_promise <- try(testFun(c(1:10), c(1:10), method =
corMethod, use = cor_use)$pvalue)
if(class(ff_promise) == "try-error") stop(errorm02)
test_call <- TRUE
} else {
test_call <- FALSE; onlycor <- TRUE
}
#interpretation of 'coExprCut'
if(!is.null(coExprCut)){
ct_promise <- try(as.integer(coExprCut))
if(class(ct_promise) == "try-error") stop(errorm03)
if(length(ct_promise) != 1) stop(errorm03)
} else {
ct_promise <- 500
}
# interpretation of the 'threshold' argument
th_promise <- threshold
if(length(th_promise) != 1) stop(errorm04)
if(!is.numeric(th_promise)) stop(errorm04)
out_history$threshold <- th_promise
if(th_promise >= 1 | th_promise < 0) warning(warnim01)
# interpretation of query
if(is.null(query)) stop(errorm05)
qy_promise <- query
if(length(qy_promise) != 1) stop(errorm06)
found <- try(any(is.element(aq_promise, qy_promise)))
if(class(found) == "try-error") stop(errorm06)
if(!found) stop(errorm06)
## SECTION1: CO-EXPRESSION AND GENE SELECTION
# argument contradiction
if(test_call && onlycor){
warning(warnim02)
}
# in case only correlation defined
if(onlycor){
message(inform01); test_call <- FALSE
pre_selected <- cm_promise[, qy_promise]
ps_sort <- sort(pre_selected, decreasing = !underth) #!underth)
if(!underth)selected <- ps_sort[ps_sort > th_promise]
if(underth) selected <- ps_sort[ps_sort < th_promise] #
}
# in case a correlation test should be performed
if(!onlycor){
# do the test for the query gene
pc_matrix <- out_history$pc_matrix
nc_query <- out_history$nc_matrix[qy_promise, ]
query_tested <- apply(pc_matrix, 1, function(
x = pc_matrix, nc_query, corMethod, cor_use){
out <- tF_promise(x, nc_query, method =
corMethod, use = cor_use, alternative, ...)
return(out$p.value)
}, nc_query, corMethod, cor_use)
out_history$pval_min <- min(query_tested , na.rm = TRUE)
# select from the p-values
ps_sort <- sort(query_tested, decreasing = !underth)
if(!underth)selected <- ps_sort[ps_sort > th_promise]
if(underth) selected <- ps_sort[ps_sort < th_promise]
}
# do the final selection
ql_promise <- length(selected)
if(ql_promise == 0) stop(errorm07)
if(ql_promise > ct_promise){
selected <- selected[seq_len(ct_promise)]
ql_promise <- ct_promise
}
message(eval(inform03))
qg_promise <- names(selected)
# define output for correlation and the test
if(test_call){
out_pval <- selected
out_corl <- cm_promise[names(selected), qy_promise]
}
if(!test_call){
out_pval <- rep(NA, ql_promise)
out_corl <- selected
}
## SECTION2: GENE TRANSLATION
eF_promise <- try(match.arg(enrichFun,
c("enrichGO",
"enrichPathway",
"enrichDO")),
silent = TRUE)
if(class(eF_promise) == "try-error") warning(warnim01)
message(eval(inform04))
eF_promise <- get(eF_promise, mode = "function",
envir = loadNamespace('LINC'))
if(is.element("OrgDb", ls(history(input)))){
OrgDb <- get("OrgDb", envir = history(input))
} else {
OrgDb <- 'org.Hs.eg.db'
}
ot_promise <- match.arg(ont, c("MF", "BP", "CC"))
kt_promise <- identifyGenes(unlist(qg_promise))
if(kt_promise == "ENTREZID"){
entrez_query <- qg_promise
} else {
message(eval(inform05))
entrez_query <- bitr(qg_promise, fromType = kt_promise,
OrgDb = OrgDb, toType = "ENTREZID")$ENTREZID
}
## SECTION2: CALL TO GENE ANNOTATION
if(is.element("OrgDb", names(formals(eF_promise)))){
bio <- eF_promise(entrez_query,
OrgDb = OrgDb, ont = ot_promise, ...)
}
if(is.element("organism", names(formals(eF_promise)))){
if(OrgDb == 'org.Hs.eg.db'){
OrgDb <- "human"
}
bio <- eF_promise(entrez_query, organism = OrgDb, ...)
}
if(!any(is.element(c("OrgDb", "organism"), names(formals(eF_promise))))){
bio <- eF_promise(entrez_query, ...)
}
if(!exists("bio")) stop("The supplied 'enrichFun' is not supported!")
if(class(bio) == "enrichResult"){
qvalues <- slot(bio, "result")$qvalue
bio_terms <- slot(bio, "result")$Description
out_query <- list(qvalues, bio_terms)
} else {
out_query <- list(NA, NA)
}
## SECTION3: PREPARATION OF OUTPUT
# checkpoint reached, do not print queries
print <- function(x) x
out_linc <- new("LINCsingle")
results(out_linc) <- list(query = qy_promise,
bio = out_query,
cor = out_corl,
pval = out_pval)
assignment(out_linc) <- assignment(input)
correlation(out_linc) <- list(single = cm_promise[, qy_promise])
express(out_linc) <- express(input)
history(out_linc) <- out_history
out_linCenvir <- NULL
out_linCenvir <- linCenvir(input)
out_linCenvir$single <- out_linc
#lockEnvironment(out_linCenvir, bindings = TRUE)
linCenvir(out_linc) <- out_linCenvir
return(out_linc)
})
## getter methods
setGeneric("fcustomID", function(x) standardGeneric("fcustomID"))
setMethod("fcustomID", "LINCfeature", function(x) x@customID)
setGeneric("fcustomCol", function(x) standardGeneric("fcustomCol"))
setMethod("fcustomCol", "LINCfeature", function(x) x@customCol)
setGeneric("fsetLevel", function(x) standardGeneric("fsetLevel"))
setMethod("fsetLevel", "LINCfeature", function(x) x@setLevel)
setGeneric("fshowLevels", function(x) standardGeneric("fshowLevels"))
setMethod("fshowLevels", "LINCfeature", function(x) x@showLevels)
## setter methods
setGeneric("fcustomID<-", function(x, value) standardGeneric("fcustomID<-"))
setReplaceMethod("fcustomID", "LINCfeature",
function(x, value) {x@customID <- value; x})
setGeneric("fcustomCol<-", function(x, value) standardGeneric("fcustomCol<-"))
setReplaceMethod("fcustomCol", "LINCfeature",
function(x, value) {x@customCol <- value; x})
setGeneric("fsetLevel<-", function(x, value) standardGeneric("fsetLevel<-"))
setReplaceMethod("fsetLevel", "LINCfeature",
function(x, value) {x@setLevel <- value; x})
setGeneric("fshowLevels<-", function(x, value) standardGeneric("fshowLevels<-"))
setReplaceMethod("fshowLevels", "LINCfeature",
function(x, value) {x@showLevels <- value; x})
feature <- function(setLevel = NULL,
customID = NULL,
customCol = "black",
showLevels = FALSE){
out_feature <- new("LINCfeature")
linc_classes <- c("LINCmatrix", "LINCcluster",
"LINCbio", "LINCsingle")
# argument 'setLevel'
if(!is.null(setLevel)){
if(isTRUE(tryCatch(expr = (is.element(setLevel,
linc_classes))))){
fcustomCol(out_feature) <- customCol
} else {
stop(paste("'setLevel' not one of:",
paste(linc_classes, collapse = ', ')))
}
fsetLevel(out_feature) <- setLevel
}
# argument 'customID'
if(!is.null(customID)){
fcustomID(out_feature) <- customID
}
# argument 'customCol'
if(isTRUE(tryCatch(expr = (is.element(customCol,
colors()))))){
fcustomCol(out_feature) <- customCol
} else {
stop("invalid color name for 'customCol'")
}
# argument 'showLevels'
fshowLevels(out_feature) <- inlogical(showLevels,
direct = FALSE)
return(out_feature)
}
setMethod(f = '+',
signature = c("LINCmatrix", "LINCfeature"),
definition = function(e1, e2){
validObject(e1)
# set the level of e1
levels <- mget(c("cluster", "single", "bio", "linc"),
envir = linCenvir(e1), ifnotfound = FALSE)
if(length(fsetLevel(e2)) > 0){
if(!is.logical(levels$linc) && fsetLevel(e2) ==
"LINCmatrix") e1 <- linCenvir(e1)$linc
if(!is.logical(levels$cluster) && fsetLevel(e2) ==
"LINCcluster") e1 <- linCenvir(e1)$cluster
if(!is.logical(levels$bio) && fsetLevel(e2) ==
"LINCbio") e1 <- linCenvir(e1)$bio
if(!is.logical(levels$single) && fsetLevel(e2) ==
"LINCsingle") e1 <- linCenvir(e1)$single
}
# add costum IDs and colors to the history slot
if(length(fcustomID(e2)) > 0){
history(e1)$customID <- fcustomID(e2)
}
if(length(fcustomCol(e2)) > 0){
history(e1)$customCol <- fcustomCol(e2)
}
if(isTRUE(fshowLevels(e2))){
message("levels for this object:")
lapply(levels, function(x){
if(!is.logical(x)) message(class(x))
})
}
return(e1)
})
setMethod(f = '+',
signature = c("LINCbio", "LINCfeature"),
definition = function(e1, e2){
callNextMethod()
})
setMethod(f = '+',
signature = c("LINCcluster", "LINCfeature"),
definition = function(e1, e2){
callNextMethod()
})
setGeneric(name = "overlaylinc",
def = function(
input1,
input2){
standardGeneric("overlaylinc") # do it from environment
})
setMethod(f = "overlaylinc",
signature = c("LINCbio",
"LINCbio"),
def = function(
input1,
input2){
validObject(input1); validObject(input2)
e1e2_intersect <- (input1 + input2)
to_overlay <- results(e1e2_intersect)
cluster <- results(linCenvir(input1)$cluster)[[1]]
bio_list <- results(linCenvir(input1)$bio)[[1]]
## SECTION0: INPUT CONTROL
# check for a cluster
#+ to be added
# plot the cluster
tree <- ggtree(cluster, colour = "firebrick") +
coord_flip() + scale_x_reverse()
tree <- tree + geom_tiplab(size = 3.5, angle = 90,
colour = "firebrick", hjust = 1)
# prepare biological terms for plotting
# preparation of a matrix
# MAKE THIS ROBUST
term_ext <- 1
while(term_ext < 20){
term_ext <- (term_ext + 1)
raw_names <- names(bio_list)
term_crude <- lapply(bio_list, function(x, term_ext){
x[[2]][seq_len(term_ext)] }, term_ext)
term_unique <- unique(unlist(term_crude))
if(length(term_unique) > 20) break
}
term_unique[is.na(term_unique)] <- "NA"
m <- length(raw_names); n <- length(term_unique)
first_matrix <- matrix(rep(0, (m*n)), ncol = n, nrow = m )
colnames(first_matrix) <- term_unique
rownames(first_matrix) <- raw_names
# now fill matrix with biological terms
for(query in seq_len(m)){
if(length(bio_list[[query]][[2]]) > 0 &&
is.character(bio_list[[query]][[2]]) &&
is.numeric(bio_list[[query]][[1]])){
bio_query <- bio_list[[query]][[2]]
pvalues <- (-log10(bio_list[[query]][[1]]))
row_entry <- vapply(colnames(first_matrix),
function(x, pvalues){
if(any(x == bio_query)){
pvalues[x == bio_query][1]
} else {
return(0)
}
}, 0, pvalues)
first_matrix[query,] <- row_entry
}
}
# additional plot for term assignments
term_assign <- c(seq_len(length(term_unique)))
bio_assignment <- mapply(function(x,y){ paste(x,y) },
x = term_assign, y = term_unique)
df_assign <- data.frame(bio_assignment, y = -term_assign,
x = rep(0, length(term_unique)))
pty_pl <- (ggplot(df_assign, aes(x,y), environment = environment()) +
geom_point(color = "white") + xlim(0, 1) +
theme(axis.line = element_line(colour =
"white"), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()) +
theme(axis.title.y = element_text(color =
"white")) + theme(axis.title.x = element_text(
color = "white")) + theme(axis.text.x =
element_text(color = "white")) + theme(
axis.text.y = element_text(color = "white")))
bio_assign_plot <- pty_pl + geom_text(aes(label =
bio_assignment),hjust=0, vjust=0,
size = 4, colour = "grey18")
over_matrix <- first_matrix
colnames(over_matrix) <- term_unique
over_matrix[,] <- 0
for(query in seq_len(length(to_overlay))){
if(length(to_overlay[[query]][[2]]) > 0 &&
is.character(to_overlay[[query]][[2]]) &&
is.numeric(to_overlay[[query]][[1]])){
bio_query <- to_overlay[[query]][[2]]
pvalues <- (-log10(to_overlay[[query]][[1]]))
row_entry <- vapply(colnames(over_matrix),
function(x, pvalues){
if(any(x == bio_query)){
pvalues[x == bio_query][1]
} else {
return(0)
}
}, 0, pvalues)
over_matrix[query,] <- row_entry
}
}
# convert to discrete values and join with tree
# significant in the first and second?
plot_matrix <- first_matrix
plot_matrix[plot_matrix > 0 ] <- 1
over_matrix[over_matrix > 0] <- 1
plot_matrix <- ( plot_matrix + over_matrix)
colnames(plot_matrix) <- term_assign
plot_df <- as.data.frame(plot_matrix)
clust_heat <- gheatmap(tree, plot_df, offset = 0.1,
width = 0.5, colnames = TRUE,
colnames_position = "top")
interbio_heat <- clust_heat + scale_fill_gradient2(aes(values),
low = "white", mid = "cornflowerblue", high =
"darkviolet", midpoint = 1) + guides(fill=FALSE)
interbio_img <- readPNG(system.file("extdata", "interbio_img.png",
package ="LINC"))
interbio_plot <- rasterGrob(interbio_img, interpolate = TRUE)
# assembly of the final plot
customid <- ""
if(exists("customID", envir = history(input1))){
customid <- history(input1)$customID
}
# suppressPackageStartupMessages(require(gridExtra))
right_side <- arrangeGrob(interbio_plot, bio_assign_plot,
ncol = 1, bottom = customid)
final_plot <- grid.arrange(interbio_heat, right_side,
nrow = 1)
return(invisible(final_plot))
})
# set all validation methods
setValidity("LINCmatrix", method =
function(object){
if(any(is.element(c("linc", "cluster",
"bio", "single"), ls(linCenvir(object)))
)){
return(TRUE)
} else {
stop("Not a valid instance of the 'LINC' class ")
}
}
)
# function getlinc
setGeneric(name = "getlinc",
def = function(
input,
subject = "queries"){
standardGeneric("getlinc")
})
setMethod(f = "getlinc",
signature = c("ANY", "character"),
def = function(
input,
subject = "queries"){
# check the class
if(!any(is.element(class(input),
c("LINCmatrix", "LINCcluster",
"LINCsingle", "LINCbio")))){
stop("input is not of a supported 'LINC' class")
}
# one of the subject arguments
sj_try <- try(any(is.element(subject,
c("queries", "geneSystem",
"results", "history",
"customID"))), silent = TRUE)
if(class(sj_try) == "try-error") stop("subject invalid")
if(length(subject) != 1 | sj_try == FALSE)
stop("subject invalid")
# go truth all arguments
if(subject == "history"){
print(str(mget(ls(history(input)),
envir = history(input))))
return(invisible((mget(ls(history(input)),
envir = history(input)))))
}
if(subject == "queries"){
return(listQuery(input))
}
if(subject == "geneSystem"){
message("diagnose for the gene system:")
assignment_ids <- try(identifyGenes(assignment(input)),
silent = TRUE)
if(class(assignment_ids) != "try-error"){
message("from slot 'assignment':", assignment_ids)
}
expression_ids <- try(identifyGenes(
rownames(express(input))),
silent = TRUE)
if(class(expression_ids) != "try-error"){
message("from slot 'expression':", expression_ids)
}
}
if(subject == "customID"){
if(exists("customID", envir = history(input))){
return(history(input)$customID)
} else {
message("no custom id for this object")
}
}
if(subject == "results"){
print(str(results(input)))
return(invisible(results(input)))
}
})
# function linctable
setGeneric(name = "linctable",
def = function(
file_name = "linc_table",
input){
standardGeneric("linctable")
})
setMethod(f = "linctable",
signature = c("character", "LINCcluster"),
def = function(
file_name = "linc_table",
input){
pre <- results(input)[[1]]$neighbours
tab <- lapply(pre, function(y){y[1:500] })
m_tab <- matrix(unlist(tab), ncol = 500, nrow = length(tab), byrow = TRUE )
rownames(m_tab) <- names(tab)
write.table(m_tab, file = file_name, col.names = FALSE, sep = "\t" )
message("table of co-expressed genes written")
})
## END OF SCRIPT
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.