Nothing
setGeneric("cluster_peak", function(object, ...) setGeneric("cluster_peak"))
cluster.GRange <- function(object, parallel = FALSE, num.cores = NULL,
n.clust = NULL, seeds = NULL, shift.peak = NULL, weight = NULL, subsample.weight = 100,
alpha = 1, p = 1, t.max = 0.5, plot.graph.k = TRUE, verbose = TRUE,
rescale = FALSE)
{
# This function will call the .cpp funciton
# SEXP kmean_function (SEXP x, SEXP spline, SEXP spline_der,
# SEXP lenght, SEXP seeds, SEXP align,
# SEXP k, SEXP weight_input, SEXP alpha_input,
# SEXP p_input, SEXP t_max_input, SEXP verbose)
if (rescale & is.null(object$spline_rescaled))
{
stop('provide the rescaled spline and derivatives')
}
if (is.null(object$counts))
{
stop('No information on the peaks provided!')
}
if (is.null(n.clust))
{
stop('No information on the number of clusters provided')
}
if (is.null(object$spline) || is.null(object$spline_der) || is.null(object$width_spline) )
{
stop('spline approximation not provided. Run smooth_peak before the clustering')
}
if (is.null(object$summit_spline))
{
stop('summit is not a metadata of the GRange. Run summit_peak before the clustering')
}
if (max(n.clust)>=length(object))
{
stop ('Maximum number of cluster greater then the number of observations')
}
if (length(object)==1)
{
x_centered_list <- vector('list', 1)
if(rescale)
{
x_centered_list[[1]] <- (-object$summit_spline +1) : (length(object$spline_rescaled[[1]]) - object$summit_spline)
}else
{
x_centered_list[[1]] <- (-object$summit_spline +1) : (object$width_spline - object$summit_spline)
}
}else
{
if(rescale)
{
x_centered_list <- mapply(function(x,y){(-x+1):(y-x)}, object$summit_spline_rescaled,
rep(length(object$spline_der_rescaled[[1]]), length(object$width_spline)),
SIMPLIFY = FALSE)
}else
{
x_centered_list <- mapply(function(x,y){(-x+1):(y-x)}, object$summit_spline, object$width_spline)
}
}
x_matrix <- unlist.counts(x_centered_list, sapply(x_centered_list, length))
if(rescale)
{
spline_matrix <- unlist.counts(object$spline_rescaled,
sapply(object$spline_rescaled, length))
spline_der_matrix <- unlist.counts(object$spline_der_rescaled,
sapply(object$spline_der_rescaled, length))
}else
{
spline_matrix <- unlist.counts(object$spline, object$width_spline)
spline_der_matrix <- unlist.counts(object$spline_der, object$width_spline)
}
# check the dimentions of x, spline, spline_der
n.data <- dim(x_matrix)[1]
n.points <- dim(x_matrix)[2]
if ((dim(spline_matrix)[1] != n.data) | (dim(spline_matrix)[2] != n.points))
{
stop('dimentions of x and spline don\'t coincide')
}
if ((dim(spline_der_matrix)[1] != n.data) | (dim(spline_der_matrix)[2] != n.points))
{
stop('dimentions of x and spline_der don\'t coincide')
}
# check on the definition of the parameters of the distance
if ( (p != 0) && (p != 1) && (p != 2) )
{
stop ('invalid value for p, It must be 0,1,2 ')
}
if ( (alpha < 0) || (alpha > 1) )
{
stop ('invalid value for alpha. It must be included in [0, 1]')
}
if (is.null(weight))
{
if ( (is.null(subsample.weight)) || (subsample.weight >= length(object)))
{
dist_matrix <- distance_peak(object, p, rescale)
upper_triang_d0 <- dist_matrix$dist_matrix_d0[upper.tri(dist_matrix$dist_matrix_d0)]
upper_triang_d1 <- dist_matrix$dist_matrix_d1[upper.tri(dist_matrix$dist_matrix_d1)]
weight <- median(upper_triang_d0/upper_triang_d1)
}else
{
# subsample the data to define the weights. Using all the
# data can be computationally expensive
object_here <- object[sort(sample(1:length(object), subsample.weight))]
dist_matrix <- distance_peak(object_here, p, rescale)
upper_triang_d0 <- dist_matrix$dist_matrix_d0[upper.tri(dist_matrix$dist_matrix_d0)]
upper_triang_d1 <- dist_matrix$dist_matrix_d1[upper.tri(dist_matrix$dist_matrix_d1)]
weight <- median(upper_triang_d0/upper_triang_d1)
}
}
if (is.null(seeds))
{
dist_matrix <- distance_peak(object, p, rescale)
dist_matrix_global <- (1 -alpha) * dist_matrix$dist_matrix_d0 + alpha * weight * dist_matrix$dist_matrix_d1
dist_elem <- colSums(dist_matrix_global)
seeds <- t(as.vector(sort(dist_elem, index.return = TRUE, decreasing = FALSE)$ix[1:max(n.clust)]))
}
if (!is.numeric(seeds))
{
if (seeds == 'random')
{
seeds <- sort(sample(1:n.data, max(n.clust)))
}else
{
stop('invalid value for seeds')
}
}
#check the uniqness of the seeds
seeds <- unique(seeds)
if ( ( min(seeds) < 0 ) | ( max(seeds) > n.data ) )
{
stop('invalid values in seed. Seeds are the indices of the data
choosen as starting centers of the clusters and then must be inclued in 0, n.data')
}
if (length(seeds) < max(n.clust))
{
warning ('not enough values of seeds provided, random choice of the missing values')
seeds_new <- sample((1:n.data)[-seeds], max(n.clust)-length(seeds))
seeds <- c(seeds, seeds_new)
}
if (length(seeds) > max(n.clust))
{
warning ('too many seeds provided. Only the first used')
seeds <- seeds[1:max(n.clust)]
}
if (is.null(shift.peak))
{
warning ('no value for shift.peak provided. Both results with no registration and
results with registration provided')
}else
{
if (shift.peak != TRUE & shift.peak != FALSE )
{
stop ('invalid value for shift.peak It must be TRUE or FALSE')
}
}
if (verbose)
{
verb <- 'T'
if (parallel)
{
warning('both parallel and verbose set to TRUE, the output will be unclear. ')
}
}else
{
verb <- 'F'
}
# to define the variable k as global.
k = NULL
width_global <- rep(NA, length(object$width_spline))
if(rescale)
{
width_global <- rep(length(object$spline_rescaled[[1]]), length(object$width_spline))
}else
{
width_global <- object$width_spline
}
registration_NOalignment <- NULL
registration_shift <- NULL
if (parallel==TRUE)
{
if (is.null(num.cores))
{
warning ('the number of cores will be automatically identified.')
num.cores = detectCores()
}
cl <- makeCluster(num.cores)
registerDoParallel(cl)
if (is.null(shift.peak))
{
registration_NOalignment <- foreach(k=n.clust) %dopar%
{
.Call(kmean_function, x_matrix, spline_matrix , spline_der_matrix,
width_global, seeds[1:k]-1, 'F', k, weight, alpha, p, t.max, verb)
#NB the c++ codification of the position on vectors is from 0
}
registration_shift <- foreach(k=n.clust) %dopar%
{
.Call(kmean_function, x_matrix, spline_matrix , spline_der_matrix,
width_global, seeds[1:k]-1, 'T', k, weight, alpha, p, t.max, verb)
}
}else
{
if (shift.peak)
{
# registration_NOalignment <- NULL
registration_shift <- foreach(k=n.clust) %dopar%
{
.Call(kmean_function, x_matrix, spline_matrix , spline_der_matrix,
width_global, seeds[1:k]-1, 'T', k, weight, alpha, p, t.max, verb)
}
}
if (!shift.peak)
{
registration_NOalignment <- foreach(k=n.clust) %dopar%
{
.Call(kmean_function, x_matrix, spline_matrix , spline_der_matrix,
width_global, seeds[1:k]-1, 'F', k, weight, alpha, p, t.max, verb)
}
# registration_shift <- NULL
}
}
stopCluster(cl)
}else
{
if (is.null(shift.peak))
{
registration_NOalignment <- foreach(k=n.clust) %do%
{
.Call(kmean_function, x_matrix, spline_matrix , spline_der_matrix,
width_global, seeds[1:k]-1, 'F', k, weight, alpha, p, t.max, verb)
}
registration_shift <- foreach(k=n.clust) %do%
{
.Call(kmean_function, x_matrix, spline_matrix , spline_der_matrix,
width_global, seeds[1:k]-1, 'T', k, weight, alpha, p, t.max, verb)
}
}else
{
if (shift.peak)
{
# registration_NOalignment <- NULL
registration_shift <- foreach(k=n.clust) %do%
{
.Call(kmean_function, x_matrix, spline_matrix , spline_der_matrix,
width_global, seeds[1:k]-1, 'T', k, weight, alpha, p, t.max, verb)
}
}
if (!shift.peak)
{
registration_NOalignment <- foreach(k=n.clust) %do%
{
.Call(kmean_function, x_matrix, spline_matrix , spline_der_matrix,
width_global, seeds[1:k]-1, 'F', k, weight, alpha, p, t.max, verb)
}
# registration_shift <- NULL
}
}
}
# plot
if (plot.graph.k)
{
ylim <- NULL
no_al <- FALSE
shi <- FALSE
if (!is.null(registration_NOalignment))
{
mean_dist_NOal <- sapply(registration_NOalignment, function(x){mean(x$distances)})
ylim <- c(min(ylim, mean_dist_NOal), max(ylim, mean_dist_NOal))
no_al <- TRUE
}
if(!is.null(registration_shift))
{
mean_dist_shi <- sapply(registration_shift, function(x){mean(x$distances)})
ylim <- c(min(ylim, mean_dist_shi), max(ylim, mean_dist_shi))
shi <- TRUE
}
if (no_al & shi)
{
# plot(n.clust, mean_dist_NOal, col='grey31', pch=19, type='b', xlab='number of clusters', ylab ='average distance', ylim=ylim, lwd =2, main = 'Average distance varying the number of clusters')
# points(n.clust, mean_dist_shi, col='red3', pch=19, type='b', lwd =2)
# legend('topright', legend=c('No Shift', 'Shift'), col=c('grey31','red3'), pch=19, cex = 1.2, lty=1, bty = 'n')
#
par(mar = c(4,5,4,4))
plot(n.clust, mean_dist_NOal, col='grey31',
pch=19, type='b', xlab='number of clusters',
ylab ='average distance', ylim=ylim, lwd =2, cex = 2,
main = 'Average distance varying k',
font.main = 1, cex.main = 2, cex.lab = 2, cex.axis = 1.5)
points(n.clust, mean_dist_shi,
col='red3', pch=19, type='b', lwd =2, cex = 2)
legend('topright', legend=c('No Shift', 'Shift'),
col=c('grey31','red3'), pch=19,
cex = 1.7, lty=1, bty = 'n')
}
if (no_al & !shi)
{
plot(n.clust, mean_dist_NOal, col='grey31', pch=19,lwd =2, type='b', xlab='number of clusters', ylab ='average distance', ylim=ylim, main = 'Average distance varying the number of clusters')
legend('topright', legend=c('No Shift'), col=c('grey31'), pch=19, lty=1, cex = 1.2, bty = 'n')
}
if (!no_al & shi)
{
plot(n.clust, mean_dist_shi, col='red3', pch=19, type='b', xlab='number of clusters', ylab ='average distance', ylim=ylim, lwd =2, main = 'Average distance varying the number of clusters')
legend('topright', legend=c('Shift'), col=c('red3'), pch=19, lty=1, cex =1.2, bty ='n')
}
}
# save the results
if (length(registration_NOalignment)!=0)
{
label <- lapply(registration_NOalignment, function(x){x$labels+1})
labels_NOalignment <- convert_vectors_to_list(label, n.clust)
dist <- lapply(registration_NOalignment, function(x){x$distances})
dist_NOalignment <- convert_vectors_to_list(dist, n.clust)
elementMetadata(object)[['cluster_NOshift']] <- labels_NOalignment
elementMetadata(object)[['dist_NOshift']] <- dist_NOalignment
}
if (length(registration_shift)!=0)
{
label <- lapply(registration_shift, function(x){x$labels+1})
labels_shift <- convert_vectors_to_list(label, n.clust)
coef <- lapply(registration_shift, function(x){x$shift})
coef_shift <- convert_vectors_to_list(coef, n.clust)
dist <- lapply(registration_shift, function(x){x$distances})
dist_shift <- convert_vectors_to_list(dist, n.clust)
elementMetadata(object)[['cluster_shift']] <- labels_shift
elementMetadata(object)[['coef_shift']] <- coef_shift
elementMetadata(object)[['dist_shift']] <- dist_shift
}
return(object)
}
setMethod("cluster_peak", signature=(object="GRanges") ,function(object, parallel = FALSE, num.cores = NULL,
n.clust = NULL, seeds = NULL, shift.peak = NULL,
weight = NULL, subsample.weight = 100,
alpha = 1, p = 1, t.max = 0.5, plot.graph.k = TRUE,
verbose = TRUE, rescale = FALSE)
cluster.GRange(object, parallel, num.cores,
n.clust, seeds, shift.peak,
weight, subsample.weight, alpha,
p, t.max, plot.graph.k, verbose, rescale) )
# function which defines from the list of the vectors of the counts
# to a matrix (n x max(length(counts)) ) with in each row the values
# of the counts
unlist.counts <- function(counts, lenght.counts)
{
matrix_peaks <- matrix(NA, nrow=length(lenght.counts), ncol=max(lenght.counts))
for(i in 1:length(lenght.counts))
{
matrix_peaks[i,1:lenght.counts[i]] <- counts[[i]]
}
return(matrix_peaks)
}
# given a vector of positions (length p)
# and a list of p vectors (with fixed length n.elem)
# we aim to create a list of n.elem vectors each one of length
# max(positions) containing in the position i (i in positions)
# the value of vector[[n]][i] (n in 1:n.elem). The positions not
# included in positions will be NA
convert_vectors_to_list <- function(vect, positions)
{
matrix_data <- matrix(unlist(vect), length(vect), length(vect[[1]]), byrow=TRUE)
matrix_NA_data <- matrix(NA, max(positions), length(vect[[1]]))
matrix_NA_data[positions,] <- matrix_data
list_temp <- apply(matrix_NA_data, 2, list)
list <- lapply(list_temp, unlist)
return(list)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.