.getSegments_old <- function(x, factor, cutoff=quantile(abs(x),0.99), verbose=TRUE){
Indexes <- split(seq(along=x),factor)
regionID <- vector("numeric", length(x))
LAST <- 0
segmentation <- vector("numeric", length(x))
type <- vector("numeric", length(x))
for (i in seq(along = Indexes)) {
if (verbose) if (i%%1000 == 0) cat(".")
Index <- Indexes[[i]]
y <- x[Index]
z <- sign(y) * as.numeric(abs(y) > cutoff)
w <- cumsum(c(1, diff(z) != 0)) + LAST
segmentation[Index] <- w
type[Index] <- z
LAST <- max(w)
}
##add a vector of the pns
res <- list(upIndex = split(which(type>0), segmentation[type>0]),
dnIndex = split(which(type<0), segmentation[type<0]),
zeroIndex = split(which(type==0), segmentation[type==0]))
names(res[[1]]) <- NULL
names(res[[2]]) <- NULL
names(res[[3]]) <- NULL
return(res)
}
getSegments <- function(x, f = NULL, cutoff=quantile(abs(x), 0.99), assumeSorted = FALSE, verbose=FALSE){
if(is.null(f))
f <- rep(1L, length(x))
stopifnot(length(x) == length(f))
stopifnot(length(cutoff) <= 2)
if(is.character(f))
f <- as.factor(f)
if(is.numeric(f))
f <- as.integer(f)
stopifnot(is.factor(f) || is.integer(f))
if(length(cutoff) == 1)
cutoff <- c(-cutoff, cutoff)
cutoff <- sort(cutoff)
reordered <- FALSE
if(!assumeSorted && is.unsorted(f)) {
od <- order(f)
x <- x[od]
f <- f[od]
reordered <- TRUE
}
if(verbose) message("getSegments: segmenting")
Indexes <- split(seq(along=x), f)
direction <- as.integer(greaterOrEqual(x, cutoff[2]))
direction[x <= cutoff[1]] <- -1L
## We now need to convert f into cid
if(verbose) message("getSegments: splitting")
segments <- cumsum(c(1, diff(direction) != 0)) +
cumsum(c(1, diff(f) != 0))
names(segments) <- NULL
res <- list(upIndex = split(which(direction>0), segments[direction>0]),
dnIndex = split(which(direction<0), segments[direction<0]),
zeroIndex = split(which(direction==0), segments[direction==0]))
names(res[[1]]) <- NULL
names(res[[2]]) <- NULL
names(res[[3]]) <- NULL
if(reordered) {
res <- lapply(res, function(sp) lapply(sp, function(xx) od[xx]))
}
res
}
clusterMaker <- function(chr, pos, assumeSorted = FALSE, maxGap=300){
if( (any(is.na(chr))) || (any(is.na(pos))) ) stop("NAs found in chr and/or pos vector(s). Please replace NA values in chr and/or pos vector(s) to actual chromosome names and genomic locations. ")
nonaIndex <- which(!is.na(chr) & !is.na(pos))
Indexes <- split(nonaIndex, chr[nonaIndex])
clusterIDs <- rep(NA, length(chr))
LAST <- 0
for(i in seq(along = Indexes)){
Index <- Indexes[[i]]
x <- pos[Index]
if(!assumeSorted){
Index <- Index[order(x)]
x <- pos[Index]
}
y <- as.numeric(diff(x) > maxGap)
z <- cumsum(c(1, y))
clusterIDs[Index] <- z + LAST
LAST <- max(z) + LAST
}
clusterIDs
}
##you can pass cutoff through the ...
regionFinder <- function(x, chr, pos, cluster=NULL, y=x, summary=mean,
ind=seq(along=x),order=TRUE, oneTable=TRUE,
maxGap=300, cutoff=quantile(abs(x), 0.99),
assumeSorted = FALSE, verbose = TRUE){
if(any(is.na(x[ind]))){
warning("NAs found and removed. ind changed.")
ind <- intersect(which(!is.na(x)),ind)
}
if(is.null(cluster))
cluster <- clusterMaker(chr, pos, maxGap=maxGap, assumeSorted = assumeSorted)
Indexes <- getSegments(x = x[ind], f = cluster[ind], cutoff = cutoff,
assumeSorted = assumeSorted, verbose = verbose)
clusterN <- table(cluster)[as.character(cluster)]
res <- vector("list",2)
for(i in 1:2){
res[[i]]<-
data.frame(chr=sapply(Indexes[[i]],function(Index) chr[ind[Index[1]]]),
start=sapply(Indexes[[i]],function(Index) min(pos[ind[Index]])),
end=sapply(Indexes[[i]], function(Index) max(pos[ind[Index]])),
value=sapply(Indexes[[i]],function(Index)summary(y[ind[Index]])),
area=sapply(Indexes[[i]],function(Index)abs(sum(y[ind[Index]]))),
cluster=sapply(Indexes[[i]],function(Index)cluster[ind[Index]][1]),
indexStart=sapply(Indexes[[i]], function(Index) min(ind[Index])),
indexEnd = sapply(Indexes[[i]], function(Index) max(ind[Index])),
stringsAsFactors=FALSE)
res[[i]]$L <- res[[i]]$indexEnd - res[[i]]$indexStart+1
res[[i]]$clusterL <- sapply(Indexes[[i]], function(Index) clusterN[ind[Index]][1])
}
names(res) <- c("up","dn")
if(order & !oneTable){
if(nrow(res$up)>0) res$up <- res$up[order(-res$up$area),]
if(nrow(res$dn)>0) res$dn <- res$dn[order(-res$dn$area),]
}
if(oneTable){
res <- rbind(res$up,res$dn)
if(order & nrow(res)>0) res <- res[order(-res$area),]
}
return(res)
}
boundedClusterMaker <- function(chr, pos, assumeSorted = FALSE,
maxClusterWidth = 1500, maxGap = 500) {
if( (any(is.na(chr))) || (any(is.na(pos))) ) stop("NAs found in chr and/or pos vector(s). Please replace NA values in chr and/or pos vector(s) to actual chromosome names and genomic locations. ")
nonaIndex <- which(!is.na(chr) & !is.na(pos))
Indexes <- split(nonaIndex, chr[nonaIndex])
clusterIDs <- rep(NA, length(chr))
LAST <- 0
for (i in seq(along = Indexes)) {
Index <- Indexes[[i]]
x <- pos[Index]
if (!assumeSorted) {
Index <- Index[order(x)]
x <- pos[Index]
}
y <- as.numeric(diff(x) > maxGap)
z <- cumsum(c(1, y))
startPosition <- tapply(x, list(z), min)
startPositions <- startPosition[as.character(z)]
offset <- x - startPositions
addToZ <- offset %/% maxClusterWidth
addToZ <- cumsum(addToZ * c(0, as.numeric(diff(addToZ) > 0)))
z <- z + addToZ
clusterIDs[Index] <- z + LAST
LAST <- max(z) + LAST
}
clusterIDs
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.