#' @importFrom data.table fread
#' @importFrom stringr str_replace
#' @importFrom GenomeInfoDb Seqinfo
#' @importFrom GenomicRanges GRangesList GRanges
#' @importFrom IRanges IRanges
#' @importFrom S4Vectors mcols mcols<-
#' @importFrom utils read.table
#' @importFrom readr read_tsv cols_only col_character col_integer col_number
GetBioFeatures <- function(manifest, dm, my.seqinfo) {
dm@abstraction.layer <- lapply(abstractionLayer(dm), tolower)
manifest <- manifest[tolower(manifest$MARK) %in% unlist(abstractionLayer(dm), use.names = FALSE), , drop = FALSE]
if(nrow(manifest) < 1) return(NULL)
good.files <- sapply(split(manifest, 1:nrow(manifest)), function(x) file.exists(x$FILE))
if (sum(good.files) < nrow(manifest)) {
stop(paste0("cannot find file: ", manifest[!good.files, "FILE"], "\n"))
}
get.files <- split(manifest, 1:nrow(manifest))
names(get.files) <- paste(manifest$SAMPLE, manifest$MARK, sep = "_")
# if (anyDuplicated(names(get.files))) {
# stop("the manifest describes multiple files with the same sample name and mark \n",
# "merge these files before running StatePaintR")
# }
if (sum(good.files) >= 1) {
bed.list <- lapply(get.files,
function(x, s.info) {
if (grepl(".narrowPeak", x$FILE, ignore.case = TRUE)) {
col.types <- cols_only(chr = col_character(),
start = col_integer(),
end = col_integer(),
name = col_character(),
score = col_integer(),
strand = col_character(),
signalValue = col_number())
col.names <- c("chr", "start", "end", "name", "score", "strand", "signalValue")
col.numbers <- c(1:7)
} else {
col.types <- cols_only(chr = col_character(),
start = col_integer(),
end = col_integer())
col.names <- c("chr", "start", "end")
col.numbers <- c(1:3)
}
if (any(grepl(".gz$", x$FILE))) {
xf <- suppressWarnings(read_tsv(file = x$FILE, col_names = col.names,
col_types = col.types,
progress = FALSE))
} else {
xf <- fread(input = x$FILE, sep = "\t", header = FALSE,
select = col.numbers, skip = "chr",
col.names = col.names,
encoding = "UTF-8",
stringsAsFactors = FALSE,
data.table = FALSE, showProgress = FALSE)
}
name <- paste(x$SAMPLE, x$MARK, sep = "_")
if (ncol(xf) < 7) xf$signalValue <- NA
xf <- try(GRanges(seqnames = xf$chr,
ranges = IRanges(start = xf$start + 1L,
end = xf$end),
strand = "*",
feature = base::rep.int(name, nrow(xf)),
signalValue = xf$signalValue,
seqinfo = s.info))
return(xf)
}, s.info = my.seqinfo)
} else {
stop("no files")
}
if (is.null(bed.list)) {
return(GRangesList())
} else {
return(GRangesList(bed.list))
}
}
parse.manifest <- function(manifest, check = TRUE) {
if (is(manifest, "character")) {
if (check == TRUE) {
if (!file.exists(manifest)) stop("manifest file: ", manifest, " does not exist")
}
col.names <- c("SAMPLE", "MARK", "SRC", "BUILD", "FILE")
manifest.df <- fread(file = manifest,
col.names = col.names,
stringsAsFactors = FALSE,
data.table = FALSE,
showProgress = FALSE)
manifest.path <- dirname(manifest)
} else {
if (!is(manifest, "data.frame")) stop("manifest must be a character vector containing one path name, or a data.frame")
manifest.df <- manifest
manifest.path <- ""
}
files.good <- file.exists(manifest.df$FILE)
if (check == TRUE) {
if (any(!files.good)) {
test.files <- manifest.df[!files.good, "FILE"]
test.files <- file.path(manifest.path, test.files)
new.files.good <- file.exists(test.files)
if (any(!new.files.good)) {
stop("cannot find these files listed in manifest:\n",
paste0(manifest.df[!new.files.good, "FILE"], collapse = "\n"))
} else {
manifest.df[!files.good, "FILE"] <- test.files
}
}
}
if (length(unique(sort(manifest.df$BUILD))) > 1) {
stop("each celltype must be described by one and only one genome build")
}
alls <- manifest.df[manifest.df$SAMPLE == "ALL", ]
manifest.df <- manifest.df[manifest.df$SAMPLE != "ALL", ]
by.sample <- split(manifest.df, manifest.df$SAMPLE)
by.sample <- lapply(by.sample, function(out, add = alls) {
out <- rbind(out, add)
return(out)
})
names(by.sample) <- tolower(names(by.sample))
return(by.sample)
}
flookup <- function(query, definition) {
qr <- matrix(nrow = dim(query)[2])
for (i in 1:nrow(definition)) {
d <- definition[i, , drop = FALSE]
keepnames <- which(d != 1L)
d <- d[keepnames]
qt <- query[keepnames, , drop = FALSE]
x <- colSums((d == qt | (d == 0L & qt != 3))) == length(d)
qr[x, 1] <- rownames(definition)[i]
}
return(qr)
}
footprintlookup <- function(query, definition) {
qr <- matrix(nrow = dim(query)[2])
if (any(query == 0)) {
for (i in 1:nrow(definition)) {
d <- definition[i, ]
qt <- query[!is.na(d), ]
d <- d[!is.na(d)]
x <- base::colSums((bitwAnd(d, 1L) == bitwAnd(qt, 1L)) |
(bitwAnd(d, 2L) == 0L &
qt == 0L),
na.rm = FALSE) == length(d)
qr[x, 1] <- rownames(definition)[i]
}
} else {
for (i in 1:nrow(definition)) {
d <- definition[i, ]
qt <- query[!is.na(d)]
d <- d[!is.na(d)]
x <- base::.colSums(bitwAnd(d, 1L) == bitwAnd(qt, 1L),
m = length(d), n = length(qt) / length(d),
na.rm = FALSE) == length(d)
qr[x, 1] <- rownames(definition)[i]
}
}
return(qr)
}
## from toupper documentation
capwords <- function(s, strict = FALSE) {
cap <- function(s) paste(toupper(substring(s, 1, 1)),
{s <- substring(s, 2); if (strict) tolower(s) else s},
sep = "", collapse = " " )
sapply(strsplit(s, split = " "), cap, USE.NAMES = !is.null(names(s)))
}
produce.state <- function(state.obj, position = 1) {
state <- state.obj[[position]]
order <- sapply(state$features, function(x) {x$order})
value <- sapply(state$features, function(x) {x$score})
name <- sapply(state$features, function(x) {x$name})
output <- value[order]
output[output == -1] <- NA
names(output) <- name[order]
return(output)
}
zero_range <- function(x, tol = .Machine$double.eps ^ 0.5) {
if (length(x) == 1) return(TRUE)
x <- range(x) / mean(x)
isTRUE(all.equal(x[1], x[2], tolerance = tol))
}
make.overlap.matrix <- function(query.over, subject.over, samples) {
overlaps <- data.frame(qhits = query.over,
samples = samples[subject.over],
stringsAsFactors = TRUE)
overlap.table <- table(overlaps)
output.matrix <- matrix(overlap.table,
nrow = nrow(overlap.table),
dimnames = list(rownames(overlap.table),
colnames(overlap.table)))
if (!is(output.matrix, "matrix")) {
dim(output.matrix) <- c(length(output.matrix), 1)
dimnames(output.matrix) <- list(rownames(overlap.table),
colnames(overlap.table))
}
return(output.matrix)
}
#' @importFrom utils packageVersion
#' @importFrom GenomeInfoDb seqnames
#' @importFrom BiocGenerics start end
#' @importFrom data.table fwrite
#' @importFrom dplyr data_frame everything
#' @importFrom tidyr unite
#' @importFrom grDevices col2rgb
write.state <- function(x, y, color, hub.id, file = stdout()) {
manifest <- y
meta <- list(software = "StatePaintR",
version = packageVersion("StatePaintR"),
files = paste("#",
manifest$SAMPLE,
manifest$MARK,
manifest$FILE,
sep = " "))
if (all(names(mcols(x)) == c("name", "state", "score"))) {
if (!is.null(color)) {
my.cols <- data.frame(mcols(x), stringsAsFactors = FALSE)
my.cols <- left_join(my.cols, color, by = c("state" = "STATE"))
colnames(my.cols) <- c("sample", "name", "score", "itemRgb")
mcols(x) <- my.cols
} else {
names(mcols(x)) <- c("sample", "name", "score")
}
} else {
stop("non default column names in input data, meta data may be name and sample")
}
file.con <- file(file, "w+")
writeLines(paste("# this file was produced by", meta$software), file.con)
writeLines(paste("# version number:", meta$version), file.con)
writeLines(paste("# StateHub Model ID:", hub.id), file.con)
writeLines("# it is the chromatin segmentation of the following files: ", file.con)
writeLines(meta$files, file.con)
if (!is.null(color)) {
my.track <- new("BasicTrackLine",
itemRgb = TRUE,
db = x@seqinfo@genome[[1]],
name = manifest$SAMPLE[[1]],
description = paste0("StatePaintR Segmentation for ",
manifest$SAMPLE[[1]]))
} else {
my.track <- new("BasicTrackLine",
db = x@seqinfo@genome[[1]],
name = manifest$SAMPLE[[1]],
description = paste0("StatePaintR Segmentation for ",
manifest$SAMPLE[[1]]))
}
writeLines(as(my.track, "character"), file.con)
close(file.con)
rgb.color <- col2rgb(mcols(x)$itemRgb)
rgb.color <- unite(data_frame(r = rgb.color[1, ],
g = rgb.color[2, ],
b = rgb.color[3, ]),
col = color,
everything(),
sep = ",")
x.df <- data.frame(chr = seqnames(x),
start = start(x) - 1L,
end = end(x),
name = mcols(x)$name,
score = mcols(x)$score,
strand = ".",
bstart = start(x) - 1L,
bend = end(x),
color = rgb.color$color)
fwrite(x.df, file = file,
append = TRUE,
sep = "\t",
col.names = FALSE,
quote = FALSE,
showProgress = FALSE)
}
reverse_tl <- function(tl) {
values <- sapply(tl, length)
values <- rep(names(values), values)
keys <- unlist(tl, use.names = FALSE)
names(values) <- keys
return(as.list(values))
}
#' @importFrom jsonlite toJSON fromJSON unbox
#' @importFrom utils read.delim
ExportStateHub <- function(states, decisionMatrix, output.dir, description = NULL, as = "json") {
if (missing(output.dir)) { stop("please indicate output directory") }
if (missing(decisionMatrix)) { stop("please include decisionMatrix") }
if (!dir.exists(output.dir)) { dir.create(output.dir) }
if (is(states, "GRangesList")) {
m.data <- attributes(states)$manifest
} else {
m.data <- parse.manifest(states, check = FALSE)
}
decisionMatrix@abstraction.layer <- lapply(abstractionLayer(decisionMatrix), tolower)
m.data <- lapply(m.data, function(manifest, dm = decisionMatrix) {
manifest <- manifest[tolower(manifest$MARK) %in% unlist(abstractionLayer(dm), use.names = FALSE), , drop = FALSE]
if (nrow(manifest) < 1) {
return(NULL)
} else {
return(manifest)
}
})
m.data <- m.data[!sapply(m.data, is.null)]
if (is.null(description)) {
sample.description <- data.frame(names = names(m.data), cell_type = names(m.data), stringsAsFactors = FALSE)
} else {
### DESCRIPTION file requires the columns "cell_type" and "sample_id" where sample_id
### is the same as the "name" of the m.data objects
sample.description <- data.frame(names = names(m.data), stringsAsFactors = FALSE)
description <- read.delim(description)
if (all(c("cell_type", "sample_id") %in% colnames(description))) {
description <- description[, c("cell_type", "sample_id")]
description$sample_id <- tolower(description$sample_id)
description <- unique(description)
sample.description <- left_join(sample.description, description, by = c("names" = "sample_id"))
} else {
stop("DESCRIPTION file requires the columns 'cell_type' and 'sample_id'")
}
}
df <- data.frame(name = sample.description$names,
description = sample.description$cell_type,
project = sapply(m.data, function(x) {x[1, "SRC"]}),
genome = sapply(m.data, function(x) {x[1, "BUILD"]}),
marks = NA,
bedFileName = paste0(names(m.data), ".", sapply(m.data, nrow), "mark.segmentation.bed"),
bigBedFileName = paste0(names(m.data), ".", sapply(m.data, nrow), "mark.segmentation.bb"),
"statePaintRVersion" = NA,
"modelID" = NA,
baseURL = NA,
order = NA,
check.names = FALSE,
stringsAsFactors = FALSE)
df$marks <- sapply(m.data, function(x) {x[, "MARK"]})
df$statePaintRVersion <- as.character(packageVersion("StatePaintR"))
df$modelID <- decisionMatrix@id
df$baseURL <- "http://s3-us-west-2.amazonaws.com/statehub-trackhub/tracks"
df$order <- 0
df$name <- str_replace_all(df$name, pattern = "/", replacement = "-")
df$name <- str_replace_all(df$name, pattern = " ", replacement = "_")
df$bedFileName <- str_replace_all(df$bedFileName, pattern = "/", replacement = "-")
df$bedFileName <- str_replace_all(df$bedFileName, pattern = " ", replacement = "_")
df$bigBedFileName <- str_replace_all(df$bigBedFileName, pattern = "/", replacement = "-")
df$bigBedFileName <- str_replace_all(df$bigBedFileName, pattern = " ", replacement = "_")
row.names(df) <- NULL
if (as == "json") {
jlist <- list(modelID = unbox(decisionMatrix@id),
tracks = df)
df.json <- toJSON(jlist, pretty = TRUE)
writeLines(df.json, con = file.path(output.dir, "manifest.json"))
return(invisible(jlist))
} else if (as == "autosql") {
for (sample.i in 1:nrow(df)) {
df.i <- df[sample.i, ]
sample.name <- df.i[, "name"]
outfile <- file(file.path(output.dir, paste0(sample.name, ".manifest.as")), open = "w+")
writeLines(paste0('table', ' "', sample.name, '"'), con = outfile)
writeLines(paste0('"', df.i[, "description"], '"'), con = outfile)
writeLines("(", con = outfile)
writeLines(paste("string", "chrom;", '"Reference sequence chromosome or scaffold"', sep = "\t"), con = outfile)
writeLines(paste("uint", "chromStart;", '"Start position of feature on chromosome"', sep = "\t"), con = outfile)
writeLines(paste("uint", "chromEnd;", '"End position of feature on chromosome"', sep = "\t"), con = outfile)
writeLines(paste("string", "name;", '"Label of Segment"', sep = "\t"), con = outfile)
writeLines(paste("uint", "score;", '"Score"', sep = "\t"), con = outfile)
writeLines(paste("char[1]", "strand;", '"+ or - for strand . for unstranded"', sep = "\t"), con = outfile)
writeLines(paste("uint", "thickStart;", '"starting position at which the feature is drawn thickly"', sep = "\t"), con = outfile)
writeLines(paste("uint", "thickEnd;", '"ending position at which the feature is drawn thickly"', sep = "\t"), con = outfile)
writeLines(paste("uint", "reserved;", '"Color of Segment coded to Label"', sep = "\t"), con = outfile)
writeLines(")", con = outfile)
close(outfile)
}
return(invisible(df))
} else {
stop("argument 'as' must be either 'autosql' or 'json'")
}
}
PRG_helper <- function(state, select, tissue, comparison) {
mcols(comparison) <- data.frame(FOUND = mcols(comparison)[, tissue])
if ("state" %in% colnames(mcols(state))) {
state <- state[state$state %in% select, ]
}
state <- state[order(state$score, decreasing = TRUE), ]
olaps <- findOverlaps(comparison, state, select = "first")
mcols(comparison)$score <- 0
mcols(comparison)[which(!is.na(olaps)), "score"] <- mcols(state[olaps[!is.na(olaps)]])$score
prg_curve <- create_prg_curve(mcols(comparison)$FOUND, mcols(comparison)$score)
auprg <- calc_auprg(prg_curve)
convex_hull <- prg_convex_hull(prg_curve)
plot.tissue <- list(list(tissue = tissue, curve = prg_curve, auprg = auprg, hull = convex_hull))
names(plot.tissue) <- tissue
return(plot.tissue)
}
PRG_prg <- function(plot.data) {
data.frame(TISSUE = plot.data$tissue,
PRECISION = plot.data$curve$precision_gain,
RECALL = plot.data$curve$recall_gain)
}
PRG_convex_hull <- function(plot.data) {
data.frame(TISSUE = plot.data$tissue,
PRECISION = plot.data$hull$precision_gain,
RECALL = plot.data$hull$recall_gain,
FSCORE = plot.data$hull$f_calibrated_score)
}
setMethod("show",
signature = signature(object = "decisionMatrix"),
function(object) {
cat(" ID:", object@id, "\n",
"Name:", object@name, "\n",
"Author:", object@author, "\n",
"Revision:", object@revision, "\n",
"Description:", object@description, "\n")
if (any(grepl("\\*$", names(abstractionLayer(object))))) {
cat(" Not Scoring:", grep(pattern = "\\*$",
x = names(abstractionLayer(object)),
value = TRUE), "\n")
}
if (any(grepl("^\\[.*\\]$|^\\[.*\\]\\*$", names(abstractionLayer(object))))) {
cat(" Not Splitting:", grep(pattern = "^\\[.*\\]$|^\\[.*\\]\\*$",
x = names(abstractionLayer(object)),
value = TRUE), "\n")
}
})
MergeDataSets <- function(input, input.desc, merge.groups, score) {
if (length(merge.groups) < 1) {
return(list(input = input, input.desc = input.desc))
} else {
merge.groups <- unique(unlist(merge.groups))
for (mark in merge.groups) {
input.merge <- unlist(input[names(input) %in% mark])
input.names <- unique(input.merge$feature)
input <- input[!(names(input) %in% mark)]
names(input.merge) <- NULL
if (score) {
input.merge <- binnedAverage(reduce(input.merge),
coverage(input.merge, weight = "signalValue"),
varname = "signalValue")
mcols(input.merge)$feature <- paste(input.names, collapse = "|")
mcols(input.merge) <- mcols(input.merge)[, c(2,1)]
} else {
input.merge <- reduce(input.merge)
mcols(input.merge)$feature <- paste(input.names, collapse = "|")
mcols(input.merge)$signalValue <- NA
}
input.merge <- GRangesList(input.merge)
names(input.merge) <- mark
x <- append(input, input.merge)
rm(input.merge)
input.desc <- input.desc[input.desc != mark]
input.desc <- c(input.desc, merged = mark)
}
return(list(input = input, input.desc = input.desc))
}
}
PaintStatesBenchmark <- function(manifest, decisionMatrix, scoreStates = FALSE, progress = TRUE) {
start.time <- Sys.time()
if (missing(manifest)) {stop("provide a manifest describing the location of your files \n",
"and the mark that was investigated")}
if (missing(decisionMatrix)) {stop("provide a decisionMatrix object")}
if (is(decisionMatrix, "decisionMatrix")) {
deflookup <- reverse_tl(abstractionLayer(decisionMatrix))
names(deflookup) <- tolower(names(deflookup))
d <- decisionMatrix(decisionMatrix)
} else {
stop("arg: decisionMatrix must be object of class decisionMatrix")
}
samples <- parse.manifest(manifest)
sample.genomes <- as.list(unique(unlist(sapply(samples, "[", "BUILD"))))
sample.genomes.names <- sample.genomes
sample.genomes.names <- lapply(sample.genomes.names, function(x) {x <- str_replace(tolower(x), "grch38", "hg38")})
do.seqinfo <- try(Seqinfo(genome = sample.genomes.names[[1]]))
if (inherits(do.seqinfo, "try-error")) {
sample.genomes <- lapply(sample.genomes.names, function(x) NULL)
warning("could not download seqinfo for genome")
} else {
sample.genomes <- lapply(sample.genomes.names, function(x) Seqinfo(genome = x))
}
names(sample.genomes) <- sample.genomes.names
output <- list()
skipped <- list()
lc <- 0
if (progress) pb <- txtProgressBar(min = 0, max = length(samples) * 3, style = 3)
for (cell.sample in seq_along(samples)) {
lc <- lc + 1
if (progress) setTxtProgressBar(pb, lc)
skipname <- cell.sample
cell.sample <- samples[[cell.sample]]
x <- GetBioFeatures(manifest = cell.sample, dm = decisionMatrix, my.seqinfo = sample.genomes[[cell.sample[1, "BUILD"]]])
names(x) <- tolower(names(x))
inputset <- names(x)
inputset <- sapply(inputset, function(x, y = deflookup) {
out <- unlist(y[str_detect(x, paste0(names(y), "$"))], use.names = FALSE)
return(out)
})
notInTranslationLayer <- sapply(inputset, is.null)
if(all(sapply(inputset, is.null))) {
warning(cell.sample[1, "SAMPLE"], " has no MARKs that are present in the translation layer \n",
" MARKs included are ", paste(cell.sample[, "MARK"], collapse = " "))
skipped <- c(skipped, skipname)
next()
}
x <- x[!notInTranslationLayer]
inputset <- inputset[!notInTranslationLayer]
names(x) <- inputset
to.merge <- inputset[duplicated(inputset)]
if (length(to.merge) >= 1) {
to.merge <- unique(unlist(to.merge))
for (mark in to.merge) {
x.merge <- unlist(x[names(x) %in% mark])
x.names <- unique(x.merge$feature)
x <- x[!(names(x) %in% mark)]
names(x.merge) <- NULL
if (scoreStates) {
x.merge <- binnedAverage(reduce(x.merge), coverage(x.merge, weight = "signalValue"), varname = "signalValue")
mcols(x.merge)$feature <- paste(x.names, collapse = "|")
mcols(x.merge) <- mcols(x.merge)[, c(2,1)]
} else {
x.merge <- reduce(x.merge)
mcols(x.merge)$feature <- paste(x.names, collapse = "|")
mcols(x.merge)$signalValue <- NA
}
x.merge <- GRangesList(x.merge)
names(x.merge) <- mark
x <- append(x, x.merge)
rm(x.merge)
inputset <- inputset[inputset != mark]
inputset <- c(inputset, merged = mark)
}
}
inputset.c <- names(inputset)
names(inputset.c) <- inputset
x.f <- disjoin(unlist(x))
dontuseScore.i <- grep(pattern = "\\*$", inputset)
inputset <- str_replace(inputset, "\\*$", "")
dontbreak.i <- grep(pattern = "^\\[.*\\]$", inputset)
inputset <- str_replace(inputset, "^\\[", "")
inputset <- str_replace(inputset, "\\]$", "")
dontbreak <- inputset[dontbreak.i]
dontuseScore <- inputset[dontuseScore.i]
names(x) <- inputset
if (length(dontbreak) > 0) {
x.f <- x.f[x.f %outside% x[dontbreak]]
unfrag <- reduce(sort(do.call(c, list(unlist(x[dontbreak]), ignore.mcols = TRUE))))
x.f <- c(x.f, unfrag, ignore.mcols = TRUE)
x.f <- sort(x.f)
}
overlaps <- findOverlaps(x.f, x)
resmatrix <- make.overlap.matrix(from(overlaps), to(overlaps), names(x))
resmatrix <- resmatrix[, colnames(d)[colnames(d) %in% colnames(resmatrix)], drop = FALSE]
if (scoreStates) {
scorematrix <- resmatrix
signalCol <- sapply(x, function(x) !is.na(mcols(x)[1, "signalValue"]))
signalCol <- signalCol[!(names(signalCol) %in% dontuseScore)]
signalCol <- which(signalCol[colnames(scorematrix)])
for (feature in names(signalCol)) {
scoreOverlaps <- findOverlaps(x.f, x[[feature]])
scorematrix[from(scoreOverlaps), feature] <- mcols(x[[feature]])[to(scoreOverlaps), "signalValue"]
scorematrix[, feature] <- frankv(scorematrix[, feature], ties.method = "average")
}
scorematrix <- scorematrix[, names(signalCol), drop = FALSE]
}
resmatrix <- resmatrix + 2L
missing.data <- colnames(d)[!(colnames(d) %in% colnames(resmatrix))]
if (any(is.na(d))) {
d[is.na(d)] <- 1L
}
lmd <- length(missing.data)
if (lmd > 0) {
dl <- d[-which(matrix(bitwAnd(d[, missing.data, drop = FALSE], 2L) == 2L, ncol = lmd), arr.ind = TRUE)[,1], -which(colnames(d) %in% missing.data), drop = FALSE]
} else {
dl <- d
}
d.order <- dl
d.order[dl == 1] <- 0
d.order[dl == 0] <- 1
dl <- dl[order(rowSums(d.order, na.rm = TRUE), decreasing = FALSE), , drop = FALSE]
resmatrix <- t(resmatrix)
mcols(x.f)$name <- cell.sample[1, "SAMPLE"]
lc <- lc + 1
if (progress) setTxtProgressBar(pb, lc)
if (scoreStates) {
dl.score <- dl[, names(signalCol)]
score.cells <- which(dl.score == 3L, arr.ind = TRUE)
if (length(signalCol) == 1) {
score.cells <- matrix(score.cells, ncol = 1, dimnames = list(names(score.cells), c("row")))
score.cells <- cbind(score.cells, col = 1)
}
segments <- data.frame(state = flookup(resmatrix, dl)[, 1], median = NA, mean = NA, max = NA, stringsAsFactors = FALSE)
score.features <- unique(segments$state)[(unique(segments$state) %in% rownames(score.cells))]
for (score.feature in score.features) {
scorematrix.rows <- segments$state == score.feature
feature.scores <- scorematrix[segments$state == score.feature,
score.cells[rownames(score.cells) %in% score.feature, "col"],
drop = FALSE]
feature.scores.df <- data.frame(median = numeric(), mean = numeric(), max = numeric())
if (is(feature.scores, "matrix")) {
feature.scores.df <- rbind.data.frame(feature.scores.df, data.frame(median = rowMedians(feature.scores), mean = rowMeans(feature.scores), max = rowMaxs(feature.scores)))
} else {
feature.scores.df <- rbind.data.frame(feature.scores.df, data.frame(median = feature.scores / 2, mean = feature.scores / 2, max = feature.scores))
}
segments[segments$state == score.feature, c("median", "mean", "max")] <- feature.scores.df
}
mcols(x.f)$state <- segments$state
mcols(x.f)[, c("median", "mean", "max")] <- DataFrame(segments[, c("median", "mean", "max")])
} else {
mcols(x.f)$state <- flookup(resmatrix, dl)[, 1]
}
x.f.l <- split(x.f, x.f$state)
if (scoreStates) {
x.f.ll <- lapply(x.f.l, reduce, with.revmap = TRUE)
for (state.name in names(x.f.ll)) {
if (state.name %in% score.features) {
score.feature <- state.name
revmap <- mcols(x.f.ll[[score.feature]])$revmap
revmap <- sapply(revmap, function(x) {x[1]})
my.scores <- mcols(x.f.l[[score.feature]])[revmap, c("median", "mean", "max")]
} else {
my.scores <- DataFrame(median = rep.int(0, length(x.f.ll[[state.name]])), mean = rep.int(0, length(x.f.ll[[state.name]])), max = rep.int(0, length(x.f.ll[[state.name]])))
}
mcols(x.f.ll[[state.name]])$revmap <- NULL
mcols(x.f.ll[[state.name]])$name <- cell.sample[1, "SAMPLE"]
mcols(x.f.ll[[state.name]])$state <- state.name
mcols(x.f.ll[[state.name]])[, c("median", "mean", "max")] <- lapply(my.scores, function(x) {
if(max(x) > 0) {
ceiling((x/max(x)) * 1000)
} else {
0
}
})
}
x.f.l <- x.f.ll
} else {
x.f.l <- lapply(x.f.l, reduce)
for (state.name in names(x.f.l)) {
mcols(x.f.l[[state.name]])$name <- cell.sample[1, "SAMPLE"]
mcols(x.f.l[[state.name]])$state <- state.name
mcols(x.f.l[[state.name]])$score <- 0
}
}
x.f <- sort(do.call(c, unlist(x.f.l, use.names = FALSE)))
output <- c(output, x.f)
lc <- lc + 1
if (progress) setTxtProgressBar(pb, lc)
}
if (length(samples) > 1) {
output <- GRangesList(output)
}
if (progress) close(pb)
skipped <- unlist(skipped)
if (is.null(skipped)) {
names(output) <- names(samples)
} else {
names(output) <- names(samples)[-skipped]
}
attributes(output)$manifest <- samples
done.time <- Sys.time() - start.time
if (progress) message("processed ", length(samples), " in ", round(done.time, digits = 2), " ", attr(done.time, "units"))
return(output)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.