Nothing
# Functions that support the cghMCR class
#
# Copyright 2006, J. Zhang, all rights reserved
#
# Gets the MCRs based on the segmentation data contained by segList.
# segList is a data frame extracted from the output of DNAcopy
getMCR <- function(cghmcr){
getMCR4Locus <- function(loc.pos, chromosome){
if(loc.pos["status"] == "gain"){
segInLoc <- segList[as.character(segList[, "chrom"]) ==
as.character(chromosome) & as.numeric(
segList[, "loc.start"]) >= as.numeric(
loc.pos["loc.start"]) & as.numeric(
segList[, "loc.end"]) <= as.numeric(
loc.pos["loc.end"]) & as.numeric(
segList[, "seg.mean"]) >= 0,
, drop = FALSE]
}else{
segInLoc <- segList[as.character(segList[, "chrom"]) ==
as.character(chromosome) & as.numeric(
segList[, "loc.start"]) >= as.numeric(
loc.pos["loc.start"]) & as.numeric(
segList[, "loc.end"]) <= as.numeric(
loc.pos["loc.end"]) & as.numeric(
segList[, "seg.mean"]) < 0,
, drop = FALSE]
}
if(nrow(segInLoc) > 0){
mcrByLocus <- findMCR(segInLoc)
mcrs <<- rbind(mcrs, cbind(chromosome, loc.pos["span"],
loc.pos["status"], loc.pos["loc.start"],
loc.pos["loc.end"], mcrByLocus))
}
}
# Find altered regions based on the percentile values defined
segList <- altered(cghmcr) <- getAlteredSegs(cghmcr)
if(nrow(segList) == 0){
cat(paste("cghMCR settings do not allow for the detection of any",
"altered segments.\n"))
cat("You may change the settings and try again.\n")
return(NA)
}
# Join altered regions that are separated by less than 500 kb
spans <- as.data.frame(mergeSegs(altered(cghmcr),
gapAllowed(cghmcr)))
mcrs <- NULL
#temp <- split.data.frame(altered(cghmcr),
# factor(altered(cghmcr)[, "chrom"]))
#locus <- lapply(temp, getLocus, overlap = overlap)
for(chrom in as.vector(unique(spans[, "chrom"]))){
#locusByChrom <- locus[[chrom]]
junk <- apply(spans[spans[, "chrom"] == chrom, , drop = FALSE], 1,
getMCR4Locus, chromosome = chrom)
}
colnames(mcrs) <- c("chromosome", "locus", "status", "loc.start",
"loc.end", "mcr", "mcr.start", "mcr.end", "samples",
"counts")
rownames(mcrs) <- mcrs[, 1]
mcrs <- mcrs[as.numeric(mcrs[, "counts"])/(ncol(DNAData(cghmcr)) - 3) >=
recurrence(cghmcr)/100, -ncol(mcrs), drop = FALSE]
return(mcrs)
}
# Takes the segment data for a givan chromosome and returns a data frame
# defining the loci identified. Segments are considered to belong to the
# same locus unless they overlap by the number of bases specified by
# variable 'overlap'.
getLocus <- function(segData, overlap = 500){
locus <- NULL
listBySign <- list(gain = segData[segData[, "seg.mean"] >= 0,,
drop = FALSE], loss = segData[segData[, "seg.mean"] < 0,
, drop = FALSE])
for(name in names(listBySign)){
if(nrow(listBySign[[name]]) > 0){
locus <- rbind(locus, cbind(findLocus(listBySign[[name]],
overlap = overlap), status = name))
}
}
return(locus)
}
# Filters out segments based on given thresholds
getAlteredSegs <- function(cghmcr){
segList <- split.data.frame(DNASeg(cghmcr), factor(DNASeg(cghmcr)[, "ID"]))
toReturn <- NULL
for(sample in names(segList)){
switch(thresholdType(cghmcr),
quantile = thresholds <- quantile(DNAData(cghmcr)[, sample],
prob = c(alteredLow(cghmcr), alteredHigh(cghmcr)), na.rm = TRUE),
value = thresholds <- c(alteredLow(cghmcr), alteredHigh(cghmcr)))
temp <- segList[[sample]]
toReturn <- rbind(toReturn, temp[temp[, "seg.mean"] < thresholds[1] |
temp[, "seg.mean"] > thresholds[2], ,
drop = FALSE])
}
return(toReturn)
}
mergeSegs <- function(segs, gapAllowed){
if(nrow(segs) < 2){
span <- c(chrom = segs[, "chrom"], status = ifelse(segs[, "seg.mean"]
>= 0, "gain", "loss"), span = "span.1",
loc.start = segs[,"loc.start"], loc.end = segs["loc.end"])
}else{
span <- NULL
temp <- splitSegments(segs)
for(chrom in names(temp)){
for(sign in names(temp[[chrom]])){
segBySign <- temp[[chrom]][[sign]]
if(nrow(segBySign) != 0){
span <- rbind(span, findSpan(segBySign, gapAllowed))
}
}
}
}
return(span)
}
# finds the segment spans on a given chromosome
findSpan <- function(segData, gapAllowed = 50000){
span <- NULL
chrom <- as.vector(segData[1, "chrom"])
status <-ifelse(segData[1, "seg.mean"] >= 0, "gain", "loss")
begin <- min(segData[, "loc.start"])
end <- max(segData[segData[, "loc.start"] == begin, "loc.end"])
if(end >= max(segData[, "loc.end"])){
span <- rbind(span, c(chrom, status, "span.1", begin, end))
}else{
spanCounter <- 1
while(TRUE){
# Subsetting segments that overlap with the locus currently defined
tempData <- segData[segData[, "loc.start"] != begin, , drop = FALSE]
tempData <- tempData[tempData[, "loc.end"] > end &
tempData[, "loc.start"] <= end + gapAllowed,
, drop = FALSE]
if(nrow(tempData) < 1){
# Segments break, define one locus
span <- rbind(span, c(chrom, status, paste("span.", spanCounter,
sep = ""), begin, end))
spanCounter <- spanCounter + 1
# Drop the ones have been processed
segData <- segData[segData[, "loc.start"] > end + gapAllowed, ,
drop = FALSE]
if(nrow(segData) < 1){
break
}else{
# Define the begining of another locus
begin <- min(segData[, "loc.start"])
end <- max(segData[segData[, "loc.start"] == begin, "loc.end"])
}
}else{
end <- max(tempData[, "loc.end"])
# End of the data
if(end >= max(segData[, "loc.end"])){
span <- rbind(span, c(chrom, status, paste("span.", spanCounter,
sep = ""), begin, end))
break
}
}
}
}
colnames(span) <- c("chrom", "status", "span", "loc.start", "loc.end")
return(span)
}
splitSegments <- function(segs){
temp <- split.data.frame(segs, factor(segs[, "chrom"]))
return(lapply(temp, splitSegByMean))
}
splitSegByMean <- function(segs){
posMeans <- segs[as.numeric(segs[, "seg.mean"]) >= 0, , drop = FALSE]
negMeans <- segs[as.numeric(segs[, "seg.mean"]) < 0, , drop = FALSE]
return(list(gain = posMeans[order(posMeans[, "loc.start"]),, drop = FALSE],
loss = negMeans[order(negMeans[, "loc.start"]), , drop = FALSE]))
}
findLocus <- function(segData, overlap = 500){
locus <- NULL
begin <- min(segData[, "loc.start"])
end <- max(segData[segData[, "loc.start"] == begin, "loc.end"])
if(end >= max(segData[, "loc.end"])){
locus <- rbind(locus, c("locus.1", begin, end))
}else{
locusCounter <- 1
while(TRUE){
# Subsetting segments that overlap with the locus currently defined
tempData <- segData[segData[, "loc.start"] != begin, , drop = FALSE]
tempData <- tempData[tempData[, "loc.end"] > end &
tempData[, "loc.start"] - overlap <= end,
, drop = FALSE]
if(nrow(tempData) < 1){
# Segments break, define one locus
locus <- rbind(locus, c(paste("locus.", locusCounter, sep = ""),
begin, end))
locusCounter <- locusCounter + 1
# Drop the ones have been processed
segData <- segData[segData[, "loc.start"] - overlap > end, ,
drop = FALSE]
if(nrow(segData) < 1){
break
}else{
# Define the begining of another locus
begin <- min(segData[, "loc.start"])
end <- max(segData[segData[, "loc.start"] == begin, "loc.end"])
}
}else{
end <- max(tempData[, "loc.end"])
# End of the data
if(end >= max(segData[, "loc.end"])){
locus <- rbind(locus, c(paste("locus.", locusCounter, sep = ""),
begin, end))
break
}
}
}
}
colnames(locus) <- c("locus", "loc.start", "loc.end")
return(locus)
}
# Takes the segment data for a given locus and returns the MCRs located
# by mapping the data to the potential MCRs identified by getPotentialMCR
findMCR <- function(segData){
mcrs <- NULL
if(nrow(segData) == 1){
mcrs <- matrix(c("mcr.1", segData[, "loc.start"], segData[, "loc.end"],
segData[, "ID"], 1), ncol = 5)
}else{
mcrCounter <- 1
pMCRs <- getPotentialMCR(segData)
for(index in 1:nrow(pMCRs)){
segs <- segData[as.numeric(segData[, "loc.start"])
<= as.numeric(pMCRs[index, "mcr.start"]) &
as.numeric(segData[, "loc.end"])
>= as.numeric(pMCRs[index, "mcr.end"]), "ID"]
if(length(segs) > 0){
mcrs <- rbind(mcrs, c(paste("mcr", mcrCounter, sep = "."),
pMCRs[index, "mcr.start"],
pMCRs[index, "mcr.end"],
paste(unique(segs), sep = "", collapse = ","),
length(unique(segs))))
mcrCounter <- mcrCounter + 1
}
}
}
colnames(mcrs) <- c("mcr.num", "mcr.start", "mcr.end", "samples", "counts")
return(mcrs)
}
# Locate all the potential MCRs by aligning the starting and ending points
# of all the segments from all the samples. The region between any two
# adjacent points will be a potential MCR
getPotentialMCR <- function(segData){
temp <- sort(unique(as.numeric(c(segData[, "loc.start"],
segData[, "loc.end"]))))
mcrs <- cbind(temp[1:(length(temp) - 1)], temp[2:length(temp)])
colnames(mcrs) <- c("mcr.start", "mcr.end")
return(mcrs)
}
mergeMCRProbes <- function(mcr, rawData){
temp <- apply(mcr, 1, FUN = function(x){
probes <- rawData[rawData[, 2] == x[1] & as.numeric(rawData[, 3])
>= as.numeric(x[7]) & as.numeric(rawData[, 3])
<= as.numeric(x[8]), 1]
if(length(probes) == 0){
return(NA)
}else{
return(paste(probes, sep = "", collapse = ","))
}
})
mcr <- cbind(mcr, probes = temp)
return(mcr)
}
plot.DNAcopy <- function(x, ..., save = FALSE, layout){
args <- list(...)
sampleNames <- unique(x[["output"]][, "ID"])
locations <- cghMCR:::alignGenes(x[["data"]][, c("chrom", "maploc")])
adjustments <- cghMCR:::getAdjustments(x[["data"]][, c("chrom", "maploc")])
segdata <- cghMCR:::adjustSegments(x[["output"]], adjustments)
if(save){
tempPng <- file.path(tempdir(), "segments.png")
png(filename = tempPng)
}
if(missing(layout)){
par(mfrow = c(length(sampleNames), 1))
}else{
par(mfrow = layout)
}
for(sampleName in sampleNames){
plot(0, 0, cex = 0, main = sampleName, xlab = "Chromsome",
ylab = " Log2 ratio", ylim = c(-5, 5), axes = FALSE,
xlim = c(0, max(locations) + 10))
axis(2)
box()
cghMCR:::highlightChrom(adjustments)
points(locations, x[["data"]][, sampleName], cex = 0.4, pch = 16)
lines(c(min(locations), max(locations)), rep(0, 2), lwd = 2)
lines(c(min(locations), max(locations)), rep(1, 2))
lines(c(min(locations), max(locations)), rep(-1, 2))
cghMCR:::drawSegs(segdata[segdata[, "ID"] == sampleName, ])
cghMCR:::markChrom(adjustments)
}
if(save){
dev.off()
return(tempPng)
}else{
return(invisible())
}
}
plot.MCR <- function(x, ..., DNAData, threshold = 1, save = FALSE,
expand = c(2000, 2000), layout, range = 1:5){
x <- x[unlist(lapply(strsplit(x[, "samples"], ","), length)) >= threshold, ]
if(save){
tempPng <- paste(tempfile("mcrs"), "png", sep = ".")
png(filename = tempPng)
}
if(!missing(layout)){
par(mfrow = layout)
}
for(index in range){
showMCR(x[index, "mcr.start"], x[index, "mcr.end"],
DNAData[DNAData[, "chrom"] == x[index, "chromosome"] &
as.numeric(DNAData[, "maploc"]) >=
(as.numeric(x[index, "mcr.start"]) - expand[1]) &
as.numeric(DNAData[, "maploc"]) <=
(as.numeric(x[index, "mcr.end"]) + expand[2]),
c(3, which(colnames(DNAData) %in%
unlist(strsplit(x[index, "samples"], ",")))),
drop = FALSE], "median")
}
if(save){
dev.off()
return(tempPng)
}else{
return(invisible())
}
}
showMCR <- function(start, end, ratioMat, what = c("mean", "median")){
what = match.arg(what)
temp <- cbind(ratioMat[, 1], unlist(apply(ratioMat,
1, FUN = function(ratios){
ratios <- ratios[2:length(ratios)]
ratios<- ratios[!is.na(ratios)]
if(what == "mean"){
return(mean(as.numeric(ratios)))
}else{
return(median(as.numeric(ratios)))
}
})))
left <- as.numeric(unlist(ratioMat[as.numeric(ratioMat[, 1]) <
as.numeric(start),
2:ncol(ratioMat), drop = FALSE]))
inSeg <- as.numeric(unlist(ratioMat[as.numeric(ratioMat[, 1]) >=
as.numeric(start) &
as.numeric(ratioMat[, 1]) <=
as.numeric(end),
2:ncol(ratioMat), drop = FALSE]))
right <- as.numeric(unlist(ratioMat[as.numeric(ratioMat[, 1]) >
as.numeric(end),
2:ncol(ratioMat), drop = FALSE]))
plot(temp[, 1], temp[, 2], xlab = "Chromosomal location",
ylab = "Log2 ratio")
if(length(left) > 0){
lines(c(min(ratioMat[as.numeric(ratioMat[, 1]) < as.numeric(start), 1]),
max(ratioMat[as.numeric(ratioMat[, 1]) < as.numeric(start), 1])),
rep(ifelse(what == "mean", mean(left), median(left)), 2),
col = "red", lwd = 2)
}
lines(c(start, end), rep(ifelse(what == "mean", mean(inSeg), median(inSeg)),
2), col = "red", lwd = 2)
if(length(right) > 0){
lines(c(min(ratioMat[as.numeric(ratioMat[, 1]) > as.numeric(end), 1]),
max(ratioMat[as.numeric(ratioMat[, 1]) > as.numeric(end), 1])),
rep(ifelse(what == "mean", mean(left), median(right)), 2),
col = "red", lwd = 2)
}
return(invisible())
}
getLineData <- function(start, end, ratioMat, what){
left <- as.numeric(unlist(ratioMat[as.numeric(ratioMat[, 1]) < start,
2:ncol(ratioMat), drop = FALSE]))
inSeg <- as.numeric(unlist(ratioMat[as.numeric(ratioMat[, 1]) >= start &
as.numeric(ratioMat[, 1]) <= end,
2:ncol(ratioMat), drop = FALSE]))
right <- as.numeric(unlist(ratioMat[as.numeric(ratioMat[, 1]) > end,
2:ncol(ratioMat), drop = FALSE]))
if(length(left) > 0){
means <- c(means, ifelse(what == "mean", mean(left), median(left)))
}
means <- c(means, ifelse(what == "mean", mean(inSeg), median(inSeg)))
if(length(right) > 0){
means <- c(means, ifelse(what == "mean", mean(right), median(right)))
}
return(means)
}
markChrom <- function(adjustments){
chromLocs <- NULL
chromNames <- NULL
for(i in 1:length(adjustments) - 1){
if(i %% 2 == 1){
chromLocs <- c(chromLocs, mean(c(adjustments[i], adjustments[i + 1])))
chromNames <- c(chromNames, names(adjustments)[i])
}
}
text(chromLocs, rep(-5.125, length(chromLocs)), chromNames, cex = 0.5)
return(invisible())
}
highlightChrom <- function(adjustments){
for(index in 1:length(adjustments)){
if(index %% 2 == 1){
polygon(c(adjustments[index], adjustments[index + 1],
adjustments[index + 1], adjustments[index]),
c(-5, -5, 5, 5), col = "gray", border = "white")
}
}
return(invisible())
}
drawSegs <- function(segdata){
drawSegLine <- function(segLocs){
lines(c(segLocs["loc.start"], segLocs["loc.end"]),
rep(segLocs["seg.mean"], 2), col = "red", lwd = 2)
}
junck <- apply(segdata, 1, FUN = drawSegLine)
return(invisible())
}
# This function gets the values that can be used to adjust chromosomal
# locations to align genes on different chromosomes so that they appear
# in sequence from chromosome one to Y along a single strand
getAdjustments <- function(positions){
temp <- split.data.frame(positions, factor(positions[, 1]))
temp <- unlist(lapply(temp, FUN = function(x) max(x[, 2])))
chroms <- sort(as.numeric(names(temp)[names(temp) %in% 1:24]))
if(any(names(temp) %in% "X")){
chroms <- c(chroms, "X")
}
if(any(names(temp) %in% "Y")){
chroms <- c(chroms, "Y")
}
adjustments <- 0
for(index in 1:length(chroms) - 1){
adjustments <- c(adjustments, (adjustments[length(adjustments)]+
temp[as.character(chroms[index])]))
}
names(adjustments) <- chroms
return(adjustments)
}
alignGenes <- function(positions){
adjustments <- getAdjustments(positions)
for(chrom in names(adjustments)){
positions[positions[, 1] == chrom, 2] <-
positions[positions[, 1] == chrom, 2] + adjustments[chrom]
}
return(positions[, 2])
}
adjustSegments <- function(segdata, adjustments){
for(chrom in names(adjustments)){
segdata[segdata[, "chrom"] == chrom, "loc.start"] <-
segdata[segdata[, "chrom"] == chrom, "loc.start"] + adjustments[chrom]
segdata[segdata[, "chrom"] == chrom, "loc.end"] <-
segdata[segdata[, "chrom"] == chrom, "loc.end"] + adjustments[chrom]
}
return(segdata)
}
getSegData <- function(arrayRaw){
#filter <- getProbeFilter(arrayRaw)
#log2Ratio <- maM(arrayRaw)[filter, ]
#chrom <- gsub("chr([0-9XY]+):.*", "\\1", maInfo(maGnames(
# arrayRaw))["SystematicName"][filter, 1])
#pos <- gsub("chr.*-([0-9]+)", "\\1", maInfo(maGnames(
# arrayRaw))["SystematicName"][filter, 1])
#probes <- maInfo(maGnames(arrayRaw))["ProbeName"][filter, 1]
rawData <- read.maimages(arrayRaw, source = "agilent", columns =
list(R = "rMedianSignal", G = "gMedianSignal", Rb = "rBGMedianSignal",
Gb = "gBGMedianSignal"), annotation = c("ControlType",
"ProbeName", "GeneName", "SystematicName",
"gIsFeatNonUnifOL", "rIsFeatNonUnifOL", "gIsBGNonUnifOL", "rIsBGNonUnifOL",
"gIsFeatPopnOL", "rIsFeatPopnOL", "gIsBGPopnOL", "rIsBGPopnOL",
"rIsSaturated", "gIsSaturated"), names = basename(arrayRaw))
ma <- normalizeWithinArrays(backgroundCorrect(rawData, method = "minimum"), method = "loess")
probes <- ma$genes[, "ProbeName"]
chrom <- gsub("chr([0-9XY]+):.*", "\\1", ma$genes[, "SystematicName"])
dropMe <- c(which(!chrom %in% c(1:22, "X", "Y")), which(ma$genes[, "ControlType"] != 0))
require(DNAcopy, quietly = TRUE)
set.seed(25)
cna <- CNA(ma$M[-dropMe, ],
gsub("chr([0-9XY]+):.*", "\\1", ma$genes[-dropMe, "SystematicName"]),
as.numeric(gsub(".*:([0-9]+)-.*", "\\1",
ma$genes[-dropMe, "SystematicName"])),
data.type = "logratio", sampleid = colnames(ma$M))
segdata <- segment(smooth.CNA(cna))
#CNA.object <- CNA(matrix(as.numeric(log2Ratio),
# ncol = ncol(log2Ratio), byrow = FALSE), chrom,
# as.numeric(pos), data.type = "logratio",
# sampleid = colnames(log2Ratio))
#segdata <- segment(smooth.CNA(CNA.object))
segdata[["data"]] <- cbind(probe = probes, as.data.frame(segdata[["data"]]))
return(segdata)
}
#getProbeFilter <- function(arrayRaw){
# chrom <- gsub("chr([0-9XY]+):.*", "\\1",
# maInfo(maGnames(arrayRaw))["SystematicName"][, 1])
# return(intersect(which(chrom %in% c(1:22, "X", "Y")), which(maInfo(
# maGnames(arrayRaw))["ControlType"] >= 0)))
#}
cghMCR <- function(segments, gapAllowed = 500, alteredLow = 0.03,
alteredHigh = 0.97, spanLimit = 20000000,
recurrence = 75, thresholdType = c("quantile", "value")){
thresholdType <- match.arg(thresholdType)
return(new("cghMCR", DNASeg = as.data.frame(segments[["output"]]), gapAllowed = gapAllowed,
DNAData = as.data.frame(segments[["data"]]), alteredLow = alteredLow,
alteredHigh = alteredHigh, spanLimit = spanLimit,
recurrence = recurrence, thresholdType = thresholdType))
}
showSegment <- function(dataMat, chromosome, start, end, samples = NA,
what = c("mean", "median")){
getMean <- function(x){
x <- x[-(1:3)]
if(!is.na(samples)){
x <- x[samples]
}
return(ifelse(what == "mean", mean(as.numeric(x), na.rm = TRUE),
median(as.numeric(x), na.rm = TRUE)))
}
what = match.arg(what)
temp <- dataMat[dataMat[, "chrom"] == chromosome &
as.numeric(dataMat[, "maploc"]) >= as.numeric(start) &
as.numeric(dataMat[, "maploc"]) <= as.numeric(end),
, drop = FALSE]
y <- apply(temp, 1, FUN = getMean)
plot(temp[, "maploc"], y, xlab = "Chromosomal location",
ylab = ifelse(what == "mean", "Mean across samples",
"Median across samples"))
return(invisible())
}
# Methods for SGOL
SGOL <- function(rsObj, threshold, method){
if(segBy(rsObj) == "region"){
sampleStart = 4
}else if(segBy(rsObj) == "gene"){
sampleStart = 6
}else{
stop("No applicable to sample pairs")
}
return(new("SGOL", gol = getSGOL(as.data.frame(rs(rsObj)),
threshold = threshold,
method = method, sampleStart = sampleStart),
threshold = threshold, method = method))
}
getSGOL <- function(rsData, threshold, method, sampleStart){
gains <- apply(rsData[, sampleStart:ncol(rsData)],
1, FUN = function(x) method(as.numeric(x[as.numeric(x) >
threshold[2]]), na.rm = TRUE))
losses <- apply(rsData[, sampleStart:ncol(rsData)],
1, FUN = function(x) method(as.numeric(x[as.numeric(x) <
threshold[1]]), na.rm = TRUE))
return(cbind(rsData[, 1:(sampleStart - 1)], gains = gains,
losses = losses))
}
plotSGOL <- function(gol, XY, chrom = "chrom",
start = "start", end = "end"){
gol <- as.matrix(gol)
merged <- sortByChromNLoc(mergeStartNEnd(mergeChrom(gol, chrom = chrom,
start = start, end = end), chrom = chrom, start = start,
end = end), by1 = chrom, by2 = "pos")
if(!missing(XY)){
if (XY == FALSE){
merged <- merged[which(!merged[, chrom] %in% c("X", "Y")), ]
}
}
ylim <- range(c(as.numeric(merged[, "gains"]),
as.numeric(merged[, "losses"])), na.rm = TRUE)
xlim = range(as.numeric(merged[, "pos"]), na.rm = TRUE)
ylim <- c(floor(ylim[1] + ylim[1]/10), ceiling(ylim[2] + ylim[2]/10))
plot(0, 0, type = "n", ylim = ylim, ylab = "SGOL score",
xlab = "Chromosome", xlim = xlim, axes = FALSE)
markChrom(merged[!duplicated(merged[, chrom]), "pos"], ylim[1],
ylim[2])
lines(merged[, "pos"], merged[, "gains"], type = "l", col = "green")
lines(merged[, "pos"], merged[, "losses"], type = "l", col = "red")
margins <- c(merged[!duplicated(merged[, chrom]), "pos"],
max(as.numeric(merged[, "pos"]), na.rm = TRUE))
margins <- cbind(margins[-length(margins)], margins[-1])
axis(2)
mean <- as.numeric(margins[, 1]) + (as.numeric(margins[, 2]) -
as.numeric(margins[, 1]))/2
axis(1, at = mean,
labels = merged[!duplicated(merged[, chrom]), "chrom"])
box()
}
markChrom <- function(adjustments, min, max){
for(index in 1:length(adjustments)){
if(index %% 2 == 1){
polygon(c(adjustments[index], adjustments[index + 1],
adjustments[index + 1], adjustments[index]),
c(min, min, max, max), col = "gray94", border = "white")
}
}
return(invisible())
}
getGEOI <- function(gol){
splited <- split.data.frame(gol, factor(gol[, "chrom"]))
gains <- lapply(splited, mergeGOL, isGain = TRUE)
gains <- do.call("rbind", args = gains)
losses <- lapply(splited, mergeGOL, isGain = FALSE)
losses <- do.call("rbind", args = losses)
return(list(gain = gains, loss = losses))
}
mergeGOL <- function(gol, isGain){
gol <- gol[order(as.numeric(gol[, "start"]), decreasing = FALSE), ]
start <- 1
merged <- NULL
if(isGain){
checkMe <- gol[1, "gains"]
}else{
checkMe <- gol[1, "losses"]
}
for(index in 1:nrow(gol)){
if(isGain){
temp <- gol[index, "gains"]
}else{
temp <- gol[index, "losses"]
}
if(is.na(temp)){
if(checkMe != "NA"){
merged <- rbind(merged, c(chrom = gol[start, "chrom"],
start = min(as.numeric(gol[start:(index-1), "start"])),
end = max(as.numeric(gol[start:(index - 1), "end"])),
sgol = checkMe, geneID = paste(gol[start:(index - 1),
"geneid"], sep = "", collapse = ";"),
geneNum = length(start:(index-1))))
}
checkMe <- "NA"
start <- index
next()
}
if(temp == checkMe){
next()
}else{
merged <- rbind(merged, c(chrom = gol[start, "chrom"],
start = min(as.numeric(gol[start:(index-1), "start"])),
end = max(as.numeric(gol[start:(index - 1), "end"])),
sgol = checkMe, geneID = paste(gol[start:(index - 1),
"geneid"], sep = "", collapse = ";"),
geneNum = length(start:(index-1))))
checkMe <- temp
start <- index
}
}
return(merged)
}
mergeStartNEnd <- function(mergeMe, chrom = "chrom", start = "start",
end = "end"){
st <- mergeMe[, -which(colnames(mergeMe) == end)]
ed <- mergeMe[, -which(colnames(mergeMe) == start)]
stNames <- colnames(st)
edNames <- colnames(ed)
stNames[which(stNames == start)] <- "pos"
edNames[which(edNames == end)] <- "pos"
colnames(st) <- stNames
colnames(ed) <- edNames
merged <- rbind(st, ed[, colnames(st)])
return(sortByChromNLoc(merged, by1 = chrom, by2 = "pos"))
}
mergeChrom <- function(mergeMe, chrom = "chrom",
start = "start", end = "end"){
mergeMe <- sortByChromNLoc(mergeMe, by1 = chrom, by2 = start)
chromSum <- getChromMargin(getChromLength(mergeMe[,
c(chrom, start, end)], by = chrom, pos = end))
for(ch in names(chromSum)){
mergeMe[which(mergeMe[, chrom] == ch), start] <-
(as.numeric(mergeMe[which(mergeMe[, chrom] == ch), start])
+ chromSum[ch])
mergeMe[which(mergeMe[, chrom] == ch), end] <-
(as.numeric(mergeMe[which(mergeMe[, chrom] == ch), end])
+ chromSum[ch])
}
return(mergeMe)
}
getChromMargin <- function(chromLengh){
chromLength <- chromLengh[c(1:22, c("X", "Y"))]
chromLength <- chromLength[!is.na(chromLength)]
margins <- cumsum(chromLength)
chroms <- names(margins)[-1]
margins <- margins[-length(margins)]
names(margins) <- chroms
return(margins)
}
getChromLength <- function(segList, by = "chrom", pos = "loc.end"){
splited <- split.data.frame(segList, factor(segList[, by]))
chroms <- c(1:22, "X", "Y")
temp <- sapply(splited, FUN = function(x) max(as.numeric(as.vector(x[,
pos]))))
return(temp[chroms[which(chroms %in% names(temp))]])
}
# Sort mapped in order by chromsome and then by location
# A data frame or matrix to be sorted by columns defined by by1 and by2
sortByChromNLoc <- function(sortMe, by1 = "Ch", by2 = "Pos"){
splited <- split.data.frame(sortMe, factor(sortMe[, by1]))
sorted <- NULL
for(chrom in c(1:22, "X", "Y")){
if(!is.null(splited[[chrom]])){
sorted <- rbind(sorted,
splited[[chrom]][order(as.numeric(gsub(" ", "", splited[[chrom]][, by2]))), ])
}
}
return(sorted)
}
### Drops CNPs of the tumor SGOL objects
### threshold - a numeric vector of two with the first one for losses
### and second one for gains
dropCNP <- function(normalsgol, tumorsgol, threshold){
ngol <- gol(normalsgol)
tgol <- gol(tumorsgol)
gainGeneID <- as.vector(ngol[which(as.numeric(as.vector(ngol[, "gains"])) >
threshold[2]), "geneid"])
lossGeneID <- as.vector(ngol[which(as.numeric(as.vector(ngol[, "losses"])) <
threshold[1]), "geneid"])
tumorsgol@gol <- dropGenes(gainGeneID, lossGeneID, tgol)
return(tumorsgol)
}
dropGenes <- function(gainGeneID, lossGeneID, sgol){
imputMe <- which(sgol[, "geneid"] %in% gainGeneID)
sgol[imputMe, "gains"] <- NA
sgol[imputMe, "gains"] <- approx(sgol[, "gains"], xout = imputMe)$y
imputMe <- which(sgol[, "geneid"] %in% lossGeneID)
sgol[imputMe, "losses"] <- NA
sgol[imputMe, "losses"] <- approx(sgol[, "losses"], xout = imputMe)$y
return(sgol)
}
topGenes <- function(sgol, quan = c(0.02, 0.98), XY = FALSE){
gol <- sgol
if(!XY){
gol <- gol[!gol[, "chrom"] %in% c("X", "Y"), ]
}
gains <- gol[as.numeric(gol[, "gains"]) >
quantile(as.numeric(gol[, "gains"]), quan[2],
na.rm = TRUE), ]
losses <- gol[which(as.numeric(gol[, "losses"]) <
quantile(as.numeric(gol[, "losses"]), quan[1],
na.rm = TRUE)), ]
return(list(gain = gains[order(as.numeric(gains[, "gains"]), decreasing = TRUE), ],
loss = losses[order(as.numeric(losses[, "losses"]), decreasing = FALSE), ]))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.