.segFromRaw <- function(raw, segmatrix) {
result <- new("cghSeg", copynumber=copynumber(raw),
segmented=segmatrix,
phenoData=phenoData(raw),
featureData=featureData(raw),
annotation=annotation(raw),
experimentData=experimentData(raw)
)
result
}
.callFromSeg <- function(seg, assayData) {
result <- new("cghCall", assayData=assayData,
phenoData=phenoData(seg),
featureData=featureData(seg),
annotation=annotation(seg),
experimentData=experimentData(seg)
)
result
}
######## COPIED FROM DNACOPY, BUT ADAPTED TO ALLOW FOR TWO UNDO-PARAMETERS
.segment2 <- function (x,clen=10, relSDlong=3, alpha = 0.01, nperm = 10000, p.method = c("hybrid",
"perm"), min.width = 2, kmax = 25, nmin = 200, eta = 0.05,
sbdry = NULL, trim = 0.025, undo.splits = c("none", "prune",
"sdundo"), undo.prune = 0.05, undo.SD = 3, verbose = 1)
{
if (!inherits(x, "CNA"))
stop("First arg must be a copy number array object")
call <- match.call()
if (min.width < 2 | min.width > 5)
stop("minimum segment width should be between 2 and 5")
if (missing(sbdry)) {
if (nperm == 10000 & alpha == 0.01 & eta == 0.05) {
sbdry <- default.DNAcopy.bdry
}
else {
max.ones <- floor(nperm * alpha) + 1
sbdry <- DNAcopy:::getbdry(eta, nperm, max.ones)
}
}
sbn <- length(sbdry)
nsample <- ncol(x) - 2
sampleid <- colnames(x)[-(1:2)]
uchrom <- unique(x$chrom)
data.type <- attr(x, "data.type")
p.method <- match.arg(p.method)
undo.splits <- match.arg(undo.splits)
segres <- list()
segres$data <- x
allsegs <- list()
allsegs$ID <- NULL
allsegs$chrom <- NULL
allsegs$loc.start <- NULL
allsegs$loc.end <- NULL
allsegs$num.mark <- NULL
allsegs$seg.mean <- NULL
for (isamp in 1:nsample) {
if (verbose >= 1)
cat(paste("Analyzing:", sampleid[isamp], "\n"))
genomdati <- x[, isamp + 2]
ina <- which(!is.na(genomdati) & !(abs(genomdati) ==
Inf))
genomdati <- genomdati[ina]
trimmed.SD <- sqrt(DNAcopy:::trimmed.variance(genomdati, trim))
chromi <- x$chrom[ina]
sample.lsegs <- NULL
sample.segmeans <- NULL
for (ic in uchrom) {
if (verbose >= 2)
message(paste(" current chromosome:", ic, "\n"))
segci <- .changepoints2(genomdati[chromi == ic], data.type,
alpha, sbdry, sbn, nperm, p.method, min.width,
kmax, nmin, trimmed.SD, undo.splits, undo.prune,
undo.SD, clen, relSDlong, verbose)
sample.lsegs <- c(sample.lsegs, segci$lseg)
sample.segmeans <- c(sample.segmeans, segci$segmeans)
}
sample.nseg <- length(sample.lsegs)
sample.segs.start <- ina[cumsum(c(1, sample.lsegs[-sample.nseg]))]
sample.segs.end <- ina[cumsum(sample.lsegs)]
allsegs$ID <- c(allsegs$ID, rep(isamp, sample.nseg))
allsegs$chrom <- c(allsegs$chrom, x$chrom[sample.segs.end])
allsegs$loc.start <- c(allsegs$loc.start, x$maploc[sample.segs.start])
allsegs$loc.end <- c(allsegs$loc.end, x$maploc[sample.segs.end])
allsegs$num.mark <- c(allsegs$num.mark, sample.lsegs)
allsegs$seg.mean <- c(allsegs$seg.mean, sample.segmeans)
}
allsegs$ID <- sampleid[allsegs$ID]
allsegs$seg.mean <- round(allsegs$seg.mean, 4)
allsegs <- as.data.frame(allsegs)
allsegs$ID <- as.character(allsegs$ID)
segres$output <- allsegs
segres$call <- call
class(segres) <- "DNAcopy"
segres
}
.changepoints2 <- function (genomdat, data.type = "logratio", alpha = 0.01, sbdry,
sbn, nperm = 10000, p.method = "hybrid", min.width = 2, kmax = 25,
nmin = 200, trimmed.SD = NULL, undo.splits = "none", undo.prune = 0.05,
undo.SD = 3, clen, relSDlong, verbose = 1, ngrid = 100, tol = 1e-06)
{
n <- length(genomdat)
if (missing(trimmed.SD))
trimmed.SD <- mad(diff(genomdat),na.rm=TRUE)/sqrt(2)
seg.end <- c(0, n)
k <- length(seg.end)
change.loc <- NULL
while (k > 1) {
current.n <- seg.end[k] - seg.end[k - 1]
if (verbose >= 3)
message(".... current segment:", seg.end[k - 1] + 1,
"-", seg.end[k], "\n")
if (current.n >= 2 * min.width) {
current.genomdat <- genomdat[(seg.end[k - 1] + 1):seg.end[k]]
current.genomdat <- current.genomdat - mean(current.genomdat)
current.tss <- sum(current.genomdat^2)
hybrid <- FALSE
delta <- 0
if ((p.method == "hybrid") & (nmin < current.n)) {
hybrid <- TRUE
delta <- (kmax + 1)/current.n
}
zzz <- .Fortran("fndcpt", n = as.integer(current.n),
x = as.double(current.genomdat), tss = as.double(current.tss),
px = double(current.n), sx = double(current.n),
nperm = as.integer(nperm), cpval = as.double(alpha),
ncpt = integer(1), icpt = integer(2), ibin = as.logical(data.type ==
"binary"), hybrid = as.logical(hybrid), al0 = as.integer(min.width),
hk = as.integer(kmax), delta = as.double(delta),
ngrid = as.integer(ngrid), sbn = as.integer(sbn),
sbdry = as.integer(sbdry), tol = as.double(tol),
PACKAGE = "DNAcopy")
}
else {
zzz <- list()
zzz$ncpt <- 0
}
if (zzz$ncpt == 0)
change.loc <- c(change.loc, seg.end[k])
seg.end <- switch(1 + zzz$ncpt, seg.end[-k], c(seg.end[1:(k -
1)], seg.end[k - 1] + zzz$icpt[1], seg.end[k]), c(seg.end[1:(k -
1)], seg.end[k - 1] + zzz$icpt, seg.end[k]))
k <- length(seg.end)
if (verbose >= 3)
message(".... segments to go:", seg.end, "\n")
}
seg.ends <- rev(change.loc)
nseg <- length(seg.ends)
lseg <- diff(c(0, seg.ends))
if (nseg > 1) {
if (undo.splits == "prune") {
lseg <- changepoints.prune(genomdat, lseg, undo.prune)
}
if (undo.splits == "sdundo") {
lseg <- .changepoints.sdundo2(genomdat, lseg, trimmed.SD,
undo.SD, clen, relSDlong)
}
}
segmeans <- 0 * lseg
ll <- uu <- 0
for (i in 1:length(lseg)) {
uu <- uu + lseg[i]
segmeans[i] <- mean(genomdat[(ll + 1):uu])
ll <- uu
}
list(lseg = lseg, segmeans = segmeans)
}
.changepoints.sdundo2 <- function (genomdat, lseg, trimmed.SD, change.SD = 3, clen, relSDlong)
{
change.SD <- trimmed.SD * change.SD
cpt.loc <- cumsum(lseg)
sdundo <- TRUE
while (sdundo) {
k <- length(cpt.loc)
if (k > 1) {
segments0 <- cbind(c(1, 1 + cpt.loc[-k]), cpt.loc)
segmed <- apply(segments0, 1, function(i, x) {
median(x[i[1]:i[2]])}, genomdat)
ens <- apply(segments0, 1, function(i, x) {i[2]+1-i[1]
}, genomdat)
el <- length(ens)
ens2 <- c(2,ens)
ens1 <- c(ens,2)
allens <- apply(cbind(ens1,ens2),1,function(ro) ifelse(min(ro[1],ro[2])>clen,relSDlong,1))[-c(1,(el+1))]
adsegmed <- abs(diff(segmed)*allens)
if (min(adsegmed) < change.SD) {
i <- which(adsegmed == min(adsegmed))
cpt.loc <- cpt.loc[-i]
}
else {
sdundo <- FALSE
}
}
else {
sdundo <- FALSE
}
}
lseg.sdundo <- diff(c(0, cpt.loc))
lseg.sdundo
}
######################################################################
.assignNames <- function(matrix, object) {
colnames(matrix) <- sampleNames(object)
rownames(matrix) <- featureNames(object)
matrix
}
.makeEmptyFeatureData <- function(object) {
dims <- Biobase:::assayDataDims(object)
n <- dims[1,1]
features <-
if (is(object, "environment")) ls(object)
else names(object)
nms <- rownames(object[[features[[1]]]])
data <- data.frame(Chromosome=numeric(n), Start=numeric(n), End=numeric(n), row.names=nms)
dimLabels <- c("featureNames", "featureColumns")
metadata <- data.frame(labelDescription=c("Chromosomal position", "Basepair position start", "Basepair position end"), row.names=c("Chromosome", "Start", "End"))
new("AnnotatedDataFrame", data=data, dimLabels=dimLabels, varMetadata=metadata)
}
.featureDataRequiredColumns <- function(featureData, columns) {
msg <- NULL
absent <- columns[!(columns %in% rownames(varMetadata(featureData)))]
if (length(absent) != 0) {
msg <- paste(msg, paste("missing columns' ", absent ,"' in featureData" , sep = "", collapse = "\n\t"), sep="\n")
}
if (is.null(msg)) TRUE else msg
}
.countcl <- function(k, regionsdat) {
regionsdat[k,2]-regionsdat[k,1]+1
}
.sumreg <- function(k, dat, regionsdat) {
sum(dat[regionsdat[k,1]:regionsdat[k,2]])
}
.sumsqreg <- function(k, dat, regionsdat) {
sum((dat[regionsdat[k,1]:regionsdat[k,2]])^2)
}
.varregtimescount <- function(k, counts, dat, regionsdat) {
var(dat[regionsdat[k,1]:regionsdat[k,2]])*counts[k]
}
.profreg <- function(k) {
profile[k]
}
.varproffun <- function(prof, vcnmat, profile) {
vcnprof <- vcnmat[profile == prof & !is.na(vcnmat[,1]),]
if(!is.null(dim(vcnprof))) {
return(sum(vcnprof[,1])/sum(vcnprof[,3]))
} else {
return(vcnprof[1]/vcnprof[3])
}
}
.xgivenknotrunc <- function(k, class, pm, varprofall, allsum, allsumsq, allnc, robustsig, allcell,betas=c(1,0)) {
sumx <- allsum[k]
sumxsq <- allsumsq[k]
ncl <- allnc[k]
v1 <- 2*varprofall[k]
sd1 <- sqrt(v1)
cell <- allcell[k]
if (class==6) {
mu <- (log2(2)/log2(1.5))*(0.10 + exp(-pm[4]))+0.3+exp(-pm[5])
} else if (class==5) {
mu <- (log2(2)/log2(1.5))*(0.10 + exp(-pm[4]))
} else if (class==4) {
mu <- 0.10 + exp(-pm[class])
} else if (class==3) {
mu <- -0.05+0.1*exp(-(pm[class])^2)
} else if (class==1) {
mu <- -0.10-exp(-pm[2])-0.3 - exp(-pm[1])
} else {
mu <- -0.10-exp(-pm[class])
}
munorm <- -0.05+0.1*exp(-(pm[class])^2)
#NEW
mu <- ((1-cell)*munorm + cell*mu)*(betas[1]+betas[2]*sd1)
v2 <- if(class == 4 | class == 2) 2*((pm[5+class])^2 + 0.0001)
else {if(class==3) {if(robustsig=="yes") 1/4*((pm[7])^2 + 0.0001)+1/4*((pm[9])^2 + 0.0001)+2*((pm[8]^2)+0.0001)
else {2*((pm[8])^2 + 0.0001)}}
else {if(class==1) 2*(pm[7]^2+(pm[6])^2 + 0.0001)
else {if(class==5) 2*((pm[10])^2+(pm[9])^2 + 0.0001)
else 2*((pm[11])^2 + (pm[10])^2 + (pm[9])^2 + 0.0001)}}} # for stability
v2norm <- if(robustsig=="yes") {1/4*((pm[7])^2 + 0.0001)+1/4*((pm[9])^2 + 0.0001)+2*((pm[8]^2)+0.0001)}
else {2*((pm[8])^2 + 0.0001)}
#NEW
v2 <- if(class==3) v2norm else {(1-cell)^2*v2norm + cell^2*v2}
#v2 <- (1-cell)*v2norm + cell^2*v2
A <- (sumx*v2+mu*v1)/(ncl*v2+v1)
B <- v1*v2/(ncl*v2+v1)
C <- sumxsq/v1 + mu^2/v2 - A^2/B
logD <- log(B)/2 -log(v2)/2
logres <- - C + logD
res <- exp(logres)
return(logres)
}
.posteriorp <- function(k, priorp, pm, varprofall, allsum, allsumsq, allnc,robustsig,allcell) {
all3 <- sapply(c(1,2,3,4,5,6), .xgivenknotrunc, k=k, pm=pm, varprofall=varprofall, allsum=allsum, allsumsq=allsumsq, allnc=allnc,robustsig=robustsig,allcell=allcell)
maxim <- max(all3)
all <- exp(all3-maxim)
allprior <- all*priorp[k,]
tot <- all%*%priorp[k,]
return(allprior/tot)
}
.reallikk4 <- function(k, alpha, pm, varprofall, allsum, allsumsq, allnc,robustsig,allcell,betas=c(1,0)) {
#betas=c(1,0);allcell=allcell_pr*0.2;k=1;pm=bstart; varprofall =varprof_allall_pr;allsum= allsumall_pr;allsumsq= allsumsqall_pr;allnc= allncall_pr;robustsig= robustsig;
all3 <-sapply(c(1,2,3,4,5,6), .xgivenknotrunc, k=k, pm=pm, varprofall=varprofall, allsum=allsum, allsumsq=allsumsq,
allnc=allnc,robustsig=robustsig,allcell=allcell,betas=betas)
maxim <- max(all3)
all <- exp(all3-maxim)
tot <- maxim + log(all%*%alpha[k,])
return(tot)
}
.reallik4 <- function(nreg, alpha, pm, varprofall, allsum, allsumsq, allnc,robustsig,allcell,betas=c(1,0)) {
return(-sum(sapply(1:nreg, .reallikk4, pm=pm, alpha=alpha, varprofall =varprofall, allsum=allsum, allsumsq=allsumsq, allnc=allnc,robustsig=robustsig,allcell=allcell,betas=betas)))
}
#.reallik4b <- function(nreg, alpha, pm, varprofall, allsum, allsumsq, allnc,robustsig,allcell,betas=c(1,0)) {
# return(-sapply(1:nreg, .reallikk4, pm=pm, alpha=alpha, varprofall =varprofall, allsum=allsum, allsumsq=allsumsq, allnc=allnc,robustsig=robustsig,allcell=allcell,betas=betas))
#}
.alphafun <- function(k, profile, priorp, pm, varprofall, allsum, allsumsq, allnc=allnc, robustsig, allcell, prior) {
prof <- profile[k]
regionsk <- which(profile==prof)
nregk <- length(regionsk)
if (prior=="all") {
nregk <- length(profile)
regionsk <- 1:nregk
}
totpost <- rep(1,nregk)%*%t(sapply(regionsk, .posteriorp, priorp=priorp, pm=pm, varprofall=varprofall, allsum=allsum, allsumsq=allsumsq, allnc=allnc,robustsig=robustsig,allcell=allcell))
return(totpost/nregk)
}
.alpha0all <- function(nreg, profile, priorp, pm, varprofall, allsum, allsumsq, allnc, robustsig, allcell, prior) {
prevprof <- 0
alphaall <- c()
alpha1 <- .alphafun(1, profile, priorp, pm, varprofall, allsum, allsumsq, allnc, robustsig, allcell, prior)
for (i in (1:nreg)) {
if (prior != "all") {
curprof <- profile[i]
if (curprof==prevprof) {
newalpha <- oldalpha
}
if (curprof != prevprof) {
newalpha <- .alphafun(i, profile, priorp, pm, varprofall, allsum, allsumsq, allnc, robustsig, allcell, prior)
}
oldalpha <- newalpha
prevprof <- curprof
}
if (prior == "all") {
newalpha <- alpha1
}
alphaall <- rbind(alphaall,newalpha)
}
return(alphaall)
}
.minusEloglikreg <- function(k, posteriorprev, alphaprev, pm, varprofall, allsum, allsumsq, allnc, robustsig, allcell) {
all3 <- sapply(c(1,2,3,4,5,6), .xgivenknotrunc, k=k, pm=pm, varprofall=varprofall, allsum=allsum, allsumsq=allsumsq, allnc, robustsig=robustsig, allcell=allcell)
return(-all3%*%posteriorprev[,k] - (log(alphaprev[k,])%*%posteriorprev[,k]))
}
.totallik <- function(nreg, pm, posteriorprev, alphaprev, varprofall, allsum, allsumsq, allnc, robustsig,allcell,ncpus=1) {
if(ncpus==1) {
return(sum(sapply(1:nreg, .minusEloglikreg, pm=pm, posteriorprev=posteriorprev, alphaprev=alphaprev, varprofall=varprofall, allsum=allsum, allsumsq=allsumsq, allnc,robustsig=robustsig,allcell=allcell)))
} else {
sfExport("pm")
return(sum(sfSapply(1:nreg, .minusEloglikreg, pm=pm, posteriorprev=posteriorprev, alphaprev=alphaprev, varprofall=varprofall, allsum=allsum, allsumsq=allsumsq, allnc,robustsig=robustsig,allcell=allcell)))
}
}
#.mwp <- function(prof) {
# meanpost <- cbind(allmean,posteriorfin2)
# return(meanpost[profile==prof,])
#}
.MakeData <- function(smratall,chrnum) { #updated 16/07/10
#smratall <- datmat[,-(1:nc)];chrnum<-chr;
nc <- ncol(smratall)
allregions <- vector("list", nc)
ls <- length(chrnum)
chrnumfw <- c(100,chrnum[-ls])
for (j in 1:(nc)) {
smrat <- smratall[,j] #changed 19/06/2009, much faster
smratshfw <- c(100,smrat[-ls])
smratshbw <- c(smrat[-1],100)
mult <- (smrat-smratshfw)*(smratshbw-smratshfw) + (chrnumfw-chrnum)
wm <- which(mult!=0)
smwh <- smrat[wm]
wmend <- c(wm[-1]-1,ls)
regions <- cbind(wm,wmend,smwh)
allregions[[j]] <- regions
# breaks <- c(0)
# for(i in 2:(ls-1)) {
# breaks <- c(breaks, ifelse(((smrat[i] != smrat[i-1]) & (smrat[i-1] != smrat[i+1])) | chrnum[i] != chrnum[i-1], 1, 0))
# }
# breaks <- c(breaks,0)
# ind <- 1:ls
# difind <- cbind(ind, breaks)
# br <- difind[breaks==1,2]
# if (length(br) < 1) breakpos <- c()
# if (length(br) ==1) breakpos <- difind[breaks == 1,][1]
# if (length(br) > 1) breakpos <- difind[breaks == 1,][,1]
# regions <- cbind(append(breakpos, 1, 0), append(breakpos-1, ls, length(breakpos)))
rm(smrat,smratshfw,smratshbw,mult,smwh,wm);gc()
}
return(allregions)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.