R/DesignProbes.R
In DECIPHER: Tools for curating, analyzing, and manipulating biological sequences

Documented in DesignProbes

`.substr<-` <- function(x,
	start,
	stop,
	value) {
	w <- which(start > stop)
	if (length(w) > 0)
		stop(paste("start", start[w], "> stop", stop[w]))
	w <- which(stop < start)
	if (length(w) > 0)
		stop(paste("stop", stop[w], "< start", start[w]))
	w <- which(start < 1)
	if (length(w) > 0)
		stop(paste("start", start[w], "< 1"))
	w <- which(stop > nchar(x))
	if (length(w) > 0)
		stop(paste("stop", stop[w], "> nchar(x)", nchar(x)[w]))
	
	x <- paste(ifelse(start > 1,
			substr(x,
				1,
				start - 1),
			""),
		value,
		ifelse(stop < nchar(x),
			substr(x,
				stop + 1,
				nchar(x)),
			""),
		sep="")
}

.tiledTemplate <- function(tiles) {
	template <- ""
	begin <- 0
	repeat {
		w <- which(tiles$start > begin)[1]
		
		if (is.na(w)) {
			# complete template with last tile
			last <- which(tiles$start==tiles$start[dim(tiles)[1]])[1]
			if (tiles$end[last]==begin)
				break
			template <- paste(template,
				substr(tiles$target_site[last],
					begin - tiles$start[last] + 2,
					nchar(tiles$target_site[last])),
				sep="")
			dist <- tiles$end[last] - nchar(template)
			if (dist > 0)
				template <- paste(template,
					paste(rep("A", dist), collapse=""),
					sep="")
			break
		} else {
			# fill missing areas of template
			dist <- tiles$start[w] - begin
			if (dist > 1)
				template <- paste(template,
					paste(rep("A", dist - 1), collapse=""),
					sep="")
		}
		
		template <- paste(template,
			tiles$target_site[w], sep="")
		begin <- tiles$start[w] + nchar(tiles$target_site[w]) - 1
	}
	
	return(template)
}

.CalculateEfficiencyFISH <- function(probe,
	template,
	fivePrimeEnd,
	threePrimeEnd,
	temp,
	P,
	ions,
	FA,
	batchSize=1000,
	minEfficiency=0.1,
	type="SSU") {
	
	# error checking
	if (is.character(probe))
		probe <- toupper(probe)
	if (is(probe, "DNAStringSet"))
		probe <- strsplit(toString(probe), ", ", fixed=TRUE)[[1]]
	if (is(template, "DNAStringSet"))
		template <- strsplit(toString(template), ", ", fixed=TRUE)[[1]]
	if (!is.character(probe))
		stop("probe must be a DNAStringSet or character vector.")
	if (!is.character(template))
		stop("template must be a DNAStringSet or character vector.")
	if (!is.numeric(ions))
		stop("ions must be a numeric.")
	if (ions < .01 || is.nan(ions))
		stop("Sodium equivilent concentration must be at least 0.01M.")
	if (!is.numeric(P))
		stop("P must be a numeric.")
	if (!(P > 0))
		stop("P must be greater than zero.")
	if (!is.numeric(FA))
		stop("FA must be a numeric.")
	if (!is.numeric(batchSize))
		stop("batchSize must be a numeric.")
	if (floor(batchSize)!=batchSize)
		stop("batchSize must be a whole number.")
	if (batchSize <= 0)
		stop("batchSize must be greater than zero.")
	if (!is.numeric(temp))
		stop("temp must be a numeric.")
	if (any(!is.numeric(fivePrimeEnd)))
		stop("fivePrimeEnd must be a numeric.")
	if (any(fivePrimeEnd < 1))
		stop("fivePrimeEnd must be greater than zero.")
	if (any(fivePrimeEnd != floor(fivePrimeEnd)))
		stop("fivePrimeEnd must be an integer.")
	if (any(!is.numeric(threePrimeEnd)))
		stop("threePrimeEnd must be a numeric.")
	if (any(threePrimeEnd < 1))
		stop("threePrimeEnd must be greater than zero.")
	if (any(threePrimeEnd != floor(threePrimeEnd)))
		stop("threePrimeEnd must be an integer.")
	if (any(fivePrimeEnd > threePrimeEnd))
		stop("fivePrimeEnd must be less than threePrimeEnd.")
	if (!is.numeric(minEfficiency))
		stop("minEfficiency must be a numeric.")
	if (any(minEfficiency < 0))
		stop("minEfficiency must be greater than or equal to zero.")
	if (any(minEfficiency > 1))
		stop("minEfficiency must be less than or equal to one.")
	
	RT <- .0019871*(273.15 + temp) # [kcal/mol]
	l <- length(probe)
	n <- nchar(probe)
	
	if (l==0)
		stop("No probe specified.")
	if (l!=length(template))
		stop("probe is not the same length as template.")
	if (l!=length(fivePrimeEnd))
		stop("probe is not the same length as fivePrimeEnd.")
	if (l!=length(threePrimeEnd))
		stop("threePrimeEnd is not the same length as fivePrimeEnd.")
	target <- substr(template, fivePrimeEnd, threePrimeEnd)
	
	# align probe and target
	seqs2 <- reverseComplement(DNAStringSet(target))
	seqs2 <- unlist(strsplit(toString(seqs2), ", ", fixed=TRUE))
	seqs2 <- paste("----", seqs2, "----", sep="")
	p <- pairwiseAlignment(probe,
		seqs2,
		type="global-local",
		gapOpen=-10,
		gapExtension=-10)
	seqs1 <- unlist(strsplit(toString(pattern(p)), ", ", fixed=TRUE))
	seqs2 <- unlist(strsplit(toString(subject(p)), ", ", fixed=TRUE))
	#ws <- p@subject@range@width # width of templates
	
	deltas <- .Call("calculateFISH", seqs1, seqs2, PACKAGE="DECIPHER")
	dG1_PM_DNARNA <- deltas[,1] - (273.15 + temp)/1000*(deltas[,2] + 0.368*n*log(ions))
	
	# determine ddG1 for mismatched probes
	ddG1_MM_DNARNA <- numeric(l)
	ddG1_MM_DNADNA <- numeric(l)
	ddG1_MM_RNARNA <- numeric(l)
	dG1_PM_UNADNA <- numeric(l)
	dG1_MM_UNADNA <- numeric(l)
	dG1_PM_UNARNA <- numeric(l)
	dG1_MM_UNARNA <- numeric(l)
	MM <- which(deltas[,3] != 0) # mismatched probes/targets
	if (length(MM) > 0) {
		ddG1_MM_DNARNA[MM] <- deltas[MM,3] - (273.15 + temp)/1000*(deltas[MM,4] + 0.368*n[MM]*log(ions))
		ddG1_MM_DNADNA[MM] <- deltas[MM,5] - (273.15 + temp)/1000*(deltas[MM,6] + 0.368*n[MM]*log(ions))
		ddG1_MM_RNARNA[MM] <- deltas[MM,7] - (273.15 + temp)/1000*(deltas[MM,8] + 0.368*n[MM]*log(ions))
		
		target <- .Call("replaceChar", seqs2[MM], "-", "", PACKAGE="DECIPHER")
		target <- reverseComplement(DNAStringSet(target))
		target <- unlist(strsplit(toString(target), ", ", fixed=TRUE))
		target_PM <- unlist(strsplit(toString(reverseComplement(DNAStringSet(probe[MM]))), ", ", fixed=TRUE))
		
		seqs <- paste(probe[MM],
			target_PM,
			sep=" ")
		seq <- unique(seqs)
		ls <- length(seq)
		dG <- numeric(ls)
		for (start in seq(1, ls, batchSize)) {
			end <- ifelse(start + batchSize > ls, ls, start + batchSize)
			dG[start:end] <- as.numeric(system(paste("hybrid-min -n DNA -t",
					temp,
					"-T",
					temp,
					"-N",
					ions,
					"-E -q",
					paste(seq[start:end], collapse=" ")),
				intern=TRUE))
		}
		dG1_PM_UNADNA[MM] <- dG[match(seqs, seq)]
		
		seqs <- paste(probe[MM],
			target,
			sep=" ")
		seq <- unique(seqs)
		ls <- length(seq)
		dG <- numeric(ls)
		for (start in seq(1, ls, batchSize)) {
			end <- ifelse(start + batchSize > ls, ls, start + batchSize)
			dG[start:end] <- as.numeric(system(paste("hybrid-min -n DNA -t",
					temp,
					"-T",
					temp,
					"-N",
					ions,
					"-E -q",
					paste(seq[start:end], collapse=" ")),
				intern=TRUE))
		}
		dG1_MM_UNADNA[MM] <- dG[match(seqs, seq)]
		
		seqs <- paste(probe[MM],
			target_PM,
			sep=" ")
		seq <- unique(seqs)
		ls <- length(seq)
		dG <- numeric(ls)
		for (start in seq(1, ls, batchSize)) {
			end <- ifelse(start + batchSize > ls, ls, start + batchSize)
			dG[start:end] <- as.numeric(system(paste("hybrid-min -n RNA -t",
					temp,
					"-T",
					temp,
					"-E -q",
					paste(seq[start:end], collapse=" ")),
				intern=TRUE))
		}
		dG1_PM_UNARNA[MM] <- dG[match(seqs, seq)]
		
		seqs <- paste(probe[MM],
			target,
			sep=" ")
		seq <- unique(seqs)
		ls <- length(seq)
		dG <- numeric(ls)
		for (start in seq(1, ls, batchSize)) {
			end <- ifelse(start + batchSize > ls, ls, start + batchSize)
			dG[start:end] <- as.numeric(system(paste("hybrid-min -n RNA -t",
					temp,
					"-T",
					temp,
					"-E -q",
					paste(seq[start:end], collapse=" ")),
				intern=TRUE))
		}
		dG1_MM_UNARNA[MM] <- dG[match(seqs, seq)]
	}
	
	ddG1_DNA <- dG1_MM_UNADNA - dG1_PM_UNADNA
	ddG1_RNA <- dG1_MM_UNARNA - dG1_PM_UNARNA
	ddG1_loop_DNA <- ddG1_DNA + ddG1_MM_DNADNA
	ddG1_loop_RNA <- ddG1_RNA + ddG1_MM_RNARNA
	ddG1 <- (ddG1_loop_DNA + ddG1_loop_RNA)/2 - ddG1_MM_DNARNA
	dG1 <- dG1_PM_DNARNA + ddG1
	#cat(dG1, sep=",")
	#cat("\n")
	K1 <- exp(-(dG1 + FA*(0.0696 + 0.0079*n))/RT)
	
	eff <- P*K1/(1 + P*K1)
	
	# disregard probes with < minEfficiency (from dG1 alone)
	z <- which(eff >= minEfficiency)
	l <- length(z)
	if (l != length(eff))
		eff[-z] <- 0
	if (l == 0) {
		ans <- matrix(nrow=length(dG1),
		ncol=6,
		dimnames=list(1:length(dG1),
			c("HybEff",
				"FAm",
				"ddG1",
				"dG1",
				"dG2",
				"dG3")))
		ans[, "HybEff"] <- eff
		ans[, "ddG1"] <- ddG1
		ans[, "dG1"] <- dG1
		return(ans)
	}
	
	probe <- probe[z]
	template <- template[z]
	fivePrimeEnd <- fivePrimeEnd[z]
	threePrimeEnd <- threePrimeEnd[z]
	K1 <- K1[z]
	#ws <- ws[z]
	n <- n[z]
	
	# probe folding
	seqs <- probe
	seq <- unique(seqs)
	ls <- length(seq)
	dG <- numeric(ls)
	for (start in seq(1, ls, batchSize)) {
		end <- ifelse(start + batchSize > ls, ls, start + batchSize)
		dG[start:end] <- as.numeric(system(paste("hybrid-ss-min -n DNA -t",
				temp,
				"-T",
				temp,
				"-N",
				ions,
				"-E -q",
				paste(seq[start:end], collapse=" ")),
			intern=TRUE))
	}
	dG2a <- numeric(l)
	dG2a <- dG[match(seqs, seq)]
	#cat(dG2a - 0.1*FA, sep=",")
	#cat("\n")
	K2a <- exp(-(dG2a + 0.1*FA)/RT)
	
	# target folding
	if (type=="SSU") {
		mapping <- matrix(0, nrow=16, ncol=16)
		dimnames(mapping) <- list(c(names(IUPAC_CODE_MAP), "-"), c(names(IUPAC_CODE_MAP), "-"))
		mapping["A",c("A","M","R","W","V","H","D","N")] <- 1
		mapping["G",c("G","R","S","K","V","D","D","N")] <- 1
		mapping["C",c("C","M","S","Y","V","H","B","N")] <- 1
		mapping["T",c("T","W","Y","K","H","D","B","N")] <- 1
		w <- which(mapping==0)
		mapping[w] <- .1
		mapping[,"-"] <- 0
		ecoli <- "AAATTGAAGAGTTTGATCATGGCTCAGATTGAACGCTGGCGGCAGGCCTAACACATGCAAGTCGAACGGTAACAGGAAGAAGCTTGCTTCTTTGCTGACGAGTGGCGGACGGGTGAGTAATGTCTGGGAAACTGCCTGATGGAGGGGGATAACTACTGGAAACGGTAGCTAATACCGCATAACGTCGCAAGACCAAAGAGGGGGACCTTCGGGCCTCTTGCCATCGGATGTGCCCAGATGGGATTAGCTAGTAGGTGGGGTAACGGCTCACCTAGGCGACGATCCCTAGCTGGTCTGAGAGGATGACCAGCCACACTGGAACTGAGACACGGTCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGGGGAGGAAGGGAGTAAAGTTAATACCTTTGCTCATTGACGTTACCCGCAGAAGAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACGAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGTCGACTTGGAGGTTGTGCCCTTGAGGCGTGGCTTCCGGAGCTAACGCGTTAAGTCGACCGCCTGGGGAGTACGGCCGCAAGGTTAAAACTCAAATGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGATGCAACGCGAAGAACCTTACCTGGTCTTGACATCCACGGAAGTTTTCAGAGATGAGAATGTGCCTTCGGGAACCGTGAGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTTGTGAAATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTATCCTTTGTTGCCAGCGGTCCGGCCGGGAACTCAAAGGAGACTGCCAGTGATAAACTGGAGGAAGGTGGGGATGACGTCAAGTCATCATGGCCCTTACGACCAGGGCTACACACGTGCTACAATGGCGCATACAAAGAGAAGCGACCTCGCGAGAGCAAGCGGACCTCATAAAGTGCGTCGTAGTCCGGATTGGAGTCTGCAACTCGACTCCATGAAGTCGGAATCGCTAGTAATCGTGGATCAGAATGCCACGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCATGGGAGTGGGTTGCAAAAGAAGTAGGTAGCTTAACCTTCGGGAGGGCGCTTACCACTTTGTGATTCATGACTGGGGTGAAGTCGTAACAAGGTAACCGTAGGGGAACCTGCGGTTGGATCACCTCCTTA"
		
		p <- pairwiseAlignment(unique(template), ecoli, type="global-local", fuzzyMatrix=mapping)
		p <- p[match(template, unique(template))]
		
		# determine domain positioning
		ins <- insertion(p)
		starts <- lapply(ins, slot, "start")
		widths <- lapply(ins, slot, "width")
		
		index <- p@subject@range@start > 567 | (p@subject@range@start + p@subject@range@width) <= 567
		domain2start <- ifelse(index,
			1,
			568 - p@subject@range@start)
		index <- !index
		for (j in which(index)) {
			i <- starts[[j]] + p@subject@range@start[j] - 1
			cum <- cumsum(widths[[j]])
			w <- which(i <= 568)
			if (length(w) > 0)
				domain2start[j] <- domain2start[j] + cum[length(w)]
		}
		
		index <- p@subject@range@start > 913 | (p@subject@range@start + p@subject@range@width) <= 913
		domain3start <- ifelse(index,
			domain2start,
			914 - p@subject@range@start)
		index <- !index
		for (j in which(index)) {
			i <- starts[[j]] + p@subject@range@start[j] - 1
			cum <- cumsum(widths[[j]])
			w <- which(i <= 913)
			if (length(w) > 0)
				domain3start[j] <- domain3start[j] + cum[length(w)]
		}
		
		index <- p@subject@range@start > 1397 | (p@subject@range@start + p@subject@range@width) <= 1397
		domain4start <- ifelse(index,
			domain3start,
			1398 - p@subject@range@start)
		index <- !index
		for (j in which(index)) {
			i <- starts[[j]] + p@subject@range@start[j] - 1
			cum <- cumsum(widths[[j]])
			w <- which(i <= 1398)
			if (length(w) > 0)
				domain4start[j] <- domain4start[j] + cum[length(w)]
		}
		
		# find target site position in the alignment
		startsPattern <- p@pattern@range@start - 1
		del <- deletion(p) # gaps added to pattern
		starts <- fivePrimeEnd
		ends <- threePrimeEnd
		ls <- length(p)
		for (j in 1:ls) {
			d <- del[[j]]
			if (length(d)==0)
				next
			w <- which((d@start + startsPattern[j]) <= starts[j])
			if (length(w) > 0)
				starts[j] <- starts[j] + sum(d@width[w])
			w <- which((d@start + startsPattern[j]) <= ends[j])
			if (length(w) > 0)
				ends[j] <- ends[j] + sum(d@width[w])
		}
		
		# determine start and end of each sequence's domain
		domainStarts <- numeric(ls)
		domainEnds <- numeric(ls)
		domains <- matrix(c(rep(1L, ls),
				domain2start,
				domain3start,
				domain4start,
				nchar(template) - p@pattern@range@width + nchar(unlist(strsplit(toString(pattern(p)), ", ", fixed=TRUE))) + 1),
			nrow=ls)
		for (j in 1:ls) {
			w <- which(domains[j,] <= starts[j])
			domainStarts[j] <- domains[j, w[length(w)]]
			w <- which(domains[j,] > ends[j])
			domainEnds[j] <- domains[j, w[1]] - 1
		}
		
		# determine positioning in the original sequence
		for (j in 1:ls) {
			d <- del[[j]]
			if (length(d)==0)
				next
			w <- which((d@start + startsPattern[j] + cumsum(d@width) - d@width) <= domainStarts[j])
			if (length(w) > 0)
				domainStarts[j] <- domainStarts[j] - sum(d@width[w])
			w <- which((d@start + startsPattern[j] + cumsum(d@width) - d@width) <= domainEnds[j])
			if (length(w) > 0)
				domainEnds[j] <- domainEnds[j] - sum(d@width[w])
		}
		
		batchSize <- floor(batchSize*max(n)/max(domainEnds - domainStarts))
		if (batchSize < 1)
			batchSize <- 1
	} else if (type=="LSU") {
		mapping <- matrix(0, nrow=16, ncol=16)
		dimnames(mapping) <- list(c(names(IUPAC_CODE_MAP), "-"), c(names(IUPAC_CODE_MAP), "-"))
		mapping["A",c("A","M","R","W","V","H","D","N")] <- 1
		mapping["G",c("G","R","S","K","V","D","D","N")] <- 1
		mapping["C",c("C","M","S","Y","V","H","B","N")] <- 1
		mapping["T",c("T","W","Y","K","H","D","B","N")] <- 1
		w <- which(mapping==0)
		mapping[w] <- .1
		mapping[,"-"] <- 0
		ecoli <- "GGTTAAGCGACTAAGCGTACACGGTGGATGCCCTGGCAGTCAGAGGCGATGAAGGACGTGCTAATCTGCGATAAGCGTCGGTAAGGTGATATGAACCGTTATAACCGGCGATTTCCGAATGGGGAAACCCAGTGTGATTCGTCACACTATCATTAACTGAATCCATAGGTTAATGAGGCGAACCGGGGGAACTGAAACATCTAAGTACCCCGAGGAAAAGAAATCAACCGAGATTCCCCCAGTAGCGGCGAGCGAACGGGGAGGAGCCCAGAGCCTGAATCAGTGTGTGTGTTAGTGGAAGCGTCTGGAAAGGCGCGCGATACAGGGTGACAGCCCCGTACACAAAAATGCACATACTGTGAGCTCGATGAGTAGGGCGGGACACGTGGTATCCTGTCTGAATATGGGGGGACCATCCTCCAAGGCTAAATACTCCTGACTGACCGATAGTGAACCAGTACCGTGAGGGAAAGGCGAAAAGAACCCCGGCGAGGGGAGTGAAAAAGAACCTGAAACCGTGTACGTACAAGCAGTGGGAGCCTCTTTTATGGGGTGACTGCGTACCTTTTGTATAATGGGTCAGCGACTTATATTCTGTAGCAAGGTTAACCGAATAGGGGAGCCGAAGGGAAACCGAGTCTTAACCGGGCGTTAAGTTGCAGGGTATAGACCCGAAACCCGGTGATCTAGCCATGGGCAGGTTGAAGGTTGGGTAACACTAACTGGAGGACCGAACCGACTAATGTTGAAAAATTAGCGGATGACTTGTGGCTGGGGGTGAAAGGCCAATCAAACCGGGAGATAGCTGGTTCTCCCCGAAAGCTATTTAGGTAGCGCCTCGTGAATTCATCTCCGGGGGTAGAGCACTGTTTCGGCAAGGGGGTCATCCCGACTTACCAACCCGATGCAAACTGCGAATACCGGAGAATGTTATCACGGGAGACATACGGCGGGTGCTAACGTCCGTCGTGAAGAGGGAAACAACCCAGACCGCCAGCTAAGGTCCCAAAGTCATGGTTAAGTGGGAAACGATGTGGGAAGGCCCAGACAGCCAGGATGTTGGCTTAGAAGCAGCCATCATTTAAAGAAAGCGTAATAGCTCACTGGTCGAGTCGGCCTGCGCGGAAGATGTAACGGGGCTAAACCATGCACCGAAGCTGCGGCAGCGACACTGTGTGTTGTTGGGTAGGGGAGCGTTCTGTAAGCCTGTGAAGGTGTACTGTGAGGTATGCTGGAGGTATCAGAAGTGCGAATGCTGACATAAGTAACGATAAAGCGGGTGAAAAGCCCGCTCGCCGGAAGACCAAGGGTTCCTGTCCAACGTTAATCGGGGCAGGGTGAGTCGACCCCTAAGGCGAGGCCGAAAGGCGTAGTCGATGGGAAACAGGTTAATATTCCTGTACTTGGTGTTACTGCGAAGGGGGGACGGAGAAGGCTATGTTGGCCGGGCGACGGTTGTCCCGGTTTAAGCGTGTAGGCTGGTTTTCCAGGCAAATCCGGAAAATCAAGGCTGAGGCGTGATGACGAGGCACTACGGTGCTGAAGCAACAAATGCCCTGCTTCCAGGAAAAGCCTCTAAGCATCAGGTAACATCAAATCGTACCCCAAACCGACACAGGTGGTCAGGTAGAGAATACCAAGGCGCTTGAGAGAACTCGGGTGAAGGAACTAGGCAAAATGGTGCCGTAACTTCGGGAGAAGGCACGCTGATATGTAGGTGAAGTCCCTCGCGGATGGAGCTGAAATCAGTCGAAGATACCAGCTGGCTGCAACTGTTTATTAAAAACACAGCACTGTGCAAACACGAAAGTGGACGTATACGGTGTGACGCCTGCCCGGTGCCGGAAGGTTAATTGATGGGGTCAGCGCAAGCGAAGCTCTTGATCGAAGCCCCGGTAAACGGCGGCCGTAACTATAACGGTCCTAAGGTAGCGAAATTCCTTGTCGGGTAAGTTCCGACCTGCACGAATGGCGTAATGATGGCCAGGCTGTCTCCACCCGAGACTCAGTGAAATTGAACTCGCTGTGAAGATGCAGTGTACCCGCGGCAAGACGGAAAGACCCCGTGAACCTTTACTATAGCTTGACACTGAACATTGAGCCTTGATGTGTAGGATAGGTGGGAGGCTTTGAAGTGTGGACGCCAGTCTGCATGGAGCCGACCTTGAAATACCACCCTTTAATGTTTGATGTTCTAACGTGGACCCGTGATCCGGGTTGCGGACAGTGTCTGGTGGGTAGTTTGACTGGGGCGGTCTCCTCCTAAAGAGTAACGGAGGAGCACGAAGGTTGGCTAATCCTGGTCGGACATCAGGAGGTTAGTGCAATGGCATAAGCCAGCTTGACTGCGAGCGTGACGGCGCGAGCAGGTGCGAAAGCAGGTCATAGTGATCCGGTGGTTCTGAATGGAAGGGCCATCGCTCAACGGATAAAAGGTACTCCGGGGATAACAGGCTGATACCGCCCAAGAGTTCATATCGACGGCGGTGTTTGGCACCTCGATGTCGGCTCATCACATCCTGGGGCTGAAGTAGGTCCCAAGGGTATGGCTGTTCGCCATTTAAAGTGGTACGCGAGCTGGGTTTAGAACGTCGTGAGACAGTTCGGTCCCTATCTGCCGTGGGCGCTGGAGAACTGAGGGGGGCTGCTCCTAGTACGAGAGGACCGGAGTGGACGCATCACTGGTGTTCGGGTTGTCATGCCAATGGCACTGCCCGGTAGCTAAATGCGGAAGAGATAAGTGCTGAAAGCATCTAAGCACGAAACTTGCCCCGAGATGAGTTCTCCCTGACCCTTTAAGGGTCCTGAAGGAACGTTGAAGACGACGACGTTGATAGGCCGGGTGTGTAAGCGCAGCGATGCGTTGAGCTAACCGGTACTAATGAACCGTGAGGCTTAACCTT"
		
		p <- pairwiseAlignment(unique(template), ecoli, type="global-local", fuzzyMatrix=mapping)
		p <- p[match(template, unique(template))]
		
		# determine domain positioning
		ins <- insertion(p)
		starts <- lapply(ins, slot, "start")
		widths <- lapply(ins, slot, "width")
		
		index <- p@subject@range@start > 562 | (p@subject@range@start + p@subject@range@width) <= 562
		domain2start <- ifelse(index,
			1,
			563 - p@subject@range@start)
		index <- !index
		for (j in which(index)) {
			i <- starts[[j]] + p@subject@range@start[j] - 1
			cum <- cumsum(widths[[j]])
			w <- which(i <= 563)
			if (length(w) > 0)
				domain2start[j] <- domain2start[j] + cum[length(w)]
		}
		
		index <- p@subject@range@start > 1270 | (p@subject@range@start + p@subject@range@width) <= 1270
		domain3start <- ifelse(index,
			domain2start,
			1271 - p@subject@range@start)
		index <- !index
		for (j in which(index)) {
			i <- starts[[j]] + p@subject@range@start[j] - 1
			cum <- cumsum(widths[[j]])
			w <- which(i <= 1271)
			if (length(w) > 0)
				domain3start[j] <- domain3start[j] + cum[length(w)]
		}
		
		index <- p@subject@range@start > 1647 | (p@subject@range@start + p@subject@range@width) <= 1647
		domain4start <- ifelse(index,
			domain3start,
			1648 - p@subject@range@start)
		index <- !index
		for (j in which(index)) {
			i <- starts[[j]] + p@subject@range@start[j] - 1
			cum <- cumsum(widths[[j]])
			w <- which(i <= 1648)
			if (length(w) > 0)
				domain4start[j] <- domain4start[j] + cum[length(w)]
		}
		
		index <- p@subject@range@start > 2015 | (p@subject@range@start + p@subject@range@width) <= 2015
		domain5start <- ifelse(index,
			domain4start,
			2016 - p@subject@range@start)
		index <- !index
		for (j in which(index)) {
			i <- starts[[j]] + p@subject@range@start[j] - 1
			cum <- cumsum(widths[[j]])
			w <- which(i <= 2016)
			if (length(w) > 0)
				domain5start[j] <- domain5start[j] + cum[length(w)]
		}
		
		index <- p@subject@range@start > 2626 | (p@subject@range@start + p@subject@range@width) <= 2626
		domain6start <- ifelse(index,
			domain5start,
			2627 - p@subject@range@start)
		index <- !index
		for (j in which(index)) {
			i <- starts[[j]] + p@subject@range@start[j] - 1
			cum <- cumsum(widths[[j]])
			w <- which(i <= 2627)
			if (length(w) > 0)
				domain6start[j] <- domain6start[j] + cum[length(w)]
		}
		
		# find target site position in the alignment
		startsPattern <- p@pattern@range@start - 1
		del <- deletion(p) # gaps added to pattern
		starts <- fivePrimeEnd
		ends <- threePrimeEnd
		ls <- length(p)
		for (j in 1:ls) {
			d <- del[[j]]
			if (length(d)==0)
				next
			w <- which((d@start + startsPattern[j]) <= starts[j])
			if (length(w) > 0)
				starts[j] <- starts[j] + sum(d@width[w])
			w <- which((d@start + startsPattern[j]) <= ends[j])
			if (length(w) > 0)
				ends[j] <- ends[j] + sum(d@width[w])
		}
		
		# determine start and end of each sequence's domain
		domainStarts <- numeric(ls)
		domainEnds <- numeric(ls)
		domains <- matrix(c(rep(1L, ls),
				domain2start,
				domain3start,
				domain4start,
				domain5start,
				domain6start,
				nchar(template) - p@pattern@range@width + nchar(unlist(strsplit(toString(pattern(p)), ", ", fixed=TRUE))) + 1),
			nrow=ls)
		for (j in 1:ls) {
			w <- which(domains[j,] <= starts[j])
			domainStarts[j] <- domains[j, w[length(w)]]
			w <- which(domains[j,] > ends[j])
			domainEnds[j] <- domains[j, w[1]] - 1
		}
		
		# determine positioning in the original sequence
		for (j in 1:ls) {
			d <- del[[j]]
			if (length(d)==0)
				next
			w <- which((d@start + startsPattern[j] + cumsum(d@width) - d@width) <= domainStarts[j])
			if (length(w) > 0)
				domainStarts[j] <- domainStarts[j] - sum(d@width[w])
			w <- which((d@start + startsPattern[j] + cumsum(d@width) - d@width) <= domainEnds[j])
			if (length(w) > 0)
				domainEnds[j] <- domainEnds[j] - sum(d@width[w])
		}
		
		batchSize <- floor(batchSize*max(n)/max(domainEnds - domainStarts))
		if (batchSize < 1)
			batchSize <- 1
	} else { # domain is type nucleotides to either side of the type site
		type <- as.numeric(type)
		batchSize <- floor(batchSize*max(n)/(max(n) + 2*type))
		if (batchSize < 1)
			batchSize <- 1
		
		domainStarts <- ifelse(fivePrimeEnd - type < 1,
			1,
			fivePrimeEnd - type)
		domainEnds <- ifelse(threePrimeEnd + type > nchar(template),
			nchar(template),
			threePrimeEnd + type)
	}
	
	seqs <- substr(template, domainStarts, domainEnds)
	seq <- unique(seqs)
	ls <- length(seq)
	dG3_folded <- numeric(ls)
	for (start in seq(1, ls, batchSize)) {
		end <- ifelse(start + batchSize > ls, ls, start + batchSize)
		dG3_folded[start:end] <- as.numeric(system(paste("hybrid-ss-min -n RNA -t",
				temp,
				"-T",
				temp,
				"-E -q",
				paste(seq[start:end], collapse=" ")),
			intern=TRUE))
	}
	dG3 <- numeric(l)
	dG3 <- dG3_folded[match(seqs, seq)]
	
	#seqs <- substr(template, domainStarts, domainEnds - 1)
	prohibit <- fivePrimeEnd - domainStarts + 1
	groups <- paste(prohibit, "0", threePrimeEnd - fivePrimeEnd + 1, sep=",")
	g <- unique(groups)
	dG3_unfolded <- numeric(l)
	for (i in 1:length(g)) {
		w <- which(groups == g[i])
		seq <- unique(seqs[w])
		ls <- length(seq)
		for (start in seq(1, ls, batchSize)) {
			end <- ifelse(start + batchSize > ls, ls, start + batchSize)
			m <- match(seqs[w], seq[start:end])
			na <- which(!is.na(m))
			dG3_unfolded[w][na] <- as.numeric(system(paste("hybrid-ss-min -n RNA -t",
					temp,
					"-T",
					temp,
					paste("--prohibit=", g[i], sep=""),
					"-E -q",
					paste(seq[start:end], collapse=" ")),
				intern=TRUE))[m][na]
		}
	}
	dG3_unfolded <- ifelse(dG3_unfolded > 0, 0, dG3_unfolded)
	dG3 <- dG3 - dG3_unfolded
	#cat(dG3, sep=",")
	#cat("\n")
	K3 <- exp(-(dG3 + FA*-0.0111*dG3)/RT)
	
	Kov <- K1/((1 + K2a)*(1 + K3))
	eff[z] <- P*Kov/(1 + P*Kov)
	
	FAm <- numeric(length(z))
	FAm[] <- -Inf
	range <- -1000:1000
	for (i in 1:length(z)) {
		f <- function(FA) {
			K1 <- exp(-(dG1[z][i] + FA*(0.0696 + 0.0079*n[i]))/RT)
			K2a <- exp(-(dG2a[i] + 0.1*FA)/RT)
			K3 <- exp(-(dG3[i] + FA*-0.0111*dG3[i])/RT)
			Kov <- K1/((1 + K2a)*(1 + K3))
			return(0.5 - P*Kov/(1 + P*Kov))
		}
		HybEffs <- f(range)
		try(FAm[i] <- uniroot(f,
				c(range[which.min(HybEffs)],
					range[which.max(HybEffs)] + 1))$root,
			silent=TRUE)
	}
	
	ans <- matrix(nrow=length(dG1),
		ncol=7,
		dimnames=list(1:length(dG1),
			c("HybEff",
				"FAm",
				"ddG1",
				"dG1",
				"dG2",
				"dG3",
				"dGov")))
	ans[, "HybEff"] <- eff
	ans[z, "FAm"] <- FAm
	ans[, "ddG1"] <- ddG1
	ans[, "dG1"] <- dG1
	ans[z, "dG2"] <- dG2a
	ans[z, "dG3"] <- dG3
	ans[z, "dGov"] <- -RT*log(Kov)
	
	return(ans)
}

DesignProbes <- function(tiles,
	identifier="",
	start=1,
	end=NULL,
	minLength=17,
	maxLength=26,
	maxPermutations=4,
	minCoverage=0.9,
	minGroupCoverage=0.2,
	hybTemp=46,
	P=250e-9,
	Na=1,
	FA=35,
	minEfficiency=0.50,
	worstScore=-Inf,
	numProbeSets=0,
	batchSize=1000,
	target="SSU",
	verbose=TRUE) {
	
	# error checking
	TARGETS <- c("SSU","LSU","Other")
	molecule <- pmatch(target, TARGETS)
	if (is.na(molecule))
		stop("Invalid target.  Choose either SSU, LSU, or Other.")
	if (molecule == -1)
		stop("Ambiguous target.  Choose either SSU, LSU, or Other.")
	molecule <- c("SSU", "LSU", 200)[molecule]
	if (!is.numeric(Na))
		stop("[Na+] concentration must be a numeric.")
	if (Na < .01 || is.nan(Na))
		stop("Sodium equivilent concentration must be at least 0.01M.")
	if (Na > 1)
		stop("Sodium equivilent concentration can be at most 1.0M.")
	if (minLength > maxLength)
		stop("minLength must be less or equal to maxLength.")
	ids <- unique(tiles$id)
	if (length(ids)==0)
		stop("No identifiers found in tiles.")
	if (any(grepl("mol,", ids, fixed=TRUE)))
		stop("Identifiers in tiles cannot include 'mol,'.")
	if (any(grepl("%;", ids, fixed=TRUE)))
		stop("Identifiers in tiles cannot include '%;'.")
	if (any(grepl(" (", ids, fixed=TRUE)))
		stop("Identifiers in tiles cannot include ' ('.")
	if (any(grepl(") ", ids, fixed=TRUE)))
		stop("Identifiers in tiles cannot include ') '.")
	if (!is.numeric(hybTemp))
		stop("hybTemp must be a numeric.")
	if (hybTemp < 10)
		stop("hybTemp must be at least 10 degrees Celsius.")
	if (hybTemp >= 90)
		stop("hybTemp must be less than 90 degrees Celsius.")
	if (!is.numeric(FA))
		stop("FA must be a numeric.")
	if (FA < 0)
		stop("FA must be at least 0% (volume/volume).")
	if (FA > 100)
		stop("FA can be at most 100% (volume/volume).")
	if (FA > 50 | FA < 20)
		warning("FA between 20 and 50% (volume/volume) will give better results.")
	if (!is.numeric(P))
		stop("P must be a numeric.")
	if (!(P > 0))
		stop("P must be greater than zero.")
	if (!is.numeric(minEfficiency))
		stop("minEfficiency must be a numeric.")
	if (minEfficiency < 0 || minEfficiency > 1)
		stop("minEfficiency must be between zero and one.")
	if (!is.numeric(worstScore))
		stop("worstScore must be a numeric.")
	if (worstScore > 0)
		stop("worstScore must be less than or equal to zero.")
	if (!is.numeric(batchSize))
		stop("batchSize must be a numeric.")
	if (floor(batchSize)!=batchSize)
		stop("batchSize must be a whole number.")
	if (batchSize <= 0)
		stop("batchSize must be greater than zero.")
	if (!is.numeric(numProbeSets))
		stop("numProbeSets must be a numeric.")
	if (floor(numProbeSets)!=numProbeSets)
		stop("numProbeSets must be a whole number.")
	if (numProbeSets < 0)
		stop("numProbeSets must be greater than or equal to zero.")
	if (!is.numeric(maxPermutations))
		stop("maxPermutations must be a numeric.")
	if (floor(maxPermutations)!=maxPermutations)
		stop("maxPermutations must be a whole number.")
	if (maxPermutations <= 0)
		stop("maxPermutations must be greater than zero.")
	if (!is.logical(verbose))
		stop("verbose must be a logical.")
	if (!is.numeric(start))
		stop("start must be a numeric.")
	if (!is.numeric(end) && !is.null(end))
		stop("end must be a numeric or NULL.")
	if (start < 1)
		stop("start must be greater than zero.")
	if (floor(start)!=start)
		stop("start must be a whole number.")
	if (!is.numeric(minCoverage))
		stop("minCoverage must be a numeric.")
	if (minCoverage > 1 || minCoverage < 0)
		stop("minCoverage must be between zero and one.")
	if (!is.numeric(minGroupCoverage))
		stop("minGroupCoverage must be a numeric.")
	if (minGroupCoverage > 1 || minGroupCoverage < 0)
		stop("minGroupCoverage must be between zero and one.")
	if (!is.null(end)) {
		if (end < start)
			stop("end must be greater than start.")
		if (floor(end)!=end)
			stop("end must be a whole number.")
	}
	if (identifier[1]=="") {
		identifier <- ids
	} else {
		w <- which(!(identifier %in% ids))
		if (length(w) > 0)
			stop("identifier not in tiles: ",
				paste(identifier[w], collapse=", "))
	}
	if (is(try(system("hybrid-min -V", intern=TRUE), silent=TRUE), "try-error"))
		stop("OligoArrayAux must be properly installed.  See the Note section in the help page for DesignProbes (enter: ?DesignProbes).")
	
	probes_all <- data.frame()
	
	if (numProbeSets > 0) {
		l <- length(identifier)
		pSets <- data.frame(identifier=I(rep(identifier, each=numProbeSets)),
			start_one=I(integer(l*numProbeSets)),
			start_two=I(integer(l*numProbeSets)),
			start_aligned_one=I(integer(l*numProbeSets)),
			start_aligned_two=I(integer(l*numProbeSets)),
			permutations_one=I(integer(l*numProbeSets)),
			permutations_two=I(integer(l*numProbeSets)),
			score_one=I(numeric(l*numProbeSets)),
			score_two=I(numeric(l*numProbeSets)),
			score_set=I(numeric(l*numProbeSets)),
			one_probe=I(matrix(character(),
				nrow=l*numProbeSets,
				ncol=maxPermutations)),
			two_probe=I(matrix(character(),
				nrow=l*numProbeSets,
				ncol=maxPermutations)),
			one_efficiency=I(matrix(numeric(),
				nrow=l*numProbeSets,
				ncol=maxPermutations)),
			two_efficiency=I(matrix(numeric(),
				nrow=l*numProbeSets,
				ncol=maxPermutations)),
			one_FAm=I(matrix(numeric(),
				nrow=l*numProbeSets,
				ncol=maxPermutations)),
			two_FAm=I(matrix(numeric(),
				nrow=l*numProbeSets,
				ncol=maxPermutations)),
			one_coverage=I(matrix(numeric(),
				nrow=l*numProbeSets,
				ncol=maxPermutations)),
			two_coverage=I(matrix(numeric(),
				nrow=l*numProbeSets,
				ncol=maxPermutations)),
			mismatches_one=I(character(l*numProbeSets)),
			mismatches_two=I(character(l*numProbeSets)),
			mismatches_set=I(character(l*numProbeSets)))
	}
	l_ids <- length(ids)
	for (id in identifier) {
		if (verbose) {
			time.1 <- Sys.time()
			cat("\n", id, sep="")
			flush.console()
		}
		
		w <- which(tiles$id==id)
		
		if (length(w)==0) {
			warning("No target sites met the specified constraints: ", id)
			next
		}
		
		target <- tiles[w,]
		template <- .tiledTemplate(target)
		
		if (is.null(end)) {
			w <- which(tiles$id==id &
				tiles$start_aligned >= start)
		} else {
			w <- which(tiles$id==id &
				tiles$start_aligned >= start &
				tiles$end_aligned <= end)
		}
		if (length(w)==0) {
			warning("No target sites met the specified constraints: ", id)
			next
		}
		
		target <- tiles[w,]
		
		uw <- unique(target$width)
		if (length(uw) > 1) {
			warning("Multiple sequence widths: ", id)
			next
		}
		
		u <- unique(target$start_aligned)
		l <- length(u)
		probes <- data.frame(identifier=I(rep(id,l)),
			start=I(integer(l)),
			start_aligned=I(integer(l)),
			permutations=I(integer(l)),
			score=I(numeric(l)),
			probe=I(matrix(character(),
				nrow=l,
				ncol=maxPermutations)),
			efficiency=I(matrix(numeric(),
				nrow=l,
				ncol=maxPermutations)),
			FAm=I(matrix(numeric(),
				nrow=l,
				ncol=maxPermutations)),
			coverage=I(matrix(numeric(),
				nrow=l,
				ncol=maxPermutations)),
			mismatches=I(character(l)))
		
		targets <- array("",
			c(l, maxPermutations, maxLength - minLength + 1),
			dimnames=list(position=NULL, permutation=NULL, length=NULL))
		
		begin <- 1L
		for (i in 1:l) {
			# find all target_sites
			w <- .Call("multiMatch", target$start_aligned, u[i], begin, PACKAGE="DECIPHER")
			begin <- w[1]
			target_site <- target$target_site[w]
			perms <- length(target_site)
			
			if (perms > maxPermutations)
				next
			
			if (minCoverage > sum(target$coverage[w]))
				next
			
			if (minGroupCoverage > sum(target$groupCoverage[w]))
				next
			
			probes$permutations[i] <- perms
			probes$coverage[i, 1:perms] <- target$coverage[w]
			
			# determine optimal length
			for (j in 1:perms) {
				n <- nchar(target_site[j])
				for (p in minLength:ifelse(n > maxLength, maxLength, n)) {
					targets[i, j, p - minLength + 1] <- substr(target_site[j], 1, p)
				}
			}
			probes$start_aligned[i] <- u[i]
			probes$start[i] <- target$start[w[1]]
		}
		
		w <- which(probes$permutations==0)
		if (length(w)==dim(probes)[1]) {
			warning("All target sites have too many permutations: ", id)
			next
		}
		if (length(w) > 0) {
			probes <- probes[-w,]
			targets <- targets[-w,,]
			dim(targets) <- c(l - length(w), maxPermutations, maxLength - minLength + 1)
			l <- dim(probes)[1]
		}
		
		# Choose optimal length for each probe
		w <- integer()
		empty <- integer()
		for (j in 1:(maxLength - minLength + 1)) {
			if (length(empty) > 0) {
				empty <- c((1:(l*maxPermutations))[w],
					which(targets[,,j]==""),
					empty)
			} else {
				empty <- c((1:(l*maxPermutations))[w],
					which(targets[,,j]==""))
			}
			t <- targets[,,j]
			if (length(empty) > 0) {
				t <- t[-empty]
			} else {
				t <- as.vector(t)
			}
			if (length(t)==0)
				break
			
			# calculate hybridization efficiency
			eff <- CalculateEfficiencyFISH(unlist(strsplit(toString(reverseComplement(DNAStringSet(t))),
					", ",
					fixed=TRUE)),
				t,
				hybTemp,
				P,
				Na,
				FA,
				batchSize)
			w <- which(eff[, "HybEff"] >= minEfficiency)
			if (length(w)==0)
				next
			
			if (length(empty) > 0) {
				probes$probe[-empty][w] <- strsplit(toString(reverseComplement(DNAStringSet(targets[,,j][-empty][w]))),
					", ",
					fixed=TRUE)[[1]]
				probes$efficiency[-empty][w] <- eff[w, "HybEff"]
				probes$FAm[-empty][w] <- eff[w, "FAm"]
			} else {
				probes$probe[w] <- strsplit(toString(reverseComplement(DNAStringSet(targets[,,j][w]))),
					", ",
					fixed=TRUE)[[1]]
				probes$efficiency[w] <- eff[w, "HybEff"]
				probes$FAm[w] <- eff[w, "FAm"]
			}
			
			a <- apply(probes$probe, 1, function(x) length(which(x != "")))
			w <- which(probes$permutations != a)
			if (length(w) > 0) {
				probes$probe[w,] <- NA
				probes$efficiency[w,] <- NA
				probes$FAm[w,] <- NA
			}
			
			w <- which(probes$probe != "")
		}
		
		w <- integer()
		for (i in 1:l) {
			if (probes$start_aligned[i]==0) {
				w <- c(w,i) # empty probe
				next
			}
			p <- probes$permutations[i]
			
			numPerms <- Inf
			full <- which(probes$probe[i,]!="")
			if (length(full) != 0) {
				c <- suppressWarnings(ConsensusSequence(DNAStringSet(probes$probe[i, full]),
					threshold=0.01))
				a <- alphabetFrequency(c)
				numPerms <- sum(2*a[1, 5:10], 3*a[1, 11:14], 4*a[1, 15])
			}
			
			if (numPerms > probes$permutations[i] || p != length(which(probes$probe[i,]!=""))) {
				w <- c(w,i)
				probes$permutations[i] <- 0
				probes$probe[i,] <- NA
				probes$efficiency[i,] <- NA
				probes$score[i] <- -Inf
			} else {
				d <- duplicated(probes$probe[i,], incomparables=NA)
				l <- length(which(d))
				if (l > 0) {
					for (j in which(d)) {
						w1 <- which(probes$probe[i,1:(j - 1)]==probes$probe[i,j])[1]
						probes$coverage[i,w1] <- probes$coverage[i,w1] + probes$coverage[i,j]
						probes$coverage[i,j] <- NA
					}
					probes$permutations[i] <- probes$permutations[i] - l
					probes$probe[i,] <- c(probes$probe[i,!d],
						rep(NA, l))
					probes$efficiency[i,] <- c(probes$efficiency[i,!d],
						rep(NA, l))
					probes$FAm[i,] <- c(probes$FAm[i,!d],
						rep(NA, l))
					probes$coverage[i,] <- c(probes$coverage[i,!d],
						rep(NA, l))
				}
			}
		}
		
		if (length(w) > 0)
			probes <- probes[-w,]
		d <- dim(probes)[1]
		if (d==0) {
			warning("No probes met the specified constraints: ", id)
			next
		}
		
		# Determine which probes also meet original model constraints
		w <- which(probes$probe != "")
		index <- rep(1:d, maxPermutations)
		index <- index[w]
		
		templates <- rep(template, length(w))
		.substr(templates,
			probes$start[index],
			probes$start[index] + nchar(probes$probe[w]) - 1) <-
			unlist(strsplit(toString(reverseComplement(DNAStringSet(probes$probe[w]))),
				", ",
				fixed=TRUE))
		eff <- .CalculateEfficiencyFISH(as.character(probes$probe[w]),
			templates,
			probes$start[index],
			probes$start[index] + nchar(probes$probe[w]) - 1,
			hybTemp,
			P,
			Na,
			FA=0,
			batchSize,
			type=molecule)
		
		w <- which(!is.na(eff[,"FAm"]))
		if (length(w) > 0)
			w <- w[which(eff[w, "FAm"] < 70 &
				eff[w, "FAm"] > (FA - 15) &
				eff[w, "dG2"] > -3)]
		if (length(w)==0) {
			w <- 1:(dim(probes)[1])
		} else {
			w <- unique(index[-w])
		}
		
		if (length(w) > 0)
			probes <- probes[-w,]
		d <- dim(probes)[1]
		if (d==0) {
			warning("No probes met the specified constraints: ", id)
			next
		}
		
		if (verbose) {
			cat(" (", d, " candidate probe", ifelse(d==1, "", "s"), "):\n", sep="")
			flush.console()
			pBar <- txtProgressBar(min=0, max=100, initial=0, style=ifelse(interactive(), 3, 1))
		}
		
		for (k in 1:length(ids)) {
			if (ids[k] == id)
				next
			
			w <- which(tiles$id==ids[k])
			target <- tiles[w,]
			
			if (any(uw != unique(target$width)))
				next
			
			combos <- numeric(d)
			count <- 1
			nontargets <- targets <- character(d*sum(probes$permutations)*max(table(target$start)))
			FAms <- numeric(d*sum(probes$permutations)*max(table(target$start)))
			
			begin <- 1L
			for (i in 1:d) {
				# find all non-target_sites
				w <- .Call("multiMatchUpper", target$start_aligned, probes$start_aligned[i], begin, PACKAGE="DECIPHER")
				
				if (length(w)==0) # target does not overlap nontarget
					break
				
				begin <- w[1]
				target_site <- target$target_site[w]
				
				j <- length(target_site)
				if (j==0) # no nontarget
					next
				
				w <- .Call("multiMatchCharNotNA", probes$probe[i,], PACKAGE="DECIPHER")
				l <- length(w)
				
				if (l > 0) {
					combos[i] <- j*l
					targets[count:(count + combos[i] - 1)] <- rep(probes$probe[i,w],
						each=j)
					nontargets[count:(count + combos[i] - 1)] <- rep(target_site,
						l)
#					FAms[count:(count + combos[i] - 1)] <- rep(probes$FAm[i,w],
#						each=j)
					FAms[count:(count + combos[i] - 1)] <- min(probes$FAm[i,w])
					count <- count + combos[i]
				}
			}
			
			if (count==1) # sites do not overlap
				next
			
#			p <- pairwiseAlignment(nontargets[1:(count - 1)],
#				reverseComplement(DNAStringSet(targets[1:(count - 1)])),
#				type="local-global",
#				gapOpen=-10,
#				gapExtension=-10)
			
			eff <- CalculateEfficiencyFISH(targets[1:(count - 1)],
				nontargets[1:(count - 1)],
				hybTemp,
				P,
				Na,
				FA,
				batchSize)
			
			eff_max <- numeric(d)
			ddG1 <- numeric(d)
			FAm <- numeric(d)
			dFAms <- numeric(d)
			dFAms[] <- -Inf
			index <- integer(d)
			count <- 1
			for (i in 1:d) {
				if (combos[i] != 0) {
					dFAm <- eff[count:(count + combos[i] - 1), "FAm"] - FAms[count:(count + combos[i] - 1)]
					w <- which.max(dFAm)
					dFAms[i] <- dFAm[w]
					eff_max[i] <- eff[count:(count + combos[i] - 1), "HybEff"][w]
					ddG1[i] <- eff[count:(count + combos[i] - 1), "ddG1"][w]
					FAm[i] <- eff[count:(count + combos[i] - 1), "FAm"][w]
					index[i] <- count + w - 1
					count <- count + combos[i]
				}
			}
			
			probes$score <- probes$score - ifelse(dFAms < -20, 0, 0.2 + 1.2^ifelse(dFAms > 0, 0, dFAms)) # score = 0.2*n + 1.2^dFAm
			
			w <- which(dFAms >= -20)
			
			if (length(w)==0)
				next
			
			s1 <- targets[index[w]]
			m <- match(s1, probes$probe)
			s2 <- reverseComplement(DNAStringSet(nontargets[index[w]]))
			p <- pairwiseAlignment(s1,
				paste("----", s2, "----", sep=""),
				type="global-local",
				gapOpen=-10,
				gapExtension=-10)
			s1 <- toString(pattern(p))
			s1 <- unlist(strsplit(s1, ", ", fixed=TRUE))
			s2 <- toString(subject(p))
			s2 <- unlist(strsplit(s2, ", ", fixed=TRUE))
			s2 <- toString(reverseComplement(DNAStringSet(s2)))
			s2 <- unlist(strsplit(s2, ", ", fixed=TRUE))
			
			probes$mismatches[w] <- paste(probes$mismatches[w],
				ids[k],
				" (",
				formatC(100*eff_max[w],
					digits=1,
					width=1,
					format="f"),
				"%,",
				formatC(ddG1[w],
					digits=2,
					width=1,
					format="f"),
				"kcal/mol,",
				formatC(dFAms[w],
					digits=1,
					width=1,
					format="f"),
				"%;",
				s1,
				"/",
				s2,
				") ",
				sep="")
			
			if (worstScore > -Inf) {
				w <- probes$score < worstScore
				if (any(w)) {
					probes$probe[w,] <- NA
					probes$efficiency[w,] <- NA
					probes$FAm[w,] <- NA
					probes$mismatches[w] <- ""
					probes$score[w] <- -Inf
					probes$permutations[w] <- 0
					probes$coverage[w,] <- NA
				}
				w <- which(w)
				if (length(w) > 0)
					probes <- probes[-w,]
				d <- dim(probes)[1]
				if (d==0) {
					warning("All probes were below the worstScore: ", id)
					next
				}
			}
			
			if (verbose)
				setTxtProgressBar(pBar,
					floor(100*k/l_ids))
		}
		
		if (numProbeSets > 0) {
			if (d==1) {
				warning("Not enough probes to design a probe set:", id)
				break
			}
			# order probes by score
			searchSpace <- ifelse(numProbeSets > 100, numProbeSets, 100)
			f <- which(is.finite(probes$score))
			n <- unlist(lapply(strsplit(probes$mismatches, ") ", fixed=TRUE), length))
			o <- order(-1*n[f],
				probes$score[f],
				-1*probes$permutations[f],
				rowSums(as.matrix(probes$coverage[f,]), na.rm=TRUE),
				decreasing=TRUE)
			s <- ifelse(length(f) > searchSpace, searchSpace, length(f))
			if (s < 2) { # need at least 2 probes to form a set
				warning("Not enough probe sets meet the specified constaints:", id)
				
				if (verbose) {
					setTxtProgressBar(pBar, 100)
					close(pBar)
					time.2 <- Sys.time()
					cat("\n")
					print(signif(difftime(time.2,
						time.1,
						units='secs'),
						digits=2))
				}
				
				next
			}
			
			# calculate the set's efficiency
			MMs <- strsplit(probes$mismatches[f[o][1:s]], "mol,", fixed=TRUE)
			ls <- unlist(lapply(MMs, length))
			ls <- ifelse(ls > 0, ls - 1, 0)
			index <- rep(1:length(ls), ls)
			if (length(index) > 0) {
				MMs <- strsplit(unlist(MMs), "%;", fixed=TRUE)
				MMs <- unlist(strsplit(unlist(MMs), ") ", fixed=TRUE))
				effs <- as.numeric(MMs[seq(2, length(MMs), 3)])
				MMs <- MMs[seq(1, length(MMs), 3)]
				MMs <- unlist(strsplit(MMs, " (", fixed=TRUE))
				MMs <- MMs[seq(1, length(MMs), 2)]
			}
			
			m <- matrix(Inf,
				nrow=s,
				ncol=s,
				dimnames=list(f[o][1:s],
					f[o][1:s]))
			n <- matrix(Inf,
				nrow=s,
				ncol=s,
				dimnames=list(f[o][1:s],
					f[o][1:s]))
			for (i in 1:(s - 1)) {
				w1 <- which(index==i)
				if (length(w1)==0) {
					n[i, (i + 1):s] <- 0
					m[i, (i + 1):s] <- 0
					next
				}
				for (j in (i + 1):s) {
					w2 <- which(index==j)
					if (length(w2)==0) {
						n[i, j] <- 0
						m[i, j] <- 0
						next
					}
					overlap_MMs <- match(MMs[w1], MMs[w2])
					w <- which(!is.na(overlap_MMs))
					n[i, j] <- length(w)
					if (length(w) > 0) {
						m[i, j] <- sum(tapply(c(effs[w1[w]], effs[w2[overlap_MMs[w]]]),
							rep(1:length(w), 2),
							function(x) return(1.2^ifelse(min(x) > 0, 0, min(x)))))
					} else {
						m[i, j] <- 0
					}
				}
			}
			
			# choose the best probe sets
			d <- dimnames(m)
			w <- which(pSets$identifier==id)
			p <- outer(probes$permutations[f[o][1:s]],
				probes$permutations[f[o][1:s]],
				FUN="+")
			c <- -1*outer(rowSums(as.matrix(probes$coverage[f[o][1:s],]), na.rm=TRUE),
				ifelse(rep(s, s)==1,
					sum(probes$coverage[f[o][1:s],], na.rm=TRUE),
					rowSums(as.matrix(probes$coverage[f[o][1:s],]), na.rm=TRUE)),
				FUN="*")
			ss <- -1*outer(probes$score[f[o][1:s]],
				probes$score[f[o][1:s]],
				FUN="+")
			o <- order(m + n/5, p, c, ss) # score = 0.2*n + 1.2^dFAm
			j <- 0
			dims <- dim(m)
			for (k in 1:numProbeSets) {
				if ((j + 1) > length(o)) {
					warning("Not enough probe sets meet the specified constaints:", id)
					break
				}
				for (j in (j + 1):length(o)) {
					w_o <- c((o[j] - 1)%%dims[1] + 1,
						(o[j] - 1)%/%dims[1] + 1)
					f <- as.numeric(d[[1]][w_o[1]])
					r <- as.numeric(d[[2]][w_o[2]])
					
					start_F <- probes$start[f]
					start_R <- probes$start[r]
					if (abs(start_F - start_R) > (maxLength + 50)) # 50 nt separation
						break
				}
				
				if (abs(start_F - start_R) > maxLength) {
					pSets$start_one[w[k]] <- probes$start[f]
					pSets$start_two[w[k]] <- probes$start[r]
					pSets$start_aligned_one[w[k]] <- probes$start_aligned[f]
					pSets$start_aligned_two[w[k]] <- probes$start_aligned[r]
					pSets$permutations_one[w[k]] <- probes$permutations[f]
					pSets$permutations_two[w[k]] <- probes$permutations[r]
					pSets$score_one[w[k]] <- probes$score[f]
					pSets$score_two[w[k]] <- probes$score[r]
					pSets$score_set[w[k]] <- -m[o[j]]/100
					pSets$one_probe[w[k],] <- probes$probe[f,]
					pSets$two_probe[w[k],] <- probes$probe[r,]
					pSets$one_efficiency[w[k],] <- probes$efficiency[f,]
					pSets$two_efficiency[w[k],] <- probes$efficiency[r,]
					pSets$one_FAm[w[k],] <- probes$FAm[f,]
					pSets$two_FAm[w[k],] <- probes$FAm[r,]
					pSets$one_coverage[w[k],] <- probes$coverage[f,]
					pSets$two_coverage[w[k],] <- probes$coverage[r,]
					pSets$mismatches_one[w[k]] <- probes$mismatches[f]
					pSets$mismatches_two[w[k]] <- probes$mismatches[r]
					
					w_F <- which(index==w_o[1])
					w_R <- which(index==w_o[2])
					overlap_MMs <- match(MMs[w_F], MMs[w_R])
					w_S <- which(!is.na(overlap_MMs))
					if (length(w_S)==0)
						next
					EFFs <- tapply(c(effs[w_F[w_S]], effs[w_R[overlap_MMs[w_S]]]),
							rep(1:length(w_S), 2),
							min)
					# record set mismatches
					w1 <- which(EFFs >= -20)
					if (length(w1) > 0)
						pSets$mismatches_set[w[k]] <- paste(MMs[w_F][w_S][w1],
							" (",
							formatC(EFFs[w1],
								digits=1,
								width=1,
								format="f"),
							"%)",
							sep="",
							collapse=", ")
				} else {
					warning("Not enough probes to meet the specified constraints: ", id)
					break
				}
			}
		}
		
		if (verbose) {
			setTxtProgressBar(pBar, 100)
			close(pBar)
			time.2 <- Sys.time()
			cat("\n")
			print(signif(difftime(time.2,
				time.1,
				units='secs'),
				digits=2))
		}
		
		if (numProbeSets == 0)
			probes_all <- rbind(probes_all,probes)
	}
	
	if (numProbeSets > 0) {
		w <- which(pSets$start==0)
		if (length(w) > 0)
			pSets <- pSets[-w,]
		return(pSets)
	} else {
		w <- which(probes_all$start==0)
		if (length(w) > 0)
			probes_all <- probes_all[-w,]
		return(probes_all)
	}
}
Any scripts or data that you put into this service are public.
DECIPHER documentation built on Nov. 8, 2020, 8:30 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
DECIPHER
Tools for curating, analyzing, and manipulating biological sequences

R/DesignProbes.R
In DECIPHER: Tools for curating, analyzing, and manipulating biological sequences

Defines functions DesignProbes .CalculateEfficiencyFISH .tiledTemplate `.substr<-`

Documented in DesignProbes

Try the DECIPHER package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

DECIPHER Tools for curating, analyzing, and manipulating biological sequences

R/DesignProbes.R In DECIPHER: Tools for curating, analyzing, and manipulating biological sequences

Defines functions DesignProbes .CalculateEfficiencyFISH .tiledTemplate `.substr<-`

Documented in DesignProbes

Try the DECIPHER package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

DECIPHER
Tools for curating, analyzing, and manipulating biological sequences

R/DesignProbes.R
In DECIPHER: Tools for curating, analyzing, and manipulating biological sequences