Nothing
.write.tmp.parameters <- function(param){
temp.file.prefix <- file.path(".",paste(".Rsubread_PARAM_cellCounts_",Sys.getpid(),sep=""))
param$PMR_CREATE_TIME <- Sys.time()
saveRDS(param, temp.file.prefix)
}
.retrieve.tmp.parameters <- function(){
temp.file.prefix <- file.path(".",paste(".Rsubread_PARAM_cellCounts_",Sys.getpid(),sep=""))
if(!file.exists(temp.file.prefix)) return (NA)
rv <- readRDS(temp.file.prefix)
file.remove(temp.file.prefix)
if( Sys.time() - rv$PMR_CREATE_TIME > 5) return (NA) # the file should be created no longer than 5 seconds ago
rv
}
.index.names.to.sheet<-function(dirname, nametab, fname, sample.name=NA){
lanes <- c()
index.names <- c()
sample.names <- c()
index.seq <- c()
for(cli in 1:nrow(nametab)){
if(nametab$InputDirectory[cli]!=dirname)next
if((!is.na(sample.name)) && nametab$SampleName[cli]!=sample.name)next
seqs <- .convert.sample_index.id.to.seq(nametab$IndexSetName[cli])
for(seq in seqs){
lanes <-c(lanes, nametab$Lane[cli])
index.names <-c(index.names, nametab$IndexSetName[cli])
sample.names <- c(sample.names, nametab$SampleName[cli])
index.seq <- c(index.seq, seq)
}
}
sdf<-data.frame(Lane=lanes, Sample_ID=index.names, Sample_Name=sample.names, index=index.seq, Sample_Project=rep("cellCounts", length(lanes)))
fileConn<-file(fname)
lines <- paste( sdf$Lane, sdf$Sample_ID, sdf$Sample_Name, sdf$index, sdf$Sample_Project ,sep=",")
writeLines(c("EMFileVersion,4","[data]","Lane,Sample_ID,Sample_Name,index,Sample_Project",lines), fileConn)
close(fileConn)
}
.convert.sample_index.id.to.seq <- function(spid){
seqs <- .one.convert.sample_index.id.to.seq(spid)
if(any(is.na(seqs))){
spid <- gsub("-","_", spid)
seqs <- .one.convert.sample_index.id.to.seq(spid)
}
if(any(is.na(seqs)))stop(paste("Unable to find sample index for", spid))
seqs
}
.one.convert.sample_index.id.to.seq <- function(spid){
spseqs <- NA
if(spid == "SI_001") spseqs <- c('TCGCCATA', 'GTATACAC', 'AATGGTGG', 'CGCATGCT')
if(spid == "SI_002") spseqs <- c('TATCCTCG', 'GCGAGGTC', 'CGCTTCAA', 'ATAGAAGT')
if(spid == "SI_003") spseqs <- c('TGACGTCG', 'CTTGTGTA', 'ACGACCGT', 'GACTAAAC')
if(spid == "SI_004") spseqs <- c('ATCTAGCT', 'GAGCGTAC', 'TCAGCCTG', 'CGTATAGA')
if(spid == "SI_005") spseqs <- c('CCGTTCCC', 'ATACAGTT', 'TGTAGTAA', 'GACGCAGG')
if(spid == "SI_006") spseqs <- c('TCAATTGG', 'AGTTAGAA', 'GAGCGCTT', 'CTCGCACC')
if(spid == "SI_007") spseqs <- c('CTGCCTTG', 'ACTAGCCC', 'GGCTAGAT', 'TAAGTAGA')
if(spid == "SI_008") spseqs <- c('GGCAGAAA', 'ACGGTTCT', 'CATTCGTC', 'TTACACGG')
if(spid == "SI_P01_A1") spseqs <- c('TTGTAAGA', 'GGCGTTTC', 'CCTACCAT', 'AAACGGCG')
if(spid == "SI_P01_B1") spseqs <- c('CTAGCTGT', 'GCCAACAA', 'AGGCTACC', 'TATTGGTG')
if(spid == "SI_P01_C1") spseqs <- c('GATGCAGT', 'AGACTTTC', 'TTCTACAG', 'CCGAGGCA')
if(spid == "SI_P01_D1") spseqs <- c('AGCTGCGT', 'GTGGAGCA', 'TCTATTAG', 'CAACCATC')
if(spid == "SI_P01_E1") spseqs <- c('CGCCCGTA', 'GTTTGCCT', 'TAAGTTAG', 'ACGAAAGC')
if(spid == "SI_P01_F1") spseqs <- c('TGACTAGT', 'GATAGGTA', 'CCGTACAG', 'ATCGCTCC')
if(spid == "SI_P01_G1") spseqs <- c('CATATGCG', 'ATGCGATT', 'TCCGCTAC', 'GGATACGA')
if(spid == "SI_P01_H1") spseqs <- c('TCTTGTCC', 'CGGGAGTA', 'GTCACAGG', 'AAACTCAT')
if(spid == "SI_P01_A2") spseqs <- c('AGCCCTTT', 'TCTTAGGC', 'GTGAGAAG', 'CAAGTCCA')
if(spid == "SI_P01_B2") spseqs <- c('GGTCGAGC', 'TTAGATTG', 'CCCACCCA', 'AAGTTGAT')
if(spid == "SI_P01_C2") spseqs <- c('CCGAGAAC', 'TGCTCTGT', 'GTAGTGCG', 'AATCACTA')
if(spid == "SI_P01_D2") spseqs <- c('ACATTCCG', 'GACACAAT', 'CTGCGGTA', 'TGTGATGC')
if(spid == "SI_P01_E2") spseqs <- c('TCATCAAG', 'GTTGGTCC', 'AGGCTGGT', 'CACAACTA')
if(spid == "SI_P01_F2") spseqs <- c('TGCCCGCT', 'GCAAACGC', 'CATTGATA', 'ATGGTTAG')
if(spid == "SI_P01_G2") spseqs <- c('CGGTGAGC', 'ATAACCTA', 'TCCGAGCG', 'GATCTTAT')
if(spid == "SI_P01_H2") spseqs <- c('CCGAACTC', 'AACGGTCA', 'TTATTGGT', 'GGTCCAAG')
if(spid == "SI_P01_A3") spseqs <- c('AAAGCATA', 'GCCTTTAT', 'CTGCAGCC', 'TGTAGCGG')
if(spid == "SI_P01_B3") spseqs <- c('TCATCCTT', 'ATTGGACG', 'CAGCTTAC', 'GGCAAGGA')
if(spid == "SI_P01_C3") spseqs <- c('ACGTTACA', 'TTACCTAC', 'GACGACGG', 'CGTAGGTT')
if(spid == "SI_P01_D3") spseqs <- c('GAGCACGC', 'CGAAGTTG', 'TTCGTGAA', 'ACTTCACT')
if(spid == "SI_P01_E3") spseqs <- c('TCTGCAGG', 'CGGCTCCA', 'AACAAGTC', 'GTATGTAT')
if(spid == "SI_P01_F3") spseqs <- c('TTAGGACC', 'AGTCTGTA', 'GCCTCCGT', 'CAGAATAG')
if(spid == "SI_P01_G3") spseqs <- c('TACGAGTT', 'ATGTCCAG', 'GCTATAGC', 'CGACGTCA')
if(spid == "SI_P01_H3") spseqs <- c('TTGGGCTT', 'GACAAACC', 'ACACCTAA', 'CGTTTGGG')
if(spid == "SI_P01_A4") spseqs <- c('CATGGCAG', 'AGAACGCC', 'GTCTTTGA', 'TCGCAATT')
if(spid == "SI_P01_B4") spseqs <- c('GACAGGCT', 'CCTCTAAC', 'AGGGACTG', 'TTATCTGA')
if(spid == "SI_P01_C4") spseqs <- c('ACATTGGC', 'GAGCCCAT', 'CTTAGTCA', 'TGCGAATG')
if(spid == "SI_P01_D4") spseqs <- c('AAATCGTC', 'GCGATCGG', 'CTTCGAAT', 'TGCGATCA')
if(spid == "SI_P01_E4") spseqs <- c('GTAAACAT', 'TATCCTGA', 'AGCTGACG', 'CCGGTGTC')
if(spid == "SI_P01_F4") spseqs <- c('GCATGATA', 'CGTCCTCT', 'AACGACAC', 'TTGATGGG')
if(spid == "SI_P01_G4") spseqs <- c('CCTGCGGT', 'GTACAACG', 'AGCTTCTC', 'TAGAGTAA')
if(spid == "SI_P01_H4") spseqs <- c('TTCCATCT', 'ACTGGAGC', 'CGGTCGTG', 'GAAATCAA')
if(spid == "SI_P01_A5") spseqs <- c('CAGTCTGG', 'TCACACTC', 'ATTGGGAA', 'GGCATACT')
if(spid == "SI_P01_B5") spseqs <- c('TCATGCGA', 'ATCGTACT', 'CATCAGTG', 'GGGACTAC')
if(spid == "SI_P01_C5") spseqs <- c('TCAGTCAA', 'CACTGACT', 'ATGCATTC', 'GGTACGGG')
if(spid == "SI_P01_D5") spseqs <- c('GTCGACTC', 'AGACGGAT', 'CCTTTAGA', 'TAGACTCG')
if(spid == "SI_P01_E5") spseqs <- c('CCGTTGAA', 'TATGCTCT', 'ATCCAAGG', 'GGAAGCTC')
if(spid == "SI_P01_F5") spseqs <- c('TCTGACTA', 'GTACGGCT', 'CGGTTTAG', 'AACACAGC')
if(spid == "SI_P01_G5") spseqs <- c('ATGAAGTA', 'GAAGCTCG', 'TCTTTCGT', 'CGCCGAAC')
if(spid == "SI_P01_H5") spseqs <- c('ATAGTATG', 'TATAAGGA', 'GGCTCCAC', 'CCGCGTCT')
if(spid == "SI_P01_A6") spseqs <- c('CTTTCGAC', 'ACGGGACT', 'TGCATCTG', 'GAACATGA')
if(spid == "SI_P01_B6") spseqs <- c('GCGCACCT', 'AACGCGAA', 'CTATTTGG', 'TGTAGATC')
if(spid == "SI_P01_C6") spseqs <- c('CGCTCAGG', 'GAGGTTTA', 'ACTCAGAC', 'TTAAGCCT')
if(spid == "SI_P01_D6") spseqs <- c('GAAGTCTT', 'TGCAGGGC', 'ATGCCAAA', 'CCTTATCG')
if(spid == "SI_P01_E6") spseqs <- c('TGATGGCT', 'GCCATTTG', 'ATTGAAAC', 'CAGCCCGA')
if(spid == "SI_P01_F6") spseqs <- c('GACTTCCT', 'TGAGGAAG', 'ATGCCGGC', 'CCTAATTA')
if(spid == "SI_P01_G6") spseqs <- c('GTGCGACA', 'TCAGTGTT', 'AGCACTGG', 'CATTACAC')
if(spid == "SI_P01_H6") spseqs <- c('AAGCATAA', 'CCCATCGC', 'TTAGCGCT', 'GGTTGATG')
if(spid == "SI_P01_A7") spseqs <- c('CTCATCAT', 'TAACGTCC', 'AGGTCATA', 'GCTGAGGG')
if(spid == "SI_P01_B7") spseqs <- c('TCCACACG', 'CTTCTGTT', 'GAATGCAC', 'AGGGATGA')
if(spid == "SI_P01_C7") spseqs <- c('GGCGGAAT', 'ACACCGGG', 'CATAATCC', 'TTGTTCTA')
if(spid == "SI_P01_D7") spseqs <- c('CCGGATCC', 'GGTCGCAT', 'TTAACGTG', 'AACTTAGA')
if(spid == "SI_P01_E7") spseqs <- c('AAGACGTG', 'CCATGTGT', 'GTTCACAA', 'TGCGTACC')
if(spid == "SI_P01_F7") spseqs <- c('GGTTAGAC', 'CAAACTTT', 'ACCCGAGA', 'TTGGTCCG')
if(spid == "SI_P01_G7") spseqs <- c('GCCGGTAA', 'TGACTGCC', 'ATTACCGG', 'CAGTAATT')
if(spid == "SI_P01_H7") spseqs <- c('TGGCACGA', 'AACGGGTG', 'CTAATTCT', 'GCTTCAAC')
if(spid == "SI_P01_A8") spseqs <- c('GACTGTTC', 'ATGATACG', 'CCACAGAA', 'TGTGCCGT')
if(spid == "SI_P01_B8") spseqs <- c('ACGTTCAC', 'TGCCCAGA', 'CAAGGTCT', 'GTTAAGTG')
if(spid == "SI_P01_C8") spseqs <- c('TTTATCCC', 'GCACGTTT', 'CAGGAAGA', 'AGCTCGAG')
if(spid == "SI_P01_D8") spseqs <- c('AATCTTTG', 'GGATGAGT', 'CTCAAGAC', 'TCGGCCCA')
if(spid == "SI_P01_E8") spseqs <- c('GCTTACAT', 'TAGGGTGC', 'AGCCTATG', 'CTAACGCA')
if(spid == "SI_P01_F8") spseqs <- c('AGTTGGGA', 'TACATTCT', 'CCAGAAAG', 'GTGCCCTC')
if(spid == "SI_P01_G8") spseqs <- c('AAGTACTC', 'GGAACTCT', 'TCCCTGAG', 'CTTGGAGA')
if(spid == "SI_P01_H8") spseqs <- c('AAGAGCGG', 'TCATAGCA', 'GGCCCATC', 'CTTGTTAT')
if(spid == "SI_P01_A9") spseqs <- c('GAGTGCGT', 'CTCCAACA', 'ACAACTTG', 'TGTGTGAC')
if(spid == "SI_P01_B9") spseqs <- c('AAGCGTGT', 'CTTGACCG', 'TGATTAAC', 'GCCACGTA')
if(spid == "SI_P01_C9") spseqs <- c('AGATCGGT', 'CATCGTCG', 'GTCATATA', 'TCGGACAC')
if(spid == "SI_P01_D9") spseqs <- c('CAAGGGAC', 'ACCTACTG', 'GGGACACA', 'TTTCTTGT')
if(spid == "SI_P01_E9") spseqs <- c('AGTAAGCA', 'TACCGCGG', 'CCGGTAAT', 'GTATCTTC')
if(spid == "SI_P01_F9") spseqs <- c('AGTTAGTT', 'GTACTTAA', 'CACGCACG', 'TCGAGCGC')
if(spid == "SI_P01_G9") spseqs <- c('TTGACTTC', 'GCCGAAGT', 'CAATGGCA', 'AGTCTCAG')
if(spid == "SI_P01_H9") spseqs <- c('GGAATATG', 'ACCTGCCA', 'CTTCATAC', 'TAGGCGGT')
if(spid == "SI_P01_A10") spseqs <- c('ACAGCAAC', 'TTTCGCGA', 'CGCAATTT', 'GAGTTGCG')
if(spid == "SI_P01_B10") spseqs <- c('ACCATTAA', 'CTGGACGT', 'GAACGGTC', 'TGTTCACG')
if(spid == "SI_P01_C10") spseqs <- c('CGTGCTAA', 'TCACTCCT', 'ATCTGATC', 'GAGAAGGG')
if(spid == "SI_P01_D10") spseqs <- c('CTTATTTG', 'GCGGGCAT', 'AGATAACA', 'TACCCGGC')
if(spid == "SI_P01_E10") spseqs <- c('GCACCAGT', 'CGCAGGAG', 'TAGTACCA', 'ATTGTTTC')
if(spid == "SI_P01_F10") spseqs <- c('TCGACAAT', 'GAATACTG', 'ATTCGTGC', 'CGCGTGCA')
if(spid == "SI_P01_G10") spseqs <- c('CGGAGACT', 'TCCTATGA', 'ATACTGAG', 'GATGCCTC')
if(spid == "SI_P01_H10") spseqs <- c('GACCGCCA', 'TCGAATTG', 'ATTTCAGC', 'CGAGTGAT')
if(spid == "SI_P01_A11") spseqs <- c('CTTTCCTT', 'TAGGTAAA', 'ACCAGTCC', 'GGACAGGG')
if(spid == "SI_P01_B11") spseqs <- c('TCCAGATA', 'GATTCGCT', 'CGACATAG', 'ATGGTCGC')
if(spid == "SI_P01_C11") spseqs <- c('GTTTGTGG', 'ACCGAACA', 'TAGACGAC', 'CGACTCTT')
if(spid == "SI_P01_D11") spseqs <- c('GCTACTTC', 'CACCTCAG', 'ATATGAGA', 'TGGGAGCT')
if(spid == "SI_P01_E11") spseqs <- c('ATCGCCAT', 'TCACGGTA', 'GGGTTTCC', 'CATAAAGG')
if(spid == "SI_P01_F11") spseqs <- c('GAACCCGG', 'AGCAGTTA', 'TCGTAGAT', 'CTTGTACC')
if(spid == "SI_P01_G11") spseqs <- c('AGGGCGTT', 'CTATACGC', 'TACATAAG', 'GCTCGTCA')
if(spid == "SI_P01_H11") spseqs <- c('TCTCGACT', 'AGGATCGA', 'CACGATTC', 'GTATCGAG')
if(spid == "SI_P01_A12") spseqs <- c('TTATGGAA', 'ACTACTGT', 'CGGGAACG', 'GACCTCTC')
if(spid == "SI_P01_B12") spseqs <- c('GAAAGACA', 'CGCTACAT', 'ACGCTTGG', 'TTTGCGTC')
if(spid == "SI_P01_C12") spseqs <- c('TAAGCCAC', 'CCGTTATG', 'GGTAATGT', 'ATCCGGCA')
if(spid == "SI_P01_D12") spseqs <- c('GCTGTGTA', 'AGAAACGT', 'CACTCAAC', 'TTGCGTCG')
if(spid == "SI_P01_E12") spseqs <- c('CGCTATCC', 'ACGCGGAA', 'TAAATCGT', 'GTTGCATG')
if(spid == "SI_P01_F12") spseqs <- c('AATTGAAC', 'TGGACCCT', 'CCAGTGGA', 'GTCCATTG')
if(spid == "SI_P01_G12") spseqs <- c('CATGCGTA', 'ACCCGCAC', 'TGATATCG', 'GTGATAGT')
if(spid == "SI_P01_H12") spseqs <- c('TGTGTATA', 'GTCCAGGC', 'CAATCCCT', 'ACGAGTAG')
if(spid == "SI_3A_A1" || spid == "SI_P03_A1") spseqs <- c('AAACGGCG', 'CCTACCAT', 'GGCGTTTC', 'TTGTAAGA')
if(spid == "SI_3A_B1" || spid == "SI_P03_B1") spseqs <- c('AGGCTACC', 'CTAGCTGT', 'GCCAACAA', 'TATTGGTG')
if(spid == "SI_3A_C1" || spid == "SI_P03_C1") spseqs <- c('AGACTTTC', 'CCGAGGCA', 'GATGCAGT', 'TTCTACAG')
if(spid == "SI_3A_D1" || spid == "SI_P03_D1") spseqs <- c('AGCTGCGT', 'CAACCATC', 'GTGGAGCA', 'TCTATTAG')
if(spid == "SI_3A_E1" || spid == "SI_P03_E1") spseqs <- c('ACGAAAGC', 'CGCCCGTA', 'GTTTGCCT', 'TAAGTTAG')
if(spid == "SI_3A_F1" || spid == "SI_P03_F1") spseqs <- c('ATCGCTCC', 'CCGTACAG', 'GATAGGTA', 'TGACTAGT')
if(spid == "SI_3A_G1" || spid == "SI_P03_G1") spseqs <- c('ATGCGATT', 'CATATGCG', 'GGATACGA', 'TCCGCTAC')
if(spid == "SI_3A_H1" || spid == "SI_P03_H1") spseqs <- c('AAACTCAT', 'CGGGAGTA', 'GTCACAGG', 'TCTTGTCC')
if(spid == "SI_3A_A2" || spid == "SI_P03_A2") spseqs <- c('AGCCCTTT', 'CAAGTCCA', 'GTGAGAAG', 'TCTTAGGC')
if(spid == "SI_3A_B2" || spid == "SI_P03_B2") spseqs <- c('AAGTTGAT', 'CCCACCCA', 'GGTCGAGC', 'TTAGATTG')
if(spid == "SI_3A_C2" || spid == "SI_P03_C2") spseqs <- c('AATCACTA', 'CCGAGAAC', 'GTAGTGCG', 'TGCTCTGT')
if(spid == "SI_3A_D2" || spid == "SI_P03_D2") spseqs <- c('ACATTCCG', 'CTGCGGTA', 'GACACAAT', 'TGTGATGC')
if(spid == "SI_3A_E2" || spid == "SI_P03_E2") spseqs <- c('AGGCTGGT', 'CACAACTA', 'GTTGGTCC', 'TCATCAAG')
if(spid == "SI_3A_F2" || spid == "SI_P03_F2") spseqs <- c('ATGGTTAG', 'CATTGATA', 'GCAAACGC', 'TGCCCGCT')
if(spid == "SI_3A_G2" || spid == "SI_P03_G2") spseqs <- c('ATAACCTA', 'CGGTGAGC', 'GATCTTAT', 'TCCGAGCG')
if(spid == "SI_3A_H2" || spid == "SI_P03_H2") spseqs <- c('AACGGTCA', 'CCGAACTC', 'GGTCCAAG', 'TTATTGGT')
if(spid == "SI_3A_A3" || spid == "SI_P03_A3") spseqs <- c('AAAGCATA', 'CTGCAGCC', 'GCCTTTAT', 'TGTAGCGG')
if(spid == "SI_3A_B3" || spid == "SI_P03_B3") spseqs <- c('ATTGGACG', 'CAGCTTAC', 'GGCAAGGA', 'TCATCCTT')
if(spid == "SI_3A_C3" || spid == "SI_P03_C3") spseqs <- c('ACGTTACA', 'CGTAGGTT', 'GACGACGG', 'TTACCTAC')
if(spid == "SI_3A_D3" || spid == "SI_P03_D3") spseqs <- c('ACTTCACT', 'CGAAGTTG', 'GAGCACGC', 'TTCGTGAA')
if(spid == "SI_3A_E3" || spid == "SI_P03_E3") spseqs <- c('AACAAGTC', 'CGGCTCCA', 'GTATGTAT', 'TCTGCAGG')
if(spid == "SI_3A_F3" || spid == "SI_P03_F3") spseqs <- c('AGTCTGTA', 'CAGAATAG', 'GCCTCCGT', 'TTAGGACC')
if(spid == "SI_3A_G3" || spid == "SI_P03_G3") spseqs <- c('ATGTCCAG', 'CGACGTCA', 'GCTATAGC', 'TACGAGTT')
if(spid == "SI_3A_H3" || spid == "SI_P03_H3") spseqs <- c('ACACCTAA', 'CGTTTGGG', 'GACAAACC', 'TTGGGCTT')
if(spid == "SI_3A_A4" || spid == "SI_P03_A4") spseqs <- c('AGAACGCC', 'CATGGCAG', 'GTCTTTGA', 'TCGCAATT')
if(spid == "SI_3A_B4" || spid == "SI_P03_B4") spseqs <- c('AGGGACTG', 'CCTCTAAC', 'GACAGGCT', 'TTATCTGA')
if(spid == "SI_3A_C4" || spid == "SI_P03_C4") spseqs <- c('ACATTGGC', 'CTTAGTCA', 'GAGCCCAT', 'TGCGAATG')
if(spid == "SI_3A_D4" || spid == "SI_P03_D4") spseqs <- c('AAATCGTC', 'CTTCGAAT', 'GCGATCGG', 'TGCGATCA')
if(spid == "SI_3A_E4" || spid == "SI_P03_E4") spseqs <- c('AGCTGACG', 'CCGGTGTC', 'GTAAACAT', 'TATCCTGA')
if(spid == "SI_3A_F4" || spid == "SI_P03_F4") spseqs <- c('AACGACAC', 'CGTCCTCT', 'GCATGATA', 'TTGATGGG')
if(spid == "SI_3A_G4" || spid == "SI_P03_G4") spseqs <- c('AGCTTCTC', 'CCTGCGGT', 'GTACAACG', 'TAGAGTAA')
if(spid == "SI_3A_H4" || spid == "SI_P03_H4") spseqs <- c('ACTGGAGC', 'CGGTCGTG', 'GAAATCAA', 'TTCCATCT')
if(spid == "SI_3A_A5" || spid == "SI_P03_A5") spseqs <- c('ATTGGGAA', 'CAGTCTGG', 'GGCATACT', 'TCACACTC')
if(spid == "SI_3A_B5" || spid == "SI_P03_B5") spseqs <- c('ATCGTACT', 'CATCAGTG', 'GGGACTAC', 'TCATGCGA')
if(spid == "SI_3A_C5" || spid == "SI_P03_C5") spseqs <- c('ATGCATTC', 'CACTGACT', 'GGTACGGG', 'TCAGTCAA')
if(spid == "SI_3A_D5" || spid == "SI_P03_D5") spseqs <- c('AGACGGAT', 'CCTTTAGA', 'GTCGACTC', 'TAGACTCG')
if(spid == "SI_3A_E5" || spid == "SI_P03_E5") spseqs <- c('ATCCAAGG', 'CCGTTGAA', 'GGAAGCTC', 'TATGCTCT')
if(spid == "SI_3A_F5" || spid == "SI_P03_F5") spseqs <- c('AACACAGC', 'CGGTTTAG', 'GTACGGCT', 'TCTGACTA')
if(spid == "SI_3A_G5" || spid == "SI_P03_G5") spseqs <- c('ATGAAGTA', 'CGCCGAAC', 'GAAGCTCG', 'TCTTTCGT')
if(spid == "SI_3A_H5" || spid == "SI_P03_H5") spseqs <- c('ATAGTATG', 'CCGCGTCT', 'GGCTCCAC', 'TATAAGGA')
if(spid == "SI_3A_A6" || spid == "SI_P03_A6") spseqs <- c('ACGGGACT', 'CTTTCGAC', 'GAACATGA', 'TGCATCTG')
if(spid == "SI_3A_B6" || spid == "SI_P03_B6") spseqs <- c('AACGCGAA', 'CTATTTGG', 'GCGCACCT', 'TGTAGATC')
if(spid == "SI_3A_C6" || spid == "SI_P03_C6") spseqs <- c('ACTCAGAC', 'CGCTCAGG', 'GAGGTTTA', 'TTAAGCCT')
if(spid == "SI_3A_D6" || spid == "SI_P03_D6") spseqs <- c('ATGCCAAA', 'CCTTATCG', 'GAAGTCTT', 'TGCAGGGC')
if(spid == "SI_3A_E6" || spid == "SI_P03_E6") spseqs <- c('ATTGAAAC', 'CAGCCCGA', 'GCCATTTG', 'TGATGGCT')
if(spid == "SI_3A_F6" || spid == "SI_P03_F6") spseqs <- c('ATGCCGGC', 'CCTAATTA', 'GACTTCCT', 'TGAGGAAG')
if(spid == "SI_3A_G6" || spid == "SI_P03_G6") spseqs <- c('AGCACTGG', 'CATTACAC', 'GTGCGACA', 'TCAGTGTT')
if(spid == "SI_3A_H6" || spid == "SI_P03_H6") spseqs <- c('AAGCATAA', 'CCCATCGC', 'GGTTGATG', 'TTAGCGCT')
if(spid == "SI_3A_A7" || spid == "SI_P03_A7") spseqs <- c('AGGTCATA', 'CTCATCAT', 'GCTGAGGG', 'TAACGTCC')
if(spid == "SI_3A_B7" || spid == "SI_P03_B7") spseqs <- c('AGGGATGA', 'CTTCTGTT', 'GAATGCAC', 'TCCACACG')
if(spid == "SI_3A_C7" || spid == "SI_P03_C7") spseqs <- c('ACACCGGG', 'CATAATCC', 'GGCGGAAT', 'TTGTTCTA')
if(spid == "SI_3A_D7" || spid == "SI_P03_D7") spseqs <- c('AACTTAGA', 'CCGGATCC', 'GGTCGCAT', 'TTAACGTG')
if(spid == "SI_3A_E7" || spid == "SI_P03_E7") spseqs <- c('AAGACGTG', 'CCATGTGT', 'GTTCACAA', 'TGCGTACC')
if(spid == "SI_3A_F7" || spid == "SI_P03_F7") spseqs <- c('ACCCGAGA', 'CAAACTTT', 'GGTTAGAC', 'TTGGTCCG')
if(spid == "SI_3A_G7" || spid == "SI_P03_G7") spseqs <- c('ATTACCGG', 'CAGTAATT', 'GCCGGTAA', 'TGACTGCC')
if(spid == "SI_3A_H7" || spid == "SI_P03_H7") spseqs <- c('AACGGGTG', 'CTAATTCT', 'GCTTCAAC', 'TGGCACGA')
if(spid == "SI_3A_A8" || spid == "SI_P03_A8") spseqs <- c('ATGATACG', 'CCACAGAA', 'GACTGTTC', 'TGTGCCGT')
if(spid == "SI_3A_B8" || spid == "SI_P03_B8") spseqs <- c('ACGTTCAC', 'CAAGGTCT', 'GTTAAGTG', 'TGCCCAGA')
if(spid == "SI_3A_C8" || spid == "SI_P03_C8") spseqs <- c('AGCTCGAG', 'CAGGAAGA', 'GCACGTTT', 'TTTATCCC')
if(spid == "SI_3A_D8" || spid == "SI_P03_D8") spseqs <- c('AATCTTTG', 'CTCAAGAC', 'GGATGAGT', 'TCGGCCCA')
if(spid == "SI_3A_E8" || spid == "SI_P03_E8") spseqs <- c('AGCCTATG', 'CTAACGCA', 'GCTTACAT', 'TAGGGTGC')
if(spid == "SI_3A_F8" || spid == "SI_P03_F8") spseqs <- c('AGTTGGGA', 'CCAGAAAG', 'GTGCCCTC', 'TACATTCT')
if(spid == "SI_3A_G8" || spid == "SI_P03_G8") spseqs <- c('AAGTACTC', 'CTTGGAGA', 'GGAACTCT', 'TCCCTGAG')
if(spid == "SI_3A_H8" || spid == "SI_P03_H8") spseqs <- c('AAGAGCGG', 'CTTGTTAT', 'GGCCCATC', 'TCATAGCA')
if(spid == "SI_3A_A9" || spid == "SI_P03_A9") spseqs <- c('ACAACTTG', 'CTCCAACA', 'GAGTGCGT', 'TGTGTGAC')
if(spid == "SI_3A_B9" || spid == "SI_P03_B9") spseqs <- c('AAGCGTGT', 'CTTGACCG', 'GCCACGTA', 'TGATTAAC')
if(spid == "SI_3A_C9" || spid == "SI_P03_C9") spseqs <- c('AGATCGGT', 'CATCGTCG', 'GTCATATA', 'TCGGACAC')
if(spid == "SI_3A_D9" || spid == "SI_P03_D9") spseqs <- c('ACCTACTG', 'CAAGGGAC', 'GGGACACA', 'TTTCTTGT')
if(spid == "SI_3A_E9" || spid == "SI_P03_E9") spseqs <- c('AGTAAGCA', 'CCGGTAAT', 'GTATCTTC', 'TACCGCGG')
if(spid == "SI_3A_F9" || spid == "SI_P03_F9") spseqs <- c('AGTTAGTT', 'CACGCACG', 'GTACTTAA', 'TCGAGCGC')
if(spid == "SI_3A_G9" || spid == "SI_P03_G9") spseqs <- c('AGTCTCAG', 'CAATGGCA', 'GCCGAAGT', 'TTGACTTC')
if(spid == "SI_3A_H9" || spid == "SI_P03_H9") spseqs <- c('ACCTGCCA', 'CTTCATAC', 'GGAATATG', 'TAGGCGGT')
if(spid == "SI_3A_A10" || spid == "SI_P03_A10") spseqs <- c('ACAGCAAC', 'CGCAATTT', 'GAGTTGCG', 'TTTCGCGA')
if(spid == "SI_3A_B10" || spid == "SI_P03_B10") spseqs <- c('ACCATTAA', 'CTGGACGT', 'GAACGGTC', 'TGTTCACG')
if(spid == "SI_3A_C10" || spid == "SI_P03_C10") spseqs <- c('ATCTGATC', 'CGTGCTAA', 'GAGAAGGG', 'TCACTCCT')
if(spid == "SI_3A_D10" || spid == "SI_P03_D10") spseqs <- c('AGATAACA', 'CTTATTTG', 'GCGGGCAT', 'TACCCGGC')
if(spid == "SI_3A_E10" || spid == "SI_P03_E10") spseqs <- c('ATTGTTTC', 'CGCAGGAG', 'GCACCAGT', 'TAGTACCA')
if(spid == "SI_3A_F10" || spid == "SI_P03_F10") spseqs <- c('ATTCGTGC', 'CGCGTGCA', 'GAATACTG', 'TCGACAAT')
if(spid == "SI_3A_G10" || spid == "SI_P03_G10") spseqs <- c('ATACTGAG', 'CGGAGACT', 'GATGCCTC', 'TCCTATGA')
if(spid == "SI_3A_H10" || spid == "SI_P03_H10") spseqs <- c('ATTTCAGC', 'CGAGTGAT', 'GACCGCCA', 'TCGAATTG')
if(spid == "SI_3A_A11" || spid == "SI_P03_A11") spseqs <- c('ACCAGTCC', 'CTTTCCTT', 'GGACAGGG', 'TAGGTAAA')
if(spid == "SI_3A_B11" || spid == "SI_P03_B11") spseqs <- c('ATGGTCGC', 'CGACATAG', 'GATTCGCT', 'TCCAGATA')
if(spid == "SI_3A_C11" || spid == "SI_P03_C11") spseqs <- c('ACCGAACA', 'CGACTCTT', 'GTTTGTGG', 'TAGACGAC')
if(spid == "SI_3A_D11" || spid == "SI_P03_D11") spseqs <- c('ATATGAGA', 'CACCTCAG', 'GCTACTTC', 'TGGGAGCT')
if(spid == "SI_3A_E11" || spid == "SI_P03_E11") spseqs <- c('ATCGCCAT', 'CATAAAGG', 'GGGTTTCC', 'TCACGGTA')
if(spid == "SI_3A_F11" || spid == "SI_P03_F11") spseqs <- c('AGCAGTTA', 'CTTGTACC', 'GAACCCGG', 'TCGTAGAT')
if(spid == "SI_3A_G11" || spid == "SI_P03_G11") spseqs <- c('AGGGCGTT', 'CTATACGC', 'GCTCGTCA', 'TACATAAG')
if(spid == "SI_3A_H11" || spid == "SI_P03_H11") spseqs <- c('AGGATCGA', 'CACGATTC', 'GTATCGAG', 'TCTCGACT')
if(spid == "SI_3A_A12" || spid == "SI_P03_A12") spseqs <- c('ACTACTGT', 'CGGGAACG', 'GACCTCTC', 'TTATGGAA')
if(spid == "SI_3A_B12" || spid == "SI_P03_B12") spseqs <- c('ACGCTTGG', 'CGCTACAT', 'GAAAGACA', 'TTTGCGTC')
if(spid == "SI_3A_C12" || spid == "SI_P03_C12") spseqs <- c('ATCCGGCA', 'CCGTTATG', 'GGTAATGT', 'TAAGCCAC')
if(spid == "SI_3A_D12" || spid == "SI_P03_D12") spseqs <- c('AGAAACGT', 'CACTCAAC', 'GCTGTGTA', 'TTGCGTCG')
if(spid == "SI_3A_E12" || spid == "SI_P03_E12") spseqs <- c('ACGCGGAA', 'CGCTATCC', 'GTTGCATG', 'TAAATCGT')
if(spid == "SI_3A_F12" || spid == "SI_P03_F12") spseqs <- c('AATTGAAC', 'CCAGTGGA', 'GTCCATTG', 'TGGACCCT')
if(spid == "SI_3A_G12" || spid == "SI_P03_G12") spseqs <- c('ACCCGCAC', 'CATGCGTA', 'GTGATAGT', 'TGATATCG')
if(spid == "SI_3A_H12" || spid == "SI_P03_H12") spseqs <- c('ACGAGTAG', 'CAATCCCT', 'GTCCAGGC', 'TGTGTATA')
if(spid == "SI_T2_1") spseqs <- c('GGGTGATC', 'TTACCGAT', 'AATGACGA', 'CCCATTCG')
if(spid == "SI_T2_2") spseqs <- c('GGGTCGAA', 'ATCCGCCC', 'TCTATAGT', 'CAAGATTG')
if(spid == "SI_T2_3") spseqs <- c('GCTGATAT', 'TGCCGAGC', 'AAATTGCG', 'CTGACCTA')
if(spid == "SI_T2_4") spseqs <- c('ACTTCTGA', 'TTCATCTT', 'CGACGACG', 'GAGGAGAC')
if(spid == "SI_T2_5") spseqs <- c('GAATACAA', 'AGCATACC', 'TCGGGTTT', 'CTTCCGGG')
if(spid == "SI_T2_6") spseqs <- c('TATTGAGA', 'GTAGTCAG', 'CGCCATTC', 'ACGACGCT')
if(spid == "SI_T2_7") spseqs <- c('AAATCTGT', 'GTCCAACC', 'TCTGGCTG', 'CGGATGAA')
if(spid == "SI_T2_8") spseqs <- c('CCTTGAAC', 'GAAATCGG', 'TGGCCTCT', 'ATCGAGTA')
if(spid == "SI_GA_A1" || spid == "SI_P2_A1") spseqs <- c('GGTTTACT', 'CTAAACGG', 'TCGGCGTC', 'AACCGTAA')
if(spid == "SI_GA_A2" || spid == "SI_P2_A2") spseqs <- c('TTTCATGA', 'ACGTCCCT', 'CGCATGTG', 'GAAGGAAC')
if(spid == "SI_GA_A3" || spid == "SI_P2_A3") spseqs <- c('CAGTACTG', 'AGTAGTCT', 'GCAGTAGA', 'TTCCCGAC')
if(spid == "SI_GA_A4" || spid == "SI_P2_A4") spseqs <- c('TATGATTC', 'CCCACAGT', 'ATGCTGAA', 'GGATGCCG')
if(spid == "SI_GA_A5" || spid == "SI_P2_A5") spseqs <- c('CTAGGTGA', 'TCGTTCAG', 'AGCCAATT', 'GATACGCC')
if(spid == "SI_GA_A6" || spid == "SI_P2_A6") spseqs <- c('CGCTATGT', 'GCTGTCCA', 'TTGAGATC', 'AAACCGAG')
if(spid == "SI_GA_A7" || spid == "SI_P2_A7") spseqs <- c('ACAGAGGT', 'TATAGTTG', 'CGGTCCCA', 'GTCCTAAC')
if(spid == "SI_GA_A8" || spid == "SI_P2_A8") spseqs <- c('GCATCTCC', 'TGTAAGGT', 'CTGCGATG', 'AACGTCAA')
if(spid == "SI_GA_A9" || spid == "SI_P2_A9") spseqs <- c('TCTTAAAG', 'CGAGGCTC', 'GTCCTTCT', 'AAGACGGA')
if(spid == "SI_GA_A10" || spid == "SI_P2_A10") spseqs <- c('GAAACCCT', 'TTTCTGTC', 'CCGTGTGA', 'AGCGAAAG')
if(spid == "SI_GA_A11" || spid == "SI_P2_A11") spseqs <- c('GTCCGGTC', 'AAGATCAT', 'CCTGAAGG', 'TGATCTCA')
if(spid == "SI_GA_A12" || spid == "SI_P2_A12") spseqs <- c('AGTGGAAC', 'GTCTCCTT', 'TCACATCA', 'CAGATGGG')
if(spid == "SI_GA_B1" || spid == "SI_P2_B1") spseqs <- c('GTAATCTT', 'TCCGGAAG', 'AGTTCGGC', 'CAGCATCA')
if(spid == "SI_GA_B2" || spid == "SI_P2_B2") spseqs <- c('TACTCTTC', 'CCTGTGCG', 'GGACACGT', 'ATGAGAAA')
if(spid == "SI_GA_B3" || spid == "SI_P2_B3") spseqs <- c('GTGTATTA', 'TGTGCGGG', 'ACCATAAC', 'CAACGCCT')
if(spid == "SI_GA_B4" || spid == "SI_P2_B4") spseqs <- c('ACTTCATA', 'GAGATGAC', 'TGCCGTGG', 'CTAGACCT')
if(spid == "SI_GA_B5" || spid == "SI_P2_B5") spseqs <- c('AATAATGG', 'CCAGGGCA', 'TGCCTCAT', 'GTGTCATC')
if(spid == "SI_GA_B6" || spid == "SI_P2_B6") spseqs <- c('CGTTAATC', 'GCCACGCT', 'TTACTCAG', 'AAGGGTGA')
if(spid == "SI_GA_B7" || spid == "SI_P2_B7") spseqs <- c('AAACCTCA', 'GCCTTGGT', 'CTGGACTC', 'TGTAGAAG')
if(spid == "SI_GA_B8" || spid == "SI_P2_B8") spseqs <- c('AAAGTGCT', 'GCTACCTG', 'TGCTGTAA', 'CTGCAAGC')
if(spid == "SI_GA_B9" || spid == "SI_P2_B9") spseqs <- c('CTGTAACT', 'TCTAGCGA', 'AGAGTGTG', 'GACCCTAC')
if(spid == "SI_GA_B10" || spid == "SI_P2_B10") spseqs <- c('ACCGTATG', 'GATTAGAT', 'CTGACTGA', 'TGACGCCC')
if(spid == "SI_GA_B11" || spid == "SI_P2_B11") spseqs <- c('GTTCCTCA', 'AGGTACGC', 'TAAGTATG', 'CCCAGGAT')
if(spid == "SI_GA_B12" || spid == "SI_P2_B12") spseqs <- c('TACCACCA', 'CTAAGTTT', 'GGGTCAAG', 'ACTGTGGC')
if(spid == "SI_GA_C1" || spid == "SI_P2_C1") spseqs <- c('CCACTTAT', 'AACTGGCG', 'TTGGCATA', 'GGTAACGC')
if(spid == "SI_GA_C2" || spid == "SI_P2_C2") spseqs <- c('CCTAGACC', 'ATCTCTGT', 'TAGCTCTA', 'GGAGAGAG')
if(spid == "SI_GA_C3" || spid == "SI_P2_C3") spseqs <- c('TCAGCCGT', 'CAGAGGCC', 'GGTCAATA', 'ATCTTTAG')
if(spid == "SI_GA_C4" || spid == "SI_P2_C4") spseqs <- c('ACAATTCA', 'TGCGCAGC', 'CATCACTT', 'GTGTGGAG')
if(spid == "SI_GA_C5" || spid == "SI_P2_C5") spseqs <- c('CGACTTGA', 'TACAGACT', 'ATTGCGTG', 'GCGTACAC')
if(spid == "SI_GA_C6" || spid == "SI_P2_C6") spseqs <- c('ATTACTTC', 'TGCGAACT', 'GCATTCGG', 'CAGCGGAA')
if(spid == "SI_GA_C7" || spid == "SI_P2_C7") spseqs <- c('GTCTCTCG', 'AATCTCTC', 'CGGAGGGA', 'TCAGAAAT')
if(spid == "SI_GA_C8" || spid == "SI_P2_C8") spseqs <- c('GTTGAGAA', 'AGATCTGG', 'TCGATACT', 'CACCGCTC')
if(spid == "SI_GA_C9" || spid == "SI_P2_C9") spseqs <- c('GCGCAGAA', 'ATCTTACC', 'TATGGTGT', 'CGAACCTG')
if(spid == "SI_GA_C10" || spid == "SI_P2_C10") spseqs <- c('TCTCAGTG', 'GAGACTAT', 'CGCTTAGC', 'ATAGGCCA')
if(spid == "SI_GA_C11" || spid == "SI_P2_C11") spseqs <- c('GAGGATCT', 'AGACCATA', 'TCCTGCGC', 'CTTATGAG')
if(spid == "SI_GA_C12" || spid == "SI_P2_C12") spseqs <- c('TCTCGTTT', 'GGCTAGCG', 'ATGACCGC', 'CAAGTAAA')
if(spid == "SI_GA_D1" || spid == "SI_P2_D1") spseqs <- c('CACTCGGA', 'GCTGAATT', 'TGAAGTAC', 'ATGCTCCG')
if(spid == "SI_GA_D2" || spid == "SI_P2_D2") spseqs <- c('TAACAAGG', 'GGTTCCTC', 'ATCATGCA', 'CCGGGTAT')
if(spid == "SI_GA_D3" || spid == "SI_P2_D3") spseqs <- c('ACATTACT', 'TTTGGGTA', 'CAGCCCAC', 'GGCAATGG')
if(spid == "SI_GA_D4" || spid == "SI_P2_D4") spseqs <- c('CCCTAACA', 'ATTCCGAT', 'TGGATTGC', 'GAAGGCTG')
if(spid == "SI_GA_D5" || spid == "SI_P2_D5") spseqs <- c('CTCGTCAC', 'GATCAGCA', 'ACAACAGG', 'TGGTGTTT')
if(spid == "SI_GA_D6" || spid == "SI_P2_D6") spseqs <- c('CATGCGAT', 'TGATATTC', 'GTGATCGA', 'ACCCGACG')
if(spid == "SI_GA_D7" || spid == "SI_P2_D7") spseqs <- c('ATTTGCTA', 'TAGACACC', 'CCACAGGG', 'GGCGTTAT')
if(spid == "SI_GA_D8" || spid == "SI_P2_D8") spseqs <- c('GCAACAAA', 'TAGTTGTC', 'CGCCATCG', 'ATTGGCGT')
if(spid == "SI_GA_D9" || spid == "SI_P2_D9") spseqs <- c('AGGAGATG', 'GATGTGGT', 'CTACATCC', 'TCCTCCAA')
if(spid == "SI_GA_D10" || spid == "SI_P2_D10") spseqs <- c('CAATACCC', 'TGTCTATG', 'ACCACGAA', 'GTGGGTGT')
if(spid == "SI_GA_D11" || spid == "SI_P2_D11") spseqs <- c('CTTTGCGG', 'TGCACAAA', 'AAGCAGTC', 'GCAGTTCT')
if(spid == "SI_GA_D12" || spid == "SI_P2_D12") spseqs <- c('GCACAATG', 'CTTGGTAC', 'TGCACCGT', 'AAGTTGCA')
if(spid == "SI_GA_E1" || spid == "SI_P2_E1") spseqs <- c('TGGTAAAC', 'GAAAGGGT', 'ACTGCTCG', 'CTCCTCTA')
if(spid == "SI_GA_E2" || spid == "SI_P2_E2") spseqs <- c('GTGGTACC', 'TACTATAG', 'ACAAGGTA', 'CGTCCCGT')
if(spid == "SI_GA_E3" || spid == "SI_P2_E3") spseqs <- c('AGGTATTG', 'CTCCTAGT', 'TCAAGGCC', 'GATGCCAA')
if(spid == "SI_GA_E4" || spid == "SI_P2_E4") spseqs <- c('TTCGCCCT', 'GGATGGGC', 'AATCAATG', 'CCGATTAA')
if(spid == "SI_GA_E5" || spid == "SI_P2_E5") spseqs <- c('CATTAGCG', 'TTCGCTGA', 'ACAAGAAT', 'GGGCTCTC')
if(spid == "SI_GA_E6" || spid == "SI_P2_E6") spseqs <- c('CTGCGGCT', 'GACTCAAA', 'AGAAACTC', 'TCTGTTGG')
if(spid == "SI_GA_E7" || spid == "SI_P2_E7") spseqs <- c('CACGCCTT', 'GTATATAG', 'TCTCGGGC', 'AGGATACA')
if(spid == "SI_GA_E8" || spid == "SI_P2_E8") spseqs <- c('ATAGTTAC', 'TGCTGAGT', 'CCTACGTA', 'GAGCACCG')
if(spid == "SI_GA_E9" || spid == "SI_P2_E9") spseqs <- c('TTGTTTCC', 'GGAGGAGG', 'CCTAACAA', 'AACCCGTT')
if(spid == "SI_GA_E10" || spid == "SI_P2_E10") spseqs <- c('AAATGTGC', 'GGGCAAAT', 'TCTATCCG', 'CTCGCGTA')
if(spid == "SI_GA_E11" || spid == "SI_P2_E11") spseqs <- c('AAGCGCTG', 'CGTTTGAT', 'GTAGCACA', 'TCCAATGC')
if(spid == "SI_GA_E12" || spid == "SI_P2_E12") spseqs <- c('ACCGGCTC', 'GAGTTAGT', 'CGTCCTAG', 'TTAAAGCA')
if(spid == "SI_GA_F1" || spid == "SI_P2_F1") spseqs <- c('GTTGCAGC', 'TGGAATTA', 'CAATGGAG', 'ACCCTCCT')
if(spid == "SI_GA_F2" || spid == "SI_P2_F2") spseqs <- c('TTTACATG', 'CGCGATAC', 'ACGCGGGT', 'GAATTCCA')
if(spid == "SI_GA_F3" || spid == "SI_P2_F3") spseqs <- c('TTCAGGTG', 'ACGGACAT', 'GATCTTGA', 'CGATCACC')
if(spid == "SI_GA_F4" || spid == "SI_P2_F4") spseqs <- c('CCCAATAG', 'GTGTCGCT', 'AGAGTCGC', 'TATCGATA')
if(spid == "SI_GA_F5" || spid == "SI_P2_F5") spseqs <- c('GACTACGT', 'CTAGCGAG', 'TCTATATC', 'AGGCGTCA')
if(spid == "SI_GA_F6" || spid == "SI_P2_F6") spseqs <- c('CGGAGCAC', 'GACCTATT', 'ACTTAGGA', 'TTAGCTCG')
if(spid == "SI_GA_F7" || spid == "SI_P2_F7") spseqs <- c('CGTGCAGA', 'AACAAGAT', 'TCGCTTCG', 'GTATGCTC')
if(spid == "SI_GA_F8" || spid == "SI_P2_F8") spseqs <- c('CATGAACA', 'TCACTCGC', 'AGCTGGAT', 'GTGACTTG')
if(spid == "SI_GA_F9" || spid == "SI_P2_F9") spseqs <- c('CAAGCTCC', 'GTTCACTG', 'TCGTGAAA', 'AGCATGGT')
if(spid == "SI_GA_F10" || spid == "SI_P2_F10") spseqs <- c('GCTTGGCT', 'AAACAAAC', 'CGGGCTTA', 'TTCATCGG')
if(spid == "SI_GA_F11" || spid == "SI_P2_F11") spseqs <- c('GCGAGAGT', 'TACGTTCA', 'AGTCCCAC', 'CTATAGTG')
if(spid == "SI_GA_F12" || spid == "SI_P2_F12") spseqs <- c('TGATGCAT', 'GCTACTGA', 'CACCTGCC', 'ATGGAATG')
if(spid == "SI_GA_G1" || spid == "SI_P2_G1") spseqs <- c('ATGAATCT', 'GATCTCAG', 'CCAGGAGC', 'TGCTCGTA')
if(spid == "SI_GA_G2" || spid == "SI_P2_G2") spseqs <- c('TGATTCTA', 'ACTAGGAG', 'CAGCCACT', 'GTCGATGC')
if(spid == "SI_GA_G3" || spid == "SI_P2_G3") spseqs <- c('CCTCATTC', 'AGCATCCG', 'GTGGCAAT', 'TAATGGGA')
if(spid == "SI_GA_G4" || spid == "SI_P2_G4") spseqs <- c('GCGATGTG', 'AGATACAA', 'TTTCCACT', 'CACGGTGC')
if(spid == "SI_GA_G5" || spid == "SI_P2_G5") spseqs <- c('GAGCAAGA', 'TCTGTGAT', 'CGCAGTTC', 'ATATCCCG')
if(spid == "SI_GA_G6" || spid == "SI_P2_G6") spseqs <- c('CTGACGCG', 'GGTCGTAC', 'TCCTTCTT', 'AAAGAAGA')
if(spid == "SI_GA_G7" || spid == "SI_P2_G7") spseqs <- c('GGTATGCA', 'CTCGAAAT', 'ACACCTTC', 'TAGTGCGG')
if(spid == "SI_GA_G8" || spid == "SI_P2_G8") spseqs <- c('TATGAGCT', 'CCGATAGC', 'ATACCCAA', 'GGCTGTTG')
if(spid == "SI_GA_G9" || spid == "SI_P2_G9") spseqs <- c('TAGGACGT', 'ATCCCACA', 'GGAATGTC', 'CCTTGTAG')
if(spid == "SI_GA_G10" || spid == "SI_P2_G10") spseqs <- c('TCGCCAGC', 'AATGTTAG', 'CGATAGCT', 'GTCAGCTA')
if(spid == "SI_GA_G11" || spid == "SI_P2_G11") spseqs <- c('TTATCGTT', 'AGCAGAGC', 'CATCTCCA', 'GCGGATAG')
if(spid == "SI_GA_G12" || spid == "SI_P2_G12") spseqs <- c('ATTCTAAG', 'CCCGATTA', 'TGGAGGCT', 'GAATCCGC')
if(spid == "SI_GA_H1" || spid == "SI_P2_H1") spseqs <- c('GTATGTCA', 'TGTCAGAC', 'CACGTCGG', 'ACGACATT')
if(spid == "SI_GA_H2" || spid == "SI_P2_H2") spseqs <- c('TAATGACC', 'ATGCCTTA', 'GCCGAGAT', 'CGTATCGG')
if(spid == "SI_GA_H3" || spid == "SI_P2_H3") spseqs <- c('CCAAGATG', 'AGGCCCGA', 'TACGTGAC', 'GTTTATCT')
if(spid == "SI_GA_H4" || spid == "SI_P2_H4") spseqs <- c('GCCATTCC', 'CAAGAATT', 'TTGCCGGA', 'AGTTGCAG')
if(spid == "SI_GA_H5" || spid == "SI_P2_H5") spseqs <- c('CCACTACA', 'GATTCTGG', 'TGCGGCTT', 'ATGAAGAC')
if(spid == "SI_GA_H6" || spid == "SI_P2_H6") spseqs <- c('TAGGATAA', 'CCTTTGTC', 'GTACGCGG', 'AGCACACT')
if(spid == "SI_GA_H7" || spid == "SI_P2_H7") spseqs <- c('AGCTATCA', 'CATATAAC', 'TCAGGGTG', 'GTGCCCGT')
if(spid == "SI_GA_H8" || spid == "SI_P2_H8") spseqs <- c('TTGTTGAT', 'GCTCAACC', 'CAAAGTGG', 'AGCGCCTA')
if(spid == "SI_GA_H9" || spid == "SI_P2_H9") spseqs <- c('ACACTGTT', 'CAGGATGG', 'GGCTGAAC', 'TTTACCCA')
if(spid == "SI_GA_H10" || spid == "SI_P2_H10") spseqs <- c('GTAATTGC', 'AGTCGCTT', 'CACGAGAA', 'TCGTCACG')
if(spid == "SI_GA_H11" || spid == "SI_P2_H11") spseqs <- c('GGCGAGTA', 'ACTTCTAT', 'CAAATACG', 'TTGCGCGC')
if(spid == "SI_GA_H12" || spid == "SI_P2_H12") spseqs <- c('GACAGCAT', 'TTTGTACA', 'AGGCCGTG', 'CCATATGC')
spseqs
}
.rstest <- function(r, coef) r * (1 + 1/r)^(1 + coef)
.averaging_transform <- function(r, nr){
d <- c(1, r[ 2:length(r) ] - r[1:(length(r)-1)])
dr <- c( 0.5 * (d[2:length(d)] + d[1:(length(d)-1)]),d[ length(d) ])
nr/dr
}
.do.EXACT.DEBUG <- FALSE
#.do.EXACT.DEBUG <- TRUE
.simple.Good.Turing.Freq<-function( raw.freq ){
n_zero <- sum(raw.freq == 0)
times <- table(raw.freq[raw.freq>0])
obs <- sort(as.numeric(names(times)))
times <- times[as.character(obs)]
nobs <- length(obs)
if(nobs < .min_mySGT_input_size){
cat(sprintf("Warning: not enough element in the observation array : %d < %d.\nThe `Rescued' element in the returned object will be set to NA.\n", nobs, .min_mySGT_input_size))
return(NA)
}else{
if(nobs != length(times)) stop("The oservation array doesn't match the frequency array.")
total_observed <- sum(obs * times)
tval <- matrix(0, ncol=2, nrow=nobs)
ks <- c(obs[2:nobs], 2*obs[nobs] - obs[nobs-1])
tval[,1] <- log(obs)
tval[,2] <- log( .averaging_transform(obs,times))
meanx <- mean(tval[,1])
meany <- mean(tval[,2])
slope <- sum( (tval[,1]-meanx) * (tval[,2]-meany) ) / sum((tval[,1]-meanx) ^2)
intercept <- meany - slope * meanx
obs.in.next <- (obs+1) %in% obs
SD <- ( 1+(1:nobs) ) / times *(c(times[2:nobs], 0)* (1+c(times[2:nobs],0) /times))^.5
SD [!obs.in.next] <- 1
GT <- (obs+1)/obs * c(times[2:nobs],0)/times
GT [!obs.in.next] <- 0
y <- .rstest(obs,slope)
LGT <- y / obs
xy.sim <- ( abs(GT-LGT) * (1:nobs) / SD ) > 1.65
min.no.turing <- min(which(!xy.sim))
r <- GT
if(min.no.turing>0)r[min.no.turing:nobs] <- LGT[min.no.turing:nobs]
p_zero <- times[1] / total_observed
rs <- (1-p_zero) * obs * r / sum(r*times*obs/total_observed)
total_rs <- sum(rs * times)
puniq <- (1-p_zero) * rs / total_rs
pres <- rep(p_zero / n_zero, length(raw.freq))
pres [raw.freq>0] <- puniq[match( raw.freq[raw.freq>0], obs )]
ret <- list(p0=p_zero, p=pres)
if(.do.EXACT.DEBUG){
print("DEBUG_EXACT")
skf <- "/tmp/del4-YangLiao-GoodTuring-Pr-vals.txt"
if(file.exists(skf))file.remove(skf)
sink(skf)
for(rr in pres){
cat(sprintf("%.20g\n",rr))
}
sink()
}
ret
}
}
.min_mySGT_input_size <- 5
.smoothed <- function(i,intercept,slope) 2.718281828^(intercept + slope * log(i))
# Simple Good Turing
.mySGT <- function(obs.per.spe){
obs.tab <- table(obs.per.spe[obs.per.spe>0])
obs <- sort( as.numeric(names(obs.tab)))
obs.tab <- obs.tab[ as.character(obs) ]
sgtr <- .mySGTsorted(as.numeric(names(obs.tab)), obs.tab)
res <- obs.per.spe
res[obs.per.spe!=0] <- sgtr$p[match(as.character(obs.per.spe[obs.per.spe!=0]), names(obs.tab))]
n_zero <- sum(obs.per.spe==0)
res[obs.per.spe==0] <- sgtr$p0/n_zero
res
}
.mySGTsorted<-function( obs, times ){
if(any(obs != sort(obs)))stop("Observations have to be sorted.")
nobs <- length(obs)
if(nobs < .min_mySGT_input_size) stop("Not enough element in the observation array.")
if(nobs != length(times)) stop("The oservation array doesn't match the frequency array.")
total_observed <- sum(obs * times)
p_zero <- 0
if(1 %in% obs) p_zero <- times[which(1==obs)[1]] / total_observed
tval <- matrix(0, ncol=2, nrow=nobs)
ks <- c(obs[2:nobs], 2*obs[nobs] - obs[nobs-1])
tval[,1] <- log(obs)
tval[,2] <- log(2*times / (ks - c(0, obs[1:(nobs-1)])))
meanX <- mean(tval[,1])
meanY <- mean(tval[,2])
slope <- sum( (tval[,1]-meanX) * (tval[,2]-meanY) ) / sum((tval[,1]-meanX) ^2)
intercept <- meanY - slope * meanX
y <- (1+obs) * .smoothed(1+obs, intercept, slope ) / .smoothed(obs, intercept, slope)
x <- (1+obs) * c((times[2:nobs] / times[1:(nobs-1)]), y[nobs])
xy.sim <- abs(x-y) <= 1.96*( (obs+1)^2 * c(times[2:nobs], 0)/ times^2 * (1+ c(times[2:nobs], 0) / times ))^0.5
obs.in.next <- (obs+1) %in% obs
min.inv <- min(which( xy.sim | !obs.in.next ))
r<-x
if(min.inv>0)r[min.inv:nobs] <- y[min.inv:nobs]
list(p0=p_zero, p=(1-p_zero) * r / sum(r*times))
}
.cellCounts_try_cellbarcode <- function( input.directory, sample.sheet, cell.barcode.list, nreads.testing ){ # the three parameters can only be one string, not strings!
cmd <- paste0(c(input.directory, sample.sheet, cell.barcode.list, as.character(nreads.testing)), collapse=.R_param_splitor)
rvs <- as.integer(rep(0,5))
C_args <- .C("R_try_cell_barcode_wrapper",nargs=as.integer(4),argv=as.character(cmd),retv=rvs ,PACKAGE="Rsubread")
return_val <- ifelse(C_args$retv[1]==0,"FINISHED","ERROR")
tested_reads <- C_args$retv[2]
good_sample <- C_args$retv[3]
good_cell <- C_args$retv[4]
if(return_val=="ERROR"){
return(NA)
}
return(c(tested_reads, good_sample, good_cell))
}
.dataURL <- function(uri){
sprintf("http://bioinf.wehi.edu.au/cellCounts/cell-barcodes/%s",uri)
}
.find_best_cellbarcode <- function( input.directory, sample.sheet=NULL, input.mode="bcl"){
barcode.database.file <- path.expand("~/.Rsubread/cellCounts/known_barcode_sets.txt")
if(!file.exists(barcode.database.file)){
dir.create("~/.Rsubread/cellCounts", recursive=TRUE)
rr <- download.file(.dataURL("known_barcode_sets.txt"), barcode.database.file)
if(rr!=0)stop("ERROR: the barcode database cannot be retrieved from the Internet. You may still run cellCounts by specifying a local barcode list file to the `cell.barcode.list` option.")
}
bcb.sets <- read.delim(barcode.database.file, stringsAsFactors=F, header=T)
cat(sprintf("Found %d known cell barcode sets.\n", nrow(bcb.sets)))
max.cell.good <- -1
for(libf in bcb.sets$File){
listfile <- path.expand(paste0("~/.Rsubread/cellCounts/",libf))
if(!file.exists(listfile)){
rr <- download.file(.dataURL(libf), listfile)
if(rr!=0)stop("ERROR: the barcode list cannot be retrieved from the Internet. You may still run cellCounts by specifying a local barcode list file to the `cell.barcode.list` option.")
}
cat(sprintf("Testing the cell barcodes in %s.\n", libf))
barcode_res <- .cellCounts_try_cellbarcode(input.directory[1], sample.sheet[1], listfile, 30000)
if(length(barcode_res)<3)stop("ERROR: the input sample cannot be processed.")
sample.good.rate <- barcode_res[2]/barcode_res[1]
cell.good.rate <- barcode_res[3]/barcode_res[1]
max.cell.good <- max(max.cell.good, cell.good.rate)
#cat(sprintf("Sample supporting rate : %.1f%% ; cell supporting rate : %.1f%%.\n", sample.good.rate*100., cell.good.rate*100.))
if(sample.good.rate < 0.5)cat(sprintf("WARNING: there are only %.1f%% reads having known sample indices. Please check if the sample sheet is correct.\n", sample.good.rate*100.))
if(cell.good.rate > 0.6){
cat(sprintf("Found cell-barcode list '%s' for the input data: supported by %.1f%% reads.\n", libf, cell.good.rate*100.))
return(listfile)
}
}
stop(sprintf("ERROR: no known cell barcode set was found for the data set. The highest percentage of cell-barcode matched reads is %.1f%%\n", max.cell.good*100.))
}
library(Matrix)
.read.sparse.mat <- function (fn){
cat("Loading matrix from",fn,"\n")
mtx <- readMM(paste0(fn, ".spmtx"))
coln <- read.delim(paste0(fn, ".BCtab"), stringsAsFactors=F, header=F)$V1
rown <- read.delim(paste0(fn, ".GENEtab"), stringsAsFactors=F, header=F)$V1
colnames(mtx) <- coln
rownames(mtx) <- rown
mtx
}
.use.DEBUG.data <- NA
# .use.DEBUG.i <- 1
.get.multi.nomial <- function(n, size, prob){
if(.do.EXACT.DEBUG){
.use.DEBUG.i <- 1
ret <- read.table(paste0("/tmp/del4-YangLiao-multi-nomial-SIZE",size,"-",.use.DEBUG.i,".txt"), header=F)
ret <- t(ret)
#.use.DEBUG.i <<- .use.DEBUG.i+1
#print(ret[1:5,1:5])
ret
}else{
rmultinom(n, size, prob )
}
}
.simu.multinomial <- function(candi.mat, gene.profile.freq , times=10000){
bcsizes <- sort(unique( colSums(candi.mat) ))
ret.nUMI.LLH.tab <- list()
N10000.LLH <- NA
Old_bs_one <- NA
log_E_GTE <- log(gene.profile.freq)
if(.do.EXACT.DEBUG){
tff<-"/tmp/del4-YangLiao-for-multi-nomial-steps.txt"
if(file.exists(tff))file.remove(tff)
sink(tff)
for(bcsize_one in bcsizes){
UMI_step_diff <- NA
if(!any(is.na(Old_bs_one))) UMI_step_diff <- bcsize_one - Old_bs_one
st1 <- NA
if(is.na(Old_bs_one)) {
st1 <- bcsize_one
}else if(UMI_step_diff >= 1000){
st1 <- UMI_step_diff
}
if(!is.na(st1))cat(st1,"\n")
Old_bs_one <- bcsize_one
}
sink()
system("python /usr/local/work/liao/subread/scripts/Cellranger-replicate/CrepPY-multi-nomial.py")
}
Old_bs_one <- NA
N1000.FstLLH <- c()
if(.do.EXACT.DEBUG) M50.gene.ids <- read.table("/tmp/del4-YangLiao-50M-gene-nos.txt", header=F)$V1
all.steps.len <- 0
for(bcsize_one in bcsizes){
UMI_step_diff <- NA
if(!is.na(Old_bs_one)) UMI_step_diff <- bcsize_one - Old_bs_one
if(is.na(Old_bs_one)){
}else if(UMI_step_diff >= 1000){
}else{
step_len <- bcsize_one - Old_bs_one
all.steps.len <- step_len + all.steps.len
}
Old_bs_one <- bcsize_one
}
if(5000 <= all.steps.len)print("WARNING: SMALL STEPS >= 5000!")
Old_bs_one <- NA
M50.in.loop.K <- 0
for(bcsize_one in bcsizes){
UMI_step_diff <- NA
if(!any(is.na(Old_bs_one))) UMI_step_diff <- bcsize_one - Old_bs_one
if(any(is.na(N10000.LLH))){
N10000 <- .get.multi.nomial(n=times, size=bcsize_one, prob=gene.profile.freq )
print("======== N10000 and PROF DIM =======")
print(dim(N10000))
print(length(gene.profile.freq))
N10000.LLH <- apply(N10000, 2, function(x) dmultinom(x, prob=gene.profile.freq, log =T ))
}else if(UMI_step_diff >= 1000){
UMI_step_diff_N10000 <- .get.multi.nomial(n=times, size=UMI_step_diff, prob=gene.profile.freq )
N10000 <- N10000 + UMI_step_diff_N10000
N10000.LLH <- apply(N10000, 2, function(x) dmultinom(x, prob=gene.profile.freq, log =T ))
}else{
step_len <- bcsize_one - Old_bs_one
for(curi in (Old_bs_one+1):bcsize_one){
if(.do.EXACT.DEBUG){
UMI_step_indices_N10000 <- M50.gene.ids[1+M50.in.loop.K + (0:9999) * all.steps.len ]
if(curi == 588){
tff<-"/tmp/del4-YangLiao-DEBUG-588-10KJ.txt"
if(file.exists(tff))file.remove(tff)
sink(tff)
for(pi in 1:10000){
cat(pi," ", N10000[UMI_step_indices_N10000[pi],pi],"\n")
}
sink()
}
M50.in.loop.K <- 1+M50.in.loop.K
}else{
UMI_step_indices_N10000 <- sample(1:nrow(candi.mat), size=times, prob=gene.profile.freq, replace=T)
}
for(idi in 1:times){
N10000[ UMI_step_indices_N10000[idi] ,idi ] <- N10000[ UMI_step_indices_N10000[idi] ,idi ]+1
}
UMI_step_values_N10000 <- rep(0, times)
for(idi in 1:times){
UMI_step_values_N10000[idi] <- N10000[UMI_step_indices_N10000[idi] ,idi ]
}
#print(UMI_step_values_N10000)
N10000.LLH <- N10000.LLH + log_E_GTE[ UMI_step_indices_N10000 ] + log((curi) / UMI_step_values_N10000)
if(.do.EXACT.DEBUG && curi == 588)for(uii in 1:30){
tff<-"/tmp/del4-YangLiao-DEBUG-1-30-E_GTE.txt"
if(file.exists(tff))file.remove(tff)
sink(tff)
for(i in 1:30){
cat(sprintf("%.5g %.5g %.5g\n", N10000.LLH[i], log_E_GTE[ UMI_step_indices_N10000 [i]], log((curi) / UMI_step_values_N10000[i])))
}
sink()
}
}
}
ret.nUMI.LLH.tab[[ bcsize_one ]] <- N10000.LLH
N1000.FstLLH <- c( N1000.FstLLH, N10000.LLH[1] )
Old_bs_one <- bcsize_one
}
if(.do.EXACT.DEBUG){
tff<-"/tmp/del4-YangLiao-10K-LLH-from-R.txt"
if(file.exists(tff))file.remove(tff)
sink(tff)
for(flh in N1000.FstLLH){
cat(sprintf("%.6g\n", flh))
}
sink()
}
ret.nUMI.LLH.tab
}
.is.no.candBC <- function(fn){
info <- file.info(fn)
return(info$size < 2)
}
.cellCounts.rescue <- function( BAM.name, FC.gene.ids, sample.no ){
fname <- sprintf("%s.scRNA.%03d", BAM.name, sample.no)
nozero.anywhere.genes <- read.delim(paste0(fname,".no0Genes"), stringsAsFactors=F, header=F)$V1
ambient.accumulate <- read.delim(paste0(fname,".AmbSum"), stringsAsFactors=F)
ambient.accumulate <- ambient.accumulate[ match(FC.gene.ids , ambient.accumulate$GeneID), ]
ambient.accumulate$UMIs[is.na(ambient.accumulate$UMIs)] <- 0
ambient.accumulate <- ambient.accumulate$UMIs
names(ambient.accumulate) <- FC.gene.ids
ambient.accumulate <- ambient.accumulate[ names(ambient.accumulate) %in% nozero.anywhere.genes]
resc.bc.fn <- paste0(fname,".RescCand")
rstfq <- .simple.Good.Turing.Freq(ambient.accumulate)
if(any(is.na(rstfq)) || .is.no.candBC(paste0(resc.bc.fn,".BCtab"))){
return(NA)
}else{
gte <- rstfq$p
print("Summary of the ambient RNA profile (proportions)")
print(summary(gte))
# This function returns "times" log-likelihoods.
rescue.candidates <- as.matrix(.read.sparse.mat(resc.bc.fn))
rescue.candidates <- rescue.candidates[ match(names(ambient.accumulate), rownames(rescue.candidates)), ]
rownames(rescue.candidates) <- names(ambient.accumulate)
rescue.candidates [is.na(rescue.candidates )] <- 0
# Compute observed log-likelihood of barcodes being generated from ambient RNA
# Compute the multinomial log PMF for many barcodes -- log-likelihoods
# Only use the "non-zero anywhere" genes.
log.like.cands <- apply(rescue.candidates, 2, function(x) dmultinom(x, prob=gte, log =T ))
# Simulate log likelihoods
# This step is to build like 10000 sets of N_GENES vectors from the multinomial distribution of "gte"
# See what are their log-likelihoods against "gte" -- most should be very small.
simu.pvalues <- .simu.multinomial( rescue.candidates, gte )
# Compute p-values : the p-value is the chance of a barcode is actually from ambient RNA (ie smaller the p-value, more likely this barcode is for a real cell)
actual.pvalues <- rep(0, ncol(rescue.candidates) )
names(actual.pvalues) <- colnames(rescue.candidates)
for(candi in names(log.like.cands)){
cand_umis <- sum(rescue.candidates[,candi])
cand.simu.pvs <- simu.pvalues[[ cand_umis ]]
cand.actual.pv <- log.like.cands[candi]
actual.pvalues[candi] <- (1+sum(cand.simu.pvs < cand.actual.pv))/(length(cand.simu.pvs)+1)
}
# p-value => FDR
actual.FDR <- p.adjust(actual.pvalues, method='BH')
if(.do.EXACT.DEBUG){
tff<-"/tmp/del4-YangLiao-final-FDR.txt"
if(file.exists(tff))file.remove(tff)
sink(tff)
for(candi in names(log.like.cands)) cat(sprintf("%s %.10g %.10g\n", candi, actual.pvalues[candi], actual.FDR[candi]))
sink()
}
# select cells that has FDR < cutoff
FDR.Cutoff <- 0.01
head(actual.FDR)
Rescured.Barcodes <- names(actual.FDR)[actual.FDR <= FDR.Cutoff]
head(Rescured.Barcodes)
rescue.candidates[,Rescured.Barcodes, drop=FALSE]
}
}
.match.exons <- function(annot.tab, counts){
annot.str <- paste(annot.tab$GeneID, annot.tab$Chr, annot.tab$Start,
annot.tab$End, ifelse(annot.tab$Strand == "+", "P","N"), sep=":fc@R@Spl:")
ret <- matrix(0, ncol=ncol(counts), nrow=nrow(annot.tab))
colnames(ret) <- colnames(counts)
rownames(ret) <- annot.str
ret[ rownames(counts),] <- counts
ret
}
.load.one.scSample <- function( BAM.name, FC.gene.ids, sample.no, use.meta.features, annot.tab){
fname <- sprintf("%s.scRNA.%03d", BAM.name, sample.no)
cat("Loading high-conf matrix from '",fname,"'\n")
highconf <- as.matrix(.read.sparse.mat(paste0(fname,".HighConf")))
rescued <- .cellCounts.rescue(BAM.name, FC.gene.ids, sample.no)
if(use.meta.features){
if(!any(is.na(rescued))) rescued <- rescued[rowSums(rescued)>0,]
ncolRescued <- 0
if(is.na(rescued)){
ret <- matrix(0,ncol=ncol(highconf), nrow=length(FC.gene.ids))
colnames(ret) <- colnames(highconf)
}else{
ret <- matrix(0,ncol=ncol(highconf)+ncol(rescued), nrow=length(FC.gene.ids))
colnames(ret) <- c( colnames(highconf), colnames(rescued) )
}
rownames(ret) <- FC.gene.ids
ret[rownames(highconf), colnames(highconf) ] <- highconf
if(!is.na(rescued)) ret[rownames(rescued), colnames(rescued) ] <- rescued
list(Counts=ret, HighConfidneceCell=colnames(ret) %in% colnames(highconf))
}else{
fcmat <- .match.exons(annot.tab,highconf)
rownames(fcmat)<-NULL
list(Counts=fcmat, HighConfidneceCell=rep(T, ncol(highconf)))
}
}
.load.all.scSamples <- function( BAM.name, FC.gene.ids, use.meta.features, annot.tab){
sum.tab <- read.delim(paste0(BAM.name,".scRNA.SampleTable"), stringsAsFactors=F)
ret <- list()
for(roiw in 1:nrow(sum.tab)){
sname <- sum.tab$SampleName[roiw]
sid <- sum.tab$Index[roiw]
count.tab <- .load.one.scSample(BAM.name, FC.gene.ids, sid, use.meta.features, annot.tab)
ret[[sprintf("Sample.%d",sid)]] <- count.tab
}
ret[["Sample.Table"]] <- sum.tab
ret
}
.del.temp.files <- function(pfx){
if(nchar(pfx)<16) stop("Prefix should be longer.")
flist <- dir(".")
to.delete <- flist[substr(flist,1,nchar(pfx)) == pfx]
for(df in to.delete){
if(grepl(pfx, df)){
file.remove(df)
}else{
stop("Wrong selection of files to delete")
}
}
}
.extract.sample.table.cols <- function(rdir, smr){
total.cells <- c()
hiconf.cells <- c()
res.cells <- c()
umis <- c()
for(spi in 1:nrow(smr[["Sample.Table"]])){
samplename <- smr[["Sample.Table"]]$SampleName[spi]
sampleno <- sprintf("Sample.%d", spi)
total.cells <- c(total.cells,length(smr[[sampleno]][["HighConfidneceCell"]]))
hiconf.cells <- c(hiconf.cells,sum(smr[[sampleno]][["HighConfidneceCell"]]))
res.cells <- c(res.cells, sum(!(smr[[sampleno]][["HighConfidneceCell"]])))
umis <- c(umis,sum(smr[[sampleno]][["Counts"]]))
}
cbind( SampleName=smr[["Sample.Table"]]$SampleName, InputDirectory=rep(rdir, nrow(smr[["Sample.Table"]])), TotalCells=total.cells, HighConfidenceCells=hiconf.cells, RescuedCells=res.cells, TotalUMI=umis, smr[["Sample.Table"]][,c("TotalReads","MappedReads","AssignedReads")] )
}
.SCRNA_FASTA_SPLIT1 <- "|Rsd:cCounts:mFQs|"
.SCRNA_FASTA_SPLIT2 <- "|Rsd:cCounts:1mFQ|"
cellCounts <- function(index, sample.index,input.mode="BCL", cell.barcode=NULL, aligner="align", annot.inbuilt="mm10",annot.ext=NULL,isGTFAnnotationFile=FALSE,GTF.featureType="exon",GTF.attrType="gene_id",useMetaFeatures=TRUE, nthreads=10, ...){
set.seed(0)
if(!is.null(aligner)) aligner <- match.arg(aligner,c("subjunc","align"))
fc <- list()
temp.file.prefix <- file.path(".",paste(".Rsubread_TEMP_cellCounts_",Sys.getpid(),sep=""))
raw.fc.annot <- NA
df.sample.info <- data.frame()
if("InputDirectory" %in% colnames(sample.index)){
dirs <- unique( sample.index$InputDirectory )
fc[["counts"]] <- list()
fc[["cell.confidence"]] <- list()
for(dirname in dirs){
unique.samples <- unique( sample.index$SampleName[ sample.index$InputDirectory == dirname ] )
sample.1 <- paste0(temp.file.prefix,".samplesheet")
if(is.null(cell.barcode)){
.index.names.to.sheet(dirname, sample.index, sample.1)
cell.barcode <- .find_best_cellbarcode(dirname, sample.1)
}else{
cell.barcode <- .check_and_NormPath(cell.barcode, mustWork=T, opt="cell.barcode")
}
full_dirname <- .check_and_NormPath(dirname, mustWork=TRUE, "InputDirectory in sample.index")
if(is.null(aligner)){
generate.scRNA.BAM <- FALSE
if(!all(file.exists(paste0( unique.samples,".bam" )))) stop("No aligner is specified but the BAM file does not exist. Please specify 'align' or 'subjunc' as the aligner.")
for(samplename in unique.samples){
.index.names.to.sheet(dirname, sample.index, sample.1, samplename)
one.bam.name <- paste0(samplename, ".bam")
.write.tmp.parameters(list(sampleSheet=sample.1, cellBarcodeList=cell.barcode, generate.scRNA.BAM=generate.scRNA.BAM))
one.raw.fc <- featureCounts(one.bam.name, annot.inbuilt=annot.inbuilt, annot.ext=annot.ext, isGTFAnnotationFile=isGTFAnnotationFile, GTF.featureType=GTF.featureType, GTF.attrType=GTF.attrType, useMetaFeatures=useMetaFeatures, nthreads=nthreads, ...)
if(is.na(raw.fc.annot)) raw.fc.annot<- one.raw.fc$annotation
cat("Processing sample '",samplename,"'\n")
one.result <- .load.all.scSamples(paste0(samplename,".bam"), as.character(raw.fc.annot$GeneID), useMetaFeatures, raw.fc.annot)
fc[["counts"]][[samplename]] <- one.result[["Sample.1"]][["Counts"]] # only one sample.
fc[["cell.confidence"]][[samplename]] <- one.result[["Sample.1"]][["HighConfidneceCell"]]
stt <- .extract.sample.table.cols(full_dirname,one.result)
df.sample.info <- rbind(df.sample.info, stt)
}
}else{
.index.names.to.sheet(dirname, sample.index, sample.1)
generate.scRNA.BAM <- TRUE
.write.tmp.parameters(list(isBCLinput=TRUE))
if(aligner=="align"){
align(index, full_dirname, output_file=temp.file.prefix, nthreads=nthreads, ...)
}else if(aligner=="subjunc"){
subjunc(index, full_dirname, output_file=temp.file.prefix, nthreads=nthreads, ...)
}
.write.tmp.parameters(list(sampleSheet=sample.1, cellBarcodeList=cell.barcode, generate.scRNA.BAM=generate.scRNA.BAM))
raw.fc<-featureCounts(temp.file.prefix, annot.inbuilt=annot.inbuilt, annot.ext=annot.ext, isGTFAnnotationFile=isGTFAnnotationFile, GTF.featureType=GTF.featureType, GTF.attrType=GTF.attrType, useMetaFeatures=useMetaFeatures,nthreads=nthreads, ...)
if(is.na(raw.fc.annot)) raw.fc.annot<-raw.fc$annotation
some.results <- .load.all.scSamples(temp.file.prefix, as.character(raw.fc.annot$GeneID), useMetaFeatures, raw.fc.annot)
for(spi in 1:nrow(some.results[["Sample.Table"]])){
samplename <- some.results[["Sample.Table"]][["SampleName"]][spi]
fc[["counts"]][[samplename]] <- some.results[[sprintf("Sample.%d", spi)]][["Counts"]] # only one sample.
fc[["cell.confidence"]][[samplename]] <- some.results[[sprintf("Sample.%d", spi)]][["HighConfidneceCell"]]
}
stt <- .extract.sample.table.cols(full_dirname,some.results)
df.sample.info <- rbind(df.sample.info, stt)
}
}
} else {
stop("FASTQ input for scRNA-seq data hasn't been fully tested.")
if(!("File.BC.UMIs" %in% colnames(sample.index) && "File.Genomic" %in% colnames(sample.index) )) stop("You need to provide BC+UMI and Genomic sequence files")
if(is.null(cell.barcode)){
cell.barcode <- .find_best_cellbarcode(combined.fastq.names, sample.sheet=NULL, input.mode="fastq")
}else{
cell.barcode <- .check_and_NormPath(cell.barcode, mustWork=T, opt="cell.barcode")
}
combined.fastq.names <- ""
for(rowi in 1:nrow(sample.index)){
combined.fastq.names <- paste0( combined.fastq.names,.SCRNA_FASTA_SPLIT1, sample.index[rowi,"File.BC.UMIs"], .SCRNA_FASTA_SPLIT2,".",.SCRNA_FASTA_SPLIT2, sample.index[rowi,"File.Genomic"] )
}
combined.fastq.names <- substr(combined.fastq.names, nchar(.SCRNA_FASTA_SPLIT1)+1, 9999999)
.write.tmp.parameters(list(isScRNAFastqinput=TRUE))
align(index, combined.fastq.names, output_file=temp.file.prefix, nthreads=nthreads,...)
}
fc[["Annotation"]] <- raw.fc.annot
fc[["sample.info"]] <- df.sample.info
.del.temp.files(temp.file.prefix)
fc
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.