Nothing
#standAloneBuildingLocalAnnotation a stand alone function to create gene-level annotation for Affymetrix exon-arrays
#standAloneAddingAnnotation a stand alone function to add gene-level annotaiton to a data frame, having a column made with gene-level ids
#standAloneBuildingLocalAnnotation(libDirLocation = getwd(), netaffxUser = "myemail@somewhere.org", netaffxUserPw = "yourpassword", whichAnnotation = "HuEx")
#building gene and exon annotation for oneChannelGUI
#ncScaffold build scaffold for ncRNA using bioMart this function create the scaffold used to load ncRNA-seq data if data are mapped only on ncRNA subset
#makeGeneScaffold creating Grange object for UCSC browser needed to map reads on UCSC genes
#makeGCcontent creating a list of GC frequences associated to the genes defined by makeGeneScaffold
#wrapScaffold wraping the scaffold and gc frequencies in an Rda object named with the genome reference
################################################################################
"standAloneBuildingLocalAnnotation" <- function(libDirLocation = getwd(), netaffxUser = "myemail@somewhere.org", netaffxUserPw = "yourpassword", whichAnnotation = c("HuEx", "MoEx", "RaEx")){
# require("AffyCompatible") || stop("\nMissing AffyCompatible library\n")
#where to save the objects containing the annotation
libDirLocation <- libDirLocation
#netaffx user email
username.tmp <- netaffxUser
#netaffx user password
userpw.tmp <- netaffxUserPw
rsrc <- NetAffxResource(user = username.tmp, password = userpw.tmp, affxLicence = "BIOCON0808")
rsrc
ReturnNetaffx <- whichAnnotation
if(ReturnNetaffx != ""){
annos <- rsrc[[grep(ReturnNetaffx, names(rsrc))]]
#gene level
anno <- affxAnnotation(annos)[[grep("Transcript Cluster Annotations, CSV", affxDescription(rsrc[[grep(ReturnNetaffx, names(rsrc))]]))]] #"transcript clusters Annotation, CSV Format"
df <- readAnnotation(rsrc, annotation = anno, content=FALSE)
df1 <- as.character(unlist(strsplit(df, ".zip")))
df1 <- as.character(unlist(strsplit(df1,'/')))
df1 <- df1[length(df1)]
conn <- unz(df, df1)
tmp <- readLines(conn, n=500)
myskip <- grep("transcript_cluster_id", tmp)
df.ann <- read.csv(conn, skip= (myskip - 1), header=T, as.is=T)
df.ann <- df.ann[,which(names(df.ann)%in%c("probeset_id", "gene_assignment"))]
#IMPORTANT to build the GENE level oneChannelGUI annotation file I use only the first assignment mapped on the gen_assignment field in the annotation files
geneAnnExtraction <- function(x){
mx <- strsplit(x, '//')
mx <- as.vector(unlist(mx))
if(length(mx) >= 4){
mx <- mx[1:4]
mxtmp <- gsub(" ", "", mx[c(1:2,4)])
mx <- c(mxtmp[1:2], mx[3], mxtmp[3])
return(mx)
} else{
mx <- rep(NA, 4)
return(mx)
}
}
myann <- sapply(df.ann[,2], geneAnnExtraction)
myann <- t(myann)
rownames(myann) <- df.ann[,1]
df.ann <- cbind(df.ann[,1], myann)
df.ann <- as.data.frame(df.ann)
names(df.ann) <- c("PROBESETID","ACC","SYMBOL","DESCRIPTION","CYTOBAND")
}
####
if(ReturnNetaffx == "HuEx"){
huex.annotation <- df.ann
save(huex.annotation, file="huex.annotation.rda")
cat("\nHuEx gene level annotation is saved in: ", getwd()," as huex.annotation.rda\n")
return(paste(getwd(),"/huex.annotation.rda", sep=""))
}
if(ReturnNetaffx == "MoEx"){
moex.annotation <- df.ann
save(moex.annotation, file="moex.annotation.rda")
cat("\nMoEx gene level annotation is saved in: ", getwd()," as moex.annotation.rda\n")
return(paste(getwd(),"/moex.annotation.rda", sep=""))
}
if(ReturnNetaffx == "RaEx"){
raex.annotation <- df.ann
save(raex.annotation, file="raex.annotation.rda")
cat("\nRaEx gene level annotation is saved in: ", getwd()," as raex.annotation.rda\n")
return(paste(getwd(),"/raex.annotation.rda", sep=""))
}
}
#############################################################################################################
#data(file="huex.annotation",package="oneChannelGUI")
#mydf <- read.table("tmpTopTable.txt", sep="\t", header=T)
#annotated.df <- standAloneAddingAnnotation(huex.annotation, mydf, ids.column = 1)
#shrimpReformat reformat the output for shrimp in a way to be used by oneChannelGUI
"standAloneAddingAnnotation" <- function(annotationdf, df.tobe.annotated, ids.column){
df.tobe.annotated <- df.tobe.annotated[order(as.character(df.tobe.annotated[,1])),]
myids <- as.character(df.tobe.annotated[,ids.column])
myann <- annotationdf[which(as.character(annotationdf$PROBESETID)%in%myids),]
myann <- myann[order(as.character(myann$PROBESETID)),]
if(identical(as.character(myann$PROBESETID), myids)){
dfann <- cbind(myann, df.tobe.annotated[,setdiff(seq(1, dim(df.tobe.annotated)[2]), ids.column)])
return(dfann)
} else{
cat("\nIDs from dataframe and those, selected from annotation table, seems to be not identical.\nAnnotation cannot be attached to the data frame!\n")
return()
}
}
################################################################################
"ncScaffold" <- function(genome, fasta =TRUE){
# require(biomaRt) || stop("library biomaRt could not be found !")
# require(Biostrings) || stop("\nBiostrings package is missing\n")
ensembl <- useMart('ensembl')
if(genome=="hg19"){
ensembl <- useDataset('hsapiens_gene_ensembl', mart=ensembl)
chrs <- seq(1, 25)
chrs.names <- c(chrs[1:22], "X", "Y", "MT")
emblInfo <- getBM(attributes = c('ensembl_gene_id','ensembl_transcript_id','transcript_exon_intron', 'chromosome_name', 'start_position', 'end_position', 'strand'), filters = 'biotype', values = c("miRNA","Mt_rRNA","Mt_tRNA","rRNA","snoRNA","snRNA"), mart = ensembl)
#only the set of nc associated to canonical chr are selected
p.set <- emblInfo[intersect(which(emblInfo$chromosome_name %in% chrs.names),which(emblInfo$strand == "1")),]
n.set <- emblInfo[intersect(which(emblInfo$chromosome_name %in% chrs.names),which(emblInfo$strand == "-1")),]
peaksNames.p <- paste("chr", p.set$chromosome_name, ".", "plus", ".", p.set$start_position, "-", p.set$end_position, "." ,p.set$ensembl_transcript_id, sep="")
peaksP <- IRanges(start = p.set$start_position, end = p.set$end_position, names = peaksNames.p)
peaksNames.n <- paste("chr", n.set$chromosome_name, ".", "minus", ".", n.set$start_position, "-", n.set$end_position, "." ,n.set$ensembl_transcript_id, sep="")
peaksN <- IRanges(start = n.set$start_position, end = n.set$end_position, names = peaksNames.n)
ncHs.data <- list("peaksP" = peaksP, "peaksN" = peaksN)
save(ncHs.data, file="ncHs.data.rda")
#I need also to retrieve the fasta seq
if(fasta){
p.seq <- apply(p.set, 1, function(x) x[1])
p.seq <- cbind(peaksNames.p, p.seq)
p.fa <- apply(p.seq, 1,function(x) {
n <- as.character(x[1])
s <- as.character(x[2])
tmp.list <- list("desc"= n, "seq"=s)
})
n.seq <- apply(n.set, 1, function(x) x[1])
n.seq <- cbind(peaksNames.n, n.seq)
n.fa <- apply(n.seq, 1,function(x) {
n <- as.character(x[1])
s <- as.character(x[2])
tmp.list <- list("desc"= n, "seq"=s)
})
writeFASTA(p.fa, file="ncHs.fa", append=F, width=80)
writeFASTA(n.fa, file="ncHs.fa", append=T, width=80)
}
hsfa <- readFASTA("ncHs.fa")
save(hsfa, file="ncHs.rda", ascii=T)
} else if(genome=="mm9"){
ensembl <- useDataset('mmusculus_gene_ensembl', mart=ensembl)
chrs <- seq(1, 22)
chrs.names <- c(chrs[1:19], "X", "Y", "MT")
emblInfo <- getBM(attributes = c('ensembl_gene_id','ensembl_transcript_id','transcript_exon_intron', 'chromosome_name', 'start_position', 'end_position', 'strand'), filters = 'biotype', values = c("miRNA","Mt_rRNA","Mt_tRNA","rRNA","snoRNA","snRNA"), mart = ensembl)
#only the set of nc associated to canonical chr are selected
p.set <- emblInfo[intersect(which(emblInfo$chromosome_name %in% chrs.names),which(emblInfo$strand == "1")),]
n.set <- emblInfo[intersect(which(emblInfo$chromosome_name %in% chrs.names),which(emblInfo$strand == "-1")),]
peaksNames.p <- paste("chr", p.set$chromosome_name, ".", "plus", ".", p.set$start_position, "-", p.set$end_position, "." ,p.set$ensembl_transcript_id, sep="")
peaksP <- IRanges(start = p.set$start_position, end = p.set$end_position, names = peaksNames.p)
peaksNames.n <- paste("chr", n.set$chromosome_name, ".", "minus", ".", n.set$start_position, "-", n.set$end_position, "." ,n.set$ensembl_transcript_id, sep="")
peaksN <- IRanges(start = n.set$start_position, end = n.set$end_position, names = peaksNames.n)
ncMm.data <- list("peaksP" = peaksP, "peaksN" = peaksN)
save(ncMm.data, file="ncMm.data.rda")
#I need also to retrieve the fasta seq
if(fasta){
p.seq <- apply(p.set, 1, function(x) x[1])
p.seq <- cbind(peaksNames.p, p.seq)
p.fa <- apply(p.seq, 1,function(x) {
n <- as.character(x[1])
s <- as.character(x[2])
tmp.list <- list("desc"= n, "seq"=s)
})
n.seq <- apply(n.set, 1, function(x) x[1])
n.seq <- cbind(peaksNames.n, n.seq)
n.fa <- apply(n.seq, 1,function(x) {
n <- as.character(x[1])
s <- as.character(x[2])
tmp.list <- list("desc"= n, "seq"=s)
})
writeFASTA(p.fa, file="ncMm.fa", append=F, width=80)
writeFASTA(n.fa, file="ncMm.fa", append=T, width=80)
}
mmfa <- readFASTA("ncMm.fa")
save(mmfa, file="ncMm.rda", ascii=T)
} else if(genome=="rn4"){
ensembl <- useDataset('rnorvegicus_gene_ensembl', mart=ensembl)
chrs <- seq(1, 21)
chrs.names <- c(chrs[1:19], "X", "MT")
emblInfo <- getBM(attributes = c('ensembl_gene_id','ensembl_transcript_id','transcript_exon_intron', 'chromosome_name', 'start_position', 'end_position', 'strand'), filters = 'biotype', values = c("miRNA","Mt_rRNA","Mt_tRNA","rRNA","snoRNA","snRNA"), mart = ensembl)
#only the set of nc associated to canonical chr are selected
p.set <- emblInfo[intersect(which(emblInfo$chromosome_name %in% chrs.names),which(emblInfo$strand == "1")),]
n.set <- emblInfo[intersect(which(emblInfo$chromosome_name %in% chrs.names),which(emblInfo$strand == "-1")),]
peaksNames.p <- paste("chr", p.set$chromosome_name, ".", "plus", ".", p.set$start_position, "-", p.set$end_position, "." ,p.set$ensembl_transcript_id, sep="")
peaksP <- IRanges(start = p.set$start_position, end = p.set$end_position, names = peaksNames.p)
peaksNames.n <- paste("chr", n.set$chromosome_name, ".", "minus", ".", n.set$start_position, "-", n.set$end_position, "." ,n.set$ensembl_transcript_id, sep="")
peaksN <- IRanges(start = n.set$start_position, end = n.set$end_position, names = peaksNames.n)
ncRn.data <- list("peaksP" = peaksP, "peaksN" = peaksN)
save(ncRn.data, file="ncRn.data.rda")
#I need also to retrieve the fasta seq
if(fasta){
p.seq <- apply(p.set, 1, function(x) x[1])
p.seq <- cbind(peaksNames.p, p.seq)
p.fa <- apply(p.seq, 1,function(x) {
n <- as.character(x[1])
s <- as.character(x[2])
tmp.list <- list("desc"= n, "seq"=s)
})
n.seq <- apply(n.set, 1, function(x) x[1])
n.seq <- cbind(peaksNames.n, n.seq)
n.fa <- apply(n.seq, 1,function(x) {
n <- as.character(x[1])
s <- as.character(x[2])
tmp.list <- list("desc"= n, "seq"=s)
})
writeFASTA(p.fa, file="ncRn.fa", append=F, width=80)
writeFASTA(n.fa, file="ncRn.fa", append=T, width=80)
}
rnfa <- readFASTA("ncRn.fa")
save(rnfa, file="ncRn.rda", ascii=T)
} else{
cat("\nYou have selected a genome which is not implemented. \nImplemented genomes are:\n\thg19\n\tmm9\n\trn4\n")
}
return()
}
##################################################################################
"exonScaffold" <- function(genome){
# require(biomaRt) || stop("library biomaRt could not be found !")
# require(Biostrings) || stop("\nBiostrings package is missing\n")
ensembl <- useMart('ensembl')
if(genome=="hg19"){
ensembl <- useDataset('hsapiens_gene_ensembl', mart=ensembl)
chrs <- seq(1, 25)
chrs.names <- c(chrs[1:22], "X", "Y", "MT")
emblInfo <- getBM(attributes = c('ensembl_gene_id','ensembl_exon_id','chromosome_name', 'exon_chrom_start', 'exon_chrom_end', 'strand', 'rank'), mart = ensembl)
#plus and minus sted are organized separately
p.set <- emblInfo[intersect(which(emblInfo$chromosome_name %in% chrs.names),which(emblInfo$strand == "1")),]
n.set <- emblInfo[intersect(which(emblInfo$chromosome_name %in% chrs.names),which(emblInfo$strand == "-1")),]
peaksNames.p <- paste("chr", p.set$chromosome_name, ".", "plus", ".", p.set$exon_chrom_start, "-", p.set$exon_chrom_end, ".", p.set$ensembl_gene_id, "." ,p.set$ensembl_exon_id, ".", p.set$rank, sep="")
peaksP <- IRanges(start = p.set$exon_chrom_start, end = p.set$exon_chrom_end, names = peaksNames.p)
peaksNames.n <- paste("chr", n.set$chromosome_name, ".", "minus", ".", n.set$exon_chrom_start, "-", n.set$exon_chrom_end, "." ,n.set$ensembl_gene_id, "." ,n.set$ensembl_exon_id, ".", n.set$rank, sep="")
peaksN <- IRanges(start = n.set$exon_chrom_start, end = n.set$exon_chrom_end, names = peaksNames.n)
exonHs.data <- list("peaksP" = peaksP, "peaksN" = peaksN)
save(exonHs.data, file="exonHs.data.rda", ascii=T)
} else if(genome=="mm9"){
ensembl <- useDataset('mmusculus_gene_ensembl', mart=ensembl)
chrs <- seq(1, 22)
chrs.names <- c(chrs[1:19], "X", "Y", "MT")
emblInfo <- getBM(attributes = c('ensembl_gene_id','ensembl_exon_id','chromosome_name', 'exon_chrom_start', 'exon_chrom_end', 'strand', 'rank'), mart = ensembl)
#only the set of nc associated to canonical chr are selected
p.set <- emblInfo[intersect(which(emblInfo$chromosome_name %in% chrs.names),which(emblInfo$strand == "1")),]
n.set <- emblInfo[intersect(which(emblInfo$chromosome_name %in% chrs.names),which(emblInfo$strand == "-1")),]
peaksNames.p <- paste("chr", p.set$chromosome_name, ".", "plus", ".", p.set$exon_chrom_start, "-", p.set$exon_chrom_end, "." ,p.set$ensembl_transcript_id, sep="")
peaksP <- IRanges(start = p.set$exon_chrom_start, end = p.set$exon_chrom_end, names = peaksNames.p)
peaksNames.n <- paste("chr", n.set$chromosome_name, ".", "minus", ".", n.set$exon_chrom_start, "-", n.set$exon_chrom_end, "." ,n.set$ensembl_transcript_id, sep="")
peaksN <- IRanges(start = n.set$exon_chrom_start, end = n.set$exon_chrom_end, names = peaksNames.n)
exonMm.data <- list("peaksP" = peaksP, "peaksN" = peaksN)
save(exonMm.data, file=paste("chr",chrs.names,"exonMm.data.rda", sep="."))
} else if(genome=="rn4"){
ensembl <- useDataset('rnorvegicus_gene_ensembl', mart=ensembl)
chrs <- seq(1, 21)
chrs.names <- c(chrs[1:19], "X", "MT")
emblInfo <- getBM(attributes = c('ensembl_gene_id','ensembl_exon_id','chromosome_name', 'exon_chrom_start', 'exon_chrom_end', 'strand', 'rank'), mart = ensembl)
#only the set of nc associated to canonical chr are selected
p.set <- emblInfo[intersect(which(emblInfo$chromosome_name %in% chrs.names),which(emblInfo$strand == "1")),]
n.set <- emblInfo[intersect(which(emblInfo$chromosome_name %in% chrs.names),which(emblInfo$strand == "-1")),]
peaksNames.p <- paste("chr", p.set$chromosome_name, ".", "plus", ".", p.set$exon_chrom_start, "-", p.set$exon_chrom_end, "." ,p.set$ensembl_transcript_id, sep="")
peaksP <- IRanges(start = p.set$exon_chrom_start, end = p.set$exon_chrom_end, names = peaksNames.p)
peaksNames.n <- paste("chr", n.set$chromosome_name, ".", "minus", ".", n.set$exon_chrom_start, "-", n.set$exon_chrom_end, "." ,n.set$ensembl_transcript_id, sep="")
peaksN <- IRanges(start = n.set$exon_chrom_start, end = n.set$exon_chrom_end, names = peaksNames.n)
exonRn.data <- list("peaksP" = peaksP, "peaksN" = peaksN)
save(exonRn.data, file=paste("chr",chrs.names,"exonMm.data.rda", sep="."))
} else{
cat("\nYou have selected a genome which is not implemented. \nImplemented genomes are:\n\thg19\n\tmm9\n\trn4\n")
}
return()
}
################################################################################
"makeGeneScaffold" <- function(whichRef = c("hg19", "mm9", "rn4")){
Try(cat("\nStart creating the gene-level scaffold for reads mapping....\nBe patient!\n"))
# Try(require(rtracklayer) || stop("\nMissing library rtracklayer\n"))
Try(data("chrLength", package="oneChannelGUI"))
if(whichRef=="hg19"){
Try(chr.length <- chrLength$hg19[which(names(chrLength$hg19)!="MT")])
Try(session <- browserSession())
Try(genome(session) <- "hg19")
# Try(trackNames(session)) #show the available tables
# UCSC genes knownGene
Try(query <- ucscTableQuery(session, "knownGene"))
Try(tableName(query) <- "knownToLocusLink")
Try(myEGtable <- getTable(query))
Try(chr.eg <- list())
Try(chr.gr <- list())
for( i in 1:length(chr.length)){
Try(mychr <- paste("chr", names(chr.length)[i], sep=""))
Try(query <- ucscTableQuery(session, "knownGene", GRangesForUCSCGenome("hg19", mychr, IRanges(1, as.numeric(chr.length[i])))))
Try(tableName(query) <- "knownGene") #indicates which table I am interested
Try(mytable <- getTable(query))
Try(mytable <- mytable[which(mytable$proteinID!=""),]) #keeping only genes associated to proteins
Try(mytable <- mytable[setdiff(seq(1,dim(mytable)[1]), which(duplicated(mytable$proteinID))),]) #since analysis at gene level I keep only one of the duplicated transcript isoforms
#associating eg to uscs transcripts names
Try(myeg <- myEGtable[which(myEGtable$name%in%mytable$name),])
Try(mytable <- mytable[which(mytable$name%in%myeg$name),])
Try(mytable <- mytable[setdiff(seq(1,dim(mytable)[1]), which(duplicated(mytable$name))),])
Try(mytable <- mytable[order(as.character(mytable$name)),])
Try(myeg <- myeg[order(as.character(myeg$name)),])
#identical(as.character(mytable$name), as.character(myeg$name))
#creating a gene-level grange object
Try(mygr <- GRanges(seqnames = Rle(rep(mychr, dim(mytable)[1])), ranges = IRanges(start=mytable$txStart, end=mytable$txEnd), strand = Rle(as.character(mytable$strand)), names=as.character(mytable$proteinID)))
Try(myeg <- cbind(myeg, elementMetadata(mygr)$names))
Try(names(myeg)<-c("names","EG","proteinID"))
Try(chr.gr[[mychr]] <- mygr)
Try(chr.eg[[mychr]] <- myeg)
}
}else if(whichRef == "mm9"){
Try(chr.length <- chrLength$mm9[which(names(chrLength$mm9)!="MT")])
Try(session <- browserSession())
Try(genome(session) <- "mm9")
# Try(trackNames(session)) #show the available tables
# UCSC genes knownGene
Try(query <- ucscTableQuery(session, "knownGene"))
Try(tableName(query) <- "knownToLocusLink")
Try(myEGtable <- getTable(query))
Try(chr.eg <- list())
Try(chr.gr <- list())
for( i in 1:length(chr.length)){
Try(mychr <- paste("chr", names(chr.length)[i], sep=""))
Try(query <- ucscTableQuery(session, "knownGene", GRangesForUCSCGenome("mm9", mychr, IRanges(1, as.numeric(chr.length[i])))))
Try(tableName(query) <- "knownGene") #indicates which table I am interested
Try(mytable <- getTable(query))
Try(mytable <- mytable[which(mytable$proteinID!=""),]) #keeping only genes associated to proteins
Try(mytable <- mytable[setdiff(seq(1,dim(mytable)[1]), which(duplicated(mytable$proteinID))),]) #since analysis at gene level I keep only one of the duplicated transcript isoforms
#associating eg to uscs transcripts names
Try(myeg <- myEGtable[which(myEGtable$name%in%mytable$name),])
Try(mytable <- mytable[which(mytable$name%in%myeg$name),])
Try(mytable <- mytable[setdiff(seq(1,dim(mytable)[1]), which(duplicated(mytable$name))),])
Try(mytable <- mytable[order(as.character(mytable$name)),])
Try(myeg <- myeg[order(as.character(myeg$name)),])
#identical(as.character(mytable$name), as.character(myeg$name))
#creating a gene-level grange object
Try(mygr <- GRanges(seqnames = Rle(rep(mychr, dim(mytable)[1])), ranges = IRanges(start=mytable$txStart, end=mytable$txEnd), strand = Rle(as.character(mytable$strand)), names=as.character(mytable$proteinID)))
Try(myeg <- cbind(myeg, elementMetadata(mygr)$names))
Try(names(myeg)<-c("names","EG","proteinID"))
Try(chr.gr[[mychr]] <- mygr)
Try(chr.eg[[mychr]] <- myeg)
}
}else if(whichRef=="rn4"){
Try(chr.length <- chrLength$rn4[which(names(chrLength$rn4)!="MT")])
Try(session <- browserSession())
Try(genome(session) <- "rn4")
# Try(trackNames(session)) #show the available tables
# UCSC genes knownGene
Try(query <- ucscTableQuery(session, "knownGene"))
Try(tableName(query) <- "knownToLocusLink")
Try(myEGtable <- getTable(query))
Try(chr.eg <- list())
Try(chr.gr <- list())
for( i in 1:length(chr.length)){
Try(mychr <- paste("chr", names(chr.length)[i], sep=""))
Try(query <- ucscTableQuery(session, "knownGene", GRangesForUCSCGenome("rn4", mychr, IRanges(1, as.numeric(chr.length[i])))))
Try(tableName(query) <- "knownGene") #indicates which table I am interested
Try(mytable <- getTable(query))
Try(mytable <- mytable[which(mytable$proteinID!=""),]) #keeping only genes associated to proteins
Try(mytable <- mytable[setdiff(seq(1,dim(mytable)[1]), which(duplicated(mytable$proteinID))),]) #since analysis at gene level I keep only one of the duplicated transcript isoforms
#associating eg to uscs transcripts names
Try(myeg <- myEGtable[which(myEGtable$name%in%mytable$name),])
Try(mytable <- mytable[which(mytable$name%in%myeg$name),])
Try(mytable <- mytable[setdiff(seq(1,dim(mytable)[1]), which(duplicated(mytable$name))),])
Try(mytable <- mytable[order(as.character(mytable$name)),])
Try(myeg <- myeg[order(as.character(myeg$name)),])
#identical(as.character(mytable$name), as.character(myeg$name))
#creating a gene-level grange object
Try(mygr <- GRanges(seqnames = Rle(rep(mychr, dim(mytable)[1])), ranges = IRanges(start=mytable$txStart, end=mytable$txEnd), strand = Rle(as.character(mytable$strand)), names=as.character(mytable$proteinID)))
Try(myeg <- cbind(myeg, elementMetadata(mygr)$names))
Try(names(myeg)<-c("names","EG","proteinID"))
Try(chr.gr[[mychr]] <- mygr)
Try(chr.eg[[mychr]] <- myeg)
}
}
Try(cat("\nEnd creating the gene-level scaffold for reads mapping\n"))
Try(output <- list("chr.gr"=chr.gr, "chr.eg"=chr.eg))
Try(return(output))
}
################################################################################
#scaffold is the GRange obj containing the position on chrs for each of the genes
#whichRef refers to the genome of interest
"makeGCcontent" <- function(scaffold, whichRef = c("hg19", "mm9", "rn4")){
Try(chr.gr <- scaffold)
Try(cat("\nStart creating the scaffold GC frequency \n"))
Try(cat("\nEnd creating the scaffold GC frequency \n"))
if(whichRef == "hg19"){
Try(library("BSgenome.Hsapiens.UCSC.hg19"))
} else if(whichRef == "mm9"){
Try(library("BSgenome.Mmusculus.UCSC.mm9"))
} else if(whichRef == "rn4"){
Try(library("BSgenome.Rnorvegicus.UCSC.rn4"))
}
Try(gc.freq <- list())
if(whichRef == "hg19"){
#scanning over chrs
for(i in names(chr.gr)){
#extract for a specific chr only the ranges of the genes from the GRange obj
Try(IRanges.tmp <- ranges(chr.gr[[i]]))
Try(tmpseq <- getSeq(Hsapiens, i, start=start(IRanges.tmp), end=end((IRanges.tmp))))
Try(tmpfreq <- alphabetFrequency(tmpseq, as.prob=T, baseOnly=T))
#sum GC freq
Try(gcfreq <- apply(tmpfreq[,2:3], 1, sum))
Try(names(gcfreq) <- elementMetadata(chr.gr[[i]])$names)
Try(gc.freq[[i]] <- gcfreq)
}
} else if(whichRef == "mm9"){
#scanning over chrs
for(i in names(chr.gr)){
#extract for a specific chr only the ranges of the genes from the GRange obj
Try(IRanges.tmp <- ranges(chr.gr[[i]]))
Try(tmpseq <- getSeq(Mmusculus, i, start=start(IRanges.tmp), end=end((IRanges.tmp))))
Try(tmpfreq <- alphabetFrequency(tmpseq, as.prob=T, baseOnly=T))
#sum GC freq
Try(gcfreq <- apply(tmpfreq[,2:3], 1, sum))
Try(names(gcfreq) <- elementMetadata(chr.gr[[i]])$names)
Try(gc.freq[[i]] <- gcfreq)
}
} else if(whichRef == "rn4"){
#scanning over chrs
for(i in names(chr.gr)){
#extract for a specific chr only the ranges of the genes from the GRange obj
Try(IRanges.tmp <- ranges(chr.gr[[i]]))
Try(tmpseq <- getSeq(Rnorvegicus, i, start=start(IRanges.tmp), end=end((IRanges.tmp))))
Try(tmpfreq <- alphabetFrequency(tmpseq, as.prob=T, baseOnly=T))
#sum GC freq
Try(gcfreq <- apply(tmpfreq[,2:3], 1, sum))
Try(names(gcfreq) <- elementMetadata(chr.gr[[i]])$names)
Try(gc.freq[[i]] <- gcfreq)
}
}
Try(return(gc.freq))
}
################################################################################
"wrapScaffold" <- function(whichRef = c("hg19", "mm9", "rn4")){
Try(whichRef <- whichRef)
Try(tmp <- makeGeneScaffold(as.character(whichRef)))
Try(chr.gr <- tmp[[1]])
Try(chr.eg <- tmp[[2]])
Try(gc.freq <- makeGCcontent(scaffold=chr.gr, as.character(whichRef)))
Try(save(chr.gr, chr.eg, gc.freq, file=paste(whichRef,"_gene-level.scaffold.Rda",sep="")))
}
################################################################################
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.