#' Make a leader that spans the max leader per transcript by cage files
#'
#' Reads every CAGE leader file in \code{leadersFolder}, takes the maximum
#' leader width per transcript across all files, and resizes the original
#' five prime UTRs so that each one gets that maximum width.
#' Three files are written to \code{dataFolder}: "leaderLengths.rds"
#' (matrix of leader widths, one column per CAGE file), "CageFiveUTRs.rds"
#' (the resized leaders) and "CageFiveUTRsWithCDS.rds" (the resized leaders
#' after ORFik:::addCdsOnLeaderEnds has been applied with the CDS).
#' @param leadersFolder folder with CAGE leaders
#' @param dataFolder dataFolder with helper objects
#' @importFrom BiocParallel bplapply
#' @importFrom GenomicRanges start
#' @return invisible(NULL), called for its side effect of saving files.
#' Stops with an error if no leader files are found, if the leader files
#' disagree in length, or if the resized leaders do not get the expected
#' widths.
allLeadersSpanningLeader <- function(leadersFolder, dataFolder){
  message("Creating merged leader")
  leadersList <- list.files(leadersFolder, full.names = TRUE)
  if (length(leadersList) == 0L) {
    # Without this guard, matrix() below fails with a cryptic error
    stop("No CAGE leader files found in leadersFolder: ", leadersFolder)
  }
  # Get all width of cage experiment TSS reassignments
  widths <- bplapply(leadersList, function(i) {
    widthPerGroup(readRDS(i), keep.names = FALSE)
  })
  if (length(unique(lengths(widths))) != 1L) {
    # matrix() would otherwise recycle / misalign the widths
    stop("CAGE leader files do not all contain the same number of leaders")
  }
  # One column per CAGE file, one row per leader
  widths <- matrix(unlist(widths), ncol = length(widths))
  maxWidths <- rowMaxs(widths)

  # NOTE(review): fiveUTRs is not created in this function; getLeaders()
  # (defined elsewhere) is expected to make it available - confirm.
  getLeaders()
  if (length(maxWidths) != length(fiveUTRs)) {
    stop("CAGE leaders and original leaders differ in length")
  }
  # Positive change: leader must grow, negative: leader must shrink
  change <- maxWidths - widthPerGroup(fiveUTRs, FALSE)
  isPosStrand <- strandBool(fiveUTRs)
  leaderStarts <- ORFik:::startSites(fiveUTRs)

  newStarts <- rep.int(0L, length(fiveUTRs))
  outsidePos <- isPosStrand & (change >= 0)
  outsideMin <- !isPosStrand & (change >= 0)
  either <- outsidePos | outsideMin
  # Outside Leader
  # pos: start site is moved to a lower coordinate
  newStarts[outsidePos] <- leaderStarts[outsidePos] - change[outsidePos]
  # min: start site is moved to a higher coordinate
  newStarts[outsideMin] <- leaderStarts[outsideMin] + change[outsideMin]
  fOut <- fiveUTRs[either]
  fOut <- ORFik:::downstreamFromPerGroup(fOut, newStarts[either])

  # Inside Leader
  inside <- change < 0
  if (any(inside)) {
    # Map each leader onto itself to get transcript coordinates
    fIn <- ORFik:::pmapToTranscriptF(fiveUTRs[inside], fiveUTRs[inside])
    # change is negative here, so this moves the start forward (-- = +)
    start(fIn) <- start(fIn) - change[inside]
    fIn <- unlist(fIn, use.names = FALSE)
    fIn <- ranges(fIn)
    # Names are the indices of the leaders in fiveUTRs
    names(fIn) <- which(inside)
    fInNew <- pmapFromTranscriptF(fIn, fiveUTRs, removeEmpty = TRUE)
  } else {
    fInNew <- GRangesList()
  }

  fTot <- fiveUTRs
  fTot[either] <- fOut
  fTot[inside] <- fInNew
  # Sanity check: every leader must now have the max width
  if (!all(ORFik::widthPerGroup(fTot, FALSE) == maxWidths)) {
    stop("Algorithm is wrong for five extension!")
  }
  # all ok, then save
  saveRDS(widths, file = p(dataFolder, "/leaderLengths.rds"))
  CageFiveUTRs <- fTot
  saveRDS(CageFiveUTRs, file = p(dataFolder, "/CageFiveUTRs.rds"))
  # NOTE(review): cds is not created in this function; getCDS()
  # (defined elsewhere) is expected to make it available - confirm.
  getCDS()
  CageFiveWithCDS <- ORFik:::addCdsOnLeaderEnds(fTot, cds)
  saveRDS(CageFiveWithCDS, file = p(dataFolder, "/CageFiveUTRsWithCDS.rds"))
  return(invisible(NULL))
}
#' Assign transcriptnames to orfs, and find rank for each orf
#'
#' Finds every CAGE leader each uORF lies within, stores the resulting
#' uORF id / transcript name pairs in the tables "linkORFsToTx" and
#' "linkORFsToTxUnique", and saves the uORFs named by transcript to
#' \code{dataFolder}, both for all uORF / transcript pairs and for
#' unique uORFs only. The work is skipped when the file with unique
#' uORFs already exists.
#' @param dataFolder dataFolder with helper objects
#' @return invisible(NULL), called for its side effects
linkORFsToTx <- function(dataFolder){
  uniquePath <- p(dataFolder,"/uniqueUorfsAsGRWithTx.rdata")
  # Guard: nothing to do if the final output is already there
  if (file.exists(uniquePath)) {
    message("linkORFsToTx already exist, skipping remake of them")
    return(invisible(NULL))
  }

  message("Linking ORFs to transcripts")
  leaders <- leaderCage()
  grl <- getUorfsInDb(TRUE, FALSE, FALSE)
  hits <- findOverlaps(grl, leaders, type = "within")
  # Every uORF must be contained in at least one leader
  nMatched <- length(unique(from(hits)))
  if (nMatched != length(grl)) {
    stop("leader is not spanning all uORFs, check for uORFs going into cds.")
  }

  # Order the hits by leader (transcript) index
  byTx <- order(to(hits))
  orfIdx <- from(hits)[byTx]
  txIdx <- to(hits)[byTx]
  txNames <- names(leaders)[txIdx]
  linkTable <- data.table(uorfID = ORFik:::orfID(grl)[orfIdx],
                          txNames = txNames)
  insertTable(Matrix = linkTable, tableName = "linkORFsToTx", rmOld = TRUE)

  # now make grl with transcript mapping
  perTx <- grl[orfIdx]
  names(perTx@unlistData) <- NULL
  names(perTx) <- txNames
  # Copy the group names down to the unlisted ranges
  flat <- unlist(perTx, use.names = TRUE)
  names(perTx@unlistData) <- names(flat)
  # Object must be called grl, since save() stores it under that name
  grl <- ORFik:::makeORFNames(perTx, FALSE)
  save(grl, file = p(dataFolder,"/uoRFsAsGRAllWithTx.rdata"))

  # Unique uORFs: keep first occurrence of each id (without tx)
  ids <- ORFik:::orfID(grl, with.tx = FALSE)
  firstSeen <- which(!duplicated(ids))
  grl <- grl[firstSeen]
  # Column is named d explicitly, to keep the table layout
  insertTable(Matrix = data.table(d = firstSeen),
              tableName = "toUniqueOrder", rmOld = TRUE)
  insertTable(Matrix = linkTable[firstSeen,],
              tableName = "linkORFsToTxUnique", rmOld = TRUE)
  save(grl, file = uniquePath)
  return(invisible(NULL))
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.