R/util-methods.r
In TFBSTools: Software Package for Transcription Factor Binding Site (TFBS) Analysis

Documented in IUPAC2Matrix sampleRanges shannon.entropy

### -----------------------------------------------------------------
### findOverlapsBases
### Not exported!
findLargestOverlaps = function(query, subject){
  hits = findOverlaps(query, subject, select="all")
  if(length(hits) == 0L){
    ## if no overlaps, return hits with length 0.
    return(hits)
  }
  hitsQuery = query[queryHits(hits)]
  hitsSubject = subject[subjectHits(hits)]
  overlapLength <- width(pintersect(hitsQuery, hitsSubject))
  splittedLength <- split(overlapLength, queryHits(hits))
  groupMax <- sapply(splittedLength, max)
  maxHits <- groupMax[as.character(queryHits(hits))] == overlapLength
  return(hits[maxHits])
}

### -----------------------------------------------------------------
### shannon.entropy
### Exported!
shannon.entropy <- function(p)
{
  if (min(p) < 0 || sum(p) <= 0)
    return(NA)
  p.norm <- p[p>0]/sum(p)
  -sum(log2(p.norm)*p.norm)
}

### -----------------------------------------------------------------
### IUPAC_CODE_MAP to the matrix conversion
### Exported!
IUPAC2Matrix <- function(x){
  x <- as.character(x)
  x <- strsplit(x, "")[[1]]
  if(!all(x %in% names(IUPAC_CODE_MAP))){
    stop("All characters must be in IUPAC_CODE_MAP!")
  }
  ans <- matrix(0L, nrow=4, ncol=length(x),
                dimnames=list(c("A", "C", "G", "T")))
  for(i in 1:length(x)){
    dnaCharacters <- strsplit(IUPAC_CODE_MAP[x[i]], "")[[1]]
    ans[dnaCharacters, i] <- 1L
  }
  return(ans)
}


### -----------------------------------------------------------------
### Sampling ranges from areas of subject sequence based on input ranges
### sampleRanges exported!
sampleRangesOneStrand <- function(inputGRanges, subjectGRanges){
  if(length(inputGRanges) == 0L){
    return(GRanges())
  }
  if(length(subjectGRanges) == 0L){
    return(GRanges())
  }
  widthsInput <- width(inputGRanges)
  widthsSubject <- width(subjectGRanges)
  indexAll <- lapply(widthsInput, 
                     function(x, widthsSubject){which(x<=widthsSubject)},
                     widthsSubject)
  indexSampling <- sapply(indexAll, sample, size=1L)
  selectedSubjectGRanges <- subjectGRanges[indexSampling]
  sampledStart <- sapply(end(selectedSubjectGRanges) - 
                         width(selectedSubjectGRanges) + 1L,
                         function(x){sample(1L:x, size=1L)})
  sampledGRanges <- GRanges(seqnames=seqnames(selectedSubjectGRanges),
                      ranges=IRanges(start=start(selectedSubjectGRanges)+
                                     sampledStart-1L,
                                     width=width(inputGRanges)),
                            strand="*",
                            seqinfo=seqinfo(selectedSubjectGRanges))
  stopifnot(length(sampledGRanges) == length(inputGRanges))
  stopifnot(all(width(sampledGRanges) <= width(selectedSubjectGRanges)))
  return(sampledGRanges)
}

sampleRanges <- function(inputGRanges, subjectGRanges, ignore.strand=TRUE){
  if(ignore.strand){
    ans <- sampleRangesOneStrand(inputGRanges, subjectGRanges)
  }else{
    orderPostive <- which(strand(inputGRanges)=="+")
    sampledGRangesPostive <- sampleRangesOneStrand(
                               inputGRanges[orderPostive],
                               subjectGRanges[strand(subjectGRanges)=="+"])
    strand(sampledGRangesPostive) <- "+"
    orderNegative <- which(strand(inputGRanges)=="-")
    sampledGRangesNegative <- sampleRangesOneStrand(
                                inputGRanges[orderNegative],
                                subjectGRanges[strand(subjectGRanges)=="-"])
    strand(sampledGRangesNegative) <- "-"
    orderUnknow <- which(strand(inputGRanges)=="*")
    sampledGRangesUnknown <- sampleRangesOneStrand(
                               inputGRanges[orderUnknow],
                               subjectGRanges)
    ans <- c(sampledGRangesPostive, sampledGRangesNegative, 
             sampledGRangesUnknown)[order(c(orderPostive, orderNegative, orderUnknow))]
  }
  return(ans)
}

Any scripts or data that you put into this service are public.

TFBSTools documentation built on Nov. 8, 2020, 8:14 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

TFBSTools
Software Package for Transcription Factor Binding Site (TFBS) Analysis

R/util-methods.r
In TFBSTools: Software Package for Transcription Factor Binding Site (TFBS) Analysis

Defines functions sampleRanges sampleRangesOneStrand IUPAC2Matrix shannon.entropy findLargestOverlaps

Documented in IUPAC2Matrix sampleRanges shannon.entropy

Try the TFBSTools package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

TFBSTools Software Package for Transcription Factor Binding Site (TFBS) Analysis

R/util-methods.r In TFBSTools: Software Package for Transcription Factor Binding Site (TFBS) Analysis

Defines functions sampleRanges sampleRangesOneStrand IUPAC2Matrix shannon.entropy findLargestOverlaps

Documented in IUPAC2Matrix sampleRanges shannon.entropy

Try the TFBSTools package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

TFBSTools
Software Package for Transcription Factor Binding Site (TFBS) Analysis

R/util-methods.r
In TFBSTools: Software Package for Transcription Factor Binding Site (TFBS) Analysis