Nothing
table_RleList <- function(x)
{
nbins <- max(max(x)) + 1L
data <- lapply(x,
function(xi)
S4Vectors:::tabulate2(runValue(xi) + 1L, nbins,
weight=runLength(xi)))
ans <- matrix(unlist(data, use.names=FALSE),
nrow=length(x), byrow=TRUE)
dimnames(ans) <- list(names(x), 0:(nbins-1L))
class(ans) <- "table"
ans
}
sampleQC <- function(bamFile,bedFile=NULL,blklist=NULL,ChrOfInterest=NULL,GeneAnnotation=NULL,Window=400,FragmentLength=50,
shiftWindowStart=1,shiftWindowEnd=2,mapQCutoff=15,runCrossCor=FALSE,verboseT=TRUE){
ChrLengths <- scanBamHeader(bamFile)[[1]]$targets
if(length(ChrLengths[ChrLengths < shiftWindowEnd - shiftWindowStart]) > 0){
message("Removing ",length(ChrLengths[ChrLengths < shiftWindowEnd - shiftWindowStart]),
" chromosomes with length less than cross-coverage shift")
ChrLengths <- ChrLengths[!ChrLengths < shiftWindowEnd - shiftWindowStart]
}
if(verboseT == T){
message("Bam file has ",length(names(ChrLengths))," contigs")
}
if(!all(ChrOfInterest %in% names(ChrLengths))){
stop("Contigs of interest are not all in Bam file!!")
}
if(!is.null(ChrOfInterest)){
ChrLengths <- ChrLengths[names(ChrLengths) %in% ChrOfInterest]
}
ShiftMat <- NULL
ShiftMatCor <- NULL
FlagTagCounts <- NULL
CovHist <- NULL
Cov <- NULL
SSD <- NULL
PosAny <- NULL
NegAny <- NULL
SSDBL <- NULL
readlength <- as.numeric(NA)
CoverageMatrix <- NULL
CoverageMatrixInput <- NULL
bedRangesTemp <- GetGRanges(GRanges(),AllChr=names(ChrLengths))
bedRangesSummitsTemp <- vector("numeric")
Counts <- NULL
LeftPosBiasTemp <- NULL
RightNegBiasTemp <- NULL
meanBiasTemp <- NULL
MultiRangesCountsTemp <- NULL
txdb <- NULL
GeneAnnotationTotalSizes <- NULL
if(!sum(class(GeneAnnotation)=="list") & !is.null(GeneAnnotation)) {
GeneAnnotation = getAnnotation(GeneAnnotation,AllChr=names(ChrLengths))
}
if(sum(class(GeneAnnotation)=="list")){
if(length(GeneAnnotation)>1) {
GeneAnnotation <- GeneAnnotation[!names(GeneAnnotation) %in% "version"]
GeneAnnotation <- lapply(GeneAnnotation,function(x)reduce(GetGRanges(x,names(ChrLengths))))
GeneAnnotationTotalSizes <- unlist(lapply(GeneAnnotation,function(x)sum(width(x))))
} else {
GeneAnnotation=NULL
}
}
if(file.exists(bamFile) & length(index(BamFile(bamFile))) == 0){
message("Creating index for ",bamFile)
indexBam(bamFile)
message("..done")
}
for(k in 1:length(ChrLengths)){
Param <- ScanBamParam(which=GRanges(seqnames=names(ChrLengths)[k],IRanges(start=1,end=unname(ChrLengths[names(ChrLengths) == names(ChrLengths)[k]])-shiftWindowEnd)),
what=c("flag","mapq"))
temp <- readGAlignments(bamFile,param=Param)
if(length(temp) < 1){
emptyChr_SSD <- 0
names(emptyChr_SSD) <- names(ChrLengths)[k]
SSD <- c(SSD,emptyChr_SSD)
emptyChr_SSDBL <- 0
names(emptyChr_SSDBL) <- names(ChrLengths)[k]
SSDBL <- c(SSD,emptyChr_SSDBL)
emptyChr_PosAny <- 0
names(emptyChr_PosAny) <- names(ChrLengths)[k]
PosAny <- c(PosAny,emptyChr_PosAny)
emptyChr_NegAny <- 0
names(emptyChr_NegAny) <- names(ChrLengths)[k]
NegAny <- c(NegAny,emptyChr_NegAny)
ShiftMattemp <- matrix(rep(0,(shiftWindowEnd-shiftWindowStart)+1),ncol=1)
colnames(ShiftMattemp) <- names(ChrLengths)[k]
ShiftMat <- cbind(ShiftMat,ShiftMattemp)
}else{
if(k == 1){
tocheckforreads <- 1000
readlength=round(mean(width(temp[1:tocheckforreads])))
}
# Sample_GIT <- GNCList(GRanges(seqnames=seqnames(temp),ranges=ranges(temp),strand=strand(temp),elementMetadata(temp)))
Sample_GIT <- GRanges(seqnames=seqnames(temp),ranges=ranges(temp),strand=strand(temp),elementMetadata(temp))
flagMapQ <- cbind(bamFlagAsBitMatrix(Sample_GIT$flag)[,c("isUnmappedQuery","isDuplicate"),drop=FALSE],Sample_GIT$mapq)
colnames(flagMapQ) <- c("A","B","C")
flagMapQ[is.na(flagMapQ[,"C"]),"C"] <- Inf
temp <- as.data.frame(xtabs(~A+B+C, data=flagMapQ))
UnMapped <- sum(temp[temp[,"A"] == 1,"Freq"])
Mapped <- sum(temp[temp[,"A"] != 1,"Freq"])
Duplicates <- sum(temp[temp[,"A"] != 1 & temp[,"B"] == 1,"Freq"])
MapQPass <- sum(temp[temp[,"A"] != 1 & as.numeric(as.vector(temp[,"C"])) >= 15,"Freq"])
MapQPassAndDup <- sum(temp[temp[,"A"] != 1 & temp[,"B"] == 1 & as.numeric(as.vector(temp[,"C"])) >= mapQCutoff,"Freq"])
FlagTagCounts <- rbind(cbind(UnMapped,Mapped,Duplicates,MapQPass,MapQPassAndDup),FlagTagCounts)
# MapQPass <- sum(temp[temp[,"A"] != 1 & as.numeric(as.vector(temp[,"C"])) >= 15,"Freq"])
# MapQPassAndDup <- sum(temp[temp[,"A"] != 1 & temp[,"B"] == 1 & as.numeric(as.vector(temp[,"C"])) >= mapQCutoff,"Freq"])
# FlagTagCounts <- rbind(cbind(UnMapped,Mapped,Duplicates,MapQPass,MapQPassAndDup),FlagTagCounts)
Cov <- coverage(Sample_GIT,width=unname(ChrLengths[k]))
if(verboseT == T){
message("Calculating coverage histogram for ",names(ChrLengths)[k],"\n")
}
CovHist <- c(CovHist,list(colSums(table_RleList(Cov))))
if(verboseT == T){
message("Calculating SSD for ",names(ChrLengths)[k],"\n")
}
SSD <- c(SSD,sd(Cov)[names(ChrLengths)[k]])
PosCoverage <- coverage(IRanges(start(Sample_GIT[strand(Sample_GIT)=="+"]),start(Sample_GIT[strand(Sample_GIT)=="+"])),width=ChrLengths[k])
NegCoverage <- coverage(IRanges(end(Sample_GIT[strand(Sample_GIT)=="-"]),end(Sample_GIT[strand(Sample_GIT)=="-"])),width=ChrLengths[k])
if(verboseT == T){
message("Calculating unique positions per strand for ",names(ChrLengths)[k],"\n")
}
PosAny <- c(PosAny,sum(runLength((PosCoverage[PosCoverage > 1]))))
NegAny <- c(NegAny,sum(runLength((NegCoverage[NegCoverage > 1]))))
if(verboseT == T){
message("Calculating shift for ",names(ChrLengths)[k],"\n")
}
#ShiftsTemp <- shiftApply(seq(shiftWindowStart,shiftWindowEnd),PosCoverage,NegCoverage,cor)
ShiftsTemp <- shiftApply(seq(shiftWindowStart,shiftWindowEnd),PosCoverage,NegCoverage,RleSumAny, verbose = verboseT)
ShiftMat <- cbind(ShiftMat,ShiftsTemp)
colnames(ShiftMat)[ncol(ShiftMat)] <- names(ChrLengths)[k]
if(runCrossCor==TRUE){
ShiftsCorTemp <- shiftApply(seq(shiftWindowStart,shiftWindowEnd),PosCoverage,NegCoverage,cor, verbose = verboseT)
ShiftMatCor <- cbind(ShiftMatCor,ShiftsCorTemp)
colnames(ShiftMatCor)[ncol(ShiftMatCor)] <- names(ChrLengths)[k]
}
if(!is.null(bedFile)){
bedRanges <- GetGRanges(bedFile,as.vector(names(ChrLengths)),names(ChrLengths)[k])
toFilterOutOfBounds <- start(bedRanges)-Window < 0 |
end(bedRanges)+Window > unname(ChrLengths[names(ChrLengths) == names(ChrLengths)[k]])
if(any(toFilterOutOfBounds)){
message("removing",sum(toFilterOutOfBounds),"peaks within ",Window,"bp of chromosome edges")
bedRanges <- bedRanges[!toFilterOutOfBounds]
}
GRangesOfInterestList <- GRangesList(GRanges(seqnames(bedRanges),ranges(bedRanges)))
names(GRangesOfInterestList) <- c(names(GRangesOfInterestList),"Peaks")
seqlevels(GRangesOfInterestList) <- names(ChrLengths)
}else{
GRangesOfInterestList <- GRangesList()
seqlevels(GRangesOfInterestList) <- names(ChrLengths)
}
if(!is.null(blklist)){
blkRanges <- GetGRanges(blklist,names(ChrLengths),names(ChrLengths)[k])
GRangesOfInterestListBL <- GRangesList(GRanges(seqnames(blkRanges),ranges(blkRanges)))
names(GRangesOfInterestListBL) <- "BlackList"
GRangesOfInterestList <- c(GRangesOfInterestList,GRangesOfInterestListBL)
Cov[[names(ChrLengths)[k]]][ranges(blkRanges)] <- 0
SSDBL <- c(SSDBL,sd(Cov)[names(ChrLengths)[k]])
#print(names(GRangesOfInterestList))
}else{
SSDBL <- NULL
}
if(sum(class(GeneAnnotation)=="list")){
#GRangesOfInterestListGA <- GRangesList()
noVersionGeneAnnotation <- GeneAnnotation[!names(GeneAnnotation)=="version"]
GRangesOfInterestListGF <- GRangesList()
seqlevels(GRangesOfInterestListGF) <- names(ChrLengths)
for(g in 1:length(noVersionGeneAnnotation)){
GRangesOfInterestListGF <- c(
GRangesOfInterestListGF,
GRangesList(GetGRanges(noVersionGeneAnnotation[[g]],names(ChrLengths),names(ChrLengths)[k],simplify=TRUE))
)
}
names(GRangesOfInterestListGF) <- names(noVersionGeneAnnotation)
GRangesOfInterestList <- c(GRangesOfInterestList,GRangesOfInterestListGF)
#GRangesOfInterestListGA <-
#GRangesList(GetGRanges(GeneAnnotation$All5utrs,names(ChrLengths),names(ChrLengths)[k],simplify=TRUE),
# GetGRanges(GeneAnnotation$All3utrs,names(ChrLengths),names(ChrLengths)[k],simplify=TRUE),
# GetGRanges(GeneAnnotation$Allcds,names(ChrLengths),names(ChrLengths)[k],simplify=TRUE),
# GetGRanges(GeneAnnotation$Allintrons,names(ChrLengths),names(ChrLengths)[k],simplify=TRUE),
# GetGRanges(GeneAnnotation$Alltranscripts,names(ChrLengths),names(ChrLengths)[k],simplify=TRUE),
# GetGRanges(GeneAnnotation$Promoters500,names(ChrLengths),names(ChrLengths)[k],simplify=TRUE),
# GetGRanges(GeneAnnotation$Promoters2000to500,names(ChrLengths),names(ChrLengths)[k],simplify=TRUE),
# GetGRanges(GeneAnnotation$LongPromoter20000to2000,names(ChrLengths),names(ChrLengths)[k],simplify=TRUE))
#names(GRangesOfInterestListGA) <-
# c("All5utrs","All3utrs","Allcds","Allintrons","Alltranscripts",
# "Promoters500","Promoters2000to500","LongPromoter20000to2000")
#GRangesOfInterestList <- c(GRangesOfInterestList,GRangesOfInterestListGA)
}
GRangesOfInterestList <- GRangesOfInterestList
MultiRangesCountsTemp <- c(MultiRangesCountsTemp,countOverlaps(GRangesOfInterestList,Sample_GIT))
if(verboseT == T){
message("Counting reads in features for ",names(ChrLengths)[k],"\n")
}
if(!is.null(bedFile)){
CountsTemp <- countOverlaps(bedRanges,Sample_GIT)
Counts <- c(Counts,CountsTemp)
bedRangesTemp <- c(bedRangesTemp,bedRanges)
if(verboseT == TRUE){
message("Signal over peaks for ",names(ChrLengths)[k],"\n")
}
AllFragRanges <- resize(as(Sample_GIT[Sample_GIT %over% bedRanges],"GRanges"),FragmentLength,"start")
bedRangesSummits <- findCovMaxPos(AllFragRanges,bedRanges,ChrLengths[k],FragmentLength)
bedRangesSummitsTemp <- c(bedRangesSummitsTemp,as.numeric(as.vector(start(bedRangesSummits))))
updatedRanges <- resize(bedRangesSummits,Window,"center")
AllCov <- coverage(AllFragRanges,width=unname(ChrLengths[k])+Window)
CoverageMatrix <- rbind(CoverageMatrix,matrix(as.vector(AllCov[[which(names(AllCov) == names(ChrLengths)[k])]][ranges(updatedRanges[seqnames(updatedRanges) == names(ChrLengths)[k]])]),ncol=Window,byrow=TRUE))
}
}
}
Weights <- ChrLengths
CovHistAll <- NULL
if(!is.null(CovHist)){
for(l in 1:length(CovHist)){
print(length(l))
tempCovHist <- cbind(as.numeric(names(CovHist[[l]])),as.numeric(CovHist[[l]]))
tempCovHist <- merge(CovHistAll,tempCovHist,by.x=0,by.y=1,all=TRUE,sort=FALSE)
CovSums <- data.frame(Depth=rowSums(tempCovHist[,-1,drop=FALSE],na.rm=TRUE),row.names=tempCovHist[,1])
CovHistAll <- CovSums
}
tempCovHistAll <- as.numeric(CovHistAll[,1])
names(tempCovHistAll) <- rownames(CovHistAll)
CovHistAll <- tempCovHistAll
}else{
CovHistAll <- as.numeric(NA)
}
if(!is.null(ShiftMat)){
ShiftsAv <- apply(ShiftMat,1,function(x)weighted.mean(as.numeric(x),Weights[colnames(ShiftMat)],na.rm=TRUE))
}else{
ShiftsAv <- as.numeric(NA)
}
if(!is.null(PosAny) & !is.null(NegAny)){
PosAny <- unname((sum(NegAny)))+unname((sum(PosAny)))
}else{
PosAny <- as.numeric(NA)
}
if(!is.null(FlagTagCounts)){
FlagTagCounts <- c(colSums(FlagTagCounts),DuplicateByChIPQC=PosAny)
}else{
FlagTagCounts <- as.numeric(data.frame(UnMapped=0,Mapped=0,Duplicates=0,MapQPass=0,MapQPassAndDup=0,DuplicateByChIPQC=0))
}
if(!is.null(ShiftMatCor)){
ShiftsCorAv <- apply(ShiftMatCor,1,function(x)weighted.mean(x,Weights[colnames(ShiftMatCor)],na.rm=TRUE))
}else{
ShiftsCorAv <- as.numeric(NA)
}
if(!is.null(SSD)){
SSDAv <- unname((weighted.mean(SSD[names(ChrLengths)],ChrLengths)*1000)/sqrt(FlagTagCounts[4]))
}else{
SSDAv <- as.numeric(NA)
}
if(!is.null(MultiRangesCountsTemp)){
GFCountsMatrix <- matrix(MultiRangesCountsTemp,ncol=length(GRangesOfInterestList),byrow=TRUE)
colnames(GFCountsMatrix) <- unique(names(GRangesOfInterestList))
}else{
GFCountsMatrix <- NULL
}
if(!is.null(bedFile)){
AvProfile <- colMeans(CoverageMatrix)
NormAvProfile <- (AvProfile/FlagTagCounts[4])*1e6
elementMetadata(bedRangesTemp) <- data.frame(Counts,bedRangesSummitsTemp)
#print(length(GRangesOfInterestList))
ReadsInPeaks <- sum(GFCountsMatrix[,"Peaks"])
}else{
AvProfile <- NA
NormAvProfile <- NA
bedRangesTemp <- GRanges()
#print(length(GRangesOfInterestList))
ReadsInPeaks <- as.numeric(NA)
}
if(!is.null(blklist)){
ReadsInBLs <- sum(GFCountsMatrix[,"BlackList"])
}else{
ReadsInBLs <- as.numeric(NA)
}
if(!is.null(SSDBL)){
SSDBLAv <- unname((weighted.mean(SSDBL[names(ChrLengths)],ChrLengths)*1000)/(sqrt(FlagTagCounts[4]-ReadsInBLs)))
}else{
SSDBLAv <- as.numeric(NA)
}
if(sum(class(GeneAnnotation)=="list") & !is.null(GFCountsMatrix)){
#CountsInFeatures <-vector("list",length=ncol(GFCountsMatrix))
CountsInFeatures <- as.list(apply(GFCountsMatrix,2,function(x)sum(x,na.rm=TRUE)))
names(CountsInFeatures) <- colnames(GFCountsMatrix)
#ReadsIn5utrs<- sum(GFCountsMatrix[,"All5utrs"])
#ReadsIn3utrs <- sum(GFCountsMatrix[,"All3utrs"])
#ReadsInCDS <- sum(GFCountsMatrix[,"Allcds"])
#ReadsInIntrons <- sum(GFCountsMatrix[,"Allintrons"])
#ReadsInTranscripts <- sum(GFCountsMatrix[,"Alltranscripts"])
#ReadsInPromoters500 <- sum(GFCountsMatrix[,"Promoters500"])
#ReadsInPromoters2000to500 <- sum(GFCountsMatrix[,"Promoters2000to500"])
#ReadsInLongPromoter20000to2000 <- sum(GFCountsMatrix[,"LongPromoter20000to2000"])
#CountsInFeatures <- list(CountsIn5utrs=ReadsIn5utrs,CountsIn3UTRs=ReadsIn3utrs,
# CountsinReads=ReadsInCDS,CountsInIntrons=ReadsInIntrons,CountsInTranscripts=ReadsInTranscripts,
# CountsInPromoters500=ReadsInPromoters500,CountsInPromoters2000to500=ReadsInPromoters2000to500,
# CountsInLongPromoter20000to2000=ReadsInLongPromoter20000to2000)
PropInFeatures <- as.list(GeneAnnotationTotalSizes/sum(as.numeric(ChrLengths)))
#TotalLength5utrs<- GeneAnnotationTotalSizes["All5utrs"]
#TotalLength3utrs <- GeneAnnotationTotalSizes["All3utrs"]
#TotalLengthCDS <- GeneAnnotationTotalSizes["Allcds"]
#TotalLengthIntrons <- GeneAnnotationTotalSizes["Allintrons"]
#TotalLengthTranscripts <- GeneAnnotationTotalSizes["Alltranscripts"]
#TotalLengthPromoters500 <- GeneAnnotationTotalSizes["Promoters500"]
#TotalLengthPromoters2000to500 <- GeneAnnotationTotalSizes["Promoters2000to500"]
#TotalLengthLongPromoter20000to2000 <- GeneAnnotationTotalSizes["LongPromoter20000to2000"]
#PropInFeatures <- list(PropIn5utrs=TotalLength5utrs/sum(as.numeric(ChrLengths)),PropIn3UTRs=TotalLength3utrs/sum(as.numeric(ChrLengths)),
# PropInCDS=TotalLengthCDS/sum(as.numeric(ChrLengths)),PropInIntrons=TotalLengthIntrons/sum(as.numeric(ChrLengths)),PropInTranscripts=TotalLengthTranscripts/sum(as.numeric(ChrLengths)),
# PropInPromoters500=TotalLengthPromoters500/sum(as.numeric(ChrLengths)),PropInPromoters2000to500=TotalLengthPromoters2000to500/sum(as.numeric(ChrLengths)),
# PropInLongPromoter20000to2000=TotalLengthLongPromoter20000to2000/sum(as.numeric(ChrLengths))
#)
}else{
ReadsIn5utrs<- NA
ReadsIn3utrs <- NA
ReadsInCDS <- NA
ReadsInIntrons <- NA
ReadsInTranscripts <- NA
ReadsInPromoters500 <- NA
ReadsInPromoters2000to500 <- NA
ReadsInLongPromoter20000to2000 <- NA
CountsInFeatures <- list(CountsIn5utrs=ReadsIn5utrs,CountsIn3UTRs=ReadsIn3utrs,
CountsinReads=ReadsInCDS,CountsInIntrons=ReadsInIntrons,CountsInTranscripts=ReadsInTranscripts,
CountsInPromoters500=ReadsInPromoters500,CountsInPromoters2000to500=ReadsInPromoters2000to500,
CountsInLongPromoter20000to2000=ReadsInLongPromoter20000to2000)
TotalLength5utrs<- NA
TotalLength3utrs <- NA
TotalLengthCDS <- NA
TotalLengthIntrons <- NA
TotalLengthTranscripts <- NA
TotalLengthPromoters500 <- NA
TotalLengthPromoters2000to500 <- NA
TotalLengthLongPromoter20000to2000 <- GeneAnnotationTotalSizes["LongPromoter20000to2000"]
PropInFeatures <- list(PropIn5utrs=TotalLength5utrs,PropIn3UTRs=TotalLength3utrs,
PropInReads=TotalLengthCDS,PropInIntrons=TotalLengthIntrons,PropInTranscripts=TotalLengthTranscripts,
PropInPromoters500=TotalLengthPromoters500,PropInPromoters2000to500=TotalLengthPromoters2000to500,
PropInLongPromoter20000to2000=TotalLengthLongPromoter20000to2000)
}
CHQC <- new("ChIPQCsample", bedRangesTemp,AveragePeakSignal=list(AvProfile,NormAvProfile),
CrossCoverage=ShiftsAv,CrossCorrelation=ShiftsCorAv,SSD=SSDAv,SSDBL=SSDBLAv,CountsInPeaks=ReadsInPeaks,
CountsInBlackList=ReadsInBLs,
CountsInFeatures=CountsInFeatures,PropInFeatures=PropInFeatures,
CoverageHistogram=CovHistAll,FlagAndTagCounts=FlagTagCounts,readlength=readlength)
return(CHQC)
}
RleSumAny <- function (e1, e2)
{
len <- length(e1)
stopifnot(len == length(e2))
x1 <- runValue(e1); s1 <- cumsum(runLength(e1))
x2 <- runValue(e2); s2 <- cumsum(runLength(e2))
.Call("rle_sum_any",
as.integer(x1), as.integer(s1),
as.integer(x2), as.integer(s2),
as.integer(len),
PACKAGE = "chipseq")
}
GetGRanges <- function(LoadFile,AllChr=NULL,ChrOfInterest=NULL,simple=FALSE,sepr="\t",simplify=FALSE){
# require(Rsamtools)
# require(GenomicRanges)
if(sum(class(LoadFile) == "GRanges")) {
RegionRanges <- LoadFile
if(simplify){
RegionRanges <- GRanges(seqnames(RegionRanges),ranges(RegionRanges))
}
}else{
if(sum(class(LoadFile) == "character")){
RangesTable <- read.delim(LoadFile,sep=sepr,header=TRUE,comment.char="#")
} else if(sum(class(LoadFile) == "matrix")) {
RangesTable <- as.data.frame(LoadFile)
} else{
RangesTable <- as.data.frame(LoadFile)
}
Chromosomes <- as.vector(RangesTable[,1])
Start <- as.numeric(as.vector(RangesTable[,2]))
End <- as.numeric(as.vector(RangesTable[,3]))
RegionRanges <- GRanges(seqnames=Chromosomes,ranges=IRanges(start=Start,end=End))
if(simple == FALSE){
if(ncol(RangesTable) > 4){
ID <- as.vector(RangesTable[,4])
Score <- as.vector(RangesTable[,5])
if(ncol(RangesTable) > 6){
Strand <- rep("*",nrow(RangesTable))
RemainderColumn <- as.data.frame(RangesTable[,-c(1:6)])
elementMetadata(RegionRanges) <- cbind(ID,Score,Strand,RemainderColumn)
}else{
elementMetadata(RegionRanges) <- cbind(ID,Score)
}
}
}
}
if(!is.null(AllChr)){
RegionRanges <- RegionRanges[seqnames(RegionRanges) %in% AllChr]
seqlevels(RegionRanges,pruning.mode="coarse") <- AllChr
}
if(!is.null(ChrOfInterest)){
RegionRanges <- RegionRanges[seqnames(RegionRanges) == ChrOfInterest]
}
return(RegionRanges)
}
findCovMaxPos <- function(reads,bedRanges,ChrOfInterest,FragmentLength){
# require(GenomicRanges)
# require(Rsamtools)
cat("done\n")
cat("Calculating coverage\n")
MaxRanges <- GRanges()
if(length(reads) > 0){
seqlengths(reads)[names(ChrOfInterest)] <- ChrOfInterest
AllCov <- coverage(reads)
cat("Calculating Summits on ",names(ChrOfInterest)," ..")
covPerPeak <- Views(AllCov[[which(names(AllCov) %in% names(ChrOfInterest))]],ranges(bedRanges[seqnames(bedRanges) == names(ChrOfInterest)]))
meanSummitLocations <- viewApply(covPerPeak,function(x)round(mean(which(x==max(x)))))
Maxes <- (start(bedRanges)+meanSummitLocations)-1
if(any(is.na(Maxes))){
NoSummitRanges <- bedRanges[is.na(Maxes)]
Maxes[is.na(Maxes)] <- (start((ranges(NoSummitRanges[seqnames(NoSummitRanges) == names(ChrOfInterest)])))+end((ranges(NoSummitRanges[seqnames(NoSummitRanges) == names(ChrOfInterest)]))))/2
}
MaxRanges <- GRanges(seqnames(bedRanges[seqnames(bedRanges) == names(ChrOfInterest)]),IRanges(start=Maxes,end=Maxes),elementMetadata=elementMetadata(bedRanges[seqnames(bedRanges) == names(ChrOfInterest)]))
#revAllCov <- rev(coverage(reads))
#revAllCov <- runmean(revAllCov[names(revAllCov) %in% ChrOfInterest],20)
#cat("Calculating reverse Summits on ",ChrOfInterest," ..")
#revMaxes <- which.max(Views(revAllCov[[which(names(revAllCov) %in% ChrOfInterest)]],ranges(bedRanges[seqnames(bedRanges) == ChrOfInterest])))
#if(any(is.na(revMaxes))){
# revNoSummitRanges <- bedRanges[is.na(revMaxes)]
# revMaxes[is.na(revMaxes)] <- (start((ranges(revNoSummitRanges[seqnames(revNoSummitRanges) == ChrOfInterest])))+end((ranges(revNoSummitRanges[seqnames(revNoSummitRanges) == ChrOfInterest]))))/2
#}
#revMaxRanges <- GRanges(seqnames(bedRanges[seqnames(bedRanges) == ChrOfInterest]),IRanges(start=Maxes,end=Maxes),elementMetadata=elementMetadata(bedRanges[seqnames(bedRanges) == ChrOfInterest]))
#meanMaxes <- rowMeans(cbind(Maxes,revMaxes))
#meanMaxRanges <- GRanges(seqnames(bedRanges[seqnames(bedRanges) == ChrOfInterest]),IRanges(start=meanMaxes,end=meanMaxes),elementMetadata=elementMetadata(bedRanges[seqnames(bedRanges) == ChrOfInterest]))
#cat(".done\n")
}
#return(meanMaxRanges)
return(MaxRanges)
}
getAnnotation = function(GeneAnnotation="hg19",AllChr){
if(!is.null(GeneAnnotation)){
if(GeneAnnotation == "hg19"){
#require(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
} else if(GeneAnnotation == "hg38"){
#require(TxDb.Hsapiens.UCSC.hg20.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
}else if(GeneAnnotation == "hg18"){
#require(TxDb.Hsapiens.UCSC.hg18.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg18.knownGene
}else if(GeneAnnotation == "mm10"){
#require(TxDb.Mmusculus.UCSC.mm10.knownGene)
txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
} else if(GeneAnnotation == "mm9"){
#require(TxDb.Mmusculus.UCSC.mm9.knownGene)
txdb <- TxDb.Mmusculus.UCSC.mm9.knownGene
} else if(GeneAnnotation == "rn4"){
#require(TxDb.Rnorvegicus.UCSC.rn4.ensGene)
txdb <- TxDb.Rnorvegicus.UCSC.rn4.ensGene
} else if(GeneAnnotation == "ce6"){
#require(TxDb.Celegans.UCSC.ce6.ensGene)
txdb <- TxDb.Celegans.UCSC.ce6.ensGene
} else if(GeneAnnotation == "dm3"){
#require(TxDb.Dmelanogaster.UCSC.dm3.ensGene)
txdb <- TxDb.Dmelanogaster.UCSC.dm3.ensGene
}else {
stop('Unsupported annotation:',GeneAnnotation)
}
All5utrs <- reduce(unique(unlist(fiveUTRsByTranscript(txdb))))
All3utrs <- reduce(unique(unlist(threeUTRsByTranscript(txdb))))
Allcds <- reduce(unique(unlist(cdsBy(txdb,"tx"))))
Allintrons <- reduce(unique(unlist(intronsByTranscript(txdb))))
Alltranscripts <- reduce(unique(transcripts(txdb)))
posAllTranscripts <- Alltranscripts[strand(Alltranscripts) == "+"]
posAllTranscripts <- posAllTranscripts[!(start(posAllTranscripts)-20000 < 0)]
negAllTranscripts <- Alltranscripts[strand(Alltranscripts) == "-"]
chrLimits <- seqlengths(negAllTranscripts)[as.character(seqnames(negAllTranscripts))]
negAllTranscripts <- negAllTranscripts[!(end(negAllTranscripts)+20000 > chrLimits)]
Alltranscripts <- c(posAllTranscripts,negAllTranscripts)
Promoters500 <- reduce(flank(Alltranscripts,500))
Promoters2000to500 <- reduce(flank(Promoters500,1500))
LongPromoter20000to2000 <- reduce(flank(Promoters2000to500,18000))
if(!missing(AllChr) & !is.null(AllChr)){
All5utrs <- GetGRanges(All5utrs,AllChr=AllChr)
All3utrs <- GetGRanges(All3utrs,AllChr=AllChr)
Allcds <- GetGRanges(Allcds,AllChr=AllChr)
Allintrons <- GetGRanges(Allintrons,AllChr=AllChr)
Alltranscripts <- GetGRanges(Alltranscripts,AllChr=AllChr)
Promoters500 <- GetGRanges(Promoters500,AllChr=AllChr)
Promoters2000to500 <- GetGRanges(Promoters2000to500,AllChr=AllChr)
LongPromoter20000to2000 <- GetGRanges(LongPromoter20000to2000,AllChr=AllChr)
}
}
return(list(version=GeneAnnotation,LongPromoter20000to2000=LongPromoter20000to2000,
Promoters2000to500=Promoters2000to500,Promoters500=Promoters500,
All5utrs=All5utrs,Alltranscripts=Alltranscripts,Allcds=Allcds,
Allintrons=Allintrons,All3utrs=All3utrs))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.