###############################################################################
##.deseq2 function calcuates gene differential expression based on Deseq2 package
##.deseq2 function takes two assigned clusters and tss.raw table
##users need to provide which sample they want to compare and
##run script with the following example command:
##.deseq2(clustersx.asn,clustersy.asn, tss.raw,
## samplex <- c("ScerBY4741.1","ScerBY4741.2"),
## sampley <- c("ScerArrest.1","ScerArrest.2"),
## sampleOne <- "ScerBY4741",sampleTwo <- "ScerArrest")
############################################################################
##tss.raw is the raw tss merged tables, before any sums
############################################################################
.deseq2 <- function(object,cx,cy, tss.raw, samplex,sampley, sampleOne,sampleTwo,useMultiCore, numCores){
##define variable as a NULL value
TAGtables = NULL
##get raw count tables
TAGtables <- object@TAGtables
if(sampleOne%in%names(TAGtables)){
xCounts<-TAGtables[[which(names(TAGtables)==sampleOne)]]
} else{
xCounts <-.tagCount_updated(cx, tss.raw,samplex,useMultiCore, numCores)
## save the tagCount results
TAGtables[[sampleOne]]<-xCounts
}
if(sampleTwo%in%names(TAGtables)){
yCounts<-TAGtables[[which(names(TAGtables)==sampleTwo)]]
} else{
yCounts <-.tagCount_updated(cy, tss.raw,sampley,useMultiCore, numCores)
TAGtables[[sampleTwo]]<-yCounts
}
#save TAGtable as temp file
save(TAGtables, file="TAGtable_temp.RData")
xCounts <- xCounts[,-c(2:11)]
yCounts <- yCounts[,-c(2:11)]
##tag counts by gene for sampleOne
setkey(xCounts, gene)
if(useMultiCore){
if(is.null(numCores)){
numCores <- detectCores()
}
one <- mclapply(as.list(unique(xCounts$gene)), function(my.gene) {
data <- xCounts[list(my.gene)]
return(c(my.gene,colSums(data[,-c(1,2,3)])))
}, mc.cores = numCores)
}else{
one <- lapply(as.list(unique(xCounts$gene)), function(my.gene) {
data <- xCounts[list(my.gene)]
return(c(my.gene,colSums(data[,-c(1,2,3)])))
})
}
one <- data.frame(matrix(unlist(one), nrow=length(one), byrow=TRUE),stringsAsFactors=FALSE)
##tag counts by gene for sampleTwo
setkey(yCounts, gene)
if(useMultiCore){
if(is.null(numCores)){
numCores <- detectCores()
}
two <- mclapply(as.list(unique(yCounts$gene)), function(my.gene) {
data <- yCounts[list(my.gene)]
return(c(my.gene,colSums(data[,-c(1,2,3)])))
}, mc.cores = numCores)
}else{
two <- lapply(as.list(unique(yCounts$gene)), function(my.gene) {
data <- yCounts[list(my.gene)]
return(c(my.gene,colSums(data[,-c(1,2,3)])))
})
}
two <- data.frame(matrix(unlist(two), nrow=length(two), byrow=TRUE),stringsAsFactors=FALSE)
##merge the two raw count tables together by genes
one[,2:ncol(one)] <- sapply(one[,2:ncol(one)], as.integer)
two[,2:ncol(two)] <- sapply(two[,2:ncol(two)], as.integer)
setnames(one, colnames(one), c("gene",samplex))
setnames(two, colnames(two), c("gene",sampley))
Dtable <- merge(one, two, by = c("gene"), all = TRUE)
Dtable[is.na(Dtable)] = 0
##
rownames(Dtable) <- Dtable[,1]
Dtable <- Dtable[,-1]
Dtable <- data.matrix(Dtable)
condition <- factor(c(rep(sampleOne, times = length(samplex)), rep(sampleTwo, times = length(sampley))))
dds <- DESeqDataSetFromMatrix(countData = Dtable,data.frame(condition), ~ condition)
dds$condition <- factor(dds$condition, levels = c(sampleOne, sampleTwo))
dds <- DESeq(dds)
res <- results(dds)
res <- res[order(res$padj),]
return(as.data.frame(res))
}
############################################################################
##.tagCount updated
.tagCount_updated <- function(cs, tss.raw, samples, useMultiCore, numCores){
##define variable as a NULL value
tag_sum = NULL
cols <- c("chr","pos","strand", samples)
tss1 <- tss.raw[,.SD, .SDcols = cols]
#exclude rows with no count
tss1$tag_sum <- rowSums(tss1[, -c(1,2,3)])
tss <- tss1 %>% dplyr::filter(tag_sum != 0)
tss$tag_sum<- NULL
##define variable as a NULL value
chr = strand = start = end = NULL
if(useMultiCore){
if(is.null(numCores)){
numCores <- detectCores()
}
print(paste("process is running on",numCores, "cores..."))
tags <- mclapply(seq_len(cs[,.N]),function(r){
data <- tss[tss$chr == cs[r,chr] & tss$strand == cs[r,strand] & tss$pos >= cs[r,start] & tss$pos <= cs[r,end],]
temp <- sapply(as.list(samples), function(s){
sum(data[,.SD,.SDcols = s])
})
return(temp)
}, mc.cores = numCores)
}else{
tags <- lapply(seq_len(cs[,.N]),function(r){
data <- tss[tss$chr == cs[r,chr] & tss$strand == cs[r,strand] & tss$pos >= cs[r,start] & tss$pos <= cs[r,end],]
temp <- sapply(as.list(samples), function(s){
sum(data[,.SD,.SDcols = s])
})
return(temp)
})
}
tags <- data.frame(matrix(unlist(tags), nrow=length(tags), byrow=TRUE),stringsAsFactors=FALSE)
colnames(tags) <- samples
cs <- cbind(cs,tags)
return(cs)
}
##.tagCount is slow
.tagCount <- function(cs, tss.raw, samples, useMultiCore, numCores){
cols <- c("chr","pos","strand", samples)
tss <- tss.raw[,.SD, .SDcols = cols]
##define variable as a NULL value
chr = strand = start = end = NULL
if(useMultiCore){
if(is.null(numCores)){
numCores <- detectCores()
}
print(paste("process is running on",numCores, "cores..."))
tags <- mclapply(seq_len(cs[,.N]),function(r){
data <- tss[tss$chr == cs[r,chr] & tss$strand == cs[r,strand] & tss$pos >= cs[r,start] & tss$pos <= cs[r,end],]
temp <- sapply(as.list(samples), function(s){
sum(data[,.SD,.SDcols = s])
})
return(temp)
}, mc.cores = numCores)
}else{
tags <- lapply(seq_len(cs[,.N]),function(r){
data <- tss[tss$chr == cs[r,chr] & tss$strand == cs[r,strand] & tss$pos >= cs[r,start] & tss$pos <= cs[r,end],]
temp <- sapply(as.list(samples), function(s){
sum(data[,.SD,.SDcols = s])
})
return(temp)
})
}
tags <- data.frame(matrix(unlist(tags), nrow=length(tags), byrow=TRUE),stringsAsFactors=FALSE)
colnames(tags) <- samples
cs <- cbind(cs,tags)
return(cs)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.