Nothing
### Set functions
dim.switchAnalyzeRlist <- function(x) {
dim(x[[1]])
}
nrow.switchAnalyzeRlist <- function(x) {
nrow(x[[1]])
}
ncol.switchAnalyzeRlist <- function(x) {
ncol(x[[1]])
}
head.switchAnalyzeRlist <- function(x, ...) {
head(x[[1]], ...)
}
tail.switchAnalyzeRlist <- function(x, ...) {
tail(x[[1]], ...)
}
### subset
subsetSwitchAnalyzeRlist <- function(switchAnalyzeRlist, subset) {
### Subset isoform features
switchAnalyzeRlist$isoformFeatures <- subset(
switchAnalyzeRlist$isoformFeatures,
subset
)
if(nrow(switchAnalyzeRlist$isoformFeatures) == 0 ) {
stop('Nothing is left after subsetting.')
}
### Based on which isoforms are left subset other features
isoformsToKeep <- unique(switchAnalyzeRlist$isoformFeatures$isoform_id)
genesToKeep <- unique( switchAnalyzeRlist$isoformFeatures$gene_id[which(
switchAnalyzeRlist$isoformFeatures$isoform_id %in% isoformsToKeep
)] )
allIsoformsAssociated <- unique(
switchAnalyzeRlist$exons$isoform_id[which(
switchAnalyzeRlist$exons$gene_id %in%
switchAnalyzeRlist$isoformFeatures$gene_id
)]
)
# exons
switchAnalyzeRlist$exons <- switchAnalyzeRlist$exons[which(
switchAnalyzeRlist$exons$isoform_id %in% allIsoformsAssociated
),]
# conditions
switchAnalyzeRlist$conditions <- switchAnalyzeRlist$conditions[which(
switchAnalyzeRlist$conditions$condition %in%
c(
switchAnalyzeRlist$isoformFeatures$condition_1,
switchAnalyzeRlist$isoformFeatures$condition_2
)
),]
# design matrix
if( switchAnalyzeRlist$sourceId != 'preDefinedSwitches' ) {
switchAnalyzeRlist$designMatrix <- switchAnalyzeRlist$designMatrix[which(
switchAnalyzeRlist$designMatrix$condition %in% c(
unique(switchAnalyzeRlist$isoformFeatures$condition_1),
unique(switchAnalyzeRlist$isoformFeatures$condition_2)
)
),]
}
# rep count columns
if( !is.null(switchAnalyzeRlist$isoformCountMatrix )) {
switchAnalyzeRlist$isoformCountMatrix <-
switchAnalyzeRlist$isoformCountMatrix[
which(
switchAnalyzeRlist$isoformCountMatrix$isoform_id %in%
allIsoformsAssociated
),
which(
colnames(switchAnalyzeRlist$isoformCountMatrix) %in%
c('isoform_id', switchAnalyzeRlist$designMatrix$sampleID)
)
]
}
# rep expression columns
if( !is.null(switchAnalyzeRlist$isoformRepExpression )) {
switchAnalyzeRlist$isoformRepExpression <-
switchAnalyzeRlist$isoformRepExpression[
which(
switchAnalyzeRlist$isoformRepExpression$isoform_id %in%
allIsoformsAssociated
),
which(
colnames(switchAnalyzeRlist$isoformRepExpression) %in%
c('isoform_id', switchAnalyzeRlist$designMatrix$sampleID)
)
]
}
# rep IF columns
if( !is.null(switchAnalyzeRlist$isoformRepIF )) {
switchAnalyzeRlist$isoformRepIF <-
switchAnalyzeRlist$isoformRepIF[
which(
switchAnalyzeRlist$isoformRepIF$isoform_id %in%
allIsoformsAssociated
),
which(
colnames(switchAnalyzeRlist$isoformRepIF) %in%
c('isoform_id', switchAnalyzeRlist$designMatrix$sampleID)
)
]
}
### For standard analysis
# otherAnalysisPerformed <- setdiff(
# names(switchAnalyzeRlist),
# c(
# 'isoformFeatures','exons','conditions','sourceId','designMatrix',
# 'isoformSwitchAnalysis','ntSequence','aaSequence',
# 'switchConsequence', 'isoformSwitchAnalysis',
# # added to prevent wrong IF estimations after limma based test introduction
# 'isoformRepIF','isoformRepExpression','isoformCountMatrix',
# 'runInfo'
# )
# )
otherAnalysisPerformed <- intersect(
c(
'orfAnalysis',
'domainAnalysis',
'signalPeptideAnalysis',
'AlternativeSplicingAnalysis',
'idrAnalysis',
'subCellLocationAnalysis'
),
names(switchAnalyzeRlist)
)
if(length(otherAnalysisPerformed)) {
for(localAnalysis in otherAnalysisPerformed) {
switchAnalyzeRlist[[ localAnalysis ]] <-
switchAnalyzeRlist[[ localAnalysis ]][
which(
switchAnalyzeRlist[[ localAnalysis ]]$isoform_id %in%
isoformsToKeep
),,drop=FALSE
]
}
}
### For the specialized analysis
if( !is.null(switchAnalyzeRlist$ntSequence) ) {
switchAnalyzeRlist$ntSequence <- switchAnalyzeRlist$ntSequence[which(
names(switchAnalyzeRlist$ntSequence) %in% isoformsToKeep
)]
}
if( !is.null(switchAnalyzeRlist$aaSequence) ) {
switchAnalyzeRlist$aaSequence <- switchAnalyzeRlist$aaSequence[which(
names(switchAnalyzeRlist$aaSequence) %in% isoformsToKeep
)]
}
if( !is.null(switchAnalyzeRlist$isoformSwitchAnalysis)) {
switchAnalyzeRlist$isoformSwitchAnalysis <-
switchAnalyzeRlist$isoformSwitchAnalysis[which(
switchAnalyzeRlist$isoformSwitchAnalysis$iso_ref %in%
switchAnalyzeRlist$isoformFeatures$iso_ref
),]
}
if( !is.null(switchAnalyzeRlist$switchConsequence)) {
switchAnalyzeRlist$switchConsequence <-
switchAnalyzeRlist$switchConsequence[which(
switchAnalyzeRlist$switchConsequence$gene_ref %in%
switchAnalyzeRlist$isoformFeatures$gene_ref
),]
}
return(switchAnalyzeRlist)
}
### summary
summary.switchAnalyzeRlist <- function(object, ...) {
### Eobjecttract data
nComparisons <- nrow(unique(
object$isoformFeatures[,c('condition_1','condition_2')]
))
nCond <- nrow(object$conditions)
nSamples <- sum(object$conditions$nrReplicates)
if(is.na(nSamples)) {
if(!is.null( object$isoformCountMatrix )) {
nSamples <- ncol(object$isoformCountMatrix) - 1
} else {
nSamples <- ncol(object$isoformRepExpression) - 1
}
}
nGenes <- length(unique(object$isoformFeatures$gene_id))
nIso <- length(unique(object$isoformFeatures$isoform_id))
analysisAdded <- setdiff(
names(object),
c(
'isoformFeatures','exons','conditions','sourceId',
'isoformSwitchAnalysis','designMatrix',
'isoformCountMatrix','isoformRepExpression','isoformRepIF',
'runInfo'
)
)
if( 'codingPotentialValue' %in% colnames(object$isoformFeatures) ) {
analysisAdded <- c(analysisAdded, 'Coding Potential')
}
### Print size summary data
cat(
'This switchAnalyzeRlist list contains:\n',
paste(nIso,
'isoforms from',
nGenes, 'genes\n',
nComparisons,
'comparison from',
nCond,
'conditions',
paste0(
'(in total ',
nSamples,
' samples)\n'
),
sep=' '
)
)
### Print switching summary
includingSwitches <- !all(
is.na( object$isoformFeatures$gene_switch_q_value )
)
if(includingSwitches) {
try(
switchNumber <- extractSwitchSummary(object),
silent = TRUE
)
if( exists('switchNumber') ) {
colnames(switchNumber) <- c(
'Comparison',
'Isoforms',
'Switches',
'Genes'
)
# subset if to large
if(nrow(switchNumber) > 10) {
switchNumberHead <- head(switchNumber, 5)
switchNumberTail <- tail(switchNumber, 5)
switchNumber <- rbind(switchNumberHead, '...', switchNumberTail)
}
cat('\nSwitching features:\n')
print(switchNumber)
## add to analysis performed
analysisAdded <- c('Isoform Switch Identification',analysisAdded)
}
}
if(length(analysisAdded)) {
cat('\nFeature analyzed:\n')
analysisAdded <- gsub(
'signalPeptideAnalysis' ,'Signal Peptides' , analysisAdded)
analysisAdded <- gsub(
'domainAnalysis' ,'Protein Domains' , analysisAdded)
analysisAdded <- gsub(
'intronRetentionAnalysis' ,'Intron Retentions' , analysisAdded)
analysisAdded <- gsub(
'AlternativeSplicingAnalysis','Alternative splicing' , analysisAdded)
analysisAdded <- gsub(
'orfAnalysis' ,'ORFs' , analysisAdded)
analysisAdded <- gsub(
'switchConsequence' ,'Switch Consequences' , analysisAdded)
analysisAdded <- gsub(
'idrAnalysis' ,'IDR' , analysisAdded)
analysisAdded <- gsub(
'subCellLocationAnalysis' ,'Sub-cellular localization' , analysisAdded)
print(paste(analysisAdded, collapse = ', '))
}
}
### show
show.switchAnalyzeRlist <- function(object) {
summary(object)
}
setMethod("show", "switchAnalyzeRlist", function(object) {
summary(object)
})
### make objects
createSwitchAnalyzeRlist <- function(
isoformFeatures,
exons,
designMatrix,
isoformCountMatrix=NULL,
isoformRepExpression=NULL,
sourceId,
removeFusionTranscripts = TRUE
){
### Test input
if(TRUE) {
### each feature individually
if(TRUE) {
if(! is.data.frame(isoformFeatures)){
stop('The isoform_feature argument must be a data.frame')
}
if(class(exons) != 'GRanges'){
stop('The exons argument must be a GenomicRanges (GRanges)')
}
if(class(sourceId) != 'character'){
stop('The sourceId argument must be a character')
}
# isoformFeatures
neededCols <- c(
'isoform_id','gene_id','condition_1','condition_2','gene_name',
'gene_overall_mean','gene_value_1','gene_value_2','gene_stderr_1','gene_stderr_2',
'gene_log2_fold_change','gene_q_value','iso_overall_mean','iso_value_1',
'iso_value_2', 'iso_stderr_1','iso_stderr_2',
'iso_log2_fold_change','iso_q_value','IF_overall','IF1','IF2','dIF',
'isoform_switch_q_value','gene_switch_q_value'
)
if( ! all(neededCols %in% colnames(isoformFeatures)) ){
tmp <- setdiff(neededCols, colnames(isoformFeatures))
stop(paste(
'The \'isoformFeatures\' argument does not',
'contain the needed information.',
'The following are missing',
paste(tmp, collapse=', '), sep=' '
))
}
if( any(
isoformFeatures$condition_1 == isoformFeatures$condition_2)
) {
stop(paste(
'The input data is inadequate - conditions compared',
'must have unique names (e.g. you cannot compare two',
'knock out (KO) conditions both called KO',
'- they must be renamed to fx. KO1 and KO2'
))
}
# exons
if( ! all(
c("isoform_id","gene_id") %in% colnames( exons@elementMetadata)
)) {
stop(paste(
'The \'exons\' argument must contain both \'isoform_id\'',
'and \'gene_id\' as metadata collumns'
))
}
# sourceId
if(length(sourceId) != 1) {stop('sourceId must have length 1')}
### gene_id duplications
idSplit <- split( as.character(exons@seqnames), f=exons$gene_id)
idSplit <- lapply(idSplit, unique)
idLength <- sapply(idSplit, length)
if(any( idLength == 1)) {
genesToRemove <- names(idLength)[which(idLength > 1)]
} else {
stop(paste(
'The gene_ids must be uniqe - we identified multiple',
'instances of the same gene_id on different chromosomes.',
'This typically occures because the annotation have',
'multiple version of the same region.',
'If annotated transcipts were imported please consider',
'use the \'removeNonConvensionalChr\' paramter.'
))
}
### gene_id duplications
idSplit2 <- split( as.character(exons@seqnames), f=exons$isoform_id)
idSplit2 <- lapply(idSplit2, unique)
idSplit2 <- sapply(idSplit2, length)
if(any( idLength == 1)) {
isoformsToRemove <- names(idLength)[which(idLength > 1)]
} else {
stop(paste(
'The isoform_ids must be uniqe - we identified multiple',
'instances of the same isoform_id on different chromosomes.',
'This typically occures because the annotation have',
'multiple version of the same region.',
'If annotated transcipts were imported please consider',
'use the \'removeNonConvensionalChr\' paramter.'
))
}
### Test supplied expression
if(TRUE) {
countsSuppled <- !is.null(isoformCountMatrix)
abundSuppled <- !is.null(isoformRepExpression)
if( !countsSuppled ) {
warning('Note that when no count matrix was given as input no statistical identification of isoform switches can be done')
}
if( countsSuppled ) {
if (!any(colnames(isoformCountMatrix) == 'isoform_id')) {
stop(paste(
'The data.frame passed to the \'isoformCountMatrix\'',
'argument must contain a \'isoform_id\' column'
))
}
}
if ( abundSuppled ) {
if (!any(colnames(isoformRepExpression) == 'isoform_id')) {
stop(paste(
'The data.frame passed to the \'isoformCountMatrix\'',
'argument must contain a \'isoform_id\' column'
))
}
}
}
}
### against each other
if(TRUE) {
if( jaccardSimilarity(
isoformFeatures$isoform_id, exons$isoform_id
) != 1) {
stop(paste(
'The isoform_id in isoformFeatures',
'and exons does not match'
))
}
if( jaccardSimilarity(
c(isoformFeatures$condition_1, isoformFeatures$condition_2),
designMatrix$condition
)!= 1) {
stop(paste(
'The conditions isoformFeatures in and',
'designMatrix does not match'
))
}
if(countsSuppled) {
if (!all(designMatrix$sampleID %in% colnames(isoformCountMatrix))) {
stop(paste(
'Each sample stored in \'designMatrix$sampleID\' must have',
'a corresponding expression column in \'isoformCountMatrix\''
))
}
}
if ( abundSuppled ) {
if (!all(designMatrix$sampleID %in%
colnames(isoformRepExpression))) {
stop(paste(
'Each sample stored in \'designMatrix$sampleID\' must',
'have a corresponding expression column',
'in \'isoformRepExpression\''
))
}
}
if( abundSuppled & countsSuppled ) {
if( ! identical( colnames(isoformCountMatrix) , colnames(isoformRepExpression)) ) {
stop('The column name and order of \'isoformCountMatrix\' and \'isoformRepExpression\' must be identical')
}
if( ! identical( isoformCountMatrix$isoform_id , isoformCountMatrix$isoform_id ) ) {
stop('The ids and order of the \'isoform_id\' column in \'isoformCountMatrix\' and \'isoformRepExpression\' must be identical')
}
}
}
### Isoforms supplied
if(TRUE) {
if( countsSuppled ) {
j1 <- jaccardSimilarity(
isoformCountMatrix$isoform_id,
isoformFeatures$isoform_id
)
} else {
j1 <- jaccardSimilarity(
isoformRepExpression$isoform_id,
isoformFeatures$isoform_id
)
}
jcCutoff <- 0.95
if (j1 != 1 ) {
if( j1 < jcCutoff) {
stop(
paste(
'The annotation (count matrix and isoform annotation)',
'seems to be different (jacard similarity < 0.95).',
'Either isforoms found in the annotation are',
'not quantifed or vise versa.',
sep=' '
)
)
}
if( j1 >= jcCutoff ) {
warning(
paste(
'The annotation (count matrix and isoform annotation)',
'contain differences in which isoforms are analyzed.',
'specifically the annotation provided contain:',
length(unique(isoformAnnotation$isoform_id)) - length(unique(isoformCountMatrix$isoform_id)),
'more isoforms than the count matrix.',
'Please make sure this is on purpouse since differences',
'will cause inaccurate quantification and thereby skew all analysis.',
'NB! All differences were removed from the final switchAnalyzeRlist!',
sep=' '
)
)
### Reduce to those found in all
if( countsSuppled ) {
isoformsUsed <- intersect(
isoformCountMatrix$isoform_id,
isoformAnnotation$isoform_id
)
} else {
isoformsUsed <- intersect(
isoformRepExpression$isoform_id,
isoformAnnotation$isoform_id
)
}
isoformExonStructure <- isoformExonStructure[which(
isoformExonStructure$isoform_id %in% isoformsUsed
), ]
isoformAnnotation <-isoformAnnotation[which(
isoformAnnotation$isoform_id %in% isoformsUsed
), ]
if( countsSuppled ) {
isoformCountMatrix <-isoformCountMatrix[which(
isoformCountMatrix$isoform_id %in% isoformsUsed
), ]
}
if( abundSuppled ) {
isoformRepExpression <-isoformRepExpression[which(
isoformRepExpression$isoform_id %in% isoformsUsed
), ]
}
}
}
}
}
### Add refrence genes
if(TRUE) {
### Helper functions
zeroHelper <- Vectorize(function(nrTimes) {
stringr::str_c( rep.int(x = 0, times= nrTimes ), collapse = '')
})
addZeroes <- function(aVec, n=8) {
localData <- data.frame(
id=aVec,
stringsAsFactors = FALSE
)
localData$nToAdd <- n - stringr::str_length(localData$id)
localData$zeeros <- zeroHelper(localData$nToAdd)
localData$combinedId <- stringr::str_c(
localData$zeeros,
localData$id
)
return(
localData$combinedId
)
}
### reorder (nessesary for ref creation)
isoformFeatures <- isoformFeatures[order(
isoformFeatures$condition_1,
isoformFeatures$condition_2,
isoformFeatures$gene_id,
isoformFeatures$isoform_id
),]
### Make unique id per comparison
tmp <- stringr::str_c(
isoformFeatures$gene_id,
isoformFeatures$condition_1,
isoformFeatures$condition_2
)
### Convert unique id to a number
isoformFeatures$gene_ref <- stringr::str_c(
'geneComp',
'_',
addZeroes(
as.integer(factor(tmp, levels=unique(tmp)))
)
)
isoformFeatures$iso_ref <- stringr::str_c(
'isoComp',
'_',
addZeroes( 1:nrow(isoformFeatures) )
)
### Reorder
isoformFeatures <- isoformFeatures[,c(
which( colnames(isoformFeatures) == 'iso_ref'),
which( colnames(isoformFeatures) == 'gene_ref'),
which( ! colnames(isoformFeatures) %in% c('iso_ref','gene_ref'))
)]
}
### Change to propper R names
if(TRUE) {
tmp <- designMatrix
for( i in 2:ncol(designMatrix) ) { # i <- 2
if( class(designMatrix[,i]) %in% c('character','factor') ) {
designMatrix[,i] <- makeProperNames( designMatrix[,i] )
}
}
if( ! identical(tmp, designMatrix) ) {
message('Please note that some condition names were changed due to names not suited for modeling in R.')
isoformFeatures$condition_1 <- makeProperNames( isoformFeatures$condition_1 )
isoformFeatures$condition_2 <- makeProperNames( isoformFeatures$condition_2 )
}
}
### Test full rank of design
if(TRUE) {
isFullRank <- testFullRank( designMatrix )
if( ! isFullRank ) {
stop(
paste(
'The supplied design matrix will result in a model matrix that is not full rank',
'\nPlease make sure there are no co-linearities in the design'
)
)
}
}
### Calculate conditions
nrRep <- table( designMatrix$condition)
nrRep <- data.frame(
condition=names(nrRep),
nrReplicates=as.vector(nrRep),
row.names = NULL,
stringsAsFactors = FALSE
)
### Create switchList
localSwitchList <- new(
"switchAnalyzeRlist",
list(
isoformFeatures=isoformFeatures,
exons=exons,
conditions=nrRep,
designMatrix=designMatrix,
sourceId=sourceId
)
)
### Add quantification data
if( countsSuppled ) {
localSwitchList$isoformCountMatrix <- isoformCountMatrix[,c('isoform_id',designMatrix$sampleID)]
}
if( abundSuppled ) {
localSwitchList$isoformRepExpression <- isoformRepExpression[,c('isoform_id',designMatrix$sampleID)]
}
### Add runInfo
localSwitchList$runInfo <- list(
IsoformSwitchAnalyzeR = list(
version = package.version('IsoformSwitchAnalyzeR')
)
)
### Convert isoforms to remove to gene ids
if(length(isoformsToRemove)) {
additionalGenesToRemove <- localSwitchList$isoformFeatures$gene_id[which(
localSwitchList$isoformFeatures$isoform_id %in% isoformsToRemove
)]
genesToRemove <- unique(c(
genesToRemove,
additionalGenesToRemove
))
}
### Subset if nessesary
if(length(genesToRemove) & removeFusionTranscripts) {
localSwitchList <- subsetSwitchAnalyzeRlist(
localSwitchList,
! localSwitchList$isoformFeatures$gene_id %in% genesToRemove
)
warning(paste(
'The gene_ids or isoform_ids were not unique - we identified multiple instances',
'of the same gene_id/isoform_id on different chromosomes.',
'To solve this we removed', length(genesToRemove), 'gene_id.',
'Please note there might still be duplicated gene_id located',
'on the same chromosome.',
'Some of these could be due to fusion transcripts',
'which IsoformSwitchAnalyzeR cannot handle.'
))
}
### Return result
return(localSwitchList)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.