View source: R/expression_filter_fusion.R
expression_filter_fusion | R Documentation |
Expression filtering with a user-provided expression matrix and standardized fusion calls
expression_filter_fusion(
standardFusioncalls,
expressionMatrix,
expressionFilter
)
standardFusioncalls |
A data frame of STAR-Fusion or Arriba calls, standardized to run through the filtering steps |
expressionMatrix |
Expression matrix for samples used in cohort for fusion calls |
expressionFilter |
FPKM/TPM threshold used to call a gene not expressed |
Standardized fusion calls annotated with gene list and fusion list provided in reference folder
## Not run:
# Example workflow: standardize caller output, apply QC filters, build a
# per-gene expression table, then filter fusions by expression.
# Packages are attached up front; readr and tibble functions are called with
# an explicit namespace so the example does not rely on them being attached.
library(dplyr)
library(reshape2)

# standardize
fusionfileArriba <- read_arriba_calls(
  system.file("extdata", "arriba_example.tsv", package = "annoFuseData")
)
fusionfileStarFusion <- read_starfusion_calls(
  system.file("extdata", "starfusion_example.tsv", package = "annoFuseData")
)
formattedArriba <- fusion_standardization(
  fusionfileArriba,
  caller = "ARRIBA",
  tumorID = "tumorID"
)
formattedStarFusion <- fusion_standardization(
  fusionfileStarFusion,
  caller = "STARFUSION",
  tumorID = "tumorID"
)

# merge standardized fusion calls
standardFusioncalls <- rbind(formattedStarFusion, formattedArriba) %>%
  as.data.frame()
fusionQCFiltered <- fusion_filtering_QC(
  standardFusioncalls = standardFusioncalls,
  readingFrameFilter = "in-frame|frameshift|other",
  artifactFilter = "GTEx_Recurrent|DGD_PARALOGS|Normal|BodyMap|ConjoinG",
  junctionReadCountFilter = 1,
  spanningFragCountFilter = 10,
  readthroughFilter = TRUE
)

# expression based filter to capture only fusions where at least 1 gene is
# expressed
expressionFile <- system.file(
  "extdata", "example.rsem.genes.results.gz",
  package = "annoFuseData"
)
expressionMatrix <- readr::read_tsv(expressionFile)

# split gene id and symbol
expressionMatrix <- cbind(
  expressionMatrix,
  colsplit(
    expressionMatrix$gene_id,
    pattern = "_",
    names = c("EnsembleID", "GeneSymbol")
  )
)

# collapse to matrix of HUGO symbols x Sample identifiers
# take max expression per row and use the max value for duplicated gene symbols
expressionMatrix.collapsed <- expressionMatrix %>%
  # arrange decreasing by FPKM
  arrange(desc(FPKM)) %>%
  # keep the row with the greatest FPKM value per gene symbol;
  # if ties occur, keep the first occurrence.
  # Rows are now unique on GeneSymbol, so no further de-duplication is needed.
  distinct(GeneSymbol, .keep_all = TRUE) %>%
  tibble::remove_rownames() %>%
  # keep the four columns in this order and rename the expression column
  # (third position) to "tumorID", the same identifier string passed as
  # `tumorID` to fusion_standardization() above
  dplyr::select(EnsembleID, GeneSymbol, tumorID = FPKM, gene_id)

expressionFiltered <- expression_filter_fusion(
  standardFusioncalls = fusionQCFiltered,
  expressionMatrix = expressionMatrix.collapsed,
  expressionFilter = 1
)
## End(Not run)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.