knitr::opts_chunk$set( message = FALSE, warning = FALSE, fig.width = 10 )
This tutorial demonstrates how to coerce GeoMxSet objects into Seurat or SpatialExperiment objects and the subsequent analyses. For more examples of what analyses are available in these objects, look at these Seurat or SpatialExperiment vignettes.
Data Processing should occur in GeomxTools. Due to the unique nature of the regions of interest (ROIs), it is recommended to use the preproccesing steps available in GeomxTools rather than the single-cell made preprocessing available in Seurat.
library(GeomxTools) library(Seurat) library(SpatialDecon) library(patchwork)
datadir <- system.file("extdata", "DSP_NGS_Example_Data", package="GeomxTools") DCCFiles <- dir(datadir, pattern=".dcc$", full.names=TRUE) PKCFiles <- unzip(zipfile = file.path(datadir, "/pkcs.zip")) SampleAnnotationFile <- file.path(datadir, "annotations.xlsx") demoData <- suppressWarnings(readNanoStringGeoMxSet(dccFiles = DCCFiles, pkcFiles = PKCFiles, phenoDataFile = SampleAnnotationFile, phenoDataSheet = "CW005", phenoDataDccColName = "Sample_ID", protocolDataColNames = c("aoi", "cell_line", "roi_rep", "pool_rep", "slide_rep")))
After reading in the object, we will do a couple of QC steps.
demoData <- shiftCountsOne(demoData, useDALogic=TRUE) demoData <- setSegmentQCFlags(demoData, qcCutoffs = list(percentSaturation = 45)) demoData <- setBioProbeQCFlags(demoData) # low sequenced ROIs lowSaturation <- which(protocolData(demoData)[["QCFlags"]]$LowSaturation) # probes that are considered outliers lowQCprobes <- which(featureData(demoData)[["QCFlags"]]$LowProbeRatio | featureData(demoData)[["QCFlags"]]$GlobalGrubbsOutlier) # remove low quality ROIs and probes passedQC <- demoData[-lowQCprobes, -lowSaturation] dim(demoData) dim(passedQC)
Objects must be aggregated to Target level data before coercing. This changes the row (gene) information to be the gene name rather than the probe ID.
featureType(passedQC) data.frame(assayData(passedQC)[["exprs"]][seq_len(3), seq_len(3)]) target_demoData <- aggregateCounts(passedQC) featureType(target_demoData) data.frame(assayData(target_demoData)[["exprs"]][seq_len(3), seq_len(3)])
It is recommended to normalize using a GeoMx specific model before coercing. The normalized data is now in the assayData slot called "q_norm".
norm_target_demoData <- normalize(target_demoData, norm_method="quant", desiredQuantile = .75, toElt = "q_norm") assayDataElementNames(norm_target_demoData) data.frame(assayData(norm_target_demoData)[["q_norm"]][seq_len(3), seq_len(3)])
The three errors that can occur when trying to coerce to Seurat are:
as.Seurat(demoData) as.Seurat(target_demoData, normData = "exprs") as.Seurat(norm_target_demoData, normData = "exprs_norm")
After coercing to a Seurat object all of the metadata is still accessible.
demoSeurat <- as.Seurat(x = norm_target_demoData, normData = "q_norm") demoSeurat # overall data object head(demoSeurat, 3) # most important ROI metadata demoSeurat@misc[1:8] # experiment data head(demoSeurat@misc$sequencingMetrics) # sequencing metrics head(demoSeurat@misc$QCMetrics$QCFlags) # QC metrics head(demoSeurat@assays$GeoMx@meta.data) # gene metadata
All Seurat functionality is available after coercing. Outputs might differ if the ident value is set or not.
VlnPlot(demoSeurat, features = "nCount_GeoMx", pt.size = 0.1) demoSeurat <- as.Seurat(norm_target_demoData, normData = "q_norm", ident = "cell_line") VlnPlot(demoSeurat, features = "nCount_GeoMx", pt.size = 0.1)
Here is an example of a typical dimensional reduction workflow.
demoSeurat <- FindVariableFeatures(demoSeurat) demoSeurat <- ScaleData(demoSeurat) demoSeurat <- RunPCA(demoSeurat, assay = "GeoMx", verbose = FALSE) demoSeurat <- FindNeighbors(demoSeurat, reduction = "pca", dims = seq_len(30)) demoSeurat <- FindClusters(demoSeurat, verbose = FALSE) demoSeurat <- RunUMAP(demoSeurat, reduction = "pca", dims = seq_len(30)) DimPlot(demoSeurat, reduction = "umap", label = TRUE, group.by = "cell_line")
Here is a work through of a more indepth DSP dataset. This is a non-small cell lung cancer (nsclc) tissue sample that has an ROI strategy to simulate a visium dataset (55 um circles evenly spaced apart). It was segmented on tumor and non-tumor.
data("nsclc", package = "SpatialDecon")
#this data is from an old version, column names have been updated #ensuring continuity with current version nsclc@featureType <- "Target" colnames(protocolData(nsclc))[colnames(protocolData(nsclc)) == "NormFactorQ3"] <- "qFactors" colnames(protocolData(nsclc))[colnames(protocolData(nsclc)) == "NormFactorHK"] <- "hkFactors" colnames(protocolData(nsclc))[colnames(protocolData(nsclc)) == "RawReads"] <- "Raw" colnames(protocolData(nsclc))[colnames(protocolData(nsclc)) == "TrimmedReads"] <- "Trimmed" colnames(protocolData(nsclc))[colnames(protocolData(nsclc)) == "AlignedReads"] <- "Aligned" colnames(protocolData(nsclc))[colnames(protocolData(nsclc)) == "StitchedReads"] <- "Stitched" colnames(protocolData(nsclc))[colnames(protocolData(nsclc)) == "SequencingSaturation"] <- "Saturated (%)" protocolData(nsclc) <- protocolData(nsclc)[,-which(colnames(protocolData(nsclc)) %in% c("UID"))] nsclc <- updateGeoMxSet(nsclc)
nsclc dim(nsclc) data.frame(exprs(nsclc)[seq_len(5), seq_len(5)]) head(pData(nsclc))
When coercing, we can add the coordinate columns allowing for spatial graphing using Seurat.
nsclcSeurat <- as.Seurat(nsclc, normData = "exprs_norm", ident = "AOI.annotation", coordinates = c("x", "y")) nsclcSeurat VlnPlot(nsclcSeurat, features = "nCount_GeoMx", pt.size = 0.1)
nsclcSeurat <- FindVariableFeatures(nsclcSeurat) nsclcSeurat <- ScaleData(nsclcSeurat) nsclcSeurat <- RunPCA(nsclcSeurat, assay = "GeoMx", verbose = FALSE) nsclcSeurat <- FindNeighbors(nsclcSeurat, reduction = "pca", dims = seq_len(30)) nsclcSeurat <- FindClusters(nsclcSeurat, verbose = FALSE) nsclcSeurat <- RunUMAP(nsclcSeurat, reduction = "pca", dims = seq_len(30)) DimPlot(nsclcSeurat, reduction = "umap", label = TRUE, group.by = "AOI.name")
Because this dataset is segmented, we need to separate the tumor and TME sections before using the spatial graphing. These Seurat functions were created for Visium data, so they can only plot the same sized circles.
Here we are showing the gene counts in each ROI separated by segment.
tumor <- suppressMessages(SpatialFeaturePlot(nsclcSeurat[,nsclcSeurat$AOI.name == "Tumor"], features = "nCount_GeoMx", pt.size.factor = 12) + labs(title = "Tumor") + theme(legend.position = "none") + scale_fill_continuous(type = "viridis", limits = c(min(nsclcSeurat$nCount_GeoMx), max(nsclcSeurat$nCount_GeoMx)))) TME <- suppressMessages(SpatialFeaturePlot(nsclcSeurat[,nsclcSeurat$AOI.name == "TME"], features = "nCount_GeoMx", pt.size.factor = 12) + labs(title = "TME") + theme(legend.position = "right") + scale_fill_continuous(type = "viridis", limits = c(min(nsclcSeurat$nCount_GeoMx), max(nsclcSeurat$nCount_GeoMx)))) wrap_plots(tumor, TME)
Here we show the count for A2M
tumor <- suppressMessages(SpatialFeaturePlot(nsclcSeurat[,nsclcSeurat$AOI.name == "Tumor"], features = "A2M", pt.size.factor = 12) + labs(title = "Tumor") + theme(legend.position = "none") + scale_fill_continuous(type = "viridis", limits = c(min(nsclcSeurat@assays$GeoMx$data["A2M",]), max(nsclcSeurat@assays$GeoMx$data["A2M",])))) TME <- suppressMessages(SpatialFeaturePlot(nsclcSeurat[,nsclcSeurat$AOI.name == "TME"], features = "A2M", pt.size.factor = 12) + labs(title = "TME") + theme(legend.position = "right") + scale_fill_continuous(type = "viridis", limits = c(min(nsclcSeurat@assays$GeoMx$data["A2M",]), max(nsclcSeurat@assays$GeoMx$data["A2M",])))) wrap_plots(tumor, TME)
Using the FindMarkers built in function from Seurat, we can determine the most differentially expressed genes in Tumor and TME
Idents(nsclcSeurat) <- nsclcSeurat$AOI.name de_genes <- FindMarkers(nsclcSeurat, ident.1 = "Tumor", ident.2 = "TME") de_genes <- de_genes[order(abs(de_genes$avg_log2FC), decreasing = TRUE),] de_genes <- de_genes[is.finite(de_genes$avg_log2FC) & de_genes$p_val < 1e-25,] for(i in rownames(de_genes)[1:2]){ print(data.frame(de_genes[i,])) tumor <- suppressMessages(SpatialFeaturePlot(nsclcSeurat[,nsclcSeurat$AOI.name == "Tumor"], features = i, pt.size.factor = 12) + labs(title = "Tumor") + theme(legend.position = "none") + scale_fill_continuous(type = "viridis", limits = c(min(nsclcSeurat@assays$GeoMx$data[i,]), max(nsclcSeurat@assays$GeoMx$data[i,])))) TME <- suppressMessages(SpatialFeaturePlot(nsclcSeurat[,nsclcSeurat$AOI.name == "TME"], features = i, pt.size.factor = 12) + labs(title = "TME") + theme(legend.position = "right") + scale_fill_continuous(type = "viridis", limits = c(min(nsclcSeurat@assays$GeoMx$data[i,]), max(nsclcSeurat@assays$GeoMx$data[i,])))) print(wrap_plots(tumor, TME)) }
SpatialExperiment is an S4 class inheriting from SingleCellExperiment. It is meant as a data storage object rather than an analysis suite like Seurat. Because of this, this section won't have the fancy analysis outputs like the Seurat section had but will show where in the object all the pieces are stored.
library(SpatialExperiment)
The three errors that can occur when trying to coerce to SpatialExperiment are:
as.SpatialExperiment(demoData) as.SpatialExperiment(target_demoData, normData = "exprs") as.SpatialExperiment(norm_target_demoData, normData = "exprs_norm")
After coercing to a SpatialExperiment object all of the metadata is still accessible.
demoSPE <- as.SpatialExperiment(norm_target_demoData, normData = "q_norm") demoSPE # overall data object data.frame(head(colData(demoSPE))) # most important ROI metadata demoSPE@metadata[1:8] # experiment data head(demoSPE@metadata$sequencingMetrics) # sequencing metrics head(demoSPE@metadata$QCMetrics$QCFlags) # QC metrics data.frame(head(rowData(demoSPE))) # gene metadata
When coercing, we can add the coordinate columns and they will be appended to the correct location in SpatialExperiment
nsclcSPE <- as.SpatialExperiment(nsclc, normData = "exprs_norm", coordinates = c("x", "y")) nsclcSPE data.frame(head(spatialCoords(nsclcSPE)))
With the coordinates and the metadata, we can create spatial graphing figures similar to Seurat's. To get the same orientation as Seurat we must swap the axes and flip the y.
figureData <- as.data.frame(cbind(colData(nsclcSPE), spatialCoords(nsclcSPE))) figureData <- cbind(figureData, A2M=as.numeric(nsclcSPE@assays@data$GeoMx["A2M",])) tumor <- ggplot(figureData[figureData$AOI.name == "Tumor",], aes(x=y, y=-x, color = A2M))+ geom_point(size = 6)+ scale_color_continuous(type = "viridis", limits = c(min(figureData$A2M), max(figureData$A2M)))+ theme(legend.position = "none", panel.grid = element_blank(), panel.background = element_rect(fill = "white"), axis.title = element_blank(), axis.text = element_blank(), axis.ticks = element_blank(), axis.line = element_blank())+ labs(title = "Tumor") TME <- ggplot(figureData[figureData$AOI.name == "TME",], aes(x=y, y=-x, color = A2M))+ geom_point(size = 6)+ scale_color_continuous(type = "viridis", limits = c(min(figureData$A2M), max(figureData$A2M))) + theme(panel.grid = element_blank(), panel.background = element_rect(fill = "white"), axis.title = element_blank(), axis.text = element_blank(), axis.ticks = element_blank(), axis.line = element_blank())+ labs(title = "TME") wrap_plots(tumor, TME)
The free-handed nature of Region of Interest (ROI) selection in a GeoMx experiment makes visualization on top of the image difficult in packages designed for different data. We created SpatialOmicsOverlay specifically to visualize and analyze these types of ROIs in a GeoMx experiment and the immunofluorescent-guided segmentation process.
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.