Cell-Cycle Scoring with Seurat and ggplot2
Using umap information to generate customize dimension plots
This note uses R kernel.
First load the data matrix, the files used are available from here.
library(Seurat)
# Read in the expression matrix The first row is a header row, the first column is rownames
exp.mat <- read.table(file = "./nestorawa_forcellcycle_expressionMatrix.txt", header = TRUE,
as.is = TRUE, row.names = 1)
# A list of cell cycle markers, from Tirosh et al, 2015, is loaded with Seurat. We can
# segregate this list into markers of G2/M phase and markers of S phase
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
# Create our Seurat object and complete the initalization steps
marrow <- CreateSeuratObject(counts = exp.mat)
marrow <- NormalizeData(marrow)
marrow <- FindVariableFeatures(marrow, selection.method = "vst")
marrow <- ScaleData(marrow, features = rownames(marrow))
If we run a PCA on our object, using the variable genes we found in FindVariableFeatures() above, we see that while most of the variance can be explained by lineage, PC8 and PC10 are split on cell-cycle genes including TOP2A and MKI67. We will attempt to regress this signal from the data, so that cell-cycle heterogeneity does not contribute to PCA or downstream analysis.
marrow <- RunPCA(marrow, features = VariableFeatures(marrow), ndims.print = 6:10, nfeatures.print = 10)
DimHeatmap(marrow, dims = c(8, 10))
marrow <- CellCycleScoring(marrow, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
# view cell cycle scores and phase assignments
head(marrow[[]])
RidgePlot(marrow, features = c("PCNA", "TOP2A", "MCM6", "MKI67"), ncol = 2)
s.genes
g2m.genes
For more information about how to regress this source of heterogeneity(cell cycles) out of the data, you can check here.
marrow <- FindNeighbors(marrow, dims = 1:10)
marrow <- FindClusters(marrow, resolution = 0.5)
head(Idents(marrow), 5)
Run umap and visualize:
marrow <- RunUMAP(marrow, dims = 1:10, verbose = FALSE)
DimPlot(marrow, reduction = "umap", label = TRUE)
library(celldex, verbose = FALSE)
hpca.se <- HumanPrimaryCellAtlasData()
hpca.se
pred.hesc <- SingleR::SingleR(GetAssayData(marrow, assay = "RNA", slot = "data"), clusters = Idents(marrow),ref = hpca.se, assay.type.test=1,
labels = hpca.se$label.main)
pred.hesc
table(pred.hesc$labels)
Import cluster ident information back to seurat object.
marrow[["SingleR.cluster.labels"]] <-
pred.hesc$labels[match(Idents(marrow), rownames(pred.hesc))]
DimPlot(marrow, reduction = "umap", label = TRUE,group.by = "SingleR.cluster.labels")
Or identify cells individually:
pred.hesc2 <- SingleR::SingleR(GetAssayData(marrow, assay = "RNA", slot = "data"),ref = hpca.se, assay.type.test=1,
labels = hpca.se$label.main)
table(pred.hesc2$labels)
colors = c("#097559","#757575","#C29359","#C2101E","#EDDB51","#EDDCBE","#F7A3AA","#1BBCC2","#F6E246")
marrow[["SingleR.labels"]] <- pred.hesc2$labels
DimPlot(marrow, reduction = "umap", label = TRUE,group.by = "SingleR.labels")
colors = c("#097559","#757575","#C29359","#C2101E","#EDDB51","#EDDCBE","#F7A3AA","#1BBCC2","#F6E246")
DimPlot(marrow, reduction = "umap", label = TRUE, group.by = "SingleR.labels", cols = colors)
Export to use ggplot2 directly:
library(tidyverse, verbose = FALSE)
umap_tx = marrow@reductions$umap@cell.embeddings %>%
as.data.frame() %>%
cbind(cycle = marrow@meta.data$Phase) %>%
cbind(singleRid = marrow@meta.data$SingleR.labels)
options(repr.plot.width=12, repr.plot.height=8)
ggplot(umap_tx, aes(x=UMAP_1, y=UMAP_2, color=singleRid, shape=cycle)) +
geom_point(aes(size=cycle), alpha = 0.6 ) +
scale_color_manual(values = colors) +
scale_size_manual(values = c(6,3,3)) +
scale_shape_manual(values = c(16,17,18)) +
theme_bw() +
theme(panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"))
We can tall from the plot that cell cycling phases could contribute to the ambiguity of annotation and clustering.
sessionInfo()
Created: 02/09/2022 by vikkki