Computational annotation of doublets with scds on Seurat object
Only takes 3 min!
Sometimes when we see an "unexpectied" cluster or group, one of the possible causes could be doublets. Treating doublets as single cells in downstream analyses can bias a study’s conclusions. Here we can use scds, which implamented two new approaches to identify doublets(Co-expression based doublet scoring cxds and binary classification based doublet scoring bcds) to elaluate doublets scores.
For more guides, check scds's page.
set.seed(8887)
library(scds, verbose = FALSE)
library(scater, verbose = FALSE)
library(rsvd, verbose = FALSE)
library(Rtsne, verbose = FALSE)
library(cowplot, verbose = FALSE)
library(scater, verbose = FALSE)
library(patchwork, verbose = FALSE)
library(Seurat, verbose = FALSE)
library(SeuratData, verbose = FALSE)
library(dplyr)
We first load the data (download available here), it's a reference scRNA-seq dataset of ~14,000 adult mouse cortical cell taxonomy from the Allen Institute, generated with the SMART-Seq2 protocol.
allen_reference <- readRDS("~/Documents/blog_notes/allen_cortex.rds")
brain.ref.sce <- as.SingleCellExperiment(allen_reference)
dim(brain.ref.sce)
table(brain.ref.sce$subclass)
Visualize:
logcounts(brain.ref.sce) = log1p(counts(brain.ref.sce))
vrs = apply(logcounts(brain.ref.sce),1,var)
pc = rpca(t(logcounts(brain.ref.sce)[order(vrs,decreasing=TRUE)[1:100],]))
ts = Rtsne(pc$x[,1:10],verb=FALSE)
options(repr.plot.width=12, repr.plot.height=8)
reducedDim(brain.ref.sce,"tsne") = ts$Y; rm(ts,vrs,pc)
plotReducedDim(brain.ref.sce,"tsne",col="subclass")
brain.ref.sce = cxds(brain.ref.sce,retRes = TRUE)
brain.ref.sce = bcds(brain.ref.sce,retRes = TRUE,verb=TRUE)
brain.ref.sce = cxds_bcds_hybrid(brain.ref.sce)
plotReducedDim(brain.ref.sce,"tsne",col="hybrid_score")
options(repr.plot.width=20, repr.plot.height=8)
boxplot(brain.ref.sce$hybrid_score ~ brain.ref.sce$subclass, main="hybrid")
metadata = as.data.frame(colData(brain.ref.sce))
hyb_doublet_score = select(metadata, hybrid_score)
allen_reference <- AddMetaData(allen_reference, metadata = hyb_doublet_score, col.name = 'hybrid.score')
allen_reference_3000 <- allen_reference[,sample(colnames(allen_reference), size =3000, replace=F)]
allen_reference_3000 <- SCTransform(allen_reference_3000, ncells = 3000, verbose = FALSE) %>%
RunPCA(verbose = FALSE) %>%
RunUMAP(dims = 1:30)
DimPlot(allen_reference_3000, group.by = "subclass", label = TRUE)
options(repr.plot.width=10, repr.plot.height=8)
FeaturePlot(allen_reference_3000, features = "hybrid.score", label = TRUE, pt.size=4)
3 min is a lie
sessionInfo()