Skip to contents

Annotate single cells using SingleR

Usage

RunSingleR(
  srt_query,
  srt_ref,
  query_group = NULL,
  ref_group = NULL,
  query_assay = "RNA",
  ref_assay = "RNA",
  genes = "de",
  de.method = "wilcox",
  sd.thresh = 1,
  de.n = NULL,
  aggr.ref = FALSE,
  aggr.args = list(),
  quantile = 0.8,
  fine.tune = TRUE,
  tune.thresh = 0.05,
  prune = TRUE,
  cores = 1,
  verbose = TRUE
)

Arguments

srt_query

An object of class Seurat to be annotated with cell types.

srt_ref

An object of class Seurat storing the reference cells.

query_group

A character vector specifying the column name in the `srt_query` metadata that represents the cell grouping.

ref_group

A character vector specifying the column name in the `srt_ref` metadata that represents the cell grouping.

query_assay

A character vector specifying the assay to be used for the query data. Default is "RNA".

ref_assay

A character vector specifying the assay to be used for the reference data. Default is "RNA".

genes

The `genes` argument passed to `SingleR::SingleR()`. Default is "de".

de.method

The `de.method` argument passed to `SingleR::SingleR()`. Default is "wilcox".

sd.thresh

Deprecated and ignored.

de.n

An integer scalar specifying the number of DE genes to use when genes="de". If de.method="classic", defaults to 500 * (2/3) ^ log2(N) where N is the number of unique labels. Otherwise, defaults to 10. Ignored if genes is a list of markers/DE genes.

aggr.ref, aggr.args

Arguments controlling the aggregation of the references prior to annotation, see trainSingleR.

quantile

The `quantile` argument passed to `SingleR::SingleR()`. Default is 0.8.

fine.tune

The `fine.tune` argument passed to `SingleR::SingleR()`. Default is TRUE.

tune.thresh

The `tune.thresh` argument passed to `SingleR::SingleR()`. Default is 0.05.

prune

The `prune` argument passed to `SingleR::SingleR()`. Default is TRUE.

cores

The number of cores to use for parallel computation. Default is 1.

verbose

Whether to print progress messages. Default is TRUE.

Examples

data(panc8_sub)
# Naively convert human gene symbols to mouse-style symbols (capitalized first letter, remaining letters lowercase) and preprocess the data
genenames <- make.unique(
  thisutils::capitalize(
    rownames(panc8_sub),
    force_tolower = TRUE
  )
)
names(genenames) <- rownames(panc8_sub)
panc8_sub <- RenameFeatures(
  panc8_sub,
  newnames = genenames
)
#>  [2025-09-20 13:50:04] Rename features for the assay: RNA
panc8_sub <- CheckDataMerge(
  panc8_sub,
  batch = "tech"
)[["srt_merge"]]
#>  [2025-09-20 13:50:04] Spliting `srt_merge` into `srt_list` by column "tech"...
#>  [2025-09-20 13:50:05] Checking a list of <Seurat> object...
#> ! [2025-09-20 13:50:05] Data 1/5 of the `srt_list` is "unknown"
#>  [2025-09-20 13:50:05] Perform `NormalizeData()` with `normalization.method = 'LogNormalize'` on the data 1/5 of the `srt_list`...
#>  [2025-09-20 13:50:07] Perform `Seurat::FindVariableFeatures()` on the data 1/5 of the `srt_list`...
#> ! [2025-09-20 13:50:07] Data 2/5 of the `srt_list` is "unknown"
#>  [2025-09-20 13:50:07] Perform `NormalizeData()` with `normalization.method = 'LogNormalize'` on the data 2/5 of the `srt_list`...
#>  [2025-09-20 13:50:09] Perform `Seurat::FindVariableFeatures()` on the data 2/5 of the `srt_list`...
#> ! [2025-09-20 13:50:09] Data 3/5 of the `srt_list` is "unknown"
#>  [2025-09-20 13:50:09] Perform `NormalizeData()` with `normalization.method = 'LogNormalize'` on the data 3/5 of the `srt_list`...
#>  [2025-09-20 13:50:11] Perform `Seurat::FindVariableFeatures()` on the data 3/5 of the `srt_list`...
#> ! [2025-09-20 13:50:11] Data 4/5 of the `srt_list` is "unknown"
#>  [2025-09-20 13:50:11] Perform `NormalizeData()` with `normalization.method = 'LogNormalize'` on the data 4/5 of the `srt_list`...
#>  [2025-09-20 13:50:13] Perform `Seurat::FindVariableFeatures()` on the data 4/5 of the `srt_list`...
#> ! [2025-09-20 13:50:13] Data 5/5 of the `srt_list` is "unknown"
#>  [2025-09-20 13:50:13] Perform `NormalizeData()` with `normalization.method = 'LogNormalize'` on the data 5/5 of the `srt_list`...
#>  [2025-09-20 13:50:15] Perform `Seurat::FindVariableFeatures()` on the data 5/5 of the `srt_list`...
#>  [2025-09-20 13:50:15] Use the separate HVF from srt_list
#>  [2025-09-20 13:50:16] Number of available HVF: 2000
#>  [2025-09-20 13:50:16] Finished check

# Annotation
data(pancreas_sub)
pancreas_sub <- standard_scop(pancreas_sub)
#>  [2025-09-20 13:50:18] Start standard scop workflow...
#>  [2025-09-20 13:50:19] Checking a list of <Seurat> object...
#> ! [2025-09-20 13:50:19] Data 1/1 of the `srt_list` is "unknown"
#>  [2025-09-20 13:50:19] Perform `NormalizeData()` with `normalization.method = 'LogNormalize'` on the data 1/1 of the `srt_list`...
#>  [2025-09-20 13:50:21] Perform `Seurat::FindVariableFeatures()` on the data 1/1 of the `srt_list`...
#>  [2025-09-20 13:50:22] Use the separate HVF from srt_list
#>  [2025-09-20 13:50:22] Number of available HVF: 2000
#>  [2025-09-20 13:50:22] Finished check
#>  [2025-09-20 13:50:22] Perform `Seurat::ScaleData()`
#> Warning: Different features in new layer data than already exists for scale.data
#>  [2025-09-20 13:50:22] Perform pca linear dimension reduction
#> StandardPC_ 1 
#> Positive:  Aplp1, Cpe, Gnas, Fam183b, Map1b, Hmgn3, Pcsk1n, Chga, Tuba1a, Bex2 
#> 	   Syt13, Isl1, 1700086L19Rik, Pax6, Chgb, Scgn, Rbp4, Scg3, Gch1, Camk2n1 
#> 	   Cryba2, Pcsk2, Pyy, Tspan7, Mafb, Hist3h2ba, Dbpht2, Abcc8, Rap1b, Slc38a5 
#> Negative:  Spp1, Anxa2, Sparc, Dbi, 1700011H14Rik, Wfdc2, Gsta3, Adamts1, Clu, Mgst1 
#> 	   Bicc1, Ldha, Vim, Cldn3, Cyr61, Rps2, Mt1, Ptn, Phgdh, Nudt19 
#> 	   Smtnl2, Smco4, Habp2, Mt2, Col18a1, Rpl12, Galk1, Cldn10, Acot1, Ccnd1 
#> StandardPC_ 2 
#> Positive:  Rbp4, Tagln2, Tuba1b, Fkbp2, Pyy, Pcsk2, Iapp, Tmem27, Meis2, Tubb4b 
#> 	   Pcsk1n, Dbpht2, Rap1b, Dynll1, Tubb2a, Sdf2l1, Scgn, 1700086L19Rik, Scg2, Abcc8 
#> 	   Atp1b1, Hspa5, Fam183b, Papss2, Slc38a5, Scg3, Mageh1, Tspan7, Ppp1r1a, Ociad2 
#> Negative:  Neurog3, Btbd17, Gadd45a, Ppp1r14a, Neurod2, Sox4, Smarcd2, Mdk, Pax4, Btg2 
#> 	   Sult2b1, Hes6, Grasp, Igfbpl1, Gpx2, Cbfa2t3, Foxa3, Shf, Mfng, Tmsb4x 
#> 	   Amotl2, Gdpd1, Cdc14b, Epb42, Rcor2, Cotl1, Upk3bl, Rbfox3, Cldn6, Cer1 
#> StandardPC_ 3 
#> Positive:  Nusap1, Top2a, Birc5, Aurkb, Cdca8, Pbk, Mki67, Tpx2, Plk1, Ccnb1 
#> 	   2810417H13Rik, Incenp, Cenpf, Ccna2, Prc1, Racgap1, Cdk1, Aurka, Cdca3, Hmmr 
#> 	   Spc24, Kif23, Sgol1, Cenpe, Cdc20, Hist1h1b, Cdca2, Mxd3, Kif22, Ska1 
#> Negative:  Anxa5, Pdzk1ip1, Acot1, Tpm1, Anxa2, Dcdc2a, Capg, Sparc, Ttr, Pamr1 
#> 	   Clu, Cxcl12, Ndrg2, Hnf1aos1, Gas6, Gsta3, Krt18, Ces1d, Atp1b1, Muc1 
#> 	   Hhex, Acadm, Spp1, Enpp2, Bcl2l14, Sat1, Smtnl2, 1700011H14Rik, Tgm2, Fam159a 
#> StandardPC_ 4 
#> Positive:  Glud1, Tm4sf4, Akr1c19, Cldn4, Runx1t1, Fev, Pou3f4, Gm43861, Pgrmc1, Arx 
#> 	   Cd200, Lrpprc, Hmgn3, Ppp1r14c, Pam, Etv1, Tsc22d1, Slc25a5, Akap17b, Pgf 
#> 	   Fam43a, Emb, Jun, Krt8, Dnajc12, Mid1ip1, Ids, Rgs17, Uchl1, Alcam 
#> Negative:  Ins2, Ins1, Ppp1r1a, Nnat, Calr, Sytl4, Sdf2l1, Iapp, Pdia6, Mapt 
#> 	   G6pc2, C2cd4b, Npy, Gng12, P2ry1, Ero1lb, Adra2a, Papss2, Arhgap36, Fam151a 
#> 	   Dlk1, Creld2, Gip, Tmem215, Gm27033, Cntfr, Prss53, C2cd4a, Lyve1, Ociad2 
#> StandardPC_ 5 
#> Positive:  Pdx1, Nkx6-1, Npepl1, Cldn4, Cryba2, Fev, Jun, Chgb, Gng12, Adra2a 
#> 	   Mnx1, Sytl4, Pdk3, Gm27033, Nnat, Chga, Ins2, 1110012L19Rik, Enho, Krt7 
#> 	   Mlxipl, Tmsb10, Flrt1, Pax4, Tubb3, Prrg2, Gars, Frzb, BC023829, Gm2694 
#> Negative:  Irx2, Irx1, Gcg, Ctxn2, Tmem27, Ctsz, Tmsb15l, Nap1l5, Pou6f2, Gria2 
#> 	   Ghrl, Peg10, Smarca1, Arx, Lrpap1, Rgs4, Ttr, Gast, Tmsb15b2, Serpina1b 
#> 	   Slc16a10, Wnk3, Ly6e, Auts2, Sct, Arg1, Dusp10, Sphkap, Dock11, Edn3 
#>  [2025-09-20 13:50:23] Perform `Seurat::FindClusters()` with louvain and `cluster_resolution` = 0.6
#>  [2025-09-20 13:50:23] Reorder clusters...
#> ! [2025-09-20 13:50:23] Using `Seurat::AggregateExpression()` to calculate pseudo-bulk data for <Assay5>
#>  [2025-09-20 13:50:23] Perform umap nonlinear dimension reduction
#>  [2025-09-20 13:50:23] Non-linear dimensionality reduction (umap) using (Standardpca) dims (1-50) as input
#>  [2025-09-20 13:50:23] UMAP will return its model
#>  [2025-09-20 13:50:28] Non-linear dimensionality reduction (umap) using (Standardpca) dims (1-50) as input
#>  [2025-09-20 13:50:28] UMAP will return its model
#>  [2025-09-20 13:50:32] Run scop standard workflow done
pancreas_sub <- RunSingleR(
  srt_query = pancreas_sub,
  srt_ref = panc8_sub,
  query_group = "Standardpca_SNN_res.0.6",
  ref_group = "celltype"
)
#>  [2025-09-20 13:50:32] Start SingleR annotation
#>  [2025-09-20 13:50:32] Installing: SingleR...
#>  
#> → Will install 3 packages.
#> → All 3 packages (0 B) are cached.
#> + DelayedMatrixStats   1.30.0 [bld]
#> + SingleR              2.10.0 [bld][cmp]
#> + sparseMatrixStats    1.20.0 [bld][cmp]
#>  All system requirements are already installed.
#>   
#>  No downloads are needed, 3 pkgs are cached
#>  Got DelayedMatrixStats 1.30.0 (source) (270.71 kB)
#>  Got SingleR 2.10.0 (source) (691.78 kB)
#>  Got sparseMatrixStats 1.20.0 (source) (704.19 kB)
#>  Installing system requirements
#>  Executing `sudo sh -c apt-get -y update`
#> Get:1 file:/etc/apt/apt-mirrors.txt Mirrorlist [144 B]
#> Hit:2 http://azure.archive.ubuntu.com/ubuntu noble InRelease
#> Hit:3 http://azure.archive.ubuntu.com/ubuntu noble-updates InRelease
#> Hit:4 http://azure.archive.ubuntu.com/ubuntu noble-backports InRelease
#> Hit:5 http://azure.archive.ubuntu.com/ubuntu noble-security InRelease
#> Hit:6 https://packages.microsoft.com/repos/azure-cli noble InRelease
#> Hit:7 https://packages.microsoft.com/ubuntu/24.04/prod noble InRelease
#> Reading package lists...
#>  Executing `sudo sh -c apt-get -y install libcurl4-openssl-dev libssl-dev`
#> Reading package lists...
#> Building dependency tree...
#> Reading state information...
#> libcurl4-openssl-dev is already the newest version (8.5.0-2ubuntu10.6).
#> libssl-dev is already the newest version (3.0.13-0ubuntu3.5).
#> 0 upgraded, 0 newly installed, 0 to remove and 41 not upgraded.
#>  Building sparseMatrixStats 1.20.0
#>  Built sparseMatrixStats 1.20.0 (16.9s)
#>  Installed sparseMatrixStats 1.20.0  (56ms)
#>  Building DelayedMatrixStats 1.30.0
#>  Built DelayedMatrixStats 1.30.0 (10.5s)
#>  Installed DelayedMatrixStats 1.30.0  (1s)
#>  Building SingleR 2.10.0
#>  Built SingleR 2.10.0 (38.1s)
#>  Installed SingleR 2.10.0  (80ms)
#>  1 pkg + 41 deps: kept 38, added 3, dld 3 (1.67 MB) [1m 10.3s]
#>  [2025-09-20 13:51:43] Installing: scrapper...
#>  
#> → Will install 4 packages.
#> → All 4 packages (0 B) are cached.
#> + Rigraphlib   1.0.0  [bld][cmp]
#> + biocmake     1.0.1  [bld]
#> + dir.expiry   1.16.0 [bld]
#> + scrapper     1.2.1  [bld][cmp]
#>   
#>  No downloads are needed, 4 pkgs are cached
#>  Got biocmake 1.0.1 (source) (228.04 kB)
#>  Got dir.expiry 1.16.0 (source) (308.47 kB)
#>  Got scrapper 1.2.1 (source) (893.37 kB)
#>  Got Rigraphlib 1.0.0 (source) (4.53 MB)
#>  Building dir.expiry 1.16.0
#>  Built dir.expiry 1.16.0 (978ms)
#>  Installed dir.expiry 1.16.0  (1s)
#>  Building biocmake 1.0.1
#>  Built biocmake 1.0.1 (996ms)
#>  Installed biocmake 1.0.1  (1s)
#>  Building Rigraphlib 1.0.0
#>  Built Rigraphlib 1.0.0 (2m 27.7s)
#>  Installed Rigraphlib 1.0.0  (274ms)
#>  Building scrapper 1.2.1
#>  Built scrapper 1.2.1 (4m 6.6s)
#>  Installed scrapper 1.2.1  (1.7s)
#>  1 pkg + 22 deps: kept 18, added 4, dld 4 (5.96 MB) [6m 42.5s]
#>  [2025-09-20 13:58:25] SingleR and scrapper installed successfully
#> Error in CheckDataType(data = GetAssayData5(srt_query, layer = "data",     assay = query_assay, verbose = FALSE)): argument "object" is missing, with no default
CellDimPlot(
  pancreas_sub,
  group.by = c("singler_annotation", "CellType")
)
#> Error in CellDimPlot(pancreas_sub, group.by = c("singler_annotation",     "CellType")): Singler_annotation is not in the meta.data of srt object.

pancreas_sub <- RunSingleR(
  srt_query = pancreas_sub,
  srt_ref = panc8_sub,
  query_group = NULL,
  ref_group = "celltype"
)
#>  [2025-09-20 13:58:25] Start SingleR annotation
#>  [2025-09-20 13:58:25] SingleR and scrapper installed successfully
#> Error in CheckDataType(data = GetAssayData5(srt_query, layer = "data",     assay = query_assay, verbose = FALSE)): argument "object" is missing, with no default
CellDimPlot(
  pancreas_sub,
  group.by = c("singler_annotation", "CellType")
)
#> Error in CellDimPlot(pancreas_sub, group.by = c("singler_annotation",     "CellType")): Singler_annotation is not in the meta.data of srt object.