Skip to contents

Run cell-level quality control

Usage

RunCellQC(
  srt,
  assay = "RNA",
  split.by = NULL,
  group.by = NULL,
  return_filtered = FALSE,
  qc_metrics = c("doublets", "decontX", "atac", "outlier", "umi", "gene", "mito", "ribo",
    "ribo_mito_ratio", "species"),
  db_method = "scDblFinder",
  db_rate = NULL,
  db_coefficient = 0.01,
  decontX_threshold = NULL,
  decontX_batch = NULL,
  decontX_background = NULL,
  decontX_background_assay = NULL,
  decontX_bg_batch = NULL,
  decontX_assay_name = "decontXcounts",
  decontX_store_assay = FALSE,
  decontX_round_counts = TRUE,
  decontX_args = list(),
  atac_args = list(),
  outlier_threshold = c("log10_nCount:lower:2.5", "log10_nCount:higher:5",
    "log10_nFeature:lower:2.5", "log10_nFeature:higher:5", "featurecount_dist:lower:2.5"),
  outlier_n = 1,
  UMI_threshold = 3000,
  gene_threshold = 1000,
  mito_threshold = 20,
  mito_pattern = c("MT-", "Mt-", "mt-"),
  mito_gene = NULL,
  ribo_threshold = 50,
  ribo_pattern = c("RP[SL]\\d+\\w{0,1}\\d*$", "Rp[sl]\\d+\\w{0,1}\\d*$",
    "rp[sl]\\d+\\w{0,1}\\d*$"),
  ribo_gene = NULL,
  ribo_mito_ratio_range = c(1, Inf),
  species = NULL,
  species_gene_prefix = NULL,
  species_percent = 95,
  seed = 11
)

Arguments

srt

A Seurat object.

assay

The name of the assay to be used for doublet-calling. Default is "RNA".

split.by

Name of a meta.data column used to split the object before QC. Default is NULL. When specified, QC and doublet-calling are performed separately within each split object and merged back afterward.

group.by

Group labels passed to RunDecontX() when "decontX" is included in qc_metrics. Can be NULL, a meta.data column name, or a vector aligned to cells. Default is NULL.

return_filtered

Logical indicating whether to return a cell-filtered Seurat object. Default is FALSE.

qc_metrics

A character vector specifying the quality control metrics to be applied. Available metrics are "doublets", "decontX", "atac", "outlier", "umi", "gene", "mito", "ribo", "ribo_mito_ratio", and "species". Default is c("doublets", "decontX", "atac", "outlier", "umi", "gene", "mito", "ribo", "ribo_mito_ratio", "species"). For a ChromatinAssay, if qc_metrics is not supplied, the default is "atac".

db_method

Method used for doublet-calling. Can be one of "scDblFinder", "Scrublet", "DoubletDetection", "scds_cxds", "scds_bcds", "scds_hybrid". Default is "scDblFinder". The resulting doublet labels are aggregated afterward into db_qc and do not affect the thresholds used by the other QC metrics.

db_rate

The expected doublet rate. Default is NULL, in which case it is calculated as ncol(srt) / 1000 * db_coefficient.

db_coefficient

The coefficient used to calculate the doublet rate. Default is 0.01. Doublet rate is calculated as ncol(srt) / 1000 * db_coefficient.

decontX_threshold

Optional contamination threshold used to filter cells after running RunDecontX(). Cells with decontX_contamination greater than this value are marked as failed in decontX_qc. Default is NULL, which computes decontX results without filtering cells by contamination.

decontX_batch

Batch labels passed to RunDecontX() when "decontX" is included in qc_metrics. Default is NULL.

decontX_background

Optional background / empty-droplet input passed to RunDecontX() when "decontX" is included in qc_metrics. Default is NULL.

decontX_background_assay

Assay name used when decontX_background is a Seurat object or SingleCellExperiment. Default is NULL.

decontX_bg_batch

Batch labels for decontX_background passed to RunDecontX(). Default is NULL.

decontX_assay_name

Name of the assay used to store decontaminated counts from RunDecontX(). Default is "decontXcounts".

decontX_store_assay

Whether to store decontaminated counts as a new assay when running RunDecontX(). Default is FALSE.

decontX_round_counts

Whether to round decontaminated counts before creating the assay in RunDecontX(). Default is TRUE.

decontX_args

A named list of additional advanced arguments passed to RunDecontX() when "decontX" is included in qc_metrics. Explicit decontX_* parameters are preferred for common options and take precedence when both are supplied. Default is list().

atac_args

A named list of additional arguments passed to RunATACQC() when "atac" is included in qc_metrics. Threshold arguments from RunATACQC() are used to label failed cells in atac_qc, but filtering is deferred to RunCellQC(). Default is list().

outlier_threshold

A character vector specifying the outlier threshold. Default is c("log10_nCount:lower:2.5", "log10_nCount:higher:5", "log10_nFeature:lower:2.5", "log10_nFeature:higher:5", "featurecount_dist:lower:2.5").

outlier_n

Minimum number of outlier_threshold conditions that a cell must meet to be flagged as an outlier. Default is 1.

UMI_threshold

UMI number threshold. Cells that exceed this threshold will be kept. Default is 3000.

gene_threshold

Gene number threshold. Cells that exceed this threshold will be kept. Default is 1000.

mito_threshold

Percentage of UMI counts of mitochondrial genes. Cells that exceed this threshold will be discarded. Default is 20.

mito_pattern

Regex patterns to match the mitochondrial genes. Default is c("MT-", "Mt-", "mt-").

mito_gene

A character vector of user-defined mitochondrial genes. If provided, mito_pattern matching is ignored. Default is NULL.

ribo_threshold

Percentage of UMI counts of ribosomal genes. Cells that exceed this threshold will be discarded. Default is 50.

ribo_pattern

Regex patterns to match the ribosomal genes. Default is c("RP[SL]\\d+\\w{0,1}\\d*$", "Rp[sl]\\d+\\w{0,1}\\d*$", "rp[sl]\\d+\\w{0,1}\\d*$").

ribo_gene

A character vector of user-defined ribosomal genes. If provided, ribo_pattern matching is ignored. Default is NULL.

ribo_mito_ratio_range

A numeric vector specifying the range of ribosomal/mitochondrial gene expression ratios for ribo_mito_ratio outlier cells. Default is c(1, Inf).

species

Species used as the suffix of the QC metrics. The first is the species of interest. Default is NULL.

species_gene_prefix

Species gene prefix used to calculate QC metrics for each species. Default is NULL.

species_percent

Percentage of UMI counts of the first species. Cells that exceed this threshold will be kept. Default is 95.

seed

Random seed for reproducibility. Default is 11.

Value

Returns a Seurat object with the QC results stored in its meta.data.

Examples

data(pancreas_sub)
pancreas_sub <- standard_scop(pancreas_sub)
#>  [2026-04-26 01:53:35] Start standard processing workflow...
#>  [2026-04-26 01:53:36] Checking a list of <Seurat>...
#> ! [2026-04-26 01:53:36] Data 1/1 of the `srt_list` is "unknown"
#>  [2026-04-26 01:53:36] Perform `NormalizeData()` with `normalization.method = 'LogNormalize'` on 1/1 of `srt_list`...
#>  [2026-04-26 01:53:38] Perform `Seurat::FindVariableFeatures()` on 1/1 of `srt_list`...
#>  [2026-04-26 01:53:39] Use the separate HVF from `srt_list`
#>  [2026-04-26 01:53:39] Number of available HVF: 2000
#>  [2026-04-26 01:53:39] Finished check
#>  [2026-04-26 01:53:39] Perform `Seurat::ScaleData()`
#>  [2026-04-26 01:53:40] Perform pca linear dimension reduction
#>  [2026-04-26 01:53:40] Use stored estimated dimensions 1:20 for Standardpca
#>  [2026-04-26 01:53:41] Perform `Seurat::FindClusters()` with `cluster_algorithm = 'louvain'` and `cluster_resolution = 0.6`
#>  [2026-04-26 01:53:41] Reorder clusters...
#>  [2026-04-26 01:53:41] Skip `log1p()` because `layer = data` is not "counts"
#>  [2026-04-26 01:53:41] Perform umap nonlinear dimension reduction
#>  [2026-04-26 01:53:41] Perform umap nonlinear dimension reduction using Standardpca (1:20)
#>  [2026-04-26 01:53:46] Perform umap nonlinear dimension reduction using Standardpca (1:20)
#>  [2026-04-26 01:53:50] Standard processing workflow completed
pancreas_sub <- RunCellQC(
  pancreas_sub,
  db_method = "scds_cxds"
)
#>  [2026-04-26 01:53:50] Running cell-level quality control
#>  [2026-04-26 01:53:51] Data type is raw counts
#>  [2026-04-26 01:53:51] Running scds with method "cxds"
#> Registered S3 method overwritten by 'pROC':
#>   method   from            
#>   plot.roc spatstat.explore
#> ! [2026-04-26 01:54:58] Skip "atac" QC because `assay = 'RNA'` is not a <ChromatinAssay>
#>  [2026-04-26 01:54:58] Running decontX
#> Warning: 'librarySizeFactors' is deprecated.
#> Use 'scrapper::centerSizeFactors' instead.
#> See help("Deprecated")
#> Warning: 'normalizeCounts' is deprecated.
#> Use 'scrapper::normalizeCounts' instead.
#> See help("Deprecated")
#>  [2026-04-26 01:59:16] decontX contamination (median/mean/max): 0.0136 / 0.1628 / 0.7465
#>  [2026-04-26 01:59:16] decontX assay stored as decontXcounts
#>  [2026-04-26 01:59:16] decontX decontamination completed
#>  [2026-04-26 01:59:17] ● Total cells: 1000
#> 967 cells remained
#> 33 cells filtered out:
#> 10 potential doublets
#> 0 ATAC QC failed cells
#> 0 high-contamination cells
#> 23 outlier cells
#> 0 low-UMI cells
#> 0 low-gene cells
#> 0 high-mito cells
#> 0 high-ribo cells
#> 0 ribo_mito_ratio outlier cells
#> 0 species-contaminated cells

CellStatPlot(
  pancreas_sub,
  stat.by = c(
    "db_qc", "outlier_qc"
  ),
  plot_type = "upset",
  stat_level = "Fail"
)
#> Error in StatPlot(meta_data, stat.by = stat.by, group.by = group.by, split.by = split.by,     bg.by = bg.by, flip = flip, NA_color = NA_color, NA_stat = NA_stat,     keep_empty = keep_empty, individual = individual, stat_level = stat_level,     plot_type = plot_type, stat_type = stat_type, position = position,     palette = palette, palcolor = palcolor, alpha = alpha, bg_palette = bg_palette,     bg_palcolor = bg_palcolor, bg_alpha = bg_alpha, label = label,     label.size = label.size, label.fg = label.fg, label.bg = label.bg,     label.bg.r = label.bg.r, aspect.ratio = aspect.ratio, title = title,     subtitle = subtitle, xlab = xlab, ylab = ylab, legend.position = legend.position,     legend.direction = legend.direction, theme_use = theme_use,     theme_args = theme_args, grid_major = grid_major, grid_major_colour = grid_major_colour,     grid_major_linetype = grid_major_linetype, grid_major_linewidth = grid_major_linewidth,     combine = combine, nrow = nrow, ncol = ncol, byrow = byrow,     force = force, seed = seed): unused arguments (grid_major = grid_major, grid_major_colour = grid_major_colour, grid_major_linetype = grid_major_linetype, grid_major_linewidth = grid_major_linewidth)