Skip to contents

PAGA (partition-based graph abstraction) is a graph-based method used to infer cellular trajectories. This function runs the PAGA analysis on a Seurat object.

Usage

RunPAGA(
  srt = NULL,
  assay_x = "RNA",
  layer_x = "counts",
  assay_y = c("spliced", "unspliced"),
  layer_y = "counts",
  adata = NULL,
  group_by = NULL,
  linear_reduction = NULL,
  nonlinear_reduction = NULL,
  basis = NULL,
  n_pcs = 30,
  n_neighbors = 30,
  use_rna_velocity = FALSE,
  vkey = "stochastic",
  embedded_with_PAGA = FALSE,
  paga_layout = "fr",
  threshold = 0.1,
  point_size = 20,
  infer_pseudotime = FALSE,
  root_group = NULL,
  root_cell = NULL,
  n_dcs = 10,
  n_branchings = 0,
  min_group_size = 0.01,
  palette = "Paired",
  palcolor = NULL,
  show_plot = TRUE,
  save = FALSE,
  dpi = 300,
  dirpath = "./",
  fileprefix = "",
  return_seurat = !is.null(srt)
)

Arguments

srt

A Seurat object.

assay_x

Assay to convert in the anndata object.

layer_x

Layer name for assay_x in the Seurat object.

assay_y

Assay(s) to convert in the anndata object. Default is c("spliced", "unspliced").

layer_y

Layer names for the assay_y in the Seurat object.

adata

An anndata object.

group_by

Variable to use for grouping cells in the Seurat object.

linear_reduction

Linear reduction method to use, e.g., "PCA".

nonlinear_reduction

Non-linear reduction method to use, e.g., "UMAP".

basis

The basis to use for reduction, e.g., "UMAP".

n_pcs

Number of principal components to use for linear reduction. Default is 30.

n_neighbors

Number of neighbors to use for constructing the KNN graph. Default is 30.

use_rna_velocity

Whether to use RNA velocity for PAGA analysis. Default is FALSE.

vkey

The name of the RNA velocity data to use if use_rna_velocity is TRUE. Default is "stochastic".

embedded_with_PAGA

Whether to embed data using PAGA layout. Default is FALSE.

paga_layout

The layout for plotting the PAGA graph. See the layout parameter of the scanpy.pl.paga function.

threshold

The threshold for plotting the PAGA graph. Edges with weights below this threshold will not be drawn.

point_size

The point size for plotting.

infer_pseudotime

Whether to infer pseudotime.

root_group

The group to use as the root for pseudotime inference.

root_cell

The cell to use as the root for pseudotime inference.

n_dcs

The number of diffusion components to use for pseudotime inference.

n_branchings

Number of branchings to detect.

min_group_size

The minimum size of a group (as a fraction of the total number of cells) to consider it as a potential branching point.

palette

The palette to use for coloring cells.

palcolor

A vector of colors to use as the palette.

show_plot

Whether to show the plot.

save

Whether to save the plots.

dpi

The DPI (dots per inch) for saving the plot.

dirpath

The directory to save the plots.

fileprefix

The file prefix to use for the plots.

return_seurat

Whether to return a Seurat object instead of an anndata object. Default is !is.null(srt), i.e., TRUE when srt is provided and FALSE otherwise.

Examples

PrepareEnv()
#>  [2025-09-20 13:44:50] Preparing scop Python Environment
#>  [2025-09-20 13:44:50] Environment name: scop_env
#>  [2025-09-20 13:44:50] Python version: 3.10-1
#>  [2025-09-20 13:44:50] Number of packages to install: 21
#>  [2025-09-20 13:44:50] Auto-detecting conda...
#>  [2025-09-20 13:44:51] Using existing environment: /usr/share/miniconda/envs/scop_env
#>  [2025-09-20 13:44:51] Checking and installing required packages...
#>  [2025-09-20 13:44:51] Installing conda packages
#>  [2025-09-20 13:44:52] Checking 3 packages in environment: scop_env
#>  [2025-09-20 13:44:53] Retrieving package list for environment: scop_env
#>  [2025-09-20 13:44:55] Found 197 packages installed
#>  [2025-09-20 13:44:55] leidenalg 0.10.2
#>  [2025-09-20 13:44:55] tbb 2022.2.0
#>  [2025-09-20 13:44:55] python-igraph 0.11.9
#>  [2025-09-20 13:44:55] Installing pip packages
#>  [2025-09-20 13:44:56] Checking 18 packages in environment: scop_env
#>  [2025-09-20 13:44:57] Retrieving package list for environment: scop_env
#>  [2025-09-20 13:44:59] Found 197 packages installed
#>  [2025-09-20 13:44:59] matplotlib 3.10.3
#>  [2025-09-20 13:44:59] numba 0.59.1
#>  [2025-09-20 13:44:59] llvmlite 0.42.0
#>  [2025-09-20 13:44:59] numpy 1.26.4
#>  [2025-09-20 13:44:59] palantir 1.4.1
#>  [2025-09-20 13:44:59] pandas 2.0.3
#>  [2025-09-20 13:44:59] scanpy 1.11.3
#>  [2025-09-20 13:44:59] scikit-learn 1.7.0
#>  [2025-09-20 13:44:59] scipy 1.15.3
#>  [2025-09-20 13:44:59] scvelo 0.3.3
#>  [2025-09-20 13:44:59] wot 1.0.8.post2
#>  [2025-09-20 13:44:59] trimap 1.1.4
#>  [2025-09-20 13:44:59] pacmap 0.8.0
#>  [2025-09-20 13:44:59] phate 1.0.11
#>  [2025-09-20 13:44:59] bbknn 1.6.0
#>  [2025-09-20 13:44:59] scanorama 1.7.4
#>  [2025-09-20 13:44:59] scvi-tools 1.2.1
#>  [2025-09-20 13:44:59] cellrank 2.0.7
#>  [2025-09-20 13:45:02] Python Environment Ready
#> conda environment: 
#>   conda:          /usr/share/miniconda/bin/conda
#>   environment:    /usr/share/miniconda/envs/scop_env
#> python config: 
#>   python:         /usr/share/miniconda/envs/scop_env/bin/python3.10
#>   libpython:      /usr/share/miniconda/envs/scop_env/lib/libpython3.10.so
#>   pythonhome:     /usr/share/miniconda/envs/scop_env:/usr/share/miniconda/envs/scop_env
#>   version:        3.10.1 | packaged by conda-forge | (main, Dec 22 2021, 01:39:36) [GCC 9.4.0]
#>   numpy:          /usr/share/miniconda/envs/scop_env/lib/python3.10/site-packages/numpy
#>   numpy_version:  1.26.4
#>   
#>   NOTE: Python version was forced by use_python() function
data(pancreas_sub)
pancreas_sub <- standard_scop(pancreas_sub)
#>  [2025-09-20 13:45:02] Start standard scop workflow...
#>  [2025-09-20 13:45:03] Checking a list of <Seurat> object...
#> ! [2025-09-20 13:45:03] Data 1/1 of the `srt_list` is "unknown"
#>  [2025-09-20 13:45:03] Perform `NormalizeData()` with `normalization.method = 'LogNormalize'` on the data 1/1 of the `srt_list`...
#>  [2025-09-20 13:45:05] Perform `Seurat::FindVariableFeatures()` on the data 1/1 of the `srt_list`...
#>  [2025-09-20 13:45:06] Use the separate HVF from srt_list
#>  [2025-09-20 13:45:06] Number of available HVF: 2000
#>  [2025-09-20 13:45:06] Finished check
#>  [2025-09-20 13:45:07] Perform `Seurat::ScaleData()`
#> Warning: Different features in new layer data than already exists for scale.data
#>  [2025-09-20 13:45:07] Perform pca linear dimension reduction
#> StandardPC_ 1 
#> Positive:  Aplp1, Cpe, Gnas, Fam183b, Map1b, Hmgn3, Pcsk1n, Chga, Tuba1a, Bex2 
#> 	   Syt13, Isl1, 1700086L19Rik, Pax6, Chgb, Scgn, Rbp4, Scg3, Gch1, Camk2n1 
#> 	   Cryba2, Pcsk2, Pyy, Tspan7, Mafb, Hist3h2ba, Dbpht2, Abcc8, Rap1b, Slc38a5 
#> Negative:  Spp1, Anxa2, Sparc, Dbi, 1700011H14Rik, Wfdc2, Gsta3, Adamts1, Clu, Mgst1 
#> 	   Bicc1, Ldha, Vim, Cldn3, Cyr61, Rps2, Mt1, Ptn, Phgdh, Nudt19 
#> 	   Smtnl2, Smco4, Habp2, Mt2, Col18a1, Rpl12, Galk1, Cldn10, Acot1, Ccnd1 
#> StandardPC_ 2 
#> Positive:  Rbp4, Tagln2, Tuba1b, Fkbp2, Pyy, Pcsk2, Iapp, Tmem27, Meis2, Tubb4b 
#> 	   Pcsk1n, Dbpht2, Rap1b, Dynll1, Tubb2a, Sdf2l1, Scgn, 1700086L19Rik, Scg2, Abcc8 
#> 	   Atp1b1, Hspa5, Fam183b, Papss2, Slc38a5, Scg3, Mageh1, Tspan7, Ppp1r1a, Ociad2 
#> Negative:  Neurog3, Btbd17, Gadd45a, Ppp1r14a, Neurod2, Sox4, Smarcd2, Mdk, Pax4, Btg2 
#> 	   Sult2b1, Hes6, Grasp, Igfbpl1, Gpx2, Cbfa2t3, Foxa3, Shf, Mfng, Tmsb4x 
#> 	   Amotl2, Gdpd1, Cdc14b, Epb42, Rcor2, Cotl1, Upk3bl, Rbfox3, Cldn6, Cer1 
#> StandardPC_ 3 
#> Positive:  Nusap1, Top2a, Birc5, Aurkb, Cdca8, Pbk, Mki67, Tpx2, Plk1, Ccnb1 
#> 	   2810417H13Rik, Incenp, Cenpf, Ccna2, Prc1, Racgap1, Cdk1, Aurka, Cdca3, Hmmr 
#> 	   Spc24, Kif23, Sgol1, Cenpe, Cdc20, Hist1h1b, Cdca2, Mxd3, Kif22, Ska1 
#> Negative:  Anxa5, Pdzk1ip1, Acot1, Tpm1, Anxa2, Dcdc2a, Capg, Sparc, Ttr, Pamr1 
#> 	   Clu, Cxcl12, Ndrg2, Hnf1aos1, Gas6, Gsta3, Krt18, Ces1d, Atp1b1, Muc1 
#> 	   Hhex, Acadm, Spp1, Enpp2, Bcl2l14, Sat1, Smtnl2, 1700011H14Rik, Tgm2, Fam159a 
#> StandardPC_ 4 
#> Positive:  Glud1, Tm4sf4, Akr1c19, Cldn4, Runx1t1, Fev, Pou3f4, Gm43861, Pgrmc1, Arx 
#> 	   Cd200, Lrpprc, Hmgn3, Ppp1r14c, Pam, Etv1, Tsc22d1, Slc25a5, Akap17b, Pgf 
#> 	   Fam43a, Emb, Jun, Krt8, Dnajc12, Mid1ip1, Ids, Rgs17, Uchl1, Alcam 
#> Negative:  Ins2, Ins1, Ppp1r1a, Nnat, Calr, Sytl4, Sdf2l1, Iapp, Pdia6, Mapt 
#> 	   G6pc2, C2cd4b, Npy, Gng12, P2ry1, Ero1lb, Adra2a, Papss2, Arhgap36, Fam151a 
#> 	   Dlk1, Creld2, Gip, Tmem215, Gm27033, Cntfr, Prss53, C2cd4a, Lyve1, Ociad2 
#> StandardPC_ 5 
#> Positive:  Pdx1, Nkx6-1, Npepl1, Cldn4, Cryba2, Fev, Jun, Chgb, Gng12, Adra2a 
#> 	   Mnx1, Sytl4, Pdk3, Gm27033, Nnat, Chga, Ins2, 1110012L19Rik, Enho, Krt7 
#> 	   Mlxipl, Tmsb10, Flrt1, Pax4, Tubb3, Prrg2, Gars, Frzb, BC023829, Gm2694 
#> Negative:  Irx2, Irx1, Gcg, Ctxn2, Tmem27, Ctsz, Tmsb15l, Nap1l5, Pou6f2, Gria2 
#> 	   Ghrl, Peg10, Smarca1, Arx, Lrpap1, Rgs4, Ttr, Gast, Tmsb15b2, Serpina1b 
#> 	   Slc16a10, Wnk3, Ly6e, Auts2, Sct, Arg1, Dusp10, Sphkap, Dock11, Edn3 
#>  [2025-09-20 13:45:08] Perform `Seurat::FindClusters()` with louvain and `cluster_resolution` = 0.6
#>  [2025-09-20 13:45:08] Reorder clusters...
#> ! [2025-09-20 13:45:08] Using `Seurat::AggregateExpression()` to calculate pseudo-bulk data for <Assay5>
#>  [2025-09-20 13:45:08] Perform umap nonlinear dimension reduction
#>  [2025-09-20 13:45:08] Non-linear dimensionality reduction (umap) using (Standardpca) dims (1-50) as input
#>  [2025-09-20 13:45:08] UMAP will return its model
#>  [2025-09-20 13:45:12] Non-linear dimensionality reduction (umap) using (Standardpca) dims (1-50) as input
#>  [2025-09-20 13:45:12] UMAP will return its model
#>  [2025-09-20 13:45:16] Run scop standard workflow done
pancreas_sub <- RunPAGA(
  pancreas_sub,
  assay_x = "RNA",
  group_by = "SubCellType",
  linear_reduction = "PCA",
  nonlinear_reduction = "UMAP"
)
#>  [2025-09-20 13:45:18] Checking 2 packages in environment: scop_env
#>  [2025-09-20 13:45:19] Retrieving package list for environment: scop_env
#>  [2025-09-20 13:45:21] Found 197 packages installed
#>  [2025-09-20 13:45:21] scanpy version: 1.11.3
#>  [2025-09-20 13:45:21] numpy version: 1.26.4
#>  [2025-09-20 13:45:21] Converting <Seurat> to <AnnData> ...
#> ! [2025-09-20 13:45:21] "misc" slot is not converted
#> ! [2025-09-20 13:45:21] "tools" slot is not converted
#>  [2025-09-20 13:45:21] Convert <Seurat> object to <AnnData> object completed
#>  [2025-09-20 13:45:21] Running PAGA analysis...
#>  [2025-09-20 13:45:22] PAGA analysis completed
#>  [2025-09-20 13:45:22] Converting <AnnData> object to <Seurat> object...
#>  [2025-09-20 13:45:23] Convert <AnnData> object to <Seurat> object completed
CellDimPlot(
  pancreas_sub,
  group.by = "SubCellType",
  reduction = "draw_graph_fr"
)
#> Warning: No shared levels found between `names(values)` of the manual scale and the
#> data's fill values.


PAGAPlot(pancreas_sub, reduction = "UMAP")


CellDimPlot(
  pancreas_sub,
  group.by = "SubCellType",
  reduction = "UMAP",
  paga = pancreas_sub@misc$paga
)
#> Warning: No shared levels found between `names(values)` of the manual scale and the
#> data's fill values.
#> Warning: No shared levels found between `names(values)` of the manual scale and the
#> data's fill values.


pancreas_sub <- RunPAGA(
  pancreas_sub,
  group_by = "SubCellType",
  linear_reduction = "PCA",
  nonlinear_reduction = "UMAP",
  embedded_with_PAGA = TRUE,
  infer_pseudotime = TRUE,
  root_group = "Ductal"
)
#>  [2025-09-20 13:45:26] Checking 2 packages in environment: scop_env
#>  [2025-09-20 13:45:26] Retrieving package list for environment: scop_env
#>  [2025-09-20 13:45:29] Found 197 packages installed
#>  [2025-09-20 13:45:29] scanpy version: 1.11.3
#>  [2025-09-20 13:45:29] numpy version: 1.26.4
#>  [2025-09-20 13:45:29] Converting <Seurat> to <AnnData> ...
#> ! [2025-09-20 13:45:29] "misc" slot is not converted
#> ! [2025-09-20 13:45:29] "tools" slot is not converted
#>  [2025-09-20 13:45:29] Convert <Seurat> object to <AnnData> object completed
#>  [2025-09-20 13:45:29] Running PAGA analysis...
#>  [2025-09-20 13:45:32] PAGA analysis completed
#>  [2025-09-20 13:45:32] Converting <AnnData> object to <Seurat> object...
#>  [2025-09-20 13:45:33] Convert <AnnData> object to <Seurat> object completed

FeatureDimPlot(
  pancreas_sub,
  features = "dpt_pseudotime",
  reduction = "PAGAUMAP2D"
)


PAGAPlot(pancreas_sub, reduction = "PAGAUMAP2D")


CellDimPlot(
  pancreas_sub,
  group.by = "SubCellType",
  reduction = "PAGAUMAP2D",
  paga = pancreas_sub@misc$paga
)
#> Warning: No shared levels found between `names(values)` of the manual scale and the
#> data's fill values.
#> Warning: No shared levels found between `names(values)` of the manual scale and the
#> data's fill values.