Skip to contents

This function subsamples a matrix using random sampling, the meta cells method, or the pseudobulk method.

Usage

subsampling(
  matrix,
  subsampling_method = c("sample", "meta_cells", "pseudobulk"),
  subsampling_ratio = 1,
  seed = 1,
  verbose = TRUE,
  ...
)

Arguments

matrix

The input matrix to be subsampled.

subsampling_method

The method to use for subsampling. One of "sample", "meta_cells", or "pseudobulk".

subsampling_ratio

The proportion of all samples to use, given as a fraction rather than a percentage (e.g. 0.5). Default is 1.

seed

The random seed used to make the subsampling reproducible. Default is 1.

verbose

Whether to print progress messages. Default is TRUE.

...

Additional parameters passed on to the selected subsampling method (for example, fast_pca when using "meta_cells").

Value

The subsampled matrix.

Examples

data(example_matrix)
data("example_ground_truth")
subsample_matrix <- subsampling(
  example_matrix,
  subsampling_ratio = 0.5
)
#>  [2025-10-16 02:37:04] Subsample matrix generated, dimensions: 2500 cells by 18 genes
subsample_matrix_2 <- subsampling(
  example_matrix,
  subsampling_method = "meta_cells",
  subsampling_ratio = 0.5,
  fast_pca = FALSE
)
#> ! [2025-10-16 02:37:04] Number of PCs of PCA result is less than the desired number, using all PCs.
#>  [2025-10-16 02:37:05] Subsample matrix generated, dimensions: 2500 cells by 18 genes
subsample_matrix_3 <- subsampling(
  example_matrix,
  subsampling_method = "pseudobulk",
  subsampling_ratio = 0.5
)
#>  [2025-10-16 02:37:05] Subsample matrix generated, dimensions: 2500 cells by 18 genes

calculate_metrics(
  inferCSN(example_matrix),
  example_ground_truth,
  return_plot = TRUE
)
#>  [2025-10-16 02:37:05] Running for <dense matrix>.
#>  [2025-10-16 02:37:05] Checking input parameters...
#>  [2025-10-16 02:37:05] Using `L0` sparse regression model
#>  [2025-10-16 02:37:05] Using 1 core
#>  [2025-10-16 02:37:05] Running [1/18] ETA:  0s
#>  [2025-10-16 02:37:05] Completed 18 tasks in 572ms
#> 
#>  [2025-10-16 02:37:05] Building results
#>  [2025-10-16 02:37:06] Run done.
#> $metrics
#>      Metric  Value
#> 1     AUROC  0.952
#> 2     AUPRC  0.437
#> 3 Precision  0.529
#> 4    Recall  1.000
#> 5        F1  0.692
#> 6       ACC  0.948
#> 7        JI  0.514
#> 8        SI 18.000
#> 
#> $plot

#> 
calculate_metrics(
  inferCSN(subsample_matrix),
  example_ground_truth,
  return_plot = TRUE
)
#>  [2025-10-16 02:37:06] Running for <dense matrix>.
#>  [2025-10-16 02:37:06] Checking input parameters...
#>  [2025-10-16 02:37:06] Using `L0` sparse regression model
#>  [2025-10-16 02:37:06] Using 1 core
#>  [2025-10-16 02:37:06] Building results
#>  [2025-10-16 02:37:06] Run done.
#> $metrics
#>      Metric  Value
#> 1     AUROC  0.955
#> 2     AUPRC  0.449
#> 3 Precision  0.529
#> 4    Recall  1.000
#> 5        F1  0.692
#> 6       ACC  0.948
#> 7        JI  0.514
#> 8        SI 18.000
#> 
#> $plot

#> 
calculate_metrics(
  inferCSN(subsample_matrix_2),
  example_ground_truth,
  return_plot = TRUE
)
#>  [2025-10-16 02:37:06] Running for <dense matrix>.
#>  [2025-10-16 02:37:06] Checking input parameters...
#>  [2025-10-16 02:37:06] Using `L0` sparse regression model
#>  [2025-10-16 02:37:06] Using 1 core
#>  [2025-10-16 02:37:06] Building results
#>  [2025-10-16 02:37:07] Run done.
#> $metrics
#>      Metric  Value
#> 1     AUROC  0.952
#> 2     AUPRC  0.439
#> 3 Precision  0.529
#> 4    Recall  1.000
#> 5        F1  0.692
#> 6       ACC  0.948
#> 7        JI  0.514
#> 8        SI 18.000
#> 
#> $plot

#> 
calculate_metrics(
  inferCSN(subsample_matrix_3),
  example_ground_truth,
  return_plot = TRUE
)
#>  [2025-10-16 02:37:07] Running for <dense matrix>.
#>  [2025-10-16 02:37:07] Checking input parameters...
#>  [2025-10-16 02:37:07] Using `L0` sparse regression model
#>  [2025-10-16 02:37:07] Using 1 core
#>  [2025-10-16 02:37:07] Running [1/18] ETA:  0s
#>  [2025-10-16 02:37:07] Completed 18 tasks in 131ms
#> 
#>  [2025-10-16 02:37:07] Building results
#>  [2025-10-16 02:37:07] Run done.
#> $metrics
#>      Metric  Value
#> 1     AUROC  0.955
#> 2     AUPRC  0.449
#> 3 Precision  0.529
#> 4    Recall  1.000
#> 5        F1  0.692
#> 6       ACC  0.948
#> 7        JI  0.514
#> 8        SI 18.000
#> 
#> $plot

#>