Skip to contents

This function subsamples a matrix using one of three methods: random sampling, meta cells, or pseudobulk.

Usage

subsampling(
  matrix,
  subsampling_method = c("sample", "meta_cells", "pseudobulk"),
  subsampling_ratio = 1,
  seed = 1,
  verbose = TRUE,
  ...
)

Arguments

matrix

The input matrix to be subsampled.

subsampling_method

The method to use for subsampling. Options are "sample", "pseudobulk" or "meta_cells".

subsampling_ratio

The ratio of all samples used for fit_srm, given as a proportion rather than a percentage (e.g. 0.5). Default is 1.

seed

The random seed for cross-validation. Default is 1.

verbose

Whether to print progress messages. Default is TRUE.

...

Parameters for other methods.

Value

The subsampled matrix.

Examples

data(example_matrix)
data("example_ground_truth")
subsample_matrix <- subsampling(
  example_matrix,
  subsampling_ratio = 0.5
)
#>  [2025-10-30 09:49:28] Subsample matrix generated, dimensions: 2500 cells by 18 genes
subsample_matrix_2 <- subsampling(
  example_matrix,
  subsampling_method = "meta_cells",
  subsampling_ratio = 0.5,
  fast_pca = FALSE
)
#> ! [2025-10-30 09:49:28] Number of PCs of PCA result is less than the desired number, using all PCs.
#>  [2025-10-30 09:49:29] Subsample matrix generated, dimensions: 2500 cells by 18 genes
subsample_matrix_3 <- subsampling(
  example_matrix,
  subsampling_method = "pseudobulk",
  subsampling_ratio = 0.5
)
#>  [2025-10-30 09:49:29] Subsample matrix generated, dimensions: 2500 cells by 18 genes

calculate_metrics(
  inferCSN(example_matrix),
  example_ground_truth,
  return_plot = TRUE
)
#>  [2025-10-30 09:49:29] Running for <dense matrix>.
#>  [2025-10-30 09:49:29] Checking input parameters...
#>  [2025-10-30 09:49:29] Using `L0` sparse regression model
#>  [2025-10-30 09:49:29] Using 1 core
#>  [2025-10-30 09:49:29] Running [1/18] ETA:  0s
#>  [2025-10-30 09:49:29] Completed 18 tasks in 217ms
#> 
#>  [2025-10-30 09:49:29] Building results
#>  [2025-10-30 09:49:30] Run done.
#> $metrics
#>      Metric  Value
#> 1     AUROC  0.952
#> 2     AUPRC  0.437
#> 3 Precision  0.529
#> 4    Recall  1.000
#> 5        F1  0.692
#> 6       ACC  0.948
#> 7        JI  0.514
#> 8        SI 18.000
#> 
#> $plot

#> 
calculate_metrics(
  inferCSN(subsample_matrix),
  example_ground_truth,
  return_plot = TRUE
)
#>  [2025-10-30 09:49:30] Running for <dense matrix>.
#>  [2025-10-30 09:49:30] Checking input parameters...
#>  [2025-10-30 09:49:30] Using `L0` sparse regression model
#>  [2025-10-30 09:49:30] Using 1 core
#>  [2025-10-30 09:49:30] Building results
#>  [2025-10-30 09:49:30] Run done.
#> $metrics
#>      Metric  Value
#> 1     AUROC  0.955
#> 2     AUPRC  0.449
#> 3 Precision  0.529
#> 4    Recall  1.000
#> 5        F1  0.692
#> 6       ACC  0.948
#> 7        JI  0.514
#> 8        SI 18.000
#> 
#> $plot

#> 
calculate_metrics(
  inferCSN(subsample_matrix_2),
  example_ground_truth,
  return_plot = TRUE
)
#>  [2025-10-30 09:49:30] Running for <dense matrix>.
#>  [2025-10-30 09:49:30] Checking input parameters...
#>  [2025-10-30 09:49:30] Using `L0` sparse regression model
#>  [2025-10-30 09:49:30] Using 1 core
#>  [2025-10-30 09:49:30] Building results
#>  [2025-10-30 09:49:31] Run done.
#> $metrics
#>      Metric  Value
#> 1     AUROC  0.952
#> 2     AUPRC  0.439
#> 3 Precision  0.529
#> 4    Recall  1.000
#> 5        F1  0.692
#> 6       ACC  0.948
#> 7        JI  0.514
#> 8        SI 18.000
#> 
#> $plot

#> 
calculate_metrics(
  inferCSN(subsample_matrix_3),
  example_ground_truth,
  return_plot = TRUE
)
#>  [2025-10-30 09:49:31] Running for <dense matrix>.
#>  [2025-10-30 09:49:31] Checking input parameters...
#>  [2025-10-30 09:49:31] Using `L0` sparse regression model
#>  [2025-10-30 09:49:31] Using 1 core
#>  [2025-10-30 09:49:31] Building results
#>  [2025-10-30 09:49:31] Run done.
#> $metrics
#>      Metric  Value
#> 1     AUROC  0.955
#> 2     AUPRC  0.449
#> 3 Precision  0.529
#> 4    Recall  1.000
#> 5        F1  0.692
#> 6       ACC  0.948
#> 7        JI  0.514
#> 8        SI 18.000
#> 
#> $plot

#>