Inferring cell-type-specific gene regulatory networks

Usage

inferCSN(
  object,
  penalty = "L0",
  cross_validation = FALSE,
  seed = 1,
  n_folds = 5,
  subsampling_method = c("sample", "meta_cells", "pseudobulk"),
  subsampling_ratio = 1,
  r_squared_threshold = 0,
  regulators = NULL,
  targets = NULL,
  cores = 1,
  verbose = TRUE,
  ...
)

# S4 method for class 'matrix'
inferCSN(
  object,
  penalty = "L0",
  cross_validation = FALSE,
  seed = 1,
  n_folds = 5,
  subsampling_method = c("sample", "meta_cells", "pseudobulk"),
  subsampling_ratio = 1,
  r_squared_threshold = 0,
  regulators = NULL,
  targets = NULL,
  cores = 1,
  verbose = TRUE,
  ...
)

# S4 method for class 'sparseMatrix'
inferCSN(
  object,
  penalty = "L0",
  cross_validation = FALSE,
  seed = 1,
  n_folds = 5,
  subsampling_method = c("sample", "meta_cells", "pseudobulk"),
  subsampling_ratio = 1,
  r_squared_threshold = 0,
  regulators = NULL,
  targets = NULL,
  cores = 1,
  verbose = TRUE,
  ...
)

# S4 method for class 'data.frame'
inferCSN(
  object,
  penalty = "L0",
  cross_validation = FALSE,
  seed = 1,
  n_folds = 5,
  subsampling_method = c("sample", "meta_cells", "pseudobulk"),
  subsampling_ratio = 1,
  r_squared_threshold = 0,
  regulators = NULL,
  targets = NULL,
  cores = 1,
  verbose = TRUE,
  ...
)

Arguments

object: The input data for inferCSN. Methods are provided for matrix, sparseMatrix, and data.frame inputs.
penalty: The type of regularization, default is L0. This must be one of the following: L0, L0L1, or L0L2. For high-dimensional and sparse data, L0L2 is more effective.
cross_validation: Logical value, default is FALSE, whether to use cross-validation.
seed: The random seed for cross-validation, default is 1.
n_folds: The number of folds for cross-validation, default is 5.
subsampling_method: The method to use for subsampling. Options are "sample", "meta_cells", or "pseudobulk".
subsampling_ratio: The proportion of all samples used for fit_srm, default is 1 (i.e., all samples are used).
r_squared_threshold: Threshold of \(R^2\) coefficient, default is 0.
regulators: The regulator genes for which to infer the regulatory network.
targets: The target genes for which to infer the regulatory network. Recommend setting this to a small fraction of min(n,p) (e.g. 0.05 * min(n,p)) as L0 regularization typically selects a small portion of non-zeros.
cores: The number of cores to use for parallelization with foreach, default is 1.
verbose: Logical value, default is TRUE, whether to print progress messages.
...: Parameters for other methods.

Value

A data table of regulator-target regulatory relationships, with columns regulator, target, and weight.

Examples

data("example_matrix")
network_table_1 <- inferCSN(
  example_matrix
)
#> ℹ [2025-07-28 09:21:52] Running for <dense matrix>.
#> ℹ [2025-07-28 09:21:52] Checking input parameters.
#> ℹ [2025-07-28 09:21:52] Using L0 sparse regression model.
#> ℹ [2025-07-28 09:21:52] Using 1 core
#> ℹ [2025-07-28 09:21:53] Building results
#> ✔ [2025-07-28 09:21:53] Run done.

network_table_2 <- inferCSN(
  example_matrix,
  cores = 2
)
#> ℹ [2025-07-28 09:21:53] Running for <dense matrix>.
#> ℹ [2025-07-28 09:21:53] Checking input parameters.
#> ℹ [2025-07-28 09:21:53] Using L0 sparse regression model.
#> ℹ [2025-07-28 09:21:53] Using 2 cores
#> ℹ [2025-07-28 09:21:53] Building results
#> ✔ [2025-07-28 09:21:53] Run done.

head(network_table_1)
#>   regulator target     weight
#> 1       g18     g1 -0.9223177
#> 2       g17    g18  0.8770468
#> 3        g4     g3  0.8103065
#> 4       g16    g15  0.7659245
#> 5       g17    g16  0.7558764
#> 6       g12    g11  0.7444053

identical(
  network_table_1,
  network_table_2
)
#> [1] TRUE

inferCSN(
  example_matrix,
  regulators = c("g1", "g2"),
  targets = c("g3", "g4")
)
#> ℹ [2025-07-28 09:21:53] Running for <dense matrix>.
#> ℹ [2025-07-28 09:21:53] Checking input parameters.
#> ℹ [2025-07-28 09:21:53] Using 2 regulator(s).
#> ℹ [2025-07-28 09:21:53] Using 2 target(s).
#> ℹ [2025-07-28 09:21:53] Using L0 sparse regression model.
#> ℹ [2025-07-28 09:21:53] Using 1 core
#> ℹ [2025-07-28 09:21:53] Building results
#> ✔ [2025-07-28 09:21:53] Run done.
#>   regulator target     weight
#> 1        g2     g3  0.9848781
#> 2        g2     g4  0.9230387
#> 3        g1     g4 -0.3847071
#> 4        g1     g3 -0.1732490
inferCSN(
  example_matrix,
  regulators = c("g1", "g2"),
  targets = c("g3", "g0")
)
#> ℹ [2025-07-28 09:21:53] Running for <dense matrix>.
#> ℹ [2025-07-28 09:21:53] Checking input parameters.
#> ℹ [2025-07-28 09:21:53] Using 2 regulator(s).
#> ! [2025-07-28 09:21:53] 1 out of 2 candidate targets are in the input matrix.
#> ℹ [2025-07-28 09:21:53] Using L0 sparse regression model.
#> ℹ [2025-07-28 09:21:53] Using 1 core
#> ℹ [2025-07-28 09:21:53] Building results
#> ✔ [2025-07-28 09:21:53] Run done.
#>   regulator target     weight
#> 1        g2     g3  0.9848781
#> 2        g1     g3 -0.1732490

if (FALSE) { # \dontrun{
data("example_ground_truth")
network_table_07 <- inferCSN(
  example_matrix,
  r_squared_threshold = 0.7
)
calculate_metrics(
  network_table_1,
  example_ground_truth,
  return_plot = TRUE
)
calculate_metrics(
  network_table_07,
  example_ground_truth,
  return_plot = TRUE
)
} # }
if (FALSE) { # \dontrun{
data("example_matrix")
network_table <- inferCSN(example_matrix)
head(network_table)

network_table_sparse_1 <- inferCSN(
  as(example_matrix, "sparseMatrix")
)
head(network_table_sparse_1)

network_table_sparse_2 <- inferCSN(
  as(example_matrix, "sparseMatrix"),
  cores = 2
)
identical(
  network_table,
  network_table_sparse_1
)

identical(
  network_table_sparse_1,
  network_table_sparse_2
)

plot_scatter(
  data.frame(
    network_table$weight,
    network_table_sparse_1$weight
  ),
  legend_position = "none"
)

plot_weight_distribution(
  network_table
) + plot_weight_distribution(
  network_table_sparse_1
)
} # }