Inferring cell-type-specific gene regulatory networks

inferCSN(
  object,
  penalty = "L0",
  cross_validation = FALSE,
  seed = 1,
  n_folds = 5,
  subsampling_method = c("sample", "meta_cells", "pseudobulk"),
  subsampling_ratio = 1,
  r_squared_threshold = 0,
  regulators = NULL,
  targets = NULL,
  cores = 1,
  verbose = TRUE,
  ...
)

# S4 method for class 'matrix'
inferCSN(
  object,
  penalty = "L0",
  cross_validation = FALSE,
  seed = 1,
  n_folds = 5,
  subsampling_method = c("sample", "meta_cells", "pseudobulk"),
  subsampling_ratio = 1,
  r_squared_threshold = 0,
  regulators = NULL,
  targets = NULL,
  cores = 1,
  verbose = TRUE,
  ...
)

# S4 method for class 'sparseMatrix'
inferCSN(
  object,
  penalty = "L0",
  cross_validation = FALSE,
  seed = 1,
  n_folds = 5,
  subsampling_method = c("sample", "meta_cells", "pseudobulk"),
  subsampling_ratio = 1,
  r_squared_threshold = 0,
  regulators = NULL,
  targets = NULL,
  cores = 1,
  verbose = TRUE,
  ...
)

# S4 method for class 'data.frame'
inferCSN(
  object,
  penalty = "L0",
  cross_validation = FALSE,
  seed = 1,
  n_folds = 5,
  subsampling_method = c("sample", "meta_cells", "pseudobulk"),
  subsampling_ratio = 1,
  r_squared_threshold = 0,
  regulators = NULL,
  targets = NULL,
  cores = 1,
  verbose = TRUE,
  ...
)

# S4 method for class 'Network'
inferCSN(
  object,
  penalty = "L0",
  cross_validation = FALSE,
  seed = 1,
  n_folds = 5,
  subsampling_method = "sample",
  subsampling_ratio = 1,
  r_squared_threshold = 0,
  regulators = NULL,
  targets = NULL,
  cores = 1,
  verbose = TRUE,
  method = c("srm", "glm", "glmnet", "cv.glmnet", "xgb", "susie"),
  gene_cor_threshold = 0,
  ...
)

# S4 method for class 'CSNObject'
inferCSN(
  object,
  penalty = "L0",
  cross_validation = FALSE,
  seed = 1,
  n_folds = 5,
  subsampling_method = "sample",
  subsampling_ratio = 1,
  r_squared_threshold = 0,
  regulators = NULL,
  targets = NULL,
  cores = 1,
  verbose = TRUE,
  celltypes = NULL,
  network_name = paste0(method, "_network"),
  peak_to_gene_method = c("Signac", "GREAT"),
  upstream = 1e+05,
  downstream = 0,
  extend = 1e+06,
  only_tss = FALSE,
  peak_to_gene_domains = NULL,
  gene_cor_threshold = 0.1,
  peak_cor_threshold = 0,
  aggregate_rna_col = NULL,
  aggregate_peaks_col = NULL,
  method = c("srm", "glm", "glmnet", "cv.glmnet", "xgb", "susie"),
  alpha = 0.5,
  family = "gaussian",
  interaction_term = ":",
  adjust_method = "fdr",
  scale = FALSE,
  ...
)

# S4 method for class 'Seurat'
inferCSN(
  object,
  penalty = "L0",
  cross_validation = FALSE,
  seed = 1,
  n_folds = 5,
  subsampling_method = c("sample", "meta_cells", "pseudobulk"),
  subsampling_ratio = 1,
  r_squared_threshold = 0,
  regulators = NULL,
  targets = NULL,
  cores = 1,
  verbose = TRUE,
  ...
)

Arguments

object

The input data for inferCSN.

penalty

The type of regularization, default is L0. This can take either one of the following choices: L0, L0L1, and L0L2. For high-dimensional and sparse data, L0L2 is more effective.

cross_validation

Logical value, default is FALSE, whether to use cross-validation.

seed

The random seed for cross-validation, default is 1.

n_folds

The number of folds for cross-validation, default is 5.

subsampling_method

The method to use for subsampling. Options are "sample", "pseudobulk" or "meta_cells".

subsampling_ratio

The proportion of all samples used for fit_srm, default is 1.

r_squared_threshold

Threshold of \(R^2\) coefficient, default is 0.

regulators

The regulator genes for which to infer the regulatory network.

targets

The target genes for which to infer the regulatory network. Recommend setting this to a small fraction of min(n,p) (e.g. 0.05 * min(n,p)) as L0 regularization typically selects a small portion of non-zeros.

cores

The number of cores to use for parallelization with foreach, default is 1.

verbose

Logical value, default is TRUE, whether to print progress messages.

...

Parameters for other methods.

method

A character string indicating the method to fit the model. * 'srm' - Sparse Regression Model. * 'glm' - Generalized Linear Model with glm. * 'glmnet', 'cv.glmnet' - Regularized Generalized Linear Model with glmnet. * 'xgb' - Gradient Boosting Regression using xgboost.

gene_cor_threshold

Threshold for TF - target gene correlation.

celltypes

Character vector of cell types to infer networks for.

network_name

Name of the network. Defaults to `paste0(method, "_network")`.

peak_to_gene_method

Character specifying the method to link peak overlapping motif regions to nearby genes. One of Signac or GREAT.

upstream

Integer defining the distance upstream of the gene to consider as potential regulatory region.

downstream

Integer defining the distance downstream of the gene to consider as potential regulatory region.

extend

Integer defining the distance from the upstream and downstream of the basal regulatory region. Only used if `peak_to_gene_method = 'GREAT'`.

only_tss

Logical. Measure distance from the TSS (TRUE) or from the entire gene body (FALSE).

peak_to_gene_domains

GenomicRanges object with regulatory regions for each gene.

peak_cor_threshold

Threshold for binding peak - target gene correlation.

aggregate_rna_col

aggregate_rna_col

aggregate_peaks_col

aggregate_peaks_col

alpha

The elasticnet mixing parameter. See glmnet for details.

family

A description of the error distribution and link function to be used in the model. See family for more details.

interaction_term

The interaction variable to use in the model between TF and binding site. * '+' for additive interaction. * ':' for 'multiplicative' interaction. * '*' for crossing interaction, i.e. additive AND 'multiplicative'. For more info, see `formula`.

adjust_method

Method for adjusting p-values.

scale

Logical. Whether to z-transform the expression and accessibility matrices.

Value

A data table of regulator-target regulatory relationships

A CSNObject.

Examples

data("example_matrix")
network_table_1 <- inferCSN(
  example_matrix
)
#>  [2025-05-28 02:35:13] Running for <dense matrix>.
#>  [2025-05-28 02:35:13] Checking input parameters.
#>  [2025-05-28 02:35:13] Using L0 sparse regression model.
#>  [2025-05-28 02:35:13] Using 1 core
#>  [2025-05-28 02:35:13] Run done.

network_table_2 <- inferCSN(
  example_matrix,
  cores = 2
)
#>  [2025-05-28 02:35:13] Running for <dense matrix>.
#>  [2025-05-28 02:35:13] Checking input parameters.
#>  [2025-05-28 02:35:13] Using L0 sparse regression model.
#>  [2025-05-28 02:35:13] Using 2 cores
#>  [2025-05-28 02:35:13] Run done.

head(network_table_1)
#>   regulator target     weight
#> 1       g18     g1 -0.9223177
#> 2       g17    g18  0.8770468
#> 3        g4     g3  0.8103065
#> 4       g16    g15  0.7659245
#> 5       g17    g16  0.7558764
#> 6       g12    g11  0.7444053

identical(
  network_table_1,
  network_table_2
)
#> [1] TRUE

inferCSN(
  example_matrix,
  regulators = c("g1", "g2"),
  targets = c("g3", "g4")
)
#>  [2025-05-28 02:35:13] Running for <dense matrix>.
#>  [2025-05-28 02:35:13] Checking input parameters.
#>  [2025-05-28 02:35:13] Using 2 regulator(s).
#>  [2025-05-28 02:35:13] Using 2 target(s).
#>  [2025-05-28 02:35:13] Using L0 sparse regression model.
#>  [2025-05-28 02:35:13] Using 1 core
#>  [2025-05-28 02:35:13] Run done.
#>   regulator target     weight
#> 1        g2     g3  0.9848781
#> 2        g2     g4  0.9230387
#> 3        g1     g4 -0.3847071
#> 4        g1     g3 -0.1732490
inferCSN(
  example_matrix,
  regulators = c("g1", "g2"),
  targets = c("g3", "g0")
)
#>  [2025-05-28 02:35:13] Running for <dense matrix>.
#>  [2025-05-28 02:35:13] Checking input parameters.
#>  [2025-05-28 02:35:13] Using 2 regulator(s).
#> ! [2025-05-28 02:35:13] 1 out of 2 candidate targets are in the input matrix.
#>  [2025-05-28 02:35:13] Using L0 sparse regression model.
#>  [2025-05-28 02:35:13] Using 1 core
#>  [2025-05-28 02:35:13] Run done.
#>   regulator target     weight
#> 1        g2     g3  0.9848781
#> 2        g1     g3 -0.1732490

if (FALSE) { # \dontrun{
data("example_ground_truth")
network_table_07 <- inferCSN(
  example_matrix,
  r_squared_threshold = 0.7
)
calculate_metrics(
  network_table_1,
  example_ground_truth,
  return_plot = TRUE
)
calculate_metrics(
  network_table_07,
  example_ground_truth,
  return_plot = TRUE
)
} # }
if (FALSE) { # \dontrun{
data("example_matrix")
network_table <- inferCSN(example_matrix)
head(network_table)

network_table_sparse_1 <- inferCSN(
  as(example_matrix, "sparseMatrix")
)
head(network_table_sparse_1)

network_table_sparse_2 <- inferCSN(
  as(example_matrix, "sparseMatrix"),
  cores = 2
)
identical(
  network_table,
  network_table_sparse_1
)

identical(
  network_table_sparse_1,
  network_table_sparse_2
)

plot_scatter(
  data.frame(
    network_table$weight,
    network_table_sparse_1$weight
  ),
  legend_position = "none"
)

plot_weight_distribution(
  network_table
) + plot_weight_distribution(
  network_table_sparse_1
)
} # }
if (FALSE) { # \dontrun{
data("example_matrix")
object <- initiate_object(example_matrix)
object <- inferCSN(object)
} # }