R/inferCSN.R
, R/inferCSN-methods.R
inferCSN.Rd
Inferring cell-type specific gene regulatory networks
inferCSN(
object,
penalty = "L0",
cross_validation = FALSE,
seed = 1,
n_folds = 5,
subsampling_method = c("sample", "meta_cells", "pseudobulk"),
subsampling_ratio = 1,
r_squared_threshold = 0,
regulators = NULL,
targets = NULL,
cores = 1,
verbose = TRUE,
...
)
# S4 method for class 'matrix'
inferCSN(
object,
penalty = "L0",
cross_validation = FALSE,
seed = 1,
n_folds = 5,
subsampling_method = c("sample", "meta_cells", "pseudobulk"),
subsampling_ratio = 1,
r_squared_threshold = 0,
regulators = NULL,
targets = NULL,
cores = 1,
verbose = TRUE,
...
)
# S4 method for class 'sparseMatrix'
inferCSN(
object,
penalty = "L0",
cross_validation = FALSE,
seed = 1,
n_folds = 5,
subsampling_method = c("sample", "meta_cells", "pseudobulk"),
subsampling_ratio = 1,
r_squared_threshold = 0,
regulators = NULL,
targets = NULL,
cores = 1,
verbose = TRUE,
...
)
# S4 method for class 'data.frame'
inferCSN(
object,
penalty = "L0",
cross_validation = FALSE,
seed = 1,
n_folds = 5,
subsampling_method = c("sample", "meta_cells", "pseudobulk"),
subsampling_ratio = 1,
r_squared_threshold = 0,
regulators = NULL,
targets = NULL,
cores = 1,
verbose = TRUE,
...
)
# S4 method for class 'Network'
inferCSN(
object,
penalty = "L0",
cross_validation = FALSE,
seed = 1,
n_folds = 5,
subsampling_method = "sample",
subsampling_ratio = 1,
r_squared_threshold = 0,
regulators = NULL,
targets = NULL,
cores = 1,
verbose = TRUE,
method = c("srm", "glm", "glmnet", "cv.glmnet", "xgb", "susie"),
gene_cor_threshold = 0,
...
)
# S4 method for class 'CSNObject'
inferCSN(
object,
penalty = "L0",
cross_validation = FALSE,
seed = 1,
n_folds = 5,
subsampling_method = "sample",
subsampling_ratio = 1,
r_squared_threshold = 0,
regulators = NULL,
targets = NULL,
cores = 1,
verbose = TRUE,
celltypes = NULL,
network_name = paste0(method, "_network"),
peak_to_gene_method = c("Signac", "GREAT"),
upstream = 1e+05,
downstream = 0,
extend = 1e+06,
only_tss = FALSE,
peak_to_gene_domains = NULL,
gene_cor_threshold = 0.1,
peak_cor_threshold = 0,
aggregate_rna_col = NULL,
aggregate_peaks_col = NULL,
method = c("srm", "glm", "glmnet", "cv.glmnet", "xgb", "susie"),
alpha = 0.5,
family = "gaussian",
interaction_term = ":",
adjust_method = "fdr",
scale = FALSE,
...
)
# S4 method for class 'Seurat'
inferCSN(
object,
penalty = "L0",
cross_validation = FALSE,
seed = 1,
n_folds = 5,
subsampling_method = c("sample", "meta_cells", "pseudobulk"),
subsampling_ratio = 1,
r_squared_threshold = 0,
regulators = NULL,
targets = NULL,
cores = 1,
verbose = TRUE,
...
)
The input data for inferCSN
.
The type of regularization, default is L0
.
This can take either one of the following choices: L0
, L0L1
, and L0L2
.
For high-dimensional and sparse data, L0L2
is more effective.
Logical value, default is FALSE
, whether to use cross-validation.
The random seed for cross-validation, default is 1
.
The number of folds for cross-validation, default is 5
.
The method to use for subsampling. Options are "sample", "pseudobulk" or "meta_cells".
The fraction of all samples used for fit_srm
, default is 1
.
Threshold of \(R^2\) coefficient, default is 0
.
The regulator genes for which to infer the regulatory network.
The target genes for which to infer the regulatory network. Recommend setting this to a small fraction of min(n,p) (e.g. 0.05 * min(n,p)) as L0 regularization typically selects a small portion of non-zeros.
The number of cores to use for parallelization with foreach
, default is 1
.
Logical value, default is TRUE
, whether to print progress messages.
Parameters for other methods.
A character string indicating the method to fit the model.
* 'srm'
- Sparse Regression Model.
* 'glm'
- Generalized Linear Model with glm
.
* 'glmnet'
, 'cv.glmnet'
- Regularized Generalized Linear Model with glmnet
.
* 'xgb'
- Gradient Boosting Regression using xgboost
.
Threshold for TF - target gene correlation.
Character vector of cell types to infer networks for.
network_name.
Character specifying the method to
link peak overlapping motif regions to nearby genes. One of Signac
or GREAT
.
Integer defining the distance upstream of the gene to consider as potential regulatory region.
Integer defining the distance downstream of the gene to consider as potential regulatory region.
Integer defining the distance from the upstream and downstream of the basal regulatory region. Only used if `peak_to_gene_method = 'GREAT'`.
Logical. Measure distance from the TSS (TRUE
) or from the entire gene body (FALSE
).
GenomicRanges
object with regulatory regions for each gene.
Threshold for binding peak - target gene correlation.
aggregate_rna_col
aggregate_peaks_col
The elasticnet mixing parameter. See glmnet
for details.
A description of the error distribution and link function to be used in the model.
See family
for more details.
The interaction variable to use in the model between TF and binding site.
* '+'
for additive interaction.
* ':'
for 'multiplicative' interaction.
* '*'
for crossing interaction, i.e. additive AND 'multiplicative'.
For more info, see formula.
Method for adjusting p-values.
Logical. Whether to z-transform the expression and accessibility matrices.
A data table of regulator-target regulatory relationships.
A CSNObject.
data("example_matrix")
network_table_1 <- inferCSN(
example_matrix
)
#> ℹ [2025-05-28 02:35:13] Running for <dense matrix>.
#> ℹ [2025-05-28 02:35:13] Checking input parameters.
#> ℹ [2025-05-28 02:35:13] Using L0 sparse regression model.
#> ℹ [2025-05-28 02:35:13] Using 1 core
#> ✔ [2025-05-28 02:35:13] Run done.
network_table_2 <- inferCSN(
example_matrix,
cores = 2
)
#> ℹ [2025-05-28 02:35:13] Running for <dense matrix>.
#> ℹ [2025-05-28 02:35:13] Checking input parameters.
#> ℹ [2025-05-28 02:35:13] Using L0 sparse regression model.
#> ℹ [2025-05-28 02:35:13] Using 2 cores
#> ✔ [2025-05-28 02:35:13] Run done.
head(network_table_1)
#> regulator target weight
#> 1 g18 g1 -0.9223177
#> 2 g17 g18 0.8770468
#> 3 g4 g3 0.8103065
#> 4 g16 g15 0.7659245
#> 5 g17 g16 0.7558764
#> 6 g12 g11 0.7444053
identical(
network_table_1,
network_table_2
)
#> [1] TRUE
inferCSN(
example_matrix,
regulators = c("g1", "g2"),
targets = c("g3", "g4")
)
#> ℹ [2025-05-28 02:35:13] Running for <dense matrix>.
#> ℹ [2025-05-28 02:35:13] Checking input parameters.
#> ℹ [2025-05-28 02:35:13] Using 2 regulator(s).
#> ℹ [2025-05-28 02:35:13] Using 2 target(s).
#> ℹ [2025-05-28 02:35:13] Using L0 sparse regression model.
#> ℹ [2025-05-28 02:35:13] Using 1 core
#> ✔ [2025-05-28 02:35:13] Run done.
#> regulator target weight
#> 1 g2 g3 0.9848781
#> 2 g2 g4 0.9230387
#> 3 g1 g4 -0.3847071
#> 4 g1 g3 -0.1732490
inferCSN(
example_matrix,
regulators = c("g1", "g2"),
targets = c("g3", "g0")
)
#> ℹ [2025-05-28 02:35:13] Running for <dense matrix>.
#> ℹ [2025-05-28 02:35:13] Checking input parameters.
#> ℹ [2025-05-28 02:35:13] Using 2 regulator(s).
#> ! [2025-05-28 02:35:13] 1 out of 2 candidate targets are in the input matrix.
#> ℹ [2025-05-28 02:35:13] Using L0 sparse regression model.
#> ℹ [2025-05-28 02:35:13] Using 1 core
#> ✔ [2025-05-28 02:35:13] Run done.
#> regulator target weight
#> 1 g2 g3 0.9848781
#> 2 g1 g3 -0.1732490
if (FALSE) { # \dontrun{
data("example_ground_truth")
network_table_07 <- inferCSN(
example_matrix,
r_squared_threshold = 0.7
)
calculate_metrics(
network_table_1,
example_ground_truth,
return_plot = TRUE
)
calculate_metrics(
network_table_07,
example_ground_truth,
return_plot = TRUE
)
} # }
if (FALSE) { # \dontrun{
data("example_matrix")
network_table <- inferCSN(example_matrix)
head(network_table)
network_table_sparse_1 <- inferCSN(
as(example_matrix, "sparseMatrix")
)
head(network_table_sparse_1)
network_table_sparse_2 <- inferCSN(
as(example_matrix, "sparseMatrix"),
cores = 2
)
identical(
network_table,
network_table_sparse_1
)
identical(
network_table_sparse_1,
network_table_sparse_2
)
plot_scatter(
data.frame(
network_table$weight,
network_table_sparse_1$weight
),
legend_position = "none"
)
plot_weight_distribution(
network_table
) + plot_weight_distribution(
network_table_sparse_1
)
} # }
if (FALSE) { # \dontrun{
data("example_matrix")
object <- initiate_object(example_matrix)
object <- inferCSN(object)
} # }