Perform differential expression testing on a Seurat object or a
SummarizedExperiment bulk object.
Users have the flexibility to specify custom cell groups, marker types, and
various options for DE analysis.
Usage
RunDEtest(object = NULL, ..., srt = NULL)
# S3 method for class 'Seurat'
RunDEtest(
object,
group.by = NULL,
group1 = NULL,
group2 = NULL,
ident.1 = NULL,
ident.2 = NULL,
cells1 = NULL,
cells2 = NULL,
cells.1 = NULL,
cells.2 = NULL,
features = NULL,
feature_type = c("gene", "peak", "cCRE"),
markers_type = c("all", "paired", "conserved", "disturbed"),
grouping.var = NULL,
meta.method = c("maximump", "minimump", "wilkinsonp", "meanp", "sump", "votep"),
test.use = "wilcox",
only.pos = TRUE,
fc.threshold = 1.5,
logfc.threshold = NULL,
base = 2,
pseudocount.use = 1,
mean.fxn = NULL,
min.pct = 0.1,
min.diff.pct = -Inf,
max.cells.per.ident = Inf,
latent.vars = NULL,
min.cells.feature = 3,
min.cells.group = 3,
norm.method = "LogNormalize",
sample_col = NULL,
condition_col = NULL,
bulk_assay = "counts",
p.adjust.method = "bonferroni",
layer = "data",
assay = NULL,
seed = 11,
verbose = TRUE,
cores = 1,
...
)
# S3 method for class 'SummarizedExperiment'
RunDEtest(
object,
group.by = NULL,
group1 = NULL,
group2 = NULL,
cells1 = NULL,
cells2 = NULL,
features = NULL,
feature_type = c("gene", "peak", "cCRE"),
markers_type = c("all", "paired", "conserved", "disturbed"),
grouping.var = NULL,
meta.method = c("maximump", "minimump", "wilkinsonp", "meanp", "sump", "votep"),
test.use = "edgeR",
only.pos = TRUE,
fc.threshold = 1.5,
base = 2,
pseudocount.use = 1,
mean.fxn = NULL,
min.pct = 0.1,
min.diff.pct = -Inf,
max.cells.per.ident = Inf,
latent.vars = NULL,
min.cells.feature = 3,
min.cells.group = 3,
norm.method = "LogNormalize",
sample_col = NULL,
condition_col = NULL,
bulk_assay = "counts",
p.adjust.method = "bonferroni",
layer = "counts",
assay = NULL,
seed = 11,
verbose = TRUE,
cores = 1,
...
)Arguments
- object
A
Seuratobject or aSummarizedExperimentobject.- ...
Additional arguments to pass to the Seurat::FindMarkers function.
- srt
Compatibility alias for
object.- group.by
A grouping variable in the dataset to define the groups or conditions for the differential test. If not provided, the function uses the "active.ident" variable in the Seurat object.
- group1
A vector of cell IDs or a character vector specifying the cells that belong to the first group. If both group.by and group1 are provided, group1 takes precedence. For sample-level methods (
"edgeR","limma","DESeq2", and"dream"), this parameter is interpreted as the first condition label.- group2
A vector of cell IDs or a character vector specifying the cells that belong to the second group. This parameter is only used when group.by or group1 is provided. For sample-level methods (
"edgeR","limma","DESeq2", and"dream"), this parameter is interpreted as the second condition label.- ident.1, ident.2
Seurat-style aliases for
group1andgroup2.- cells1
A vector of cell IDs specifying the cells that belong to group1. If provided, group1 is ignored.
- cells2
A vector of cell IDs specifying the cells that belong to group2. This parameter is only used when cells1 is provided.
- cells.1, cells.2
Seurat-style aliases for
cells1andcells2.- features
A vector of feature names specifying the features to consider for the differential test. If not provided, all features in the dataset are considered.
- feature_type
Feature type used for differential testing. Default is
"gene".- markers_type
A character value specifying the type of markers to find. Possible values are "all", "paired", "conserved", and "disturbed". Sample-level methods (
"edgeR","limma","DESeq2", and"dream") currently support only"all".- grouping.var
A character value specifying the grouping variable for finding conserved or disturbed markers. This parameter is only used when markers_type is "conserved" or "disturbed".
- meta.method
A character value specifying the method to use for combining p-values in the conserved markers test. Possible values are "maximump", "minimump", "wilkinsonp", "meanp", "sump", and "votep".
- test.use
Differential testing method.
"edgeR","limma","DESeq2", and"dream"run sample-level pseudobulk differential testing onSeuratinput and bulk DE onSummarizedExperimentinput.- only.pos
Only return positive markers (FALSE by default)
- fc.threshold
A numeric value used to filter genes for testing based on their average fold change between/among the two groups. Default is
1.5.- logfc.threshold
Seurat-style log fold-change threshold. When provided, it is converted to
fc.threshold = base^logfc.threshold.- base
The base with respect to which logarithms are computed.
- pseudocount.use
Pseudocount to add to averaged expression values when calculating logFC. 1 by default.
- mean.fxn
Function to use for fold change or average difference calculation. The default depends on the the value of
fc.slot:"counts" : difference in the log of the mean counts, with pseudocount.
"data" : difference in the log of the average exponentiated data, with pseudocount. This adjusts for differences in sequencing depth between cells, and assumes that "data" has been log-normalized.
"scale.data" : difference in the means of scale.data.
- min.pct
only test genes that are detected in a minimum fraction of min.pct cells in either of the two populations. Meant to speed up the function by not testing genes that are very infrequently expressed. Default is 0.01
- min.diff.pct
only test genes that show a minimum difference in the fraction of detection between the two groups. Set to -Inf by default
- max.cells.per.ident
Down sample each identity class to a max number. Default is no downsampling. Not activated by default (set to Inf)
- latent.vars
Variables to test, used only when
test.useis one of 'LR', 'negbinom', 'poisson', or 'MAST'- min.cells.feature
Minimum number of cells expressing the feature in at least one of the two groups, currently only used for poisson and negative binomial tests
- min.cells.group
Minimum number of cells in one of the groups
- norm.method
Normalization method for fold change calculation when layer is 'data'. Default is
"LogNormalize".- sample_col
Metadata column storing biological sample IDs. Required when
test.useis a sample-level pseudobulk method onSeurat.- condition_col
Metadata column storing condition labels. Required when
test.useis a sample-level pseudobulk method onSeurat, and required forSummarizedExperimentinput.- bulk_assay
Assay name used as the bulk counts matrix for
SummarizedExperimentinput.- p.adjust.method
A character value specifying the method to use for adjusting p-values. Default is
"bonferroni".- layer
Which layer to use. Default is
data.- assay
Assay to use in differential expression testing
- seed
Random seed for reproducibility. Default is
11.- verbose
Whether to print the message. Default is
TRUE.- cores
The number of cores to use for parallelization with foreach::foreach. Default is
1.
Examples
data(pancreas_sub)
pancreas_sub <- standard_scop(pancreas_sub)
pancreas_sub <- RunDEtest(
pancreas_sub,
group.by = "SubCellType",
only.pos = FALSE
)
AllMarkers <- dplyr::filter(
pancreas_sub@tools$DEtest_SubCellType$AllMarkers_wilcox,
p_val_adj < 0.05 & avg_log2FC > 1
)
ht1 <- GroupHeatmap(
pancreas_sub,
features = AllMarkers$gene,
feature_split = AllMarkers$group1,
group.by = "SubCellType"
)
ht1$plot
TopMarkers <- AllMarkers |>
dplyr::group_by(gene) |>
dplyr::top_n(1, avg_log2FC) |>
dplyr::group_by(group1) |>
dplyr::top_n(3, avg_log2FC)
ht2 <- GroupHeatmap(
pancreas_sub,
features = TopMarkers$gene,
feature_split = TopMarkers$group1,
group.by = "SubCellType",
show_row_names = TRUE
)
ht2$plot
pancreas_sub <- RunDEtest(
pancreas_sub,
group.by = "SubCellType",
markers_type = "paired",
cores = 2
)
PairedMarkers <- dplyr::filter(
pancreas_sub@tools$DEtest_SubCellType$PairedMarkers_wilcox,
p_val_adj < 0.05 & avg_log2FC > 1
)
ht3 <- GroupHeatmap(
pancreas_sub,
features = PairedMarkers$gene,
feature_split = PairedMarkers$group1,
group.by = "SubCellType"
)
ht3$plot
data(panc8_sub)
panc8_sub <- integration_scop(
panc8_sub,
batch = "tech",
integration_method = "Uncorrected"
)
CellDimPlot(
panc8_sub,
group.by = c("celltype", "tech")
)
panc8_sub <- RunDEtest(
srt = panc8_sub,
group.by = "celltype",
grouping.var = "tech",
markers_type = "conserved",
cores = 2
)
ConservedMarkers1 <- dplyr::filter(
panc8_sub@tools$DEtest_celltype$ConservedMarkers_wilcox,
p_val_adj < 0.05 & avg_log2FC > 1
)
ht4 <- GroupHeatmap(
panc8_sub,
layer = "data",
features = ConservedMarkers1$gene,
feature_split = ConservedMarkers1$group1,
group.by = "tech",
split.by = "celltype",
within_groups = TRUE
)
ht4$plot
panc8_sub <- RunDEtest(
srt = panc8_sub,
group.by = "tech",
grouping.var = "celltype",
markers_type = "conserved",
cores = 2
)
ConservedMarkers2 <- dplyr::filter(
panc8_sub@tools$DEtest_tech$ConservedMarkers_wilcox,
p_val_adj < 0.05 & avg_log2FC > 1
)
ht4 <- GroupHeatmap(
srt = panc8_sub,
layer = "data",
features = ConservedMarkers2$gene,
feature_split = ConservedMarkers2$group1,
group.by = "tech",
split.by = "celltype"
)
ht4$plot
panc8_sub <- RunDEtest(
srt = panc8_sub,
group.by = "celltype",
grouping.var = "tech",
markers_type = "disturbed",
cores = 2
)
DisturbedMarkers <- dplyr::filter(
panc8_sub@tools$DEtest_celltype$DisturbedMarkers_wilcox,
p_val_adj < 0.05 & avg_log2FC > 1 & var1 == "smartseq2"
)
ht5 <- GroupHeatmap(
srt = panc8_sub,
layer = "data",
features = DisturbedMarkers$gene,
feature_split = DisturbedMarkers$group1,
group.by = "celltype",
split.by = "tech"
)
ht5$plot
gene_specific <- names(which(table(DisturbedMarkers$gene) == 1))
DisturbedMarkers_specific <- DisturbedMarkers[
DisturbedMarkers$gene %in% gene_specific,
]
ht6 <- GroupHeatmap(
srt = panc8_sub,
layer = "data",
features = DisturbedMarkers_specific$gene,
feature_split = DisturbedMarkers_specific$group1,
group.by = "celltype",
split.by = "tech"
)
ht6$plot
ht7 <- GroupHeatmap(
srt = panc8_sub,
layer = "data",
aggregate_fun = function(x) mean(expm1(x)) + 1,
features = DisturbedMarkers_specific$gene,
feature_split = DisturbedMarkers_specific$group1,
group.by = "celltype",
grouping.var = "tech",
numerator = "smartseq2"
)
ht7$plot
cell_index <- ave(
seq_along(pancreas_sub$CellType),
pancreas_sub$CellType,
FUN = seq_along
)
pancreas_sub[["sample"]] <- paste0(
"S",
(cell_index - 1) %% 4 + 1
)
pancreas_sub[["condition"]] <- ifelse(
pancreas_sub$sample %in% c("S1", "S2"),
"ctrl",
"case"
)
pancreas_sub <- RunDEtest(
pancreas_sub,
group.by = "CellType",
sample_col = "sample",
condition_col = "condition",
test.use = "limma",
fc.threshold = 1,
layer = "counts",
only.pos = FALSE
)
DEtestPlot(
pancreas_sub,
group.by = "CellType",
test.use = "limma",
group_use = "Ductal",
plot_type = "volcano",
x_metric = "avg_log2FC",
y_metric = "p_val"
)
pancreas_sub <- RunDEtest(
pancreas_sub,
group.by = "CellType",
sample_col = "sample",
condition_col = "condition",
test.use = "edgeR",
layer = "counts",
fc.threshold = 1,
only.pos = FALSE
)
DEtestPlot(
pancreas_sub,
group.by = "CellType",
test.use = "edgeR",
group_use = "Ductal",
plot_type = "volcano",
x_metric = "avg_log2FC",
y_metric = "p_val",
DE_threshold = "abs(avg_log2FC) > log2(1.5) & p_val < 0.05"
)
data(islet_bulk)
bulk_out <- RunDEtest(
islet_bulk,
condition_col = "condition",
group1 = "control",
group2 = "bfa",
test.use = "edgeR",
only.pos = FALSE,
fc.threshold = 1
)
DEtestPlot(
bulk_out,
test.use = "edgeR",
plot_type = "volcano",
x_metric = "avg_log2FC",
y_metric = "p_val"
)