This function converts gene IDs from one ID type to another, either within one species or between two species, using the BioMart service.
Usage
GeneConvert(
geneID,
geneID_from_IDtype = "symbol",
geneID_to_IDtype = "entrez_id",
species_from = "Homo_sapiens",
species_to = NULL,
Ensembl_version = 103,
biomart = NULL,
mirror = NULL,
max_tries = 5
)
Arguments
- geneID
A character vector of gene IDs.
- geneID_from_IDtype
Gene ID type of the input geneID, e.g. "symbol", "ensembl_id", "entrez_id".
- geneID_to_IDtype
Gene ID type(s) to convert to. e.g. "symbol", "ensembl_id", "entrez_id"
- species_from
Latin names for animals of the input geneID. e.g. "Homo_sapiens","Mus_musculus"
- species_to
Latin names for animals of the output geneID. e.g. "Homo_sapiens","Mus_musculus"
- Ensembl_version
Ensembl database version. If NULL, use the current release version.
- biomart
The name of the BioMart database that you want to connect to. Possible options include "ensembl", "protists_mart", "fungi_mart", and "plants_mart".
- mirror
Specify an Ensembl mirror to connect to. The valid options here are 'www', 'uswest', 'useast', 'asia'.
- max_tries
The maximum number of attempts to connect with the BioMart service.
Value
A list with the following elements:
geneID_res:
A data.frame containing all the gene IDs mapped in the database, with columns: 'from_IDtype', 'from_geneID', 'to_IDtype', 'to_geneID'.
geneID_collapse:
A data.frame containing all the successfully converted gene IDs, with the output gene IDs collapsed into a list. As a result, the 'from_geneID' column (which is set as the row names) of the data.frame is unique.
geneID_expand:
A data.frame containing all the successfully converted gene IDs, with the output gene IDs expanded.
Ensembl_version:
Ensembl database version.
Datasets:
Datasets available in the selected BioMart database.
Attributes:
Attributes available in the selected BioMart database.
geneID_unmapped:
A character vector of gene IDs that are unmapped in the database.
Examples
res <- GeneConvert(
geneID = c("CDK1", "MKI67", "TOP2A", "AURKA", "CTCF"),
geneID_from_IDtype = "symbol",
geneID_to_IDtype = "entrez_id",
species_from = "Homo_sapiens",
species_to = "Mus_musculus",
Ensembl_version = 103
)
#> ℹ [2025-07-26 07:01:51] Connect to the Ensembl archives...
#> ℹ [2025-07-26 07:01:51] Using the 103 version of biomart...
#> ℹ [2025-07-26 07:01:51] Connecting to the biomart...
#> ℹ [2025-07-26 07:02:51] Error in `req_perform()`:
#> ℹ [2025-07-26 07:02:51] ! Failed to perform HTTP request.
#> ℹ [2025-07-26 07:02:51] Caused by error in `curl::curl_fetch_memory()`:
#> ℹ [2025-07-26 07:02:51] ! Timeout was reached [feb2021.archive.ensembl.org]:
#> ℹ [2025-07-26 07:02:51] Operation timed out after 60001 milliseconds with 0 bytes received
#> ℹ [2025-07-26 07:02:51]
#> ℹ [2025-07-26 07:02:51] Get errors when connecting with ensembl mart...
#> ℹ [2025-07-26 07:02:52] Retrying...
#> ℹ [2025-07-26 07:03:52] Error in `req_perform()`:
#> ℹ [2025-07-26 07:03:52] ! Failed to perform HTTP request.
#> ℹ [2025-07-26 07:03:52] Caused by error in `curl::curl_fetch_memory()`:
#> ℹ [2025-07-26 07:03:52] ! Timeout was reached [feb2021.archive.ensembl.org]:
#> ℹ [2025-07-26 07:03:52] Operation timed out after 60002 milliseconds with 0 bytes received
#> ℹ [2025-07-26 07:03:52]
#> ℹ [2025-07-26 07:03:52] Get errors when connecting with ensembl mart...
#> ℹ [2025-07-26 07:03:53] Retrying...
#> ℹ [2025-07-26 07:04:53] Error in `req_perform()`:
#> ℹ [2025-07-26 07:04:53] ! Failed to perform HTTP request.
#> ℹ [2025-07-26 07:04:53] Caused by error in `curl::curl_fetch_memory()`:
#> ℹ [2025-07-26 07:04:53] ! Timeout was reached [feb2021.archive.ensembl.org]:
#> ℹ [2025-07-26 07:04:53] Operation timed out after 60002 milliseconds with 0 bytes received
#> ℹ [2025-07-26 07:04:53]
#> ℹ [2025-07-26 07:04:53] Get errors when connecting with ensembl mart...
#> ℹ [2025-07-26 07:04:54] Retrying...
#> ℹ [2025-07-26 07:05:54] Error in `req_perform()`:
#> ℹ [2025-07-26 07:05:54] ! Failed to perform HTTP request.
#> ℹ [2025-07-26 07:05:54] Caused by error in `curl::curl_fetch_memory()`:
#> ℹ [2025-07-26 07:05:54] ! Timeout was reached [feb2021.archive.ensembl.org]:
#> ℹ [2025-07-26 07:05:54] Operation timed out after 60002 milliseconds with 0 bytes received
#> ℹ [2025-07-26 07:05:54]
#> ℹ [2025-07-26 07:05:54] Get errors when connecting with ensembl mart...
#> ℹ [2025-07-26 07:05:55] Retrying...
#> ℹ [2025-07-26 07:06:55] Error in `req_perform()`:
#> ℹ [2025-07-26 07:06:55] ! Failed to perform HTTP request.
#> ℹ [2025-07-26 07:06:55] Caused by error in `curl::curl_fetch_memory()`:
#> ℹ [2025-07-26 07:06:55] ! Timeout was reached [feb2021.archive.ensembl.org]:
#> ℹ [2025-07-26 07:06:55] Operation timed out after 60002 milliseconds with 0 bytes received
#> ℹ [2025-07-26 07:06:55]
#> ℹ [2025-07-26 07:06:55] Get errors when connecting with ensembl mart...
#> Error in log_message(out, message_type = "error"): Error in `req_perform()`: ! Failed to perform HTTP request. Caused by
#> error in `curl::curl_fetch_memory()`: ! Timeout was reached
#> [feb2021.archive.ensembl.org]: Operation timed out after 60002 milliseconds
#> with 0 bytes received
str(res)
#> Error: object 'res' not found
# Convert the mouse genes to human homologs,
# and replace the raw counts in a Seurat object.
data(pancreas_sub)
counts <- GetAssayData5(
pancreas_sub,
assay = "RNA",
layer = "counts"
)
res <- GeneConvert(
geneID = rownames(counts),
geneID_from_IDtype = "symbol",
geneID_to_IDtype = "symbol",
species_from = "Mus_musculus",
species_to = "Homo_sapiens",
Ensembl_version = 103
)
#> ℹ [2025-07-26 07:06:59] Connect to the Ensembl archives...
#> ℹ [2025-07-26 07:06:59] Using the 103 version of biomart...
#> ℹ [2025-07-26 07:06:59] Connecting to the biomart...
#> ℹ [2025-07-26 07:07:59] Error in `req_perform()`:
#> ℹ [2025-07-26 07:07:59] ! Failed to perform HTTP request.
#> ℹ [2025-07-26 07:07:59] Caused by error in `curl::curl_fetch_memory()`:
#> ℹ [2025-07-26 07:07:59] ! Timeout was reached [feb2021.archive.ensembl.org]:
#> ℹ [2025-07-26 07:07:59] Operation timed out after 60002 milliseconds with 0 bytes received
#> ℹ [2025-07-26 07:07:59]
#> ℹ [2025-07-26 07:07:59] Get errors when connecting with ensembl mart...
#> ℹ [2025-07-26 07:08:00] Retrying...
#> ℹ [2025-07-26 07:09:00] Error in `req_perform()`:
#> ℹ [2025-07-26 07:09:00] ! Failed to perform HTTP request.
#> ℹ [2025-07-26 07:09:00] Caused by error in `curl::curl_fetch_memory()`:
#> ℹ [2025-07-26 07:09:00] ! Timeout was reached [feb2021.archive.ensembl.org]:
#> ℹ [2025-07-26 07:09:00] Operation timed out after 60002 milliseconds with 0 bytes received
#> ℹ [2025-07-26 07:09:00]
#> ℹ [2025-07-26 07:09:00] Get errors when connecting with ensembl mart...
#> ℹ [2025-07-26 07:09:01] Retrying...
#> ℹ [2025-07-26 07:10:01] Error in `req_perform()`:
#> ℹ [2025-07-26 07:10:01] ! Failed to perform HTTP request.
#> ℹ [2025-07-26 07:10:01] Caused by error in `curl::curl_fetch_memory()`:
#> ℹ [2025-07-26 07:10:01] ! Timeout was reached [feb2021.archive.ensembl.org]:
#> ℹ [2025-07-26 07:10:01] Operation timed out after 60002 milliseconds with 0 bytes received
#> ℹ [2025-07-26 07:10:01]
#> ℹ [2025-07-26 07:10:01] Get errors when connecting with ensembl mart...
#> ℹ [2025-07-26 07:10:02] Retrying...
#> ℹ [2025-07-26 07:11:02] Error in `req_perform()`:
#> ℹ [2025-07-26 07:11:02] ! Failed to perform HTTP request.
#> ℹ [2025-07-26 07:11:02] Caused by error in `curl::curl_fetch_memory()`:
#> ℹ [2025-07-26 07:11:02] ! Timeout was reached [feb2021.archive.ensembl.org]:
#> ℹ [2025-07-26 07:11:02] Operation timed out after 60002 milliseconds with 0 bytes received
#> ℹ [2025-07-26 07:11:02]
#> ℹ [2025-07-26 07:11:02] Get errors when connecting with ensembl mart...
#> ℹ [2025-07-26 07:11:03] Retrying...
#> ℹ [2025-07-26 07:12:03] Error in `req_perform()`:
#> ℹ [2025-07-26 07:12:03] ! Failed to perform HTTP request.
#> ℹ [2025-07-26 07:12:03] Caused by error in `curl::curl_fetch_memory()`:
#> ℹ [2025-07-26 07:12:03] ! Timeout was reached [feb2021.archive.ensembl.org]:
#> ℹ [2025-07-26 07:12:03] Operation timed out after 60001 milliseconds with 0 bytes received
#> ℹ [2025-07-26 07:12:03]
#> ℹ [2025-07-26 07:12:03] Get errors when connecting with ensembl mart...
#> Error in log_message(out, message_type = "error"): Error in `req_perform()`: ! Failed to perform HTTP request. Caused by
#> error in `curl::curl_fetch_memory()`: ! Timeout was reached
#> [feb2021.archive.ensembl.org]: Operation timed out after 60001 milliseconds
#> with 0 bytes received
# Check the number of input and converted gene IDs
input_genes <- length(rownames(counts))
db_genes <- length(unique(res$geneID_res$from_geneID))
#> Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'unique': object 'res' not found
converted_genes_input <- length(unique(res$geneID_collapse$from_geneID))
#> Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'unique': object 'res' not found
converted_genes_output <- length(unique(res$geneID_expand$symbol))
#> Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'unique': object 'res' not found
thisutils::log_message(
"Number of input gene IDs:", input_genes
)
#> ℹ [2025-07-26 07:12:04] Number of input gene IDs:15962
thisutils::log_message(
"Number of gene IDs mapped in the database:", db_genes
)
#> Error: object 'db_genes' not found
thisutils::log_message(
"Number of input gene IDs that were successfully converted:",
converted_genes_input
)
#> Error: object 'converted_genes_input' not found
thisutils::log_message(
"Number of converted gene IDs:", converted_genes_output
)
#> Error: object 'converted_genes_output' not found
homologs_counts <- stats::aggregate(
x = counts[res$geneID_expand[, "from_geneID"], ],
by = list(res$geneID_expand[, "symbol"]), FUN = sum
)
#> Error in h(simpleError(msg, call)): error in evaluating the argument 'i' in selecting a method for function '[': object 'res' not found
rownames(homologs_counts) <- homologs_counts[, 1]
#> Error: object 'homologs_counts' not found
homologs_counts <- methods::as(
Matrix::as.matrix(homologs_counts[, -1]),
"dgCMatrix"
)
#> Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'as.matrix': object 'homologs_counts' not found
homologs_counts
#> Error: object 'homologs_counts' not found