Skip to contents

Annotate features in a Seurat object with additional metadata from databases or a GTF file.

Usage

AnnotateFeatures(
  srt,
  species = "Homo_sapiens",
  IDtype = c("symbol", "ensembl_id", "entrez_id"),
  db = NULL,
  db_update = FALSE,
  db_version = "latest",
  convert_species = TRUE,
  Ensembl_version = 103,
  mirror = NULL,
  gtf = NULL,
  merge_gtf_by = "gene_name",
  columns = c("seqname", "feature", "start", "end", "strand", "gene_id", "gene_name",
    "gene_type"),
  assays = "RNA",
  overwrite = FALSE
)

Arguments

srt

Seurat object to be annotated.

species

Name of the species to be used for annotation. Default is "Homo_sapiens".

IDtype

Type of identifier used to match features for annotation. One of "symbol", "ensembl_id", or "entrez_id". Default is "symbol".

db

Vector of database names to be used for annotation. Default is NULL.

db_update

Logical value indicating whether to update the database. Default is FALSE.

db_version

Version of the database to use. Default is "latest".

convert_species

Logical value indicating whether to use a species-converted database when the annotation is missing for the specified species. Default is TRUE.

Ensembl_version

Version of the Ensembl database to use. Default is 103.

mirror

URL of the mirror to use for the Ensembl database. Default is NULL.

gtf

Path to the GTF file to be used for annotation. Default is NULL.

merge_gtf_by

Name of the GTF column by which the GTF annotations are merged with the features. Default is "gene_name".

columns

Vector of column names to be used from the GTF file. Default is "seqname", "feature", "start", "end", "strand", "gene_id", "gene_name", "gene_type".

assays

Character vector of assay names to be annotated. Default is "RNA".

overwrite

Logical value indicating whether to overwrite existing metadata. Default is FALSE.

See also

Examples

data(pancreas_sub)
pancreas_sub <- AnnotateFeatures(
  pancreas_sub,
  species = "Mus_musculus",
  db = c(
    "Chromosome",
    "GeneType",
    "Enzyme",
    "TF",
    "CSPA",
    "VerSeDa"
  )
)
#>  [2025-09-20 12:52:28] Species: Mus_musculus
#>  [2025-09-20 12:52:28] Installing: org.Mm.eg.db...
#>  Loading metadata database
#>  Loading metadata database ... done
#> 
#>  
#> → Will install 37 packages.
#> → All 37 packages (0 B) are cached.
#> + AnnotationDbi      1.70.0  [bld]
#> + Biobase            2.68.0  [bld][cmp]
#> + BiocGenerics       0.54.0  [bld]
#> + Biostrings         2.76.0  [bld][cmp]
#> + DBI                1.2.3   
#> + GenomeInfoDb       1.44.2  [bld]
#> + GenomeInfoDbData   1.2.14  [bld]
#> + IRanges            2.42.0  [bld][cmp]
#> + KEGGREST           1.48.1  [bld]
#> + R6                 2.6.1   
#> + RSQLite            2.4.3   
#> + S4Vectors          0.46.0  [bld][cmp]
#> + UCSC.utils         1.4.0   [bld]
#> + XVector            0.48.0  [bld][cmp]
#> + askpass            1.2.1   
#> + bit                4.6.0   
#> + bit64              4.6.0-1 
#> + blob               1.2.4   
#> + cachem             1.1.0   
#> + cli                3.6.5   
#> + crayon             1.5.3   
#> + curl               7.0.0    +  libcurl4-openssl-dev,  libssl-dev
#> + fastmap            1.2.0   
#> + generics           0.1.4   
#> + glue               1.8.0   
#> + httr               1.4.7   
#> + jsonlite           2.0.0   
#> + lifecycle          1.0.4   
#> + memoise            2.0.1   
#> + mime               0.13    
#> + openssl            2.3.3    +  libssl-dev
#> + org.Mm.eg.db       3.21.0  [bld]
#> + pkgconfig          2.0.3   
#> + png                0.1-8    +  libpng-dev
#> + rlang              1.1.6   
#> + sys                3.4.3   
#> + vctrs              0.6.5   
#>  All system requirements are already installed.
#>   
#>  No downloads are needed, 37 pkgs are cached
#>  Got blob 1.2.4 (x86_64-pc-linux-gnu-ubuntu-24.04) (47.47 kB)
#>  Got BiocGenerics 0.54.0 (source) (56.09 kB)
#>  Got askpass 1.2.1 (x86_64-pc-linux-gnu-ubuntu-24.04) (21.89 kB)
#>  Got cachem 1.1.0 (x86_64-pc-linux-gnu-ubuntu-24.04) (67.49 kB)
#>  Got fastmap 1.2.0 (x86_64-pc-linux-gnu-ubuntu-24.04) (66.05 kB)
#>  Got KEGGREST 1.48.1 (source) (239.15 kB)
#>  Got IRanges 2.42.0 (source) (483.03 kB)
#>  Got bit64 4.6.0-1 (x86_64-pc-linux-gnu-ubuntu-24.04) (492.54 kB)
#>  Got httr 1.4.7 (x86_64-pc-linux-gnu-ubuntu-24.04) (486.52 kB)
#>  Got pkgconfig 2.0.3 (x86_64-pc-linux-gnu-ubuntu-24.04) (18.08 kB)
#>  Got bit 4.6.0 (x86_64-pc-linux-gnu-ubuntu-24.04) (628.10 kB)
#>  Got lifecycle 1.0.4 (x86_64-pc-linux-gnu-ubuntu-24.04) (125.07 kB)
#>  Got png 0.1-8 (x86_64-pc-linux-gnu-ubuntu-24.04) (40.57 kB)
#>  Got glue 1.8.0 (x86_64-pc-linux-gnu-ubuntu-24.04) (168.12 kB)
#>  Got R6 2.6.1 (x86_64-pc-linux-gnu-ubuntu-24.04) (86.81 kB)
#>  Got crayon 1.5.3 (x86_64-pc-linux-gnu-ubuntu-24.04) (163.30 kB)
#>  Got memoise 2.0.1 (x86_64-pc-linux-gnu-ubuntu-24.04) (48.86 kB)
#>  Got mime 0.13 (x86_64-pc-linux-gnu-ubuntu-24.04) (44.52 kB)
#>  Got sys 3.4.3 (x86_64-pc-linux-gnu-ubuntu-24.04) (40.73 kB)
#>  Got cli 3.6.5 (x86_64-pc-linux-gnu-ubuntu-24.04) (1.34 MB)
#>  Got vctrs 0.6.5 (x86_64-pc-linux-gnu-ubuntu-24.04) (1.31 MB)
#>  Got DBI 1.2.3 (x86_64-pc-linux-gnu-ubuntu-24.04) (916.93 kB)
#>  Got rlang 1.1.6 (x86_64-pc-linux-gnu-ubuntu-24.04) (1.59 MB)
#>  Got S4Vectors 0.46.0 (source) (1.07 MB)
#>  Got generics 0.1.4 (x86_64-pc-linux-gnu-ubuntu-24.04) (80.38 kB)
#>  Got curl 7.0.0 (x86_64-pc-linux-gnu-ubuntu-24.04) (788.30 kB)
#>  Got XVector 0.48.0 (source) (67.89 kB)
#>  Got Biobase 2.68.0 (source) (1.98 MB)
#>  Got UCSC.utils 1.4.0 (source) (236.15 kB)
#>  Got openssl 2.3.3 (x86_64-pc-linux-gnu-ubuntu-24.04) (1.31 MB)
#>  Got RSQLite 2.4.3 (x86_64-pc-linux-gnu-ubuntu-24.04) (1.35 MB)
#>  Got jsonlite 2.0.0 (x86_64-pc-linux-gnu-ubuntu-24.04) (1.09 MB)
#>  Got GenomeInfoDb 1.44.2 (source) (3.59 MB)
#>  Got AnnotationDbi 1.70.0 (source) (4.37 MB)
#>  Got Biostrings 2.76.0 (source) (12.82 MB)
#>  Got GenomeInfoDbData 1.2.14 (source) (13.24 MB)
#>  Got org.Mm.eg.db 3.21.0 (source) (86.57 MB)
#>  Installing system requirements
#>  Executing `sudo sh -c apt-get -y update`
#> Get:1 file:/etc/apt/apt-mirrors.txt Mirrorlist [144 B]
#> Hit:6 https://packages.microsoft.com/repos/azure-cli noble InRelease
#> Hit:2 http://azure.archive.ubuntu.com/ubuntu noble InRelease
#> Hit:7 https://packages.microsoft.com/ubuntu/24.04/prod noble InRelease
#> Hit:3 http://azure.archive.ubuntu.com/ubuntu noble-updates InRelease
#> Hit:4 http://azure.archive.ubuntu.com/ubuntu noble-backports InRelease
#> Hit:5 http://azure.archive.ubuntu.com/ubuntu noble-security InRelease
#> Reading package lists...
#>  Executing `sudo sh -c apt-get -y install libcurl4-openssl-dev libssl-dev libpng-dev`
#> Reading package lists...
#> Building dependency tree...
#> Reading state information...
#> libcurl4-openssl-dev is already the newest version (8.5.0-2ubuntu10.6).
#> libssl-dev is already the newest version (3.0.13-0ubuntu3.5).
#> libpng-dev is already the newest version (1.6.43-5build1).
#> 0 upgraded, 0 newly installed, 0 to remove and 41 not upgraded.
#>  Building GenomeInfoDbData 1.2.14
#>  Installed askpass 1.2.1  (87ms)
#>  Installed bit 4.6.0  (95ms)
#>  Installed bit64 4.6.0-1  (120ms)
#>  Installed blob 1.2.4  (62ms)
#>  Installed cachem 1.1.0  (106ms)
#>  Installed cli 3.6.5  (107ms)
#>  Installed crayon 1.5.3  (65ms)
#>  Installed curl 7.0.0  (68ms)
#>  Installed DBI 1.2.3  (66ms)
#>  Installed fastmap 1.2.0  (62ms)
#>  Installed generics 0.1.4  (83ms)
#>  Building BiocGenerics 0.54.0
#>  Installed glue 1.8.0  (99ms)
#>  Installed httr 1.4.7  (46ms)
#>  Installed lifecycle 1.0.4  (41ms)
#>  Installed jsonlite 2.0.0  (139ms)
#>  Installed memoise 2.0.1  (95ms)
#>  Installed mime 0.13  (98ms)
#>  Installed openssl 2.3.3  (116ms)
#>  Installed pkgconfig 2.0.3  (92ms)
#>  Installed png 0.1-8  (85ms)
#>  Built GenomeInfoDbData 1.2.14 (1.8s)
#>  Installed R6 2.6.1  (452ms)
#>  Installed rlang 1.1.6  (109ms)
#>  Installed RSQLite 2.4.3  (71ms)
#>  Installed sys 3.4.3  (67ms)
#>  Installed vctrs 0.6.5  (68ms)
#>  Installed GenomeInfoDbData 1.2.14  (90ms)
#>  Built BiocGenerics 0.54.0 (3s)
#>  Installed BiocGenerics 0.54.0  (1s)
#>  Building Biobase 2.68.0
#>  Building S4Vectors 0.46.0
#>  Built Biobase 2.68.0 (6.6s)
#>  Installed Biobase 2.68.0  (1.1s)
#>  Built S4Vectors 0.46.0 (15.7s)
#>  Installed S4Vectors 0.46.0  (1s)
#>  Building IRanges 2.42.0
#>  Building UCSC.utils 1.4.0
#>  Built UCSC.utils 1.4.0 (2.4s)
#>  Installed UCSC.utils 1.4.0  (1s)
#>  Built IRanges 2.42.0 (35.2s)
#>  Installed IRanges 2.42.0  (1s)
#>  Building GenomeInfoDb 1.44.2
#>  Building XVector 0.48.0
#>  Built GenomeInfoDb 1.44.2 (7.1s)
#>  Installed GenomeInfoDb 1.44.2  (100ms)
#>  Built XVector 0.48.0 (8.7s)
#>  Installed XVector 0.48.0  (1s)
#>  Building Biostrings 2.76.0
#>  Built Biostrings 2.76.0 (18.4s)
#>  Installed Biostrings 2.76.0  (1.1s)
#>  Building KEGGREST 1.48.1
#>  Built KEGGREST 1.48.1 (4.5s)
#>  Installed KEGGREST 1.48.1  (1s)
#>  Building AnnotationDbi 1.70.0
#>  Built AnnotationDbi 1.70.0 (11.4s)
#>  Installed AnnotationDbi 1.70.0  (1.1s)
#>  Building org.Mm.eg.db 3.21.0
#>  Built org.Mm.eg.db 3.21.0 (3m 7.4s)
#>  Installed org.Mm.eg.db 3.21.0  (3s)
#>  1 pkg + 36 deps: added 37, dld 37 (137.04 MB) [5m 22.3s]
#>  [2025-09-20 12:57:51] Installing: GO.db...
#>  
#> → Will install 1 package.
#> → The package (0 B) is cached.
#> + GO.db   3.21.0 [bld]
#>  All system requirements are already installed.
#>   
#>  No downloads are needed, 1 pkg is cached
#>  Got GO.db 3.21.0 (source) (25.56 MB)
#>  Installing system requirements
#>  Executing `sudo sh -c apt-get -y update`
#> Get:1 file:/etc/apt/apt-mirrors.txt Mirrorlist [144 B]
#> Hit:2 http://azure.archive.ubuntu.com/ubuntu noble InRelease
#> Hit:3 http://azure.archive.ubuntu.com/ubuntu noble-updates InRelease
#> Hit:6 https://packages.microsoft.com/repos/azure-cli noble InRelease
#> Hit:4 http://azure.archive.ubuntu.com/ubuntu noble-backports InRelease
#> Hit:5 http://azure.archive.ubuntu.com/ubuntu noble-security InRelease
#> Hit:7 https://packages.microsoft.com/ubuntu/24.04/prod noble InRelease
#> Reading package lists...
#>  Executing `sudo sh -c apt-get -y install libcurl4-openssl-dev libssl-dev libpng-dev`
#> Reading package lists...
#> Building dependency tree...
#> Reading state information...
#> libcurl4-openssl-dev is already the newest version (8.5.0-2ubuntu10.6).
#> libssl-dev is already the newest version (3.0.13-0ubuntu3.5).
#> libpng-dev is already the newest version (1.6.43-5build1).
#> 0 upgraded, 0 newly installed, 0 to remove and 41 not upgraded.
#>  Building GO.db 3.21.0
#>  Built GO.db 3.21.0 (37.1s)
#>  Installed GO.db 3.21.0  (448ms)
#>  1 pkg + 36 deps: kept 36, added 1, dld 1 (25.56 MB) [43.5s]
#>  [2025-09-20 12:58:34] Installing: GOSemSim...
#>  
#> → Will install 9 packages.
#> → All 9 packages (0 B) are cached.
#> + GOSemSim      2.34.0 [bld][cmp]
#> + R.methodsS3   1.8.2  
#> + R.oo          1.27.1 
#> + R.utils       2.13.0 
#> + Rcpp          1.1.0  
#> + digest        0.6.37 
#> + fs            1.6.6   +  make
#> + rappdirs      0.3.3  
#> + yulab.utils   0.2.1  
#>  All system requirements are already installed.
#>   
#>  No downloads are needed, 9 pkgs are cached
#>  Got GOSemSim 2.34.0 (source) (608.98 kB)
#>  Got rappdirs 0.3.3 (x86_64-pc-linux-gnu-ubuntu-24.04) (45.26 kB)
#>  Got R.methodsS3 1.8.2 (x86_64-pc-linux-gnu-ubuntu-24.04) (82.67 kB)
#>  Got digest 0.6.37 (x86_64-pc-linux-gnu-ubuntu-24.04) (227.14 kB)
#>  Got yulab.utils 0.2.1 (x86_64-pc-linux-gnu-ubuntu-24.04) (119.17 kB)
#>  Got fs 1.6.6 (x86_64-pc-linux-gnu-ubuntu-24.04) (310.07 kB)
#>  Got R.oo 1.27.1 (x86_64-pc-linux-gnu-ubuntu-24.04) (996.09 kB)
#>  Got R.utils 2.13.0 (x86_64-pc-linux-gnu-ubuntu-24.04) (1.45 MB)
#>  Got Rcpp 1.1.0 (x86_64-pc-linux-gnu-ubuntu-24.04) (2.19 MB)
#>  Installing system requirements
#>  Executing `sudo sh -c apt-get -y update`
#> Get:1 file:/etc/apt/apt-mirrors.txt Mirrorlist [144 B]
#> Hit:2 http://azure.archive.ubuntu.com/ubuntu noble InRelease
#> Hit:6 https://packages.microsoft.com/repos/azure-cli noble InRelease
#> Hit:7 https://packages.microsoft.com/ubuntu/24.04/prod noble InRelease
#> Hit:3 http://azure.archive.ubuntu.com/ubuntu noble-updates InRelease
#> Hit:4 http://azure.archive.ubuntu.com/ubuntu noble-backports InRelease
#> Hit:5 http://azure.archive.ubuntu.com/ubuntu noble-security InRelease
#> Reading package lists...
#>  Executing `sudo sh -c apt-get -y install make libcurl4-openssl-dev libssl-dev libpng-dev`
#> Reading package lists...
#> Building dependency tree...
#> Reading state information...
#> make is already the newest version (4.3-4.1build2).
#> libcurl4-openssl-dev is already the newest version (8.5.0-2ubuntu10.6).
#> libssl-dev is already the newest version (3.0.13-0ubuntu3.5).
#> libpng-dev is already the newest version (1.6.43-5build1).
#> 0 upgraded, 0 newly installed, 0 to remove and 41 not upgraded.
#>  Installed digest 0.6.37  (60ms)
#>  Installed fs 1.6.6  (77ms)
#>  Installed R.methodsS3 1.8.2  (128ms)
#>  Installed R.oo 1.27.1  (156ms)
#>  Installed R.utils 2.13.0  (58ms)
#>  Installed rappdirs 0.3.3  (59ms)
#>  Installed Rcpp 1.1.0  (67ms)
#>  Installed yulab.utils 0.2.1  (45ms)
#>  Building GOSemSim 2.34.0
#>  Built GOSemSim 2.34.0 (13.5s)
#>  Installed GOSemSim 2.34.0  (37ms)
#>  1 pkg + 45 deps: kept 37, added 9, dld 9 (6.03 MB) [17.9s]
#>  [2025-09-20 12:58:52] org.Mm.eg.db, GO.db, and GOSemSim installed successfully
#>  [2025-09-20 12:58:53] Preparing database: Chromosome
#>  [2025-09-20 12:58:53] Preparing database: GeneType
#>  [2025-09-20 12:58:54] Preparing database: Enzyme
#>  [2025-09-20 12:58:56] Preparing database: TF
#>  [2025-09-20 12:58:58] Error in utils::download.file(url = url, destfile = destfile, method = method, : cannot open URL 'https://guolab.wchscu.cn/AnimalTFDB4_static/download/TF_list_final/Mus_musculus_TF'
#> ! [2025-09-20 12:58:58] Failed to download using auto, from <https://guolab.wchscu.cn/AnimalTFDB4_static/download/TF_list_final/Mus_musculus_TF>
#>  [2025-09-20 12:59:00] Error in utils::download.file(url = url, destfile = destfile, method = method, : 'wget' call had nonzero exit status
#> ! [2025-09-20 12:59:00] Failed to download using wget, from <https://guolab.wchscu.cn/AnimalTFDB4_static/download/TF_list_final/Mus_musculus_TF>
#>  [2025-09-20 12:59:02] Error in utils::download.file(url = url, destfile = destfile, method = method, : cannot open URL 'https://guolab.wchscu.cn/AnimalTFDB4_static/download/TF_list_final/Mus_musculus_TF'
#> ! [2025-09-20 12:59:02] Failed to download using libcurl, from <https://guolab.wchscu.cn/AnimalTFDB4_static/download/TF_list_final/Mus_musculus_TF>
#> Warning: incomplete final line found by readTableHeader on '/tmp/Rtmp8qFX1m/file266a43d4373b'
#>  [2025-09-20 12:59:05] Error in utils::download.file(url = url, destfile = destfile, method = method, : cannot open URL 'https://guolab.wchscu.cn/AnimalTFDB4_static/download/Cof_list_final/Mus_musculus_Cof'
#> ! [2025-09-20 12:59:05] Failed to download using auto, from <https://guolab.wchscu.cn/AnimalTFDB4_static/download/Cof_list_final/Mus_musculus_Cof>
#>  [2025-09-20 12:59:08] Error in utils::download.file(url = url, destfile = destfile, method = method, : 'wget' call had nonzero exit status
#> ! [2025-09-20 12:59:08] Failed to download using wget, from <https://guolab.wchscu.cn/AnimalTFDB4_static/download/Cof_list_final/Mus_musculus_Cof>
#>  [2025-09-20 12:59:10] Error in utils::download.file(url = url, destfile = destfile, method = method, : cannot open URL 'https://guolab.wchscu.cn/AnimalTFDB4_static/download/Cof_list_final/Mus_musculus_Cof'
#> ! [2025-09-20 12:59:10] Failed to download using libcurl, from <https://guolab.wchscu.cn/AnimalTFDB4_static/download/Cof_list_final/Mus_musculus_Cof>
#> Warning: incomplete final line found by readTableHeader on '/tmp/Rtmp8qFX1m/file266a43d4373b'
#> ! [2025-09-20 12:59:12] Use the human annotation to create the TF database for Mus_musculus
#>  [2025-09-20 12:59:13] Error in utils::download.file(url = url, destfile = destfile, method = method, : cannot open URL 'https://guolab.wchscu.cn/AnimalTFDB4_static/download/TF_list_final/Homo_sapiens_TF'
#> ! [2025-09-20 12:59:13] Failed to download using auto, from <https://guolab.wchscu.cn/AnimalTFDB4_static/download/TF_list_final/Homo_sapiens_TF>
#>  [2025-09-20 12:59:15] Error in utils::download.file(url = url, destfile = destfile, method = method, : 'wget' call had nonzero exit status
#> ! [2025-09-20 12:59:15] Failed to download using wget, from <https://guolab.wchscu.cn/AnimalTFDB4_static/download/TF_list_final/Homo_sapiens_TF>
#>  [2025-09-20 12:59:18] Error in utils::download.file(url = url, destfile = destfile, method = method, : cannot open URL 'https://guolab.wchscu.cn/AnimalTFDB4_static/download/TF_list_final/Homo_sapiens_TF'
#> ! [2025-09-20 12:59:18] Failed to download using libcurl, from <https://guolab.wchscu.cn/AnimalTFDB4_static/download/TF_list_final/Homo_sapiens_TF>
#> Warning: incomplete final line found by readTableHeader on '/tmp/Rtmp8qFX1m/file266a43d4373b'
#>  [2025-09-20 12:59:21] Error in utils::download.file(url = url, destfile = destfile, method = method, : cannot open URL 'https://guolab.wchscu.cn/AnimalTFDB4_static/download/Cof_list_final/Homo_sapiens_Cof'
#> ! [2025-09-20 12:59:21] Failed to download using auto, from <https://guolab.wchscu.cn/AnimalTFDB4_static/download/Cof_list_final/Homo_sapiens_Cof>
#>  [2025-09-20 12:59:23] Error in utils::download.file(url = url, destfile = destfile, method = method, : 'wget' call had nonzero exit status
#> ! [2025-09-20 12:59:23] Failed to download using wget, from <https://guolab.wchscu.cn/AnimalTFDB4_static/download/Cof_list_final/Homo_sapiens_Cof>
#>  [2025-09-20 12:59:25] Error in utils::download.file(url = url, destfile = destfile, method = method, : cannot open URL 'https://guolab.wchscu.cn/AnimalTFDB4_static/download/Cof_list_final/Homo_sapiens_Cof'
#> ! [2025-09-20 12:59:25] Failed to download using libcurl, from <https://guolab.wchscu.cn/AnimalTFDB4_static/download/Cof_list_final/Homo_sapiens_Cof>
#> Warning: incomplete final line found by readTableHeader on '/tmp/Rtmp8qFX1m/file266a43d4373b'
#> Error in data.frame(Term = "TF", symbol = tf[["Symbol"]]): arguments imply differing number of rows: 1, 0
head(
  GetFeaturesData(
    pancreas_sub,
    assays = "RNA"
  )
)
#> data frame with 0 columns and 6 rows

if (FALSE) { # \dontrun{
# Annotate features using a GTF file
pancreas_sub <- AnnotateFeatures(
  pancreas_sub,
  gtf = "/refdata-gex-mm10-2020-A/genes/genes.gtf"
)
head(
  GetFeaturesData(
    pancreas_sub,
    assays = "RNA"
  )
)
} # }