This function can convert different gene ID types within one species or between two species using the biomart service.
Usage
GeneConvert(
  geneID,
  geneID_from_IDtype = "symbol",
  geneID_to_IDtype = "entrez_id",
  species_from = "Homo_sapiens",
  species_to = NULL,
  Ensembl_version = 103,
  biomart = NULL,
  mirror = NULL,
  max_tries = 5
)
Arguments
- geneID
- A character vector of gene IDs.
- geneID_from_IDtype
- Gene ID type of the input geneID, e.g. "symbol", "ensembl_id", "entrez_id".
- geneID_to_IDtype
- Gene ID type(s) to convert to. e.g. "symbol", "ensembl_id", "entrez_id" 
- species_from
- Latin name of the species that the input gene IDs belong to, e.g. "Homo_sapiens", "Mus_musculus".
- species_to
- Latin name of the species that the output gene IDs belong to, e.g. "Homo_sapiens", "Mus_musculus".
- Ensembl_version
- Ensembl database version. If NULL, use the current release version. 
- biomart
- The name of the BioMart database that you want to connect to. Possible options include "ensembl", "protists_mart", "fungi_mart", and "plants_mart". 
- mirror
- Specify an Ensembl mirror to connect to. The valid options here are 'www', 'uswest', 'useast', 'asia'. 
- max_tries
- The maximum number of attempts to connect with the BioMart service. 
Value
A list with the following elements:
- geneID_res: A data.frame containing all gene IDs mapped in the database, with columns: 'from_IDtype', 'from_geneID', 'to_IDtype', 'to_geneID'.
- geneID_collapse: A data.frame containing all the successfully converted gene IDs, with the output gene IDs collapsed into a list. As a result, the 'from_geneID' column (which is also set as the row names) of the data.frame is unique.
- geneID_expand: A data.frame containing all the successfully converted gene IDs, with the output gene IDs expanded.
- Ensembl_version: Ensembl database version.
- Datasets: Datasets available in the selected BioMart database.
- Attributes: Attributes available in the selected BioMart database.
- geneID_unmapped: A character vector of gene IDs that are unmapped in the database.
Examples
res <- GeneConvert(
  geneID = c("CDK1", "MKI67", "TOP2A", "AURKA", "CTCF"),
  geneID_from_IDtype = "symbol",
  geneID_to_IDtype = "entrez_id",
  species_from = "Homo_sapiens",
  species_to = "Mus_musculus",
  Ensembl_version = 103
)
#> Connect to the Ensembl archives...
#> Using the 103 version of biomart...
#> Connecting to the biomart...
#> Searching the dataset hsapiens ...
#> Connecting to the dataset hsapiens_gene_ensembl ...
#> Searching the dataset mmusculus ...
#> Connecting to the dataset mmusculus_gene_ensembl ...
#> Converting the geneIDs...
#> 5 genes mapped with ensembl_symbol
#> ==============================
#> 5 genes mapped
#> 0 genes unmapped
#> ==============================
#> Error in unnest(data = geneID_collapse, cols = colnames(geneID_collapse)[sapply(geneID_collapse,     class) %in% c("list", "AsIs")], keep_empty = FALSE): could not find function "unnest"
str(res)
#> Error: object 'res' not found
# Convert the human genes to mouse homologs and replace the raw counts in a Seurat object.
data("pancreas_sub")
counts <- get_seurat_data(pancreas_sub, slot = "counts", assay = "RNA")
#> Error in get_seurat_data(pancreas_sub, slot = "counts", assay = "RNA"): unused argument (slot = "counts")
res <- GeneConvert(
  geneID = rownames(counts),
  geneID_from_IDtype = "symbol",
  geneID_to_IDtype = "symbol",
  species_from = "Mus_musculus",
  species_to = "Homo_sapiens",
  Ensembl_version = 103
)
#> Connect to the Ensembl archives...
#> Using the 103 version of biomart...
#> Connecting to the biomart...
#> Searching the dataset mmusculus ...
#> Connecting to the dataset mmusculus_gene_ensembl ...
#> Converting the geneIDs...
#> ==============================
#> 0 genes mapped
#> 0 genes unmapped
#> ==============================
#> Warning: None of the gene IDs were converted
# Check the number of input and converted gene IDs
input_genes <- length(rownames(counts))
db_genes <- length(unique(res$geneID_res$from_geneID))
converted_genes_input <- length(unique(res$geneID_collapse$from_geneID))
converted_genes_output <- length(unique(res$geneID_expand$symbol))
cat("Number of input gene IDs:", input_genes, "\n")
#> Number of input gene IDs: 0 
cat("Number of gene IDs mapped in the database:", db_genes, "\n")
#> Number of gene IDs mapped in the database: 0 
cat("Number of input gene IDs that were successfully converted:", converted_genes_input, "\n")
#> Number of input gene IDs that were successfully converted: 0 
cat("Number of converted gene IDs:", converted_genes_output, "\n")
#> Number of converted gene IDs: 0 
homologs_counts <- aggregate(
  x = counts[res$geneID_expand[, "from_geneID"], ],
  by = list(res$geneID_expand[, "symbol"]), FUN = sum
)
#> Error in (function (cond) .Internal(C_tryCatchHelper(addr, 1L, cond)))(structure(list(message = "object of type 'closure' is not subsettable",     call = counts[res$geneID_expand[, "from_geneID"], ], object = new("standardGeneric",         .Data = function (object, ...)         standardGeneric("counts"), generic = structure("counts", package = "BiocGenerics"),         package = "BiocGenerics", group = list(), valueClass = character(0),         signature = "object", default = NULL, skeleton = (function (object,             ...)         stop(gettextf("invalid call in method dispatch to '%s' (no default method)",             "counts"), domain = NA))(object, ...))), class = c("notSubsettableError", "error", "condition"))): error in evaluating the argument 'x' in selecting a method for function 'aggregate': object of type 'closure' is not subsettable
rownames(homologs_counts) <- homologs_counts[, 1]
#> Error: object 'homologs_counts' not found
homologs_counts <- as(as_matrix(homologs_counts[, -1]), "dgCMatrix")
#> Error in as_matrix(homologs_counts[, -1]): could not find function "as_matrix"
homologs_counts
#> Error: object 'homologs_counts' not found