3

I have the taxonomic IDs of a set of species, and I can get the species and genus names from NCBI (https://www.ncbi.nlm.nih.gov/Taxonomy/TaxIdentifier/tax_identifier.cgi). However, I also want the phylum, class, order, and the other higher ranks for these data.

I have tried the taxize package, but it does not work for a large dataset.

# Taxon names to look up on NCBI, passed as `query` to tax_name() below.
# Every entry is a single-word name, and the same name is often listed many
# times in a row (e.g. "Acinetobacter", "Campylobacter", "Streptococcus").
specieslist <- c("Clostridium", "Clostridium",  "Achromobacter",    "Achromobacter",    "Acinetobacter",    "Acinetobacter",    "Acinetobacter",    "Acinetobacter",    "Acinetobacter",    "Acinetobacter",    "Acinetobacter",    "Acinetobacter",    "Acinetobacter",    "Acinetobacter",    "Acinetobacter",    "Acinetobacter",    "Actinomyces",  "Actinomyces",  "Aeromonas",    "Agrococcus",   "Alcanivorax",  "Alkalihalobacillus",   "Alloprevotella",   "Aminobacterium",   "Amniculibacterium",    "Anaerocolumna",    "Anaerocolumna",    "Anaerocolumna",    "Asticcacaulis",    "Atopobium",    "Bacillus", "Bacillus", "Bacteroidales",    "Bacteroides",  "Bacteroides",  "Bacteroides",  "Bacteroides",  "Bacteroides",  "Bacteroides",  "Barnesiella",  "Bifidobacterium",  "Blochmannia",  "Bordetella",   "Brevibacillus",    "Buchnera", "Burkholderia", "Butyricimonas",    "Campylobacter",    "Campylobacter",    "Campylobacter",    "Campylobacter",    "Campylobacter",    "Campylobacter",    "Campylobacter",    "Campylobacter",    "Campylobacter",    "Campylobacter",    "Campylobacter",    "Campylobacter",    "Campylobacter",    "Capnocytophaga",   "Capnocytophaga",   "Capnocytophaga",   "Chroococcidiopsis",    "Citrobacter",  "Clostridium",  "Clostridium",  "Clostridium",  "Clostridium",  "Corynebacterium",  "Corynebacterium",  "Corynebacterium",  "Corynebacterium",  "Cutibacterium",    "Dialister",    "Dolosigranulum",   "Enterobacter", "Enterococcus", "Entomoplasma", "Escherichia",  "Escherichia",  "Escherichia",  "Eubacterium",  "Fermentimonas",    "Frankia",  "Fusobacterium",    "Fusobacterium",    "Fusobacterium",    "Fusobacterium",    "Fusobacterium",    "Fusobacterium",    "Gemella",  "Haemophilus",  "Haemophilus",  "Halomonas",    "Hydrogenophaga",   "Ilyobacter",   "Klebsiella",   "Klebsiella",   "Klebsiella",   "Klebsiella",   "Klebsiella",   "Kocuria",  "Kytococcus",   "Lachnoanaerobaculum",  "Lachnospira",  "Lachnospiraceae",  "Lachnospiraceae",  "Lacrimispora", 
"Lactobacillus",    "Lactobacillus",    "Lactobacillus",    "Lactobacillus",    "Lactobacillus",    "Lancefieldella",   "Lautropia",    "Leptotrichia", "Leptotrichia", "Leptotrichia", "Leptotrichia", "Leptotrichia", "Leptotrichia", "Leptotrichia", "Leptotrichia", "Ligilactobacillus",    "Limosilactobacillus",  "Luteimonas",   "Lysinibacillus",   "Lysobacter",   "Lysobacter",   "Lysobacter",   "Magnetospirillum", "Marivirga",    "Megasphaera",  "Megasphaera",  "Meiothermus",  "Methylobacterium", "Methylobacterium", "Methylobacterium", "Methylobacterium", "Methylobacterium", "Microbacterium",   "Microbacterium",   "Microbacterium",   "Microbacterium",   "Micrococcus",  "Muribaculaceae",   "Muribaculum",  "Muribaculum",  "Neisseria",    "Neisseria",    "Neisseria",    "Neisseria",    "Neisseria",    "Neisseria",    "Neisseria",    "Nocardioides", "Nocardioides", "Paludibacter", "Pantoea",  "Paracoccus",   "Paracoccus",   "Paraprevotella",   "Pasteurella",  "Petrimonas",   "Phenylobacterium", "Phocaeicola",  "Phocaeicola",  "Phocaeicola",  "Phyllobacterium",  "Polaribacter", "Pontibacter",  "Pontibacter",  "Porphyromonas",    "Porphyromonas",    "Porphyromonas",    "Porphyromonas",    "Prevotella",   "Prevotella",   "Prevotella",   "Prevotella",   "Prevotella",   "Prevotella",   "Prevotella",   "Prevotella",   "Prevotella",   "Prevotella",   "Prevotella",   "Proteus",  "Pseudoleptotrichia",   "Pseudomonas",  "Pseudonocardia",   "Pseudonocardia",   "Raoultella",   "Rheinheimera", "Romboutsia",   "Roseivirga",   "Roseococcus",  "Rothia",   "Rothia",   "Rubrobacter",  "Rubrobacter",  "Rufibacter",   "Saccharomonospora",    "Saccharopolyspora",    "Saccharopolyspora",    "Salinivirga",  "Salmonella",   "Schaalia", "Sedimentisphaera", "Selenomonas",  "Selenomonas",  "Selenomonas",  "Selenomonas",  "Selenomonas",  "Selenomonas",  "Shigella", "Skermanella",  "Sphingosinicella", "Spirosoma",    "Staphylococcus",   "Staphylococcus",   "Stenotrophomonas", "Streptococcus",    
"Streptococcus",    "Streptococcus",    "Streptococcus",    "Streptococcus",    "Streptococcus",    "Streptococcus",    "Streptococcus",    "Streptococcus",    "Streptococcus",    "Streptococcus",    "Streptococcus",    "Streptococcus",    "Streptomyces", "Tannerella",   "Tannerella",   "Thermovirga",  "Treponema",    "Treponema",    "Treponema",    "Treponema",    "Treponema",    "Treponema",    "Veillonella",  "Veillonella",  "Veillonella",  "Veillonella",  "Veillonella")


# Look up each distinct name only once. `specieslist` repeats most names many
# times, and every element of `query` triggers its own remote NCBI lookup, so
# de-duplicating removes the bulk of the requests.
# NOTE(review): NCBI rate-limits anonymous clients; registering an Entrez API
# key (see taxize::use_entrez()) should help further -- confirm for your setup.
unique_names <- unique(specieslist)
lineage <- tax_name(
  query = unique_names,
  get = c("phylum", "class", "order", "family", "genus"),
  db = "ncbi"
)

# Expand back to one row per original entry, in the original order, so `t`
# keeps the same shape as a direct tax_name(specieslist, ...) call would give.
t <- lineage[match(specieslist, lineage$query), ]
rownames(t) <- NULL

Any suggestions, please?

zx8754
  • 52,746
  • 12
  • 114
  • 209
Shaminur
  • 51
  • 6

2 Answers

1

(taxize maintainer here)

Another option, if taxize is too slow for you, is taxizedb, which uses NCBI as its data source by default. taxizedb is similar to taxize but uses local database dumps instead of making HTTP requests; the trade-off is the initial setup time needed to download the databases.

# Install taxizedb only when it is missing, so re-running the script does not
# reinstall the package every time.
if (!requireNamespace("taxizedb", quietly = TRUE)) {
  install.packages("taxizedb")
}
library(taxizedb)

# `x` is a placeholder for the character vector of taxon names to resolve; it
# is not defined in this snippet and must be supplied by the caller.
# NOTE(review): out_type = "summary" is expected to return a table with an
# `id` column (used below) -- confirm against the taxizedb documentation.
ids <- name2taxid(x, out_type = "summary")

# Fetch the lineage for every taxonomy ID; the wanted ranks can then be pulled
# out of each returned data.frame.
classification(ids$id)

Then you can pull out whatever ranks you want from each data.frame

sckott
  • 5,755
  • 2
  • 26
  • 42
0

https://bioinf.shenwei.me/taxonkit/usage/#usage-and-examples

# Resolve the taxonomy IDs listed in txtid.txt to lineages. `tee` echoes the
# result to the terminal and also saves it to lineage.txt for the next step.
taxonkit lineage txtid.txt | tee lineage.txt

# Build a table with one column per rank:
#   1. reformat the saved lineages (read directly from the file, no `cat`),
#   2. keep columns 1 and 3 (presumably the taxid and the reformatted lineage),
#   3. split column 2 of that result on ';' into separate columns,
#   4. add a header row and pretty-print the table.
taxonkit reformat lineage.txt \
    | csvtk -H -t cut -f 1,3 \
    | csvtk -H -t sep -f 2 -s ';' -R \
    | csvtk add-header -t -n taxid,kingdom,phylum,class,order,family,genus,species \
    | csvtk pretty -t
Shaminur
  • 51
  • 6