# Source code for bioregistry.external.ontobee

# -*- coding: utf-8 -*-

"""Download registry information from OntoBee."""

import json

from bs4 import BeautifulSoup
from pystow.utils import download

from bioregistry.constants import EXTERNAL

# Directory under the shared EXTERNAL folder that holds the OntoBee artifacts.
DIRECTORY = EXTERNAL / "ontobee"
# NOTE: this runs at import time, so importing the module creates the directory.
DIRECTORY.mkdir(exist_ok=True, parents=True)
# Where the downloaded OntoBee HTML page is stored.
RAW_PATH = DIRECTORY / "raw.html"
# Where the parsed registry is cached as JSON.
PROCESSED_PATH = DIRECTORY / "processed.json"

# Page that get_ontobee() downloads and parses.
URL = "http://www.ontobee.org/"
# Maps the (upper-cased) code read from the fourth cell of each table row
# to the label stored under the "library" key.
LEGEND = {
    "F": "Foundry",
    "L": "Library",
    "N": "Not Specified/No",
}


def get_ontobee(force_download: bool = False):
    """Get the OntoBee registry.

    The OntoBee front page is downloaded to ``RAW_PATH``, the rows of the
    table with id ``ontologyList`` are parsed, and the result is cached as
    JSON at ``PROCESSED_PATH``.

    :param force_download: If true, download and parse the page again even
        when the processed JSON cache already exists.
    :returns: A dictionary keyed by the text of each row's second cell (the
        prefix). Each value is a dictionary with a ``name`` (text of the
        third cell) and a ``library`` (the ``LEGEND`` label for the
        upper-cased text of the fourth cell). On a cache hit, the content of
        the processed JSON file is returned as loaded.
    :raises KeyError: If a row's library code is not a key of ``LEGEND``.
    """
    if PROCESSED_PATH.exists() and not force_download:
        with PROCESSED_PATH.open(encoding="utf-8") as file:
            return json.load(file)

    download(url=URL, path=RAW_PATH, force=True)
    # Hand BeautifulSoup the raw bytes so that it determines the document's
    # encoding itself instead of relying on the platform's default text
    # encoding, which is not UTF-8 everywhere.
    with RAW_PATH.open("rb") as file:
        soup = BeautifulSoup(file, "html.parser")

    rv = {}
    for row in soup.find(id="ontologyList").find("tbody").find_all("tr"):
        cells = row.find_all("td")
        prefix = cells[1].text
        # NOTE: the prefix cell also contains an anchor whose href could be
        # recorded as a link; it is currently not stored.
        rv[prefix] = {
            "name": cells[2].text,
            "library": LEGEND[cells[3].text.upper()],
        }

    with PROCESSED_PATH.open("w", encoding="utf-8") as file:
        json.dump(rv, file, indent=2, sort_keys=True)
    return rv


# Script entry point: build the OntoBee registry (or load it from the cache).
if __name__ == "__main__":
    get_ontobee()