# Source code for bioregistry.external.cheminf

"""Download the Chemical Information Ontology registry (children of ``CHEMINF:000464``).

To convert CHEMINF from OWL to OBO Graph JSON, do the following:

.. code-block:: sh

    $ robot convert --input cheminf.owl --format json --output cheminf.json

See the OBO Foundry workflow for preparing a Docker container that has ROBOT available.
"""

import json
from collections.abc import Mapping, Sequence
from pathlib import Path
from typing import Any, ClassVar

from bioregistry.external.alignment_utils import Aligner, load_processed
from bioregistry.utils import get_ols_descendants

# Names exported by ``from bioregistry.external.cheminf import *``.
__all__ = [
    "ChemInfAligner",
    "get_cheminf",
]

# Resolved directory that contains this module.
DIRECTORY = Path(__file__).parent.resolve()
# JSON file next to this module; get_cheminf() reads it as a cache and rewrites it
# after a fresh download.
PROCESSED_PATH = DIRECTORY / "processed.json"

# Doubly percent-encoded form of http://semanticscience.org/resource/CHEMINF_000464
# ("%25" is an encoded "%", so "%253A" decodes to "%3A" and then to ":").
# Passed as the ``uri`` argument to get_ols_descendants() in get_cheminf().
BASE_URL = "http%253A%252F%252Fsemanticscience.org%252Fresource%252FCHEMINF_000464"
# Identifiers to skip, each mapped to a human-readable reason for skipping it.
# Returned unchanged by ChemInfAligner.get_skip().
SKIP = {
    "000467": "Not enough information available on this term.",
    "000234": "PubChem Conformer isn't actually an identifier, just a part of PubChem Compound database",
    "000303": "Double mapping onto `genbank`",
}


def get_cheminf(force_download: bool = False) -> dict[str, dict[str, Any]]:
    """Get the Chemical Information Ontology registry.

    :param force_download: If true, ignore any existing ``PROCESSED_PATH`` file
        and rebuild the registry; the flag is also forwarded to
        :func:`get_ols_descendants`.
    :returns: The registry, either loaded from ``PROCESSED_PATH`` via
        :func:`load_processed` or freshly obtained from
        :func:`get_ols_descendants`.
    """
    needs_refresh = not PROCESSED_PATH.exists() or force_download
    if not needs_refresh:
        # A processed copy is already on disk and no refresh was requested.
        return load_processed(PROCESSED_PATH)

    registry = get_ols_descendants(
        ontology="cheminf",
        uri=BASE_URL,
        force_download=force_download,
    )
    # Persist the result so that later calls can take the cached path above.
    with PROCESSED_PATH.open("w") as handle:
        json.dump(registry, handle, indent=2, sort_keys=True)
    return registry
class ChemInfAligner(Aligner):
    """Aligner for the Chemical Information Ontology."""

    # Key identifying this external registry.
    key = "cheminf"
    # Function used to load the registry contents.
    getter = get_cheminf
    # NOTE(review): presumably the fields listed in the curation output;
    # confirm against the Aligner base class.
    curation_header: ClassVar[Sequence[str]] = ["name", "description"]

    def get_skip(self) -> Mapping[str, str]:
        """Get the skipped identifiers.

        :returns: The module-level ``SKIP`` mapping of identifier to the
            reason it is skipped.
        """
        return SKIP


if __name__ == "__main__":
    ChemInfAligner.cli()