# Source code for bioregistry.external.biocontext

"""Download BioContext."""

import json
from collections.abc import Mapping, Sequence
from pathlib import Path
from typing import Any, ClassVar

from pystow.utils import download

from bioregistry.constants import RAW_DIRECTORY, URI_FORMAT_KEY
from bioregistry.external.alignment_utils import Aligner, load_processed

# Public API of this module.
__all__ = [
    "BioContextAligner",
    "get_biocontext",
]

# Directory containing this module, resolved to an absolute path.
DIRECTORY = Path(__file__).parent.resolve()
# Destination of the raw BioContext download (see get_biocontext).
RAW_PATH = RAW_DIRECTORY / "biocontext.json"
# Processed prefix -> URI format mapping, written next to this module by
# get_biocontext and reused on later calls unless a download is forced.
PROCESSED_PATH = DIRECTORY / "processed.json"
# Location of the BioContext "commons" JSON-LD context on GitHub.
URL = "https://raw.githubusercontent.com/prefixcommons/biocontext/master/registry/commons_context.jsonld"
# URI formats containing any of these substrings are dropped by
# BioContextAligner.prepare_external.
SKIP_PARTS = {"identifiers.org", "purl.obolibrary.org"}


def get_biocontext(*, force_download: bool = False) -> dict[str, dict[str, Any]]:
    """Get the BioContext context map.

    :param force_download: If true, forces download. If false and the processed
        file is already cached, reuses it.
    :returns: The biocontext data dictionary. When freshly built, it maps each
        key of the JSON-LD ``@context`` to a record whose only entry, under
        ``URI_FORMAT_KEY``, is the stripped URI prefix followed by ``$1``. On a
        cache hit, the result of ``load_processed(PROCESSED_PATH)`` is returned
        as-is.

    .. seealso:: https://github.com/prefixcommons/biocontext
    """
    if PROCESSED_PATH.exists() and not force_download:
        return load_processed(PROCESSED_PATH)

    download(url=URL, path=RAW_PATH, force=force_download)

    # JSON text is UTF-8; decode it explicitly instead of relying on the
    # platform's locale encoding, which is not UTF-8 everywhere.
    with RAW_PATH.open(encoding="utf-8") as file:
        data = json.load(file)

    # Each context value is a URI prefix; appending "$1" turns it into a URI
    # format string with a placeholder for the local identifier.
    rv = {
        prefix: {URI_FORMAT_KEY: f"{uri_prefix.strip()}$1"}
        for prefix, uri_prefix in data["@context"].items()
    }

    with PROCESSED_PATH.open("w", encoding="utf-8") as file:
        json.dump(rv, file, indent=2, sort_keys=True)
    return rv
class BioContextAligner(Aligner):
    """Aligner for BioContext."""

    key = "biocontext"
    getter = get_biocontext
    curation_header: ClassVar[Sequence[str]] = [URI_FORMAT_KEY]

    def get_skip(self) -> Mapping[str, str]:
        """Return the BioContext prefixes to skip, each mapped to the reason."""
        skipped = {"fbql": "not a real resource, as far as I can tell"}
        return skipped

    def prepare_external(self, external_id: str, external_entry: dict[str, Any]) -> dict[str, Any]:
        """Select the data to keep from a single BioContext entry.

        :param external_id: The BioContext prefix (not used here).
        :param external_entry: The BioContext record; its ``URI_FORMAT_KEY``
            value is read.
        :returns: An empty dictionary if the URI format contains any of
            ``SKIP_PARTS``, otherwise a dictionary with only the URI format.
        """
        uri_format = external_entry[URI_FORMAT_KEY]
        for part in SKIP_PARTS:
            if part in uri_format:
                return {}
        return {URI_FORMAT_KEY: uri_format}

    def get_curation_row(self, external_id: str, external_entry: dict[str, Any]) -> Sequence[str]:
        """Build the curation row for an unaligned BioContext entry.

        The row has a single cell, the entry's URI format, matching
        ``curation_header``.
        """
        return [external_entry[URI_FORMAT_KEY]]


if __name__ == "__main__":
    BioContextAligner.cli()