Source code for bioregistry.external.biolink

"""Download Biolink."""

import json
from collections.abc import Mapping, Sequence
from pathlib import Path
from typing import Any, ClassVar

import yaml
from pystow.utils import download

from bioregistry.constants import RAW_DIRECTORY, URI_FORMAT_KEY
from bioregistry.external.alignment_utils import Aligner, load_processed

__all__ = [
    "BiolinkAligner",
    "get_biolink",
]

URL = "https://raw.githubusercontent.com/biolink/biolink-model/master/biolink-model.yaml"

DIRECTORY = Path(__file__).parent.resolve()
RAW_PATH = RAW_DIRECTORY / "biolink.yaml"
PROCESSED_PATH = DIRECTORY / "processed.json"

PROCESSING_BIOLINK_PATH = DIRECTORY / "processing_biolink.json"






class BiolinkAligner(Aligner):
    """Aligner for Biolink."""

    key = "biolink"
    getter = get_biolink
    curation_header: ClassVar[Sequence[str]] = [URI_FORMAT_KEY, "identifiers", "purl"]

    def get_skip(self) -> Mapping[str, str]:
        """Get the skipped Biolink identifiers."""
        with PROCESSING_BIOLINK_PATH.open() as file:
            j = json.load(file)
        return {entry["prefix"]: entry["reason"] for entry in j["skip"]}

    def prepare_external(self, external_id: str, external_entry: dict[str, Any]) -> dict[str, Any]:
        """Prepare Biolink data to be added to the Biolink for each BioPortal registry entry."""
        uri_format = external_entry[URI_FORMAT_KEY]
        return {
            URI_FORMAT_KEY: uri_format,
            "is_identifiers": uri_format.startswith("http://identifiers.org"),
            "is_obo": uri_format.startswith("http://purl.obolibrary.org"),
        }

    def get_curation_row(self, external_id: str, external_entry: dict[str, Any]) -> Sequence[str]:
        """Prepare curation rows for unaligned Biolink registry entries."""
        uri_format = external_entry[URI_FORMAT_KEY]
        return [
            uri_format,
            uri_format.startswith("http://identifiers.org"),
            uri_format.startswith("http://purl.obolibrary.org"),
        ]


if __name__ == "__main__":
    BiolinkAligner.cli()