Source code for bioregistry.external.bioportal

# -*- coding: utf-8 -*-

"""Download the NCBO BioPortal registry.

Get an API key by logging up, signing in, and navigating to

import json
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional

import pystow
import requests
from tqdm.contrib.concurrent import thread_map

from bioregistry.constants import EXTERNAL

__all__ = [


class OntoPortalClient:
    """A client for an OntoPortal site, like BioPortal."""

    metaprefix: str
    base_url: str
    api_key: Optional[str] = None
    directory: Path = field(init=False)
    raw_path: Path = field(init=False)
    processed_path: Path = field(init=False)

    def __post_init__(self): = EXTERNAL.joinpath(self.metaprefix), parents=True)
        self.raw_path ="raw.json")
        self.processed_path ="processed.json")

    def query(self, url: str, **params) -> requests.Response:
        """Query the given endpoint on the OntoPortal site.

        :param url: URL to query
        :param params: Kwargs to give as params to :func:`requests.get`
        :returns: The response from :func:`requests.get`

        The rate limit is 15 queries per second. See:
        if self.api_key is None:
            self.api_key = pystow.get_config(self.metaprefix, "api_key", raise_on_missing=True)
        params.setdefault("apikey", self.api_key)
        return requests.get(url, params=params)

    def download(self, force_download: bool = False):
        """Get the full dump of the OntoPortal site's registry."""
        if self.processed_path.exists() and not force_download:
            with as file:
                return json.load(file)

        # see
        res = self.query(self.base_url + "/ontologies", summaryOnly=False, notes=True)
        records = res.json()
        with"w") as file:
            json.dump(records, file, indent=2, sort_keys=True)

        records = thread_map(self.process, records, disable=True)
        rv = {result["prefix"]: result for result in records}

        with"w") as file:
            json.dump(rv, file, indent=2, sort_keys=True)
        return rv

    def process(self, entry):
        """Process a record from the OntoPortal site's API."""
        prefix = entry["acronym"]
        extra_data = {}
        # res = query(f'{BASE_URL}/ontologies/{bioportal_key}/latest_submission')
        # if res:
        #     extra_data = res.json()
        # else:
        #     print('failed on', bioportal_key)
        #     extra_data = {}

        contact = (extra_data.get("contact") or [{}])[0]
        rv = {
            "prefix": prefix,
            "name": entry["name"],
            "description": extra_data.get("description"),
            "contact": contact.get("email"),
            "contact.label": contact.get("name"),
            "homepage": extra_data.get("homepage"),
            "version": extra_data.get("version"),
            "publication": extra_data.get("publication"),
        return {k: v for k, v in rv.items() if v is not None}

bioportal_client = OntoPortalClient(

[docs]def get_bioportal(force_download: bool = False): """Get the BioPortal registry.""" return
ecoportal_client = OntoPortalClient( metaprefix="ecoportal", base_url=ECOPORTAL_BASE_URL, ) def get_ecoportal(force_download: bool = False): """Get the EcoPortal registry.""" return agroportal_client = OntoPortalClient( metaprefix="agroportal", base_url=AGROPORTAL_BASE_URL, ) def get_agroportal(force_download: bool = False): """Get the AgroPortal registry.""" return if __name__ == "__main__": print("AgroPortal has", len(get_agroportal(force_download=True))) # noqa:T201 print("EcoPortal has", len(get_ecoportal(force_download=True))) # noqa:T201 print("BioPortal has", len(get_bioportal(force_download=True))) # noqa:T201