# Source code for bioregistry.external.n2t

# -*- coding: utf-8 -*-

"""Download registry information from N2T."""

import json

import click
import yaml
from pystow.utils import download

from bioregistry.constants import EXTERNAL, URI_FORMAT_KEY

# Location of the full N2T prefix listing (YAML) that get_n2t() downloads.
URL = "https://n2t.net/e/n2t_full_prefixes.yaml"
# Directory for the N2T files; note that it is created as a side effect of
# importing this module.
DIRECTORY = EXTERNAL / "n2t"
DIRECTORY.mkdir(exist_ok=True, parents=True)
# The YAML file exactly as downloaded from URL.
RAW_PATH = DIRECTORY / "raw.yml"
# The filtered/renamed records, cached as JSON and reused on later calls.
PROCESSED_PATH = DIRECTORY / "processed.json"
# Prefixes that get_n2t() leaves out of the processed output.
# Only the keys are consulted; each value records the reason for skipping.
SKIP = {
    "zzztestprefix": "test prefix should not be considered",
    "urn": "too meta",
    "url": "too meta",
    "purl": "too meta",
    "lsid": "too meta",
    "hdl": "paid service, too meta",
    "repec": "irrelevant prefix from economics",
    "merops": "issue with miriam having duplicate prefixes for this resource",  # FIXME
    "hgnc.family": "issue with miriam having duplicate prefixes for this resource",  # FIXME
}


def get_n2t(force_download: bool = False):
    """Get the N2T registry.

    :param force_download: If true, ignore any cached processed file and
        download and re-process the raw data from N2T.
    :returns: A dictionary from prefix to its processed record. Only records
        whose ``type`` is ``"scheme"``, whose key contains no ``/``, and whose
        key is not listed in ``SKIP`` are included.
    """
    # Reuse the cached, already-processed JSON unless a refresh was requested.
    if PROCESSED_PATH.exists() and not force_download:
        with PROCESSED_PATH.open(encoding="utf-8") as file:
            return json.load(file)

    # Once we get here the raw file is always fetched again, even if a
    # previous copy is still on disk.
    download(url=URL, path=RAW_PATH, force=True)
    # they give malformed YAML so time to write a new parser
    with RAW_PATH.open(encoding="utf-8") as file:
        data = yaml.safe_load(file)

    rv = {
        key: _process(record)
        for key, record in data.items()
        # ``.get`` so that a record lacking a ``type`` is skipped, not a crash
        if record.get("type") == "scheme" and "/" not in key and key not in SKIP
    }
    with PROCESSED_PATH.open("w", encoding="utf-8") as file:
        json.dump(rv, file, sort_keys=True, indent=2)
    return rv


def _process(record):
    """Reduce a raw N2T record to the fields kept in the processed output.

    :param record: A mapping describing one prefix, as parsed from the N2T
        YAML file.
    :returns: A dictionary with the renamed fields; entries whose value is
        ``None`` are dropped. ``namespaceEmbeddedInLui`` is always present
        because it is always a boolean.
    """
    # A ``redirect`` key that is present but null is treated like a missing one.
    redirect = record.get("redirect")
    prefixed = record.get("prefixed")
    rv = {
        "name": record.get("name"),
        # The ``$id`` placeholder in the redirect URL is rewritten as ``$1``.
        URI_FORMAT_KEY: redirect.replace("$id", "$1") if redirect is not None else None,
        "description": record.get("description"),
        "homepage": record.get("more"),
        "pattern": record.get("pattern"),
        "example": record.get("test"),
        # Accept both the string "true" and a real boolean: yaml.safe_load
        # returns ``True`` for an unquoted ``true`` scalar.
        # NOTE(review): confirm which form the N2T file actually uses.
        "namespaceEmbeddedInLui": prefixed is True or prefixed == "true",
    }
    return {k: v for k, v in rv.items() if v is not None}


@click.command()
def main():
    """Reload the N2T data."""
    rv = get_n2t(force_download=True)
    click.echo(f"Got {len(rv)} entries from n2t.")


if __name__ == "__main__":
    main()