Source code for bioregistry.external.go

# -*- coding: utf-8 -*-

"""Download the Gene Ontology registry."""

import json

import click
import yaml
from pystow.utils import download

from bioregistry.constants import EXTERNAL

__all__ = [
    "get_go",
]

# Xrefs from GO that aren't generally useful
SKIP = {
    "TO_GIT",
    "OBO_SF_PO",
    "OBO_SF2_PO",
    "OBO_SF2_PECO",
    "PECO_GIT",
    "PO_GIT",
    "PSO_GIT",
    "EO_GIT",
}

# The key is redundant of the value
REDUNDANT = {
    "AspGD": "AspGD_LOCUS",
}

DIRECTORY = EXTERNAL / "go"
DIRECTORY.mkdir(exist_ok=True, parents=True)
RAW_PATH = DIRECTORY / "raw.yml"
PROCESSED_PATH = DIRECTORY / "processed.json"
GO_URL = "https://raw.githubusercontent.com/geneontology/go-site/master/metadata/db-xrefs.yaml"


[docs]def get_go(force_download: bool = False): """Get the GO registry.""" if PROCESSED_PATH.exists() and not force_download: with PROCESSED_PATH.open() as file: return json.load(file) download(url=GO_URL, path=RAW_PATH, force=True) with RAW_PATH.open() as file: entries = yaml.full_load(file) entries = [ entry for entry in entries if entry["database"] not in SKIP and entry["database"] not in REDUNDANT ] rv = {entry["database"]: entry for entry in entries} with PROCESSED_PATH.open("w") as file: json.dump(rv, file, indent=2, sort_keys=True) return rv
@click.command() def main(): """Reload the GO data.""" get_go(force_download=True) if __name__ == "__main__": main()