Source code for bioregistry.external.pathguide

"""Download registry information from Pathguide."""

from collections.abc import Sequence
from typing import Any, ClassVar

import requests
from bs4 import BeautifulSoup

from bioregistry.external.alignment_utils import Aligner

__all__ = [
    "PathguideAligner",
    "get_pathguide",
]


def get_pathguide(*, force_download: bool = False) -> dict[str, dict[str, Any]]:
    """Get the Pathguide metadata.

    Requests the Pathguide homepage and turns every table row whose ``id``
    starts with ``ResourceRow_`` into one record.

    :param force_download: Not read by this function; the page is requested
        on every call.
    :returns: A dictionary keyed by Pathguide identifier (the row ``id`` with
        the ``ResourceRow_`` prefix removed). Each value has the keys
        ``prefix``, ``abbreviation``, ``name``, and ``homepage``, plus
        ``keywords`` (a sorted list of the standards listed in the row) when
        at least one standard is present.
    :raises requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    res = requests.get("http://pathguide.org/", timeout=15)
    # Fail loudly on an error response rather than parsing an error page
    # and silently returning an empty result.
    res.raise_for_status()
    soup = BeautifulSoup(res.text, "html.parser")
    rv: dict[str, dict[str, Any]] = {}
    for tr in soup.find_all("tr"):
        tr_id = tr.attrs.get("id")
        if not tr_id or not tr_id.startswith("ResourceRow_"):
            continue
        pathguide_id = tr_id.removeprefix("ResourceRow_")
        # A resource row is expected to have exactly four children; only the
        # first (name/link) and the last (standards) are used here.
        name_td, _, _availability_td, standards_td = list(tr)
        name_a = name_td.find("a")
        homepage = name_a.attrs["href"]
        # The link text has the form "<abbreviation> - <name>"
        abbreviation, name = (s.strip() for s in name_a.text.split(" - ", 1))
        # Deduplicate via a set, then sort for a deterministic output
        standards = sorted(
            {td.text for td in standards_td.find_all("td") if td.attrs.get("class") == ["Standard"]}
        )
        rv[pathguide_id] = {
            "prefix": pathguide_id,
            "abbreviation": abbreviation,
            "name": name,
            "homepage": homepage,
        }
        if standards:
            rv[pathguide_id]["keywords"] = standards
    return rv
class PathguideAligner(Aligner):
    """An :class:`Aligner` subclass configured for Pathguide."""

    # Identifier for this external registry
    key = "pathguide"
    # presumably the record field used as an alternate match key -- confirm against Aligner
    alt_key_match = "abbreviation"
    # Function that produces the Pathguide records
    getter = get_pathguide
    # Record fields listed in the curation header
    curation_header: ClassVar[Sequence[str]] = (
        "abbreviation",
        "name",
        "homepage",
    )


if __name__ == "__main__":
    PathguideAligner.cli()