diff --git a/docs/api.rst b/docs/api.rst index 308ab04..5326f5c 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -167,6 +167,20 @@ Functions: bill_black.load_meta +Paul Hardy +---------- + +.. automodule:: pyabc2.sources.hardy + +Functions: + +.. currentmodule:: pyabc2.sources + +.. autosummary:: + :toctree: api/ + + hardy.load_meta + abcjs tools =========== diff --git a/docs/changes.md b/docs/changes.md index 3814aad..67923bb 100644 --- a/docs/changes.md +++ b/docs/changes.md @@ -8,6 +8,7 @@ {class}`~pyabc2.abcjs.widget.ABCJSWidget`, but non-responsive is still the default. * Update for changes in ({pull}`104`). +* Add Paul Hardy to sources ({mod}`pyabc2.sources.hardy`; {pull}`105`). ## v0.1.2 (2026-02-03) diff --git a/docs/examples/sources.ipynb b/docs/examples/sources.ipynb index e62baa9..5fbb208 100644 --- a/docs/examples/sources.ipynb +++ b/docs/examples/sources.ipynb @@ -18,7 +18,7 @@ "outputs": [], "source": [ "from pyabc2 import Tune\n", - "from pyabc2.sources import load_example, norbeck, the_session, eskin, bill_black" + "from pyabc2.sources import load_example, norbeck, the_session, eskin, bill_black, hardy" ] }, { @@ -450,6 +450,37 @@ "source": [ "Tune(abcs[0])" ] + }, + { + "cell_type": "markdown", + "id": "39", + "metadata": {}, + "source": [ + "## Hardy\n", + "\n", + "Paul Hardy has a tunebook collection available at <https://pghardy.net/tunebooks/>. We can load selected tunebooks as a list of tune blocks (strings) with {func}`pyabc2.sources.hardy.load_meta`."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40", + "metadata": {}, + "outputs": [], + "source": [ + "abcs = hardy.load_meta(\"basic\")\n", + "len(abcs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41", + "metadata": {}, + "outputs": [], + "source": [ + "Tune(abcs[0])" + ] } ], "metadata": { diff --git a/pyabc2/sources/hardy.py b/pyabc2/sources/hardy.py new file mode 100644 index 0000000..168eb49 --- /dev/null +++ b/pyabc2/sources/hardy.py @@ -0,0 +1,142 @@ +""" +Load data from Paul Hardy's tunebooks (https://pghardy.net/tunebooks/). + +Requires: + +* `requests `__ +""" + +import re +from pathlib import Path +from typing import Literal + +HERE = Path(__file__).parent + +SAVE_TO = HERE / "_hardy" + +_BASE_URL = "https://pghardy.net/tunebooks/" + +_TUNEBOOK_KEY_TO_URL = { + "session": _BASE_URL + "pgh_session_tunebook.abc", + "annex": _BASE_URL + "pgh_annex_tunebook.abc", + "basic": _BASE_URL + "pgh_basic_tunebook.abc", + "xmas": _BASE_URL + "pgh_xmas_tunebook.abc", + "possible": _BASE_URL + "pgh_possible_tunebook.abc", + "pete_mac": _BASE_URL + "pgh_pete_mac_tunebook.abc", + "clarke": _BASE_URL + "williamclarke_tunes.abc", +} + + +def download(key: str) -> None: + """Download the ABC file for the given tunebook key and cache it.""" + import requests + + key = key.lower() + try: + url = _TUNEBOOK_KEY_TO_URL[key] + except KeyError: + raise ValueError( + f"Unknown Hardy tunebook key: {key!r}. Valid options: {sorted(_TUNEBOOK_KEY_TO_URL)}." + ) from None + + r = requests.get(url, timeout=10) + r.raise_for_status() + + SAVE_TO.mkdir(exist_ok=True) + (SAVE_TO / f"{key}.abc").write_text(r.text, encoding="utf-8") + + +def load_meta( + key: str, + *, + redownload: bool = False, + remove_prefs: str | tuple[str, ...] | Literal[False] = ("%",), +) -> list[str]: + """Load ABC tune blocks from a Paul Hardy tunebook, no parsing. + + Parameters + ---------- + key + Tunebook key. + + .. 
list-table:: + :header-rows: 1 + :widths: 15 85 + + * - Key + - Description + * - ``session`` + - Paul Hardy's Session Tunebook (the main tunebook) + * - ``annex`` + - Paul Hardy's Annex Tunebook (current edition; tunes awaiting next session edition) + * - ``basic`` + - Paul Hardy's Basic Tunebook (subset of simpler/common session tunes) + * - ``xmas`` + - Paul Hardy's Xmas Tunebook (Christmas tunes and carols) + * - ``possible`` + - Paul Hardy's Possible Tunebook (tunes not yet fully learned) + * - ``pete_mac`` + - Paul Hardy's Pete Mac Tunebook (CC0 tunes by Pete Mac) + * - ``clarke`` + - William Clarke of Feltwell Tunebook (19th century East Anglian manuscript) + + See https://pghardy.net/tunebooks/ for more information. + redownload + Re-download the data file. + remove_prefs + Remove lines starting with these prefixes (applied at load time; cached file is unmodified). + Defaults to ``("%",)``, which strips all ``%`` comment and ``%%`` directive lines. + Pass ``False`` or ``()`` to keep all lines. + + See Also + -------- + :doc:`/examples/sources` + """ + key = key.lower() + if key not in _TUNEBOOK_KEY_TO_URL: + raise ValueError( + f"Unknown Hardy tunebook key: {key!r}. Valid options: {sorted(_TUNEBOOK_KEY_TO_URL)}." + ) + + if redownload or not (SAVE_TO / f"{key}.abc").is_file(): + print("downloading...", end=" ", flush=True) + download(key) + print("done") + + # Read as binary to avoid universal-newlines mangling of \r\r\n (Hardy's line ending) + # and then convert to \n. 
+ text = (SAVE_TO / f"{key}.abc").read_bytes().decode("utf-8").replace("\r", "") + + if not remove_prefs: + remove_prefs = () + elif isinstance(remove_prefs, str): + remove_prefs = (remove_prefs,) + + # Split into tune blocks by finding X: at start of line + # (Each tune block begins with X:) + parts = re.split(r"(?m)^(?=X:)", text) + abcs = [] + for part in parts: + part = part.strip() + if not part.startswith("X:"): + continue + + # Strip trailing % directives unconditionally + lines = part.splitlines() + while lines and lines[-1].lstrip().startswith("%"): + lines.pop() + part = "\n".join(lines).strip() + + if not part: # pragma: no cover + continue + + if remove_prefs: + lines = [ + line for line in part.splitlines() if not line.lstrip().startswith(remove_prefs) + ] + part = "\n".join(lines).strip() + + if part: + abcs.append(part) + + return abcs diff --git a/tests/test_sources.py b/tests/test_sources.py index d24c254..425c8c4 100644 --- a/tests/test_sources.py +++ b/tests/test_sources.py @@ -10,6 +10,7 @@ bill_black_tunefolders, eskin, examples, + hardy, load_example, load_example_abc, load_url, @@ -549,3 +550,60 @@ def test_the_session_consume_validation(): def test_the_session_consume_auto_leading_slash(): (d,) = the_session._consume("tunes/22878") assert d["name"] == "Jack Farrell's" + + +@pytest.mark.parametrize("key", list(hardy._TUNEBOOK_KEY_TO_URL)) +def test_hardy_load_meta(key): + abcs = hardy.load_meta(key) + assert isinstance(abcs, list) + assert len(abcs) > 0 + for abc in abcs: + assert abc.startswith("X:") + assert any(line.startswith("T:") for line in abc.splitlines()) + assert "\n\n" not in abc, "no empty lines within a tune block" + + +def test_hardy_load_meta_remove_prefs(): + # Default: no % lines + key = "session" + abcs = hardy.load_meta(key) + abcs_single_pref_as_str = hardy.load_meta(key, remove_prefs="%") + assert abcs == abcs_single_pref_as_str + assert not any(line.lstrip().startswith("%") for abc in abcs for line in abc.splitlines()) 
+ + # With remove_prefs=False: % lines preserved + abcs_raw = hardy.load_meta(key, remove_prefs=False) + assert sum(line.lstrip().startswith("%") for abc in abcs_raw for line in abc.splitlines()) > 1 + + +def test_hardy_bad_key(): + with pytest.raises(ValueError, match="Unknown Hardy tunebook key"): + _ = hardy.load_meta("asdf") + + +def test_hardy_download_bad_key(): + with pytest.raises(ValueError, match="Unknown Hardy tunebook key"): + _ = hardy.download("asdf") + + +def test_hardy_annex_is_latest(): + """Confirm the hardcoded 'annex' URL points to the current (non-superseded) annex file.""" + import requests + + r = requests.get("https://pghardy.net/tunebooks/", timeout=10) + r.raise_for_status() + html = r.text + + # The current annex abc link should appear outside the 'superceded' path + annex_url = hardy._TUNEBOOK_KEY_TO_URL["annex"] + annex_fn = annex_url.split("/")[-1] # e.g. 'pgh_annex_tunebook.abc' + + # Find all abc links on the page + abc_links = re.findall(r'href="([^"]*\.abc)"', html) + + # The non-superseded link matching our filename should exist + non_superseded = [lnk for lnk in abc_links if annex_fn in lnk and "superceded" not in lnk] + assert non_superseded, ( + f"No non-superseded link found for {annex_fn!r} on the tunebooks page. " + f"The 'annex' key URL may need updating. Found links: {abc_links}" + )