From 494358926986bcb26a0a2e20cb5f9c51fc2d500d Mon Sep 17 00:00:00 2001 From: zmoon Date: Fri, 17 Apr 2026 13:55:37 -0500 Subject: [PATCH 1/6] Add Hardy --- docs/api.rst | 14 ++++ pyabc2/sources/hardy.py | 145 ++++++++++++++++++++++++++++++++++++++++ tests/test_sources.py | 50 ++++++++++++++ 3 files changed, 209 insertions(+) create mode 100644 pyabc2/sources/hardy.py diff --git a/docs/api.rst b/docs/api.rst index 308ab04..5326f5c 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -167,6 +167,20 @@ Functions: bill_black.load_meta +Paul Hardy +---------- + +.. automodule:: pyabc2.sources.hardy + +Functions: + +.. currentmodule:: pyabc2.sources + +.. autosummary:: + :toctree: api/ + + hardy.load_meta + abcjs tools =========== diff --git a/pyabc2/sources/hardy.py b/pyabc2/sources/hardy.py new file mode 100644 index 0000000..67f0b3f --- /dev/null +++ b/pyabc2/sources/hardy.py @@ -0,0 +1,145 @@ +""" +Load data from Paul Hardy's tunebooks (https://pghardy.net/tunebooks/). + +Requires: + +* `requests <https://requests.readthedocs.io/>`__ +""" + +import re +from pathlib import Path +from typing import Literal + +HERE = Path(__file__).parent + +SAVE_TO = HERE / "_hardy" + +_BASE_URL = "https://pghardy.net/tunebooks/" + +_TUNEBOOK_KEY_TO_URL = { + "session": _BASE_URL + "pgh_session_tunebook.abc", + "annex": _BASE_URL + "pgh_annex_tunebook.abc", + "basic": _BASE_URL + "pgh_basic_tunebook.abc", + "xmas": _BASE_URL + "pgh_xmas_tunebook.abc", + "possible": _BASE_URL + "pgh_possible_tunebook.abc", + "pete_mac": _BASE_URL + "pgh_pete_mac_tunebook.abc", + "clarke": _BASE_URL + "williamclarke_tunes.abc", +} + + +def download(key: str) -> None: + """Download the ABC file for the given tunebook key and cache it.""" + import requests + + key = key.lower() + try: + url = _TUNEBOOK_KEY_TO_URL[key] + except KeyError: + raise ValueError( + f"Unknown Hardy tunebook key: {key!r}. Valid options: {sorted(_TUNEBOOK_KEY_TO_URL)}." 
+ ) from None + + r = requests.get(url, timeout=10) + r.raise_for_status() + + SAVE_TO.mkdir(exist_ok=True) + (SAVE_TO / f"{key}.abc").write_text(r.text, encoding="utf-8") + + +def load_meta( + key: str, + *, + redownload: bool = False, + remove_prefs: str | tuple[str, ...] | Literal[False] = ("%",), +) -> list[str]: + """Load ABC tune blocks from a Paul Hardy tunebook, no parsing. + + Parameters + ---------- + key + Tunebook key. + + .. list-table:: + :header-rows: 1 + :widths: 15 85 + + * - Key + - Description + * - ``session`` + - Paul Hardy's Session Tunebook (the main tunebook) + * - ``annex`` + - Paul Hardy's Annex Tunebook (current edition; tunes awaiting next session edition) + * - ``basic`` + - Paul Hardy's Basic Tunebook (subset of simpler/common session tunes) + * - ``xmas`` + - Paul Hardy's Xmas Tunebook (Christmas tunes and carols) + * - ``possible`` + - Paul Hardy's Possible Tunebook (tunes not yet fully learned) + * - ``pete_mac`` + - Paul Hardy's Pete Mac Tunebook (CC0 tunes by Pete Mac) + * - ``clarke`` + - William Clarke of Feltwell Tunebook (19th century East Anglian manuscript) + + See https://pghardy.net/tunebooks/ for more information. + redownload + Re-download the data file. + remove_prefs + Remove lines starting with these prefixes (applied at load time; cached file is unmodified). + Defaults to ``("%",)``, which strips all ``%`` comment and ``%%`` directive lines. + Pass ``False`` or ``()`` to keep all lines. + + Returns + ------- + list of str + List of ABC tune block strings, one per tune. + + See Also + -------- + :doc:`/examples/sources` + """ + key = key.lower() + if key not in _TUNEBOOK_KEY_TO_URL: + raise ValueError( + f"Unknown Hardy tunebook key: {key!r}. Valid options: {sorted(_TUNEBOOK_KEY_TO_URL)}." 
+ ) + + if redownload or not (SAVE_TO / f"{key}.abc").is_file(): + print("downloading...", end=" ", flush=True) + download(key) + print("done") + + text = (SAVE_TO / f"{key}.abc").read_text(encoding="utf-8") + + if not remove_prefs: + remove_prefs = () + elif isinstance(remove_prefs, str): + remove_prefs = (remove_prefs,) + + # Split into tune blocks by finding X: at start of line + # (Each tune block begins with X:) + parts = re.split(r"(?m)^(?=X:)", text) + abcs = [] + for part in parts: + part = part.strip() + if not part.startswith("X:"): + continue + + # Strip trailing % directives unconditionally + lines = part.splitlines() + while lines and lines[-1].lstrip().startswith("%"): + lines.pop() + part = "\n".join(lines).strip() + + if not part: + continue + + if remove_prefs: + lines = [ + line for line in part.splitlines() if not line.lstrip().startswith(remove_prefs) + ] + part = "\n".join(lines).strip() + + if part: + abcs.append(part) + + return abcs diff --git a/tests/test_sources.py b/tests/test_sources.py index 7d21a41..60e0eb4 100644 --- a/tests/test_sources.py +++ b/tests/test_sources.py @@ -10,6 +10,7 @@ bill_black_tunefolders, eskin, examples, + hardy, load_example, load_example_abc, load_url, @@ -563,3 +564,52 @@ def test_the_session_consume_validation(): def test_the_session_consume_auto_leading_slash(): (d,) = the_session._consume("tunes/22878") assert d["name"] == "Jack Farrell's" + + +@pytest.mark.parametrize("key", list(hardy._TUNEBOOK_KEY_TO_URL)) +def test_hardy_load_meta(key): + abcs = hardy.load_meta(key) + assert isinstance(abcs, list) + assert len(abcs) > 0 + for abc in abcs: + assert abc.startswith("X:") + assert any(line.startswith("T:") for line in abc.splitlines()) + + +def test_hardy_load_meta_remove_prefs(): + # Default: no % lines + key = "session" + abcs = hardy.load_meta(key) + assert not any(line.lstrip().startswith("%") for abc in abcs for line in abc.splitlines()) + + # With remove_prefs=False: % lines preserved + abcs_raw = 
hardy.load_meta(key, remove_prefs=False) + assert sum(line.lstrip().startswith("%") for abc in abcs_raw for line in abc.splitlines()) > 1 + + +def test_hardy_bad_key(): + with pytest.raises(ValueError, match="Unknown Hardy tunebook key"): + _ = hardy.load_meta("asdf") + + +def test_hardy_annex_is_latest(): + """Confirm the hardcoded 'annex' URL points to the current (non-superseded) annex file.""" + import requests + + r = requests.get("https://pghardy.net/tunebooks/", timeout=10) + r.raise_for_status() + html = r.text + + # The current annex abc link should appear outside the 'superceded' path + annex_url = hardy._TUNEBOOK_KEY_TO_URL["annex"] + annex_fn = annex_url.split("/")[-1] # e.g. 'pgh_annex_tunebook.abc' + + # Find all abc links on the page + abc_links = re.findall(r'href="([^"]*\.abc)"', html) + + # The non-superseded link matching our filename should exist + non_superseded = [lnk for lnk in abc_links if annex_fn in lnk and "superceded" not in lnk] + assert non_superseded, ( + f"No non-superseded link found for {annex_fn!r} on the tunebooks page. " + f"The 'annex' key URL may need updating. Found links: {abc_links}" + ) From 7a5ceccdd70ec35c025dbed7e0d5912993aabcd4 Mon Sep 17 00:00:00 2001 From: zmoon Date: Fri, 17 Apr 2026 14:10:55 -0500 Subject: [PATCH 2/6] Fix issue with empty lines in tune block results --- pyabc2/sources/hardy.py | 4 +++- tests/test_sources.py | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pyabc2/sources/hardy.py b/pyabc2/sources/hardy.py index 67f0b3f..2fb9ea3 100644 --- a/pyabc2/sources/hardy.py +++ b/pyabc2/sources/hardy.py @@ -108,7 +108,9 @@ def load_meta( download(key) print("done") - text = (SAVE_TO / f"{key}.abc").read_text(encoding="utf-8") + # Read as binary to avoid universal-newlines mangling of \r\r\n (Hardy's line ending) + # and then convert to \n. 
+ text = (SAVE_TO / f"{key}.abc").read_bytes().decode("utf-8").replace("\r", "") if not remove_prefs: remove_prefs = () diff --git a/tests/test_sources.py b/tests/test_sources.py index 60e0eb4..b0515ee 100644 --- a/tests/test_sources.py +++ b/tests/test_sources.py @@ -574,6 +574,7 @@ def test_hardy_load_meta(key): for abc in abcs: assert abc.startswith("X:") assert any(line.startswith("T:") for line in abc.splitlines()) + assert "\n\n" not in abc, "no empty lines within a tune block" def test_hardy_load_meta_remove_prefs(): From be974618e17b4786292c3914b677d246900a3e22 Mon Sep 17 00:00:00 2001 From: zmoon Date: Fri, 17 Apr 2026 14:11:30 -0500 Subject: [PATCH 3/6] Add small Hardy example to sources nb --- docs/examples/sources.ipynb | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/docs/examples/sources.ipynb b/docs/examples/sources.ipynb index e62baa9..5fbb208 100644 --- a/docs/examples/sources.ipynb +++ b/docs/examples/sources.ipynb @@ -18,7 +18,7 @@ "outputs": [], "source": [ "from pyabc2 import Tune\n", - "from pyabc2.sources import load_example, norbeck, the_session, eskin, bill_black" + "from pyabc2.sources import load_example, norbeck, the_session, eskin, bill_black, hardy" ] }, { @@ -450,6 +450,37 @@ "source": [ "Tune(abcs[0])" ] + }, + { + "cell_type": "markdown", + "id": "39", + "metadata": {}, + "source": [ + "## Hardy\n", + "\n", + "Paul Hardy has a tunebook collection available at <https://pghardy.net/tunebooks/>. We can load selected tunebooks as a list of tune blocks (strings) with {func}`pyabc2.sources.hardy.load_meta`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40", + "metadata": {}, + "outputs": [], + "source": [ + "abcs = hardy.load_meta(\"basic\")\n", + "len(abcs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41", + "metadata": {}, + "outputs": [], + "source": [ + "Tune(abcs[0])" + ] } ], "metadata": { From ce9a4992b64b40a07a94f4222d0ba9957692543b Mon Sep 17 00:00:00 2001 From: zmoon Date: Fri, 17 Apr 2026 14:14:21 -0500 Subject: [PATCH 4/6] changelog --- docs/changes.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/changes.md b/docs/changes.md index dff3c09..185b728 100644 --- a/docs/changes.md +++ b/docs/changes.md @@ -7,6 +7,7 @@ You can now activate abcjs responsive mode in {class}`~pyabc2.abcjs.widget.ABCJSWidget`, but non-responsive is still the default. +* Add Paul Hardy ({mod}`pyabc2.sources.hardy`; {pull}`105`). ## v0.1.2 (2026-02-03) From e1e2067cd146aa80b9a8d0a28a63bce8717f1fa5 Mon Sep 17 00:00:00 2001 From: zmoon Date: Fri, 17 Apr 2026 15:17:56 -0500 Subject: [PATCH 5/6] Remove returns block for stylistic consistency --- pyabc2/sources/hardy.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pyabc2/sources/hardy.py b/pyabc2/sources/hardy.py index 2fb9ea3..b22e9a0 100644 --- a/pyabc2/sources/hardy.py +++ b/pyabc2/sources/hardy.py @@ -88,11 +88,6 @@ def load_meta( Defaults to ``("%",)``, which strips all ``%`` comment and ``%%`` directive lines. Pass ``False`` or ``()`` to keep all lines. - Returns - ------- - list of str - List of ABC tune block strings, one per tune. 
- See Also -------- :doc:`/examples/sources` From f5ba0673011152a42d28cd1df4ae0ff3379efa18 Mon Sep 17 00:00:00 2001 From: zmoon Date: Fri, 17 Apr 2026 15:21:26 -0500 Subject: [PATCH 6/6] cov --- pyabc2/sources/hardy.py | 2 +- tests/test_sources.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/pyabc2/sources/hardy.py b/pyabc2/sources/hardy.py index b22e9a0..168eb49 100644 --- a/pyabc2/sources/hardy.py +++ b/pyabc2/sources/hardy.py @@ -127,7 +127,7 @@ def load_meta( lines.pop() part = "\n".join(lines).strip() - if not part: + if not part: # pragma: no cover continue if remove_prefs: diff --git a/tests/test_sources.py b/tests/test_sources.py index b0515ee..50e7840 100644 --- a/tests/test_sources.py +++ b/tests/test_sources.py @@ -581,6 +581,8 @@ def test_hardy_load_meta_remove_prefs(): # Default: no % lines key = "session" abcs = hardy.load_meta(key) + abcs_single_pref_as_str = hardy.load_meta(key, remove_prefs="%") + assert abcs == abcs_single_pref_as_str assert not any(line.lstrip().startswith("%") for abc in abcs for line in abc.splitlines()) # With remove_prefs=False: % lines preserved @@ -593,6 +595,11 @@ def test_hardy_bad_key(): _ = hardy.load_meta("asdf") +def test_hardy_download_bad_key(): + with pytest.raises(ValueError, match="Unknown Hardy tunebook key"): + _ = hardy.download("asdf") + + def test_hardy_annex_is_latest(): """Confirm the hardcoded 'annex' URL points to the current (non-superseded) annex file.""" import requests