From 9649dcca2467eb5a58a25f75efe4c5cc8b0d9367 Mon Sep 17 00:00:00 2001 From: Ira Iosub Date: Wed, 22 Apr 2026 16:33:43 +0100 Subject: [PATCH 1/9] fix location of the claude skill --- .claude/{.claude => }/skills/protocol-migration/SKILL.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .claude/{.claude => }/skills/protocol-migration/SKILL.md (100%) diff --git a/.claude/.claude/skills/protocol-migration/SKILL.md b/.claude/skills/protocol-migration/SKILL.md similarity index 100% rename from .claude/.claude/skills/protocol-migration/SKILL.md rename to .claude/skills/protocol-migration/SKILL.md From 0f0cfd71a861a1226ca21f67e9807e26c99df72d Mon Sep 17 00:00:00 2001 From: Ira Iosub Date: Wed, 22 Apr 2026 16:54:32 +0100 Subject: [PATCH 2/9] closes #5 - improve validation --- .github/workflows/validate-protocol.yml | 32 ++-- README.md | 9 +- docs/USING_THIS_TEMPLATE.md | 5 +- scripts/validate_protocol.py | 162 ++---------------- scripts/validate_protocol_content.py | 124 ++++++++++++++ scripts/validate_protocol_style.py | 128 ++++++++++++++ ...l.py => test_validate_protocol_content.py} | 31 ++-- tests/test_validate_protocol_style.py | 91 ++++++++++ 8 files changed, 396 insertions(+), 186 deletions(-) create mode 100644 scripts/validate_protocol_content.py create mode 100644 scripts/validate_protocol_style.py rename tests/{test_validate_protocol.py => test_validate_protocol_content.py} (72%) create mode 100644 tests/test_validate_protocol_style.py diff --git a/.github/workflows/validate-protocol.yml b/.github/workflows/validate-protocol.yml index bd4ea95..e3999e5 100644 --- a/.github/workflows/validate-protocol.yml +++ b/.github/workflows/validate-protocol.yml @@ -6,19 +6,23 @@ on: - main paths: - README.md - - legacy/source.txt - scripts/validate_protocol.py - - tests/test_validate_protocol.py - - .github/workflows/validate_protocol.yml + - scripts/validate_protocol_content.py + - scripts/validate_protocol_style.py + - tests/test_validate_protocol_content.py + - tests/test_validate_protocol_style.py + - .github/workflows/validate-protocol.yml push: branches: - main paths: - README.md - - legacy/source.txt - scripts/validate_protocol.py - - tests/test_validate_protocol.py - - .github/workflows/validate_protocol.yml + - scripts/validate_protocol_content.py + - scripts/validate_protocol_style.py + - tests/test_validate_protocol_content.py + - tests/test_validate_protocol_style.py + - .github/workflows/validate-protocol.yml workflow_dispatch: jobs: @@ -35,7 +39,7 @@ jobs: with: python-version: "3.11" - - name: Check whether validation should run + - name: Check whether README validation should run id: validation_gate run: | set -euo pipefail @@ -56,15 +60,15 @@ jobs: echo "should_validate=true" >> "$GITHUB_OUTPUT" - name: Run validator tests - if: steps.validation_gate.outputs.should_validate == 'true' run: | python -m unittest discover -s tests -p 'test_*.py' - - name: Run protocol validation + - name: Run content validation if: steps.validation_gate.outputs.should_validate == 'true' run: | - if [ -f legacy/source.txt ]; then - python scripts/validate_protocol.py README.md legacy/source.txt - else - python scripts/validate_protocol.py README.md - fi + python scripts/validate_protocol_content.py README.md + + - name: Run style validation + if: steps.validation_gate.outputs.should_validate == 'true' + run: | + python scripts/validate_protocol_style.py README.md diff --git a/README.md b/README.md index a17d4eb..86ead9b 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ 4. [Step 4](#4-step-4) 5. [Step 5](#5-step-5) 6. [Step 6](#6-step-6) -7. [Buffers](#buffers) +7. [Materials](#7-materials) 8. [Migration notes](#migration-notes) --- @@ -120,7 +120,10 @@ --- -# 7. Buffers +# 7. Materials + +## 7.1 Buffers +## 7.2 Reagents --- @@ -131,4 +134,4 @@ ## Unplaced content -## CHECK items \ No newline at end of file +## CHECK items diff --git a/docs/USING_THIS_TEMPLATE.md b/docs/USING_THIS_TEMPLATE.md index 0f81379..ac521e2 100644 --- a/docs/USING_THIS_TEMPLATE.md +++ b/docs/USING_THIS_TEMPLATE.md @@ -74,7 +74,7 @@ The main file you must edit for protocol content is `README.md`. Do not rename t 7. Follow the guidelines in [3. General guidelines for the protocol file (`README.md`)](#3-general-guidelines-for-the-protocol-file-readmemd) 8. Commit your changes, then push. 9. Once you are happy with the result, open a pull request from `import-protocol` into `main`. -10. A validation GitHub Actions workflow will run on that pull request when `README.md` has changed. It checks the required title, status line, status legend, key headings, unresolved placeholders, and placeholder step names. If checks fail, fix them before merging into `main`. +10. A validation GitHub Actions workflow will run on that pull request when `README.md` has changed. It runs a content check for the required title, status line, status legend, key headings, unresolved placeholders, and placeholder step names, plus a style check for unit formatting. If checks fail, fix them before merging into `main`. 11. Ask for a reviewer. > **Note:** Always check accuracy and make sure required sections, such as protocol status and the status legend, are present. @@ -121,7 +121,7 @@ This route can save time. It helps keep the template structure consistent, norma 14. Follow the guidelines in [3. General guidelines for the protocol file (`README.md`)](#3-general-guidelines-for-the-protocol-file-readmemd) 15. Commit your changes, then push. 16. Once you are happy with the result, open a pull request from `import-protocol` into `main`. -17. A validation GitHub Actions workflow will run on that pull request when `README.md` has changed. It checks the required title, status line, status legend, key headings, unresolved placeholders, and placeholder step names. If `legacy/source.txt` is present, it also checks that key quantities from the source appear in `README.md`. If checks fail, fix them before merging into `main`. +17. A validation GitHub Actions workflow will run on that pull request when `README.md` has changed. It runs a content check for the required title, status line, status legend, key headings, unresolved placeholders, and placeholder step names, plus a style check for unit formatting. If checks fail, fix them before merging into `main`. 18. Ask for a reviewer. --- @@ -141,6 +141,7 @@ Mandatory items for validation: - a status legend row containing `[OK]`, `[?]`, and `[X]` - a short description (`# About`) - contents (`## Contents`) +- a materials section (`# ... Materials`) Recommended content: diff --git a/scripts/validate_protocol.py b/scripts/validate_protocol.py index 04081f5..c2ef7ba 100644 --- a/scripts/validate_protocol.py +++ b/scripts/validate_protocol.py @@ -1,157 +1,19 @@ -"""Validate a protocol README against template requirements and optional source text.""" +"""Backward-compatible entrypoint for protocol README validation.""" from pathlib import Path -import re import sys -from typing import List, Optional, Tuple +from typing import List -HEADING_RE = re.compile(r"^(#{1,6})\s+(.+?)\s*$", re.MULTILINE) -STATUS_LINE_RE = re.compile( - r"^### Status:\s+.*`\[(?:OK|\?|X)\]`.*$", - re.MULTILINE, -) -STATUS_LEGEND_RE = re.compile( - r"^\| \*\*\*Status legend\*\*\*:.*`\[OK\]`.*`\[\?\]`.*`\[X\]`.*\|$", - re.MULTILINE, -) -PLACEHOLDER_STEP_HEADING_RE = re.compile( - r"^#{1,6}\s+\d+(?:\.\d+)*(?:\.)?\s+(?:Step|Sub-step)\b.*$", - re.MULTILINE, -) -PLACEHOLDER_CONTENTS_RE = re.compile( - r"^\d+\.\s+\[Step\s+\d+\]\(#.*$", - re.MULTILINE, -) +try: + from scripts.validate_protocol_content import validate_readme as validate_content + from scripts.validate_protocol_style import validate_readme_style +except ModuleNotFoundError: + from validate_protocol_content import validate_readme as validate_content + from validate_protocol_style import validate_readme_style -REQUIRED_HEADINGS = [ - (1, "About"), - (2, "Contents"), -] -BAD_PLACEHOLDERS = { - "TODO": re.compile(r"\bTODO\b"), - "TBD": re.compile(r"\bTBD\b"), - "XXX": re.compile(r"\bXXX\b"), - "CHECK:": re.compile(r"CHECK:"), -} - -DISALLOWED_TEMPLATE_TEXT = [ - "> Template repository: Click `Use this template` to create a new protocol repo. Template docs are in [docs/USING_THIS_TEMPLATE.md](https://github.com/ulelab/protocol-template/blob/main/docs/USING_THIS_TEMPLATE.md)", -] - - -def extract_headings(text: str) -> List[Tuple[int, str]]: - return [(len(level), title.strip()) for level, title in HEADING_RE.findall(text)] - - -def normalize_text(text: str) -> str: - return re.sub(r"\s+", " ", text.lower().replace("µ", "u")) - - -def canonicalize_measurement(token: str) -> str: - token = normalize_text(token) - token = token.replace("°", "") - token = re.sub(r"\s+", "", token) - - time_match = re.fullmatch(r"(\d+(?:\.\d+)?)(seconds|second|secs|sec|s|minutes|minute|mins|min|hours|hour|hrs|hr)", token) - if time_match: - value, unit = time_match.groups() - unit_map = { - "seconds": "s", - "second": "s", - "secs": "s", - "sec": "s", - "s": "s", - "minutes": "min", - "minute": "min", - "mins": "min", - "min": "min", - "hours": "h", - "hour": "h", - "hrs": "h", - "hr": "h", - } - return f"{value}{unit_map[unit]}" - - temp_match = re.fullmatch(r"(\d+(?:\.\d+)?)(c)", token) - if temp_match: - value, unit = temp_match.groups() - return f"{value}{unit}" - - volume_or_mass_match = re.fullmatch(r"(\d+(?:\.\d+)?)(ul|ml|l|g|mg|kg|ng|ug)", token) - if volume_or_mass_match: - value, unit = volume_or_mass_match.groups() - return f"{value}{unit}" - - percent_match = re.fullmatch(r"(\d+(?:\.\d+)?)%", token) - if percent_match: - return token - - return token - - -def extract_key_tokens(text: str) -> List[str]: - patterns = [ - r"\b\d+(?:\.\d+)?\s*(?:µL|uL|mL|L|g|mg|kg|ng|µg)\b", - r"\b\d+(?:\.\d+)?\s*(?:seconds|second|minutes|minute|hours|hour|s|sec|secs|min|mins|hr|hrs)\b", - r"\b\d+(?:\.\d+)?\s*°?\s*C\b", - r"\b\d+(?:\.\d+)?%\b", - ] - hits = [] - for pattern in patterns: - hits.extend(re.findall(pattern, text, flags=re.IGNORECASE)) - return sorted(set(hits)) - - -def validate_readme(readme: str, source: Optional[str] = None) -> List[str]: - failures: List[str] = [] - headings = extract_headings(readme) - top_level_headings = [title for level, title in headings if level == 1] - - if not headings: - failures.append("README does not contain any Markdown headings.") - elif not top_level_headings: - failures.append("README must contain a top-level protocol title ('# ...').") - else: - first_title = top_level_headings[0] - if first_title == "About": - failures.append("Missing top-level protocol title before '# About'.") - - for level, title in REQUIRED_HEADINGS: - if (level, title) not in headings: - failures.append(f"Missing heading: {'#' * level} {title}") - - if not STATUS_LINE_RE.search(readme): - failures.append("Missing or malformed status line: expected '### Status: ...'.") - - if not STATUS_LEGEND_RE.search(readme): - failures.append( - "Missing or malformed status legend row with `[OK]`, `[?]`, and `[X]`." - ) - - for token, pattern in BAD_PLACEHOLDERS.items(): - if pattern.search(readme): - failures.append(f"Found unresolved placeholder: {token}") - - for text in DISALLOWED_TEMPLATE_TEXT: - if text in readme: - failures.append(f"Found template-only text that must be removed: {text}") - - for match in PLACEHOLDER_STEP_HEADING_RE.findall(readme): - failures.append(f"Found placeholder step heading: {match}") - - for match in PLACEHOLDER_CONTENTS_RE.findall(readme): - failures.append(f"Found placeholder contents entry: {match}") - - if source is not None: - readme_tokens = { - canonicalize_measurement(token) for token in extract_key_tokens(readme) - } - for token in extract_key_tokens(source): - if canonicalize_measurement(token) not in readme_tokens: - failures.append(f"Source token missing from README: {token}") - - return list(dict.fromkeys(failures)) +def validate_readme(readme: str) -> List[str]: + return list(dict.fromkeys(validate_content(readme) + validate_readme_style(readme))) def main() -> None: @@ -160,9 +22,7 @@ def main() -> None: sys.exit(1) readme = Path(sys.argv[1]).read_text(encoding="utf-8") - source = Path(sys.argv[2]).read_text(encoding="utf-8") if len(sys.argv) == 3 else None - - failures = validate_readme(readme, source) + failures = validate_readme(readme) if failures: print("VALIDATION FAILED") diff --git a/scripts/validate_protocol_content.py b/scripts/validate_protocol_content.py new file mode 100644 index 0000000..fe1213f --- /dev/null +++ b/scripts/validate_protocol_content.py @@ -0,0 +1,124 @@ +"""Validate protocol README content against template requirements.""" + +from pathlib import Path +import re +import sys +from typing import List, Tuple + +HEADING_RE = re.compile(r"^(#{1,6})\s+(.+?)\s*$", re.MULTILINE) +STATUS_LINE_RE = re.compile( + r"^### Status:\s+.*`\[(?:OK|\?|X)\]`.*$", + re.MULTILINE, +) +STATUS_LEGEND_RE = re.compile( + r"^\| \*\*\*Status legend\*\*\*:.*`\[OK\]`.*`\[\?\]`.*`\[X\]`.*\|$", + re.MULTILINE, +) +PLACEHOLDER_STEP_HEADING_RE = re.compile( + r"^#{1,6}\s+\d+(?:\.\d+)*(?:\.)?\s+(?:Step|Sub-step)\b.*$", + re.MULTILINE, +) +PLACEHOLDER_CONTENTS_RE = re.compile( + r"^\d+\.\s+\[Step\s+\d+\]\(#.*$", + re.MULTILINE, +) + +REQUIRED_HEADINGS = [ + (1, "About"), + (2, "Contents"), + (1, "Materials"), +] + +BAD_PLACEHOLDERS = { + "TODO": re.compile(r"\bTODO\b"), + "TBD": re.compile(r"\bTBD\b"), + "XXX": re.compile(r"\bXXX\b"), + "CHECK:": re.compile(r"CHECK:"), +} + +DISALLOWED_TEMPLATE_TEXT = [ + "> Template repository: Click `Use this template` to create a new protocol repo. Template docs are in [docs/USING_THIS_TEMPLATE.md](https://github.com/ulelab/protocol-template/blob/main/docs/USING_THIS_TEMPLATE.md)", +] + + +def extract_headings(text: str) -> List[Tuple[int, str]]: + return [(len(level), title.strip()) for level, title in HEADING_RE.findall(text)] + + +def normalize_heading_title(title: str) -> str: + return re.sub(r"^\d+(?:\.\d+)*(?:\.)?\s+", "", title).strip() + + +def has_required_heading( + headings: List[Tuple[int, str]], + required_level: int, + required_title: str, +) -> bool: + return any( + level == required_level and normalize_heading_title(title) == required_title + for level, title in headings + ) + + +def validate_readme(readme: str) -> List[str]: + failures: List[str] = [] + headings = extract_headings(readme) + top_level_headings = [title for level, title in headings if level == 1] + + if not headings: + failures.append("README does not contain any Markdown headings.") + elif not top_level_headings: + failures.append("README must contain a top-level protocol title ('# ...').") + else: + first_title = normalize_heading_title(top_level_headings[0]) + if first_title == "About": + failures.append("Missing top-level protocol title before '# About'.") + + for level, title in REQUIRED_HEADINGS: + if not has_required_heading(headings, level, title): + failures.append(f"Missing heading: {'#' * level} {title}") + + if not STATUS_LINE_RE.search(readme): + failures.append("Missing or malformed status line: expected '### Status: ...'.") + + if not STATUS_LEGEND_RE.search(readme): + failures.append( + "Missing or malformed status legend row with `[OK]`, `[?]`, and `[X]`." + ) + + for token, pattern in BAD_PLACEHOLDERS.items(): + if pattern.search(readme): + failures.append(f"Found unresolved placeholder: {token}") + + for text in DISALLOWED_TEMPLATE_TEXT: + if text in readme: + failures.append(f"Found template-only text that must be removed: {text}") + + for match in PLACEHOLDER_STEP_HEADING_RE.findall(readme): + failures.append(f"Found placeholder step heading: {match}") + + for match in PLACEHOLDER_CONTENTS_RE.findall(readme): + failures.append(f"Found placeholder contents entry: {match}") + + return list(dict.fromkeys(failures)) + + +def main() -> None: + if len(sys.argv) != 2: + print("Usage: python validate_protocol_content.py README.md") + sys.exit(1) + + readme = Path(sys.argv[1]).read_text(encoding="utf-8") + failures = validate_readme(readme) + + if failures: + print("VALIDATION FAILED") + for failure in failures: + print(f"- {failure}") + sys.exit(1) + + print("Content validation passed.") + + +if __name__ == "__main__": + main() diff --git a/scripts/validate_protocol_style.py b/scripts/validate_protocol_style.py new file mode 100644 index 0000000..9b2e3a9 --- /dev/null +++ b/scripts/validate_protocol_style.py @@ -0,0 +1,128 @@ +"""Validate protocol README unit and notation style.""" + +from pathlib import Path +import re +import sys +from typing import List + +NUMBER_RE = r"\d+(?:\.\d+)?" +TEMPERATURE_RE = re.compile( + rf"\b(?P{NUMBER_RE})(?P\s*)(?P°?)(?P\s*)(?P[Cc])\b" +) +PH_RE = re.compile(r"\b(?P