diff --git a/.agents/skills/protocol-migration/SKILL.md b/.agents/skills/protocol-migration/SKILL.md index 7b806de..ea002fe 100644 --- a/.agents/skills/protocol-migration/SKILL.md +++ b/.agents/skills/protocol-migration/SKILL.md @@ -45,11 +45,12 @@ You may normalize formatting only when the meaning is unchanged and unambiguous: - Standardize chemical formulas with HTML subscripts, for example H2O to H<sub>2</sub>O. Similarly for other chemical formulas (e.g. MgCl2 to MgCl<sub>2</sub>). - Do not use Unicode subscript characters such as `₂`. - Standardize `RNAseq` or `RNA-Seq` to `RNA-seq`. Same for `ChIP-seq`, `ATAC-seq`, etc. +- Use numbered lists for procedural actions in sequence. For other non-procedural content, bullets are better. Note-like text such as Note, NB, Optional, Recommended, and Warning should use blockquote style such as `> **Note**`. - Normalize bullet formatting and markdown table formatting. - Normalize heading structure to match the repository template. - For reaction mixes and anything tabular, place them inside a table as in template. - Normalize markdown headings, bullets, and tables. -- "Note" or "NOTE" or "Optional" or "Recommended" or "Warning" are normalized to start with `>` (example `> **Note**`) and are placed immediately after the step they refer to, or at the end of the protocol if they clearly refer to the whole protocol. +- "Note" or "NOTE" or "NB" or "Optional" or "Recommended" or "Warning" are normalized to start with `>` (example `> **Note**`) and are placed immediately after the step they refer to, or at the end of the protocol if they clearly refer to the whole protocol. - Remove empty columns from tables. - Synchronize `Contents` with actual headings in the protocol. 
@@ -82,7 +83,7 @@ You may normalize formatting only when the meaning is unchanged and unambiguous: - add `# Migration notes` including: - imported protocol metadata from `source-metadata.yml` if present - imported protocol metadata from `source-metadata.yml` using only the non-blank lines - - template metadata from `template-metadata.yml` + - template_version from `template-metadata.yml` - ambiguous mappings - normalized formatting changes - content copied verbatim but not confidently placed diff --git a/.claude/.claude/skills/protocol-migration/SKILL.md b/.claude/skills/protocol-migration/SKILL.md similarity index 92% rename from .claude/.claude/skills/protocol-migration/SKILL.md rename to .claude/skills/protocol-migration/SKILL.md index 7b806de..ea002fe 100644 --- a/.claude/.claude/skills/protocol-migration/SKILL.md +++ b/.claude/skills/protocol-migration/SKILL.md @@ -45,11 +45,12 @@ You may normalize formatting only when the meaning is unchanged and unambiguous: - Standardize chemical formulas with HTML subscripts, for example H2O to H<sub>2</sub>O. Similarly for other chemical formulas (e.g. MgCl2 to MgCl<sub>2</sub>). - Do not use Unicode subscript characters such as `₂`. - Standardize `RNAseq` or `RNA-Seq` to `RNA-seq`. Same for `ChIP-seq`, `ATAC-seq`, etc. +- Use numbered lists for procedural actions in sequence. For other non-procedural content, bullets are better. Note-like text such as Note, NB, Optional, Recommended, and Warning should use blockquote style such as `> **Note**`. - Normalize bullet formatting and markdown table formatting. - Normalize heading structure to match the repository template. - For reaction mixes and anything tabular, place them inside a table as in template. - Normalize markdown headings, bullets, and tables. 
-- "Note" or "NOTE" or "Optional" or "Recommended" or "Warning" are normalized to start with `>` (example `> **Note**`) and are placed immediately after the step they refer to, or at the end of the protocol if they clearly refer to the whole protocol. +- "Note" or "NOTE" or "NB" or "Optional" or "Recommended" or "Warning" are normalized to start with `>` (example `> **Note**`) and are placed immediately after the step they refer to, or at the end of the protocol if they clearly refer to the whole protocol. - Remove empty columns from tables. - Synchronize `Contents` with actual headings in the protocol. @@ -82,7 +83,7 @@ You may normalize formatting only when the meaning is unchanged and unambiguous: - add `# Migration notes` including: - imported protocol metadata from `source-metadata.yml` if present - imported protocol metadata from `source-metadata.yml` using only the non-blank lines - - template metadata from `template-metadata.yml` + - template_version from `template-metadata.yml` - ambiguous mappings - normalized formatting changes - content copied verbatim but not confidently placed diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index f312e0e..59337cd 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -38,11 +38,12 @@ You may normalize formatting only when the meaning is unchanged and unambiguous: - Standardize chemical formulas with HTML subscripts, for example H2O to H<sub>2</sub>O. Similarly for other chemical formulas (e.g. MgCl2 to MgCl<sub>2</sub>). - Do not use Unicode subscript characters such as `₂`. - Standardize `RNAseq` or `RNA-Seq` to `RNA-seq`. Same for `ChIP-seq`, `ATAC-seq`, etc. +- Use numbered lists for procedural actions in sequence. For other non-procedural content, bullets are better. Note-like text such as Note, NB, Optional, Recommended, and Warning should use blockquote style such as `> **Note**`. - Normalize bullet formatting and markdown table formatting. 
- Normalize heading structure to match the repository template. - For reaction mixes and anything tabular, place them inside a table as in template. - Normalize markdown headings, bullets, and tables. -- "Note" or "NOTE" or "Optional" or "Recommended" or "Warning" are normalized to start with `>` (example `> **Note**`) and are placed immediately after the step they refer to, or at the end of the protocol if they clearly refer to the whole protocol. +- "Note" or "NOTE" or "NB" or "Optional" or "Recommended" or "Warning" are normalized to start with `>` (example `> **Note**`) and are placed immediately after the step they refer to, or at the end of the protocol if they clearly refer to the whole protocol. - Remove empty columns from tables. - Synchronize `Contents` with actual headings in the protocol. @@ -73,7 +74,7 @@ When drafting a migrated protocol: - content placed in `## Unplaced content` - Imported protocol metadata from `source-metadata.yml` (only the non-blank lines). - Imported protocol metadata from `source-metadata.yml` if present. - - template metadata from `template-metadata.yml`. + - template_version from `template-metadata.yml`. - ambiguous mappings. - normalized formatting changes. - content copied verbatim but not confidently placed. 
diff --git a/.github/workflows/validate-protocol.yml b/.github/workflows/validate-protocol.yml index bd4ea95..e3999e5 100644 --- a/.github/workflows/validate-protocol.yml +++ b/.github/workflows/validate-protocol.yml @@ -6,19 +6,23 @@ on: - main paths: - README.md - - legacy/source.txt - scripts/validate_protocol.py - - tests/test_validate_protocol.py - - .github/workflows/validate_protocol.yml + - scripts/validate_protocol_content.py + - scripts/validate_protocol_style.py + - tests/test_validate_protocol_content.py + - tests/test_validate_protocol_style.py + - .github/workflows/validate-protocol.yml push: branches: - main paths: - README.md - - legacy/source.txt - scripts/validate_protocol.py - - tests/test_validate_protocol.py - - .github/workflows/validate_protocol.yml + - scripts/validate_protocol_content.py + - scripts/validate_protocol_style.py + - tests/test_validate_protocol_content.py + - tests/test_validate_protocol_style.py + - .github/workflows/validate-protocol.yml workflow_dispatch: jobs: @@ -35,7 +39,7 @@ jobs: with: python-version: "3.11" - - name: Check whether validation should run + - name: Check whether README validation should run id: validation_gate run: | set -euo pipefail @@ -56,15 +60,15 @@ jobs: echo "should_validate=true" >> "$GITHUB_OUTPUT" - name: Run validator tests - if: steps.validation_gate.outputs.should_validate == 'true' run: | python -m unittest discover -s tests -p 'test_*.py' - - name: Run protocol validation + - name: Run content validation if: steps.validation_gate.outputs.should_validate == 'true' run: | - if [ -f legacy/source.txt ]; then - python scripts/validate_protocol.py README.md legacy/source.txt - else - python scripts/validate_protocol.py README.md - fi + python scripts/validate_protocol_content.py README.md + + - name: Run style validation + if: steps.validation_gate.outputs.should_validate == 'true' + run: | + python scripts/validate_protocol_style.py README.md diff --git a/README.md b/README.md index 
a17d4eb..f3fe486 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ 4. [Step 4](#4-step-4) 5. [Step 5](#5-step-5) 6. [Step 6](#6-step-6) -7. [Buffers](#buffers) +7. [Materials](#7-materials) 8. [Migration notes](#migration-notes) --- @@ -32,15 +32,15 @@ ## 1.1 Sub-step of Step 1 TODO if Step 1 is complex -- TODO -- TODO -- TODO +1. TODO +2. TODO +3. TODO ## 1.2 Sub-step of Step 1 TODO if Step 1 is complex -- TODO -- TODO -- TODO +1. TODO +2. TODO +3. TODO > **Note:** TODO @@ -48,9 +48,9 @@ # 2. Step 2 -- TODO -- TODO -- TODO +1. TODO +2. TODO +3. TODO ## Reagents / mix @@ -66,9 +66,9 @@ # 3. Step 3 -- TODO -- TODO -- TODO +1. TODO +2. TODO +3. TODO ## Program / incubation @@ -82,9 +82,9 @@ # 4. Step 4 -- TODO -- TODO -- TODO +1. TODO +2. TODO +3. TODO > **Optional:** TODO @@ -92,9 +92,9 @@ # 5. Step 5 -- TODO -- TODO -- TODO +1. TODO +2. TODO +3. TODO ## Reaction setup @@ -108,19 +108,22 @@ # 6. Step 6 -- TODO -- TODO -- TODO +1. TODO +2. TODO +3. TODO ## Output / QC -- TODO -- TODO -- TODO +1. TODO +2. TODO +3. TODO --- -# 7. Buffers +# 7. Materials + +## 7.1 Buffers +## 7.2 Reagents --- @@ -131,4 +134,4 @@ ## Unplaced content -## CHECK items \ No newline at end of file +## CHECK items diff --git a/docs/PROMPT.md b/docs/PROMPT.md index b991a6d..48b29d6 100644 --- a/docs/PROMPT.md +++ b/docs/PROMPT.md @@ -42,9 +42,10 @@ Normalize formatting only when the meaning is unchanged and unambiguous: - standardize chemical formulas with HTML subscripts, for example H2O to H<sub>2</sub>O and MgCl2 to MgCl<sub>2</sub> - do not use Unicode subscript characters such as `₂` - standardize `RNAseq` or `RNA-Seq` to `RNA-seq`, and similarly for `ChIP-seq`, `ATAC-seq`, and related names +- Use numbered lists for procedural actions in sequence. For other non-procedural content, bullets are better. Note-like text such as Note, NB, Optional, Recommended, and Warning should use blockquote style such as `> **Note**`. 
- normalize bullets, headings, and markdown tables to match the repository template - use tables for reaction mixes and other tabular content -- normalize note-like text to blockquote style, for example `> **Note**` +- normalize note-like text such as Note, NB, Optional, Recommended, and Warning to blockquote style, for example `> **Note**` - place note-like text immediately after the step it refers to, or at the end of the protocol if it clearly refers to the whole protocol - remove empty columns from tables - synchronize `Contents` with the actual headings in the protocol @@ -78,7 +79,7 @@ Normalize formatting only when the meaning is unchanged and unambiguous: - Include the following in `# Migration notes`: - imported protocol metadata from `source-metadata.yml` if present - imported protocol metadata from `source-metadata.yml` using only the non-blank lines - - template metadata from `template-metadata.yml` + - template_version from `template-metadata.yml` - ambiguous mappings - normalized formatting changes - content copied verbatim but not confidently placed diff --git a/docs/USING_THIS_TEMPLATE.md b/docs/USING_THIS_TEMPLATE.md index d706253..55bb3e3 100644 --- a/docs/USING_THIS_TEMPLATE.md +++ b/docs/USING_THIS_TEMPLATE.md @@ -74,7 +74,7 @@ The main file you must edit for protocol content is `README.md`. Do not rename t 7. Follow the guidelines in [3. General guidelines for the protocol file (`README.md`)](#3-general-guidelines-for-the-protocol-file-readmemd) 8. Commit your changes, then push. 9. Once you are happy with the result, open a pull request from `import-protocol` into `main`. -10. A validation GitHub Actions workflow will run on that pull request when `README.md` has changed. It checks the required title, status line, status legend, key headings, unresolved placeholders, and placeholder step names. If checks fail, fix them before merging into `main`. +10. 
A validation GitHub Actions workflow will run on that pull request when `README.md` has changed. It runs a content check for the required title, status line, status legend, key headings, unresolved placeholders, and placeholder step names, plus a style check for unit formatting. If checks fail, fix them before merging into `main`. 11. Ask for a reviewer. > **Note:** Always check accuracy and make sure required sections, such as protocol status and the status legend, are present. @@ -121,7 +121,7 @@ This route can save time. It helps keep the template structure consistent, norma 14. Follow the guidelines in [3. General guidelines for the protocol file (`README.md`)](#3-general-guidelines-for-the-protocol-file-readmemd) 15. Commit your changes, then push. 16. Once you are happy with the result, open a pull request from `import-protocol` into `main`. -17. A validation GitHub Actions workflow will run on that pull request when `README.md` has changed. It checks the required title, status line, status legend, key headings, unresolved placeholders, and placeholder step names. If `legacy/source.txt` is present, it also checks that key quantities from the source appear in `README.md`. If checks fail, fix them before merging into `main`. +17. A validation GitHub Actions workflow will run on that pull request when `README.md` has changed. It runs a content check for the required title, status line, status legend, key headings, unresolved placeholders, and placeholder step names, plus a style check for unit formatting. If checks fail, fix them before merging into `main`. 18. Ask for a reviewer. --- @@ -141,6 +141,7 @@ Mandatory items for validation: - a status legend row containing `[OK]`, `[?]`, and `[X]` - a short description (`# About`) - contents (`## Contents`) +- a materials section (`# ... 
Materials`) Recommended content: diff --git a/docs/template-metadata.yml b/docs/template-metadata.yml index 8f2e2cb..644e6ce 100644 --- a/docs/template-metadata.yml +++ b/docs/template-metadata.yml @@ -6,4 +6,4 @@ template_authors: - name: Ira A. Iosub template_doi: template_version: 1.0.0dev -template_release_date: 2026-04-10 \ No newline at end of file +template_release_date: \ No newline at end of file diff --git a/protocol-template.pdf b/protocol-template.pdf index 17e9476..8d06288 100644 Binary files a/protocol-template.pdf and b/protocol-template.pdf differ diff --git a/scripts/validate_protocol.py b/scripts/validate_protocol.py index 43f5554..a9c4444 100644 --- a/scripts/validate_protocol.py +++ b/scripts/validate_protocol.py @@ -1,43 +1,15 @@ -"""Validate a protocol README against template requirements and optional source text.""" +"""Backward-compatible entrypoint for protocol README validation.""" from pathlib import Path -import re import sys from typing import Dict, List, Optional, Tuple -HEADING_RE = re.compile(r"^(#{1,6})\s+(.+?)\s*$", re.MULTILINE) -STATUS_LINE_RE = re.compile( - r"^### Status:\s+.*`\[(?:OK|\?|X)\]`.*$", - re.MULTILINE, -) -STATUS_LEGEND_RE = re.compile( - r"^\| \*\*\*Status legend\*\*\*:.*`\[OK\]`.*`\[\?\]`.*`\[X\]`.*\|$", - re.MULTILINE, -) -PLACEHOLDER_STEP_HEADING_RE = re.compile( - r"^#{1,6}\s+\d+(?:\.\d+)*(?:\.)?\s+(?:Step|Sub-step)\b.*$", - re.MULTILINE, -) -PLACEHOLDER_CONTENTS_RE = re.compile( - r"^\d+\.\s+\[Step\s+\d+\]\(#.*$", - re.MULTILINE, -) - -REQUIRED_HEADINGS = [ - (1, "About"), - (2, "Contents"), -] - -BAD_PLACEHOLDERS = { - "TODO": re.compile(r"\bTODO\b"), - "TBD": re.compile(r"\bTBD\b"), - "XXX": re.compile(r"\bXXX\b"), - "CHECK:": re.compile(r"CHECK:"), -} - -DISALLOWED_TEMPLATE_TEXT = [ - "> Template repository: Click `Use this template` to create a new protocol repo. 
Template docs are in [docs/USING_THIS_TEMPLATE.md](https://github.com/ulelab/protocol-template/blob/main/docs/USING_THIS_TEMPLATE.md)", -] +try: + from scripts.validate_protocol_content import validate_readme as validate_content + from scripts.validate_protocol_style import validate_readme_style +except ModuleNotFoundError: + from validate_protocol_content import validate_readme as validate_content + from validate_protocol_style import validate_readme_style def extract_headings(text: str) -> List[Tuple[int, str]]: @@ -213,9 +185,7 @@ def main() -> None: sys.exit(1) readme = Path(sys.argv[1]).read_text(encoding="utf-8") - source = Path(sys.argv[2]).read_text(encoding="utf-8") if len(sys.argv) == 3 else None - - failures = validate_readme(readme, source) + failures = validate_readme(readme) if failures: print("VALIDATION FAILED") diff --git a/scripts/validate_protocol_content.py b/scripts/validate_protocol_content.py new file mode 100644 index 0000000..306fba1 --- /dev/null +++ b/scripts/validate_protocol_content.py @@ -0,0 +1,161 @@ +"""Validate protocol README content against template requirements.""" + +from pathlib import Path +import re +import sys +from typing import List, Optional, Tuple + +HEADING_RE = re.compile(r"^(#{1,6})\s+(.+?)\s*$", re.MULTILINE) +STATUS_LINE_RE = re.compile( + r"^### Status:\s+.*`\[(?:OK|\?|X)\]`.*$", + re.MULTILINE, +) +STATUS_LEGEND_RE = re.compile( + r"^\| \*\*\*Status legend\*\*\*:.*`\[OK\]`.*`\[\?\]`.*`\[X\]`.*\|$", + re.MULTILINE, +) +PLACEHOLDER_STEP_HEADING_RE = re.compile( + r"^#{1,6}\s+\d+(?:\.\d+)*(?:\.)?\s+(?:Step|Sub-step)\b.*$", + re.MULTILINE, +) +PLACEHOLDER_CONTENTS_RE = re.compile( + r"^\d+\.\s+\[Step\s+\d+\]\(#.*$", + re.MULTILINE, +) + +REQUIRED_HEADINGS = [ + (1, "About"), + (2, "Contents"), + (1, "Materials"), +] + +BAD_PLACEHOLDERS = { + "TODO": re.compile(r"\bTODO\b"), + "TBD": re.compile(r"\bTBD\b"), + "XXX": re.compile(r"\bXXX\b"), + "CHECK:": re.compile(r"CHECK:"), +} + +DISALLOWED_TEMPLATE_TEXT = [ + "> 
Template repository: Click `Use this template` to create a new protocol repo. Template docs are in [docs/USING_THIS_TEMPLATE.md](https://github.com/ulelab/protocol-template/blob/main/docs/USING_THIS_TEMPLATE.md)", +] + + +def extract_headings(text: str) -> List[Tuple[int, str]]: + return [(len(level), title.strip()) for level, title in HEADING_RE.findall(text)] + + +def normalize_heading_title(title: str) -> str: + return re.sub(r"^\d+(?:\.\d+)*(?:\.)?\s+", "", title).strip() + + +def has_required_heading( + headings: List[Tuple[int, str]], + required_level: int, + required_title: str, +) -> bool: + return any( + level == required_level and normalize_heading_title(title) == required_title + for level, title in headings + ) + + +def find_line_number_for_exact_text(text: str, needle: str) -> Optional[int]: + for line_number, line in enumerate(text.splitlines(), start=1): + if needle in line: + return line_number + return None + + +def find_line_number_for_regex( + text: str, + pattern: re.Pattern, + target: str, +) -> Optional[int]: + for line_number, line in enumerate(text.splitlines(), start=1): + for match in pattern.finditer(line): + if match.group(0) == target: + return line_number + return None + + +def validate_readme(readme: str) -> List[str]: + failures: List[str] = [] + headings = extract_headings(readme) + top_level_headings = [title for level, title in headings if level == 1] + + if not headings: + failures.append("README does not contain any Markdown headings.") + elif not top_level_headings: + failures.append("README must contain a top-level protocol title ('# ...').") + else: + first_title = normalize_heading_title(top_level_headings[0]) + if first_title == "About": + failures.append("Missing top-level protocol title before '# About'.") + + for level, title in REQUIRED_HEADINGS: + if not has_required_heading(headings, level, title): + failures.append(f"Missing heading: {'#' * level} {title}") + + if not STATUS_LINE_RE.search(readme): + 
failures.append("Missing or malformed status line: expected '### Status: ...'.") + + if not STATUS_LEGEND_RE.search(readme): + failures.append( + "Missing or malformed status legend row with `[OK]`, `[?]`, and `[X]`." + ) + + for token, pattern in BAD_PLACEHOLDERS.items(): + if pattern.search(readme): + failures.append(f"Found unresolved placeholder: {token}") + + for text in DISALLOWED_TEMPLATE_TEXT: + if text in readme: + line_number = find_line_number_for_exact_text(readme, text) + if line_number is None: + failures.append(f"Found template-only text that must be removed: {text}") + else: + failures.append( + f"Found template-only text that must be removed: {text} (README line {line_number})" + ) + + for match in PLACEHOLDER_STEP_HEADING_RE.findall(readme): + line_number = find_line_number_for_regex(readme, PLACEHOLDER_STEP_HEADING_RE, match) + if line_number is None: + failures.append(f"Found placeholder step heading: {match}") + else: + failures.append( + f"Found placeholder step heading: {match} (README line {line_number})" + ) + + for match in PLACEHOLDER_CONTENTS_RE.findall(readme): + line_number = find_line_number_for_regex(readme, PLACEHOLDER_CONTENTS_RE, match) + if line_number is None: + failures.append(f"Found placeholder contents entry: {match}") + else: + failures.append( + f"Found placeholder contents entry: {match} (README line {line_number})" + ) + + return list(dict.fromkeys(failures)) + + +def main() -> None: + if len(sys.argv) != 2: + print("Usage: python validate_protocol_content.py README.md") + sys.exit(1) + + readme = Path(sys.argv[1]).read_text(encoding="utf-8") + failures = validate_readme(readme) + + if failures: + print("VALIDATION FAILED") + for failure in failures: + print(f"- {failure}") + sys.exit(1) + + print("Content validation passed.") + + +if __name__ == "__main__": + main() diff --git a/scripts/validate_protocol_style.py b/scripts/validate_protocol_style.py new file mode 100644 index 0000000..fb8e140 --- /dev/null +++ 
b/scripts/validate_protocol_style.py @@ -0,0 +1,171 @@ +"""Validate protocol README unit and notation style.""" + +from pathlib import Path +import re +import sys +from typing import List + +NUMBER_RE = r"\d+(?:\.\d+)?" +TEMPERATURE_RE = re.compile( + rf"\b(?P{NUMBER_RE})(?P\s*)(?P°?)(?P\s*)(?P[Cc])\b" +) +PH_RE = re.compile(r"\b(?P