diff --git a/.agents/skills/protocol-migration/SKILL.md b/.agents/skills/protocol-migration/SKILL.md
index 7b806de..ea002fe 100644
--- a/.agents/skills/protocol-migration/SKILL.md
+++ b/.agents/skills/protocol-migration/SKILL.md
@@ -45,11 +45,12 @@ You may normalize formatting only when the meaning is unchanged and unambiguous:
- Standardize chemical formulas with HTML subscripts, for example H2O to H<sub>2</sub>O. Similarly for other chemical formulas (e.g. MgCl2 to MgCl<sub>2</sub>).
- Do not use Unicode subscript characters such as `₂`.
- Standardize `RNAseq` or `RNA-Seq` to `RNA-seq`. Same for `ChIP-seq`, `ATAC-seq`, etc.
+- Use numbered lists for sequential procedural actions and bullets for non-procedural content. Note-like text such as Note, NB, Optional, Recommended, and Warning should use blockquote style such as `> **Note**`.
- Normalize bullet formatting and markdown table formatting.
- Normalize heading structure to match the repository template.
- For reaction mixes and anything tabular, place them inside a table as in template.
- Normalize markdown headings, bullets, and tables.
-- "Note" or "NOTE" or "Optional" or "Recommended" or "Warning" are normalized to start with `>` (example `> **Note**`) and are placed immediately after the step they refer to, or at the end of the protocol if they clearly refer to the whole protocol.
+- "Note" or "NOTE" or "NB" or "Optional" or "Recommended" or "Warning" are normalized to start with `>` (example `> **Note**`) and are placed immediately after the step they refer to, or at the end of the protocol if they clearly refer to the whole protocol.
- Remove empty columns from tables.
- Synchronize `Contents` with actual headings in the protocol.
@@ -82,7 +83,7 @@ You may normalize formatting only when the meaning is unchanged and unambiguous:
- add `# Migration notes` including:
- imported protocol metadata from `source-metadata.yml` if present
- imported protocol metadata from `source-metadata.yml` using only the non-blank lines
- - template metadata from `template-metadata.yml`
+ - template_version from `template-metadata.yml`
- ambiguous mappings
- normalized formatting changes
- content copied verbatim but not confidently placed
diff --git a/.claude/.claude/skills/protocol-migration/SKILL.md b/.claude/skills/protocol-migration/SKILL.md
similarity index 92%
rename from .claude/.claude/skills/protocol-migration/SKILL.md
rename to .claude/skills/protocol-migration/SKILL.md
index 7b806de..ea002fe 100644
--- a/.claude/.claude/skills/protocol-migration/SKILL.md
+++ b/.claude/skills/protocol-migration/SKILL.md
@@ -45,11 +45,12 @@ You may normalize formatting only when the meaning is unchanged and unambiguous:
- Standardize chemical formulas with HTML subscripts, for example H2O to H<sub>2</sub>O. Similarly for other chemical formulas (e.g. MgCl2 to MgCl<sub>2</sub>).
- Do not use Unicode subscript characters such as `₂`.
- Standardize `RNAseq` or `RNA-Seq` to `RNA-seq`. Same for `ChIP-seq`, `ATAC-seq`, etc.
+- Use numbered lists for sequential procedural actions and bullets for non-procedural content. Note-like text such as Note, NB, Optional, Recommended, and Warning should use blockquote style such as `> **Note**`.
- Normalize bullet formatting and markdown table formatting.
- Normalize heading structure to match the repository template.
- For reaction mixes and anything tabular, place them inside a table as in template.
- Normalize markdown headings, bullets, and tables.
-- "Note" or "NOTE" or "Optional" or "Recommended" or "Warning" are normalized to start with `>` (example `> **Note**`) and are placed immediately after the step they refer to, or at the end of the protocol if they clearly refer to the whole protocol.
+- "Note" or "NOTE" or "NB" or "Optional" or "Recommended" or "Warning" are normalized to start with `>` (example `> **Note**`) and are placed immediately after the step they refer to, or at the end of the protocol if they clearly refer to the whole protocol.
- Remove empty columns from tables.
- Synchronize `Contents` with actual headings in the protocol.
@@ -82,7 +83,7 @@ You may normalize formatting only when the meaning is unchanged and unambiguous:
- add `# Migration notes` including:
- imported protocol metadata from `source-metadata.yml` if present
- imported protocol metadata from `source-metadata.yml` using only the non-blank lines
- - template metadata from `template-metadata.yml`
+ - template_version from `template-metadata.yml`
- ambiguous mappings
- normalized formatting changes
- content copied verbatim but not confidently placed
diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
index f312e0e..59337cd 100644
--- a/.github/copilot-instructions.md
+++ b/.github/copilot-instructions.md
@@ -38,11 +38,12 @@ You may normalize formatting only when the meaning is unchanged and unambiguous:
- Standardize chemical formulas with HTML subscripts, for example H2O to H<sub>2</sub>O. Similarly for other chemical formulas (e.g. MgCl2 to MgCl<sub>2</sub>).
- Do not use Unicode subscript characters such as `₂`.
- Standardize `RNAseq` or `RNA-Seq` to `RNA-seq`. Same for `ChIP-seq`, `ATAC-seq`, etc.
+- Use numbered lists for sequential procedural actions and bullets for non-procedural content. Note-like text such as Note, NB, Optional, Recommended, and Warning should use blockquote style such as `> **Note**`.
- Normalize bullet formatting and markdown table formatting.
- Normalize heading structure to match the repository template.
- For reaction mixes and anything tabular, place them inside a table as in template.
- Normalize markdown headings, bullets, and tables.
-- "Note" or "NOTE" or "Optional" or "Recommended" or "Warning" are normalized to start with `>` (example `> **Note**`) and are placed immediately after the step they refer to, or at the end of the protocol if they clearly refer to the whole protocol.
+- "Note" or "NOTE" or "NB" or "Optional" or "Recommended" or "Warning" are normalized to start with `>` (example `> **Note**`) and are placed immediately after the step they refer to, or at the end of the protocol if they clearly refer to the whole protocol.
- Remove empty columns from tables.
- Synchronize `Contents` with actual headings in the protocol.
@@ -73,7 +74,7 @@ When drafting a migrated protocol:
- content placed in `## Unplaced content`
- Imported protocol metadata from `source-metadata.yml` (only the non-blank lines).
- Imported protocol metadata from `source-metadata.yml` if present.
- - template metadata from `template-metadata.yml`.
+ - template_version from `template-metadata.yml`.
- ambiguous mappings.
- normalized formatting changes.
- content copied verbatim but not confidently placed.
diff --git a/.github/workflows/validate-protocol.yml b/.github/workflows/validate-protocol.yml
index bd4ea95..e3999e5 100644
--- a/.github/workflows/validate-protocol.yml
+++ b/.github/workflows/validate-protocol.yml
@@ -6,19 +6,23 @@ on:
- main
paths:
- README.md
- - legacy/source.txt
- scripts/validate_protocol.py
- - tests/test_validate_protocol.py
- - .github/workflows/validate_protocol.yml
+ - scripts/validate_protocol_content.py
+ - scripts/validate_protocol_style.py
+ - tests/test_validate_protocol_content.py
+ - tests/test_validate_protocol_style.py
+ - .github/workflows/validate-protocol.yml
push:
branches:
- main
paths:
- README.md
- - legacy/source.txt
- scripts/validate_protocol.py
- - tests/test_validate_protocol.py
- - .github/workflows/validate_protocol.yml
+ - scripts/validate_protocol_content.py
+ - scripts/validate_protocol_style.py
+ - tests/test_validate_protocol_content.py
+ - tests/test_validate_protocol_style.py
+ - .github/workflows/validate-protocol.yml
workflow_dispatch:
jobs:
@@ -35,7 +39,7 @@ jobs:
with:
python-version: "3.11"
- - name: Check whether validation should run
+ - name: Check whether README validation should run
id: validation_gate
run: |
set -euo pipefail
@@ -56,15 +60,15 @@ jobs:
echo "should_validate=true" >> "$GITHUB_OUTPUT"
- name: Run validator tests
- if: steps.validation_gate.outputs.should_validate == 'true'
run: |
python -m unittest discover -s tests -p 'test_*.py'
- - name: Run protocol validation
+ - name: Run content validation
if: steps.validation_gate.outputs.should_validate == 'true'
run: |
- if [ -f legacy/source.txt ]; then
- python scripts/validate_protocol.py README.md legacy/source.txt
- else
- python scripts/validate_protocol.py README.md
- fi
+ python scripts/validate_protocol_content.py README.md
+
+ - name: Run style validation
+ if: steps.validation_gate.outputs.should_validate == 'true'
+ run: |
+ python scripts/validate_protocol_style.py README.md
diff --git a/README.md b/README.md
index a17d4eb..f3fe486 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@
4. [Step 4](#4-step-4)
5. [Step 5](#5-step-5)
6. [Step 6](#6-step-6)
-7. [Buffers](#buffers)
+7. [Materials](#7-materials)
8. [Migration notes](#migration-notes)
---
@@ -32,15 +32,15 @@
## 1.1 Sub-step of Step 1 TODO if Step 1 is complex
-- TODO
-- TODO
-- TODO
+1. TODO
+2. TODO
+3. TODO
## 1.2 Sub-step of Step 1 TODO if Step 1 is complex
-- TODO
-- TODO
-- TODO
+1. TODO
+2. TODO
+3. TODO
> **Note:** TODO
@@ -48,9 +48,9 @@
# 2. Step 2
-- TODO
-- TODO
-- TODO
+1. TODO
+2. TODO
+3. TODO
## Reagents / mix
@@ -66,9 +66,9 @@
# 3. Step 3
-- TODO
-- TODO
-- TODO
+1. TODO
+2. TODO
+3. TODO
## Program / incubation
@@ -82,9 +82,9 @@
# 4. Step 4
-- TODO
-- TODO
-- TODO
+1. TODO
+2. TODO
+3. TODO
> **Optional:** TODO
@@ -92,9 +92,9 @@
# 5. Step 5
-- TODO
-- TODO
-- TODO
+1. TODO
+2. TODO
+3. TODO
## Reaction setup
@@ -108,19 +108,22 @@
# 6. Step 6
-- TODO
-- TODO
-- TODO
+1. TODO
+2. TODO
+3. TODO
## Output / QC
-- TODO
-- TODO
-- TODO
+1. TODO
+2. TODO
+3. TODO
---
-# 7. Buffers
+# 7. Materials
+
+## 7.1 Buffers
+## 7.2 Reagents
---
@@ -131,4 +134,4 @@
## Unplaced content
-## CHECK items
\ No newline at end of file
+## CHECK items
diff --git a/docs/PROMPT.md b/docs/PROMPT.md
index b991a6d..48b29d6 100644
--- a/docs/PROMPT.md
+++ b/docs/PROMPT.md
@@ -42,9 +42,10 @@ Normalize formatting only when the meaning is unchanged and unambiguous:
- standardize chemical formulas with HTML subscripts, for example H2O to H<sub>2</sub>O and MgCl2 to MgCl<sub>2</sub>
- do not use Unicode subscript characters such as `₂`
- standardize `RNAseq` or `RNA-Seq` to `RNA-seq`, and similarly for `ChIP-seq`, `ATAC-seq`, and related names
+- use numbered lists for sequential procedural actions and bullets for non-procedural content; note-like text such as Note, NB, Optional, Recommended, and Warning should use blockquote style such as `> **Note**`
- normalize bullets, headings, and markdown tables to match the repository template
- use tables for reaction mixes and other tabular content
-- normalize note-like text to blockquote style, for example `> **Note**`
+- normalize note-like text such as Note, NB, Optional, Recommended, and Warning to blockquote style, for example `> **Note**`
- place note-like text immediately after the step it refers to, or at the end of the protocol if it clearly refers to the whole protocol
- remove empty columns from tables
- synchronize `Contents` with the actual headings in the protocol
@@ -78,7 +79,7 @@ Normalize formatting only when the meaning is unchanged and unambiguous:
- Include the following in `# Migration notes`:
- imported protocol metadata from `source-metadata.yml` if present
- imported protocol metadata from `source-metadata.yml` using only the non-blank lines
- - template metadata from `template-metadata.yml`
+ - template_version from `template-metadata.yml`
- ambiguous mappings
- normalized formatting changes
- content copied verbatim but not confidently placed
diff --git a/docs/USING_THIS_TEMPLATE.md b/docs/USING_THIS_TEMPLATE.md
index d706253..55bb3e3 100644
--- a/docs/USING_THIS_TEMPLATE.md
+++ b/docs/USING_THIS_TEMPLATE.md
@@ -74,7 +74,7 @@ The main file you must edit for protocol content is `README.md`. Do not rename t
7. Follow the guidelines in [3. General guidelines for the protocol file (`README.md`)](#3-general-guidelines-for-the-protocol-file-readmemd)
8. Commit your changes, then push.
9. Once you are happy with the result, open a pull request from `import-protocol` into `main`.
-10. A validation GitHub Actions workflow will run on that pull request when `README.md` has changed. It checks the required title, status line, status legend, key headings, unresolved placeholders, and placeholder step names. If checks fail, fix them before merging into `main`.
+10. A validation GitHub Actions workflow will run on that pull request when `README.md` has changed. It runs a content check for the required title, status line, status legend, key headings, unresolved placeholders, and placeholder step names, plus a style check for unit and notation formatting. If checks fail, fix them before merging into `main`.
11. Ask for a reviewer.
> **Note:** Always check accuracy and make sure required sections, such as protocol status and the status legend, are present.
@@ -121,7 +121,7 @@ This route can save time. It helps keep the template structure consistent, norma
14. Follow the guidelines in [3. General guidelines for the protocol file (`README.md`)](#3-general-guidelines-for-the-protocol-file-readmemd)
15. Commit your changes, then push.
16. Once you are happy with the result, open a pull request from `import-protocol` into `main`.
-17. A validation GitHub Actions workflow will run on that pull request when `README.md` has changed. It checks the required title, status line, status legend, key headings, unresolved placeholders, and placeholder step names. If `legacy/source.txt` is present, it also checks that key quantities from the source appear in `README.md`. If checks fail, fix them before merging into `main`.
+17. A validation GitHub Actions workflow will run on that pull request when `README.md` has changed. It runs a content check for the required title, status line, status legend, key headings, unresolved placeholders, and placeholder step names, plus a style check for unit and notation formatting. If checks fail, fix them before merging into `main`.
18. Ask for a reviewer.
---
@@ -141,6 +141,7 @@ Mandatory items for validation:
- a status legend row containing `[OK]`, `[?]`, and `[X]`
- a short description (`# About`)
- contents (`## Contents`)
+- a materials section (`# ... Materials`)
Recommended content:
diff --git a/docs/template-metadata.yml b/docs/template-metadata.yml
index 8f2e2cb..644e6ce 100644
--- a/docs/template-metadata.yml
+++ b/docs/template-metadata.yml
@@ -6,4 +6,4 @@ template_authors:
- name: Ira A. Iosub
template_doi:
template_version: 1.0.0dev
-template_release_date: 2026-04-10
\ No newline at end of file
+template_release_date:
\ No newline at end of file
diff --git a/protocol-template.pdf b/protocol-template.pdf
index 17e9476..8d06288 100644
Binary files a/protocol-template.pdf and b/protocol-template.pdf differ
diff --git a/scripts/validate_protocol.py b/scripts/validate_protocol.py
index 43f5554..a9c4444 100644
--- a/scripts/validate_protocol.py
+++ b/scripts/validate_protocol.py
@@ -1,43 +1,15 @@
-"""Validate a protocol README against template requirements and optional source text."""
+"""Backward-compatible entrypoint for protocol README validation."""
from pathlib import Path
-import re
import sys
from typing import Dict, List, Optional, Tuple
-HEADING_RE = re.compile(r"^(#{1,6})\s+(.+?)\s*$", re.MULTILINE)
-STATUS_LINE_RE = re.compile(
- r"^### Status:\s+.*`\[(?:OK|\?|X)\]`.*$",
- re.MULTILINE,
-)
-STATUS_LEGEND_RE = re.compile(
- r"^\| \*\*\*Status legend\*\*\*:.*`\[OK\]`.*`\[\?\]`.*`\[X\]`.*\|$",
- re.MULTILINE,
-)
-PLACEHOLDER_STEP_HEADING_RE = re.compile(
- r"^#{1,6}\s+\d+(?:\.\d+)*(?:\.)?\s+(?:Step|Sub-step)\b.*$",
- re.MULTILINE,
-)
-PLACEHOLDER_CONTENTS_RE = re.compile(
- r"^\d+\.\s+\[Step\s+\d+\]\(#.*$",
- re.MULTILINE,
-)
-
-REQUIRED_HEADINGS = [
- (1, "About"),
- (2, "Contents"),
-]
-
-BAD_PLACEHOLDERS = {
- "TODO": re.compile(r"\bTODO\b"),
- "TBD": re.compile(r"\bTBD\b"),
- "XXX": re.compile(r"\bXXX\b"),
- "CHECK:": re.compile(r"CHECK:"),
-}
-
-DISALLOWED_TEMPLATE_TEXT = [
- "> Template repository: Click `Use this template` to create a new protocol repo. Template docs are in [docs/USING_THIS_TEMPLATE.md](https://github.com/ulelab/protocol-template/blob/main/docs/USING_THIS_TEMPLATE.md)",
-]
+try:
+ from scripts.validate_protocol_content import validate_readme as validate_content
+ from scripts.validate_protocol_style import validate_readme_style
+except ModuleNotFoundError:
+ from validate_protocol_content import validate_readme as validate_content
+ from validate_protocol_style import validate_readme_style
def extract_headings(text: str) -> List[Tuple[int, str]]:
@@ -213,9 +185,7 @@ def main() -> None:
sys.exit(1)
readme = Path(sys.argv[1]).read_text(encoding="utf-8")
- source = Path(sys.argv[2]).read_text(encoding="utf-8") if len(sys.argv) == 3 else None
-
- failures = validate_readme(readme, source)
+ failures = validate_readme(readme)
if failures:
print("VALIDATION FAILED")
diff --git a/scripts/validate_protocol_content.py b/scripts/validate_protocol_content.py
new file mode 100644
index 0000000..306fba1
--- /dev/null
+++ b/scripts/validate_protocol_content.py
@@ -0,0 +1,161 @@
+"""Validate protocol README content against template requirements."""
+
+from pathlib import Path
+import re
+import sys
+from typing import List, Optional, Tuple
+
+HEADING_RE = re.compile(r"^(#{1,6})\s+(.+?)\s*$", re.MULTILINE)
+STATUS_LINE_RE = re.compile(
+ r"^### Status:\s+.*`\[(?:OK|\?|X)\]`.*$",
+ re.MULTILINE,
+)
+STATUS_LEGEND_RE = re.compile(
+ r"^\| \*\*\*Status legend\*\*\*:.*`\[OK\]`.*`\[\?\]`.*`\[X\]`.*\|$",
+ re.MULTILINE,
+)
+PLACEHOLDER_STEP_HEADING_RE = re.compile(
+ r"^#{1,6}\s+\d+(?:\.\d+)*(?:\.)?\s+(?:Step|Sub-step)\b.*$",
+ re.MULTILINE,
+)
+PLACEHOLDER_CONTENTS_RE = re.compile(
+ r"^\d+\.\s+\[Step\s+\d+\]\(#.*$",
+ re.MULTILINE,
+)
+
+REQUIRED_HEADINGS = [
+ (1, "About"),
+ (2, "Contents"),
+ (1, "Materials"),
+]
+
+BAD_PLACEHOLDERS = {
+ "TODO": re.compile(r"\bTODO\b"),
+ "TBD": re.compile(r"\bTBD\b"),
+ "XXX": re.compile(r"\bXXX\b"),
+ "CHECK:": re.compile(r"CHECK:"),
+}
+
+DISALLOWED_TEMPLATE_TEXT = [
+ "> Template repository: Click `Use this template` to create a new protocol repo. Template docs are in [docs/USING_THIS_TEMPLATE.md](https://github.com/ulelab/protocol-template/blob/main/docs/USING_THIS_TEMPLATE.md)",
+]
+
+
+def extract_headings(text: str) -> List[Tuple[int, str]]:
+ return [(len(level), title.strip()) for level, title in HEADING_RE.findall(text)]
+
+
+def normalize_heading_title(title: str) -> str:
+ return re.sub(r"^\d+(?:\.\d+)*(?:\.)?\s+", "", title).strip()
+
+
+def has_required_heading(
+ headings: List[Tuple[int, str]],
+ required_level: int,
+ required_title: str,
+) -> bool:
+ return any(
+ level == required_level and normalize_heading_title(title) == required_title
+ for level, title in headings
+ )
+
+
+def find_line_number_for_exact_text(text: str, needle: str) -> Optional[int]:
+ for line_number, line in enumerate(text.splitlines(), start=1):
+ if needle in line:
+ return line_number
+ return None
+
+
+def find_line_number_for_regex(
+ text: str,
+ pattern: re.Pattern,
+ target: str,
+) -> Optional[int]:
+ for line_number, line in enumerate(text.splitlines(), start=1):
+ for match in pattern.finditer(line):
+ if match.group(0) == target:
+ return line_number
+ return None
+
+
+def validate_readme(readme: str) -> List[str]:
+ failures: List[str] = []
+ headings = extract_headings(readme)
+ top_level_headings = [title for level, title in headings if level == 1]
+
+ if not headings:
+ failures.append("README does not contain any Markdown headings.")
+ elif not top_level_headings:
+ failures.append("README must contain a top-level protocol title ('# ...').")
+ else:
+ first_title = normalize_heading_title(top_level_headings[0])
+ if first_title == "About":
+ failures.append("Missing top-level protocol title before '# About'.")
+
+ for level, title in REQUIRED_HEADINGS:
+ if not has_required_heading(headings, level, title):
+ failures.append(f"Missing heading: {'#' * level} {title}")
+
+ if not STATUS_LINE_RE.search(readme):
+ failures.append("Missing or malformed status line: expected '### Status: ...'.")
+
+ if not STATUS_LEGEND_RE.search(readme):
+ failures.append(
+ "Missing or malformed status legend row with `[OK]`, `[?]`, and `[X]`."
+ )
+
+ for token, pattern in BAD_PLACEHOLDERS.items():
+ if pattern.search(readme):
+ failures.append(f"Found unresolved placeholder: {token}")
+
+ for text in DISALLOWED_TEMPLATE_TEXT:
+ if text in readme:
+ line_number = find_line_number_for_exact_text(readme, text)
+ if line_number is None:
+ failures.append(f"Found template-only text that must be removed: {text}")
+ else:
+ failures.append(
+ f"Found template-only text that must be removed: {text} (README line {line_number})"
+ )
+
+ for match in PLACEHOLDER_STEP_HEADING_RE.findall(readme):
+ line_number = find_line_number_for_regex(readme, PLACEHOLDER_STEP_HEADING_RE, match)
+ if line_number is None:
+ failures.append(f"Found placeholder step heading: {match}")
+ else:
+ failures.append(
+ f"Found placeholder step heading: {match} (README line {line_number})"
+ )
+
+ for match in PLACEHOLDER_CONTENTS_RE.findall(readme):
+ line_number = find_line_number_for_regex(readme, PLACEHOLDER_CONTENTS_RE, match)
+ if line_number is None:
+ failures.append(f"Found placeholder contents entry: {match}")
+ else:
+ failures.append(
+ f"Found placeholder contents entry: {match} (README line {line_number})"
+ )
+
+ return list(dict.fromkeys(failures))
+
+
+def main() -> None:
+ if len(sys.argv) != 2:
+ print("Usage: python validate_protocol_content.py README.md")
+ sys.exit(1)
+
+ readme = Path(sys.argv[1]).read_text(encoding="utf-8")
+ failures = validate_readme(readme)
+
+ if failures:
+ print("VALIDATION FAILED")
+ for failure in failures:
+ print(f"- {failure}")
+ sys.exit(1)
+
+ print("Content validation passed.")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/scripts/validate_protocol_style.py b/scripts/validate_protocol_style.py
new file mode 100644
index 0000000..fb8e140
--- /dev/null
+++ b/scripts/validate_protocol_style.py
@@ -0,0 +1,171 @@
+"""Validate protocol README unit and notation style."""
+
+from pathlib import Path
+import re
+import sys
+from typing import List
+
+NUMBER_RE = r"\d+(?:\.\d+)?"
+TEMPERATURE_RE = re.compile(
+ rf"\b(?P{NUMBER_RE})(?P\s*)(?P°?)(?P\s*)(?P[Cc])\b"
+)
+PH_RE = re.compile(r"\b(?P