forked from EndogenAI/dogma
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathencoding_coverage.py
More file actions
236 lines (181 loc) · 8.23 KB
/
Copy pathencoding_coverage.py
File metadata and controls
236 lines (181 loc) · 8.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
"""scripts/encoding_coverage.py

Checks MANIFESTO.md and AGENTS.md to determine whether each named principle
and axiom has all four [4,1] encoding forms present.

Encoding forms:
    F1 = verbal description  — at least one substantive paragraph in the
                               principle's section body
    F2 = canonical example   — labeled ``**Canonical example**:`` block
    F3 = anti-pattern        — labeled ``**Anti-pattern**`` block
    F4 = programmatic gate   — labeled ``**Programmatic gate**:`` OR an
                               explicit reference to a script/hook/CI mechanism

Purpose:
    Produce a Markdown coverage table as a baseline for tracking encoding
    completeness of every MANIFESTO principle. Gaps in F2–F4 signal
    principles where knowledge is verbally described but has not been
    concretized into examples, anti-patterns, or enforcement mechanisms.

Inputs:
    --manifesto PATH   Path to MANIFESTO.md (default: MANIFESTO.md)
    --agents PATH      Path to AGENTS.md (default: AGENTS.md)

Outputs:
    Markdown table written to stdout. Exits 0 on success, 1 on a missing
    input file.

Exit codes:
    0  Table generated successfully.
    1  One or more input files not found — error written to stderr.

Usage:
    uv run python scripts/encoding_coverage.py --manifesto MANIFESTO.md --agents AGENTS.md
"""
from __future__ import annotations
import argparse
import re
import sys
from dataclasses import dataclass
from pathlib import Path
# ---------------------------------------------------------------------------
# Principle registry
# ---------------------------------------------------------------------------
@dataclass
class Principle:
    """One named MANIFESTO principle together with the layer it belongs to."""

    # Principle title; used as the substring to look for in headings and text.
    name: str
    # Layer label shown in the coverage table, e.g. "Axiom 1" or "Cross-cutting".
    layer: str
#: Registry of every named MANIFESTO.md principle, kept in document order.
#: The coverage table emits one row per entry, in this order.
PRINCIPLES: list[Principle] = [
    Principle(name, layer)
    for name, layer in (
        ("Endogenous-First", "Axiom 1"),
        ("Algorithms Before Tokens", "Axiom 2"),
        ("Local Compute-First", "Axiom 3"),
        ("Programmatic-First", "Cross-cutting"),
        ("Documentation-First", "Cross-cutting"),
        ("Adopt Over Author", "Cross-cutting"),
        ("Self-Governance & Guardrails", "Cross-cutting"),
        ("Compress Context, Not Content", "Cross-cutting"),
        ("Isolate Invocations, Parallelize Safely", "Cross-cutting"),
        ("Validate & Gate, Always", "Cross-cutting"),
        ("Minimal Posture", "Cross-cutting"),
        ("Testing-First", "Cross-cutting"),
    )
]
# ---------------------------------------------------------------------------
# Detection patterns
# ---------------------------------------------------------------------------
# F1 — a prose line: it must start with a letter, "(" or a quote character and
# run for at least 40 characters. Headings, blockquotes, list items, table
# rows, code fences and blank lines therefore never qualify.
_F1_PARA_RE = re.compile(r"^(?![ \t]*[>|#\-\*`])[A-Za-z\(\"'].{39,}$", flags=re.M)

# F2 — the bold "canonical example" label, matched case-insensitively.
_CANONICAL_EXAMPLE_RE = re.compile(r"\*\*canonical example\*\*", flags=re.I)

# F3 — the opening of a bold "anti-pattern" label, matched case-insensitively.
# The closing "**" is deliberately not required so that a parenthesised
# sub-label may follow the word before the bold span ends.
_ANTI_PATTERN_RE = re.compile(r"\*\*anti-pattern", flags=re.I)

# F4 — the bold "programmatic gate" label, or any of several unambiguous
# references to an enforcement mechanism (script path, git hook, CI step,
# `uv run` invocation). Alternatives are tried in the order listed.
_PROGRAMMATIC_GATE_RE = re.compile(
    "|".join(
        (
            r"\*\*programmatic gate\*\*",
            r"scripts/\S+\.py",
            r"pre-commit hook",
            r"pre-push hook",
            r"`uv run pytest",
            r"CI step",
            r"`uv run python",
        )
    ),
    flags=re.I,
)
# ---------------------------------------------------------------------------
# Section extraction
# ---------------------------------------------------------------------------
def extract_h3_section(text: str, principle_name: str) -> str:
    """Return the body of the first ``###`` section titled with *principle_name*.

    The heading is located by a case-insensitive substring match on the title,
    so a numbering prefix ("1. Endogenous-First") or a parenthesised suffix
    ("Adopt Over Author (Avoid Reinventing the Wheel)") does not prevent a hit.

    The body starts immediately after the heading line (so it normally begins
    with a newline) and runs up to, but not including, the next ``##`` or
    ``###`` heading, or to the end of *text* when no such heading follows.

    Returns an empty string when no heading matches.
    """
    title_match = re.search(
        rf"^###\s+.*{re.escape(principle_name)}.*$",
        text,
        flags=re.MULTILINE | re.IGNORECASE,
    )
    if title_match is None:
        return ""

    body_start = title_match.end()
    # Only H2/H3 headings close the section; deeper headings stay in the body.
    boundary = re.compile(r"^#{2,3}\s+", re.MULTILINE).search(text, body_start)
    if boundary is None:
        return text[body_start:]
    return text[body_start : boundary.start()]
def _agents_context(agents_text: str, principle_name: str, *, window: int = 600) -> str:
    """Return the AGENTS.md text surrounding every mention of *principle_name*.

    Mentions are found case-insensitively. Each mention contributes one
    snippet made of the match itself plus up to *window* characters on either
    side (clipped at the ends of *agents_text*). Snippets are joined with
    newlines in order of appearance; overlapping windows are not merged, so
    text between two nearby mentions can appear more than once.

    Args:
        agents_text: Full contents of AGENTS.md.
        principle_name: Literal principle title to look for.
        window: Non-negative number of characters of context kept before and
            after each mention. Defaults to 600.

    Returns:
        The joined snippets, or an empty string when *principle_name* is
        empty or is never mentioned.
    """
    # An empty name would compile to an empty pattern that matches at every
    # position, producing one snippet per character; treat it as "no mentions".
    if not principle_name:
        return ""
    mention_re = re.compile(re.escape(principle_name), re.IGNORECASE)
    # Slicing clips the upper bound automatically; only the lower bound needs
    # clamping so that it cannot go negative and wrap around.
    return "\n".join(
        agents_text[max(0, hit.start() - window) : hit.end() + window]
        for hit in mention_re.finditer(agents_text)
    )
# ---------------------------------------------------------------------------
# Coverage checks
# ---------------------------------------------------------------------------
def check_coverage(
    section_body: str,
    agents_text: str,
    principle_name: str,
) -> tuple[bool, bool, bool, bool]:
    """Compute the (F1, F2, F3, F4) coverage flags for *principle_name*.

    F1 looks only at the MANIFESTO section body and is False when that body
    is empty. F2, F3 and F4 are satisfied by a match in the section body or,
    failing that, by a match in the AGENTS.md text surrounding mentions of
    the principle, so that cross-document encoding is also counted.
    """
    agents_ctx = _agents_context(agents_text, principle_name)

    def _present(pattern: re.Pattern) -> bool:
        # The section body is consulted first; the AGENTS.md context is the
        # fallback source.
        if pattern.search(section_body):
            return True
        return pattern.search(agents_ctx) is not None

    has_description = bool(section_body) and _F1_PARA_RE.search(section_body) is not None
    return (
        has_description,
        _present(_CANONICAL_EXAMPLE_RE),
        _present(_ANTI_PATTERN_RE),
        _present(_PROGRAMMATIC_GATE_RE),
    )
# ---------------------------------------------------------------------------
# Table rendering
# ---------------------------------------------------------------------------
# Glyphs used in the coverage table for a present / absent encoding form.
_TICK = "✓"
_CROSS = "✗"


def _cell(flag: bool) -> str:
    """Return the table glyph for *flag*: a tick when truthy, a cross otherwise."""
    if flag:
        return _TICK
    return _CROSS
def build_table(manifesto_text: str, agents_text: str) -> str:
    """Render the Markdown coverage table for every entry in ``PRINCIPLES``.

    The result has a header row, a separator row, and then one row per
    principle giving its name, layer, a tick/cross for each of F1–F4 and a
    ``n/4`` score counting the forms that are present. Rows are joined with
    newlines and there is no trailing newline.
    """
    lines: list[str] = [
        "| Principle | Layer | F1 Desc | F2 Canonical | F3 Anti-pattern | F4 Programmatic | Score |",
        "|-----------|-------|---------|--------------|-----------------|-----------------|-------|",
    ]
    for principle in PRINCIPLES:
        body = extract_h3_section(manifesto_text, principle.name)
        has_desc, has_example, has_anti, has_gate = check_coverage(body, agents_text, principle.name)
        flags = (has_desc, has_example, has_anti, has_gate)
        marks = " | ".join(_cell(flag) for flag in flags)
        # Booleans sum as 0/1, so the total is the number of forms present.
        lines.append(f"| {principle.name} | {principle.layer} | {marks} | {sum(flags)}/4 |")
    return "\n".join(lines)
# ---------------------------------------------------------------------------
# CLI entry point
# ---------------------------------------------------------------------------
def main(argv: list[str] | None = None) -> int:
    """Parse the command line, build the coverage table and print it.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``; ``None``
            lets argparse read the process arguments.

    Returns:
        0 after the table has been written to stdout, or 1 when either input
        path is not an existing file (one error line per offending path is
        written to stderr).
    """
    parser = argparse.ArgumentParser(description="Check MANIFESTO F1-F4 encoding coverage for named principles.")
    parser.add_argument(
        "--manifesto",
        default="MANIFESTO.md",
        help="Path to MANIFESTO.md (default: MANIFESTO.md)",
    )
    parser.add_argument(
        "--agents",
        default="AGENTS.md",
        help="Path to AGENTS.md (default: AGENTS.md)",
    )
    args = parser.parse_args(argv)

    manifesto_path = Path(args.manifesto)
    agents_path = Path(args.agents)

    # is_file() rather than exists(): a directory "exists" but cannot be read
    # as text, and would otherwise escape as an uncaught OSError instead of
    # the documented exit code 1.
    missing = [p for p in (manifesto_path, agents_path) if not p.is_file()]
    if missing:
        for p in missing:
            print(f"Error: file not found: {p}", file=sys.stderr)
        return 1

    manifesto_text = manifesto_path.read_text(encoding="utf-8")
    agents_text = agents_path.read_text(encoding="utf-8")
    print(build_table(manifesto_text, agents_text))
    return 0


if __name__ == "__main__":
    sys.exit(main())