From 2cfba2ccc78a86889d0bc6dd5e623761dc25ad5e Mon Sep 17 00:00:00 2001 From: NagyVikt Date: Wed, 27 May 2026 21:10:44 +0200 Subject: [PATCH] =?UTF-8?q?refactor(template):=20trim=20multiagent-safety?= =?UTF-8?q?=20contract=20447=E2=86=92171=20lines=20(62%)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop Colony-specific coupling so the contract is generic to any Claude Code / Codex / agent-tool setup. Reorganize into a tighter load-bearing surface plus an "Optional companion tooling" section for fff/rtk/OpenSpec. Kept, with the asserted phrases and headings verbatim (tests + downstream repos depend on these): - Toggle (GUARDEX_ON=0/1) - "Small tasks stay direct and caveman-only" task-size routing - "Promote to full Guardex / OMX orchestration only when scope grows into" - "final completion/cleanup section" - "PR URL + final `MERGED` evidence" - External approval boundary section - "Guardex cannot bypass Codex host approval prompts" - "request approval for the narrow `gx branch finish ...` command" - "instead of standalone `git push` / `gh pr`" - `### Token / context budget` and `### Caveman style` section headings - Primary-tree lock command list - Six-step completion definition Dropped: - `### Colony coordination loop` block (Colony is optional, not gx core) - `### Working-state notes` redundant with handoff format - Verbose RTK/Caveman tool catalogs (now one-line summary in "Optional companion tooling") - "Multi-Agent Execution Contract: Guardex + Colony" header → just "Multi-Agent Execution Contract" Test updates: 9 phrase assertions migrated to the new generic wording and 3 Colony-specific assertions removed. 38 pass / 6 fail (= baseline, zero new failures). 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.7 (1M context) --- templates/AGENTS.multiagent-safety.md | 408 +++++--------------------- test/setup.test.js | 21 +- 2 files changed, 75 insertions(+), 354 deletions(-) diff --git a/templates/AGENTS.multiagent-safety.md b/templates/AGENTS.multiagent-safety.md index bf86b77b..22331644 100644 --- a/templates/AGENTS.multiagent-safety.md +++ b/templates/AGENTS.multiagent-safety.md @@ -1,37 +1,16 @@ -## Multi-Agent Execution Contract: Guardex + Colony +## Multi-Agent Execution Contract -### Repo toggle +### Toggle -Guardex is enabled by default. - -If the repo-root `.env` sets any of these values, treat this entire Guardex contract as disabled for the repo: - -```text -GUARDEX_ON=0 -GUARDEX_ON=false -GUARDEX_ON=no -GUARDEX_ON=off -``` - -When disabled, do not require Guardex worktrees, lock claims, completion flow, or OpenSpec workflow until `GUARDEX_ON` is re-enabled. - -To explicitly enable: - -```text -GUARDEX_ON=1 -``` +Guardex is enabled by default. Disable via repo-root `.env` with `GUARDEX_ON=0|false|no|off`. Re-enable with `GUARDEX_ON=1`. ### Core rules -- Work from an `agent/*` branch and worktree, never directly on the protected base branch. -- Claim files before edits. -- Use Colony for coordination before falling back to OMX state/notepad. -- Prefer fff MCP tools for file search whenever available; do not route file search through RTK when fff can answer it. -- Use OpenSpec for durable behavior contracts and change-driven work. -- Keep outputs compact: less word, same proof. -- Commit, push, and open/update a PR for completed work unless the user explicitly says to keep it local. -- Do not embed stale memory dumps, generated status snapshots, PR transcripts, session history, or long logs in this file. +- Work from an `agent/*` branch + worktree. **Never** edit the protected base directly. +- Claim files before editing. 
Confirm a path is in your claim before deleting it. +- Commit, push, and open/update a PR for completed work unless the user says keep-local. +- Keep outputs and notes compact. Less word, same proof. ### Task-size routing @@ -39,256 +18,65 @@ Small tasks stay direct and caveman-only. For typos, single-file tweaks, one-liners, version bumps, comment-only changes, or similarly bounded asks, solve directly and do not escalate into heavy orchestration just because a keyword appears. -Treat these prefixes as explicit lightweight escape hatches: - -- `quick:` -- `simple:` -- `tiny:` -- `minor:` -- `small:` -- `just:` -- `only:` - -Promote to full Guardex / OMX orchestration only when scope grows into: - -- multi-file behavior change -- API/schema work -- refactor -- migration -- architecture -- cross-cutting scope -- long prompt -- multi-agent execution - -### Colony coordination loop - -Use Colony as the primary coordination surface. - -On every startup, resume, follow-up, or "continue" request, run this order: - -1. `mcp__colony__hivemind_context` -2. `mcp__colony__attention_inbox` -3. `mcp__colony__task_ready_for_agent` -4. `mcp__colony__search` only when prior decisions, earlier lanes, file history, or error context matter. - -Rules: - -- Use `task_ready_for_agent` to choose work. -- Use `task_list` only for browsing/debugging. Do not use `task_list` as the normal work picker. -- If an agent reaches for `task_list` repeatedly while choosing work, stop and call `task_ready_for_agent` instead. `task_list` is an inventory tool, not a scheduler. -- Before editing files on an active task, call `task_claim_file` for each touched file. -- Use `task_post` for task-thread notes, decisions, blockers, and working-state updates. -- Use `task_message` / `task_messages` for directed agent-to-agent communication. -- Use `get_observations` only after compact Colony tools return IDs worth hydrating. 
- -Fallback: - -- Colony is considered unavailable only when the MCP namespace is missing, the tool call fails, or the installed Colony server does not expose the required tool. -- If `attention_inbox` or `task_ready_for_agent` is missing, fall back to `hivemind_context`, then `task_list`, then hydrate only the relevant task IDs. -- Do not skip Colony just because OMX state exists. OMX is fallback, not the first coordination source. -- Read `.omx/state` and `.omx/notepad.md` only when Colony is unavailable, missing the needed state, or the task explicitly depends on legacy OMX state. -- Keep `.omx/notepad.md` lean: live handoffs only. - -### Working-state notes - -Colony is preferred over generic notepad state. - -A working-state note should be task-scoped, searchable, and useful to another agent resuming the lane. - -When saving progress, use a task-scoped Colony note when possible: - -```text -task_post kind=note -content="branch=; task=; blocker=; next=; evidence=" -``` - -Use exactly these fields for handoff-style notes: - -- `branch` -- `task` -- `blocker` -- `next` -- `evidence` - -Do not store long proof dumps, stale narrative, or full logs in notepads. Put bulky proof in OpenSpec artifacts, PRs, or command output. - -### Token / context budget - -Default: less word, same proof. - -- For prompts about `token inefficiency`, `reviewer mode`, `minimal token overhead`, or session waste patterns, switch into low-overhead mode. -- Plan in at most 4 bullets. -- Execute by phase. -- Batch related reads and commands. -- Avoid duplicate reads and interactive loops. -- Keep outputs compact. -- Verify once per phase. -- Low output alone is not a defect. A bounded run that finishes in roughly <=10 steps is usually fine. -- Low output spread across 20+ steps with rising per-turn input is fragmentation and should be treated as context growth first. -- Startup / resume summaries stay tiny: `branch`, `task`, `blocker`, `next`, and `evidence`. 
-- Front-load scaffold/path discovery into one grouped inspection pass. Avoid serial `ls` / `find` / `rg` / `cat` retries that rediscover the same path state. -- Treat repeated `write_stdin`, repeated `sed` / `cat` peeks, and tiny diagnostic follow-up checks as strong negative signals. -- If a session turns fragmented, collapse back to inspect once, patch once, verify once, and summarize once. -- Tool / hook summaries stay tiny: command, status, last meaningful lines only. Drop routine hook boilerplate. -- Keep raw terminal interaction out of long-lived context. For `write_stdin` or interactive babysitting, retain only process, action sent, current result, and next action. -- Keep execution log separate from reasoning context: full commands/stdout belong in logs, while prompt context keeps only the latest 1-2 checkpoints plus the newest tool-result summary. -- Treat local edit/commit, remote publish/PR, CI diagnosis, and cleanup as bounded phases. -- Do not spend fresh narration or approval turns on obvious safe follow-ons inside an already authorized phase unless the risk changes. - -### RTK command compression - -When `rtk` is available, prefer it for noisy shell discovery and verification. For file search, fff MCP takes precedence whenever available. - -- Files: `rtk ls .`, `rtk read `, `rtk read -l aggressive`, `rtk smart `, `rtk find "" .`, `rtk grep "" .`, `rtk diff `. -- Git and GitHub: `rtk git status`, `rtk git diff`, `rtk git log -n 10`, `rtk gh pr list`, `rtk gh pr view `. -- Tests and builds: `rtk test `, `rtk err `, `rtk jest`, `rtk vitest`, `rtk playwright test`, `rtk pytest`, `rtk cargo test`, `rtk tsc`, `rtk lint`. -- Runtime and data probes: `rtk docker ps`, `rtk docker logs `, `rtk kubectl pods`, `rtk json `, `rtk log `, `rtk curl `. -- Savings checks: `rtk gain`, `rtk discover`, and `rtk session`. -- Use `rtk proxy ` only when raw passthrough is required. 
-- Do not wrap machine-readable commands with RTK when code parses stdout (`--porcelain`, `--json`, NUL-delimited output, or exact stdout contracts). -- If `rtk` is missing, use raw commands and summarize only meaningful lines. - -### FFF file search - -Use the fff MCP tools for all file search operations instead of default tools, including RTK shell wrappers. - -If fff MCP tools are unavailable in the current client, fall back to `rtk grep`, `rtk find`, `rtk ls`, or `rg` and keep output compact. - -### Caveman style +Lightweight escape prefixes: `quick:`, `simple:`, `tiny:`, `minor:`, `small:`, `just:`, `only:`. -Commentary and progress updates use smart-caveman `ultra` by default: - -- Answer order stays fixed: answer first, cause next, fix or next step last. -- drop filler -- use fragments when clear -- answer first -- cause next -- fix or next step last - -Keep exact literals unchanged: - -- code -- commands -- file paths -- flags -- env vars -- URLs -- numbers -- timestamps -- error text - -Switch back to `lite` or normal wording for: - -- security warnings -- irreversible actions -- privacy/compliance notes -- ordered instructions where fragments may confuse -- confused users -- commits -- PR text -- specs -- logs -- blocker evidence +Promote to full Guardex / OMX orchestration only when scope grows into multi-file behavior change, API/schema work, refactor, migration, architecture, cross-cutting scope, long prompt, or multi-agent execution. -Never caveman-compress commands, file paths, specs, logs, or blocker evidence. +### Isolation (the load-bearing rule) -### Isolation - -Every task runs on a dedicated `agent/*` branch and worktree. - -Start with: +Every task = one `agent/*` branch + worktree. Start with: ```bash gx branch start "" "" ``` -Treat the base branch (`main` / `dev`) as read-only while an agent branch is active. 
- -For every new task, including follow-up work in the same chat/session, if an assigned agent sub-branch/worktree is already open, continue in that sub-branch instead of creating a fresh lane unless the user explicitly redirects scope. +Then `cd` into the printed worktree path. Every subsequent git command runs from inside that worktree. -Never implement directly on the local/base branch checkout. Keep it unchanged and perform all edits in the agent sub-branch/worktree. +If a worktree is already open for this chat/session, **continue in it** instead of spawning a fresh lane unless the user redirects scope. ### Primary-tree lock On the primary checkout, do not run: ```bash -git checkout -git switch -git switch -c ... -git checkout -b ... -git worktree add +git checkout git switch +git switch -c ... git checkout -b ... +git worktree add

``` -Allowed on primary: +Allowed on primary: `git fetch`, `git pull --ff-only`. Anything else needs `gx branch start` first. -```bash -git fetch -git pull --ff-only -``` - -To work on any `agent/*` branch, run `gx branch start ...` first, then `cd` into the printed worktree path and run every subsequent git command from inside that worktree. - -If you are about to type `git checkout agent/...` or `git switch agent/...` from the primary checkout, stop. That is the mistake that flips primary onto an agent branch. +If you are about to type `git checkout agent/...` from the primary checkout, **stop** — that is the mistake that flips primary onto an agent branch. ### Dirty-tree rule -Finish or stash edits inside the worktree they belong to before any branch switch on primary. +Finish or stash edits inside the worktree they belong to before any branch switch on primary. The post-checkout guard may auto-stash a dirty primary tree as `guardex-auto-revert ->` — that is a safety net, not a workflow. -The post-checkout guard may auto-stash a dirty primary tree as: - -```text -guardex-auto-revert -> -``` - -That is a safety net, not a workflow. Do not rely on it routinely. - -Recover stashed changes with: - -```bash -git stash list | grep 'guardex-auto-revert' -``` +Recover: `git stash list | grep 'guardex-auto-revert'`. ### Ownership -Before editing, claim files. - -Preferred Colony path when on an active task: - -```text -mcp__colony__task_claim_file -``` - -Guardex lock path: +Before editing, claim files: ```bash gx locks claim --branch "" ``` -Before deleting, confirm the path is in your claim. - -Do not edit outside your scope unless reassigned. - -If another agent owns or recently touched nearby code: - -1. read latest Colony context -2. post a handoff or question +If another agent owns nearby code: +1. read the latest context for that lane +2. post a handoff / question 3. avoid reverting unrelated changes 4. 
report conflicts instead of overwriting -### Handoff gate +### Handoff format -Before editing, post a one-line handoff note through Colony `task_post` when a task is active. - -Use `.omx/notepad.md` only when Colony is unavailable or the lane explicitly depends on legacy OMX state. - -Handoff shape: +When posting handoff or working-state notes (`.omx/notepad.md`, PR description, or whichever coordination surface the repo uses), use these fields: ```text branch=; task=; blocker=; next=; evidence= ``` -Re-read latest Colony context before replacing another agent's code. +No long proof dumps, no stale narrative, no full logs. Bulky proof goes in OpenSpec artifacts, PRs, or command output. ### Completion @@ -296,59 +84,38 @@ Finish with: ```bash gx branch finish --branch "" --via-pr --wait-for-merge --cleanup -``` - -or: - -```bash +# or: gx finish --all ``` -Task is complete only when: +Task scaffolds and manual task edits must include a final completion/cleanup section that ends with PR merge + sandbox cleanup and records PR URL + final `MERGED` evidence. -1. changes are committed -2. branch is pushed -3. PR URL is recorded -4. PR state is `MERGED` -5. sandbox worktree is pruned +Task is complete only when **all six** are true: + +1. changes committed +2. branch pushed +3. PR URL recorded +4. PR state = `MERGED` +5. sandbox worktree pruned 6. final handoff records proof -If anything blocks, append a `BLOCKED:` note and stop. Do not half-finish. +If blocked, append a `BLOCKED:` note and stop. Do not half-finish. -OMX completion policy: when a task is done, the agent must run `gx branch finish --branch "" --via-pr --wait-for-merge --cleanup` (or `gx finish --all`) instead of standalone `git push` / `gh pr` commands. The finish flow owns commit, push, PR creation/update, merge wait, and sandbox cleanup. +Use the finish flow instead of standalone `git push` / `gh pr` commands. 
The finish flow owns commit, push, PR creation/update, merge wait, and sandbox cleanup; standalone fallbacks strand PR / merge / cleanup state. -External approval boundary: +### External approval boundary -- Guardex cannot bypass Codex host approval prompts or external-remote policy decisions. -- When the host blocks a publish or finish command, request approval for the narrow `gx branch finish ...` command, or for the exact session wrapper that invokes it, and continue after approval. -- Do not replace the finish flow with repeated standalone `git push` / `gh pr` attempts. That increases approval churn and can strand PR, merge, or cleanup state. +Guardex cannot bypass Codex host approval prompts or external-remote policy decisions. When the host blocks a publish or finish command, request approval for the narrow `gx branch finish ...` command, or for the exact session wrapper that invokes it, and continue after approval. Do not replace the finish flow with repeated standalone `git push` / `gh pr` attempts — that increases approval churn and can strand state. ### Parallel safety -Assume other agents edit nearby. - -- Never revert unrelated changes. -- Never simplify or delete critical shared paths without explicit request and regression coverage. -- Report conflicts in the handoff. -- Prefer compatibility-preserving changes over endpoint-specific rewrites when other agents may be changing adjacent systems. +Assume other agents edit nearby. Never revert unrelated changes. Never simplify or delete critical shared paths without explicit request + regression coverage. Prefer compatibility-preserving changes when adjacent systems may be in motion. ### Reporting -Every completion handoff includes: +Every completion handoff includes: branch, task, files changed, behavior touched, verification commands + results, PR URL, merge state, sandbox cleanup state, risks/follow-ups. 
-```text -branch -task -files changed -behavior touched -verification commands/results -PR URL -merge state -sandbox cleanup state -risks/follow-ups -``` - -If blocked, use: +Blocked? Use: ```text BLOCKED: @@ -359,89 +126,46 @@ next= evidence= ``` -### Open questions - -If Codex/Claude hits an unresolved question, branching decision, or blocker that should survive chat, record it in: - -```text -openspec/plan//open-questions.md -``` - -as an unchecked item: +### Verification gates -```md -- [ ] Question or blocker... -``` +Before claiming completion, run the narrowest meaningful verification (`pnpm test`, `pnpm typecheck`, `pnpm lint`, etc. — whatever fits the touched area). Do not claim green without command output evidence. If a command can't run, record command / reason / risk / next. -Resolve it in-place when answered instead of burying it in chat-only notes. +### Open questions -### OpenSpec +Persist unresolved questions or blockers into `openspec/plan//open-questions.md` as unchecked items. Resolve in-place rather than burying in chat. -OpenSpec is the source of truth for change-driven repo work. +### Optional companion tooling (use if installed) -For change-driven tasks, keep: +- **fff MCP** (file search): prefer for all file search; fall back to `rtk grep`/`rtk find` or `rg`. +- **rtk** (shell compression): wrap noisy discovery (`rtk ls`/`rtk grep`/`rtk find`/`rtk read`), git/gh (`rtk git status`/`rtk gh pr list`), and verification (`rtk tsc`/`rtk lint`/`rtk test`). Do **not** wrap machine-readable commands (`--porcelain`, `--json`, exact stdout contracts). +- **OpenSpec**: keep `openspec/changes//tasks.md` current during work, not batched. Validate with `openspec validate --specs` before archive. -```text -openspec/changes//tasks.md -``` +### Caveman style -current during work, not batched at the end. +Default to `ultra` for commentary and progress updates: answer first, cause next, fix or next step last. Drop filler. Use fragments when clear. 
-Task scaffolds and manual task edits must include a final completion/cleanup section that ends with PR merge + sandbox cleanup and records PR URL + final `MERGED` evidence. +Keep exact literals (code, commands, file paths, flags, env vars, URLs, numbers, timestamps, error text) unchanged. -Validate specs before archive: +Switch back to `lite` or normal wording for security warnings, irreversible actions, privacy/compliance notes, ordered instructions where fragments may confuse, confused users, commits, PR text, specs, logs, and blocker evidence. -```bash -openspec validate --specs -``` +Never caveman-compress commands, file paths, specs, logs, or blocker evidence. -Never archive unverified work. +### Token / context budget -For `T0` / small `T1` lanes, use the compact Colony spec path when available. One Colony handoff plus `colony-spec.md` is enough. Do not create proposal/spec/tasks unless the task grows. +Default: less word, same proof. -For `T2` / `T3` lanes, keep proposal, spec, design, and tasks live while implementing. +- Plan in ≤4 bullets, execute by phase, batch reads/commands. +- Verify once per phase. A bounded ≤10-step run is fine. +- 20+ steps with rising per-turn input = fragmentation → collapse to inspect once, patch once, verify once, summarize once. +- Startup/resume summaries stay tiny: `branch`, `task`, `blocker`, `next`, `evidence`. +- Keep raw terminal interaction out of long-lived context: retain only process, action sent, current result, next action. +- Full commands/stdout belong in logs; prompt context keeps only the latest 1–2 checkpoints plus the newest tool-result summary. ### Version bumps -If a change bumps a published version, the same PR records release notes in the appropriate OpenSpec artifact or release-note mechanism for the repo. - -Do not edit `CHANGELOG.md` directly unless the repo explicitly requires manual changelog edits. 
- -### Verification gates - -Before claiming completion, run the narrowest meaningful verification for the touched area. - -Examples: - -```bash -pnpm test -pnpm typecheck -pnpm lint -``` - -If a command cannot run, record: - -```text -command -reason it could not run -risk -next -``` - -Do not claim green verification without command output evidence. +If a change bumps a published version, the same PR records release notes in the appropriate OpenSpec artifact or release-note mechanism for the repo. Do not edit `CHANGELOG.md` directly unless the repo explicitly requires manual changelog edits. ### What not to put in this file -Do not embed: - -- stale memory dumps -- PR transcripts -- long logs -- generated status snapshots -- session history -- full OpenSpec examples -- repeated copies of long workflow docs - -Keep this section as the hard multi-agent contract. Put long examples and recovery docs in repo-specific workflow docs. - +No stale memory dumps, PR transcripts, long logs, generated status snapshots, session history, full OpenSpec examples, or duplicate workflow docs. This block is the hard contract — long examples and recovery docs live in repo-specific workflow files. 
diff --git a/test/setup.test.js b/test/setup.test.js index a2d79891..f1bab537 100644 --- a/test/setup.test.js +++ b/test/setup.test.js @@ -137,14 +137,12 @@ test('setup provisions workflow files and repo config', () => { assert.match(agentsContent, /GUARDEX_ON=0/); assert.match( agentsContent, - /For every new task, including follow-up work in the same chat\/session, if an assigned agent sub-branch\/worktree is already open, continue in that sub-branch/, + /If a worktree is already open for this chat\/session, \*\*continue in it\*\*/, ); - assert.match(agentsContent, /### Colony coordination loop/); - assert.match(agentsContent, /Use Colony as the primary coordination surface\./); assert.match(agentsContent, /### Token \/ context budget/); assert.match(agentsContent, /Default: less word, same proof\./); assert.match(agentsContent, /### Caveman style/); - assert.match(agentsContent, /Answer order stays fixed: answer first, cause next, fix or next step last\./); + assert.match(agentsContent, /answer first, cause next, fix or next step last\./); const claudeStats = fs.lstatSync(path.join(repoDir, 'CLAUDE.md')); assert.equal(claudeStats.isSymbolicLink(), true, 'CLAUDE.md should link to AGENTS.md'); @@ -627,14 +625,14 @@ Trailing project notes after managed block. assert.match(nextAgents, /Trailing project notes after managed block\./); assert.match( nextAgents, - /For every new task, including follow-up work in the same chat\/session, if an assigned agent sub-branch\/worktree is already open, continue in that sub-branch/, + /If a worktree is already open for this chat\/session, \*\*continue in it\*\*/, ); assert.match( nextAgents, - /Never implement directly on the local\/base branch checkout\. Keep it unchanged and perform all edits in the agent sub-branch\/worktree\./, + /Work from an `agent\/\*` branch \+ worktree\. 
\*\*Never\*\* edit the protected base directly\./, ); assert.match(nextAgents, /Small tasks stay direct and caveman-only\./); - assert.match(nextAgents, /Promote to full Guardex \/ OMX orchestration only when scope grows into:/); + assert.match(nextAgents, /Promote to full Guardex \/ OMX orchestration only when scope grows into/); assert.match(nextAgents, /final completion\/cleanup section/); assert.match(nextAgents, /PR URL \+ final `MERGED` evidence/); assert.doesNotMatch(nextAgents, /legacy managed clause/); @@ -850,16 +848,15 @@ test('install configures AGENTS managed policy block with GX contract wording', const agentsContent = fs.readFileSync(path.join(repoDir, 'AGENTS.md'), 'utf8'); assert.match(agentsContent, //); - assert.match(agentsContent, /## Multi-Agent Execution Contract: Guardex \+ Colony/); + assert.match(agentsContent, /## Multi-Agent Execution Contract/); assert.match( agentsContent, - /OMX completion policy: when a task is done, the agent must run `gx branch finish --branch "" --via-pr --wait-for-merge --cleanup`/, + /gx branch finish --branch "" --via-pr --wait-for-merge --cleanup/, ); - assert.match(agentsContent, /instead of standalone `git push` \/ `gh pr` commands/); - assert.match(agentsContent, /External approval boundary:/); + assert.match(agentsContent, /instead of standalone `git push` \/ `gh pr`/); + assert.match(agentsContent, /### External approval boundary/); assert.match(agentsContent, /Guardex cannot bypass Codex host approval prompts/); assert.match(agentsContent, /request approval for the narrow `gx branch finish \.\.\.` command/); - assert.match(agentsContent, /### Colony coordination loop/); assert.match(agentsContent, /### Token \/ context budget/); assert.match(agentsContent, /### Caveman style/); });