From ecb4a1fcfbeee087d93dfe7080379a3ffa4b6181 Mon Sep 17 00:00:00 2001
From: Pablo Borges <90059865+pablo-ibco@users.noreply.github.com>
Date: Fri, 29 May 2026 07:14:38 -0300
Subject: [PATCH 1/3] feat: agentic engineering foundation (rules, skills,
 profiles, AGENTS)

Establishes the agentic foundation for this repo, mirroring the rollout
in CityCatalyst and CityCatalyst-global-data.

- AGENTS.md / CLAUDE.md as the read-first agent contract
- .cursor/rules: general, project-architecture, git-conventions,
  security-baseline, os-shell, anthropic-tool-use, adapters
- .cursor/skills: commit-message-standards, pull-request-standards
- profiles/: per-target-repo defaults (citycatalyst, global-data,
  agentic-coder)
- prompts/system-base.md as reference for the live system prompt
- docs/PLAYBOOK.md (operating) + docs/EXTENDING.md (extending)
- tasks/global-data-cleanup.md + tasks/self-improvement.md
- .cursorignore tuned for the repo
- README updated with the new layout + documentation map

Same shape (rules + skills + AGENTS.md + .cursorignore) is rolled out
in parallel PRs in CityCatalyst and CityCatalyst-global-data so every
agent (Cursor, Cursor Cloud Agent, agentic-coder, Codex) gets the same
guardrails regardless of repo.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .cursor/rules/adapters.mdc                    |  39 +++++
 .cursor/rules/anthropic-tool-use.mdc          |  47 ++++++
 .cursor/rules/general.mdc                     |  13 ++
 .cursor/rules/git-conventions.mdc             |  44 ++++++
 .cursor/rules/os-shell.mdc                    |  15 ++
 .cursor/rules/project-architecture.mdc        |  56 +++++++
 .cursor/rules/security-baseline.mdc           |  47 ++++++
 .../skills/commit-message-standards/SKILL.md  |  45 ++++++
 .../skills/pull-request-standards/SKILL.md    |  44 ++++++
 .cursorignore                                 |  18 +++
 AGENTS.md                                     | 129 ++++++++++++++++
 CLAUDE.md                                     |  55 +++++++
 README.md                                     |  29 +++-
 docs/EXTENDING.md                             | 143 +++++++++++++++++
 docs/PLAYBOOK.md                              | 145 ++++++++++++++++++
 profiles/agentic-coder.yaml                   |  22 +++
 profiles/citycatalyst.yaml                    |  26 ++++
 profiles/global-data.yaml                     |  24 +++
 prompts/system-base.md                        |  85 ++++++++++
 tasks/global-data-cleanup.md                  |  50 ++++++
 tasks/self-improvement.md                     |  81 ++++++++++
 21 files changed, 1154 insertions(+), 3 deletions(-)
 create mode 100644 .cursor/rules/adapters.mdc
 create mode 100644 .cursor/rules/anthropic-tool-use.mdc
 create mode 100644 .cursor/rules/general.mdc
 create mode 100644 .cursor/rules/git-conventions.mdc
 create mode 100644 .cursor/rules/os-shell.mdc
 create mode 100644 .cursor/rules/project-architecture.mdc
 create mode 100644 .cursor/rules/security-baseline.mdc
 create mode 100644 .cursor/skills/commit-message-standards/SKILL.md
 create mode 100644 .cursor/skills/pull-request-standards/SKILL.md
 create mode 100644 .cursorignore
 create mode 100644 AGENTS.md
 create mode 100644 CLAUDE.md
 create mode 100644 docs/EXTENDING.md
 create mode 100644 docs/PLAYBOOK.md
 create mode 100644 profiles/agentic-coder.yaml
 create mode 100644 profiles/citycatalyst.yaml
 create mode 100644 profiles/global-data.yaml
 create mode 100644 prompts/system-base.md
 create mode 100644 tasks/global-data-cleanup.md
 create mode 100644 tasks/self-improvement.md
diff --git a/.cursor/rules/adapters.mdc b/.cursor/rules/adapters.mdc
new file mode 100644
index 0000000..d55f394
--- /dev/null
+++ b/.cursor/rules/adapters.mdc
@@ -0,0 +1,39 @@
+---
+description: TaskAdapter conventions — fetch_tasks, on_task_started, on_task_completed.
+globs: agent_factory/adapters/**/*.py
+alwaysApply: false
+---
+
+# Adapter Conventions
+
+Each adapter lives in `agent_factory/adapters/<source>.py` and implements `TaskAdapter`:
+
+```python
+class TaskAdapter:
+    def fetch_tasks(self) -> list[Task]: ...
+    def on_task_started(self, task: Task) -> None: ...
+    def on_task_completed(self, task: Task, result: AgentResult) -> None: ...
+```
+
+## Required behaviours
+
+- **Auth missing → loud failure** at `__init__`. Don't silently fall back to anonymous mode.
+- **HTTP timeouts** (15s default for Jira / Notion). Keep them.
+- **Lifecycle hooks** wrap upstream calls in try/except and `print` the error — they must **not** raise (a failed comment on Jira shouldn't kill the PR).
+- **Task fields** populated: at minimum `title`, `task_type`, `description`. `files` and `acceptance_criteria` are optional but the agent uses them when present.
+- **Stash adapter-specific identifiers** as private attrs on the Task (`_jira_key`, `_notion_page_id`).
+
+## When fetching
+
+- Sort tasks deterministically (e.g. by ticket key / created date). Don't rely on API ordering.
+- Limit pages — most adapters cap at 50 results per call.
+
+## Adding a new adapter
+
+See `docs/EXTENDING.md` → "Add a task adapter". TL;DR:
+
+1. Subclass `TaskAdapter` in `adapters/<source>.py`.
+2. Add a branch in `_build_adapter()` (`main.py`).
+3. Add a subparser in `_build_argparser()` (`main.py`).
+4. Mirror `.env.example` block for credentials.
+5. Smoke test (`tests/adapters/test_<source>.py`).
diff --git a/.cursor/rules/anthropic-tool-use.mdc b/.cursor/rules/anthropic-tool-use.mdc
new file mode 100644
index 0000000..7deb67b
--- /dev/null
+++ b/.cursor/rules/anthropic-tool-use.mdc
@@ -0,0 +1,47 @@
+---
+description: Anthropic tool-use conventions — TOOL_DEFINITIONS schemas, response handling.
+globs: agent_factory/agent.py,agent_factory/tools.py
+alwaysApply: false
+---
+
+# Anthropic tool-use conventions
+
+## Tool definitions
+
+Each tool in `tools.py:TOOL_DEFINITIONS` is a dict with:
+
+```python
+{
+    "name": "snake_case",
+    "description": "<one-line, instructions-to-the-model voice>",
+    "input_schema": {
+        "type": "object",
+        "properties": { ... },
+        "required": [ ... ],
+    },
+}
+```
+
+The `description` is read by the LLM. Write it as instructions, not as docs for humans.
+
+## Adding a new tool
+
+1. Add the schema in `TOOL_DEFINITIONS`.
+2. Add a branch in `execute_tool()` that returns a string (or short structured value) — never a multi-MB blob.
+3. Cap large outputs (truncate + indicate). The current `read_file` / `run_command` already do this — follow that pattern.
+4. Respect `Config.dry_run` if the tool mutates state.
+5. Add a smoke test in `tests/` (we'll need to create `tests/`; see `docs/EXTENDING.md`).
+
+## Response handling in the loop
+
+The loop reads `response.content` for both `text` blocks and `tool_use` blocks. After each turn:
+
+- Append the assistant message verbatim.
+- For each `tool_use`, run `execute_tool()` and append a `tool_result` with the same `tool_use_id`.
+- If `stop_reason == "end_turn"` and there were no `tool_use` blocks → done.
+- If there were no `tool_use` blocks but `stop_reason` is not `end_turn` → exit early ("model gave up").
+- After `max_agent_turns` → exit without PR URL.
+
+## Cost / token usage
+
+`response.usage.input_tokens` and `output_tokens` are sampled per call. `UsageTracker` aggregates. Per-MTok rates are **hardcoded** for Sonnet. If the model changes, fix `COST_PER_MTok` in the same PR.
diff --git a/.cursor/rules/general.mdc b/.cursor/rules/general.mdc
new file mode 100644
index 0000000..52f35cb
--- /dev/null
+++ b/.cursor/rules/general.mdc
@@ -0,0 +1,13 @@
+---
+description: General coding standards for agentic-coder.
+alwaysApply: true
+---
+
+# General Rules
+
+- Keep the code small and obvious. This is a tool used unattended overnight; any abstraction must pay for itself.
+- No new top-level modules unless a real second use case justifies it.
+- Comments / docstrings on every public function. Internal helpers can stay terse if the name is obvious.
+- Prefer **intent / why** comments over what comments. The diff shows what.
+- No silent failures. Bare `except: pass` is banned outside Jira/Notion lifecycle hooks where the upstream is genuinely best-effort.
+- `print()` is the current logging surface. If we adopt `logging`, do it project-wide in one PR.
diff --git a/.cursor/rules/git-conventions.mdc b/.cursor/rules/git-conventions.mdc
new file mode 100644
index 0000000..ccf7b27
--- /dev/null
+++ b/.cursor/rules/git-conventions.mdc
@@ -0,0 +1,44 @@
+---
+description: Branch naming, commit format, PR text. Base branch `main`.
+alwaysApply: true
+---
+
+# Git Conventions — agentic-coder
+
+## Base branch
+
+`main` (this is a tool repo, no `develop`).
+
+## Branches
+
+- `pborges/<slug>` — Pablo's work.
+- `feat/<slug>` / `fix/<slug>` / `chore/<slug>` — anyone.
+- `agent/<slug>` — Cursor-agent-driven work.
+- `agentic-coder/<slug>` — autonomous runs against itself (rare; safer to test against `CityCatalyst`).
+
+## Commit messages
+
+Conventional Commits (`<type>(<scope>): summary`).
+
+Recommended `<scope>`: `agent`, `tools`, `adapter`, `scanner`, `cli`, `docs`, `ci`.
+
+Examples:
+
+```
+feat(adapter): add linear task source
+fix(scanner): wire exclude_glob for console.log rule
+docs(playbook): document Cloud Agents kickoff template
+chore(deps): pin anthropic to 0.50.x
+```
+
+## PRs
+
+- Title ≤ 72 chars, imperative.
+- Body: Summary (1–3 sentences) + Changes (bullets) + Commits (optional).
+- **Don't open PRs unless explicitly told** in the active task. Push the branch and stop; a human reviews and opens it.
+
+## Merge policy
+
+- **Code in `agent_factory/`, tests, docs/** — any tech-team member can merge after standard review (≥1 approval, CI green).
+- **Agentic foundation** (`AGENTS.md`, `CLAUDE.md`, `.cursor/rules/`, `.cursor/skills/`, `prompts/`, `profiles/`) — CTO sign-off required; after approval, anyone merges.
+- **Agents** never merge their own PRs.
diff --git a/.cursor/rules/os-shell.mdc b/.cursor/rules/os-shell.mdc
new file mode 100644
index 0000000..96210c4
--- /dev/null
+++ b/.cursor/rules/os-shell.mdc
@@ -0,0 +1,15 @@
+---
+description: POSIX shell defaults; `run.sh` is bash-only.
+alwaysApply: true
+---
+
+# OS / Shell Defaults
+
+This repo ships `run.sh` (bash). Default to `bash` in a POSIX environment.
+
+- `bash`-isms allowed inside `run.sh`. Document in comments where they're load-bearing.
+- Forward-slash paths.
+- `cat <<'EOF'` heredocs for multi-line text.
+- Avoid pagers in the agent's `run_command` invocations: append `| cat` or `--no-pager` for git.
+
+Windows users run inside WSL2.
diff --git a/.cursor/rules/project-architecture.mdc b/.cursor/rules/project-architecture.mdc
new file mode 100644
index 0000000..2097a77
--- /dev/null
+++ b/.cursor/rules/project-architecture.mdc
@@ -0,0 +1,56 @@
+---
+description: agentic-coder architecture map — agent loop, tools, adapters, scanner.
+alwaysApply: true
+---
+
+# agentic-coder Architecture
+
+```
+agentic-coder/
+├── agent_factory/
+│   ├── main.py          CLI: markdown / jira / notion / scan / watch
+│   ├── agent.py         The loop: client.messages.create + tool dispatch
+│   ├── config.py        Config dataclass + .env loader
+│   ├── context.py       Read target-repo AGENTS.md / .cursor/* into system prompt
+│   ├── tools.py         5 tools (read_file, search_code, list_directory, edit_file, run_command)
+│   ├── scanner.py       SCAN_CATEGORIES (TODO, console.log, as any, empty catch)
+│   ├── watcher.py       Polling loop + idle scan
+│   ├── preflight.py     git / gh / clean tree / remote checks
+│   ├── task_parser.py   Task dataclass + markdown parser
+│   └── adapters/
+│       ├── base.py      TaskAdapter interface
+│       ├── markdown.py  Markdown task source
+│       ├── jira.py      Jira REST adapter
+│       └── notion.py    Notion API adapter
+├── profiles/<repo>.yaml Per-target-repo defaults (NEW)
+├── prompts/             System-prompt scaffolds (NEW)
+├── tasks/               Markdown task backlogs
+└── logs/                Per-task session log (gitignored)
+```
+
+## Default model + branch
+
+- Model: `claude-sonnet-4-20250514` (`Config.model`).
+- Target base branch: `develop` (`Config.base_branch`).
+- Target branch prefix: `agentic-coder/` (`Config.branch_prefix`).
+- Max turns per task: `50` (`Config.max_agent_turns`).
+
+## Loop end conditions
+
+- `stop_reason == "end_turn"` and no `tool_use` blocks → success, returns.
+- No tool calls and not `end_turn` → exits early.
+- `max_agent_turns` reached → returns without PR URL.
+
+## Cost tracking
+
+`UsageTracker` in `agent.py` sums `usage.input_tokens` / `output_tokens` per response. Rates are **hardcoded for Sonnet** (`COST_PER_MTok = {"input": 3.0, "output": 15.0}`). If the model changes, these rates are wrong and must be updated.
+
+## Path discipline
+
+`logs/` is `Path(__file__).parent.parent / "logs"` — always inside this repo. Not the target repo.
+
+## Gotchas
+
+- `task.repo` field is parsed but never used.
+- `scanner.SCAN_CATEGORIES["console.log"].exclude_glob` is defined but **not wired** into `_search`.
+- `_extract_pr_url` only matches `github.com` URLs.
diff --git a/.cursor/rules/security-baseline.mdc b/.cursor/rules/security-baseline.mdc
new file mode 100644
index 0000000..3ddaf66
--- /dev/null
+++ b/.cursor/rules/security-baseline.mdc
@@ -0,0 +1,47 @@
+---
+description: Security baseline — secrets, shell, gh token leakage, untrusted input.
+alwaysApply: true
+---
+
+# Security Baseline — agentic-coder
+
+This tool runs commands and edits files in **other repos** on behalf of an LLM. Treat it like a remote-code-execution surface in your own org.
+
+## Secrets
+
+- `.env` is gitignored. Never commit it.
+- `.env.example` is the canonical schema; keep it in sync with `Config` defaults.
+- Never log full Anthropic / Jira / Notion / GitHub tokens. Mask after the first 4 chars.
+- `tools.py:execute_tool` strips `GITHUB_TOKEN` from the env passed to subprocess. Keep that — don't "simplify" it away.
+
+## Shell (`run_command`)
+
+- `tools.py:run_command` accepts arbitrary shell input from the model. The current blocklist is small. **Improve, don't shrink.**
+  - Keep blocking: `rm -rf /`, force-pushes to `main`/`develop`/`master`, `git config --global` edits.
+  - Cap on output: 10k chars per call (already enforced).
+  - Cap on time: 120s (already enforced).
+- Do not extend `run_command` with elevation (`sudo`).
+
+## Tool dispatch
+
+- Dry-run mode (`Config.dry_run`) must skip both `edit_file` and `run_command`. Adding a new mutating tool? It must respect dry-run.
+
+## Preflight checks
+
+- `preflight.py` must continue to refuse to operate on a non-clean working tree, on the wrong base branch, or with `gh` unauthenticated.
+- If a check is bypassed, surface `--force` explicitly — never make bypass the default.
+
+## Adapter credentials
+
+- Jira / Notion adapters fail loudly when credentials are missing — do not fall back to anonymous mode.
+- HTTP adapters time out (Jira: 15s default, Notion: 15s default) — keep them.
+
+## Commit messages and PR bodies (model-authored)
+
+- Strip credentials and bearer tokens from the model's text output before posting (defensive — the model shouldn't emit them, but the `gh pr create` path is the last barrier).
+- PR body cap: keep ≤ 4k chars to avoid surprise on GitHub UI.
+
+## Cost guardrails
+
+- Cost tracking is informational, not a hard cap. The hard cap today is `max_agent_turns=50`. Do not raise without a reason in the PR body.
+- For autonomous scenarios (`watch`), consider a daily budget: log per-day spend and stop if exceeded.
diff --git a/.cursor/skills/commit-message-standards/SKILL.md b/.cursor/skills/commit-message-standards/SKILL.md
new file mode 100644
index 0000000..76dc6ea
--- /dev/null
+++ b/.cursor/skills/commit-message-standards/SKILL.md
@@ -0,0 +1,45 @@
+---
+name: commit-message-standards
+description: Generate Conventional Commits messages for agentic-coder.
+---
+
+# commit-message-standards — agentic-coder
+
+Conventional Commits, ≤72 chars per line, imperative summary.
+
+```
+<type>(<scope>): <imperative summary>
+
+<body — explain WHY, wrap at 72>
+
+<footer — Refs: #N>
+```
+
+Recommended `<scope>`:
+- `agent` — `agent.py`, system prompt, loop changes.
+- `tools` — `tools.py`, tool definitions, `execute_tool`.
+- `adapter` — anything in `adapters/`.
+- `scanner` — `scanner.py`.
+- `cli` — `main.py`, `run.sh`, argparse.
+- `docs` — README, AGENTS.md, CLAUDE.md, docs/.
+- `ci`, `chore`, `deps`.
+
+Examples:
+
+```
+feat(adapter): add linear task source
+
+Mirrors the Jira adapter shape. Reads LINEAR_API_KEY and
+LINEAR_TEAM_ID from .env. On task complete, posts a comment with
+the PR URL via the Linear GraphQL API.
+```
+
+```
+fix(scanner): wire exclude_glob for console.log rule
+
+The SCAN_CATEGORIES dict defined `exclude_glob` for the
+`console.log` rule but `_search` ignored it, so test files were
+counted as production violations. Pass it to rg via --glob '!…'.
+```
+
+Anti-patterns: `wip`, `update`, `cleanup`, multi-purpose commits.
diff --git a/.cursor/skills/pull-request-standards/SKILL.md b/.cursor/skills/pull-request-standards/SKILL.md
new file mode 100644
index 0000000..598785d
--- /dev/null
+++ b/.cursor/skills/pull-request-standards/SKILL.md
@@ -0,0 +1,44 @@
+---
+name: pull-request-standards
+description: Draft PRs for agentic-coder. Base branch is main.
+---
+
+# pull-request-standards — agentic-coder
+
+## Derive context
+
+- Owner / repo: `git remote get-url origin` → `Open-Earth-Foundation/agentic-coder`.
+- Head: `git rev-parse --abbrev-ref HEAD`.
+- Base: **`main`** (this is a tool repo).
+
+## Title
+
+- ≤72 chars, imperative.
+- Conventional-Commit-flavoured: `feat(adapter): add linear task source`.
+
+## Body
+
+```markdown
+## Summary
+1–3 sentences: what changed and why.
+
+## Changes
+- bullet list
+
+## Verification
+- how this was tested locally (e.g. `./run.sh task 1` against a test repo)
+
+## Compatibility notes (if applicable)
+- CLI surface changes
+- .env additions / removals
+```
+
+## Push policy
+
+Branch is assumed to be already pushed. Don't `git push` unless explicitly asked.
+
+## Who merges
+
+- **Code in `agent_factory/`, tests, docs/** — any tech-team member after standard review (≥1 approval, CI green).
+- **Agentic foundation** (`AGENTS.md`, `CLAUDE.md`, `.cursor/rules/`, `.cursor/skills/`, `prompts/`, `profiles/`) — CTO sign-off required; then anyone merges.
+- **Agents** never merge their own PRs and do not open PRs unless explicitly told. Open the PR when told to, then stop.
diff --git a/.cursorignore b/.cursorignore
new file mode 100644
index 0000000..0db2ac5
--- /dev/null
+++ b/.cursorignore
@@ -0,0 +1,18 @@
+# Files / dirs Cursor should NOT index for embeddings or context.
+
+**/__pycache__/
+**/.pytest_cache/
+**/.venv/
+**/venv/
+
+# Logs (gitignored, large)
+logs/
+
+# Local env / secrets
+.env
+.env.local
+**/credentials*.json
+
+# OS noise
+.DS_Store
+**/.DS_Store
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..6eeb246
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,129 @@
+# agentic-coder — Agent Brief
+
+**Read this first.** This file applies whether you're an AI agent (Cursor, Cursor Cloud Agent, Claude Code, Codex) editing this repo, or a human contributor sending a PR.
+
+`agentic-coder` is **the OEF autonomous-coder tool**. It reads tasks, edits a target codebase, and opens PRs. It is dog-food: anything we add here that improves our agents must also improve this repo's own development experience.
+
+---
+
+## What this repo is
+
+A small Python CLI (`agent_factory/`) that:
+
+1. Loads tasks from a source (`markdown`, `jira`, `notion`).
+2. Runs Claude with a tool-use loop and 5 tools (`read_file`, `search_code`, `list_directory`, `edit_file`, `run_command`).
+3. Implements + tests + self-reviews + opens a PR via `gh`.
+4. Tracks token usage and per-task cost.
+5. Optionally polls (`watch`) and auto-scans the target repo (`scan`).
+
+Default model: `claude-sonnet-4-20250514`. Default base branch on the **target** repo: `develop`. Branch prefix on the target: `agentic-coder/`.
+
+## Repo layout
+
+```
+agentic-coder/
+├── AGENTS.md                    # this file (the agent contract)
+├── CLAUDE.md                    # extra context for Claude Code (mirror of AGENTS.md essentials)
+├── README.md                    # human-facing overview
+├── run.sh                       # CLI wrapper
+├── .env / .env.example          # config (Anthropic key, REPO_PATH, branch prefix, …)
+├── requirements.txt
+├── agent_factory/               # core package
+│   ├── main.py                  # CLI entry (markdown / jira / notion / scan / watch)
+│   ├── agent.py                 # agentic loop, cost tracking, PR URL extraction
+│   ├── config.py                # .env loading + Config dataclass
+│   ├── context.py               # injects target-repo AGENTS.md / .cursor/rules / skills
+│   ├── tools.py                 # 5 tool definitions + execute_tool
+│   ├── scanner.py               # autonomous repo scanner (TODO / console.log / as any / empty catch)
+│   ├── watcher.py               # continuous polling + idle scan
+│   ├── preflight.py             # git / gh / clean tree / remote checks
+│   ├── task_parser.py           # Task dataclass + markdown parser
+│   └── adapters/                # markdown / jira / notion task sources
+├── tasks/                       # markdown task backlogs
+│   ├── getting-started.md
+│   └── citycatalyst-stability.md
+├── profiles/                    # per-target-repo defaults (NEW)
+│   ├── citycatalyst.yaml
+│   ├── global-data.yaml
+│   └── agentic-coder.yaml
+├── prompts/                     # system-prompt scaffolds (NEW)
+│   └── system-base.md
+├── docs/                        # operator docs (NEW)
+│   ├── PLAYBOOK.md              # how to run overnight / cloud / scan
+│   └── EXTENDING.md             # how to add adapter / tool / scanner rule
+└── logs/                        # session logs (gitignored)
+```
+
+---
+
+## What you must NOT do
+
+- **Do not open a PR against this repo unless explicitly told** in the active task. Push the branch and stop; a human reviews and opens it.
+- **Do not merge your own PR if you are an agent.** Humans merge.
+- **Do not commit secrets** — `.env`, Anthropic keys, Jira tokens, Notion tokens.
+- **Do not break the public CLI surface** (`./run.sh markdown|jira|notion|scan|watch [...]`). It's contracted by team scripts.
+- **Do not silently change the default model or `max_agent_turns`.** These have cost implications.
+- **Do not depend on libraries beyond the minimal set** (`anthropic`, `python-dotenv`, optional `requests` for adapters). Keep `requirements.txt` boring.
+- **Do not modify `AGENTS.md`, `CLAUDE.md`, `.cursor/rules/`, `.cursor/skills/`, `prompts/`, or `profiles/` without CTO review.** These are the agentic foundation. (Code merges in `agent_factory/` are unaffected — any tech-team member can merge after standard review.)
+
+## What you must always do
+
+- Branch off `main`, follow Conventional Commits.
+- Update `README.md` and `docs/PLAYBOOK.md` when CLI / behaviour changes.
+- Keep parity between `.env.example` and `Config` defaults.
+- Write a smoke test for any new adapter / tool / scanner rule (see `docs/EXTENDING.md`).
+- Use the `commit-message-standards` and `pull-request-standards` skills.
+
+---
+
+## Where to find what
+
+### Operating the tool
+
+- **Run overnight against CityCatalyst:** `docs/PLAYBOOK.md` → "Watch mode (Jira)".
+- **Run a one-off task:** `./run.sh task <N>` after editing `tasks/<file>.md`.
+- **Run the scanner:** `./run.sh scan` (idle scan also runs in `watch` after 5 idle cycles).
+- **Inspect logs:** `./run.sh logs` (last 20 markdown sessions in `logs/`).
+
+### Extending the tool
+
+- **Adding a new task source (Linear, GitHub Issues, …):** `docs/EXTENDING.md` → "Add a task adapter".
+- **Adding a new tool (e.g. `git_diff`, `mcp_call`):** `docs/EXTENDING.md` → "Add a tool".
+- **Adding a new scanner rule:** `docs/EXTENDING.md` → "Add a scanner category".
+- **Adding a per-repo profile:** drop a YAML in `profiles/<repo>.yaml` and pass `--profile <repo>` to the CLI.
+
+### Skills
+
+| Want to | Use |
+|---------|-----|
+| Write a commit message for this repo | `.cursor/skills/commit-message-standards/SKILL.md` |
+| Open / draft a PR for this repo | `.cursor/skills/pull-request-standards/SKILL.md` |
+
+### Rules
+
+| Topic | Open |
+|-------|------|
+| General code taste | `.cursor/rules/general.mdc` |
+| Repo architecture | `.cursor/rules/project-architecture.mdc` |
+| Branches, commits, PRs | `.cursor/rules/git-conventions.mdc` |
+| Security baseline (secrets, shell, gh) | `.cursor/rules/security-baseline.mdc` |
+| OS / shell defaults | `.cursor/rules/os-shell.mdc` |
+| Anthropic / tool-use | `.cursor/rules/anthropic-tool-use.mdc` |
+| Adapters | `.cursor/rules/adapters.mdc` |
+
+---
+
+## Quickstart (humans)
+
+```bash
+git clone git@github.com:Open-Earth-Foundation/agentic-coder.git
+cd agentic-coder
+python3 -m venv .venv && source .venv/bin/activate
+pip install -r requirements.txt
+cp .env.example .env       # fill ANTHROPIC_API_KEY, REPO_PATH, etc.
+
+./run.sh list              # see available tasks
+./run.sh task 1            # run task 1 against the target repo
+```
+
+The full playbook (overnight + Cloud Agents + scanner) lives in `docs/PLAYBOOK.md`.
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000..3c90300
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,55 @@
+# Claude Code — Context for `agentic-coder`
+
+This file supplements `AGENTS.md` with the conventions Claude Code (and similar) need to work in this repo. Read `AGENTS.md` first.
+
+## What this codebase is
+
+A small Python CLI that drives an autonomous coder loop using `anthropic.Anthropic.messages.create` with `tools` for an LLM tool-use workflow. The agent runs against a **target** repository configured via `REPO_PATH` in `.env`.
+
+## Where the loop lives
+
+- `agent_factory/agent.py:run_agent()` — the main loop. ~50 turns max. Returns when the model returns `end_turn` with no tool calls (or when `max_agent_turns` is hit).
+- `agent_factory/agent.py:SYSTEM_PROMPT` — the system prompt template; `branch_prefix` and `base_branch` are formatted in. The prompt is also augmented with `gather_project_context()` from the target repo.
+- `agent_factory/tools.py:TOOL_DEFINITIONS` + `execute_tool()` — 5 tools: `read_file`, `search_code`, `list_directory`, `edit_file`, `run_command`. `run_command` strips `GITHUB_TOKEN` from the env passed to subprocess.
+- `agent_factory/context.py:gather_project_context()` — reads `README.md`, `.cursor/rules/**/*.{md,mdc}`, `.cursor/skills/**/SKILL.md`, `AGENTS.md`, `CONTRIBUTING.md` from the target repo, capped at 12k chars.
+
+## Where the adapters live
+
+- `agent_factory/adapters/{markdown,jira,notion}.py` — each implements `TaskAdapter` (`fetch_tasks`, `on_task_started`, `on_task_completed`).
+
+## Important defaults / pitfalls
+
+- **Cost rates** are hardcoded for Sonnet ($3/M input, $15/M output) in `agent.py:COST_PER_MTok`. If the model changes, these are wrong.
+- **PR URL extraction** uses a single regex (`https://github\.com/[^\s)]+/pull/\d+`). It only catches GitHub URLs.
+- **`gh` is invoked by the model**, not by Python. Preflight checks `gh auth status` but doesn't validate the model uses `gh` correctly.
+- **Logs** go to `agentic-coder/logs/` (not the target repo's logs/).
+- **`task.repo`** is parsed from markdown but never used. (Open ticket in `docs/agent-runbook.md`.)
+- **Scanner `console.log` rule** has an `exclude_glob` field that is **not wired** in `scanner.py:_search`.
+- **`config.validate()`** only checks `ANTHROPIC_API_KEY`; `REPO_PATH` validation lives in `main.py`.
+- **Heredoc PR creation** is done by the model — see the `git workflow` section of `SYSTEM_PROMPT`.
+
+## Conventions
+
+- Conventional Commits (`feat(scope): …`).
+- Branch off `main`. PR titles start with a Conventional Commits prefix (`feat(...)`, `fix(...)`, etc.).
+- Imports: stdlib → third-party → local.
+- Type hints on public functions.
+- `pathlib.Path` for filesystem.
+- `print()` is the current logging surface. If you introduce `logging`, adopt it project-wide in one PR.
+
+## Gotchas when extending
+
+- **Adding a new tool**: also update `_summarize_tool_output()` if its output is too large. The agent panics on multi-MB tool returns.
+- **Adding a new adapter**: register it in `_build_adapter()` (`main.py`) and the argparse subparser. The adapter's `__init__` should accept a `Config` so we don't drift on env loading.
+- **Changing `max_tokens`** (16384 default in `client.messages.create`): be aware of cost. Defaults exist for a reason.
+- **Multi-repo**: not implemented. The roadmap calls for `profiles/<repo>.yaml`. If you start that work, see `profiles/citycatalyst.yaml` for the shape.
+
+## Running locally
+
+```bash
+python -m agent_factory markdown tasks/getting-started.md --task 1
+python -m agent_factory scan
+python -m agent_factory watch jira
+```
+
+`run.sh` wraps these with sensible defaults (`TASKS_FILE`, `.venv` activation).
diff --git a/README.md b/README.md
index 10168bf..1cd3085 100644
--- a/README.md
+++ b/README.md
@@ -219,11 +219,24 @@ NOTION_DATABASE_ID=your-database-id
 
 ```
 agentic-coder/
+├── AGENTS.md                     # Agent brief (read first)
+├── CLAUDE.md                     # Extra context for Claude Code
 ├── run.sh                        # CLI wrapper
 ├── .env / .env.example           # Configuration
-├── tasks/                        # Markdown task files
-│   ├── tasks-example.md
-│   └── demo-tasks.md
+├── .cursor/
+│   ├── rules/                    # Cursor rules (general, architecture, security, …)
+│   └── skills/                   # Named workflows (commit-message-standards, pull-request-standards)
+├── profiles/                     # Per-target-repo defaults (citycatalyst, global-data, agentic-coder)
+├── prompts/
+│   └── system-base.md            # Reference for the live system prompt
+├── tasks/                        # Markdown task backlogs
+│   ├── getting-started.md
+│   ├── citycatalyst-stability.md
+│   ├── global-data-cleanup.md
+│   └── self-improvement.md
+├── docs/
+│   ├── PLAYBOOK.md               # How to operate (overnight, watch, scan)
+│   └── EXTENDING.md              # How to add adapters, tools, scanner rules
 ├── logs/                         # Session logs (gitignored)
 └── agent_factory/                # Core package
     ├── main.py                   # CLI entry point
@@ -272,6 +285,16 @@ agentic-coder/
 - [ ] MCP server integration for richer tool use
 - [ ] Custom scanner rules (configurable patterns per project)
 
+## Documentation map
+
+- **[`AGENTS.md`](AGENTS.md)** — agent brief; read first if you're an AI agent or new contributor.
+- **[`CLAUDE.md`](CLAUDE.md)** — extra context for Claude Code-style sessions.
+- **[`docs/PLAYBOOK.md`](docs/PLAYBOOK.md)** — operator guide: overnight runs, watch mode, scan, Cloud Agents.
+- **[`docs/EXTENDING.md`](docs/EXTENDING.md)** — how to add adapters, tools, scanner rules, profiles.
+- **[`profiles/`](profiles/)** — per-target-repo defaults (CityCatalyst, global-data, agentic-coder).
+- **[`prompts/system-base.md`](prompts/system-base.md)** — reference for the live system prompt.
+- **[`tasks/`](tasks/)** — curated task backlogs (citycatalyst-stability, global-data-cleanup, self-improvement).
+
 ## Why this exists
 
 Most AI coding tools are either:
diff --git a/docs/EXTENDING.md b/docs/EXTENDING.md
new file mode 100644
index 0000000..09f5c7f
--- /dev/null
+++ b/docs/EXTENDING.md
@@ -0,0 +1,143 @@
+# agentic-coder — Extending
+
+How to add adapters, tools, scanner rules, and per-repo profiles.
+
+---
+
+## Add a task adapter (e.g. Linear, GitHub Issues)
+
+1. Create `agent_factory/adapters/<source>.py` subclassing `TaskAdapter`:
+
+   ```python
+   from agent_factory.adapters.base import TaskAdapter
+   from agent_factory.task_parser import Task
+
+   class LinearAdapter(TaskAdapter):
+       def __init__(self, config):
+           self.api_key = os.environ["LINEAR_API_KEY"]
+           self.team_id = os.environ["LINEAR_TEAM_ID"]
+           if not self.api_key:
+               raise RuntimeError("LINEAR_API_KEY missing")
+
+       def fetch_tasks(self) -> list[Task]: ...
+       def on_task_started(self, task: Task) -> None: ...
+       def on_task_completed(self, task: Task, result) -> None: ...
+   ```
+
+2. Register in `main.py:_build_adapter()`:
+
+   ```python
+   if args.command == "linear":
+       return LinearAdapter(config)
+   ```
+
+3. Add a subparser in `main.py:_build_argparser()`:
+
+   ```python
+   sub.add_parser("linear", help="Pull tasks from Linear")
+   ```
+
+4. Update `.env.example`:
+
+   ```
+   # Linear (optional — needed for ./run.sh linear)
+   # LINEAR_API_KEY=lin_api_...
+   # LINEAR_TEAM_ID=...
+   ```
+
+5. Add a smoke test in `tests/adapters/test_linear.py` (we don't have a tests/ folder yet — start with `pytest` and a single test).
+
+6. Document in `README.md` and `docs/PLAYBOOK.md`.
+
+7. Commit + PR (`feat(adapter): add linear task source`).
+
+---
+
+## Add a tool
+
+1. Add an entry in `tools.py:TOOL_DEFINITIONS` with `name`, `description` (LLM-facing), and `input_schema`.
+
+2. Add a branch in `execute_tool()`:
+
+   ```python
+   if name == "git_diff":
+       result = subprocess.run(["git", "--no-pager", "diff", "HEAD"], ...)
+       return _truncate(result.stdout, 10_000)
+   ```
+
+3. **Respect `Config.dry_run`** if the tool mutates state.
+
+4. Cap output (`_truncate(..., 10_000)`).
+
+5. Mention the tool in `SYSTEM_PROMPT` (`agent_factory/agent.py`) if its use is non-obvious to the model.
+
+6. Smoke test (`tests/test_tools.py`).
+
+7. Commit + PR (`feat(tools): add git_diff tool`).
+
+---
+
+## Add a scanner rule
+
+In `scanner.py:SCAN_CATEGORIES`:
+
+```python
+{
+    "name": "missing_abort_signal",
+    "pattern": r"\bfetch\(",                           # naive — refine with rg --pcre2
+    "glob": "*.{ts,tsx}",
+    "exclude_glob": "**/test/**",                     # honour this in _search!
+    "task_type": "improvement",
+    "description_template": (
+        "Found {count} fetch() calls without an AbortSignal across "
+        "{file_count} files. Add timeout + signal per the "
+        "tighten-fetch-resilience skill."
+    ),
+}
+```
+
+**Important**: there's a known bug — `_search` doesn't currently honour `exclude_glob` for the `console.log` rule. Wire it in the same PR if you add a rule that needs exclusions.
+
+Commit + PR (`feat(scanner): add missing_abort_signal rule`).
+
+---
+
+## Add a per-repo profile
+
+1. Create `profiles/<repo>.yaml` (see `profiles/citycatalyst.yaml` for the shape).
+
+2. Until the CLI loader is implemented, document the values to copy into `.env` in the README of the target repo.
+
+3. Wiring it into the CLI is a future feature — track in `tasks/self-improvement.md`.
+
+---
+
+## Add a system-prompt change
+
+1. Update `prompts/system-base.md` with the new instructions.
+
+2. Translate into the Python triple-string in `agent_factory/agent.py:SYSTEM_PROMPT`.
+
+3. Run a smoke task (`./run.sh task 1` against a small target) and verify the agent still ships.
+
+4. Commit + PR (`feat(agent): instruct model to ...`).
+
+---
+
+## Lift the model / rates
+
+If you switch the default model (`Config.model`):
+
+1. Update `agent_factory/agent.py:COST_PER_MTok` to the new rates (input + output per million tokens).
+
+2. Update `.env.example` if model selection is now env-driven.
+
+3. Update the mentions of "Sonnet" in `README.md`.
+
+4. Commit + PR (`chore(agent): switch default model to ...`).
+
+---
+
+## Tests
+
+We don't have a `tests/` folder yet. The first test we add should be a smoke for `gather_project_context()` (it exercises file IO + globbing). Do that as part of the next adapter / tool addition.
diff --git a/docs/PLAYBOOK.md b/docs/PLAYBOOK.md
new file mode 100644
index 0000000..956a403
--- /dev/null
+++ b/docs/PLAYBOOK.md
@@ -0,0 +1,145 @@
+# agentic-coder — Operator Playbook
+
+How to actually run this thing in anger. For users (CTO, engineers), not for contributors.
+
+---
+
+## 0 — One-time setup
+
+```bash
+git clone git@github.com:Open-Earth-Foundation/agentic-coder.git
+cd agentic-coder
+python3 -m venv .venv && source .venv/bin/activate
+pip install -r requirements.txt
+cp .env.example .env
+
+# fill in:
+#   ANTHROPIC_API_KEY=sk-ant-...
+#   REPO_PATH=../CityCatalyst                (or wherever the target repo is)
+#   BRANCH_PREFIX=agentic-coder
+#   BASE_BRANCH=develop
+
+gh auth login                                # required
+```
+
+Verify:
+
+```bash
+./run.sh list                                # should print tasks
+```
+
+---
+
+## 1 — Run a single task (smoke)
+
+Pick something tiny from `tasks/getting-started.md`:
+
+```bash
+./run.sh task 1
+```
+
+You'll see the agent: explore → plan → edit → test → push → PR. Watch the cost line at the end.
+
+---
+
+## 2 — Burn the backlog (markdown source)
+
+Curate a backlog file (e.g. `tasks/citycatalyst-stability.md`):
+
+```bash
+TASKS_FILE=tasks/citycatalyst-stability.md ./run.sh all
+```
+
+Each `## ` heading becomes one PR.
+
+---
+
+## 3 — Overnight (Jira source)
+
+Best mode for "queue-and-go-to-sleep":
+
+```bash
+./run.sh watch jira
+```
+
+What happens:
+
+- Polls Jira every **2 minutes** for issues with the label `agent-ready` (configurable via `JIRA_AGENT_LABEL`).
+- For each issue: transitions to "In Progress", runs the loop, opens PR, comments PR link, leaves the issue for human review.
+- After **5 idle cycles** (~10 minutes), runs an **idle scan** of the target repo (TODO / `console.log` / `as any` / empty `catch`) and processes one cleanup PR.
+- `Ctrl+C` to stop.
+
+> Tip: run inside `tmux` or `screen` so a closed terminal doesn't kill it.
+
+---
+
+## 4 — Overnight (Notion source)
+
+Same shape as Jira:
+
+```bash
+./run.sh watch notion
+```
+
+Looks for pages with `Status` = "Agent Ready" in the database referenced by `NOTION_DATABASE_ID`.
+
+---
+
+## 5 — Autonomous repo scan (no source)
+
+When the inbox is empty but you want forward motion:
+
+```bash
+./run.sh scan
+```
+
+Surfaces aggregated TODO / `console.log` / `as any` / empty-catch hits and proposes one cleanup PR per category that has ≥ 2 hits.
+
+---
+
+## 6 — Cursor Cloud Agents kickoff (alternative)
+
+For tasks too small for `agentic-coder` but too big for in-IDE Cursor, use Cursor Cloud Agents from `CityCatalyst/.cursor/cloud/<flow>.md`. The Cloud Agent reads the same `AGENTS.md` and `.cursor/rules/`, so the result is consistent with overnight runs.
+
+---
+
+## 7 — Inspect runs
+
+Every task writes a markdown log to `agentic-coder/logs/`:
+
+```bash
+./run.sh logs                 # last 20 logs
+ls -lt logs/ | head -20
+
+cat logs/<file>.md            # full transcript + cost + PR URL
+```
+
+---
+
+## 8 — Cost / budget
+
+- The default model is Sonnet — cost rates hardcoded at $3/M input, $15/M output.
+- Per-task cost is printed at the end and stored in the log.
+- For `watch` mode, watch the cumulative cost across logs (`grep "Est. cost" logs/*.md`).
+- Soft budget rails live in `profiles/<repo>.yaml`. The CLI doesn't enforce them yet — see `tasks/self-improvement.md`.
+
+---
+
+## 9 — Common errors
+
+| Error | Fix |
+|-------|-----|
+| `gh auth status` fails | `gh auth login` — make sure the OEF SSO is granted. |
+| `working tree not clean` | `git stash` or `git status` to inspect. The agent refuses to start dirty. |
+| `not on base branch` | `git checkout develop` (or whatever `BASE_BRANCH` is). The agent will also try, but fail loudly if it can't. |
+| `no tasks found` | Check `JIRA_AGENT_LABEL`, `NOTION_DATABASE_ID`, or your tasks markdown headings. |
+| `model returned no tool calls and not end_turn` | Often a prompt issue or rate-limit. Re-run; if persistent, dial `max_agent_turns` lower and split the task. |
+
+---
+
+## 10 — Safety
+
+- The agent **does not merge** and does not open PRs unless told. PRs land on a branch; a human reviews and merges (any tech-team member, after standard review).
+- `run_command` has a small blocklist — see `agent_factory/tools.py`. Don't shrink it.
+- Any new mutating tool must respect `Config.dry_run`.
+- Cost tracking is informational, not enforcing — keep an eye on it for autonomous runs.
diff --git a/profiles/agentic-coder.yaml b/profiles/agentic-coder.yaml
new file mode 100644
index 0000000..469ab02
--- /dev/null
+++ b/profiles/agentic-coder.yaml
@@ -0,0 +1,22 @@
+# Profile: agentic-coder running against itself.
+#
+# Useful for autonomous self-improvement runs (small, focused).
+# Keep budgets tight.
+
+repo_path: "."
+base_branch: "main"
+branch_prefix: "agent"
+default_tasks_file: "tasks/self-improvement.md"
+
+notes:
+  - "Read AGENTS.md, CLAUDE.md, and `.cursor/rules/security-baseline.mdc`."
+  - "Do not modify the `tools.run_command` blocklist (extend or shrink) without a security review."
+  - "Keep `requirements.txt` minimal (anthropic, dotenv, optional adapter deps)."
+  - "Branch: agent/<slug>. Don't open PRs; push and stop."
+
+scan:
+  enable_idle_scan: false           # too risky to scan our own surface unattended
+
+cost_budget:
+  per_task_usd_warn: 0.5
+  per_day_usd_hard_cap: 2.0
diff --git a/profiles/citycatalyst.yaml b/profiles/citycatalyst.yaml
new file mode 100644
index 0000000..af9e168
--- /dev/null
+++ b/profiles/citycatalyst.yaml
@@ -0,0 +1,26 @@
+# Profile: CityCatalyst (Open-Earth-Foundation/CityCatalyst)
+#
+# Loaded with `--profile citycatalyst` (planned). Until the CLI flag lands,
+# copy these values into `.env` manually.
+
+repo_path: "../CityCatalyst"
+base_branch: "develop"
+branch_prefix: "agentic-coder"
+default_tasks_file: "tasks/citycatalyst-stability.md"
+
+# Hints for the agent (consumed via system-prompt augmentation, not yet wired).
+notes:
+  - "Always run `npm run lint && npm run prettier && npm run jest` before pushing."
+  - "After API route changes, run `npm run openapi:lint`."
+  - "Read AGENTS.md and .cursor/rules/security-baseline.mdc before editing."
+  - "Use `apiHandler` for all routes; never re-implement auth or error handling."
+  - "Ticket prefix: ON-####. Pull from Linear / Jira when present."
+
+scan:
+  enable_idle_scan: true
+  max_pr_per_idle: 1
+  categories: [todo_fixme, console_log, as_any, empty_catch]
+
+cost_budget:
+  per_task_usd_warn: 1.0
+  per_day_usd_hard_cap: 10.0
diff --git a/profiles/global-data.yaml b/profiles/global-data.yaml
new file mode 100644
index 0000000..33d1ba7
--- /dev/null
+++ b/profiles/global-data.yaml
@@ -0,0 +1,24 @@
+# Profile: CityCatalyst-global-data
+#
+# Loaded with `--profile global-data` (planned). Until the CLI flag lands,
+# copy these values into `.env` manually.
+
+repo_path: "../CityCatalyst-global-data"
+base_branch: "develop"
+branch_prefix: "agentic-coder"
+default_tasks_file: "tasks/global-data-cleanup.md"
+
+notes:
+  - "Read AGENTS.md and .cursor/rules/identity-keys.mdc before editing pipelines."
+  - "Use `actor_id` / `locode` for city joins, never `city_id`."
+  - "All `modelled.*` SQL writes must be `INSERT … ON CONFLICT … DO UPDATE`."
+  - "Run the `definition-of-done-check` skill before flipping `production_approved: true`."
+  - "Test docker stack locally if you touched `cc-mage/` Dockerfile or compose."
+
+scan:
+  enable_idle_scan: false           # data pipelines need human eyes
+  categories: []
+
+cost_budget:
+  per_task_usd_warn: 1.5
+  per_day_usd_hard_cap: 5.0
diff --git a/prompts/system-base.md b/prompts/system-base.md
new file mode 100644
index 0000000..c78688f
--- /dev/null
+++ b/prompts/system-base.md
@@ -0,0 +1,85 @@
+# System prompt scaffold (reference — actual prompt is built in `agent.py`)
+
+This file documents the canonical structure of the system prompt the agent
+runs with. The live version is built in `agent_factory/agent.py:SYSTEM_PROMPT`
+plus `gather_project_context()`. If you change the live prompt, update this file
+in the same PR.
+
+---
+
+## Structure
+
+```
+<role>
+You are an autonomous software engineering agent working on
+{repo_path}. Your job is to take the task below, implement it
+correctly, validate it, and ship a PR.
+</role>
+
+<environment>
+- Working directory: {repo_path}
+- Base branch: {base_branch}
+- Branch prefix you must use: {branch_prefix}/...
+- Tools available: read_file, search_code, list_directory, edit_file, run_command
+- You have shell access via `run_command`. Default: POSIX bash.
+- `gh` CLI is authenticated.
+</environment>
+
+<task>
+{task description, derived from the task source — markdown / Jira / Notion}
+</task>
+
+<acceptance_criteria>
+{bullets from the task, when present}
+</acceptance_criteria>
+
+<files>
+{comma-separated files, when present}
+</files>
+
+<project_context>
+{injected by gather_project_context() — README excerpt + .cursor/rules/* +
+.cursor/skills/*/SKILL.md + AGENTS.md + CONTRIBUTING.md, capped at ~12k chars}
+</project_context>
+
+<workflow>
+1. Explore: read the relevant files. Don't guess.
+2. Plan: short bullet list (visible in your output) of what you'll change.
+3. Implement: small precise edits. Reuse helpers; don't reinvent.
+4. Test: run the project's test suite (npm/pytest/uv). If you broke a test,
+   fix it. If you added a new file, add a test for it (when feasible).
+5. Validate: run linter / type checker. Fix what you broke.
+6. Self-review: `git diff` and look for: console.log, half-written code,
+   secrets, broken imports, leftover TODOs.
+7. Ship:
+   a. `git checkout {base_branch}`
+   b. `git checkout -b {branch_prefix}/<slug>` (kebab-case from task title)
+   c. `git add -A && git commit -m "..."` (Conventional Commits, see project's
+      commit-message-standards skill)
+   d. `git push -u origin HEAD`
+   e. `gh pr create --base {base_branch} --title "..." --body "$(cat <<'EOF' ... EOF)"`
+</workflow>
+
+<rules>
+- Follow the project's .cursor/rules — they are NOT optional.
+- Use the project's named skills (.cursor/skills) when their description matches the task.
+- DO NOT commit secrets.
+- DO NOT force-push.
+- DO NOT merge anything — humans review and merge.
+- DO NOT open PRs unless instructed; if `auto_open_pr` is false, push and stop.
+- If you cannot satisfy the acceptance criteria, surface that clearly and stop.
+  A correctly-failed task is far better than a confidently-wrong PR.
+</rules>
+
+<output>
+At the end of the task:
+- The PR URL on its own line, OR
+- A clear "STOPPED: <reason>" message if you decided not to ship.
+</output>
+```
+
+## Why we keep this file
+
+When we change `SYSTEM_PROMPT` in code, the diff is hard to read because
+the live version is one Python triple-string. This markdown file is the
+canonical reference — copy it, then translate into Python.
diff --git a/tasks/global-data-cleanup.md b/tasks/global-data-cleanup.md
new file mode 100644
index 0000000..bedadb2
--- /dev/null
+++ b/tasks/global-data-cleanup.md
@@ -0,0 +1,50 @@
+# CityCatalyst-global-data — agent-friendly tasks
+
+Run against `REPO_PATH=../CityCatalyst-global-data BASE_BRANCH=develop`.
+
+## Update README to reference knowledge-base/ instead of domain-knowledge/
+
+- **type**: docs
+- **description**: The repo's `README.md` still describes a `domain-knowledge/` folder, but the actual folder is now `knowledge-base/`. Update the layout block and any prose references. Don't add new sections; just align with the current tree.
+- **files**: README.md
+
+### Acceptance criteria
+
+- Layout block lists `knowledge-base/` (not `domain-knowledge/`).
+- All references to `domain-knowledge` are updated.
+- The file remains accurate against the current tree.
+
+## Fix typo in knowledge-base/catalog/index.yaml
+
+- **type**: docs
+- **description**: `knowledge-base/catalog/index.yaml` references `items/climate-projec.md` (missing the `t`). The actual file is `topics/climate-project.md`. Fix the path so the catalog points at the real file.
+- **files**: knowledge-base/catalog/index.yaml
+
+### Acceptance criteria
+
+- The path resolves to an existing file.
+- No other entries are touched.
+
+## Seed knowledge-base/topics/glossary.md
+
+- **type**: docs
+- **description**: `knowledge-base/topics/glossary.md` is empty. Seed it with concise definitions for: GPC, GHGI, CCRA, HIAP, MEED, locode, actor_id, datasource_name, gpc_reference_number, gpcmethod_id, modelled, raw_data, release_id. Pull definitions from `AGENTS.md` and `engineering-standards/data-model-design.md` — do not invent new terms.
+- **files**: knowledge-base/topics/glossary.md
+
+### Acceptance criteria
+
+- Each term has a 1–3 sentence definition.
+- Terms link to the relevant doc when one exists.
+- No invented or speculative entries.
+
+## Audit cc-mage/requirements.txt vs imports
+
+- **type**: improvement
+- **description**: Several blocks under `cc-mage/` import `boto3`, `requests`, `sqlalchemy` but those packages are not in `cc-mage/requirements.txt` — they're satisfied by the Mage base image. List what's actually imported in `cc-mage/{data_loaders,transformers,data_exporters,utils}/**/*.py`, deduplicate, and add the missing entries to `requirements.txt` with the version currently used in the base image.
+- **files**: cc-mage/requirements.txt
+
+### Acceptance criteria
+
+- All third-party imports in `cc-mage/**/*.py` are represented in `requirements.txt`.
+- Versions pin to what the Mage base image currently provides (don't guess; check the image).
+- The Docker build still succeeds.
diff --git a/tasks/self-improvement.md b/tasks/self-improvement.md
new file mode 100644
index 0000000..9e424d5
--- /dev/null
+++ b/tasks/self-improvement.md
@@ -0,0 +1,81 @@
+# agentic-coder — Self-Improvement Tasks
+
+These tasks improve the **tool itself** (`agentic-coder`). Run with:
+
+```bash
+REPO_PATH=. BASE_BRANCH=main BRANCH_PREFIX=agent ./run.sh task <N>
+```
+
+Order is "easiest → hardest." Pick the top unchecked one.
+
+## Wire scanner exclude_glob for console.log rule
+
+- **type**: bugfix
+- **description**: `agent_factory/scanner.py:SCAN_CATEGORIES["console.log"]` defines `exclude_glob` (test directories) but `_search` does not pass it to `rg`. As a result, the scanner counts test files as production violations. Wire `exclude_glob` through `_search` so it adds ripgrep's `--glob '!<pattern>'` (short form: `-g '!<pattern>'`) to the underlying `rg` call.
+- **files**: agent_factory/scanner.py
+
+### Acceptance criteria
+
+- `_search` accepts and applies the optional `exclude_glob` from the category dict.
+- Default behaviour for categories without `exclude_glob` is unchanged.
+- A unit test (or smoke test) demonstrates that test files are excluded from the `console.log` count.
+
+## Make COST_PER_MTok model-aware
+
+- **type**: improvement
+- **description**: `agent_factory/agent.py:COST_PER_MTok` is hardcoded for Sonnet ($3/M input, $15/M output). When the configured model changes (e.g. Opus, Haiku), the cost line is wrong. Replace the constant with a small lookup keyed by model id, with a sensible default + a warning log when the model isn't in the lookup.
+- **files**: agent_factory/agent.py
+
+### Acceptance criteria
+
+- `UsageTracker` consults a `COST_RATES` mapping keyed by `Config.model`.
+- Unknown models log a one-line warning and fall back to a documented default.
+- A unit test asserts cost computation for at least 2 known models.
+
+## Use task.repo if present (multi-repo support — minimal)
+
+- **type**: feature
+- **description**: `Task.repo` is parsed from markdown but never used. As a first step toward multi-repo support, when `task.repo` is set and is a path that exists, override `Config.repo_path` for that task. Don't change other tasks. Log the override clearly.
+- **files**: agent_factory/main.py, agent_factory/task_parser.py (only if needed)
+
+### Acceptance criteria
+
+- When a markdown task has `**repo**: ../some-other-repo`, the agent runs against that path.
+- Tasks without `**repo**` use `Config.repo_path` as before.
+- Logging makes the override visible.
+
+## Per-task daily cost cap (soft enforcement)
+
+- **type**: feature
+- **description**: Add a per-day USD budget, defaulting to a high value (e.g. $20/day). Sum cost across `logs/*.md` for today's date and refuse to start a new task if the cap is reached. Surface a clear message and exit non-zero. Configurable via env (`DAILY_COST_USD_CAP`).
+- **files**: agent_factory/agent.py, agent_factory/config.py, agent_factory/main.py
+
+### Acceptance criteria
+
+- New `Config.daily_cost_usd_cap` (default 20.0).
+- Before each task, `main.py` reads today's logs, sums cost, refuses if over cap.
+- Verbose log line shows current spend / cap before each task starts.
+
+## Honour profiles/<repo>.yaml
+
+- **type**: feature
+- **description**: Implement `--profile <name>` flag that loads `profiles/<name>.yaml` and uses its values as defaults (env overrides take precedence). Touches `Config` and `main.py`. Don't change CLI surface for non-`--profile` usage.
+- **files**: agent_factory/config.py, agent_factory/main.py, requirements.txt (add pyyaml)
+
+### Acceptance criteria
+
+- `./run.sh ... --profile citycatalyst ...` loads `profiles/citycatalyst.yaml`.
+- Values become defaults; explicit env / CLI overrides win.
+- README + PLAYBOOK updated.
+- Smoke test exercises a profile load.
+
+## First test (smoke for gather_project_context)
+
+- **type**: improvement
+- **description**: We have no `tests/` folder. Create `tests/test_context.py` with a smoke for `gather_project_context()`: point it at this repo's own `.cursor/`, assert the returned string contains expected substrings (`AGENTS.md`, `general.mdc`).
+- **files**: tests/test_context.py, requirements.txt (add pytest)
+
+### Acceptance criteria
+
+- `pytest tests/test_context.py` passes locally.
+- README mentions `pytest` for tests.

From 2618d5e335cb70e3decab611a981b6e6b4605e73 Mon Sep 17 00:00:00 2001
From: Pablo Borges <90059865+pablo-ibco@users.noreply.github.com>
Date: Fri, 29 May 2026 07:54:11 -0300
Subject: [PATCH 2/3] chore: address review feedback on agentic foundation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Soften "CTO" curator wording across AGENTS.md, git-conventions,
pull-request-standards, and PLAYBOOK to "core engineering team" /
"operators". Guardrail (foundation files need explicit review)
stays — only the framing is more team-oriented for an opensource
read.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .cursor/rules/git-conventions.mdc              | 2 +-
 .cursor/skills/pull-request-standards/SKILL.md | 2 +-
 AGENTS.md                                      | 2 +-
 docs/PLAYBOOK.md                               | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.cursor/rules/git-conventions.mdc b/.cursor/rules/git-conventions.mdc
index ccf7b27..74236f9 100644
--- a/.cursor/rules/git-conventions.mdc
+++ b/.cursor/rules/git-conventions.mdc
@@ -40,5 +40,5 @@ chore(deps): pin anthropic to 0.50.x
 ## Merge policy
 
 - **Code in `agent_factory/`, tests, docs/** — any tech-team member can merge after standard review (≥1 approval, CI green).
-- **Agentic foundation** (`AGENTS.md`, `CLAUDE.md`, `.cursor/rules/`, `.cursor/skills/`, `prompts/`, `profiles/`) — CTO sign-off required; after approval, anyone merges.
+- **Agentic foundation** (`AGENTS.md`, `CLAUDE.md`, `.cursor/rules/`, `.cursor/skills/`, `prompts/`, `profiles/`) — core-team sign-off required; after approval, anyone merges.
 - **Agents** never merge their own PRs.
diff --git a/.cursor/skills/pull-request-standards/SKILL.md b/.cursor/skills/pull-request-standards/SKILL.md
index 598785d..3564413 100644
--- a/.cursor/skills/pull-request-standards/SKILL.md
+++ b/.cursor/skills/pull-request-standards/SKILL.md
@@ -40,5 +40,5 @@ Branch is assumed to be already pushed. Don't `git push` unless explicitly asked
 ## Who merges
 
 - **Code in `agent_factory/`, tests, docs/** — any tech-team member after standard review (≥1 approval, CI green).
-- **Agentic foundation** (`AGENTS.md`, `CLAUDE.md`, `.cursor/rules/`, `.cursor/skills/`, `prompts/`, `profiles/`) — CTO sign-off required; then anyone merges.
+- **Agentic foundation** (`AGENTS.md`, `CLAUDE.md`, `.cursor/rules/`, `.cursor/skills/`, `prompts/`, `profiles/`) — core-team sign-off required; then anyone merges.
 - **Agents** never merge their own PRs and do not open PRs unless explicitly told. Open the PR when told to, then stop.
diff --git a/AGENTS.md b/AGENTS.md
index 6eeb246..707f3db 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -64,7 +64,7 @@ agentic-coder/
 - **Do not break the public CLI surface** (`./run.sh markdown|jira|notion|scan|watch [...]`). It's contracted by team scripts.
 - **Do not silently change the default model or `max_agent_turns`.** These have cost implications.
 - **Do not depend on libraries beyond the minimal set** (`anthropic`, `python-dotenv`, optional `requests` for adapters). Keep `requirements.txt` boring.
-- **Do not modify `AGENTS.md`, `CLAUDE.md`, `.cursor/rules/`, `.cursor/skills/`, `prompts/`, or `profiles/` without CTO review.** These are the agentic foundation. (Code merges in `agent_factory/` are unaffected — any tech-team member can merge after standard review.)
+- **Do not modify `AGENTS.md`, `CLAUDE.md`, `.cursor/rules/`, `.cursor/skills/`, `prompts/`, or `profiles/` without core-team review.** These are the agentic foundation, curated by the core engineering team. (Code merges in `agent_factory/` are unaffected — any tech-team member can merge after standard review.)
 
 ## What you must always do
 
diff --git a/docs/PLAYBOOK.md b/docs/PLAYBOOK.md
index 956a403..aac861c 100644
--- a/docs/PLAYBOOK.md
+++ b/docs/PLAYBOOK.md
@@ -1,6 +1,6 @@
 # agentic-coder — Operator Playbook
 
-How to actually run this thing in anger. For users (CTO, engineers), not for contributors.
+How to actually run this thing in anger. For operators (engineers running the tool against a target repo), not for contributors to `agentic-coder` itself.
 
 ---
 

From 82daf960d8c0a5f782563259c76b6556ef7176fe Mon Sep 17 00:00:00 2001
From: Pablo Borges <90059865+pablo-ibco@users.noreply.github.com>
Date: Thu, 11 Jun 2026 15:10:19 -0300
Subject: [PATCH 3/3] feat: add evaluator step, report command, and error
 resilience

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 README.md                  | 25 ++++++++++-
 agent_factory/agent.py     | 85 +++++++++++++++++++++++++++++++++++---
 agent_factory/config.py    |  1 +
 agent_factory/evaluator.py | 79 +++++++++++++++++++++++++++++++++++
 agent_factory/main.py      | 49 +++++++++++++++++++++-
 agent_factory/tools.py     | 45 +++++++++++++++++++-
 6 files changed, 273 insertions(+), 11 deletions(-)
 create mode 100644 agent_factory/evaluator.py

diff --git a/README.md b/README.md
index 1cd3085..dfd3495 100644
--- a/README.md
+++ b/README.md
@@ -103,6 +103,22 @@ The "assign tasks and go to sleep" mode:
 
 When tasks are found, it processes them. When idle for 5 cycles, it runs a repo scan and fixes what it finds. `Ctrl+C` to stop.
 
+### Evaluator
+
+After the agent finishes its work, an optional evaluator pass runs a second Claude call to review the diff. It checks whether the change is minimal, correct, tested, and safe to merge. If the review fails, the agent gets one more iteration to fix the reported issues.
+
+The evaluator is enabled by default. Disable it in config with `enable_evaluator: False`.
+
+### Report
+
+View a summary of all past runs:
+
+```bash
+python -m agent_factory report
+```
+
+Shows total tasks run, success rate (tasks that produced a PR), total estimated cost, and a list of recent PRs.
+
 ### Session logs
 
 Every completed task is logged to `logs/` with the full summary, PR URL, and cost:
@@ -142,13 +158,14 @@ Supported fields:
 
 ### Agent tools
 
-The agent has 5 tools it can call during its loop:
+The agent has 6 tools it can call during its loop:
 
 | Tool | What it does |
 |------|-------------|
 | `read_file` | Read any file in the repo (with line numbers) |
 | `search_code` | Regex search across the codebase (via ripgrep) |
 | `list_directory` | Explore the repo structure |
+| `find_files` | Find files by name pattern (via `find`) |
 | `edit_file` | Precise string replacement in files |
 | `run_command` | Git operations, linting, testing, type checking |
 
@@ -247,7 +264,8 @@ agentic-coder/
     ├── watcher.py                # Continuous polling loop
     ├── preflight.py              # Pre-flight checks
     ├── task_parser.py            # Task dataclass + markdown parser
-    ├── tools.py                  # 5 agent tools
+    ├── evaluator.py              # Evaluator pass (diff review)
+    ├── tools.py                  # 6 agent tools
     └── adapters/
         ├── base.py               # TaskAdapter interface
         ├── markdown.py           # Markdown file adapter
@@ -272,6 +290,9 @@ agentic-coder/
 - [x] Session logging
 - [x] Testing loop (run existing tests, write new ones)
 - [x] Self-validation (linter, type checker, self-review)
+- [x] Evaluator pass (second Claude review of the diff before merge)
+- [x] Report command (session log summary with success rate and cost)
+- [x] Error resilience (retry with backoff on rate limits / 5xx)
 
 ### Next
 
diff --git a/agent_factory/agent.py b/agent_factory/agent.py
index 193d618..dc5b14f 100644
--- a/agent_factory/agent.py
+++ b/agent_factory/agent.py
@@ -4,6 +4,7 @@
 
 import json
 import re
+import time
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any
@@ -15,6 +16,7 @@
 
 from .config import Config
 from .context import gather_project_context
+from .evaluator import evaluate_changes
 from .task_parser import Task
 from .tools import TOOL_DEFINITIONS, execute_tool
 
@@ -188,16 +190,13 @@ def run_agent(task: Task, config: Config, repo_root: str) -> tuple[str, str | No
         console.print(f"[dim]Injected {ctx_len:,} chars of project context (rules, skills, README)[/dim]")
 
     all_text: list[str] = []
+    evaluator_remediation_done = False
 
     for turn in range(config.max_agent_turns):
         console.print(f"\n[dim]--- Turn {turn + 1}/{config.max_agent_turns} ---[/dim]")
 
-        response = client.messages.create(
-            model=config.model,
-            max_tokens=16384,
-            system=system,
-            tools=TOOL_DEFINITIONS,
-            messages=messages,
+        response = _api_call_with_retry(
+            client, config.model, system, TOOL_DEFINITIONS, messages
         )
         tracker.record(response)
 
@@ -212,6 +211,27 @@ def run_agent(task: Task, config: Config, repo_root: str) -> tuple[str, str | No
             all_text.append(tb.text)
 
         if response.stop_reason == "end_turn" and not tool_calls:
+            if (
+                config.enable_evaluator
+                and not config.dry_run
+                and not evaluator_remediation_done
+            ):
+                eval_result = _run_evaluator(task, config, repo_root)
+                if eval_result is not None:
+                    passed, feedback = eval_result
+                    if not passed:
+                        evaluator_remediation_done = True
+                        messages.append({
+                            "role": "user",
+                            "content": (
+                                "The evaluator reviewed your diff and found issues. "
+                                "You have ONE more iteration to fix them.\n\n"
+                                f"**Evaluator feedback:**\n{feedback}"
+                            ),
+                        })
+                        console.print("[yellow]Evaluator requested remediation — one more iteration.[/yellow]")
+                        continue
+
             final_text = "\n".join(all_text)
             pr_url = _extract_pr_url(final_text)
             console.print(Panel(
@@ -261,6 +281,59 @@ def run_agent(task: Task, config: Config, repo_root: str) -> tuple[str, str | No
     return final_text, None
 
 
+def _api_call_with_retry(
+    client: anthropic.Anthropic,
+    model: str,
+    system: str,
+    tools: list,
+    messages: list,
+    max_retries: int = 3,
+) -> Any:
+    """Call ``client.messages.create``, retrying transient errors with exponential backoff.
+
+    Sleeps 2s, 4s, ... between attempts and re-raises the last error; always tries once.
+    """
+    attempts = max(1, max_retries)  # max_retries <= 0 must still make the call once
+    for attempt in range(1, attempts + 1):
+        try:
+            return client.messages.create(
+                model=model, max_tokens=16384, system=system, tools=tools, messages=messages,
+            )
+        except (
+            anthropic.RateLimitError,
+            anthropic.APIConnectionError,
+            anthropic.InternalServerError,
+        ) as e:
+            if attempt == attempts:
+                raise  # out of retries: surface the real API error to the caller
+            wait = 2 ** attempt
+            console.print(f"[yellow]API error ({type(e).__name__}), retrying in {wait}s…[/yellow]")
+            time.sleep(wait)
+    raise RuntimeError("Unreachable")
+
+
+def _run_evaluator(
+    task: Task, config: Config, repo_root: str
+) -> tuple[bool, str] | None:
+    """Collect the agent's diff from git and hand it to the evaluator.
+
+    Tries ``git diff HEAD~1`` first and falls back to plain ``git diff`` when
+    that prints nothing. Returns None when neither command yields a diff,
+    otherwise whatever ``evaluate_changes`` returns.
+    """
+    import subprocess
+    patch = ""
+    for git_args in (["git", "diff", "HEAD~1"], ["git", "diff"]):
+        proc = subprocess.run(git_args, capture_output=True, text=True, cwd=repo_root)
+        patch = proc.stdout.strip()
+        if patch:
+            break
+    if patch:
+        return evaluate_changes(patch, task, config)
+    console.print("[dim]Evaluator skipped — no diff found.[/dim]")
+    return None
+
+
 def _save_session_log(task: Task, summary: str, pr_url: str | None, config: Config, tracker: UsageTracker | None = None) -> None:
     """Save a log of the completed task to the logs directory."""
     log_dir = Path(__file__).parent.parent / "logs"
diff --git a/agent_factory/config.py b/agent_factory/config.py
index 968c14a..9f5554f 100644
--- a/agent_factory/config.py
+++ b/agent_factory/config.py
@@ -31,6 +31,7 @@ class Config:
     repo_path: str = field(default_factory=lambda: os.environ.get("REPO_PATH", ""))
     auto_commit: bool = True
     dry_run: bool = False
+    enable_evaluator: bool = True
 
     def validate(self) -> None:
         if not self.anthropic_api_key:
diff --git a/agent_factory/evaluator.py b/agent_factory/evaluator.py
new file mode 100644
index 0000000..4a47049
--- /dev/null
+++ b/agent_factory/evaluator.py
@@ -0,0 +1,79 @@
+"""Evaluator step: a second Claude call to review the agent's diff."""
+
+from __future__ import annotations
+
+import anthropic
+from rich.console import Console
+
+from .config import Config
+from .task_parser import Task
+
+console = Console()
+# System prompt for the review call; it asks for PASS or FAIL alone on the first reply line.
+EVALUATOR_SYSTEM_PROMPT = """\
+You are a senior code reviewer evaluating a diff produced by an AI coding agent.
+
+The agent was given a task and produced the diff below. Your job is to decide \
+whether the change is ready to merge.
+
+## Criteria
+
+1. **Minimal** — Only changes what the task requires, no unrelated modifications.
+2. **Correct** — The logic is right. No obvious bugs, off-by-one errors, or \
+missing edge cases.
+3. **Tested** — If the codebase has a test framework, tests were added or \
+existing tests still pass.
+4. **Safe** — No secrets, no destructive operations, no security regressions.
+5. **Style** — Follows the existing code style. No unnecessary comments.
+
+## Response format
+
+Respond with EXACTLY one of these on the first line:
+
+PASS
+FAIL
+
+Then on subsequent lines, provide brief feedback (2-5 bullet points). If PASS, \
+note what looks good. If FAIL, explain what must be fixed.
+"""
+
+
+def evaluate_changes(
+    diff: str, task: Task, config: Config
+) -> tuple[bool, str]:
+    """Ask a fresh Claude call to review the agent's diff against the task.
+
+    The reply's first line is the verdict; markdown emphasis or punctuation
+    around it is ignored and anything other than PASS counts as a failure.
+
+    Returns (passed, feedback).
+    """
+    from rich.markup import escape  # feedback is model text, not console markup
+    client = anthropic.Anthropic(api_key=config.anthropic_api_key)
+
+    user_message = (
+        f"## Task\n{task.title}\n\n"
+        f"## Description\n{task.description or '(none)'}\n\n"
+        f"## Diff\n```diff\n{diff}\n```"
+    )
+
+    console.print("[dim]Running evaluator pass…[/dim]")
+
+    response = client.messages.create(
+        model=config.model,
+        max_tokens=2048,
+        system=EVALUATOR_SYSTEM_PROMPT,
+        messages=[{"role": "user", "content": user_message}],
+    )
+
+    text = "".join(b.text for b in response.content if b.type == "text").strip()
+    first_line, _, rest = text.partition("\n")
+    # Models often decorate the verdict ("**PASS**", "PASS."); do not misread that as FAIL.
+    passed = first_line.strip(" \t\r*_`#.:!").upper() == "PASS"
+    feedback = rest.strip() or text
+
+    status = "[green]PASS[/green]" if passed else "[red]FAIL[/red]"
+    console.print(f"  Evaluator: {status}")
+    console.print(f"  [dim]{escape(feedback[:300])}[/dim]")
+
+    return passed, feedback
diff --git a/agent_factory/main.py b/agent_factory/main.py
index 3b1fd12..0e4a799 100644
--- a/agent_factory/main.py
+++ b/agent_factory/main.py
@@ -34,6 +34,46 @@ def _build_adapter(args: argparse.Namespace) -> TaskAdapter:
         return MarkdownAdapter(args.tasks_file)
 
 
+def _print_report() -> None:
+    """Print a summary of all session logs: tasks run, PRs created, success rate, cost.
+
+    A log counts as a success when it contains a ``**PR:**`` GitHub pull-request URL.
+    """
+    import re
+    log_dir = Path(__file__).parent.parent / "logs"
+    if not log_dir.is_dir():
+        console.print("[yellow]No logs directory found.[/yellow]")
+        return
+
+    log_files = sorted(log_dir.glob("*.md"))
+    if not log_files:
+        console.print("[yellow]No session logs found.[/yellow]")
+        return
+
+    pr_urls: list[str] = []
+    total_cost = 0.0
+    for lf in log_files:
+        text = lf.read_text(encoding="utf-8", errors="replace")
+        pr_match = re.search(r"\*\*PR:\*\*\s*(https://github\.com/\S+/pull/\d+)", text)
+        if pr_match:
+            pr_urls.append(pr_match.group(1))
+        # Capture a single well-formed number so "$0.12." or "$1.2.3" cannot break float().
+        cost_match = re.search(r"Est\. cost: \$(\d*\.?\d+)", text)
+        if cost_match:
+            total_cost += float(cost_match.group(1))
+
+    success_rate = len(pr_urls) / len(log_files) * 100  # log_files is non-empty here
+    console.print(Panel("[bold]Agent Report[/bold]", style="blue"))
+    console.print(f"  Total tasks run:  {len(log_files)}")
+    console.print(f"  PRs created:      {len(pr_urls)}")
+    console.print(f"  Success rate:     {success_rate:.0f}%")
+    console.print(f"  Total est. cost:  ${total_cost:.4f}")
+    if pr_urls:
+        console.print("\n  [bold]Recent PRs:[/bold]")
+        for url in pr_urls[-10:]:
+            console.print(f"    {url}")
+
+
 def main() -> None:
     parser = argparse.ArgumentParser(
         description="Agent Factory — autonomous coding agent.",
@@ -52,6 +92,9 @@ def main() -> None:
     # --- notion ---
     sub.add_parser("notion", help="Fetch tasks from Notion database (set NOTION_* in .env)")
 
+    # --- report ---
+    sub.add_parser("report", help="Print a summary of all session logs (tasks, success rate, cost)")
+
     # --- scan ---
     scan_parser = sub.add_parser("scan", help="Scan repo for improvements and fix them")
     scan_parser.add_argument("--max-tasks", type=int, default=3, help="Max improvements to fix per run.")
@@ -63,7 +106,7 @@ def main() -> None:
     watch_parser.add_argument("--no-scan", action="store_true", help="Disable repo scanning when idle.")
 
     # --- global options ---
-    for p in [parser, md_parser, jira_parser, scan_parser, watch_parser]:
+    for p in [parser, md_parser, jira_parser, scan_parser, watch_parser, ]:
         p.add_argument("--repo", default=None, help="Path to git repo (or REPO_PATH in .env).")
         p.add_argument("--api-key", help="Anthropic API key (or ANTHROPIC_API_KEY in .env).")
         p.add_argument("--model", default=None, help="Anthropic model to use.")
@@ -87,6 +130,10 @@ def main() -> None:
             parser.print_help()
             sys.exit(0)
 
+    if args.source == "report":
+        _print_report()
+        sys.exit(0)
+
     config_kwargs: dict = {}
     if args.api_key:
         config_kwargs["anthropic_api_key"] = args.api_key
diff --git a/agent_factory/tools.py b/agent_factory/tools.py
index 7960b52..f2ff6f4 100644
--- a/agent_factory/tools.py
+++ b/agent_factory/tools.py
@@ -56,6 +56,19 @@
             "required": ["path", "old_string", "new_string"],
         },
     },
+    {
+        "name": "find_files",
+        "description": "Find files by name pattern in the repo. Uses the `find` command. Useful for locating files when you know part of the name.",
+        "input_schema": {
+            "type": "object",
+            "properties": {
+                "pattern": {"type": "string", "description": "File name pattern (glob), e.g. '*.test.ts' or 'auth*.py'."},
+                "path": {"type": "string", "description": "Optional subdirectory to scope the search."},
+                "type": {"type": "string", "description": "Type filter: 'f' for files, 'd' for directories. Default 'f'.", "default": "f"},
+            },
+            "required": ["pattern"],
+        },
+    },
     {
         "name": "run_command",
         "description": "Run a shell command in the repo directory. Use for git operations, linting, testing, etc. Returns stdout and stderr.",
@@ -82,6 +95,8 @@ def execute_tool(name: str, args: dict, repo_root: str) -> str:
         return _list_directory(root, args["path"], args.get("depth", 1))
     elif name == "edit_file":
         return _edit_file(root, args["path"], args["old_string"], args["new_string"])
+    elif name == "find_files":
+        return _find_files(root, args["pattern"], args.get("path"), args.get("type", "f"))
     elif name == "run_command":
         return _run_command(root, args["command"])
     else:
@@ -175,6 +190,32 @@ def _edit_file(root: Path, rel_path: str, old_string: str, new_string: str) -> s
     return f"Successfully edited {rel_path}"
 
 
+def _find_files(root: Path, pattern: str, path: str | None, file_type: str) -> str:
+    """Find up to 100 entries under the repo whose name matches ``pattern`` (via ``find``).
+
+    ``path`` optionally narrows the search to a subdirectory; it must stay inside
+    ``root``. Returns repo-relative paths, one per line, or a message string.
+    """
+    base = root.resolve()
+    target = (base / path).resolve() if path else base
+    if target != base and base not in target.parents:
+        return f"Error: path {path!r} is outside the repository."
+    cmd = ["find", str(target), "-name", pattern, "-type", file_type or "f"]
+    for skipped in (".git", "node_modules", ".venv"):
+        cmd += ["-not", "-path", f"*/{skipped}/*"]
+    try:
+        result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
+    except Exception as e:
+        return f"Error: {e}"
+    output = result.stdout.strip()
+    if not output:
+        # Surface find's own complaint (bad -type, missing dir) instead of hiding it.
+        return f"Error: {result.stderr.strip()}" if result.returncode else "No files found."
+    return "\n".join(
+        str(Path(line).relative_to(base)) for line in output.splitlines()[:100]
+    )
+
+
 def _run_command(root: Path, command: str) -> str:
     blocked = ["rm -rf /", "rm -rf ~", "push --force", "push -f"]
     if any(b in command for b in blocked):
@@ -186,7 +227,7 @@ def _run_command(root: Path, command: str) -> str:
             capture_output=True,
             text=True,
             cwd=str(root),
-            timeout=120,
+            timeout=300,
             env={
                 **{k: v for k, v in os.environ.items() if k != "GITHUB_TOKEN"},
                 "GIT_TERMINAL_PROMPT": "0",
@@ -201,6 +242,6 @@ def _run_command(root: Path, command: str) -> str:
             output += f"\n(exit code: {result.returncode})"
         return output.strip()[:10_000] or "(no output)"
     except subprocess.TimeoutExpired:
-        return "Error: command timed out after 120 seconds."
+        return "Error: command timed out after 300 seconds."
     except Exception as e:
         return f"Error: {e}"