From 344f60aef42caf99e539af215fa56311110d6b0c Mon Sep 17 00:00:00 2001 From: kmajdoub Date: Thu, 28 May 2026 18:01:07 +0200 Subject: [PATCH] feat(brainstormer): axis-aligned epic/ticket generator with anti-cosmetic guardrail (#123) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces the `Brainstormer` SDK session that reads a ProductVision (from #122), scans the open backlog via `gh_client`, and proposes epics + tickets that must cite an axis + customer story. The post-SDK guardrail drops anything missing citations, citing an unknown axis, or substring-matching an axis's `rejected_as_cosmetic` list — defense in depth against the loop drifting into cosmetic work. - src/forge_loop/brainstormer.py — `Brainstormer.run(vision)`, `BrainstormReport`, `ProposedEpic`, `ProposedTicket`. - src/forge_loop/_brainstormer_sdk.py — sync SDK shim mirroring `_critic_sdk.py` shape. - src/forge_loop/briefs/brainstormer.md.tmpl — primes the session with vision (verbatim), axes (yaml-rendered), open backlog, and the hard-refusal rubric. - src/forge_loop/gh_client.py — adds `list_open_backlog` + `OpenBacklog` so callers don't shell to `gh`. - tests/test_brainstormer.py — happy path, every filter (missing axis, missing story, unknown axis, cosmetic title/body match, case-insensitive), empty-vision guardrail, SDK timeout + error + malformed-output handling, integration with fixtures, prompt- contents assertion. Part of #121. 
def run_brainstormer_sdk(
    prompt: str,
    *,
    cwd: Path,
    timeout_s: int,
    model: str | None = None,
    thinking_budget: str | None = None,
    allowed_mcp_servers: tuple[str, ...] | None = None,
    load_timeout_ms: int | None = None,
    strict_mcp_config: bool = False,
    mcp_servers: dict[str, Any] | None = None,
    add_dirs: tuple[Path, ...] = (),
    query_fn: Any = None,
    options_cls: Any = None,
) -> BrainstormerSdkResult:
    """Run a single brainstormer SDK session by delegating to the critic shim.

    The parameter list is the exact keyword set forwarded to
    :func:`forge_loop._critic_sdk.run_critic_sdk`. Nothing is transformed on
    the way in or on the way out: ``prompt`` is passed positionally, every
    other argument is passed by keyword under the same name, and the callee's
    return value is handed back untouched.

    Keeping this a pass-through (instead of duplicating the session-driving
    code) means the brainstormer and the critic cannot drift apart.
    """
    # Collected in one mapping so the forwarding contract is visible at a
    # glance: the keys are exactly the keyword names the critic shim receives.
    forwarded: dict[str, Any] = {
        "cwd": cwd,
        "timeout_s": timeout_s,
        "model": model,
        "thinking_budget": thinking_budget,
        "allowed_mcp_servers": allowed_mcp_servers,
        "load_timeout_ms": load_timeout_ms,
        "strict_mcp_config": strict_mcp_config,
        "mcp_servers": mcp_servers,
        "add_dirs": add_dirs,
        "query_fn": query_fn,
        "options_cls": options_cls,
    }
    return run_critic_sdk(prompt, **forwarded)
def _render_axes_block(vision: ProductVision) -> str:
    """Serialise the vision's axes into a YAML block for the prompt.

    Each axis contributes its ``name``, ``customer``, ``valuable_means``,
    ``acceptable_work`` and ``rejected_as_cosmetic`` fields, in that key
    order and in the order the axes appear on ``vision``. Key sorting is
    disabled so the output order follows the input, and surrounding
    whitespace is stripped from the dumped text.
    """
    import yaml

    axis_entries = []
    for axis in vision.axes:
        entry = {
            "name": axis.name,
            "customer": axis.customer,
            "valuable_means": axis.valuable_means,
            # Copy into plain lists so the dumper sees builtin sequences.
            "acceptable_work": list(axis.acceptable_work),
            "rejected_as_cosmetic": list(axis.rejected_as_cosmetic),
        }
        axis_entries.append(entry)

    dumped = yaml.safe_dump({"axes": axis_entries}, sort_keys=False)
    return dumped.strip()
def _parse_sdk_payload(last_message: str) -> dict[str, Any]:
    """Extract the trailing JSON object from the SDK's last message.

    The prompt requires the JSON object to be on the LAST line, but the
    parser is deliberately tolerant of surrounding prose: it returns the
    last top-level, brace-balanced ``{...}`` span of the message, decoded
    as JSON.

    The scan is string-aware: once inside an object, double-quoted JSON
    strings (including backslash escapes) are skipped, so a ``{`` or ``}``
    that appears inside a title/body value does not end the object early.
    A ``}`` seen while no object is open (stray brace in leading prose) is
    ignored rather than corrupting the nesting depth.

    Args:
        last_message: Final assistant message of the SDK session. ``None``
            and empty strings are treated alike.

    Returns:
        The decoded JSON object.

    Raises:
        ValueError: The message is empty, contains no balanced object, or
            the selected span is not valid JSON. Callers re-raise this as
            ``RuntimeError``.
    """
    text = (last_message or "").strip()
    if not text:
        raise ValueError("empty SDK output")

    # Fast path: the whole message is a single JSON object.
    try:
        obj = json.loads(text)
        if isinstance(obj, dict):
            return obj
    except json.JSONDecodeError:
        pass

    # Fall back: find the last top-level balanced ``{...}`` span.
    depth = 0
    start = -1
    in_string = False  # inside a "..." literal within an open object
    escaped = False  # previous character was a backslash inside a string
    best: str | None = None
    for i, ch in enumerate(text):
        if in_string:
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
            continue
        if ch == '"':
            # Quotes in free prose (depth 0) carry no JSON meaning; only
            # track string state once an object has been opened.
            in_string = depth > 0
        elif ch == "{":
            if depth == 0:
                start = i
            depth += 1
        elif ch == "}" and depth > 0:
            # ``depth > 0`` guard: an unmatched ``}`` in prose must not push
            # the depth negative and hide the object that follows it.
            depth -= 1
            if depth == 0:
                best = text[start : i + 1]

    if best is None:
        raise ValueError("no JSON object found in SDK output")
    try:
        obj = json.loads(best)
    except json.JSONDecodeError as exc:
        raise ValueError(f"malformed JSON in SDK output: {exc}") from exc
    if not isinstance(obj, dict):
        raise ValueError("SDK output JSON is not an object")
    return obj
@dataclass
class Brainstormer:
    """Drive one brainstormer SDK session and return a filtered report.

    Args:
        repo_path: Working directory the SDK session runs in.
        owner / repo: GitHub coordinates for the open-backlog scan. If
            either is empty, the backlog block renders as "(none)" — the
            session still runs but without backlog context.
        gh_client: Injected :class:`forge_loop.gh_client.GhClient`. If
            None, a real :class:`GithubkitClient` is constructed lazily.
        sdk_fn: Injection point for ``run_brainstormer_sdk`` (tests stub
            this to avoid network + SDK install).
        timeout_s: Hard cap on the SDK session, forwarded as ``timeout_s``.
        model: Optional model identifier, forwarded to ``sdk_fn`` as
            ``model``.
    """

    repo_path: Path = Path(".")
    owner: str = ""
    repo: str = ""
    gh_client: Any = None
    sdk_fn: Callable[..., Any] | None = None
    timeout_s: int = 300
    model: str | None = None

    def run(self, vision: ProductVision) -> BrainstormReport:
        """Run one session and return only the proposals that pass the rubric.

        Steps: render the prompt (vision + axes + open backlog), call the
        SDK function, parse the trailing JSON object from its last message,
        validate it as a ``BrainstormReport``, then filter epics and tickets
        through ``_filter_items``.

        Raises:
            ValueError: ``vision`` is ``None`` or has no axes.
            RuntimeError: The SDK session timed out, reported an error, or
                returned output that could not be parsed/validated. The raw
                last message is included in the malformed-output error.
        """
        if vision is None or not getattr(vision, "axes", None):
            raise ValueError(
                "brainstormer requires a non-empty ProductVision with at least one axis"
            )

        # 1. Build the prompt.
        backlog = self._scan_backlog()
        prompt = self._render_prompt(vision, backlog)

        # 2. Drive the SDK session.
        sdk_fn = self.sdk_fn or self._default_sdk_fn()
        result = sdk_fn(prompt, cwd=self.repo_path, timeout_s=self.timeout_s, model=self.model)
        err = getattr(result, "error", None)
        timed_out = getattr(result, "timed_out", False)
        # Timeouts are reported either via the flag or via the error string.
        if timed_out or err == "timeout":
            raise RuntimeError("brainstormer SDK session timed out — no report produced")
        if err:
            raise RuntimeError(f"brainstormer SDK session failed: {err}")

        last_message = getattr(result, "last_message", "") or ""

        # 3. Parse + validate the payload.
        try:
            payload = _parse_sdk_payload(last_message)
            raw = BrainstormReport.model_validate(payload)
        except (ValueError, ValidationError) as exc:
            raise RuntimeError(
                f"brainstormer SDK returned malformed output ({exc}); "
                f"last_message={last_message!r}"
            ) from exc

        # 4. Apply the anti-cosmetic guardrail.
        return BrainstormReport(
            proposed_epics=_filter_items(list(raw.proposed_epics), vision, kind="epic"),
            proposed_tickets=_filter_items(list(raw.proposed_tickets), vision, kind="ticket"),
        )

    # -- helpers --------------------------------------------------------

    def _scan_backlog(self) -> Any:
        """Return open backlog via gh_client (or an empty stand-in).

        The backlog is best-effort prompt context: any failure to build a
        client or to list issues degrades to an empty ``OpenBacklog``
        instead of aborting the run. Each degradation is logged as a
        warning so a session that ran without backlog context is
        distinguishable from one where the backlog was genuinely empty.
        """
        from forge_loop.gh_client import OpenBacklog, list_open_backlog

        if not self.owner or not self.repo:
            return OpenBacklog()
        client = self.gh_client
        if client is None:
            try:
                from forge_loop.gh_client import GithubkitClient

                client = GithubkitClient()
            except Exception as exc:  # noqa: BLE001 — boundary; degrade gracefully
                _log.warning(
                    "brainstormer_backlog_unavailable",
                    owner=self.owner,
                    repo=self.repo,
                    stage="client_init",
                    error=repr(exc),
                )
                return OpenBacklog()
        try:
            return list_open_backlog(client, self.owner, self.repo)
        except Exception as exc:  # noqa: BLE001 — boundary; degrade gracefully
            _log.warning(
                "brainstormer_backlog_unavailable",
                owner=self.owner,
                repo=self.repo,
                stage="list_open_backlog",
                error=repr(exc),
            )
            return OpenBacklog()

    def _render_prompt(self, vision: ProductVision, backlog: Any) -> str:
        """Render the ``brainstormer`` brief with vision, axes and backlog."""
        from forge_loop.briefs import render_brief

        return render_brief(
            "brainstormer",
            vision_markdown=vision.vision_markdown,
            axes_block=_render_axes_block(vision),
            backlog_block=_render_backlog_block(backlog),
        )

    def _default_sdk_fn(self) -> Callable[..., Any]:
        """Return the real SDK entry point; imported lazily so tests that
        inject ``sdk_fn`` never import the SDK shim."""
        from forge_loop._brainstormer_sdk import run_brainstormer_sdk

        return run_brainstormer_sdk
_ENV_OVERRIDES = { "worker": "LOOP_WORKER_BRIEF", "po": "LOOP_PO_BRIEF", "critic": "LOOP_CRITIC_BRIEF", + "brainstormer": "LOOP_BRAINSTORMER_BRIEF", } diff --git a/src/forge_loop/briefs/brainstormer.md.tmpl b/src/forge_loop/briefs/brainstormer.md.tmpl new file mode 100644 index 0000000..70e5be6 --- /dev/null +++ b/src/forge_loop/briefs/brainstormer.md.tmpl @@ -0,0 +1,59 @@ +You are the BRAINSTORMER agent in a Titan sprint loop. Your job: propose +axis-aligned epics + tickets that move the product along its declared +axes of value — and refuse anything cosmetic. + +# Product vision (verbatim) + +{vision_markdown} + +# Axes of value (structured) + +{axes_block} + +# Current open GitHub backlog + +{backlog_block} + +# Hard-refusal rubric + +A proposal is REJECTED if any of the following are true: + +1. It omits the `axis` field, or `axis` is empty. +2. It omits the `customer_story` field, or `customer_story` is empty. +3. Its `axis` value does not match a `name` from the axes block above + (exact, case-sensitive). +4. Its title or body contains (case-insensitive substring) any phrase + listed under that axis's `rejected_as_cosmetic`. + +These are not stylistic preferences — proposals failing the rubric are +dropped post-hoc by the operator. 
@dataclass
class OpenBacklog:
    """Open backlog snapshot: epics (label=``epic``) vs. everything else.

    The brainstormer feeds this into its prompt so the SDK session can
    see what already exists and avoid re-proposing duplicates.
    """

    # Issues returned by the labelled (epic) listing.
    epics: list[Issue] = field(default_factory=list)
    # Open issues that are not epics.
    tickets: list[Issue] = field(default_factory=list)


def list_open_backlog(
    client: GhClient,
    owner: str,
    repo: str,
    *,
    limit: int = 50,
    epic_label: str = "epic",
) -> OpenBacklog:
    """Return open epics + tickets for ``owner/repo``.

    Implementation: one labelled list for epics, one un-labelled list for
    "everything open" minus epics. The epic list is requested with
    ``limit``; the un-labelled list is requested with ``limit * 2`` and the
    resulting tickets are cut to ``limit`` so a stale repo doesn't blow up
    the prompt budget.

    An issue counts as an epic — and is therefore kept out of ``tickets`` —
    when its number was returned by the labelled listing OR its ``labels``
    contain ``epic_label``. The label check matters when there are more
    open epics than ``limit``: the overflow never appears in the labelled
    result, so filtering by number alone would misfile it as tickets.

    We avoid shelling out to ``gh`` directly — the brainstormer should
    only ever touch GitHub through :class:`GhClient`.

    Args:
        client: Object exposing ``issues_by_label(owner, repo, label, limit)``.
        owner: Repository owner.
        repo: Repository name.
        limit: Maximum number of epics requested and tickets returned.
        epic_label: Label that marks an issue as an epic.

    Returns:
        An :class:`OpenBacklog`; ``tickets`` is empty when the un-labelled
        listing fails.
    """
    epics = client.issues_by_label(owner, repo, epic_label, limit)
    epic_numbers = {e.number for e in epics}

    # "Tickets" = open issues that aren't epics. The protocol only exposes
    # ``issues_by_label`` — we approximate "all open" by listing the empty
    # label-set; implementations that don't support that gracefully return
    # [] and the brainstormer simply gets no ticket context.
    try:
        all_open = client.issues_by_label(owner, repo, "", limit * 2)
    except Exception:  # noqa: BLE001 — best-effort context only
        all_open = []

    tickets = [
        issue
        for issue in all_open
        if issue.number not in epic_numbers
        # ``labels`` may be missing/None on duck-typed issues; treat as none.
        and epic_label not in (getattr(issue, "labels", None) or ())
    ][:limit]
    return OpenBacklog(epics=epics, tickets=tickets)
@dataclass
class _StubResult:
    """Minimal stand-in for the SDK result object the brainstormer reads."""

    last_message: str = ""
    duration_s: float = 0.0
    timed_out: bool = False
    error: str | None = None


def _stub_sdk(payload: dict | str, *, error: str | None = None, timed_out: bool = False):
    """Return a fake ``run_brainstormer_sdk`` that replies with ``payload``.

    A string payload is used verbatim as the last message; anything else is
    JSON-encoded first. The returned callable records the ``prompt``,
    ``cwd``, ``timeout_s`` and ``model`` it was invoked with on its
    ``captured`` attribute so tests can inspect the call afterwards.
    """
    if isinstance(payload, str):
        canned_message = payload
    else:
        canned_message = json.dumps(payload)

    seen: dict = {}

    def _fake_sdk(prompt: str, *, cwd, timeout_s, model=None, **_ignored) -> _StubResult:
        seen.update(prompt=prompt, cwd=cwd, timeout_s=timeout_s, model=model)
        reply = _StubResult(
            last_message=canned_message,
            duration_s=0.01,
            timed_out=timed_out,
            error=error,
        )
        return reply

    # Expose the recorded call on the function object itself.
    _fake_sdk.captured = seen  # type: ignore[attr-defined]
    return _fake_sdk
"customer_story": "Consumer: I lose hours when the loop wedges on 403s.", + }, + ], + } + fn = _stub_sdk(payload) + report = Brainstormer(sdk_fn=fn).run(_vision()) + + assert isinstance(report, BrainstormReport) + assert len(report.proposed_epics) == 2 + assert len(report.proposed_tickets) == 3 + assert all(isinstance(e, ProposedEpic) for e in report.proposed_epics) + assert all(isinstance(t, ProposedTicket) for t in report.proposed_tickets) + + +# --------------------------------------------------------------------------- +# Filters +# --------------------------------------------------------------------------- + + +def test_cosmetic_filter_drops_only_the_cosmetic(monkeypatch) -> None: + payload = { + "proposed_epics": [], + "proposed_tickets": [ + { + "title": "rename variables for clarity in worker.py", + "body": "tidy.", + "axis": "throughput", + "customer_story": "operator likes tidy names", + }, + { + "title": "ship cost telemetry widget", + "body": "real customer win.", + "axis": "throughput", + "customer_story": "operator wants live $/hr", + }, + ], + } + # Capture drop-log invocations directly — structlog's PrintLogger + # caches sys.stderr at config time, so capsys/capfd don't see them. + drops: list[dict] = [] + + class _RecLogger: + def info(self, event, **kw): + drops.append({"event": event, **kw}) + + # other levels are no-ops for this assertion. 
def test_cosmetic_filter_case_insensitive() -> None:
    """An upper-cased rejected phrase in the title is still filtered out."""
    shouting_ticket = {
        "title": "RENAME VARIABLES for clarity",
        "body": "x",
        "axis": "throughput",
        "customer_story": "operator: tidy",
    }
    sdk = _stub_sdk({"proposed_epics": [], "proposed_tickets": [shouting_ticket]})

    brainstormer = Brainstormer(sdk_fn=sdk)
    report = brainstormer.run(_vision())

    assert report.proposed_tickets == []
def test_list_open_backlog_partitions_epics_and_tickets() -> None:
    """Epics come from the labelled call; tickets are the rest of the open set."""
    gh = MockGhClient()
    epic_issue = Issue(number=10, title="epic A", labels=["epic"])
    ticket_a = Issue(number=11, title="ticket A")
    ticket_b = Issue(number=12, title="ticket B")

    # Canned replies, consumed in call order: the label="epic" call gets the
    # epic only, the label="" call gets everything including the epic.
    canned_replies: list[list[Issue]] = [
        [epic_issue],
        [epic_issue, ticket_a, ticket_b],
    ]
    real_issues_by_label = gh.issues_by_label

    def _scripted(owner, repo, label, limit):  # noqa: ARG001
        real_issues_by_label(owner, repo, label, limit)  # record the call
        next_reply = canned_replies.pop(0)
        return list(next_reply)

    gh.issues_by_label = _scripted  # type: ignore[assignment]

    backlog = list_open_backlog(gh, "o", "r", limit=10)

    epic_numbers = [issue.number for issue in backlog.epics]
    ticket_numbers = [issue.number for issue in backlog.tickets]
    assert epic_numbers == [10]
    assert ticket_numbers == [11, 12]