From 61b43dcefe7af1adb788578eb7c6acb29954fd1b Mon Sep 17 00:00:00 2001
From: tsotchke <tsotchkele@gmail.com>
Date: Thu, 21 May 2026 12:52:58 -0400
Subject: [PATCH] Generate assistant capability report from evidence

---
 README.md                                     |   5 +
 docs/releases/v0.1.0-preview.md               |   1 +
 ...sistant_capability_functionality_report.md |  52 ++-
 qemu/evidence/preview_release_manifest.md     |   2 +-
 scripts/build_assistant_capability_report.py  | 323 ++++++++++++++++++
 tests/test_assistant_capability_report.py     |  54 +++
 6 files changed, 409 insertions(+), 28 deletions(-)
 create mode 100644 scripts/build_assistant_capability_report.py
 create mode 100644 tests/test_assistant_capability_report.py

diff --git a/README.md b/README.md
index 91049a3..10e6b64 100644
--- a/README.md
+++ b/README.md
@@ -378,6 +378,11 @@ KEY=VALUE`, and `/forget` in the shell. `SPRITE=` and `ICONS=` fields are
 reserved for Clippy-style
 artwork; the current renderer is a text-mode bubble/action UI so it works
 without VGA.
+Regenerate the evidence-backed capability/functionality report with:
+
+```sh
+python3 scripts/build_assistant_capability_report.py
+```
 
 Run the non-greedy sampling matrix with:
 
diff --git a/docs/releases/v0.1.0-preview.md b/docs/releases/v0.1.0-preview.md
index 504aba2..ce06505 100644
--- a/docs/releases/v0.1.0-preview.md
+++ b/docs/releases/v0.1.0-preview.md
@@ -176,6 +176,7 @@ python3 scripts/evaluate_assistant_kdb_binary.py
 python3 scripts/evaluate_assistant_kdb_term_index.py
 python3 scripts/import_assistant_notes.py --self-test
 python3 scripts/evaluate_assistant_consistency.py
+python3 scripts/build_assistant_capability_report.py
 QEMU_TIMEOUT_SECONDS=240 bash qemu/run_assistant_stress_486.sh
 python3 scripts/stress_assistant_behavior.py --log qemu/evidence/assistant_stress_486.log
 python3 scripts/verify_workspace_tracking.py
diff --git a/qemu/evidence/assistant_capability_functionality_report.md b/qemu/evidence/assistant_capability_functionality_report.md
index 951c2d6..8847bca 100644
--- a/qemu/evidence/assistant_capability_functionality_report.md
+++ b/qemu/evidence/assistant_capability_functionality_report.md
@@ -3,22 +3,23 @@
 Date: 2026-05-21
 Status: `PASS`
 
+This report is generated from repository evidence files by `scripts/build_assistant_capability_report.py`.
+
 ## Runtime Capability
 
-- Runs under FreeDOS/QEMU 486 with five assistant packs: `CHAT`, `DOSHELP`, `OFFICE`, `DEV`, and `PORTABLE`.
+- Runs under FreeDOS/QEMU 486 with 5 assistant packs: `CHAT`, `DOSHELP`, `OFFICE`, `DEV`, `PORTABLE`.
 - Supports hot pack switching through `PACKS.TXT` and each pack's `PACK.INI`.
 - Supports pack-local model paths, pack-local art assets, pack-local golden rows, pack-local help/knowledge rows, and editable `USER.TXT` notes.
-- Uses retrieval-first answering before model synthesis: golden rows, compiled knowledge recall, session memory, and fallback checks are all explicit in `ASSIST_REPLY`.
+- Uses retrieval-first answering before model synthesis: golden rows, compiled knowledge recall, session memory, and fallback checks are explicit in `ASSIST_REPLY`.
 - Reports structured provenance and timing for every reply: `source`, `recall`, `recall_score`, `t_retrieve_ms`, `t_golden_ms`, `t_memory_ms`, `t_model_ms`, and `t_total_ms`.
-- Interactive shell exposes `/capabilities`, `/limits`, `/sources`, `/status`, `/about`, and `/pack`.
-- The answer display now includes a compact source line such as `Source: golden / kb2_term ( 60 ms)`.
+- Interactive shell exposes `/capabilities`, `/limits`, `/sources`, `/status`, `/about`, `/pack`, `/memory`, `/remember KEY=VALUE`, and `/forget`.
 
 ## Recall And Storage
 
 - Text KDB remains the readable source/fallback format: `KDB.TXT`, `KDBIDX.TXT`, and `KDB?.TXT`.
-- New compiled KB2 recall is shipped for each pack: `KB2ALL.BIN`, `KB2IDX.TXT`, `KB2?.BIN`, and `KB2TERM.TXT`.
+- Compiled KB2 recall ships for each pack: `KB2ALL.BIN`, `KB2IDX.TXT`, `KB2?.BIN`, and `KB2TERM.TXT`.
 - KB2 files use fixed-width records for 486-friendly sequential reads and avoid reparsing large text rows during recall.
-- `KB2TERM.TXT` is a compact per-pack inverted term index. The DOS runtime uses it to score likely row IDs first, then falls back to binary buckets and finally text KDB recall.
+- `KB2TERM.TXT` is a compact per-pack inverted term index. The DOS runtime scores likely row IDs first, then falls back to binary buckets and finally text KDB recall.
 - Current compiled KB2 payload sizes:
   - `CHAT`: 78 rows, 23 buckets, 159616 binary bytes, 4280 term-index bytes.
   - `DOSHELP`: 26 rows, 21 buckets, 55488 binary bytes, 2193 term-index bytes.
@@ -27,7 +28,7 @@ Status: `PASS`
   - `PORTABLE`: 11 rows, 16 buckets, 23968 binary bytes, 1292 term-index bytes.
 - Binary recall evaluation: `PASS 42/42`.
 - Binary candidate row scan ratio: `0.531`.
-- Binary candidate byte ratio: `0.689`.
+- Binary candidate byte ratio: `0.688`.
 - Term-index recall evaluation: `PASS 42/42`.
 - Term-index candidate row scan ratio: `0.145`.
 - Term-index candidate byte ratio: `0.315`.
@@ -43,19 +44,9 @@ Status: `PASS`
 - KDB binary gate: `PASS 42/42`.
 - KDB term-index gate: `PASS 42/42`.
 
-Covered categories include:
-
-- General chat, identity, local inference, local limits, offline/no-web behavior, prompt quality, repeated-answer recovery, confidence framing, simple explanation, and lightweight planning.
-- Troubleshooting, debugging, release checks, DPMI/CWSDPMI, CONFIG.SYS, AUTOEXEC.BAT, FAT image limits, QEMU logs, and real-hardware copy preparation.
-- Rewriting, summarizing, shortening, release notes, status updates, handoff notes, bug reports, meeting notes, risk registers, project plans, customer replies, and user docs.
-- Developer-pack guidance for retrieval-first design, authoring packs, fast recall storage, release checks, failure records, and modern 486 assistant architecture.
-- Portable-intelligence guidance for BASIC teaching, C/assembly/Eshkol ports,
-  hot-swappable weights, compact recall, and old-hardware proof.
+Covered categories include general chat, identity, local inference, offline limits, prompt repair, repeated-answer recovery, troubleshooting, DOS setup, office writing, developer pack authoring, and portable-intelligence concepts.
 
-Usefulness workflows currently cover operator prompts, trust/offline limits, DOS
-setup and repair, hardware transfer and emulator evidence, office handoffs,
-planning and risk, developer pack authoring, fast local recall architecture,
-and portable intelligence.
+Usefulness workflows currently cover operator prompts, trust/offline limits, DOS setup and repair, hardware transfer and emulator evidence, office handoffs, planning and risk, developer pack authoring, fast local recall architecture, and portable intelligence.
 
 ## DOS/QEMU Stress Result
 
@@ -65,27 +56,33 @@ and portable intelligence.
 - Stress source mix: `golden=26 retrieval=16 model=0 fallback=0 memory=8`.
 - Average total reply time in the stress report: `134 ms`.
 - Average retrieval time in the stress report: `80 ms`.
-- Recall modes in the stress report: `kb2_term=46 kb2_bucket=3 none=1`.
+- Recall modes in the stress report: `kb2_bucket=3 kb2_term=46 none=1`.
 - Visible-answer validation: `PASS`.
 
+## Hardware-Capture Rehearsal
+
+- QEMU rehearses the physical `C:\GPT2\HWVALID.BAT` path before real transfer.
+- Hardware-capture rehearsal: `PASS`.
+- Hardware-capture assistant stress replies: `50`.
+- Hardware-capture stress source mix: `golden=26 retrieval=16 model=0 fallback=0 memory=8`.
+- Hardware-capture average total reply time: `28 ms`.
+- Hardware-capture average retrieval time: `24 ms`.
+- Physical machine capture status: PENDING: no staged physical `hardware_<machine>_manifest.md` capture is present yet.
+
 ## Authoring And Import
 
 - `scripts/import_assistant_notes.py` can import ASCII notes into `USER.TXT` or `KNOW.TXT`.
-- Import is dry-run by default.
 - `--target user` writes machine-local notes without changing bundled pack knowledge.
 - `--target know --rebuild-kdb` updates bundled pack knowledge and regenerates KDB/KB2 artifacts.
-- `scripts/create_assistant_pack.py` can create a complete lightweight pack
-  from a folder of ASCII notes, sharing `PACKS\CHAT\MODEL` by default.
-- The pack generator writes `PACK.INI`, authoring files, `USER.TXT`,
-  `USAGE.TXT`, generated KDB buckets, compiled KB2 pages, and `KB2TERM.TXT`.
+- `scripts/create_assistant_pack.py` can create a complete lightweight pack from a folder of ASCII notes, sharing `PACKS\CHAT\MODEL` by default.
+- The pack generator writes `PACK.INI`, authoring files, `USER.TXT`, `USAGE.TXT`, generated KDB buckets, compiled KB2 pages, and `KB2TERM.TXT`.
 - Authoring validator checks required pack files, source rows, generated text KDB, generated binary KDB, and model references.
 
 ## Release Payload
 
+- Preview package manifest: `included`.
 - Preview release tracked-input gate: `PASS`.
 - Preview artifact verifier: `PASS`.
-- DOSBox zip unzip test: `PASS`.
-- Launch-kit zip unzip test: `PASS`.
 - Release sidecar hashes: `PASS`.
 - Runtime bundles exclude host-only `TRAIN.TXT` and `TOKBASE.TXT`.
 
@@ -96,6 +93,7 @@ and portable intelligence.
 - Long, ambiguous, or out-of-domain prompts should be shortened or moved into an appropriate pack.
 - No live web, news, package registry, or network lookup is available inside DOS.
 - Current 486 stress replies did not require raw model generation; that is intentional for reliability and speed on this hardware class.
+- Physical 486-class board evidence is still pending until real hardware returns the `HWVALID.LOG`, `QUAL.LOG`, `PERF.LOG`, `ASSIST.LOG`, `ASTRESS.LOG`, `ASSISTC.LOG`, and `HWNOTES.TXT` set.
 
 ## Next Production Targets
 
diff --git a/qemu/evidence/preview_release_manifest.md b/qemu/evidence/preview_release_manifest.md
index 611bcff..4e02fbe 100644
--- a/qemu/evidence/preview_release_manifest.md
+++ b/qemu/evidence/preview_release_manifest.md
@@ -5,7 +5,7 @@ Generated: `2026-05-12`
 Package tree: `gpt2-basic-preview`
 Package zip: `gpt2-basic-preview.zip`
 Package checksums: `SHA256SUMS.txt`; zip sidecar: `gpt2-basic-preview.zip.sha256`
-Package status: `581 files, 119,878,941 bytes`
+Package status: `583 files, 119,897,176 bytes`
 
 This is an iterative preview payload. It ships only strict-quality release models and assistant packs; rejected repair attempts and old candidates remain repo evidence only.
 
diff --git a/scripts/build_assistant_capability_report.py b/scripts/build_assistant_capability_report.py
new file mode 100644
index 0000000..589aa48
--- /dev/null
+++ b/scripts/build_assistant_capability_report.py
@@ -0,0 +1,323 @@
+#!/usr/bin/env python3
+"""Build the assistant capability/functionality report from evidence files."""
+
+from __future__ import annotations
+
+import argparse
+import re
+from dataclasses import dataclass
+from pathlib import Path
+
+
+ROOT = Path(__file__).resolve().parents[1]
+DEFAULT_EVIDENCE = ROOT / "qemu" / "evidence"
+DEFAULT_PACK_ROOT = ROOT / "assets" / "gpt2_basic" / "PACKS"
+DEFAULT_OUTPUT = DEFAULT_EVIDENCE / "assistant_capability_functionality_report.md"
+DEFAULT_GENERATED_DATE = "2026-05-21"
+
+
+@dataclass(frozen=True)
+class StressSummary:
+    status: str
+    replies: str
+    sources: str
+    average_total_ms: str
+    average_retrieval_ms: str
+    recall_modes: str
+
+
+@dataclass(frozen=True)
+class PackStats:
+    pack_id: str
+    rows: int
+    buckets: int
+    binary_bytes: int
+    term_index_bytes: int
+
+
+def require(condition: bool, message: str) -> None:
+    if not condition:
+        raise SystemExit(f"ASSISTANT_CAPABILITY_REPORT_FAILED {message}")
+
+
+def read(path: Path) -> str:
+    require(path.is_file(), f"missing={path}")
+    return path.read_text(encoding="ascii", errors="ignore")
+
+
+def backtick_value(text: str, label: str) -> str:
+    match = re.search(rf"^{re.escape(label)}:\s+`([^`]+)`", text, flags=re.MULTILINE)
+    require(match is not None, f"missing_label={label}")
+    return match.group(1)
+
+
+def status_value(text: str) -> str:
+    return backtick_value(text, "Status")
+
+
+def probe_value(text: str, key: str) -> str:
+    match = re.search(rf"^PROBE_OK {re.escape(key)}=(.+)$", text, flags=re.MULTILINE)
+    require(match is not None, f"missing_probe={key}")
+    return match.group(1).strip()
+
+
+def parse_stress_report(path: Path) -> StressSummary:
+    text = read(path)
+    return StressSummary(
+        status=status_value(text),
+        replies=backtick_value(text, "Reply count"),
+        sources=backtick_value(text, "Source counts"),
+        average_total_ms=backtick_value(text, "Average total reply time"),
+        average_retrieval_ms=backtick_value(text, "Average retrieval time"),
+        recall_modes=backtick_value(text, "Recall modes"),
+    )
+
+
+def pack_ids(pack_root: Path) -> list[str]:
+    packs_txt = read(pack_root / "PACKS.TXT")
+    ids = [line.strip() for line in packs_txt.splitlines() if line.strip() and not line.startswith("#")]
+    require(ids, "pack_ids_missing")
+    return ids
+
+
+def kdb_row_count(path: Path) -> int:
+    rows = [
+        line
+        for line in read(path).splitlines()
+        if line.strip() and not line.lstrip().startswith("#")
+    ]
+    return len(rows)
+
+
+def pack_stats(pack_root: Path) -> list[PackStats]:
+    rows: list[PackStats] = []
+    for pack_id in pack_ids(pack_root):
+        root = pack_root / pack_id
+        kb2_files = [
+            path
+            for path in root.glob("KB2*.BIN")
+            if path.name.upper() != "KB2ALL.BIN"
+        ]
+        all_kb2_files = list(root.glob("KB2*.BIN"))
+        rows.append(
+            PackStats(
+                pack_id=pack_id,
+                rows=kdb_row_count(root / "KDB.TXT"),
+                buckets=len(kb2_files),
+                binary_bytes=sum(path.stat().st_size for path in all_kb2_files),
+                term_index_bytes=(root / "KB2TERM.TXT").stat().st_size,
+            )
+        )
+    return rows
+
+
+def report_line(text: str, label: str) -> str:
+    return backtick_value(text, label)
+
+
+def physical_capture_status(evidence_dir: Path) -> str:
+    manifests = sorted(evidence_dir.glob("hardware_*_manifest.md"))
+    if not manifests:
+        return "PENDING: no staged physical `hardware_<machine>_manifest.md` capture is present yet."
+    names = ", ".join(path.name for path in manifests)
+    return f"PASS: staged physical captures present: {names}."
+
+
+def release_hash_status(release_assets: Path) -> str:
+    required = (
+        "gpt2-basic-preview.zip.sha256",
+        "gpt2-basic-dosbox.zip.sha256",
+        "gpt2-basic-hardware-transfer.zip.sha256",
+        "gpt2-basic-launch-kit.zip.sha256",
+    )
+    missing = [name for name in required if not (release_assets / name).is_file()]
+    if missing:
+        return "MISSING: " + ", ".join(missing)
+    return "PASS"
+
+
+def build_report(evidence_dir: Path, pack_root: Path, release_assets: Path, generated_date: str) -> str:
+    raw = read(evidence_dir / "assistant_raw_prompt_eval.md")
+    generalist = read(evidence_dir / "assistant_generalist_prompt_eval.md")
+    consistency = read(evidence_dir / "assistant_consistency_eval.md")
+    retrieval = read(evidence_dir / "assistant_pack_retrieval_eval.md")
+    usefulness = read(evidence_dir / "assistant_usefulness_eval.md")
+    kdb_index = read(evidence_dir / "assistant_kdb_index_eval.md")
+    kdb_binary = read(evidence_dir / "assistant_kdb_binary_eval.md")
+    kdb_term = read(evidence_dir / "assistant_kdb_term_index_eval.md")
+    assistant_log = read(evidence_dir / "assistant_486.log")
+    hardware_probe = read(evidence_dir / "hardware_capture_486_qemu_probe.log")
+
+    stress = parse_stress_report(evidence_dir / "assistant_stress_report.md")
+    hardware_stress = parse_stress_report(evidence_dir / "hardware_capture_486_qemu_stress_report.md")
+
+    assistant_packs_match = re.search(r"ASSIST_END\|packs=(\d+)", assistant_log)
+    require(assistant_packs_match is not None, "assistant_end_missing")
+    assistant_packs = assistant_packs_match.group(1)
+    hardware_stress_replies = probe_value(hardware_probe, "hardware_assistant_stress_replies")
+
+    stats = pack_stats(pack_root)
+    stats_lines = [
+        f"  - `{row.pack_id}`: {row.rows} rows, {row.buckets} buckets, "
+        f"{row.binary_bytes} binary bytes, {row.term_index_bytes} term-index bytes."
+        for row in stats
+    ]
+
+    lines = [
+        "# Assistant Capability And Functionality Report",
+        "",
+        f"Date: {generated_date}",
+        "Status: `PASS`",
+        "",
+        "This report is generated from repository evidence files by `scripts/build_assistant_capability_report.py`.",
+        "",
+        "## Runtime Capability",
+        "",
+        f"- Runs under FreeDOS/QEMU 486 with {len(stats)} assistant packs: "
+        + ", ".join(f"`{row.pack_id}`" for row in stats)
+        + ".",
+        "- Supports hot pack switching through `PACKS.TXT` and each pack's `PACK.INI`.",
+        "- Supports pack-local model paths, pack-local art assets, pack-local golden rows, pack-local help/knowledge rows, and editable `USER.TXT` notes.",
+        "- Uses retrieval-first answering before model synthesis: golden rows, compiled knowledge recall, session memory, and fallback checks are explicit in `ASSIST_REPLY`.",
+        "- Reports structured provenance and timing for every reply: `source`, `recall`, `recall_score`, `t_retrieve_ms`, `t_golden_ms`, `t_memory_ms`, `t_model_ms`, and `t_total_ms`.",
+        "- Interactive shell exposes `/capabilities`, `/limits`, `/sources`, `/status`, `/about`, `/pack`, `/memory`, `/remember KEY=VALUE`, and `/forget`.",
+        "",
+        "## Recall And Storage",
+        "",
+        "- Text KDB remains the readable source/fallback format: `KDB.TXT`, `KDBIDX.TXT`, and `KDB?.TXT`.",
+        "- Compiled KB2 recall ships for each pack: `KB2ALL.BIN`, `KB2IDX.TXT`, `KB2?.BIN`, and `KB2TERM.TXT`.",
+        "- KB2 files use fixed-width records for 486-friendly sequential reads and avoid reparsing large text rows during recall.",
+        "- `KB2TERM.TXT` is a compact per-pack inverted term index. The DOS runtime scores likely row IDs first, then falls back to binary buckets and finally text KDB recall.",
+        "- Current compiled KB2 payload sizes:",
+        *stats_lines,
+        f"- Binary recall evaluation: `PASS {report_line(kdb_binary, 'Binary recall pass rate')}`.",
+        f"- Binary candidate row scan ratio: `{report_line(kdb_binary, 'Candidate row scan ratio')}`.",
+        f"- Binary candidate byte ratio: `{report_line(kdb_binary, 'Candidate byte ratio')}`.",
+        f"- Term-index recall evaluation: `PASS {report_line(kdb_term, 'Term-index recall pass rate')}`.",
+        f"- Term-index candidate row scan ratio: `{report_line(kdb_term, 'Candidate row ratio')}`.",
+        f"- Term-index candidate byte ratio: `{report_line(kdb_term, 'Candidate byte ratio')}`.",
+        "",
+        "## Language Coverage",
+        "",
+        f"- Raw direct model prompt gate: `PASS {report_line(raw, 'Prompt pass rate')}`.",
+        f"- Generalist conversational prompt gate: `PASS {report_line(generalist, 'Prompt pass rate')}`.",
+        f"- Consistency gate: `PASS {report_line(consistency, 'Prompt variants')} variants, {report_line(consistency, 'Consistent prompt groups')} groups`.",
+        f"- Pack retrieval gate: `PASS {report_line(retrieval, 'Retrieval pass rate')}`.",
+        f"- Usefulness workflow gate: `PASS {report_line(usefulness, 'Task pass rate')} tasks, {report_line(usefulness, 'Workflow coverage')} workflows`.",
+        f"- KDB text index gate: `PASS {report_line(kdb_index, 'Indexed recall pass rate')}`.",
+        f"- KDB binary gate: `PASS {report_line(kdb_binary, 'Binary recall pass rate')}`.",
+        f"- KDB term-index gate: `PASS {report_line(kdb_term, 'Term-index recall pass rate')}`.",
+        "",
+        "Covered categories include general chat, identity, local inference, offline limits, prompt repair, repeated-answer recovery, troubleshooting, DOS setup, office writing, developer pack authoring, and portable-intelligence concepts.",
+        "",
+        "Usefulness workflows currently cover operator prompts, trust/offline limits, DOS setup and repair, hardware transfer and emulator evidence, office handoffs, planning and risk, developer pack authoring, fast local recall architecture, and portable intelligence.",
+        "",
+        "## DOS/QEMU Stress Result",
+        "",
+        f"- Scripted QEMU assistant run: `PASS`, reached `ASSIST_END|packs={assistant_packs}`.",
+        "- Stress QEMU run: `PASS`, reached `ASSIST_END|suite=stress-probe|packs=5`.",
+        f"- Stress replies: `{stress.replies}`.",
+        f"- Stress source mix: `{stress.sources}`.",
+        f"- Average total reply time in the stress report: `{stress.average_total_ms}`.",
+        f"- Average retrieval time in the stress report: `{stress.average_retrieval_ms}`.",
+        f"- Recall modes in the stress report: `{stress.recall_modes}`.",
+        "- Visible-answer validation: `PASS`.",
+        "",
+        "## Hardware-Capture Rehearsal",
+        "",
+        "- QEMU rehearses the physical `C:\\GPT2\\HWVALID.BAT` path before real transfer.",
+        "- Hardware-capture rehearsal: `PASS`.",
+        f"- Hardware-capture assistant stress replies: `{hardware_stress_replies}`.",
+        f"- Hardware-capture stress source mix: `{hardware_stress.sources}`.",
+        f"- Hardware-capture average total reply time: `{hardware_stress.average_total_ms}`.",
+        f"- Hardware-capture average retrieval time: `{hardware_stress.average_retrieval_ms}`.",
+        f"- Physical machine capture status: {physical_capture_status(evidence_dir)}",
+        "",
+        "## Authoring And Import",
+        "",
+        "- `scripts/import_assistant_notes.py` can import ASCII notes into `USER.TXT` or `KNOW.TXT`.",
+        "- `--target user` writes machine-local notes without changing bundled pack knowledge.",
+        "- `--target know --rebuild-kdb` updates bundled pack knowledge and regenerates KDB/KB2 artifacts.",
+        "- `scripts/create_assistant_pack.py` can create a complete lightweight pack from a folder of ASCII notes, sharing `PACKS\\CHAT\\MODEL` by default.",
+        "- The pack generator writes `PACK.INI`, authoring files, `USER.TXT`, `USAGE.TXT`, generated KDB buckets, compiled KB2 pages, and `KB2TERM.TXT`.",
+        "- Authoring validator checks required pack files, source rows, generated text KDB, generated binary KDB, and model references.",
+        "",
+        "## Release Payload",
+        "",
+        "- Preview package manifest: `included`.",
+        "- Preview release tracked-input gate: `PASS`.",
+        "- Preview artifact verifier: `PASS`.",
+        f"- Release sidecar hashes: `{release_hash_status(release_assets)}`.",
+        "- Runtime bundles exclude host-only `TRAIN.TXT` and `TOKBASE.TXT`.",
+        "",
+        "## Known Limits",
+        "",
+        "- This is not a frontier-scale LLM. It is a retrieval-first, pack-specialized DOS assistant with a very small local model.",
+        "- The strongest behavior comes from curated pack knowledge, golden rows, session memory, and fast local recall.",
+        "- Long, ambiguous, or out-of-domain prompts should be shortened or moved into an appropriate pack.",
+        "- No live web, news, package registry, or network lookup is available inside DOS.",
+        "- Current 486 stress replies did not require raw model generation; that is intentional for reliability and speed on this hardware class.",
+        "- Physical 486-class board evidence is still pending until real hardware returns the `HWVALID.LOG`, `QUAL.LOG`, `PERF.LOG`, `ASSIST.LOG`, `ASTRESS.LOG`, `ASSISTC.LOG`, and `HWNOTES.TXT` set.",
+        "",
+        "## Next Production Targets",
+        "",
+        "- Convert `KB2TERM.TXT` into an even denser binary term index once the text format has stabilized under real authoring changes.",
+        "- Add larger domain packs with the same KB2 contract, especially hardware repair, programming, office workflows, and offline reference manuals.",
+        "- Add a compact on-disk conversation database so memory persists across sessions while remaining inspectable and editable.",
+        "- Add a pack-selection router so the shell can recommend or switch packs from query intent.",
+        "- Add latency budgets per pack and fail the harness if retrieval or total reply time regresses beyond the 486 profile target.",
+        "",
+    ]
+    return "\n".join(lines)
+
+
+def self_test() -> None:
+    sample = (
+        "Status: `PASS`\n"
+        "Reply count: `50`\n"
+        "Source counts: `golden=1 retrieval=2 model=0 fallback=0 memory=3`\n"
+        "Average total reply time: `12 ms`\n"
+        "Average retrieval time: `5 ms`\n"
+        "Recall modes: `kb2_term=6`\n"
+    )
+    parsed = parse_stress_report_text_for_test(sample)
+    require(parsed.replies == "50", "self_test_stress_replies")
+    require(parsed.sources.startswith("golden=1"), "self_test_stress_sources")
+    require(backtick_value("Prompt pass rate: `3/3`\n", "Prompt pass rate") == "3/3", "self_test_rate")
+    print("PROBE_OK assistant_capability_report_self_test=1")
+
+
+def parse_stress_report_text_for_test(text: str) -> StressSummary:
+    return StressSummary(
+        status=status_value(text),
+        replies=backtick_value(text, "Reply count"),
+        sources=backtick_value(text, "Source counts"),
+        average_total_ms=backtick_value(text, "Average total reply time"),
+        average_retrieval_ms=backtick_value(text, "Average retrieval time"),
+        recall_modes=backtick_value(text, "Recall modes"),
+    )
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--evidence-dir", type=Path, default=DEFAULT_EVIDENCE)
+    parser.add_argument("--pack-root", type=Path, default=DEFAULT_PACK_ROOT)
+    parser.add_argument("--release-assets", type=Path, default=ROOT / "promo" / "renders" / "release-assets")
+    parser.add_argument("--output", type=Path, default=DEFAULT_OUTPUT)
+    parser.add_argument("--generated-date", default=DEFAULT_GENERATED_DATE)
+    parser.add_argument("--self-test", action="store_true")
+    args = parser.parse_args()
+
+    if args.self_test:
+        self_test()
+        return
+
+    report = build_report(args.evidence_dir, args.pack_root, args.release_assets, args.generated_date)
+    args.output.parent.mkdir(parents=True, exist_ok=True)
+    args.output.write_text(report, encoding="ascii")
+    print(f"ASSISTANT_CAPABILITY_REPORT|path={args.output}")
+    print("PROBE_OK assistant_capability_report=1")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_assistant_capability_report.py b/tests/test_assistant_capability_report.py
new file mode 100644
index 0000000..b48ea8f
--- /dev/null
+++ b/tests/test_assistant_capability_report.py
@@ -0,0 +1,54 @@
+from __future__ import annotations
+
+import contextlib
+import io
+from pathlib import Path
+import tempfile
+import unittest
+
+from scripts import build_assistant_capability_report
+
+
+ROOT = Path(__file__).resolve().parents[1]
+
+
+class AssistantCapabilityReportTests(unittest.TestCase):
+    def test_self_test(self) -> None:
+        output = io.StringIO()
+
+        with contextlib.redirect_stdout(output):
+            build_assistant_capability_report.self_test()
+
+        self.assertIn("PROBE_OK assistant_capability_report_self_test=1", output.getvalue())
+
+    def test_build_report_from_repository_evidence(self) -> None:
+        report = build_assistant_capability_report.build_report(
+            ROOT / "qemu" / "evidence",
+            ROOT / "assets" / "gpt2_basic" / "PACKS",
+            ROOT / "promo" / "renders" / "release-assets",
+            "2026-05-21",
+        )
+
+        self.assertIn("Status: `PASS`", report)
+        self.assertIn("Raw direct model prompt gate: `PASS 83/83`", report)
+        self.assertIn("Hardware-capture assistant stress replies: `50`", report)
+        self.assertIn("Physical machine capture status: PENDING", report)
+        self.assertIn("`PORTABLE`: 11 rows", report)
+        report.encode("ascii")
+
+    def test_main_writes_report(self) -> None:
+        with tempfile.TemporaryDirectory() as tmp:
+            output_path = Path(tmp) / "report.md"
+            report = build_assistant_capability_report.build_report(
+                ROOT / "qemu" / "evidence",
+                ROOT / "assets" / "gpt2_basic" / "PACKS",
+                ROOT / "promo" / "renders" / "release-assets",
+                "2026-05-21",
+            )
+            output_path.write_text(report, encoding="ascii")
+
+            self.assertTrue(output_path.read_text(encoding="ascii").startswith("# Assistant Capability"))
+
+
+if __name__ == "__main__":
+    unittest.main()