diff --git a/README.md b/README.md index 10e6b64..292c3ee 100644 --- a/README.md +++ b/README.md @@ -384,6 +384,13 @@ Regenerate the evidence-backed capability/functionality report with: python3 scripts/build_assistant_capability_report.py ``` +Regenerate the retrieval-only recall benchmark after `ASSIST.EXE --recall-probe` +has produced `qemu/evidence/assistant_recall_486.log`: + +```sh +python3 scripts/benchmark_assistant_recall.py +``` + Run the non-greedy sampling matrix with: ```sh @@ -466,7 +473,7 @@ physical 486-class DOS machine. Pentium timing is useful scaling evidence, but it is not a blocker for the solid 486-focused release. The hardware ladder is tracked in [`docs/hardware-validation.md`](docs/hardware-validation.md), with a DOS capture batch under `hardware/HWVALID.BAT` that writes `QUAL.LOG`, -`PERF.LOG`, `ASSIST.LOG`, `ASTRESS.LOG`, and `ASSISTC.LOG`, strict host +`PERF.LOG`, `ASSIST.LOG`, `ASTRESS.LOG`, `ARECALL.LOG`, and `ASSISTC.LOG`, strict host verification through `scripts/verify_hardware_capture.py --require-filled-notes`, and release evidence staging through `scripts/stage_hardware_capture_evidence.py`. The physical assistant gate now diff --git a/docs/assistant-intelligence-roadmap.md b/docs/assistant-intelligence-roadmap.md index f9d3155..302e3c3 100644 --- a/docs/assistant-intelligence-roadmap.md +++ b/docs/assistant-intelligence-roadmap.md @@ -57,12 +57,14 @@ falling back to the full KDB. - Lightweight domain pack without retraining: `PORTABLE` ships portable intelligence notes generated from `data/assistant_pack_notes/portable` and shares the CHAT model. +- Retrieval-only recall probe: `ASSIST.EXE --recall-probe` measures the KB2/KDB + recall path across every shipped pack without model generation. ## Next Milestones - Add more domain packs for hardware repair, programming, and offline reference manuals using the same generated KDB/KB2 contract. -- Measure binary KDB scan time in QEMU and on real hardware, then decide +- Compare recall-probe timing on QEMU and physical hardware, then decide whether the next storage step should be topic shards or offset tables. - Add persistent memory slots beyond name, goal, style, and problem. - Add a measured recall benchmark in QEMU and on physical 486 hardware. diff --git a/docs/hardware-validation.md b/docs/hardware-validation.md index 97aa133..38314b2 100644 --- a/docs/hardware-validation.md +++ b/docs/hardware-validation.md @@ -9,7 +9,7 @@ the core release gate. | Tier | Status | Hardware | Release Role | Required Logs | |---|---|---|---|---| | 0 | Complete | QEMU 486 profiles | Preview release gate | compile, quality, perf, assistant, vectors | -| 1 | Next gate | Any working 486-class DOS PC with 32-64 MB RAM | Solid release baseline | `QUAL.LOG`, `PERF.LOG`, `ASSIST.LOG`, `ASTRESS.LOG`, `ASSISTC.LOG` | +| 1 | Next gate | Any working 486-class DOS PC with 32-64 MB RAM | Solid release baseline | `QUAL.LOG`, `PERF.LOG`, `ASSIST.LOG`, `ASTRESS.LOG`, `ARECALL.LOG`, `ASSISTC.LOG` | | 2 | Useful | Faster 486DX2/DX4 or comparable late 486 board | Performance confidence | repeated `PERF.LOG`, optional kernel perf | | 3 | Optional | Pentium 60/90/133+ | Scaling comparison only | `PERF.LOG`, optional quality confirmation | | 4 | Optional | 386 or 486SX no-FPU class system | Compatibility stress test | quality and perf if memory allows | @@ -57,6 +57,7 @@ GPT2.EXE --quality-all > QUAL.LOG GPT2.EXE --perf > PERF.LOG ASSIST.EXE --scripted > ASSIST.LOG ASSIST.EXE --stress-probe > ASTRESS.LOG +ASSIST.EXE --recall-probe > ARECALL.LOG ``` Also keep the assistant compile log when building on the target: @@ -112,6 +113,8 @@ verifies the paired `hardware__manifest.md` checksum table. - `ASTRESS.LOG` includes `ASSIST_END|suite=stress-probe|packs=5`, exactly 50 `ASSIST_REPLY|` rows, no `status=model_unavailable` rows, and records for CHAT, DOSHELP, OFFICE, DEV, and PORTABLE. +- `ARECALL.LOG` includes `ASSIST_END|suite=recall-probe|packs=5`, exactly 42 + `ASSIST_RECALL|` rows, and validated KB2/KDB recall answers for every pack. - `ASSISTC.LOG` includes `ASSIST_COMPILE_OK` when target-side compilation is attempted. - The hardware notes identify machine key, CPU, clock, RAM, DOS version, @@ -127,6 +130,7 @@ qemu/evidence/hardware__quality.log qemu/evidence/hardware__perf.log qemu/evidence/hardware__assistant.log qemu/evidence/hardware__assistant_stress.log +qemu/evidence/hardware__assistant_recall.log qemu/evidence/hardware__assistant_compile.log qemu/evidence/hardware__notes.md qemu/evidence/hardware__manifest.md diff --git a/docs/releases/v0.1.0-preview.md b/docs/releases/v0.1.0-preview.md index ce06505..d29551b 100644 --- a/docs/releases/v0.1.0-preview.md +++ b/docs/releases/v0.1.0-preview.md @@ -174,10 +174,12 @@ python3 scripts/evaluate_assistant_pack_retrieval.py python3 scripts/evaluate_assistant_kdb_index.py python3 scripts/evaluate_assistant_kdb_binary.py python3 scripts/evaluate_assistant_kdb_term_index.py +python3 scripts/benchmark_assistant_recall.py python3 scripts/import_assistant_notes.py --self-test python3 scripts/evaluate_assistant_consistency.py python3 scripts/build_assistant_capability_report.py QEMU_TIMEOUT_SECONDS=240 bash qemu/run_assistant_stress_486.sh +QEMU_TIMEOUT_SECONDS=240 bash qemu/run_assistant_recall_486.sh python3 scripts/stress_assistant_behavior.py --log qemu/evidence/assistant_stress_486.log python3 scripts/verify_workspace_tracking.py python3 scripts/build_preview_release.py --force diff --git a/hardware/HWVALID.BAT b/hardware/HWVALID.BAT index a448195..d12790f 100644 --- a/hardware/HWVALID.BAT +++ b/hardware/HWVALID.BAT @@ -6,6 +6,7 @@ if exist QUAL.LOG del QUAL.LOG if exist PERF.LOG del PERF.LOG if exist ASSIST.LOG del ASSIST.LOG if exist ASTRESS.LOG del ASTRESS.LOG +if exist ARECALL.LOG del ARECALL.LOG if exist ASSISTC.LOG del ASSISTC.LOG echo HW_CAPTURE_BEGIN>HWVALID.LOG @@ -50,6 +51,9 @@ ASSIST.EXE --scripted > ASSIST.LOG echo Running ASSIST.EXE --stress-probe... echo HW_STEP^|assistant_stress>>HWVALID.LOG ASSIST.EXE --stress-probe > ASTRESS.LOG +echo Running ASSIST.EXE --recall-probe... +echo HW_STEP^|assistant_recall>>HWVALID.LOG +ASSIST.EXE --recall-probe > ARECALL.LOG goto done :missing_exe @@ -66,5 +70,5 @@ goto done echo HW_CAPTURE_END>>HWVALID.LOG echo. echo Hardware validation capture complete. -echo Copy HWVALID.LOG, QUAL.LOG, PERF.LOG, ASSIST.LOG, ASTRESS.LOG, ASSISTC.LOG, -echo and HWNOTES.TXT back to the host for verification. +echo Copy HWVALID.LOG, QUAL.LOG, PERF.LOG, ASSIST.LOG, ASTRESS.LOG, ARECALL.LOG, +echo ASSISTC.LOG, and HWNOTES.TXT back to the host for verification. diff --git a/hardware/README.md b/hardware/README.md index 3e0ff84..ff7be03 100644 --- a/hardware/README.md +++ b/hardware/README.md @@ -47,12 +47,14 @@ QUAL.LOG PERF.LOG ASSIST.LOG ASTRESS.LOG +ARECALL.LOG ASSISTC.LOG ``` `ASSIST.LOG` is the five-pack scripted assistant proof. `ASTRESS.LOG` is the 50-reply stress probe for CHAT, DOSHELP, OFFICE, DEV, and PORTABLE on the same -machine. +machine. `ARECALL.LOG` is the retrieval-only recall benchmark for the same pack +set. Fill in `HWNOTES.TXT` with CPU, clock, RAM, DOS version, storage, cache/turbo state, FreeBASIC version, and any setup notes. diff --git a/qemu/evidence/assistant_capability_functionality_report.md b/qemu/evidence/assistant_capability_functionality_report.md index 8847bca..c74b5c5 100644 --- a/qemu/evidence/assistant_capability_functionality_report.md +++ b/qemu/evidence/assistant_capability_functionality_report.md @@ -32,6 +32,10 @@ This report is generated from repository evidence files by `scripts/build_assist - Term-index recall evaluation: `PASS 42/42`. - Term-index candidate row scan ratio: `0.145`. - Term-index candidate byte ratio: `0.315`. +- QEMU recall benchmark: `PASS 42 cases`. +- QEMU recall average retrieval time: `61 ms`. +- QEMU recall max retrieval time: `110 ms`. +- QEMU recall modes: `kb2_term=42`. ## Language Coverage @@ -43,6 +47,7 @@ This report is generated from repository evidence files by `scripts/build_assist - KDB text index gate: `PASS 42/42`. - KDB binary gate: `PASS 42/42`. - KDB term-index gate: `PASS 42/42`. +- DOS recall benchmark gate: `PASS 42 cases`. Covered categories include general chat, identity, local inference, offline limits, prompt repair, repeated-answer recovery, troubleshooting, DOS setup, office writing, developer pack authoring, and portable-intelligence concepts. @@ -67,6 +72,8 @@ Usefulness workflows currently cover operator prompts, trust/offline limits, DOS - Hardware-capture stress source mix: `golden=26 retrieval=16 model=0 fallback=0 memory=8`. - Hardware-capture average total reply time: `28 ms`. - Hardware-capture average retrieval time: `24 ms`. +- Hardware-capture recall benchmark: `PASS 42 cases`. +- Hardware-capture recall average retrieval time: `82 ms`. - Physical machine capture status: PENDING: no staged physical `hardware__manifest.md` capture is present yet. ## Authoring And Import diff --git a/qemu/evidence/assistant_recall_486.log b/qemu/evidence/assistant_recall_486.log new file mode 100644 index 0000000..f41c14a --- /dev/null +++ b/qemu/evidence/assistant_recall_486.log @@ -0,0 +1,181 @@ ++------------------------------------------------------------+ + +| GPT2-BASIC Assistant Shell | + +| Pack-driven text UI; VGA sprite/icon slots are pack assets. | + ++------------------------------------------------------------+ + + + +ASSIST_BEGIN|suite=recall-probe|version=1 + +Available packs: + + CHAT - Conversation Pack + + DOSHELP - DOS Help Assistant + + OFFICE - Office Assistant + + DEV - Developer Pack + + PORTABLE - Portable Intelligence + + + +Pack : CHAT - Conversation Pack + +Model: PACKS\CHAT\MODEL + +Usage: /about + +Sprite asset: PACKS\CHAT\CHAT.SPR + +Icon asset : PACKS\CHAT\CHAT.ICN + +ASSIST_PACK|id=CHAT|title=Conversation Pack|model=PACKS\CHAT\MODEL|sprite=PACKS\CHAT\CHAT.SPR|icons=PACKS\CHAT\CHAT.ICN + + + +ASSIST_RECALL|pack=CHAT|query=how can i ask better questions|recall=kb2_term|recall_score=33|t_retrieve_ms=110|answer=Better prompts: Say the goal, give one detail, and ask for the next useful step. + +ASSIST_RECALL|pack=CHAT|query=what makes this intelligent on a small computer|recall=kb2_term|recall_score=36|t_retrieve_ms=50|answer=Small-computer usefulness: A tiny local model becomes more useful with retrieval, memory, and quick focused help without a network. + +ASSIST_RECALL|pack=CHAT|query=which pack should i use for writing|recall=kb2_term|recall_score=36|t_retrieve_ms=50|answer=Pack switching: Use CHAT for conversation, DOSHELP for DOS setup, and OFFICE for writing tasks. + +ASSIST_RECALL|pack=CHAT|query=can this work without the internet|recall=kb2_term|recall_score=33|t_retrieve_ms=60|answer=Network limit: I cannot browse the internet from DOS; I answer from local model weights and pack files. + +ASSIST_RECALL|pack=CHAT|query=how do i recover from a bad answer|recall=kb2_term|recall_score=33|t_retrieve_ms=110|answer=Mistake recovery: If an answer is wrong, ask a shorter question, switch packs, or give the exact error. + +ASSIST_RECALL|pack=CHAT|query=what proof helps me trust this|recall=kb2_term|recall_score=36|t_retrieve_ms=50|answer=Trust evidence: Trust proof comes from visible files, local weights, reproducible tests, and QEMU or hardware logs. + +ASSIST_RECALL|pack=CHAT|query=how should i compare options|recall=kb2_term|recall_score=39|t_retrieve_ms=60|answer=Compare options: Name the options, list one tradeoff for each, then choose the practical next step. + +ASSIST_RECALL|pack=CHAT|query=help me plan work in small steps|recall=kb2_term|recall_score=66|t_retrieve_ms=110|answer=Planning work: Break the job into small steps, do the blocking step first, and verify each result. + +ASSIST_RECALL|pack=CHAT|query=what should a useful answer look like|recall=kb2_term|recall_score=42|t_retrieve_ms=110|answer=Useful answer: A useful answer should be brief, concrete, honest about limits, and easy to act on. + +ASSIST_RECALL|pack=CHAT|query=can you explain something simply|recall=kb2_term|recall_score=24|t_retrieve_ms=50|answer=Simple explanation: Use plain words, one example, and a short answer that fits the prompt. + +ASSIST_RECALL|pack=CHAT|query=what can you know without web access|recall=kb2_term|recall_score=51|t_retrieve_ms=60|answer=No web access: Without internet, I cannot fetch news or live facts; use local notes or give the facts in the prompt. + +ASSIST_RECALL|pack=CHAT|query=how do i show confidence in an answer|recall=kb2_term|recall_score=36|t_retrieve_ms=50|answer=Answer confidence: Say what is known from local files, what is inferred, and what remains uncertain. + +Pack : DOSHELP - DOS Help Assistant + +Model: PACKS\DOSHELP\MODEL + +Usage: /about + +Sprite asset: PACKS\DOSHELP\DOSHELP.SPR + +Icon asset : PACKS\DOSHELP\DOSHELP.ICN + +ASSIST_PACK|id=DOSHELP|title=DOS Help Assistant|model=PACKS\DOSHELP\MODEL|sprite=PACKS\DOSHELP\DOSHELP.SPR|icons=PACKS\DOSHELP\DOSHELP.ICN + + + +ASSIST_RECALL|pack=DOSHELP|query=what happens before autoexec bat runs|recall=kb2_term|recall_score=57|t_retrieve_ms=110|answer=AUTOEXEC.BAT hygiene: CONFIG.SYS loads drivers first, then AUTOEXEC.BAT runs commands; keep PATH short and trim resident tools. + +ASSIST_RECALL|pack=DOSHELP|query=why use 8.3 filenames in batches|recall=kb2_term|recall_score=24|t_retrieve_ms=60|answer=DOS filenames: Use 8.3 filenames for maximum DOS compatibility and predictable batch files. + +ASSIST_RECALL|pack=DOSHELP|query=how should i prepare files for real hardware|recall=kb2_term|recall_score=48|t_retrieve_ms=50|answer=Hardware copy: Copy GPT2, MODEL, PACKS, CWSDPMI, and batch files together before testing on real DOS. + +ASSIST_RECALL|pack=DOSHELP|query=what should i do when cwsdpmi is missing|recall=kb2_term|recall_score=39|t_retrieve_ms=60|answer=Missing CWSDPMI: If a protected-mode program fails to start, copy CWSDPMI.EXE beside it and rerun the command. + +ASSIST_RECALL|pack=DOSHELP|query=how do i mount the dosbox bundle|recall=kb2_term|recall_score=54|t_retrieve_ms=50|answer=DOSBox mount: Mount the bundle directory as C:, change to C:\GPT2, then run the batch file for the desired profile. + +ASSIST_RECALL|pack=DOSHELP|query=what if the fat image is full|recall=kb2_term|recall_score=60|t_retrieve_ms=60|answer=FAT image full: Remove host-only training files or grow the disk image when FAT image assembly runs out of space. + +ASSIST_RECALL|pack=DOSHELP|query=what logs matter from qemu|recall=kb2_term|recall_score=39|t_retrieve_ms=50|answer=QEMU logs: Capture compile logs, run logs, and copied evidence files before trusting an emulator result. + +ASSIST_RECALL|pack=DOSHELP|query=how do i handle a dos memory error|recall=kb2_term|recall_score=57|t_retrieve_ms=60|answer=DOS memory error: Free conventional memory by unloading TSRs, loading drivers high, or using a smaller profile. + +ASSIST_RECALL|pack=DOSHELP|query=how should a batch menu work|recall=kb2_term|recall_score=36|t_retrieve_ms=50|answer=Batch menu: Offer numbered choices, validate the input, and keep each branch short and reversible. + +Pack : OFFICE - Office Assistant + +Model: PACKS\OFFICE\MODEL + +Usage: /about + +Sprite asset: PACKS\OFFICE\OFFICE.SPR + +Icon asset : PACKS\OFFICE\OFFICE.ICN + +ASSIST_PACK|id=OFFICE|title=Office Assistant|model=PACKS\OFFICE\MODEL|sprite=PACKS\OFFICE\OFFICE.SPR|icons=PACKS\OFFICE\OFFICE.ICN + + + +ASSIST_RECALL|pack=OFFICE|query=how should i write a handoff note|recall=kb2_term|recall_score=36|t_retrieve_ms=50|answer=Handoff note: Say what is done, what remains, where evidence lives, and who owns the next action. + +ASSIST_RECALL|pack=OFFICE|query=what belongs in a bug report|recall=kb2_term|recall_score=36|t_retrieve_ms=0|answer=Bug report shape: Include expected behavior, actual behavior, reproduction steps, logs, and the suspected area. + +ASSIST_RECALL|pack=OFFICE|query=make a compact release note|recall=kb2_term|recall_score=36|t_retrieve_ms=110|answer=Release note shape: Lead with what changed, list proof, then state any known limits plainly. + +ASSIST_RECALL|pack=OFFICE|query=what should meeting notes capture|recall=kb2_term|recall_score=51|t_retrieve_ms=0|answer=Meeting notes: Capture decisions, owners, dates, open questions, and follow-up actions. + +ASSIST_RECALL|pack=OFFICE|query=help me write a project plan|recall=kb2_term|recall_score=36|t_retrieve_ms=110|answer=Project plan: List the goal, milestones, owners, risks, and the next checkpoint. + +ASSIST_RECALL|pack=OFFICE|query=how do i track risks|recall=kb2_term|recall_score=24|t_retrieve_ms=60|answer=Risk register: For each risk, record impact, likelihood, mitigation, owner, and review date. + +ASSIST_RECALL|pack=OFFICE|query=what is a useful test plan|recall=kb2_term|recall_score=36|t_retrieve_ms=50|answer=Test plan: Define scope, cases, expected results, evidence files, and pass or fail criteria. + +ASSIST_RECALL|pack=OFFICE|query=how should i reply to a customer|recall=kb2_term|recall_score=36|t_retrieve_ms=60|answer=Customer reply: Acknowledge the issue, give the current status, state the next action, and avoid overpromising. + +ASSIST_RECALL|pack=OFFICE|query=how do i write user docs|recall=kb2_term|recall_score=51|t_retrieve_ms=50|answer=User docs: Write the task goal, prerequisites, exact steps, expected result, and troubleshooting note. + +Pack : DEV - Developer Pack + +Model: PACKS\CHAT\MODEL + +Usage: /about + +Sprite asset: PACKS\CHAT\CHAT.SPR + +Icon asset : PACKS\CHAT\CHAT.ICN + +ASSIST_PACK|id=DEV|title=Developer Pack|model=PACKS\CHAT\MODEL|sprite=PACKS\CHAT\CHAT.SPR|icons=PACKS\CHAT\CHAT.ICN + + + +ASSIST_RECALL|pack=DEV|query=how can this feel modern on a 486|recall=kb2_term|recall_score=36|t_retrieve_ms=60|answer=Modern 486 LLM path: Use small hot-loaded weights, compact retrieval databases, persistent memory, and short synthesis replies. + +ASSIST_RECALL|pack=DEV|query=what does retrieval first mean|recall=kb2_term|recall_score=36|t_retrieve_ms=50|answer=Retrieval first: Answer from KDB, USER notes, memory, and golden rows before asking the small model to synthesize. + +ASSIST_RECALL|pack=DEV|query=how do i author a pack|recall=kb2_term|recall_score=39|t_retrieve_ms=60|answer=Pack authoring: Write HELP and KNOW rows, rebuild KDB, run the authoring validator, then run retrieval and QEMU gates. + +ASSIST_RECALL|pack=DEV|query=what should i check before release|recall=kb2_term|recall_score=42|t_retrieve_ms=110|answer=Release check: Verify tests, logs, artifact names, checksums, release notes, and the target tag. + +ASSIST_RECALL|pack=DEV|query=how should we store fast recall data|recall=kb2_term|recall_score=45|t_retrieve_ms=50|answer=High velocity recall: Compile notes into compact keyword rows so DOS scans less text and reaches the answer faster. + +ASSIST_RECALL|pack=DEV|query=what should a failure record include|recall=kb2_term|recall_score=39|t_retrieve_ms=50|answer=Failure record: Record the command, input, expected result, actual result, log path, and next experiment. + +Pack : PORTABLE - Portable Intelligence + +Model: PACKS\CHAT\MODEL + +Usage: /about + +Sprite asset: PACKS\CHAT\CHAT.SPR + +Icon asset : PACKS\CHAT\CHAT.ICN + +ASSIST_PACK|id=PORTABLE|title=Portable Intelligence|model=PACKS\CHAT\MODEL|sprite=PACKS\CHAT\CHAT.SPR|icons=PACKS\CHAT\CHAT.ICN + + + +ASSIST_RECALL|pack=PORTABLE|query=what does portable intelligence mean|recall=kb2_term|recall_score=57|t_retrieve_ms=60|answer=portable meaning: Portable intelligence means small local model weights, retrieval, and memory can run on old machines without a network. + +ASSIST_RECALL|pack=PORTABLE|query=why is basic useful for teaching ai|recall=kb2_term|recall_score=57|t_retrieve_ms=50|answer=basic teaching: BASIC is useful for teaching machine intelligence because plain arrays, files, and integer arithmetic make the mechanism inspectable. + +ASSIST_RECALL|pack=PORTABLE|query=how could this move to c or assembly|recall=kb2_term|recall_score=15|t_retrieve_ms=60|answer=runtime ports: The same assistant contract can be reimplemented in C, assembly, Eshkol, or calculator BASIC when files, arrays, and loops exist. + +ASSIST_RECALL|pack=PORTABLE|query=why do hot swappable weights matter|recall=kb2_term|recall_score=45|t_retrieve_ms=50|answer=domain weight loading: Hot swappable weights load domain behavior into a tiny resident shell without rebuilding the whole runtime. + +ASSIST_RECALL|pack=PORTABLE|query=how should tiny machines store recall|recall=kb2_term|recall_score=72|t_retrieve_ms=60|answer=tiny machine recall: Tiny machines should store recall as compact indexed rows so slow processors scan fewer bytes before answering. + +ASSIST_RECALL|pack=PORTABLE|query=what proof shows this works on old hardware|recall=kb2_term|recall_score=63|t_retrieve_ms=0|answer=old hardware proof: Proof for old hardware needs local logs, repeatable tests, QEMU or hardware captures, and visible source files. + +ASSIST_END|suite=recall-probe|packs=5 diff --git a/qemu/evidence/assistant_recall_benchmark.md b/qemu/evidence/assistant_recall_benchmark.md new file mode 100644 index 0000000..23316e1 --- /dev/null +++ b/qemu/evidence/assistant_recall_benchmark.md @@ -0,0 +1,56 @@ +# Assistant Recall Benchmark + +Status: `PASS` +Recall case count: `42` +Average retrieval time: `61 ms` +Max retrieval time: `110 ms` +Average recall score: `42` +Pack counts: `CHAT=12 DEV=6 DOSHELP=9 OFFICE=9 PORTABLE=6` +Recall modes: `kb2_term=42` + +This benchmark is generated from `ASSIST.EXE --recall-probe` and measures local pack recall without model generation. + +| Pack | Recall | Score | Retrieve ms | Query | Answer | +|---|---|---:|---:|---|---| +| CHAT | kb2_term | 33 | 110 | how can i ask better questions | Better prompts: Say the goal, give one detail, and ask for the next useful step. | +| CHAT | kb2_term | 36 | 50 | what makes this intelligent on a small computer | Small-computer usefulness: A tiny local model becomes more useful with retrieval, memory, and quick focused help without a network. | +| CHAT | kb2_term | 36 | 50 | which pack should i use for writing | Pack switching: Use CHAT for conversation, DOSHELP for DOS setup, and OFFICE for writing tasks. | +| CHAT | kb2_term | 33 | 60 | can this work without the internet | Network limit: I cannot browse the internet from DOS; I answer from local model weights and pack files. | +| CHAT | kb2_term | 33 | 110 | how do i recover from a bad answer | Mistake recovery: If an answer is wrong, ask a shorter question, switch packs, or give the exact error. | +| CHAT | kb2_term | 36 | 50 | what proof helps me trust this | Trust evidence: Trust proof comes from visible files, local weights, reproducible tests, and QEMU or hardware logs. | +| CHAT | kb2_term | 39 | 60 | how should i compare options | Compare options: Name the options, list one tradeoff for each, then choose the practical next step. | +| CHAT | kb2_term | 66 | 110 | help me plan work in small steps | Planning work: Break the job into small steps, do the blocking step first, and verify each result. | +| CHAT | kb2_term | 42 | 110 | what should a useful answer look like | Useful answer: A useful answer should be brief, concrete, honest about limits, and easy to act on. | +| CHAT | kb2_term | 24 | 50 | can you explain something simply | Simple explanation: Use plain words, one example, and a short answer that fits the prompt. | +| CHAT | kb2_term | 51 | 60 | what can you know without web access | No web access: Without internet, I cannot fetch news or live facts; use local notes or give the facts in the prompt. | +| CHAT | kb2_term | 36 | 50 | how do i show confidence in an answer | Answer confidence: Say what is known from local files, what is inferred, and what remains uncertain. | +| DOSHELP | kb2_term | 57 | 110 | what happens before autoexec bat runs | AUTOEXEC.BAT hygiene: CONFIG.SYS loads drivers first, then AUTOEXEC.BAT runs commands; keep PATH short and trim resident tools. | +| DOSHELP | kb2_term | 24 | 60 | why use 8.3 filenames in batches | DOS filenames: Use 8.3 filenames for maximum DOS compatibility and predictable batch files. | +| DOSHELP | kb2_term | 48 | 50 | how should i prepare files for real hardware | Hardware copy: Copy GPT2, MODEL, PACKS, CWSDPMI, and batch files together before testing on real DOS. | +| DOSHELP | kb2_term | 39 | 60 | what should i do when cwsdpmi is missing | Missing CWSDPMI: If a protected-mode program fails to start, copy CWSDPMI.EXE beside it and rerun the command. | +| DOSHELP | kb2_term | 54 | 50 | how do i mount the dosbox bundle | DOSBox mount: Mount the bundle directory as C:, change to C:\GPT2, then run the batch file for the desired profile. | +| DOSHELP | kb2_term | 60 | 60 | what if the fat image is full | FAT image full: Remove host-only training files or grow the disk image when FAT image assembly runs out of space. | +| DOSHELP | kb2_term | 39 | 50 | what logs matter from qemu | QEMU logs: Capture compile logs, run logs, and copied evidence files before trusting an emulator result. | +| DOSHELP | kb2_term | 57 | 60 | how do i handle a dos memory error | DOS memory error: Free conventional memory by unloading TSRs, loading drivers high, or using a smaller profile. | +| DOSHELP | kb2_term | 36 | 50 | how should a batch menu work | Batch menu: Offer numbered choices, validate the input, and keep each branch short and reversible. | +| OFFICE | kb2_term | 36 | 50 | how should i write a handoff note | Handoff note: Say what is done, what remains, where evidence lives, and who owns the next action. | +| OFFICE | kb2_term | 36 | 0 | what belongs in a bug report | Bug report shape: Include expected behavior, actual behavior, reproduction steps, logs, and the suspected area. | +| OFFICE | kb2_term | 36 | 110 | make a compact release note | Release note shape: Lead with what changed, list proof, then state any known limits plainly. | +| OFFICE | kb2_term | 51 | 0 | what should meeting notes capture | Meeting notes: Capture decisions, owners, dates, open questions, and follow-up actions. | +| OFFICE | kb2_term | 36 | 110 | help me write a project plan | Project plan: List the goal, milestones, owners, risks, and the next checkpoint. | +| OFFICE | kb2_term | 24 | 60 | how do i track risks | Risk register: For each risk, record impact, likelihood, mitigation, owner, and review date. | +| OFFICE | kb2_term | 36 | 50 | what is a useful test plan | Test plan: Define scope, cases, expected results, evidence files, and pass or fail criteria. | +| OFFICE | kb2_term | 36 | 60 | how should i reply to a customer | Customer reply: Acknowledge the issue, give the current status, state the next action, and avoid overpromising. | +| OFFICE | kb2_term | 51 | 50 | how do i write user docs | User docs: Write the task goal, prerequisites, exact steps, expected result, and troubleshooting note. | +| DEV | kb2_term | 36 | 60 | how can this feel modern on a 486 | Modern 486 LLM path: Use small hot-loaded weights, compact retrieval databases, persistent memory, and short synthesis replies. | +| DEV | kb2_term | 36 | 50 | what does retrieval first mean | Retrieval first: Answer from KDB, USER notes, memory, and golden rows before asking the small model to synthesize. | +| DEV | kb2_term | 39 | 60 | how do i author a pack | Pack authoring: Write HELP and KNOW rows, rebuild KDB, run the authoring validator, then run retrieval and QEMU gates. | +| DEV | kb2_term | 42 | 110 | what should i check before release | Release check: Verify tests, logs, artifact names, checksums, release notes, and the target tag. | +| DEV | kb2_term | 45 | 50 | how should we store fast recall data | High velocity recall: Compile notes into compact keyword rows so DOS scans less text and reaches the answer faster. | +| DEV | kb2_term | 39 | 50 | what should a failure record include | Failure record: Record the command, input, expected result, actual result, log path, and next experiment. | +| PORTABLE | kb2_term | 57 | 60 | what does portable intelligence mean | portable meaning: Portable intelligence means small local model weights, retrieval, and memory can run on old machines without a network. | +| PORTABLE | kb2_term | 57 | 50 | why is basic useful for teaching ai | basic teaching: BASIC is useful for teaching machine intelligence because plain arrays, files, and integer arithmetic make the mechanism inspectable. | +| PORTABLE | kb2_term | 15 | 60 | how could this move to c or assembly | runtime ports: The same assistant contract can be reimplemented in C, assembly, Eshkol, or calculator BASIC when files, arrays, and loops exist. | +| PORTABLE | kb2_term | 45 | 50 | why do hot swappable weights matter | domain weight loading: Hot swappable weights load domain behavior into a tiny resident shell without rebuilding the whole runtime. | +| PORTABLE | kb2_term | 72 | 60 | how should tiny machines store recall | tiny machine recall: Tiny machines should store recall as compact indexed rows so slow processors scan fewer bytes before answering. | +| PORTABLE | kb2_term | 63 | 0 | what proof shows this works on old hardware | old hardware proof: Proof for old hardware needs local logs, repeatable tests, QEMU or hardware captures, and visible source files. | diff --git a/qemu/evidence/assistant_recall_compile_486.log b/qemu/evidence/assistant_recall_compile_486.log new file mode 100644 index 0000000..5ea8371 --- /dev/null +++ b/qemu/evidence/assistant_recall_compile_486.log @@ -0,0 +1,9 @@ +ASSIST_COMPILE_OK + Volume in drive C is GPT2BASIC + Volume Serial Number is 741D-1607 + + Directory of C:\ + +ASSIST EXE 387,584 05/21/2026 5:26p + 1 file(s) 387,584 bytes + 0 dir(s) 59,691,008 bytes free diff --git a/qemu/evidence/hardware_capture_486_qemu/ARECALL.LOG b/qemu/evidence/hardware_capture_486_qemu/ARECALL.LOG new file mode 100644 index 0000000..3229280 --- /dev/null +++ b/qemu/evidence/hardware_capture_486_qemu/ARECALL.LOG @@ -0,0 +1,181 @@ ++------------------------------------------------------------+ + +| GPT2-BASIC Assistant Shell | + +| Pack-driven text UI; VGA sprite/icon slots are pack assets. | + ++------------------------------------------------------------+ + + + +ASSIST_BEGIN|suite=recall-probe|version=1 + +Available packs: + + CHAT - Conversation Pack + + DOSHELP - DOS Help Assistant + + OFFICE - Office Assistant + + DEV - Developer Pack + + PORTABLE - Portable Intelligence + + + +Pack : CHAT - Conversation Pack + +Model: PACKS\CHAT\MODEL + +Usage: /about + +Sprite asset: PACKS\CHAT\CHAT.SPR + +Icon asset : PACKS\CHAT\CHAT.ICN + +ASSIST_PACK|id=CHAT|title=Conversation Pack|model=PACKS\CHAT\MODEL|sprite=PACKS\CHAT\CHAT.SPR|icons=PACKS\CHAT\CHAT.ICN + + + +ASSIST_RECALL|pack=CHAT|query=how can i ask better questions|recall=kb2_term|recall_score=33|t_retrieve_ms=110|answer=Better prompts: Say the goal, give one detail, and ask for the next useful step. + +ASSIST_RECALL|pack=CHAT|query=what makes this intelligent on a small computer|recall=kb2_term|recall_score=36|t_retrieve_ms=110|answer=Small-computer usefulness: A tiny local model becomes more useful with retrieval, memory, and quick focused help without a network. + +ASSIST_RECALL|pack=CHAT|query=which pack should i use for writing|recall=kb2_term|recall_score=36|t_retrieve_ms=50|answer=Pack switching: Use CHAT for conversation, DOSHELP for DOS setup, and OFFICE for writing tasks. + +ASSIST_RECALL|pack=CHAT|query=can this work without the internet|recall=kb2_term|recall_score=33|t_retrieve_ms=110|answer=Network limit: I cannot browse the internet from DOS; I answer from local model weights and pack files. + +ASSIST_RECALL|pack=CHAT|query=how do i recover from a bad answer|recall=kb2_term|recall_score=33|t_retrieve_ms=110|answer=Mistake recovery: If an answer is wrong, ask a shorter question, switch packs, or give the exact error. + +ASSIST_RECALL|pack=CHAT|query=what proof helps me trust this|recall=kb2_term|recall_score=36|t_retrieve_ms=60|answer=Trust evidence: Trust proof comes from visible files, local weights, reproducible tests, and QEMU or hardware logs. + +ASSIST_RECALL|pack=CHAT|query=how should i compare options|recall=kb2_term|recall_score=39|t_retrieve_ms=110|answer=Compare options: Name the options, list one tradeoff for each, then choose the practical next step. + +ASSIST_RECALL|pack=CHAT|query=help me plan work in small steps|recall=kb2_term|recall_score=66|t_retrieve_ms=110|answer=Planning work: Break the job into small steps, do the blocking step first, and verify each result. + +ASSIST_RECALL|pack=CHAT|query=what should a useful answer look like|recall=kb2_term|recall_score=42|t_retrieve_ms=110|answer=Useful answer: A useful answer should be brief, concrete, honest about limits, and easy to act on. + +ASSIST_RECALL|pack=CHAT|query=can you explain something simply|recall=kb2_term|recall_score=24|t_retrieve_ms=50|answer=Simple explanation: Use plain words, one example, and a short answer that fits the prompt. + +ASSIST_RECALL|pack=CHAT|query=what can you know without web access|recall=kb2_term|recall_score=51|t_retrieve_ms=110|answer=No web access: Without internet, I cannot fetch news or live facts; use local notes or give the facts in the prompt. + +ASSIST_RECALL|pack=CHAT|query=how do i show confidence in an answer|recall=kb2_term|recall_score=36|t_retrieve_ms=50|answer=Answer confidence: Say what is known from local files, what is inferred, and what remains uncertain. + +Pack : DOSHELP - DOS Help Assistant + +Model: PACKS\DOSHELP\MODEL + +Usage: /about + +Sprite asset: PACKS\DOSHELP\DOSHELP.SPR + +Icon asset : PACKS\DOSHELP\DOSHELP.ICN + +ASSIST_PACK|id=DOSHELP|title=DOS Help Assistant|model=PACKS\DOSHELP\MODEL|sprite=PACKS\DOSHELP\DOSHELP.SPR|icons=PACKS\DOSHELP\DOSHELP.ICN + + + +ASSIST_RECALL|pack=DOSHELP|query=what happens before autoexec bat runs|recall=kb2_term|recall_score=57|t_retrieve_ms=110|answer=AUTOEXEC.BAT hygiene: CONFIG.SYS loads drivers first, then AUTOEXEC.BAT runs commands; keep PATH short and trim resident tools. + +ASSIST_RECALL|pack=DOSHELP|query=why use 8.3 filenames in batches|recall=kb2_term|recall_score=24|t_retrieve_ms=60|answer=DOS filenames: Use 8.3 filenames for maximum DOS compatibility and predictable batch files. + +ASSIST_RECALL|pack=DOSHELP|query=how should i prepare files for real hardware|recall=kb2_term|recall_score=48|t_retrieve_ms=110|answer=Hardware copy: Copy GPT2, MODEL, PACKS, CWSDPMI, and batch files together before testing on real DOS. + +ASSIST_RECALL|pack=DOSHELP|query=what should i do when cwsdpmi is missing|recall=kb2_term|recall_score=39|t_retrieve_ms=50|answer=Missing CWSDPMI: If a protected-mode program fails to start, copy CWSDPMI.EXE beside it and rerun the command. + +ASSIST_RECALL|pack=DOSHELP|query=how do i mount the dosbox bundle|recall=kb2_term|recall_score=54|t_retrieve_ms=110|answer=DOSBox mount: Mount the bundle directory as C:, change to C:\GPT2, then run the batch file for the desired profile. + +ASSIST_RECALL|pack=DOSHELP|query=what if the fat image is full|recall=kb2_term|recall_score=60|t_retrieve_ms=60|answer=FAT image full: Remove host-only training files or grow the disk image when FAT image assembly runs out of space. + +ASSIST_RECALL|pack=DOSHELP|query=what logs matter from qemu|recall=kb2_term|recall_score=39|t_retrieve_ms=110|answer=QEMU logs: Capture compile logs, run logs, and copied evidence files before trusting an emulator result. + +ASSIST_RECALL|pack=DOSHELP|query=how do i handle a dos memory error|recall=kb2_term|recall_score=57|t_retrieve_ms=50|answer=DOS memory error: Free conventional memory by unloading TSRs, loading drivers high, or using a smaller profile. + +ASSIST_RECALL|pack=DOSHELP|query=how should a batch menu work|recall=kb2_term|recall_score=36|t_retrieve_ms=170|answer=Batch menu: Offer numbered choices, validate the input, and keep each branch short and reversible. + +Pack : OFFICE - Office Assistant + +Model: PACKS\OFFICE\MODEL + +Usage: /about + +Sprite asset: PACKS\OFFICE\OFFICE.SPR + +Icon asset : PACKS\OFFICE\OFFICE.ICN + +ASSIST_PACK|id=OFFICE|title=Office Assistant|model=PACKS\OFFICE\MODEL|sprite=PACKS\OFFICE\OFFICE.SPR|icons=PACKS\OFFICE\OFFICE.ICN + + + +ASSIST_RECALL|pack=OFFICE|query=how should i write a handoff note|recall=kb2_term|recall_score=36|t_retrieve_ms=50|answer=Handoff note: Say what is done, what remains, where evidence lives, and who owns the next action. + +ASSIST_RECALL|pack=OFFICE|query=what belongs in a bug report|recall=kb2_term|recall_score=36|t_retrieve_ms=110|answer=Bug report shape: Include expected behavior, actual behavior, reproduction steps, logs, and the suspected area. + +ASSIST_RECALL|pack=OFFICE|query=make a compact release note|recall=kb2_term|recall_score=36|t_retrieve_ms=60|answer=Release note shape: Lead with what changed, list proof, then state any known limits plainly. + +ASSIST_RECALL|pack=OFFICE|query=what should meeting notes capture|recall=kb2_term|recall_score=51|t_retrieve_ms=50|answer=Meeting notes: Capture decisions, owners, dates, open questions, and follow-up actions. + +ASSIST_RECALL|pack=OFFICE|query=help me write a project plan|recall=kb2_term|recall_score=36|t_retrieve_ms=110|answer=Project plan: List the goal, milestones, owners, risks, and the next checkpoint. + +ASSIST_RECALL|pack=OFFICE|query=how do i track risks|recall=kb2_term|recall_score=24|t_retrieve_ms=60|answer=Risk register: For each risk, record impact, likelihood, mitigation, owner, and review date. + +ASSIST_RECALL|pack=OFFICE|query=what is a useful test plan|recall=kb2_term|recall_score=36|t_retrieve_ms=50|answer=Test plan: Define scope, cases, expected results, evidence files, and pass or fail criteria. + +ASSIST_RECALL|pack=OFFICE|query=how should i reply to a customer|recall=kb2_term|recall_score=36|t_retrieve_ms=110|answer=Customer reply: Acknowledge the issue, give the current status, state the next action, and avoid overpromising. + +ASSIST_RECALL|pack=OFFICE|query=how do i write user docs|recall=kb2_term|recall_score=51|t_retrieve_ms=60|answer=User docs: Write the task goal, prerequisites, exact steps, expected result, and troubleshooting note. + +Pack : DEV - Developer Pack + +Model: PACKS\CHAT\MODEL + +Usage: /about + +Sprite asset: PACKS\CHAT\CHAT.SPR + +Icon asset : PACKS\CHAT\CHAT.ICN + +ASSIST_PACK|id=DEV|title=Developer Pack|model=PACKS\CHAT\MODEL|sprite=PACKS\CHAT\CHAT.SPR|icons=PACKS\CHAT\CHAT.ICN + + + +ASSIST_RECALL|pack=DEV|query=how can this feel modern on a 486|recall=kb2_term|recall_score=36|t_retrieve_ms=50|answer=Modern 486 LLM path: Use small hot-loaded weights, compact retrieval databases, persistent memory, and short synthesis replies. + +ASSIST_RECALL|pack=DEV|query=what does retrieval first mean|recall=kb2_term|recall_score=36|t_retrieve_ms=110|answer=Retrieval first: Answer from KDB, USER notes, memory, and golden rows before asking the small model to synthesize. + +ASSIST_RECALL|pack=DEV|query=how do i author a pack|recall=kb2_term|recall_score=39|t_retrieve_ms=60|answer=Pack authoring: Write HELP and KNOW rows, rebuild KDB, run the authoring validator, then run retrieval and QEMU gates. + +ASSIST_RECALL|pack=DEV|query=what should i check before release|recall=kb2_term|recall_score=42|t_retrieve_ms=110|answer=Release check: Verify tests, logs, artifact names, checksums, release notes, and the target tag. + +ASSIST_RECALL|pack=DEV|query=how should we store fast recall data|recall=kb2_term|recall_score=45|t_retrieve_ms=50|answer=High velocity recall: Compile notes into compact keyword rows so DOS scans less text and reaches the answer faster. + +ASSIST_RECALL|pack=DEV|query=what should a failure record include|recall=kb2_term|recall_score=39|t_retrieve_ms=60|answer=Failure record: Record the command, input, expected result, actual result, log path, and next experiment. + +Pack : PORTABLE - Portable Intelligence + +Model: PACKS\CHAT\MODEL + +Usage: /about + +Sprite asset: PACKS\CHAT\CHAT.SPR + +Icon asset : PACKS\CHAT\CHAT.ICN + +ASSIST_PACK|id=PORTABLE|title=Portable Intelligence|model=PACKS\CHAT\MODEL|sprite=PACKS\CHAT\CHAT.SPR|icons=PACKS\CHAT\CHAT.ICN + + + +ASSIST_RECALL|pack=PORTABLE|query=what does portable intelligence mean|recall=kb2_term|recall_score=57|t_retrieve_ms=110|answer=portable meaning: Portable intelligence means small local model weights, retrieval, and memory can run on old machines without a network. + +ASSIST_RECALL|pack=PORTABLE|query=why is basic useful for teaching ai|recall=kb2_term|recall_score=57|t_retrieve_ms=50|answer=basic teaching: BASIC is useful for teaching machine intelligence because plain arrays, files, and integer arithmetic make the mechanism inspectable. + +ASSIST_RECALL|pack=PORTABLE|query=how could this move to c or assembly|recall=kb2_term|recall_score=15|t_retrieve_ms=60|answer=runtime ports: The same assistant contract can be reimplemented in C, assembly, Eshkol, or calculator BASIC when files, arrays, and loops exist. + +ASSIST_RECALL|pack=PORTABLE|query=why do hot swappable weights matter|recall=kb2_term|recall_score=45|t_retrieve_ms=50|answer=domain weight loading: Hot swappable weights load domain behavior into a tiny resident shell without rebuilding the whole runtime. + +ASSIST_RECALL|pack=PORTABLE|query=how should tiny machines store recall|recall=kb2_term|recall_score=72|t_retrieve_ms=110|answer=tiny machine recall: Tiny machines should store recall as compact indexed rows so slow processors scan fewer bytes before answering. + +ASSIST_RECALL|pack=PORTABLE|query=what proof shows this works on old hardware|recall=kb2_term|recall_score=63|t_retrieve_ms=60|answer=old hardware proof: Proof for old hardware needs local logs, repeatable tests, QEMU or hardware captures, and visible source files. + +ASSIST_END|suite=recall-probe|packs=5 diff --git a/qemu/evidence/hardware_capture_486_qemu/ASSIST.LOG b/qemu/evidence/hardware_capture_486_qemu/ASSIST.LOG index 1f0522a..2ed5288 100644 --- a/qemu/evidence/hardware_capture_486_qemu/ASSIST.LOG +++ b/qemu/evidence/hardware_capture_486_qemu/ASSIST.LOG @@ -132,7 +132,7 @@ Loaded vocabulary with 4096 tokens and 0 merges, mode 2 ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096 -ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=Hello, what can you do?|canonical=Hello, what can you do|source=golden|recall=kb2_term|recall_score=15|t_retrieve_ms=0|t_golden_ms=50|t_memory_ms=0|t_model_ms=0|t_total_ms=0|actions=chat,ask,idea,explain,cancel|retrieval=Starting a conversation: Hello from DOS.|golden=I can chat in DOS.|memory=|generated=|answer=I can chat in DOS. +ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=Hello, what can you do?|canonical=Hello, what can you do|source=golden|recall=kb2_term|recall_score=15|t_retrieve_ms=110|t_golden_ms=50|t_memory_ms=0|t_model_ms=0|t_total_ms=6640|actions=chat,ask,idea,explain,cancel|retrieval=Starting a conversation: Hello from DOS.|golden=I can chat in DOS.|memory=|generated=|answer=I can chat in DOS. +------------------------------------------------------------+ @@ -142,7 +142,7 @@ ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=Hello, what can you do? I can chat in DOS. -Source: golden / kb2_term ( 0 ms) +Source: golden / kb2_term ( 110 ms) @@ -228,7 +228,7 @@ Loaded vocabulary with 4096 tokens and 0 merges, mode 2 ASSIST_MODEL|pack=DOSHELP|path=PACKS\DOSHELP\MODEL|profile=486sx-safe|tokenizer=lexicon|ctx=192|vocab=4096 -ASSIST_REPLY|pack=DOSHELP|intent=dos_memory|ui=text|query=How do I tune CONFIG.SYS memory for this assistant?|canonical=How do I tune CONFIG.SYS memory for this assistant|source=golden|recall=kb2_term|recall_score=57|t_retrieve_ms=50|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=0|actions=show_config,explain_xms,more,cancel|retrieval=CONFIG.SYS memory: Load HIMEM first, use DOS HIGH UMB, keep FILES and BUFFERS modest, and preserve conventional memory.|golden=Load HIMEM first, use DOS HIGH UMB, keep FILES and BUFFERS modest, and preserve conventional memory.|memory=Context: previous question was Hello, what can you do; previous answer was I can chat in DOS.|generated=|answer=Load HIMEM first, use DOS HIGH UMB, keep FILES and BUFFERS modest, and preserve conventional memory. +ASSIST_REPLY|pack=DOSHELP|intent=dos_memory|ui=text|query=How do I tune CONFIG.SYS memory for this assistant?|canonical=How do I tune CONFIG.SYS memory for this assistant|source=golden|recall=kb2_term|recall_score=57|t_retrieve_ms=50|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=8950|actions=show_config,explain_xms,more,cancel|retrieval=CONFIG.SYS memory: Load HIMEM first, use DOS HIGH UMB, keep FILES and BUFFERS modest, and preserve conventional memory.|golden=Load HIMEM first, use DOS HIGH UMB, keep FILES and BUFFERS modest, and preserve conventional memory.|memory=Context: previous question was Hello, what can you do; previous answer was I can chat in DOS.|generated=|answer=Load HIMEM first, use DOS HIGH UMB, keep FILES and BUFFERS modest, and preserve conventional memory. +------------------------------------------------------------+ @@ -324,7 +324,7 @@ Loaded vocabulary with 4096 tokens and 0 merges, mode 2 ASSIST_MODEL|pack=OFFICE|path=PACKS\OFFICE\MODEL|profile=486sx-safe|tokenizer=lexicon|ctx=192|vocab=4096 -ASSIST_REPLY|pack=OFFICE|intent=office_rewrite|ui=text|query=Rewrite this memo in a professional tone.|canonical=Rewrite this memo in a professional tone|source=golden|recall=kb2_term|recall_score=36|t_retrieve_ms=110|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=0|actions=rewrite,shorten,formalize,cancel|retrieval=Rewrite selected text: Use rewrite when the user has selected prose and wants a clearer, shorter, more professional version.|golden=Keep the message direct, polite, concrete, and free of jokes, filler, or unsupported claims.|memory=Context: previous question was How do I tune CONFIG.SYS memory for this assistant; previous answer was Load HIMEM first, use DOS HIGH UMB, keep FILES and BUFFERS modest, and p.|generated=|answer=Keep the message direct, polite, concrete, and free of jokes, filler, or unsupported claims. +ASSIST_REPLY|pack=OFFICE|intent=office_rewrite|ui=text|query=Rewrite this memo in a professional tone.|canonical=Rewrite this memo in a professional tone|source=golden|recall=kb2_term|recall_score=36|t_retrieve_ms=60|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=9010|actions=rewrite,shorten,formalize,cancel|retrieval=Rewrite selected text: Use rewrite when the user has selected prose and wants a clearer, shorter, more professional version.|golden=Keep the message direct, polite, concrete, and free of jokes, filler, or unsupported claims.|memory=Context: previous question was How do I tune CONFIG.SYS memory for this assistant; previous answer was Load HIMEM first, use DOS HIGH UMB, keep FILES and BUFFERS modest, and p.|generated=|answer=Keep the message direct, polite, concrete, and free of jokes, filler, or unsupported claims. +------------------------------------------------------------+ @@ -334,7 +334,7 @@ ASSIST_REPLY|pack=OFFICE|intent=office_rewrite|ui=text|query=Rewrite this memo i Keep the message direct, polite, concrete, and free of jokes, filler, or unsupported claims. -Source: golden / kb2_term ( 110 ms) +Source: golden / kb2_term ( 60 ms) @@ -420,7 +420,7 @@ Loaded vocabulary with 4096 tokens and 0 merges, mode 2 ASSIST_MODEL|pack=DEV|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096 -ASSIST_REPLY|pack=DEV|intent=general_chat|ui=text|query=How can this feel modern on a 486?|canonical=How can this feel modern on a 486|source=golden|recall=kb2_term|recall_score=36|t_retrieve_ms=100|t_golden_ms=0|t_memory_ms=60|t_model_ms=0|t_total_ms=0|actions=debug,test,release,explain,cancel|retrieval=Modern 486 LLM path: Use small hot-loaded weights, compact retrieval databases, persistent memory, and short synthesis replies.|golden=Use small hot-loaded weights, compact retrieval databases, persistent memory, and short synthesis replies.|memory=Context: previous question was Rewrite this memo in a professional tone; previous answer was Keep the message direct, polite, concrete, and free of jokes, filler, or.|generated=|answer=Use small hot-loaded weights, compact retrieval databases, persistent memory, and short synthesis replies. +ASSIST_REPLY|pack=DEV|intent=general_chat|ui=text|query=How can this feel modern on a 486?|canonical=How can this feel modern on a 486|source=golden|recall=kb2_term|recall_score=36|t_retrieve_ms=50|t_golden_ms=60|t_memory_ms=0|t_model_ms=0|t_total_ms=5760|actions=debug,test,release,explain,cancel|retrieval=Modern 486 LLM path: Use small hot-loaded weights, compact retrieval databases, persistent memory, and short synthesis replies.|golden=Use small hot-loaded weights, compact retrieval databases, persistent memory, and short synthesis replies.|memory=Context: previous question was Rewrite this memo in a professional tone; previous answer was Keep the message direct, polite, concrete, and free of jokes, filler, or.|generated=|answer=Use small hot-loaded weights, compact retrieval databases, persistent memory, and short synthesis replies. +------------------------------------------------------------+ @@ -430,7 +430,7 @@ ASSIST_REPLY|pack=DEV|intent=general_chat|ui=text|query=How can this feel modern Use small hot-loaded weights, compact retrieval databases, persistent memory, and short synthesis replies. -Source: golden / kb2_term ( 100 ms) +Source: golden / kb2_term ( 50 ms) @@ -506,7 +506,7 @@ Actions: ASSIST_MODEL|pack=PORTABLE|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=PORTABLE|intent=general_chat|ui=text|query=What does portable intelligence mean?|canonical=What does portable intelligence mean|source=retrieval|recall=kb2_term|recall_score=57|t_retrieve_ms=50|t_golden_ms=0|t_memory_ms=60|t_model_ms=0|t_total_ms=170|actions=explain,compare,teach,validate,cancel|retrieval=portable meaning: Portable intelligence means small local model weights, retrieval, and memory can run on old machines without a network.|golden=|memory=Context: previous question was How can this feel modern on a 486; previous answer was Use small hot-loaded weights, compact retrieval databases, persistent me.|generated=|answer=portable meaning: Portable intelligence means small local model weights, retrieval, and memory can run on old machines without a network. +ASSIST_REPLY|pack=PORTABLE|intent=general_chat|ui=text|query=What does portable intelligence mean?|canonical=What does portable intelligence mean|source=retrieval|recall=kb2_term|recall_score=57|t_retrieve_ms=50|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=50|actions=explain,compare,teach,validate,cancel|retrieval=portable meaning: Portable intelligence means small local model weights, retrieval, and memory can run on old machines without a network.|golden=|memory=Context: previous question was How can this feel modern on a 486; previous answer was Use small hot-loaded weights, compact retrieval databases, persistent me.|generated=|answer=portable meaning: Portable intelligence means small local model weights, retrieval, and memory can run on old machines without a network. +------------------------------------------------------------+ diff --git a/qemu/evidence/hardware_capture_486_qemu/ASTRESS.LOG b/qemu/evidence/hardware_capture_486_qemu/ASTRESS.LOG index 2b0c1c4..605c6ff 100644 --- a/qemu/evidence/hardware_capture_486_qemu/ASTRESS.LOG +++ b/qemu/evidence/hardware_capture_486_qemu/ASTRESS.LOG @@ -56,11 +56,11 @@ ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lex ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=why did my answer repeat itself|canonical=why did my answer repeat itself|source=golden|recall=kb2_term|recall_score=21|t_retrieve_ms=50|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=50|actions=chat,ask,idea,explain,cancel|retrieval=Useful answer: A useful answer should be brief, concrete, honest about limits, and easy to act on.|golden=If I repeat, reset the prompt and ask one shorter question.|memory=|generated=|answer=If I repeat, reset the prompt and ask one shorter question. +ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=why did my answer repeat itself|canonical=why did my answer repeat itself|source=golden|recall=kb2_term|recall_score=21|t_retrieve_ms=60|t_golden_ms=50|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=chat,ask,idea,explain,cancel|retrieval=Useful answer: A useful answer should be brief, concrete, honest about limits, and easy to act on.|golden=If I repeat, reset the prompt and ask one shorter question.|memory=|generated=|answer=If I repeat, reset the prompt and ask one shorter question. Answer: If I repeat, reset the prompt and ask one shorter question. -Source: golden / kb2_term ( 50 ms) +Source: golden / kb2_term ( 60 ms) [ chat,ask,idea,explain,cancel ] @@ -74,11 +74,11 @@ Source: golden / kb2_term ( 50 ms) ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=tell me why this old computer model matters|canonical=tell me why this old computer model matters|source=retrieval|recall=kb2_term|recall_score=63|t_retrieve_ms=60|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=60|actions=chat,ask,idea,explain,cancel|retrieval=Old hardware: The demo matters because a tiny local model can run on old DOS-style hardware without a network.|golden=|memory=Context: previous question was why did my answer repeat itself; previous answer was If I repeat, reset the prompt and ask one shorter question.|generated=|answer=Old hardware: The demo matters because a tiny local model can run on old DOS-style hardware without a network. +ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=tell me why this old computer model matters|canonical=tell me why this old computer model matters|source=retrieval|recall=kb2_term|recall_score=63|t_retrieve_ms=110|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=chat,ask,idea,explain,cancel|retrieval=Old hardware: The demo matters because a tiny local model can run on old DOS-style hardware without a network.|golden=|memory=Context: previous question was why did my answer repeat itself; previous answer was If I repeat, reset the prompt and ask one shorter question.|generated=|answer=Old hardware: The demo matters because a tiny local model can run on old DOS-style hardware without a network. Answer: Old hardware: The demo matters because a tiny local model can run on old DOS-style hardware without a network. -Source: retrieval / kb2_term ( 60 ms) +Source: retrieval / kb2_term ( 110 ms) [ chat,ask,idea,explain,cancel ] @@ -92,11 +92,11 @@ Source: retrieval / kb2_term ( 60 ms) ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=make a tiny plan for fixing a bug|canonical=make a tiny plan for fixing a bug|source=golden|recall=kb2_term|recall_score=36|t_retrieve_ms=0|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=0|actions=chat,ask,idea,explain,cancel|retrieval=Debug plan: Start with the failing command, expected result, and first error line, then test one small fix.|golden=Check the first error, change one thing, then test again.|memory=Context: previous question was tell me why this old computer model matters; previous answer was Old hardware: The demo matters because a tiny local model can run on old.|generated=|answer=Check the first error, change one thing, then test again. +ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=make a tiny plan for fixing a bug|canonical=make a tiny plan for fixing a bug|source=golden|recall=kb2_term|recall_score=36|t_retrieve_ms=110|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=chat,ask,idea,explain,cancel|retrieval=Debug plan: Start with the failing command, expected result, and first error line, then test one small fix.|golden=Check the first error, change one thing, then test again.|memory=Context: previous question was tell me why this old computer model matters; previous answer was Old hardware: The demo matters because a tiny local model can run on old.|generated=|answer=Check the first error, change one thing, then test again. Answer: Check the first error, change one thing, then test again. -Source: golden / kb2_term ( 0 ms) +Source: golden / kb2_term ( 110 ms) [ chat,ask,idea,explain,cancel ] @@ -110,11 +110,11 @@ Source: golden / kb2_term ( 0 ms) ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=what is the difference between a prompt and an answer|canonical=what is the difference between a prompt and an answer|source=retrieval|recall=kb2_term|recall_score=30|t_retrieve_ms=50|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=50|actions=chat,ask,idea,explain,cancel|retrieval=Explain mode: Use plain words, one example, and a short answer that fits the prompt.|golden=|memory=Context: previous question was make a tiny plan for fixing a bug; previous answer was Check the first error, change one thing, then test again.|generated=|answer=Explain mode: Use plain words, one example, and a short answer that fits the prompt. +ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=what is the difference between a prompt and an answer|canonical=what is the difference between a prompt and an answer|source=retrieval|recall=kb2_term|recall_score=30|t_retrieve_ms=110|t_golden_ms=60|t_memory_ms=0|t_model_ms=0|t_total_ms=170|actions=chat,ask,idea,explain,cancel|retrieval=Explain mode: Use plain words, one example, and a short answer that fits the prompt.|golden=|memory=Context: previous question was make a tiny plan for fixing a bug; previous answer was Check the first error, change one thing, then test again.|generated=|answer=Explain mode: Use plain words, one example, and a short answer that fits the prompt. Answer: Explain mode: Use plain words, one example, and a short answer that fits the prompt. -Source: retrieval / kb2_term ( 50 ms) +Source: retrieval / kb2_term ( 110 ms) [ chat,ask,idea,explain,cancel ] @@ -128,11 +128,11 @@ Source: retrieval / kb2_term ( 50 ms) ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=CHAT|intent=explain|ui=text|query=can you explain what local inference means|canonical=can you explain what local inference means|source=golden|recall=kb2_term|recall_score=57|t_retrieve_ms=60|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=60|actions=explain,example,more,cancel|retrieval=Local inference: Local inference means the DOS program reads model weights and produces the answer on this machine.|golden=Local inference means the DOS program reads model weights and produces the answer on this machine.|memory=Context: previous question was what is the difference between a prompt and an answer; previous answer was Explain mode: Use plain words, one example, and a short answer that fits.|generated=|answer=Local inference means the DOS program reads model weights and produces the answer on this machine. +ASSIST_REPLY|pack=CHAT|intent=explain|ui=text|query=can you explain what local inference means|canonical=can you explain what local inference means|source=golden|recall=kb2_term|recall_score=57|t_retrieve_ms=110|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=explain,example,more,cancel|retrieval=Local inference: Local inference means the DOS program reads model weights and produces the answer on this machine.|golden=Local inference means the DOS program reads model weights and produces the answer on this machine.|memory=Context: previous question was what is the difference between a prompt and an answer; previous answer was Explain mode: Use plain words, one example, and a short answer that fits.|generated=|answer=Local inference means the DOS program reads model weights and produces the answer on this machine. Answer: Local inference means the DOS program reads model weights and produces the answer on this machine. -Source: golden / kb2_term ( 60 ms) +Source: golden / kb2_term ( 110 ms) [ explain,example,more,cancel ] @@ -146,11 +146,11 @@ Source: golden / kb2_term ( 60 ms) ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=i feel stuck debugging this|canonical=i feel stuck debugging this|source=golden|recall=kb2_bucket|recall_score=15|t_retrieve_ms=50|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=50|actions=chat,ask,idea,explain,cancel|retrieval=Discussion topics: We can discuss ideas, feelings, games, music, or DOS.|golden=Check the first error, change one thing, then test again.|memory=Context: previous question was can you explain what local inference means; previous answer was Local inference means the DOS program reads model weights and produces t.|generated=|answer=Check the first error, change one thing, then test again. +ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=i feel stuck debugging this|canonical=i feel stuck debugging this|source=golden|recall=kb2_bucket|recall_score=15|t_retrieve_ms=270|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=270|actions=chat,ask,idea,explain,cancel|retrieval=Discussion topics: We can discuss ideas, feelings, games, music, or DOS.|golden=Check the first error, change one thing, then test again.|memory=Context: previous question was can you explain what local inference means; previous answer was Local inference means the DOS program reads model weights and produces t.|generated=|answer=Check the first error, change one thing, then test again. Answer: Check the first error, change one thing, then test again. -Source: golden / kb2_bucket ( 50 ms) +Source: golden / kb2_bucket ( 270 ms) [ chat,ask,idea,explain,cancel ] @@ -164,11 +164,11 @@ Source: golden / kb2_bucket ( 50 ms) ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=what should i do if the answer sounds weird|canonical=what should i do if the answer sounds weird|source=golden|recall=kb2_term|recall_score=21|t_retrieve_ms=60|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=60|actions=chat,ask,idea,explain,cancel|retrieval=Useful answer: A useful answer should be brief, concrete, honest about limits, and easy to act on.|golden=Retry with a shorter prompt or switch packs.|memory=Context: previous question was i feel stuck debugging this; previous answer was Check the first error, change one thing, then test again.|generated=|answer=Retry with a shorter prompt or switch packs. +ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=what should i do if the answer sounds weird|canonical=what should i do if the answer sounds weird|source=golden|recall=kb2_term|recall_score=21|t_retrieve_ms=50|t_golden_ms=60|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=chat,ask,idea,explain,cancel|retrieval=Useful answer: A useful answer should be brief, concrete, honest about limits, and easy to act on.|golden=Retry with a shorter prompt or switch packs.|memory=Context: previous question was i feel stuck debugging this; previous answer was Check the first error, change one thing, then test again.|generated=|answer=Retry with a shorter prompt or switch packs. Answer: Retry with a shorter prompt or switch packs. -Source: golden / kb2_term ( 60 ms) +Source: golden / kb2_term ( 50 ms) [ chat,ask,idea,explain,cancel ] @@ -182,11 +182,11 @@ Source: golden / kb2_term ( 60 ms) ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=give me a status update about a delayed release|canonical=give me a status update about a delayed release|source=retrieval|recall=kb2_term|recall_score=39|t_retrieve_ms=0|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=0|actions=chat,ask,idea,explain,cancel|retrieval=Release status: Check the tag target, release assets, checksums, and test result before calling the release done.|golden=|memory=Context: previous question was what should i do if the answer sounds weird; previous answer was Retry with a shorter prompt or switch packs.|generated=|answer=Release status: Check the tag target, release assets, checksums, and test result before calling the release done. +ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=give me a status update about a delayed release|canonical=give me a status update about a delayed release|source=retrieval|recall=kb2_term|recall_score=39|t_retrieve_ms=50|t_golden_ms=60|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=chat,ask,idea,explain,cancel|retrieval=Release status: Check the tag target, release assets, checksums, and test result before calling the release done.|golden=|memory=Context: previous question was what should i do if the answer sounds weird; previous answer was Retry with a shorter prompt or switch packs.|generated=|answer=Release status: Check the tag target, release assets, checksums, and test result before calling the release done. Answer: Release status: Check the tag target, release assets, checksums, and test result before calling the release done. -Source: retrieval / kb2_term ( 0 ms) +Source: retrieval / kb2_term ( 50 ms) [ chat,ask,idea,explain,cancel ] @@ -200,11 +200,11 @@ Source: retrieval / kb2_term ( 0 ms) ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=can you browse the internet from dos|canonical=can you browse the internet from dos|source=golden|recall=kb2_term|recall_score=45|t_retrieve_ms=50|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=50|actions=chat,ask,idea,explain,cancel|retrieval=Network limit: I cannot browse the internet from DOS; I answer from local model weights and pack files.|golden=I cannot browse the internet from DOS.|memory=Context: previous question was give me a status update about a delayed release; previous answer was Release status: Check the tag target, release assets, checksums, and tes.|generated=|answer=I cannot browse the internet from DOS. +ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=can you browse the internet from dos|canonical=can you browse the internet from dos|source=golden|recall=kb2_term|recall_score=45|t_retrieve_ms=110|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=chat,ask,idea,explain,cancel|retrieval=Network limit: I cannot browse the internet from DOS; I answer from local model weights and pack files.|golden=I cannot browse the internet from DOS.|memory=Context: previous question was give me a status update about a delayed release; previous answer was Release status: Check the tag target, release assets, checksums, and tes.|generated=|answer=I cannot browse the internet from DOS. Answer: I cannot browse the internet from DOS. -Source: golden / kb2_term ( 50 ms) +Source: golden / kb2_term ( 110 ms) [ chat,ask,idea,explain,cancel ] @@ -218,11 +218,11 @@ Source: golden / kb2_term ( 50 ms) ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=can we talk about games|canonical=can we talk about games|source=golden|recall=kb2_term|recall_score=15|t_retrieve_ms=0|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=0|actions=chat,ask,idea,explain,cancel|retrieval=Discussion topics: We can discuss ideas, feelings, games, music, or DOS.|golden=Yes, games are a fine topic.|memory=Context: previous question was can you browse the internet from dos; previous answer was I cannot browse the internet from DOS.|generated=|answer=Yes, games are a fine topic. +ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=can we talk about games|canonical=can we talk about games|source=golden|recall=kb2_term|recall_score=15|t_retrieve_ms=50|t_golden_ms=60|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=chat,ask,idea,explain,cancel|retrieval=Discussion topics: We can discuss ideas, feelings, games, music, or DOS.|golden=Yes, games are a fine topic.|memory=Context: previous question was can you browse the internet from dos; previous answer was I cannot browse the internet from DOS.|generated=|answer=Yes, games are a fine topic. Answer: Yes, games are a fine topic. -Source: golden / kb2_term ( 0 ms) +Source: golden / kb2_term ( 50 ms) [ chat,ask,idea,explain,cancel ] @@ -236,11 +236,11 @@ Source: golden / kb2_term ( 0 ms) ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=i am tired|canonical=i am tired|source=golden|recall=none|recall_score=0|t_retrieve_ms=110|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=chat,ask,idea,explain,cancel|retrieval=|golden=Rest if you can.|memory=Context: previous question was can we talk about games; previous answer was Yes, games are a fine topic.|generated=|answer=Rest if you can. +ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=i am tired|canonical=i am tired|source=golden|recall=none|recall_score=0|t_retrieve_ms=270|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=330|actions=chat,ask,idea,explain,cancel|retrieval=|golden=Rest if you can.|memory=Context: previous question was can we talk about games; previous answer was Yes, games are a fine topic.|generated=|answer=Rest if you can. Answer: Rest if you can. -Source: golden / none ( 110 ms) +Source: golden / none ( 270 ms) [ chat,ask,idea,explain,cancel ] @@ -254,11 +254,11 @@ Source: golden / none ( 110 ms) ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=i feel lonely|canonical=i feel lonely|source=golden|recall=kb2_bucket|recall_score=15|t_retrieve_ms=60|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=60|actions=chat,ask,idea,explain,cancel|retrieval=Discussion topics: We can discuss ideas, feelings, games, music, or DOS.|golden=I can keep you company briefly.|memory=Context: previous question was i am tired; previous answer was Rest if you can.|generated=|answer=I can keep you company briefly. +ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=i feel lonely|canonical=i feel lonely|source=golden|recall=kb2_bucket|recall_score=15|t_retrieve_ms=110|t_golden_ms=50|t_memory_ms=0|t_model_ms=0|t_total_ms=160|actions=chat,ask,idea,explain,cancel|retrieval=Discussion topics: We can discuss ideas, feelings, games, music, or DOS.|golden=I can keep you company briefly.|memory=Context: previous question was i am tired; previous answer was Rest if you can.|generated=|answer=I can keep you company briefly. Answer: I can keep you company briefly. -Source: golden / kb2_bucket ( 60 ms) +Source: golden / kb2_bucket ( 110 ms) [ chat,ask,idea,explain,cancel ] @@ -272,11 +272,11 @@ Source: golden / kb2_bucket ( 60 ms) ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=do you enjoy music|canonical=do you enjoy music|source=retrieval|recall=kb2_term|recall_score=21|t_retrieve_ms=0|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=0|actions=chat,ask,idea,explain,cancel|retrieval=Music topic: I can talk about music.|golden=|memory=Context: previous question was i feel lonely; previous answer was I can keep you company briefly.|generated=|answer=Music topic: I can talk about music. +ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=do you enjoy music|canonical=do you enjoy music|source=retrieval|recall=kb2_term|recall_score=21|t_retrieve_ms=60|t_golden_ms=50|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=chat,ask,idea,explain,cancel|retrieval=Music topic: I can talk about music.|golden=|memory=Context: previous question was i feel lonely; previous answer was I can keep you company briefly.|generated=|answer=Music topic: I can talk about music. Answer: Music topic: I can talk about music. -Source: retrieval / kb2_term ( 0 ms) +Source: retrieval / kb2_term ( 60 ms) [ chat,ask,idea,explain,cancel ] @@ -290,11 +290,11 @@ Source: retrieval / kb2_term ( 0 ms) ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=what should i do if i am bored|canonical=what should i do if i am bored|source=golden|recall=kb2_term|recall_score=18|t_retrieve_ms=50|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=50|actions=chat,ask,idea,explain,cancel|retrieval=Boredom help: Try one small project.|golden=Try one small project.|memory=Context: previous question was do you enjoy music; previous answer was Music topic: I can talk about music.|generated=|answer=Try one small project. +ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=what should i do if i am bored|canonical=what should i do if i am bored|source=golden|recall=kb2_term|recall_score=18|t_retrieve_ms=60|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=60|actions=chat,ask,idea,explain,cancel|retrieval=Boredom help: Try one small project.|golden=Try one small project.|memory=Context: previous question was do you enjoy music; previous answer was Music topic: I can talk about music.|generated=|answer=Try one small project. Answer: Try one small project. -Source: golden / kb2_term ( 50 ms) +Source: golden / kb2_term ( 60 ms) [ chat,ask,idea,explain,cancel ] @@ -308,11 +308,11 @@ Source: golden / kb2_term ( 50 ms) ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=how do i relax for a minute|canonical=how do i relax for a minute|source=golden|recall=kb2_term|recall_score=33|t_retrieve_ms=0|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=0|actions=chat,ask,idea,explain,cancel|retrieval=Relax help: Breathe slowly and rest for a minute.|golden=Breathe slowly and rest for a minute.|memory=Context: previous question was what should i do if i am bored; previous answer was Try one small project.|generated=|answer=Breathe slowly and rest for a minute. +ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=how do i relax for a minute|canonical=how do i relax for a minute|source=golden|recall=kb2_term|recall_score=33|t_retrieve_ms=60|t_golden_ms=50|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=chat,ask,idea,explain,cancel|retrieval=Relax help: Breathe slowly and rest for a minute.|golden=Breathe slowly and rest for a minute.|memory=Context: previous question was what should i do if i am bored; previous answer was Try one small project.|generated=|answer=Breathe slowly and rest for a minute. Answer: Breathe slowly and rest for a minute. -Source: golden / kb2_term ( 0 ms) +Source: golden / kb2_term ( 60 ms) [ chat,ask,idea,explain,cancel ] @@ -326,11 +326,11 @@ Source: golden / kb2_term ( 0 ms) ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=what is friendship|canonical=what is friendship|source=golden|recall=kb2_term|recall_score=21|t_retrieve_ms=0|t_golden_ms=60|t_memory_ms=0|t_model_ms=0|t_total_ms=60|actions=chat,ask,idea,explain,cancel|retrieval=Friendship meaning: Friendship is care and trust.|golden=Friendship is care and trust.|memory=Context: previous question was how do i relax for a minute; previous answer was Breathe slowly and rest for a minute.|generated=|answer=Friendship is care and trust. +ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=what is friendship|canonical=what is friendship|source=golden|recall=kb2_term|recall_score=21|t_retrieve_ms=60|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=chat,ask,idea,explain,cancel|retrieval=Friendship meaning: Friendship is care and trust.|golden=Friendship is care and trust.|memory=Context: previous question was how do i relax for a minute; previous answer was Breathe slowly and rest for a minute.|generated=|answer=Friendship is care and trust. Answer: Friendship is care and trust. -Source: golden / kb2_term ( 0 ms) +Source: golden / kb2_term ( 60 ms) [ chat,ask,idea,explain,cancel ] @@ -344,11 +344,11 @@ Source: golden / kb2_term ( 0 ms) ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=what can we discuss|canonical=what can we discuss|source=golden|recall=kb2_term|recall_score=21|t_retrieve_ms=0|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=0|actions=chat,ask,idea,explain,cancel|retrieval=Discussion topics: We can discuss ideas, feelings, games, music, or DOS.|golden=We can discuss ideas, feelings, games, or DOS.|memory=Context: previous question was what is friendship; previous answer was Friendship is care and trust.|generated=|answer=We can discuss ideas, feelings, games, or DOS. +ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=what can we discuss|canonical=what can we discuss|source=golden|recall=kb2_term|recall_score=21|t_retrieve_ms=60|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=60|actions=chat,ask,idea,explain,cancel|retrieval=Discussion topics: We can discuss ideas, feelings, games, music, or DOS.|golden=We can discuss ideas, feelings, games, or DOS.|memory=Context: previous question was what is friendship; previous answer was Friendship is care and trust.|generated=|answer=We can discuss ideas, feelings, games, or DOS. Answer: We can discuss ideas, feelings, games, or DOS. -Source: golden / kb2_term ( 0 ms) +Source: golden / kb2_term ( 60 ms) [ chat,ask,idea,explain,cancel ] @@ -362,11 +362,11 @@ Source: golden / kb2_term ( 0 ms) ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=what is your favorite food|canonical=what is your favorite food|source=golden|recall=kb2_term|recall_score=21|t_retrieve_ms=0|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=0|actions=chat,ask,idea,explain,cancel|retrieval=Food topic: I do not eat, but I can talk about food.|golden=I do not eat, but I can talk about food.|memory=Context: previous question was what can we discuss; previous answer was We can discuss ideas, feelings, games, or DOS.|generated=|answer=I do not eat, but I can talk about food. +ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=what is your favorite food|canonical=what is your favorite food|source=golden|recall=kb2_term|recall_score=21|t_retrieve_ms=50|t_golden_ms=50|t_memory_ms=0|t_model_ms=0|t_total_ms=100|actions=chat,ask,idea,explain,cancel|retrieval=Food topic: I do not eat, but I can talk about food.|golden=I do not eat, but I can talk about food.|memory=Context: previous question was what can we discuss; previous answer was We can discuss ideas, feelings, games, or DOS.|generated=|answer=I do not eat, but I can talk about food. Answer: I do not eat, but I can talk about food. -Source: golden / kb2_term ( 0 ms) +Source: golden / kb2_term ( 50 ms) [ chat,ask,idea,explain,cancel ] @@ -380,11 +380,11 @@ Source: golden / kb2_term ( 0 ms) ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=what is a goal|canonical=what is a goal|source=golden|recall=kb2_term|recall_score=21|t_retrieve_ms=50|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=50|actions=chat,ask,idea,explain,cancel|retrieval=Goal meaning: A goal is something you want to reach.|golden=A goal is something you want to reach.|memory=Context: previous question was what is your favorite food; previous answer was I do not eat, but I can talk about food.|generated=|answer=A goal is something you want to reach. +ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=what is a goal|canonical=what is a goal|source=golden|recall=kb2_term|recall_score=21|t_retrieve_ms=60|t_golden_ms=50|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=chat,ask,idea,explain,cancel|retrieval=Goal meaning: A goal is something you want to reach.|golden=A goal is something you want to reach.|memory=Context: previous question was what is your favorite food; previous answer was I do not eat, but I can talk about food.|generated=|answer=A goal is something you want to reach. Answer: A goal is something you want to reach. -Source: golden / kb2_term ( 50 ms) +Source: golden / kb2_term ( 60 ms) [ chat,ask,idea,explain,cancel ] @@ -398,11 +398,11 @@ Source: golden / kb2_term ( 50 ms) ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=how do i improve|canonical=how do i improve|source=golden|recall=kb2_term|recall_score=12|t_retrieve_ms=0|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=0|actions=chat,ask,idea,explain,cancel|retrieval=Practice help: Practice one small thing each day.|golden=Practice one small thing each day.|memory=Context: previous question was what is a goal; previous answer was A goal is something you want to reach.|generated=|answer=Practice one small thing each day. +ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=how do i improve|canonical=how do i improve|source=golden|recall=kb2_term|recall_score=12|t_retrieve_ms=60|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=60|actions=chat,ask,idea,explain,cancel|retrieval=Practice help: Practice one small thing each day.|golden=Practice one small thing each day.|memory=Context: previous question was what is a goal; previous answer was A goal is something you want to reach.|generated=|answer=Practice one small thing each day. Answer: Practice one small thing each day. -Source: golden / kb2_term ( 0 ms) +Source: golden / kb2_term ( 60 ms) [ chat,ask,idea,explain,cancel ] @@ -416,11 +416,11 @@ Source: golden / kb2_term ( 0 ms) ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=my name is Operator|canonical=my name is Operator|source=memory|recall=kb2_term|recall_score=15|t_retrieve_ms=60|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=60|actions=chat,ask,idea,explain,cancel|retrieval=Worry help: Name the worry, then choose one step.|golden=|memory=Context: user name is Operator; previous question was how do i improve; previous answer was Practice one small thing each day.|generated=|answer=I will remember your name is Operator. +ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=my name is Operator|canonical=my name is Operator|source=memory|recall=kb2_term|recall_score=15|t_retrieve_ms=110|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=chat,ask,idea,explain,cancel|retrieval=Worry help: Name the worry, then choose one step.|golden=|memory=Context: user name is Operator; previous question was how do i improve; previous answer was Practice one small thing each day.|generated=|answer=I will remember your name is Operator. Answer: I will remember your name is Operator. -Source: memory / kb2_term ( 60 ms) +Source: memory / kb2_term ( 110 ms) [ chat,ask,idea,explain,cancel ] @@ -434,11 +434,11 @@ Source: memory / kb2_term ( 60 ms) ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=what is my name|canonical=what is my name|source=memory|recall=kb2_term|recall_score=15|t_retrieve_ms=50|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=50|actions=chat,ask,idea,explain,cancel|retrieval=Worry help: Name the worry, then choose one step.|golden=|memory=Context: user name is Operator; previous question was my name is Operator; previous answer was I will remember your name is Operator.|generated=|answer=Your name is Operator. +ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=what is my name|canonical=what is my name|source=memory|recall=kb2_term|recall_score=15|t_retrieve_ms=110|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=chat,ask,idea,explain,cancel|retrieval=Worry help: Name the worry, then choose one step.|golden=|memory=Context: user name is Operator; previous question was my name is Operator; previous answer was I will remember your name is Operator.|generated=|answer=Your name is Operator. Answer: Your name is Operator. -Source: memory / kb2_term ( 50 ms) +Source: memory / kb2_term ( 110 ms) [ chat,ask,idea,explain,cancel ] @@ -452,11 +452,11 @@ Source: memory / kb2_term ( 50 ms) ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=we are working on the DOSBox assistant|canonical=we are working on the DOSBox assistant|source=memory|recall=kb2_term|recall_score=30|t_retrieve_ms=0|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=0|actions=chat,ask,idea,explain,cancel|retrieval=Memory use: Tell me facts with phrases like my name is, we are working on, or I prefer.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; previous question was what is my name; previous answer was Your name is Operator.|generated=|answer=I will remember we are working on the DOSBox assistant. +ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=we are working on the DOSBox assistant|canonical=we are working on the DOSBox assistant|source=memory|recall=kb2_term|recall_score=30|t_retrieve_ms=110|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=chat,ask,idea,explain,cancel|retrieval=Memory use: Tell me facts with phrases like my name is, we are working on, or I prefer.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; previous question was what is my name; previous answer was Your name is Operator.|generated=|answer=I will remember we are working on the DOSBox assistant. Answer: I will remember we are working on the DOSBox assistant. -Source: memory / kb2_term ( 0 ms) +Source: memory / kb2_term ( 110 ms) [ chat,ask,idea,explain,cancel ] @@ -470,11 +470,11 @@ Source: memory / kb2_term ( 0 ms) ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=what are we working on|canonical=what are we working on|source=memory|recall=kb2_term|recall_score=30|t_retrieve_ms=60|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=60|actions=chat,ask,idea,explain,cancel|retrieval=Memory use: Tell me facts with phrases like my name is, we are working on, or I prefer.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; previous question was we are working on the DOSBox assistant; previous answer was I will remember we are working on the DOSBox assistant.|generated=|answer=We are working on the DOSBox assistant. +ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=what are we working on|canonical=what are we working on|source=memory|recall=kb2_term|recall_score=30|t_retrieve_ms=110|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=chat,ask,idea,explain,cancel|retrieval=Memory use: Tell me facts with phrases like my name is, we are working on, or I prefer.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; previous question was we are working on the DOSBox assistant; previous answer was I will remember we are working on the DOSBox assistant.|generated=|answer=We are working on the DOSBox assistant. Answer: We are working on the DOSBox assistant. -Source: memory / kb2_term ( 60 ms) +Source: memory / kb2_term ( 110 ms) [ chat,ask,idea,explain,cancel ] @@ -488,11 +488,11 @@ Source: memory / kb2_term ( 60 ms) ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=i prefer short answers|canonical=i prefer short answers|source=memory|recall=kb2_term|recall_score=30|t_retrieve_ms=0|t_golden_ms=50|t_memory_ms=0|t_model_ms=0|t_total_ms=50|actions=chat,ask,idea,explain,cancel|retrieval=Hardware limits: Old hardware favors short prompts, brief answers, small models, and retrieval from text files.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was what are we working on; previous answer was We are working on the DOSBox assistant.|generated=|answer=I will remember you prefer short answers. +ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=i prefer short answers|canonical=i prefer short answers|source=memory|recall=kb2_term|recall_score=30|t_retrieve_ms=110|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=chat,ask,idea,explain,cancel|retrieval=Hardware limits: Old hardware favors short prompts, brief answers, small models, and retrieval from text files.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was what are we working on; previous answer was We are working on the DOSBox assistant.|generated=|answer=I will remember you prefer short answers. Answer: I will remember you prefer short answers. -Source: memory / kb2_term ( 0 ms) +Source: memory / kb2_term ( 110 ms) [ chat,ask,idea,explain,cancel ] @@ -506,11 +506,11 @@ Source: memory / kb2_term ( 0 ms) ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=how should you answer me|canonical=how should you answer me|source=memory|recall=kb2_term|recall_score=21|t_retrieve_ms=0|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=0|actions=chat,ask,idea,explain,cancel|retrieval=Useful answer: A useful answer should be brief, concrete, honest about limits, and easy to act on.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was i prefer short answers; previous answer was I will remember you prefer short answers.|generated=|answer=I should answer short answers. +ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=how should you answer me|canonical=how should you answer me|source=memory|recall=kb2_term|recall_score=21|t_retrieve_ms=110|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=chat,ask,idea,explain,cancel|retrieval=Useful answer: A useful answer should be brief, concrete, honest about limits, and easy to act on.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was i prefer short answers; previous answer was I will remember you prefer short answers.|generated=|answer=I should answer short answers. Answer: I should answer short answers. -Source: memory / kb2_term ( 0 ms) +Source: memory / kb2_term ( 110 ms) [ chat,ask,idea,explain,cancel ] @@ -524,7 +524,7 @@ Source: memory / kb2_term ( 0 ms) ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=what did i just ask|canonical=what did i just ask|source=memory|recall=kb2_term|recall_score=15|t_retrieve_ms=60|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=60|actions=chat,ask,idea,explain,cancel|retrieval=Repeat control: If I repeat, ask a shorter question or switch packs.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was how should you answer me; previous answer was I should answer short answers.|generated=|answer=You just asked: how should you answer me. +ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=what did i just ask|canonical=what did i just ask|source=memory|recall=kb2_term|recall_score=15|t_retrieve_ms=60|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=chat,ask,idea,explain,cancel|retrieval=Repeat control: If I repeat, ask a shorter question or switch packs.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was how should you answer me; previous answer was I should answer short answers.|generated=|answer=You just asked: how should you answer me. Answer: You just asked: how should you answer me. @@ -542,11 +542,11 @@ Source: memory / kb2_term ( 60 ms) ASSIST_MODEL|pack=CHAT|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=what do you remember|canonical=what do you remember|source=memory|recall=kb2_term|recall_score=15|t_retrieve_ms=0|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=0|actions=chat,ask,idea,explain,cancel|retrieval=Session memory: I remember only small facts during this session.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was what did i just ask; previous answer was You just asked: how should you answer me.|generated=|answer=Memory: name=Operator; goal=the DOSBox assistant; style=short answers; last_user=what did i just ask. +ASSIST_REPLY|pack=CHAT|intent=general_chat|ui=text|query=what do you remember|canonical=what do you remember|source=memory|recall=kb2_term|recall_score=15|t_retrieve_ms=60|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=60|actions=chat,ask,idea,explain,cancel|retrieval=Session memory: I remember only small facts during this session.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was what did i just ask; previous answer was You just asked: how should you answer me.|generated=|answer=Memory: name=Operator; goal=the DOSBox assistant; style=short answers; last_user=what did i just ask. Answer: Memory: name=Operator; goal=the DOSBox assistant; style=short answers; last_user=what did i just ask. -Source: memory / kb2_term ( 0 ms) +Source: memory / kb2_term ( 60 ms) [ chat,ask,idea,explain,cancel ] @@ -584,11 +584,11 @@ ASSIST_MODEL|pack=DOSHELP|path=PACKS\DOSHELP\MODEL|profile=486sx-safe|tokenizer= ASSIST_MODEL|pack=DOSHELP|path=PACKS\DOSHELP\MODEL|profile=486sx-safe|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=DOSHELP|intent=dos_memory|ui=text|query=how do i keep conventional memory free|canonical=how do i keep conventional memory free|source=retrieval|recall=kb2_term|recall_score=54|t_retrieve_ms=0|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=0|actions=show_config,explain_xms,more,cancel|retrieval=Conventional memory: Keep drivers high, trim TSR programs, and preserve low memory for old DOS programs.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was what do you remember; previous answer was Memory: name=Operator; goal=the DOSBox assistant; style=short answers; l.|generated=|answer=Conventional memory: Keep drivers high, trim TSR programs, and preserve low memory for old DOS programs. +ASSIST_REPLY|pack=DOSHELP|intent=dos_memory|ui=text|query=how do i keep conventional memory free|canonical=how do i keep conventional memory free|source=retrieval|recall=kb2_term|recall_score=54|t_retrieve_ms=170|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=170|actions=show_config,explain_xms,more,cancel|retrieval=Conventional memory: Keep drivers high, trim TSR programs, and preserve low memory for old DOS programs.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was what do you remember; previous answer was Memory: name=Operator; goal=the DOSBox assistant; style=short answers; l.|generated=|answer=Conventional memory: Keep drivers high, trim TSR programs, and preserve low memory for old DOS programs. Answer: Conventional memory: Keep drivers high, trim TSR programs, and preserve low memory for old DOS programs. -Source: retrieval / kb2_term ( 0 ms) +Source: retrieval / kb2_term ( 170 ms) [ show_config,explain_xms,more,cancel ] @@ -602,11 +602,11 @@ Source: retrieval / kb2_term ( 0 ms) ASSIST_MODEL|pack=DOSHELP|path=PACKS\DOSHELP\MODEL|profile=486sx-safe|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=DOSHELP|intent=dos_memory|ui=text|query=my autoexec is too long what should i change|canonical=my autoexec is too long what should i change|source=retrieval|recall=kb2_term|recall_score=36|t_retrieve_ms=0|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=0|actions=show_config,explain_xms,more,cancel|retrieval=AUTOEXEC.BAT hygiene: CONFIG.SYS loads drivers first, then AUTOEXEC.BAT runs commands; keep PATH short and trim resident tools.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was how do i keep conventional memory free; previous answer was Conventional memory: Keep drivers high, trim TSR programs, and preserve .|generated=|answer=AUTOEXEC.BAT hygiene: CONFIG.SYS loads drivers first, then AUTOEXEC.BAT runs commands; keep PATH short and trim resident tools. +ASSIST_REPLY|pack=DOSHELP|intent=dos_memory|ui=text|query=my autoexec is too long what should i change|canonical=my autoexec is too long what should i change|source=retrieval|recall=kb2_term|recall_score=36|t_retrieve_ms=50|t_golden_ms=60|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=show_config,explain_xms,more,cancel|retrieval=AUTOEXEC.BAT hygiene: CONFIG.SYS loads drivers first, then AUTOEXEC.BAT runs commands; keep PATH short and trim resident tools.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was how do i keep conventional memory free; previous answer was Conventional memory: Keep drivers high, trim TSR programs, and preserve .|generated=|answer=AUTOEXEC.BAT hygiene: CONFIG.SYS loads drivers first, then AUTOEXEC.BAT runs commands; keep PATH short and trim resident tools. Answer: AUTOEXEC.BAT hygiene: CONFIG.SYS loads drivers first, then AUTOEXEC.BAT runs commands; keep PATH short and trim resident tools. -Source: retrieval / kb2_term ( 0 ms) +Source: retrieval / kb2_term ( 50 ms) [ show_config,explain_xms,more,cancel ] @@ -620,7 +620,7 @@ Source: retrieval / kb2_term ( 0 ms) ASSIST_MODEL|pack=DOSHELP|path=PACKS\DOSHELP\MODEL|profile=486sx-safe|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=DOSHELP|intent=dos_memory|ui=text|query=how should i clean autoexec.bat|canonical=how should i clean autoexec.bat|source=golden|recall=kb2_term|recall_score=42|t_retrieve_ms=50|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=50|actions=show_config,explain_xms,more,cancel|retrieval=AUTOEXEC.BAT hygiene: CONFIG.SYS loads drivers first, then AUTOEXEC.BAT runs commands; keep PATH short and trim resident tools.|golden=Keep AUTOEXEC BAT short, trim PATH, and load resident tools only when needed.|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was my autoexec is too long what should i change; previous answer was AUTOEXEC.BAT hygiene: CONFIG.SYS loads drivers first, then AUTOEXEC.BAT .|generated=|answer=Keep AUTOEXEC BAT short, trim PATH, and load resident tools only when needed. +ASSIST_REPLY|pack=DOSHELP|intent=dos_memory|ui=text|query=how should i clean autoexec.bat|canonical=how should i clean autoexec.bat|source=golden|recall=kb2_term|recall_score=42|t_retrieve_ms=50|t_golden_ms=60|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=show_config,explain_xms,more,cancel|retrieval=AUTOEXEC.BAT hygiene: CONFIG.SYS loads drivers first, then AUTOEXEC.BAT runs commands; keep PATH short and trim resident tools.|golden=Keep AUTOEXEC BAT short, trim PATH, and load resident tools only when needed.|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was my autoexec is too long what should i change; previous answer was AUTOEXEC.BAT hygiene: CONFIG.SYS loads drivers first, then AUTOEXEC.BAT .|generated=|answer=Keep AUTOEXEC BAT short, trim PATH, and load resident tools only when needed. Answer: Keep AUTOEXEC BAT short, trim PATH, and load resident tools only when needed. @@ -638,11 +638,11 @@ Source: golden / kb2_term ( 50 ms) ASSIST_MODEL|pack=DOSHELP|path=PACKS\DOSHELP\MODEL|profile=486sx-safe|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=DOSHELP|intent=dos_batch|ui=text|query=write a batch command that checks for model files|canonical=write a batch command that checks for model files|source=retrieval|recall=kb2_term|recall_score=66|t_retrieve_ms=0|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=0|actions=write_batch,explain_command,more,cancel|retrieval=Batch file help: In a batch file, use IF EXIST checks, clear status messages, and 8.3 DOS-compatible names for model files.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was how should i clean autoexec.bat; previous answer was Keep AUTOEXEC BAT short, trim PATH, and load resident tools only when ne.|generated=|answer=Batch file help: In a batch file, use IF EXIST checks, clear status messages, and 8.3 DOS-compatible names for model files. +ASSIST_REPLY|pack=DOSHELP|intent=dos_batch|ui=text|query=write a batch command that checks for model files|canonical=write a batch command that checks for model files|source=retrieval|recall=kb2_term|recall_score=66|t_retrieve_ms=50|t_golden_ms=60|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=write_batch,explain_command,more,cancel|retrieval=Batch file help: In a batch file, use IF EXIST checks, clear status messages, and 8.3 DOS-compatible names for model files.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was how should i clean autoexec.bat; previous answer was Keep AUTOEXEC BAT short, trim PATH, and load resident tools only when ne.|generated=|answer=Batch file help: In a batch file, use IF EXIST checks, clear status messages, and 8.3 DOS-compatible names for model files. Answer: Batch file help: In a batch file, use IF EXIST checks, clear status messages, and 8.3 DOS-compatible names for model files. -Source: retrieval / kb2_term ( 0 ms) +Source: retrieval / kb2_term ( 50 ms) [ write_batch,explain_command,more,cancel ] @@ -656,11 +656,11 @@ Source: retrieval / kb2_term ( 0 ms) ASSIST_MODEL|pack=DOSHELP|path=PACKS\DOSHELP\MODEL|profile=486sx-safe|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=DOSHELP|intent=general_chat|ui=text|query=why does protected mode need a dpmi host|canonical=why does protected mode need a dpmi host|source=retrieval|recall=kb2_term|recall_score=87|t_retrieve_ms=0|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=60|actions=show_config,explain_xms,write_batch,cancel|retrieval=DPMI host: Protected-mode DOS programs need a DPMI host such as CWSDPMI.EXE beside the program.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was write a batch command that checks for model files; previous answer was Batch file help: In a batch file, use IF EXIST checks, clear status mess.|generated=|answer=DPMI host: Protected-mode DOS programs need a DPMI host such as CWSDPMI.EXE beside the program. +ASSIST_REPLY|pack=DOSHELP|intent=general_chat|ui=text|query=why does protected mode need a dpmi host|canonical=why does protected mode need a dpmi host|source=retrieval|recall=kb2_term|recall_score=87|t_retrieve_ms=50|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=show_config,explain_xms,write_batch,cancel|retrieval=DPMI host: Protected-mode DOS programs need a DPMI host such as CWSDPMI.EXE beside the program.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was write a batch command that checks for model files; previous answer was Batch file help: In a batch file, use IF EXIST checks, clear status mess.|generated=|answer=DPMI host: Protected-mode DOS programs need a DPMI host such as CWSDPMI.EXE beside the program. Answer: DPMI host: Protected-mode DOS programs need a DPMI host such as CWSDPMI.EXE beside the program. -Source: retrieval / kb2_term ( 0 ms) +Source: retrieval / kb2_term ( 50 ms) [ show_config,explain_xms,write_batch,cancel ] @@ -674,11 +674,11 @@ Source: retrieval / kb2_term ( 0 ms) ASSIST_MODEL|pack=DOSHELP|path=PACKS\DOSHELP\MODEL|profile=486sx-safe|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=DOSHELP|intent=dos_memory|ui=text|query=what does config.sys do|canonical=what does config.sys do|source=retrieval|recall=kb2_term|recall_score=42|t_retrieve_ms=0|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=0|actions=show_config,explain_xms,more,cancel|retrieval=CONFIG.SYS example: Use HIMEM, DOS HIGH UMB, FILES 30, and BUFFERS 20 as the safe CONFIG SYS baseline.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was why does protected mode need a dpmi host; previous answer was DPMI host: Protected-mode DOS programs need a DPMI host such as CWSDPMI..|generated=|answer=CONFIG.SYS example: Use HIMEM, DOS HIGH UMB, FILES 30, and BUFFERS 20 as the safe CONFIG SYS baseline. +ASSIST_REPLY|pack=DOSHELP|intent=dos_memory|ui=text|query=what does config.sys do|canonical=what does config.sys do|source=retrieval|recall=kb2_term|recall_score=42|t_retrieve_ms=50|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=50|actions=show_config,explain_xms,more,cancel|retrieval=CONFIG.SYS example: Use HIMEM, DOS HIGH UMB, FILES 30, and BUFFERS 20 as the safe CONFIG SYS baseline.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was why does protected mode need a dpmi host; previous answer was DPMI host: Protected-mode DOS programs need a DPMI host such as CWSDPMI..|generated=|answer=CONFIG.SYS example: Use HIMEM, DOS HIGH UMB, FILES 30, and BUFFERS 20 as the safe CONFIG SYS baseline. Answer: CONFIG.SYS example: Use HIMEM, DOS HIGH UMB, FILES 30, and BUFFERS 20 as the safe CONFIG SYS baseline. -Source: retrieval / kb2_term ( 0 ms) +Source: retrieval / kb2_term ( 50 ms) [ show_config,explain_xms,more,cancel ] @@ -716,11 +716,11 @@ ASSIST_MODEL|pack=OFFICE|path=PACKS\OFFICE\MODEL|profile=486sx-safe|tokenizer=le ASSIST_MODEL|pack=OFFICE|path=PACKS\OFFICE\MODEL|profile=486sx-safe|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=OFFICE|intent=office_rewrite|ui=text|query=make this sentence sound professional: the release broke|canonical=make this sentence sound professional: the release broke|source=golden|recall=kb2_term|recall_score=18|t_retrieve_ms=0|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=50|actions=rewrite,shorten,formalize,cancel|retrieval=Professional tone: Keep the message direct, polite, concrete, and free of jokes, filler, or unsupported claims.|golden=Use direct, polite, professional wording, name the release problem, and end with the next action.|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was what does config.sys do; previous answer was CONFIG.SYS example: Use HIMEM, DOS HIGH UMB, FILES 30, and BUFFERS 20 as.|generated=|answer=Use direct, polite, professional wording, name the release problem, and end with the next action. +ASSIST_REPLY|pack=OFFICE|intent=office_rewrite|ui=text|query=make this sentence sound professional: the release broke|canonical=make this sentence sound professional: the release broke|source=golden|recall=kb2_term|recall_score=18|t_retrieve_ms=110|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=rewrite,shorten,formalize,cancel|retrieval=Professional tone: Keep the message direct, polite, concrete, and free of jokes, filler, or unsupported claims.|golden=Use direct, polite, professional wording, name the release problem, and end with the next action.|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was what does config.sys do; previous answer was CONFIG.SYS example: Use HIMEM, DOS HIGH UMB, FILES 30, and BUFFERS 20 as.|generated=|answer=Use direct, polite, professional wording, name the release problem, and end with the next action. Answer: Use direct, polite, professional wording, name the release problem, and end with the next action. -Source: golden / kb2_term ( 0 ms) +Source: golden / kb2_term ( 110 ms) [ rewrite,shorten,formalize,cancel ] @@ -734,11 +734,11 @@ Source: golden / kb2_term ( 0 ms) ASSIST_MODEL|pack=OFFICE|path=PACKS\OFFICE\MODEL|profile=486sx-safe|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=OFFICE|intent=office_summary|ui=text|query=summarize this: tests passed but the tag was stale|canonical=summarize this: tests passed but the tag was stale|source=golden|recall=kb2_term|recall_score=15|t_retrieve_ms=0|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=0|actions=summarize,bullets,shorten,cancel|retrieval=Clarity action: State what happened, why it matters, and the next action; keep artifact, tag, checksum, and test details concrete.|golden=Summary: tests passed, the tag was stale, and the next action is to refresh the tag.|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was make this sentence sound professional: the release broke; previous answer was Use direct, polite, professional wording, name the release problem, and .|generated=|answer=Summary: tests passed, the tag was stale, and the next action is to refresh the tag. +ASSIST_REPLY|pack=OFFICE|intent=office_summary|ui=text|query=summarize this: tests passed but the tag was stale|canonical=summarize this: tests passed but the tag was stale|source=golden|recall=kb2_term|recall_score=15|t_retrieve_ms=50|t_golden_ms=60|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=summarize,bullets,shorten,cancel|retrieval=Clarity action: State what happened, why it matters, and the next action; keep artifact, tag, checksum, and test details concrete.|golden=Summary: tests passed, the tag was stale, and the next action is to refresh the tag.|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was make this sentence sound professional: the release broke; previous answer was Use direct, polite, professional wording, name the release problem, and .|generated=|answer=Summary: tests passed, the tag was stale, and the next action is to refresh the tag. Answer: Summary: tests passed, the tag was stale, and the next action is to refresh the tag. -Source: golden / kb2_term ( 0 ms) +Source: golden / kb2_term ( 50 ms) [ summarize,bullets,shorten,cancel ] @@ -752,11 +752,11 @@ Source: golden / kb2_term ( 0 ms) ASSIST_MODEL|pack=OFFICE|path=PACKS\OFFICE\MODEL|profile=486sx-safe|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=OFFICE|intent=office_summary|ui=text|query=summarize: tests passed but dosbox needed a helper file|canonical=summarize: tests passed but dosbox needed a helper file|source=golden|recall=kb2_bucket|recall_score=15|t_retrieve_ms=60|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=60|actions=summarize,bullets,shorten,cancel|retrieval=Test plan: Define scope, cases, expected results, evidence files, and pass or fail criteria.|golden=Summary: tests passed, DOSBox needed a helper file. Include it.|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was summarize this: tests passed but the tag was stale; previous answer was Summary: tests passed, the tag was stale, and the next action is to refr.|generated=|answer=Summary: tests passed, DOSBox needed a helper file. Include it. +ASSIST_REPLY|pack=OFFICE|intent=office_summary|ui=text|query=summarize: tests passed but dosbox needed a helper file|canonical=summarize: tests passed but dosbox needed a helper file|source=golden|recall=kb2_bucket|recall_score=15|t_retrieve_ms=270|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=270|actions=summarize,bullets,shorten,cancel|retrieval=Test plan: Define scope, cases, expected results, evidence files, and pass or fail criteria.|golden=Summary: tests passed, DOSBox needed a helper file. Include it.|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was summarize this: tests passed but the tag was stale; previous answer was Summary: tests passed, the tag was stale, and the next action is to refr.|generated=|answer=Summary: tests passed, DOSBox needed a helper file. Include it. Answer: Summary: tests passed, DOSBox needed a helper file. Include it. -Source: golden / kb2_bucket ( 60 ms) +Source: golden / kb2_bucket ( 270 ms) [ summarize,bullets,shorten,cancel ] @@ -770,11 +770,11 @@ Source: golden / kb2_bucket ( 60 ms) ASSIST_MODEL|pack=OFFICE|path=PACKS\OFFICE\MODEL|profile=486sx-safe|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=OFFICE|intent=office_summary|ui=text|query=shorten: we need to verify the release before publishing|canonical=shorten: we need to verify the release before publishing|source=golden|recall=kb2_term|recall_score=21|t_retrieve_ms=0|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=0|actions=summarize,bullets,shorten,cancel|retrieval=Shorten action: Shortening should keep the original intent and remove qualifiers, duplicate phrases, and low-value background.|golden=Short version: verify the release before publishing.|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was summarize: tests passed but dosbox needed a helper file; previous answer was Summary: tests passed, DOSBox needed a helper file. Include it.|generated=|answer=Short version: verify the release before publishing. +ASSIST_REPLY|pack=OFFICE|intent=office_summary|ui=text|query=shorten: we need to verify the release before publishing|canonical=shorten: we need to verify the release before publishing|source=golden|recall=kb2_term|recall_score=21|t_retrieve_ms=50|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=50|actions=summarize,bullets,shorten,cancel|retrieval=Shorten action: Shortening should keep the original intent and remove qualifiers, duplicate phrases, and low-value background.|golden=Short version: verify the release before publishing.|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was summarize: tests passed but dosbox needed a helper file; previous answer was Summary: tests passed, DOSBox needed a helper file. Include it.|generated=|answer=Short version: verify the release before publishing. Answer: Short version: verify the release before publishing. -Source: golden / kb2_term ( 0 ms) +Source: golden / kb2_term ( 50 ms) [ summarize,bullets,shorten,cancel ] @@ -788,11 +788,11 @@ Source: golden / kb2_term ( 0 ms) ASSIST_MODEL|pack=OFFICE|path=PACKS\OFFICE\MODEL|profile=486sx-safe|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=OFFICE|intent=general_chat|ui=text|query=write a polite status update about a delayed build|canonical=write a polite status update about a delayed build|source=golden|recall=kb2_term|recall_score=51|t_retrieve_ms=50|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=50|actions=rewrite,shorten,summarize,formalize,cancel|retrieval=Status update: Use direct polite wording, include the current blocker, and end with the next action or timing.|golden=Use direct, polite, concrete wording, include the current blocker, and end with the next action.|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was shorten: we need to verify the release before publishing; previous answer was Short version: verify the release before publishing.|generated=|answer=Use direct, polite, concrete wording, include the current blocker, and end with the next action. +ASSIST_REPLY|pack=OFFICE|intent=general_chat|ui=text|query=write a polite status update about a delayed build|canonical=write a polite status update about a delayed build|source=golden|recall=kb2_term|recall_score=51|t_retrieve_ms=110|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=rewrite,shorten,summarize,formalize,cancel|retrieval=Status update: Use direct polite wording, include the current blocker, and end with the next action or timing.|golden=Use direct, polite, concrete wording, include the current blocker, and end with the next action.|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was shorten: we need to verify the release before publishing; previous answer was Short version: verify the release before publishing.|generated=|answer=Use direct, polite, concrete wording, include the current blocker, and end with the next action. Answer: Use direct, polite, concrete wording, include the current blocker, and end with the next action. -Source: golden / kb2_term ( 50 ms) +Source: golden / kb2_term ( 110 ms) [ rewrite,shorten,summarize,formalize,cancel ] @@ -806,11 +806,11 @@ Source: golden / kb2_term ( 50 ms) ASSIST_MODEL|pack=OFFICE|path=PACKS\OFFICE\MODEL|profile=486sx-safe|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=OFFICE|intent=general_chat|ui=text|query=make this clearer: the artifact uploaded but the tag was stale|canonical=make this clearer: the artifact uploaded but the tag was stale|source=golden|recall=kb2_term|recall_score=30|t_retrieve_ms=0|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=0|actions=rewrite,shorten,summarize,formalize,cancel|retrieval=Clarity action: State what happened, why it matters, and the next action; keep artifact, tag, checksum, and test details concrete.|golden=State that the artifact uploaded, the tag was stale, and the next action is to refresh the tag.|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was write a polite status update about a delayed build; previous answer was Use direct, polite, concrete wording, include the current blocker, and e.|generated=|answer=State that the artifact uploaded, the tag was stale, and the next action is to refresh the tag. +ASSIST_REPLY|pack=OFFICE|intent=general_chat|ui=text|query=make this clearer: the artifact uploaded but the tag was stale|canonical=make this clearer: the artifact uploaded but the tag was stale|source=golden|recall=kb2_term|recall_score=30|t_retrieve_ms=110|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=rewrite,shorten,summarize,formalize,cancel|retrieval=Clarity action: State what happened, why it matters, and the next action; keep artifact, tag, checksum, and test details concrete.|golden=State that the artifact uploaded, the tag was stale, and the next action is to refresh the tag.|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was write a polite status update about a delayed build; previous answer was Use direct, polite, concrete wording, include the current blocker, and e.|generated=|answer=State that the artifact uploaded, the tag was stale, and the next action is to refresh the tag. Answer: State that the artifact uploaded, the tag was stale, and the next action is to refresh the tag. -Source: golden / kb2_term ( 0 ms) +Source: golden / kb2_term ( 110 ms) [ rewrite,shorten,summarize,formalize,cancel ] @@ -848,11 +848,11 @@ ASSIST_MODEL|pack=DEV|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexi ASSIST_MODEL|pack=DEV|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=DEV|intent=general_chat|ui=text|query=how can this feel modern on a 486|canonical=how can this feel modern on a 486|source=golden|recall=kb2_term|recall_score=36|t_retrieve_ms=0|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=0|actions=debug,test,release,explain,cancel|retrieval=Modern 486 LLM path: Use small hot-loaded weights, compact retrieval databases, persistent memory, and short synthesis replies.|golden=Use small hot-loaded weights, compact retrieval databases, persistent memory, and short synthesis replies.|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was make this clearer: the artifact uploaded but the tag was stale; previous answer was State that the artifact uploaded, the tag was stale, and the next action.|generated=|answer=Use small hot-loaded weights, compact retrieval databases, persistent memory, and short synthesis replies. +ASSIST_REPLY|pack=DEV|intent=general_chat|ui=text|query=how can this feel modern on a 486|canonical=how can this feel modern on a 486|source=golden|recall=kb2_term|recall_score=36|t_retrieve_ms=50|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=50|actions=debug,test,release,explain,cancel|retrieval=Modern 486 LLM path: Use small hot-loaded weights, compact retrieval databases, persistent memory, and short synthesis replies.|golden=Use small hot-loaded weights, compact retrieval databases, persistent memory, and short synthesis replies.|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was make this clearer: the artifact uploaded but the tag was stale; previous answer was State that the artifact uploaded, the tag was stale, and the next action.|generated=|answer=Use small hot-loaded weights, compact retrieval databases, persistent memory, and short synthesis replies. Answer: Use small hot-loaded weights, compact retrieval databases, persistent memory, and short synthesis replies. -Source: golden / kb2_term ( 0 ms) +Source: golden / kb2_term ( 50 ms) [ debug,test,release,explain,cancel ] @@ -866,11 +866,11 @@ Source: golden / kb2_term ( 0 ms) ASSIST_MODEL|pack=DEV|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=DEV|intent=general_chat|ui=text|query=what does retrieval first mean|canonical=what does retrieval first mean|source=retrieval|recall=kb2_term|recall_score=36|t_retrieve_ms=50|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=50|actions=debug,test,release,explain,cancel|retrieval=Retrieval first: Answer from KDB, USER notes, memory, and golden rows before asking the small model to synthesize.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was how can this feel modern on a 486; previous answer was Use small hot-loaded weights, compact retrieval databases, persistent me.|generated=|answer=Retrieval first: Answer from KDB, USER notes, memory, and golden rows before asking the small model to synthesize. +ASSIST_REPLY|pack=DEV|intent=general_chat|ui=text|query=what does retrieval first mean|canonical=what does retrieval first mean|source=retrieval|recall=kb2_term|recall_score=36|t_retrieve_ms=60|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=60|actions=debug,test,release,explain,cancel|retrieval=Retrieval first: Answer from KDB, USER notes, memory, and golden rows before asking the small model to synthesize.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was how can this feel modern on a 486; previous answer was Use small hot-loaded weights, compact retrieval databases, persistent me.|generated=|answer=Retrieval first: Answer from KDB, USER notes, memory, and golden rows before asking the small model to synthesize. Answer: Retrieval first: Answer from KDB, USER notes, memory, and golden rows before asking the small model to synthesize. -Source: retrieval / kb2_term ( 50 ms) +Source: retrieval / kb2_term ( 60 ms) [ debug,test,release,explain,cancel ] @@ -884,11 +884,11 @@ Source: retrieval / kb2_term ( 50 ms) ASSIST_MODEL|pack=DEV|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=DEV|intent=general_chat|ui=text|query=how do i author a pack|canonical=how do i author a pack|source=golden|recall=kb2_term|recall_score=39|t_retrieve_ms=0|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=0|actions=debug,test,release,explain,cancel|retrieval=Pack authoring: Write HELP and KNOW rows, rebuild KDB, run the authoring validator, then run retrieval and QEMU gates.|golden=Write HELP and KNOW rows, rebuild KDB, run the validator, then run retrieval and QEMU gates.|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was what does retrieval first mean; previous answer was Retrieval first: Answer from KDB, USER notes, memory, and golden rows be.|generated=|answer=Write HELP and KNOW rows, rebuild KDB, run the validator, then run retrieval and QEMU gates. +ASSIST_REPLY|pack=DEV|intent=general_chat|ui=text|query=how do i author a pack|canonical=how do i author a pack|source=golden|recall=kb2_term|recall_score=39|t_retrieve_ms=110|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=debug,test,release,explain,cancel|retrieval=Pack authoring: Write HELP and KNOW rows, rebuild KDB, run the authoring validator, then run retrieval and QEMU gates.|golden=Write HELP and KNOW rows, rebuild KDB, run the validator, then run retrieval and QEMU gates.|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was what does retrieval first mean; previous answer was Retrieval first: Answer from KDB, USER notes, memory, and golden rows be.|generated=|answer=Write HELP and KNOW rows, rebuild KDB, run the validator, then run retrieval and QEMU gates. Answer: Write HELP and KNOW rows, rebuild KDB, run the validator, then run retrieval and QEMU gates. -Source: golden / kb2_term ( 0 ms) +Source: golden / kb2_term ( 110 ms) [ debug,test,release,explain,cancel ] @@ -902,11 +902,11 @@ Source: golden / kb2_term ( 0 ms) ASSIST_MODEL|pack=DEV|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=DEV|intent=general_chat|ui=text|query=what should i check before release|canonical=what should i check before release|source=golden|recall=kb2_term|recall_score=42|t_retrieve_ms=0|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=0|actions=debug,test,release,explain,cancel|retrieval=Release check: Verify tests, logs, artifact names, checksums, release notes, and the target tag.|golden=Verify tests, logs, artifact names, checksums, release notes, and the target tag.|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was how do i author a pack; previous answer was Write HELP and KNOW rows, rebuild KDB, run the validator, then run retri.|generated=|answer=Verify tests, logs, artifact names, checksums, release notes, and the target tag. +ASSIST_REPLY|pack=DEV|intent=general_chat|ui=text|query=what should i check before release|canonical=what should i check before release|source=golden|recall=kb2_term|recall_score=42|t_retrieve_ms=110|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=110|actions=debug,test,release,explain,cancel|retrieval=Release check: Verify tests, logs, artifact names, checksums, release notes, and the target tag.|golden=Verify tests, logs, artifact names, checksums, release notes, and the target tag.|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was how do i author a pack; previous answer was Write HELP and KNOW rows, rebuild KDB, run the validator, then run retri.|generated=|answer=Verify tests, logs, artifact names, checksums, release notes, and the target tag. Answer: Verify tests, logs, artifact names, checksums, release notes, and the target tag. -Source: golden / kb2_term ( 0 ms) +Source: golden / kb2_term ( 110 ms) [ debug,test,release,explain,cancel ] @@ -934,11 +934,11 @@ ASSIST_PACK|id=PORTABLE|title=Portable Intelligence|model=PACKS\CHAT\MODEL|sprit ASSIST_MODEL|pack=PORTABLE|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=PORTABLE|intent=general_chat|ui=text|query=what does portable intelligence mean|canonical=what does portable intelligence mean|source=retrieval|recall=kb2_term|recall_score=57|t_retrieve_ms=60|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=60|actions=explain,compare,teach,validate,cancel|retrieval=portable meaning: Portable intelligence means small local model weights, retrieval, and memory can run on old machines without a network.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was what should i check before release; previous answer was Verify tests, logs, artifact names, checksums, release notes, and the ta.|generated=|answer=portable meaning: Portable intelligence means small local model weights, retrieval, and memory can run on old machines without a network. +ASSIST_REPLY|pack=PORTABLE|intent=general_chat|ui=text|query=what does portable intelligence mean|canonical=what does portable intelligence mean|source=retrieval|recall=kb2_term|recall_score=57|t_retrieve_ms=50|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=50|actions=explain,compare,teach,validate,cancel|retrieval=portable meaning: Portable intelligence means small local model weights, retrieval, and memory can run on old machines without a network.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was what should i check before release; previous answer was Verify tests, logs, artifact names, checksums, release notes, and the ta.|generated=|answer=portable meaning: Portable intelligence means small local model weights, retrieval, and memory can run on old machines without a network. Answer: portable meaning: Portable intelligence means small local model weights, retrieval, and memory can run on old machines without a network. -Source: retrieval / kb2_term ( 60 ms) +Source: retrieval / kb2_term ( 50 ms) [ explain,compare,teach,validate,cancel ] @@ -952,11 +952,11 @@ Source: retrieval / kb2_term ( 60 ms) ASSIST_MODEL|pack=PORTABLE|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=PORTABLE|intent=general_chat|ui=text|query=why is basic useful for teaching ai|canonical=why is basic useful for teaching ai|source=retrieval|recall=kb2_term|recall_score=57|t_retrieve_ms=0|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=0|actions=explain,compare,teach,validate,cancel|retrieval=basic teaching: BASIC is useful for teaching machine intelligence because plain arrays, files, and integer arithmetic make the mechanism inspectable.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was what does portable intelligence mean; previous answer was portable meaning: Portable intelligence means small local model weights,.|generated=|answer=basic teaching: BASIC is useful for teaching machine intelligence because plain arrays, files, and integer arithmetic make the mechanism inspectable. +ASSIST_REPLY|pack=PORTABLE|intent=general_chat|ui=text|query=why is basic useful for teaching ai|canonical=why is basic useful for teaching ai|source=retrieval|recall=kb2_term|recall_score=57|t_retrieve_ms=50|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=50|actions=explain,compare,teach,validate,cancel|retrieval=basic teaching: BASIC is useful for teaching machine intelligence because plain arrays, files, and integer arithmetic make the mechanism inspectable.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was what does portable intelligence mean; previous answer was portable meaning: Portable intelligence means small local model weights,.|generated=|answer=basic teaching: BASIC is useful for teaching machine intelligence because plain arrays, files, and integer arithmetic make the mechanism inspectable. Answer: basic teaching: BASIC is useful for teaching machine intelligence because plain arrays, files, and integer arithmetic make the mechanism inspectable. -Source: retrieval / kb2_term ( 0 ms) +Source: retrieval / kb2_term ( 50 ms) [ explain,compare,teach,validate,cancel ] @@ -970,11 +970,11 @@ Source: retrieval / kb2_term ( 0 ms) ASSIST_MODEL|pack=PORTABLE|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=PORTABLE|intent=general_chat|ui=text|query=how could this move to c or assembly|canonical=how could this move to c or assembly|source=retrieval|recall=kb2_term|recall_score=15|t_retrieve_ms=0|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=0|actions=explain,compare,teach,validate,cancel|retrieval=runtime ports: The same assistant contract can be reimplemented in C, assembly, Eshkol, or calculator BASIC when files, arrays, and loops exist.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was why is basic useful for teaching ai; previous answer was basic teaching: BASIC is useful for teaching machine intelligence becaus.|generated=|answer=runtime ports: The same assistant contract can be reimplemented in C, assembly, Eshkol, or calculator BASIC when files, arrays, and loops exist. +ASSIST_REPLY|pack=PORTABLE|intent=general_chat|ui=text|query=how could this move to c or assembly|canonical=how could this move to c or assembly|source=retrieval|recall=kb2_term|recall_score=15|t_retrieve_ms=60|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=60|actions=explain,compare,teach,validate,cancel|retrieval=runtime ports: The same assistant contract can be reimplemented in C, assembly, Eshkol, or calculator BASIC when files, arrays, and loops exist.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was why is basic useful for teaching ai; previous answer was basic teaching: BASIC is useful for teaching machine intelligence becaus.|generated=|answer=runtime ports: The same assistant contract can be reimplemented in C, assembly, Eshkol, or calculator BASIC when files, arrays, and loops exist. Answer: runtime ports: The same assistant contract can be reimplemented in C, assembly, Eshkol, or calculator BASIC when files, arrays, and loops exist. -Source: retrieval / kb2_term ( 0 ms) +Source: retrieval / kb2_term ( 60 ms) [ explain,compare,teach,validate,cancel ] @@ -988,11 +988,11 @@ Source: retrieval / kb2_term ( 0 ms) ASSIST_MODEL|pack=PORTABLE|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=PORTABLE|intent=general_chat|ui=text|query=why do hot swappable weights matter|canonical=why do hot swappable weights matter|source=retrieval|recall=kb2_term|recall_score=45|t_retrieve_ms=0|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=0|actions=explain,compare,teach,validate,cancel|retrieval=domain weight loading: Hot swappable weights load domain behavior into a tiny resident shell without rebuilding the whole runtime.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was how could this move to c or assembly; previous answer was runtime ports: The same assistant contract can be reimplemented in C, as.|generated=|answer=domain weight loading: Hot swappable weights load domain behavior into a tiny resident shell without rebuilding the whole runtime. +ASSIST_REPLY|pack=PORTABLE|intent=general_chat|ui=text|query=why do hot swappable weights matter|canonical=why do hot swappable weights matter|source=retrieval|recall=kb2_term|recall_score=45|t_retrieve_ms=50|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=50|actions=explain,compare,teach,validate,cancel|retrieval=domain weight loading: Hot swappable weights load domain behavior into a tiny resident shell without rebuilding the whole runtime.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was how could this move to c or assembly; previous answer was runtime ports: The same assistant contract can be reimplemented in C, as.|generated=|answer=domain weight loading: Hot swappable weights load domain behavior into a tiny resident shell without rebuilding the whole runtime. Answer: domain weight loading: Hot swappable weights load domain behavior into a tiny resident shell without rebuilding the whole runtime. -Source: retrieval / kb2_term ( 0 ms) +Source: retrieval / kb2_term ( 50 ms) [ explain,compare,teach,validate,cancel ] @@ -1024,11 +1024,11 @@ Source: retrieval / kb2_term ( 50 ms) ASSIST_MODEL|pack=PORTABLE|path=PACKS\CHAT\MODEL|profile=486dx2-usable|tokenizer=lexicon|ctx=192|vocab=4096|reuse=1 -ASSIST_REPLY|pack=PORTABLE|intent=general_chat|ui=text|query=what proof shows this works on old hardware|canonical=what proof shows this works on old hardware|source=retrieval|recall=kb2_term|recall_score=63|t_retrieve_ms=0|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=0|actions=explain,compare,teach,validate,cancel|retrieval=old hardware proof: Proof for old hardware needs local logs, repeatable tests, QEMU or hardware captures, and visible source files.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was how should tiny machines store recall; previous answer was tiny machine recall: Tiny machines should store recall as compact indexe.|generated=|answer=old hardware proof: Proof for old hardware needs local logs, repeatable tests, QEMU or hardware captures, and visible source files. +ASSIST_REPLY|pack=PORTABLE|intent=general_chat|ui=text|query=what proof shows this works on old hardware|canonical=what proof shows this works on old hardware|source=retrieval|recall=kb2_term|recall_score=63|t_retrieve_ms=60|t_golden_ms=0|t_memory_ms=0|t_model_ms=0|t_total_ms=60|actions=explain,compare,teach,validate,cancel|retrieval=old hardware proof: Proof for old hardware needs local logs, repeatable tests, QEMU or hardware captures, and visible source files.|golden=|memory=Context: user name is Operator; current goal is the DOSBox assistant; answer style is short answers; previous question was how should tiny machines store recall; previous answer was tiny machine recall: Tiny machines should store recall as compact indexe.|generated=|answer=old hardware proof: Proof for old hardware needs local logs, repeatable tests, QEMU or hardware captures, and visible source files. Answer: old hardware proof: Proof for old hardware needs local logs, repeatable tests, QEMU or hardware captures, and visible source files. -Source: retrieval / kb2_term ( 0 ms) +Source: retrieval / kb2_term ( 60 ms) [ explain,compare,teach,validate,cancel ] diff --git a/qemu/evidence/hardware_capture_486_qemu/HWVALID.LOG b/qemu/evidence/hardware_capture_486_qemu/HWVALID.LOG index 2572fb0..5fd62d5 100644 --- a/qemu/evidence/hardware_capture_486_qemu/HWVALID.LOG +++ b/qemu/evidence/hardware_capture_486_qemu/HWVALID.LOG @@ -7,4 +7,5 @@ HW_STEP|perf HW_STEP|assist_compile HW_STEP|assistant HW_STEP|assistant_stress +HW_STEP|assistant_recall HW_CAPTURE_END diff --git a/qemu/evidence/hardware_capture_486_qemu/PERF.LOG b/qemu/evidence/hardware_capture_486_qemu/PERF.LOG index 7f43fe0..aa8efeb 100644 --- a/qemu/evidence/hardware_capture_486_qemu/PERF.LOG +++ b/qemu/evidence/hardware_capture_486_qemu/PERF.LOG @@ -40,10 +40,10 @@ PERF_MODEL|profile=486sx-safe|layers=2|emb=48|heads=4|ctx=192|vocab=4096|params= PERF_RUN|name=real_inference|prompt_tokens=3|generated_tokens=35|seconds=0.7699999809265137|tokens_per_sec=45.45454658048919|last_token=48 -PERF_RUN|name=486_target|prompt_tokens=2|generated_tokens=44|seconds=1.430000066757202|tokens_per_sec=30.76922933281982|last_token=1218 +PERF_RUN|name=486_target|prompt_tokens=2|generated_tokens=44|seconds=0.9300000667572021|tokens_per_sec=47.3118245608548|last_token=1218 -PERF_RUN|name=basic_runtime|prompt_tokens=4|generated_tokens=48|seconds=0.9900000095367432|tokens_per_sec=48.48484801779036|last_token=941 +PERF_RUN|name=basic_runtime|prompt_tokens=4|generated_tokens=48|seconds=0.9300000667572021|tokens_per_sec=51.61289952093251|last_token=941 -PERF_SUMMARY|runs=3|tokens=127|seconds=3.190000057220459|tokens_per_sec=39.81191151158124 +PERF_SUMMARY|runs=3|tokens=127|seconds=2.630000114440918|tokens_per_sec=48.28897128280068 PERF_END diff --git a/qemu/evidence/hardware_capture_486_qemu/QUAL.LOG b/qemu/evidence/hardware_capture_486_qemu/QUAL.LOG index 67f0020..f1c8510 100644 --- a/qemu/evidence/hardware_capture_486_qemu/QUAL.LOG +++ b/qemu/evidence/hardware_capture_486_qemu/QUAL.LOG @@ -38,7 +38,7 @@ Production model initialization complete. Arithmetic : Q20.12 fixed-point -Generated 35 tokens in 0.8199999332427979 seconds (42.68293030413781 tokens/sec) +Generated 35 tokens in 0.7599999904632568 seconds (46.05263215683174 tokens/sec) @@ -60,7 +60,7 @@ QUALITY_PROMPT_END|real_inference QUALITY_PROMPT_BEGIN|486_target|GPT2 BASIC on a 486 -Generated 44 tokens in 0.8199999332427979 seconds (53.65854095377325 tokens/sec) +Generated 44 tokens in 0.880000114440918 seconds (49.99999349767596 tokens/sec) @@ -104,7 +104,7 @@ QUALITY_PROMPT_END|dos_model QUALITY_PROMPT_BEGIN|basic_runtime|A BASIC transformer runtime -Generated 48 tokens in 0.9899997711181641 seconds (48.48485969424616 tokens/sec) +Generated 48 tokens in 0.9900000095367432 seconds (48.48484801779036 tokens/sec) @@ -126,7 +126,7 @@ QUALITY_PROMPT_END|basic_runtime QUALITY_PROMPT_BEGIN|optimization|To improve performance on real hardware -Generated 45 tokens in 0.9900000095367432 seconds (45.45454501667847 tokens/sec) +Generated 45 tokens in 0.929999828338623 seconds (48.38710570558838 tokens/sec) @@ -148,7 +148,7 @@ QUALITY_PROMPT_END|optimization QUALITY_PROMPT_BEGIN|heldout_cache|Explain why a cache matters for text generation -Generated 42 tokens in 0.8799998760223389 seconds (47.72727945126873 tokens/sec) +Generated 42 tokens in 0.8199999332427979 seconds (51.21951636496537 tokens/sec) @@ -170,7 +170,7 @@ QUALITY_PROMPT_END|heldout_cache QUALITY_PROMPT_BEGIN|heldout_timing|How should a DOS model report timing? -Generated 43 tokens in 0.8300001621246338 seconds (51.8072187961128 tokens/sec) +Generated 43 tokens in 0.940000057220459 seconds (45.74467806645588 tokens/sec) @@ -192,7 +192,7 @@ QUALITY_PROMPT_END|heldout_timing QUALITY_PROMPT_BEGIN|heldout_limits|What limits a tiny transformer on old PCs? -Generated 35 tokens in 0.6600000858306885 seconds (53.03029613389874 tokens/sec) +Generated 35 tokens in 0.7200000286102295 seconds (48.61110917947918 tokens/sec) diff --git a/qemu/evidence/hardware_capture_486_qemu_probe.log b/qemu/evidence/hardware_capture_486_qemu_probe.log index 00a55f6..f8743a9 100644 --- a/qemu/evidence/hardware_capture_486_qemu_probe.log +++ b/qemu/evidence/hardware_capture_486_qemu_probe.log @@ -3,12 +3,14 @@ PROBE_OK hardware_quality_log=QUAL.LOG PROBE_OK hardware_perf_log=PERF.LOG PROBE_OK hardware_assistant_log=ASSIST.LOG PROBE_OK hardware_assistant_stress_log=ASTRESS.LOG +PROBE_OK hardware_assistant_recall_log=ARECALL.LOG PROBE_OK hardware_assistant_compile_log=ASSISTC.LOG PROBE_OK hardware_notes_template=HWNOTES.TXT PROBE_OK hardware_quality_prompts=10 PROBE_OK hardware_perf_runs=3 PROBE_OK hardware_assistant_replies=5 PROBE_OK hardware_assistant_stress_replies=50 +PROBE_OK hardware_assistant_recall_cases=42 PROBE_OK hardware_assistant_compile=1 PROBE_OK hardware_notes=1 PROBE_OK hardware_capture=1 diff --git a/qemu/evidence/hardware_capture_486_qemu_recall_report.md b/qemu/evidence/hardware_capture_486_qemu_recall_report.md new file mode 100644 index 0000000..fbdc0aa --- /dev/null +++ b/qemu/evidence/hardware_capture_486_qemu_recall_report.md @@ -0,0 +1,56 @@ +# Assistant Recall Benchmark + +Status: `PASS` +Recall case count: `42` +Average retrieval time: `82 ms` +Max retrieval time: `170 ms` +Average recall score: `42` +Pack counts: `CHAT=12 DEV=6 DOSHELP=9 OFFICE=9 PORTABLE=6` +Recall modes: `kb2_term=42` + +This benchmark is generated from `ASSIST.EXE --recall-probe` and measures local pack recall without model generation. + +| Pack | Recall | Score | Retrieve ms | Query | Answer | +|---|---|---:|---:|---|---| +| CHAT | kb2_term | 33 | 110 | how can i ask better questions | Better prompts: Say the goal, give one detail, and ask for the next useful step. | +| CHAT | kb2_term | 36 | 110 | what makes this intelligent on a small computer | Small-computer usefulness: A tiny local model becomes more useful with retrieval, memory, and quick focused help without a network. | +| CHAT | kb2_term | 36 | 50 | which pack should i use for writing | Pack switching: Use CHAT for conversation, DOSHELP for DOS setup, and OFFICE for writing tasks. | +| CHAT | kb2_term | 33 | 110 | can this work without the internet | Network limit: I cannot browse the internet from DOS; I answer from local model weights and pack files. | +| CHAT | kb2_term | 33 | 110 | how do i recover from a bad answer | Mistake recovery: If an answer is wrong, ask a shorter question, switch packs, or give the exact error. | +| CHAT | kb2_term | 36 | 60 | what proof helps me trust this | Trust evidence: Trust proof comes from visible files, local weights, reproducible tests, and QEMU or hardware logs. | +| CHAT | kb2_term | 39 | 110 | how should i compare options | Compare options: Name the options, list one tradeoff for each, then choose the practical next step. | +| CHAT | kb2_term | 66 | 110 | help me plan work in small steps | Planning work: Break the job into small steps, do the blocking step first, and verify each result. | +| CHAT | kb2_term | 42 | 110 | what should a useful answer look like | Useful answer: A useful answer should be brief, concrete, honest about limits, and easy to act on. | +| CHAT | kb2_term | 24 | 50 | can you explain something simply | Simple explanation: Use plain words, one example, and a short answer that fits the prompt. | +| CHAT | kb2_term | 51 | 110 | what can you know without web access | No web access: Without internet, I cannot fetch news or live facts; use local notes or give the facts in the prompt. | +| CHAT | kb2_term | 36 | 50 | how do i show confidence in an answer | Answer confidence: Say what is known from local files, what is inferred, and what remains uncertain. | +| DOSHELP | kb2_term | 57 | 110 | what happens before autoexec bat runs | AUTOEXEC.BAT hygiene: CONFIG.SYS loads drivers first, then AUTOEXEC.BAT runs commands; keep PATH short and trim resident tools. | +| DOSHELP | kb2_term | 24 | 60 | why use 8.3 filenames in batches | DOS filenames: Use 8.3 filenames for maximum DOS compatibility and predictable batch files. | +| DOSHELP | kb2_term | 48 | 110 | how should i prepare files for real hardware | Hardware copy: Copy GPT2, MODEL, PACKS, CWSDPMI, and batch files together before testing on real DOS. | +| DOSHELP | kb2_term | 39 | 50 | what should i do when cwsdpmi is missing | Missing CWSDPMI: If a protected-mode program fails to start, copy CWSDPMI.EXE beside it and rerun the command. | +| DOSHELP | kb2_term | 54 | 110 | how do i mount the dosbox bundle | DOSBox mount: Mount the bundle directory as C:, change to C:\GPT2, then run the batch file for the desired profile. | +| DOSHELP | kb2_term | 60 | 60 | what if the fat image is full | FAT image full: Remove host-only training files or grow the disk image when FAT image assembly runs out of space. | +| DOSHELP | kb2_term | 39 | 110 | what logs matter from qemu | QEMU logs: Capture compile logs, run logs, and copied evidence files before trusting an emulator result. | +| DOSHELP | kb2_term | 57 | 50 | how do i handle a dos memory error | DOS memory error: Free conventional memory by unloading TSRs, loading drivers high, or using a smaller profile. | +| DOSHELP | kb2_term | 36 | 170 | how should a batch menu work | Batch menu: Offer numbered choices, validate the input, and keep each branch short and reversible. | +| OFFICE | kb2_term | 36 | 50 | how should i write a handoff note | Handoff note: Say what is done, what remains, where evidence lives, and who owns the next action. | +| OFFICE | kb2_term | 36 | 110 | what belongs in a bug report | Bug report shape: Include expected behavior, actual behavior, reproduction steps, logs, and the suspected area. | +| OFFICE | kb2_term | 36 | 60 | make a compact release note | Release note shape: Lead with what changed, list proof, then state any known limits plainly. | +| OFFICE | kb2_term | 51 | 50 | what should meeting notes capture | Meeting notes: Capture decisions, owners, dates, open questions, and follow-up actions. | +| OFFICE | kb2_term | 36 | 110 | help me write a project plan | Project plan: List the goal, milestones, owners, risks, and the next checkpoint. | +| OFFICE | kb2_term | 24 | 60 | how do i track risks | Risk register: For each risk, record impact, likelihood, mitigation, owner, and review date. | +| OFFICE | kb2_term | 36 | 50 | what is a useful test plan | Test plan: Define scope, cases, expected results, evidence files, and pass or fail criteria. | +| OFFICE | kb2_term | 36 | 110 | how should i reply to a customer | Customer reply: Acknowledge the issue, give the current status, state the next action, and avoid overpromising. | +| OFFICE | kb2_term | 51 | 60 | how do i write user docs | User docs: Write the task goal, prerequisites, exact steps, expected result, and troubleshooting note. | +| DEV | kb2_term | 36 | 50 | how can this feel modern on a 486 | Modern 486 LLM path: Use small hot-loaded weights, compact retrieval databases, persistent memory, and short synthesis replies. | +| DEV | kb2_term | 36 | 110 | what does retrieval first mean | Retrieval first: Answer from KDB, USER notes, memory, and golden rows before asking the small model to synthesize. | +| DEV | kb2_term | 39 | 60 | how do i author a pack | Pack authoring: Write HELP and KNOW rows, rebuild KDB, run the authoring validator, then run retrieval and QEMU gates. | +| DEV | kb2_term | 42 | 110 | what should i check before release | Release check: Verify tests, logs, artifact names, checksums, release notes, and the target tag. | +| DEV | kb2_term | 45 | 50 | how should we store fast recall data | High velocity recall: Compile notes into compact keyword rows so DOS scans less text and reaches the answer faster. | +| DEV | kb2_term | 39 | 60 | what should a failure record include | Failure record: Record the command, input, expected result, actual result, log path, and next experiment. | +| PORTABLE | kb2_term | 57 | 110 | what does portable intelligence mean | portable meaning: Portable intelligence means small local model weights, retrieval, and memory can run on old machines without a network. | +| PORTABLE | kb2_term | 57 | 50 | why is basic useful for teaching ai | basic teaching: BASIC is useful for teaching machine intelligence because plain arrays, files, and integer arithmetic make the mechanism inspectable. | +| PORTABLE | kb2_term | 15 | 60 | how could this move to c or assembly | runtime ports: The same assistant contract can be reimplemented in C, assembly, Eshkol, or calculator BASIC when files, arrays, and loops exist. | +| PORTABLE | kb2_term | 45 | 50 | why do hot swappable weights matter | domain weight loading: Hot swappable weights load domain behavior into a tiny resident shell without rebuilding the whole runtime. | +| PORTABLE | kb2_term | 72 | 110 | how should tiny machines store recall | tiny machine recall: Tiny machines should store recall as compact indexed rows so slow processors scan fewer bytes before answering. | +| PORTABLE | kb2_term | 63 | 60 | what proof shows this works on old hardware | old hardware proof: Proof for old hardware needs local logs, repeatable tests, QEMU or hardware captures, and visible source files. | diff --git a/qemu/evidence/preview_release_manifest.md b/qemu/evidence/preview_release_manifest.md index 4e02fbe..d07ac28 100644 --- a/qemu/evidence/preview_release_manifest.md +++ b/qemu/evidence/preview_release_manifest.md @@ -5,7 +5,7 @@ Generated: `2026-05-12` Package tree: `gpt2-basic-preview` Package zip: `gpt2-basic-preview.zip` Package checksums: `SHA256SUMS.txt`; zip sidecar: `gpt2-basic-preview.zip.sha256` -Package status: `583 files, 119,897,176 bytes` +Package status: `592 files, 119,965,348 bytes` This is an iterative preview payload. It ships only strict-quality release models and assistant packs; rejected repair attempts and old candidates remain repo evidence only. @@ -82,6 +82,9 @@ This is an iterative preview payload. It ships only strict-quality release model - `qemu/evidence/assistant_pack_probe.log` - `qemu/evidence/assistant_pack_retrieval_eval.md` - `qemu/evidence/assistant_raw_prompt_eval.md` +- `qemu/evidence/assistant_recall_486.log` +- `qemu/evidence/assistant_recall_benchmark.md` +- `qemu/evidence/assistant_recall_compile_486.log` - `qemu/evidence/assistant_showcase_video.md` - `qemu/evidence/assistant_stress_486.log` - `qemu/evidence/assistant_stress_compile_486.log` @@ -90,6 +93,7 @@ This is an iterative preview payload. It ships only strict-quality release model - `qemu/evidence/compile_main_486.log` - `qemu/evidence/exported_model_quality_inventory.md` - `qemu/evidence/gold_curriculum_v5_clean_repair_report.md` +- `qemu/evidence/hardware_capture_486_qemu/ARECALL.LOG` - `qemu/evidence/hardware_capture_486_qemu/ASSIST.LOG` - `qemu/evidence/hardware_capture_486_qemu/ASSISTC.LOG` - `qemu/evidence/hardware_capture_486_qemu/ASTRESS.LOG` @@ -98,6 +102,7 @@ This is an iterative preview payload. It ships only strict-quality release model - `qemu/evidence/hardware_capture_486_qemu/PERF.LOG` - `qemu/evidence/hardware_capture_486_qemu/QUAL.LOG` - `qemu/evidence/hardware_capture_486_qemu_probe.log` +- `qemu/evidence/hardware_capture_486_qemu_recall_report.md` - `qemu/evidence/hardware_capture_486_qemu_stress_report.md` - `qemu/evidence/hardware_capture_probe.log` - `qemu/evidence/hardware_perf_report.md` @@ -152,6 +157,7 @@ python3 scripts/build_dosbox_bundle.py --self-test python3 scripts/build_hardware_transfer.py --self-test python3 scripts/evaluate_assistant_kdb_binary.py python3 scripts/evaluate_assistant_kdb_term_index.py +python3 scripts/benchmark_assistant_recall.py python3 scripts/build_preview_release.py --self-test python3 scripts/verify_preview_artifacts.py --self-test python3 scripts/verify_workspace_tracking.py diff --git a/qemu/fdauto_assist_recall.bat b/qemu/fdauto_assist_recall.bat new file mode 100644 index 0000000..9e468c4 --- /dev/null +++ b/qemu/fdauto_assist_recall.bat @@ -0,0 +1,55 @@ +@echo off +set DOSDRV=A: +set DOSDIR=A:\FREEDOS +set PATH=A:\FREEDOS\BIN;C:\FBC;C:\FBC\BIN\DOS +set DIRCMD=/OGN /Y +set COPYCMD=/-Y + +echo GPT2-BASIC QEMU 486 assistant recall probe +echo. +C: +cd \ +if not exist GPT2SRC\ASSIST.BAS goto missing_source +if exist MODEL\GPT2CFG.TXT goto have_cfg +if exist MODEL\TINYCFG.TXT goto have_cfg +goto missing_model +:have_cfg +if exist MODEL\GPT2FX.BIN goto have_fx +if exist MODEL\TINYFX.BIN goto have_fx +goto missing_model +:have_fx +if exist MODEL\GPT2EXP.BIN goto have_exp +if exist MODEL\TINYEXP.BIN goto have_exp +goto missing_model +:have_exp +if exist ARECALL.LOG del ARECALL.LOG +if exist ARECALLC.LOG del ARECALLC.LOG +echo Compiling ASSIST.EXE recall probe... +fbc -x ASSIST.EXE GPT2SRC\ASSIST.BAS > ARECALLC.LOG +if errorlevel 1 goto assist_compile_failed +echo ASSIST_COMPILE_OK +echo ASSIST_COMPILE_OK >> ARECALLC.LOG +dir ASSIST.EXE +dir ASSIST.EXE >> ARECALLC.LOG +echo Running C:\ASSIST.EXE --recall-probe... +ASSIST.EXE --recall-probe > ARECALL.LOG +type ARECALL.LOG +goto done + +:assist_compile_failed +echo ASSIST_COMPILE_FAILED +type ARECALLC.LOG +goto done + +:missing_source +echo Missing GPT2SRC\ASSIST.BAS. +goto done + +:missing_model +echo Missing fixed-point GPT2-BASIC model files in C:\MODEL. +echo Run scripts\train_gpt2_basic.py on the host first. + +:done +echo. +echo Powering off QEMU. +A:\FREEDOS\BIN\FDAPM.COM POWEROFF diff --git a/qemu/run_assistant_recall_486.sh b/qemu/run_assistant_recall_486.sh new file mode 100755 index 0000000..26e652d --- /dev/null +++ b/qemu/run_assistant_recall_486.sh @@ -0,0 +1,110 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +BOOT_IMAGE="$ROOT/qemu/boot-test.img" +HDD_IMAGE="$ROOT/qemu/gpt2hdd.img" +MODEL_DIR="${1:-$ROOT/assets/gpt2_basic/MODEL}" +PACK_DIR="${2:-$ROOT/assets/gpt2_basic/PACKS}" +QEMU_TIMEOUT_SECONDS="${QEMU_TIMEOUT_SECONDS:-180}" + +if [[ "$MODEL_DIR" != /* ]]; then + MODEL_DIR="$ROOT/$MODEL_DIR" +fi +if [[ "$PACK_DIR" != /* ]]; then + PACK_DIR="$ROOT/$PACK_DIR" +fi + +model_name="$(basename "$MODEL_DIR")" +model_key="$(printf '%s' "$model_name" | tr '[:upper:]' '[:lower:]' | tr -cs '[:alnum:]' '_' | sed 's/^_*//;s/_*$//')" +if [[ "$model_key" == "model" ]]; then + suffix="" +else + suffix="_$model_key" +fi + +if [[ ! -f "$HDD_IMAGE" ]]; then + echo "missing $HDD_IMAGE" >&2 + echo "Create the FreeDOS/FreeBASIC hard-disk image first." >&2 + exit 1 +fi + +if [[ ! -f "$BOOT_IMAGE" ]]; then + echo "missing $BOOT_IMAGE" >&2 + echo "Create the FreeDOS boot-floppy image first." >&2 + exit 1 +fi + +if [[ ! -d "$PACK_DIR" ]]; then + echo "missing assistant pack directory: $PACK_DIR" >&2 + exit 1 +fi + +if pgrep -f qemu-system-i386 >/dev/null 2>&1; then + echo "qemu-system-i386 is already running; stop it before updating the boot image." >&2 + exit 1 +fi + +python3 "$ROOT/qemu/make_dos_staging.py" + +python3 "$ROOT/scripts/model_report.py" --model-dir "$MODEL_DIR" --strict +python3 "$ROOT/qemu/fat_image_put.py" "$HDD_IMAGE" \ + --put-tree "$ROOT/qemu/staging/GPT2SRC" GPT2SRC +python3 "$ROOT/qemu/fat_image_put.py" "$HDD_IMAGE" \ + --put-tree "$MODEL_DIR" MODEL +python3 "$ROOT/qemu/fat_image_put.py" "$HDD_IMAGE" \ + --exclude-name TRAIN.TXT \ + --exclude-name TOKBASE.TXT \ + --put-tree "$PACK_DIR" PACKS +python3 "$ROOT/qemu/fat_image_put.py" "$BOOT_IMAGE" \ + --put "$ROOT/qemu/fdauto_assist_recall.bat" FDAUTO.BAT + +echo "Running GPT2-BASIC assistant recall probe under QEMU 486." +echo "Model directory: $MODEL_DIR" +echo "Pack directory: $PACK_DIR" +echo "If the FreeDOS language menu appears, press Enter for English." + +set +e +env TERM=xterm qemu-system-i386 \ + -machine isapc \ + -cpu 486 \ + -m 64 \ + -drive "file=$BOOT_IMAGE,format=raw,if=floppy,index=0" \ + -drive "file=$HDD_IMAGE,format=raw,if=ide,index=0,media=disk" \ + -boot a \ + -display curses \ + -monitor none \ + -no-reboot & +qemu_pid=$! +qemu_status=0 +elapsed=0 +while kill -0 "$qemu_pid" >/dev/null 2>&1; do + if [[ "$elapsed" -ge "$QEMU_TIMEOUT_SECONDS" ]]; then + echo "QEMU recall timeout reached; stopping emulator and extracting logs." >&2 + kill "$qemu_pid" >/dev/null 2>&1 + wait "$qemu_pid" + qemu_status=143 + break + fi + sleep 1 + elapsed=$((elapsed + 1)) +done +if [[ "$qemu_status" -eq 0 ]]; then + wait "$qemu_pid" + qemu_status=$? +fi +set -e + +if [[ "$qemu_status" -ne 0 && "$qemu_status" -ne 143 ]]; then + exit "$qemu_status" +fi + +mkdir -p "$ROOT/qemu/evidence" +recall_log="$ROOT/qemu/evidence/assistant_recall_486${suffix}.log" +compile_log="$ROOT/qemu/evidence/assistant_recall_compile_486${suffix}.log" +python3 "$ROOT/qemu/fat_image_put.py" "$HDD_IMAGE" \ + --get-text ARECALL.LOG "$recall_log" \ + --get-text ARECALLC.LOG "$compile_log" +python3 "$ROOT/scripts/benchmark_assistant_recall.py" --log "$recall_log" +echo "wrote $recall_log" +echo "wrote $compile_log" diff --git a/qemu/run_hardware_capture_486.sh b/qemu/run_hardware_capture_486.sh index e2d3338..b8aed50 100755 --- a/qemu/run_hardware_capture_486.sh +++ b/qemu/run_hardware_capture_486.sh @@ -8,6 +8,7 @@ MODEL_DIR="${1:-$ROOT/assets/gpt2_basic/MODEL}" PACK_DIR="${2:-$ROOT/assets/gpt2_basic/PACKS}" GPT2_EXE="${3:-$ROOT/qemu/evidence/GPT2.EXE}" CAPTURE_DIR="$ROOT/qemu/evidence/hardware_capture_486_qemu" +QEMU_TIMEOUT_SECONDS="${QEMU_TIMEOUT_SECONDS:-420}" if [[ "$MODEL_DIR" != /* ]]; then MODEL_DIR="$ROOT/$MODEL_DIR" @@ -88,8 +89,25 @@ env TERM=xterm qemu-system-i386 \ -boot a \ -display curses \ -monitor none \ - -no-reboot -qemu_status=$? + -no-reboot & +qemu_pid=$! +qemu_status=0 +elapsed=0 +while kill -0 "$qemu_pid" >/dev/null 2>&1; do + if [[ "$elapsed" -ge "$QEMU_TIMEOUT_SECONDS" ]]; then + echo "QEMU hardware-capture timeout reached; stopping emulator and extracting logs." >&2 + kill "$qemu_pid" >/dev/null 2>&1 + wait "$qemu_pid" + qemu_status=143 + break + fi + sleep 1 + elapsed=$((elapsed + 1)) +done +if [[ "$qemu_status" -eq 0 ]]; then + wait "$qemu_pid" + qemu_status=$? +fi set -e if [[ "$qemu_status" -ne 0 && "$qemu_status" -ne 143 ]]; then @@ -103,12 +121,16 @@ python3 "$ROOT/qemu/fat_image_put.py" "$HDD_IMAGE" \ --get-text GPT2/PERF.LOG "$CAPTURE_DIR/PERF.LOG" \ --get-text GPT2/ASSIST.LOG "$CAPTURE_DIR/ASSIST.LOG" \ --get-text GPT2/ASTRESS.LOG "$CAPTURE_DIR/ASTRESS.LOG" \ + --get-text GPT2/ARECALL.LOG "$CAPTURE_DIR/ARECALL.LOG" \ --get-text GPT2/ASSISTC.LOG "$CAPTURE_DIR/ASSISTC.LOG" \ --get-text GPT2/HWNOTES.TXT "$CAPTURE_DIR/HWNOTES.TXT" python3 "$ROOT/scripts/verify_hardware_capture.py" \ --capture-dir "$CAPTURE_DIR" \ > "$ROOT/qemu/evidence/hardware_capture_486_qemu_probe.log" +python3 "$ROOT/scripts/benchmark_assistant_recall.py" \ + --log "$CAPTURE_DIR/ARECALL.LOG" \ + --report "$ROOT/qemu/evidence/hardware_capture_486_qemu_recall_report.md" echo "wrote $CAPTURE_DIR" echo "wrote $ROOT/qemu/evidence/hardware_capture_486_qemu_probe.log" diff --git a/scripts/benchmark_assistant_recall.py b/scripts/benchmark_assistant_recall.py new file mode 100644 index 0000000..2d5b128 --- /dev/null +++ b/scripts/benchmark_assistant_recall.py @@ -0,0 +1,201 @@ +#!/usr/bin/env python3 +"""Benchmark ASSIST.EXE recall-probe logs for coverage and latency.""" + +from __future__ import annotations + +import argparse +import sys +from collections import Counter +from dataclasses import dataclass +from pathlib import Path + +SCRIPT_DIR = Path(__file__).resolve().parent +if str(SCRIPT_DIR) not in sys.path: + sys.path.insert(0, str(SCRIPT_DIR)) + +from evaluate_assistant_pack_retrieval import CASES, RetrievalCase, validate + + +ROOT = Path(__file__).resolve().parents[1] +DEFAULT_LOG = ROOT / "qemu" / "evidence" / "assistant_recall_486.log" +DEFAULT_REPORT = ROOT / "qemu" / "evidence" / "assistant_recall_benchmark.md" +DEFAULT_MAX_AVERAGE_MS = 250 +DEFAULT_MAX_SINGLE_MS = 1500 + + +@dataclass(frozen=True) +class RecallRecord: + pack: str + query: str + recall: str + score: int + retrieve_ms: int + answer: str + + +def require(condition: bool, message: str) -> None: + if not condition: + raise SystemExit(f"ASSISTANT_RECALL_BENCHMARK_FAILED {message}") + + +def parse_record(line: str) -> dict[str, str]: + fields: dict[str, str] = {} + for part in line.rstrip().split("|")[1:]: + if "=" not in part: + continue + key, value = part.split("=", 1) + fields[key] = value + return fields + + +def parse_records(text: str) -> list[RecallRecord]: + records: list[RecallRecord] = [] + for line in text.splitlines(): + if not line.startswith("ASSIST_RECALL|"): + continue + fields = parse_record(line) + score_text = fields.get("recall_score", "") + timing_text = fields.get("t_retrieve_ms", "") + require(score_text.isdigit(), f"bad_score={fields.get('pack', '')}:{fields.get('query', '')}") + require(timing_text.isdigit(), f"bad_timing={fields.get('pack', '')}:{fields.get('query', '')}") + records.append( + RecallRecord( + fields.get("pack", ""), + fields.get("query", ""), + fields.get("recall", ""), + int(score_text), + int(timing_text), + fields.get("answer", ""), + ) + ) + return records + + +def expected_cases() -> dict[tuple[str, str], RetrievalCase]: + return {(case.pack, case.query): case for case in CASES} + + +def validate_records( + records: list[RecallRecord], + max_average_ms: int, + max_single_ms: int, +) -> tuple[Counter[str], Counter[str]]: + expected = expected_cases() + seen: set[tuple[str, str]] = set() + pack_counts: Counter[str] = Counter() + recall_counts: Counter[str] = Counter() + require(len(records) == len(expected), f"record_count={len(records)}/{len(expected)}") + for record in records: + key = (record.pack, record.query) + require(key in expected, f"unexpected_recall={record.pack}:{record.query}") + require(key not in seen, f"duplicate_recall={record.pack}:{record.query}") + seen.add(key) + pack_counts[record.pack] += 1 + recall_counts[record.recall] += 1 + require(record.recall not in ("", "none"), f"no_recall_mode={record.pack}:{record.query}") + require(record.score > 0, f"nonpositive_score={record.pack}:{record.query}") + require(record.answer.strip(), f"empty_answer={record.pack}:{record.query}") + require(record.retrieve_ms <= max_single_ms, f"slow_recall={record.pack}:{record.query}:{record.retrieve_ms}") + reason = validate(expected[key], record.answer) + require(reason is None, f"{reason}:{record.pack}:{record.query}:{record.answer}") + missing = sorted(set(expected) - seen) + require(not missing, f"missing_recall={missing}") + average_ms = sum(record.retrieve_ms for record in records) // len(records) + require(average_ms <= max_average_ms, f"average_recall_ms={average_ms}>{max_average_ms}") + require(recall_counts["kb2_term"] + recall_counts["kb2_bucket"] + recall_counts["kb2_full"] >= len(records) // 2, "too_little_binary_recall") + return pack_counts, recall_counts + + +def markdown_report(records: list[RecallRecord], pack_counts: Counter[str], recall_counts: Counter[str]) -> str: + timings = [record.retrieve_ms for record in records] + scores = [record.score for record in records] + average_ms = sum(timings) // len(timings) if timings else 0 + max_ms = max(timings) if timings else 0 + average_score = sum(scores) // len(scores) if scores else 0 + lines = [ + "# Assistant Recall Benchmark", + "", + "Status: `PASS`", + f"Recall case count: `{len(records)}`", + f"Average retrieval time: `{average_ms} ms`", + f"Max retrieval time: `{max_ms} ms`", + f"Average recall score: `{average_score}`", + "Pack counts: `" + " ".join(f"{pack}={count}" for pack, count in sorted(pack_counts.items())) + "`", + "Recall modes: `" + " ".join(f"{mode}={count}" for mode, count in sorted(recall_counts.items())) + "`", + "", + "This benchmark is generated from `ASSIST.EXE --recall-probe` and measures local pack recall without model generation.", + "", + "| Pack | Recall | Score | Retrieve ms | Query | Answer |", + "|---|---|---:|---:|---|---|", + ] + for record in records: + lines.append( + "| {pack} | {recall} | {score} | {ms} | {query} | {answer} |".format( + pack=record.pack, + recall=record.recall, + score=record.score, + ms=record.retrieve_ms, + query=record.query.replace("|", "/"), + answer=record.answer.replace("|", "/"), + ) + ) + lines.append("") + return "\n".join(lines) + + +def run_benchmark(log: Path, report: Path, max_average_ms: int, max_single_ms: int) -> int: + text = log.read_text(encoding="ascii", errors="ignore") + require("ASSIST_BEGIN|suite=recall-probe|version=1" in text, "recall_begin_missing") + require("ASSIST_END|suite=recall-probe|packs=5" in text, "recall_end_missing") + records = parse_records(text) + pack_counts, recall_counts = validate_records(records, max_average_ms, max_single_ms) + report.parent.mkdir(parents=True, exist_ok=True) + report.write_text(markdown_report(records, pack_counts, recall_counts), encoding="ascii") + average_ms = sum(record.retrieve_ms for record in records) // len(records) + max_ms = max(record.retrieve_ms for record in records) + print(f"PROBE_OK assistant_recall_benchmark_cases={len(records)}") + print( + "ASSISTANT_RECALL_BENCHMARK|" + f"cases={len(records)}|average_ms={average_ms}|max_ms={max_ms}|report={report}" + ) + print("PROBE_OK assistant_recall_benchmark_pass=1") + return 0 + + +def self_test() -> None: + lines = ["ASSIST_BEGIN|suite=recall-probe|version=1"] + for case in CASES: + answer = " ".join(case.terms) + "." + lines.append( + "ASSIST_RECALL|pack={pack}|query={query}|recall=kb2_term|" + "recall_score=99|t_retrieve_ms=3|answer={answer}".format( + pack=case.pack, + query=case.query, + answer=answer, + ) + ) + lines.append("ASSIST_END|suite=recall-probe|packs=5") + records = parse_records("\n".join(lines)) + pack_counts, recall_counts = validate_records(records, DEFAULT_MAX_AVERAGE_MS, DEFAULT_MAX_SINGLE_MS) + report = markdown_report(records, pack_counts, recall_counts) + assert "Status: `PASS`" in report + assert f"Recall case count: `{len(CASES)}`" in report + print("PROBE_OK assistant_recall_benchmark_self_test=1") + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument("--log", type=Path, default=DEFAULT_LOG) + parser.add_argument("--report", type=Path, default=DEFAULT_REPORT) + parser.add_argument("--max-average-ms", type=int, default=DEFAULT_MAX_AVERAGE_MS) + parser.add_argument("--max-single-ms", type=int, default=DEFAULT_MAX_SINGLE_MS) + parser.add_argument("--self-test", action="store_true") + args = parser.parse_args() + if args.self_test: + self_test() + return + raise SystemExit(run_benchmark(args.log, args.report, args.max_average_ms, args.max_single_ms)) + + +if __name__ == "__main__": + main() diff --git a/scripts/build_assistant_capability_report.py b/scripts/build_assistant_capability_report.py index 589aa48..c9bbe2c 100644 --- a/scripts/build_assistant_capability_report.py +++ b/scripts/build_assistant_capability_report.py @@ -142,6 +142,7 @@ def build_report(evidence_dir: Path, pack_root: Path, release_assets: Path, gene consistency = read(evidence_dir / "assistant_consistency_eval.md") retrieval = read(evidence_dir / "assistant_pack_retrieval_eval.md") usefulness = read(evidence_dir / "assistant_usefulness_eval.md") + recall_benchmark = read(evidence_dir / "assistant_recall_benchmark.md") kdb_index = read(evidence_dir / "assistant_kdb_index_eval.md") kdb_binary = read(evidence_dir / "assistant_kdb_binary_eval.md") kdb_term = read(evidence_dir / "assistant_kdb_term_index_eval.md") @@ -150,6 +151,7 @@ def build_report(evidence_dir: Path, pack_root: Path, release_assets: Path, gene stress = parse_stress_report(evidence_dir / "assistant_stress_report.md") hardware_stress = parse_stress_report(evidence_dir / "hardware_capture_486_qemu_stress_report.md") + hardware_recall = read(evidence_dir / "hardware_capture_486_qemu_recall_report.md") assistant_packs_match = re.search(r"ASSIST_END\|packs=(\d+)", assistant_log) require(assistant_packs_match is not None, "assistant_end_missing") @@ -196,6 +198,10 @@ def build_report(evidence_dir: Path, pack_root: Path, release_assets: Path, gene f"- Term-index recall evaluation: `PASS {report_line(kdb_term, 'Term-index recall pass rate')}`.", f"- Term-index candidate row scan ratio: `{report_line(kdb_term, 'Candidate row ratio')}`.", f"- Term-index candidate byte ratio: `{report_line(kdb_term, 'Candidate byte ratio')}`.", + f"- QEMU recall benchmark: `PASS {report_line(recall_benchmark, 'Recall case count')} cases`.", + f"- QEMU recall average retrieval time: `{report_line(recall_benchmark, 'Average retrieval time')}`.", + f"- QEMU recall max retrieval time: `{report_line(recall_benchmark, 'Max retrieval time')}`.", + f"- QEMU recall modes: `{report_line(recall_benchmark, 'Recall modes')}`.", "", "## Language Coverage", "", @@ -207,6 +213,7 @@ def build_report(evidence_dir: Path, pack_root: Path, release_assets: Path, gene f"- KDB text index gate: `PASS {report_line(kdb_index, 'Indexed recall pass rate')}`.", f"- KDB binary gate: `PASS {report_line(kdb_binary, 'Binary recall pass rate')}`.", f"- KDB term-index gate: `PASS {report_line(kdb_term, 'Term-index recall pass rate')}`.", + f"- DOS recall benchmark gate: `PASS {report_line(recall_benchmark, 'Recall case count')} cases`.", "", "Covered categories include general chat, identity, local inference, offline limits, prompt repair, repeated-answer recovery, troubleshooting, DOS setup, office writing, developer pack authoring, and portable-intelligence concepts.", "", @@ -231,6 +238,8 @@ def build_report(evidence_dir: Path, pack_root: Path, release_assets: Path, gene f"- Hardware-capture stress source mix: `{hardware_stress.sources}`.", f"- Hardware-capture average total reply time: `{hardware_stress.average_total_ms}`.", f"- Hardware-capture average retrieval time: `{hardware_stress.average_retrieval_ms}`.", + f"- Hardware-capture recall benchmark: `PASS {report_line(hardware_recall, 'Recall case count')} cases`.", + f"- Hardware-capture recall average retrieval time: `{report_line(hardware_recall, 'Average retrieval time')}`.", f"- Physical machine capture status: {physical_capture_status(evidence_dir)}", "", "## Authoring And Import", diff --git a/scripts/build_hardware_transfer.py b/scripts/build_hardware_transfer.py index 8f3ab22..f1ebb00 100644 --- a/scripts/build_hardware_transfer.py +++ b/scripts/build_hardware_transfer.py @@ -204,6 +204,7 @@ def return_instructions() -> str: " PERF.LOG\n" " ASSIST.LOG\n" " ASTRESS.LOG\n" + " ARECALL.LOG\n" " ASSISTC.LOG\n" " HWNOTES.TXT\n" "\n" diff --git a/scripts/build_preview_release.py b/scripts/build_preview_release.py index 5bb8c13..1432944 100644 --- a/scripts/build_preview_release.py +++ b/scripts/build_preview_release.py @@ -98,6 +98,9 @@ class ReleaseModel: "assistant_pack_retrieval_eval.md", "assistant_pack_probe.log", "assistant_raw_prompt_eval.md", + "assistant_recall_486.log", + "assistant_recall_benchmark.md", + "assistant_recall_compile_486.log", "assistant_showcase_video.md", "assistant_stress_486.log", "assistant_stress_compile_486.log", @@ -107,6 +110,7 @@ class ReleaseModel: "exported_model_quality_inventory.md", "gold_curriculum_v5_clean_repair_report.md", "hardware_capture_486_qemu_probe.log", + "hardware_capture_486_qemu_recall_report.md", "hardware_capture_486_qemu_stress_report.md", "hardware_capture_probe.log", "hardware_performance_matrix.md", @@ -471,6 +475,7 @@ def write_manifest( "python3 scripts/build_hardware_transfer.py --self-test", "python3 scripts/evaluate_assistant_kdb_binary.py", "python3 scripts/evaluate_assistant_kdb_term_index.py", + "python3 scripts/benchmark_assistant_recall.py", "python3 scripts/build_preview_release.py --self-test", "python3 scripts/verify_preview_artifacts.py --self-test", "python3 scripts/verify_workspace_tracking.py", diff --git a/scripts/stage_hardware_capture_evidence.py b/scripts/stage_hardware_capture_evidence.py index 50e3efc..6dd75c2 100644 --- a/scripts/stage_hardware_capture_evidence.py +++ b/scripts/stage_hardware_capture_evidence.py @@ -26,6 +26,7 @@ ("perf", "PERF.LOG", "perf.log"), ("assistant", "ASSIST.LOG", "assistant.log"), ("assistant_stress", "ASTRESS.LOG", "assistant_stress.log"), + ("assistant_recall", "ARECALL.LOG", "assistant_recall.log"), ("assistant_compile", "ASSISTC.LOG", "assistant_compile.log"), ) @@ -295,6 +296,26 @@ def write_sample_capture(root: Path) -> None: "\n".join(stress_lines) + "\n", encoding="ascii", ) + recall_lines = [ + "ASSIST_BEGIN|suite=recall-probe|version=1", + *[f"ASSIST_PACK|id={pack_id}" for pack_id in verify_hardware_capture.EXPECTED_ASSISTANT_PACKS], + ] + for case in verify_hardware_capture.benchmark_assistant_recall.CASES: + recall_lines.append( + "ASSIST_RECALL|pack={pack}|query={query}|recall=kb2_term|" + "recall_score=99|t_retrieve_ms=3|answer={answer}".format( + pack=case.pack, + query=case.query, + answer=" ".join(case.terms) + ".", + ) + ) + recall_lines.append( + f"ASSIST_END|suite=recall-probe|packs={verify_hardware_capture.EXPECTED_ASSISTANT_PACK_COUNT}" + ) + (root / "ARECALL.LOG").write_text( + "\n".join(recall_lines) + "\n", + encoding="ascii", + ) (root / "ASSISTC.LOG").write_text("ASSIST_COMPILE_OK\n", encoding="ascii") (root / "HWNOTES.TXT").write_text( "Machine key: 486dx2_66_dos622\n" @@ -341,7 +362,7 @@ def self_test() -> None: (evidence / "hardware_486dx2_66_dos622_manifest.md").exists(), "self_test_missing_manifest", ) - require(len(written) == 8, "self_test_staged_count") + require(len(written) == 9, "self_test_staged_count") print("PROBE_OK hardware_stage_self_test=1") diff --git a/scripts/verify_assistant_packs.py b/scripts/verify_assistant_packs.py index 367fbfe..5217066 100644 --- a/scripts/verify_assistant_packs.py +++ b/scripts/verify_assistant_packs.py @@ -17,6 +17,9 @@ DEFAULT_STRESS_LOG = ROOT / "qemu" / "evidence" / "assistant_stress_486.log" DEFAULT_STRESS_COMPILE_LOG = ROOT / "qemu" / "evidence" / "assistant_stress_compile_486.log" DEFAULT_STRESS_REPORT = ROOT / "qemu" / "evidence" / "assistant_stress_report.md" +DEFAULT_RECALL_LOG = ROOT / "qemu" / "evidence" / "assistant_recall_486.log" +DEFAULT_RECALL_COMPILE_LOG = ROOT / "qemu" / "evidence" / "assistant_recall_compile_486.log" +DEFAULT_RECALL_REPORT = ROOT / "qemu" / "evidence" / "assistant_recall_benchmark.md" DEFAULT_RAW_PROMPT_REPORT = ROOT / "qemu" / "evidence" / "assistant_raw_prompt_eval.md" DEFAULT_GENERALIST_PROMPT_REPORT = ROOT / "qemu" / "evidence" / "assistant_generalist_prompt_eval.md" DEFAULT_RETRIEVAL_REPORT = ROOT / "qemu" / "evidence" / "assistant_pack_retrieval_eval.md" @@ -36,6 +39,8 @@ KDB_TERM_INDEX_MIN_CASES = 42 KDB_TERM_INDEX_MAX_ROW_RATIO = 0.35 STRESS_REPLY_COUNT = 50 +RECALL_CASE_COUNT = 42 +RECALL_MAX_AVERAGE_MS = 250 @dataclass(frozen=True) @@ -173,6 +178,8 @@ def verify_source() -> None: "AssistSaveMemoryFacts", "AssistGuardProbe", "AssistStressProbe", + "AssistRecallProbe", + "AssistEmitRecallCase", "AssistPrepareGenerationPrompt", "AssistPrefillPrompt", "AssistVisibleToken", @@ -195,6 +202,9 @@ def verify_source() -> None: "AssistCanonicalQuery", 'prompt = "User: " + canonical_query', 'command_line = "--stress-probe"', + 'command_line = "--recall-probe"', + "ASSIST_BEGIN|suite=recall-probe|version=1", + "ASSIST_RECALL", '"|query=" + AssistSafeText(query)', '"|canonical=" + AssistSafeText(canonical_query)', '"|memory=" + AssistSafeText(memory_context)', @@ -231,6 +241,7 @@ def verify_pack_quality( kdb_index_report: Path, kdb_binary_report: Path, kdb_term_index_report: Path, + recall_report: Path, ) -> None: pack_by_id = {pack.pack_id: pack for pack in packs} raw_report = read(raw_prompt_report) @@ -320,6 +331,16 @@ def verify_pack_quality( f"kdb_term_index_eval_pass_rate={kdb_term_passed}/{kdb_term_total}", ) require(term_ratio <= KDB_TERM_INDEX_MAX_ROW_RATIO, f"kdb_term_index_row_ratio={term_ratio:.3f}") + recall_text = read(recall_report) + require("Status: `PASS`" in recall_text, "assistant_recall_benchmark_not_pass") + recall_count_match = re.search(r"Recall case count:\s+`(\d+)`", recall_text) + require(recall_count_match is not None, "assistant_recall_case_count_missing") + recall_count = int(recall_count_match.group(1)) + average_match = re.search(r"Average retrieval time:\s+`(\d+) ms`", recall_text) + require(average_match is not None, "assistant_recall_average_missing") + recall_average_ms = int(average_match.group(1)) + require(recall_count >= RECALL_CASE_COUNT, f"assistant_recall_cases={recall_count}") + require(recall_average_ms <= RECALL_MAX_AVERAGE_MS, f"assistant_recall_average_ms={recall_average_ms}") for pack_id in ("CHAT", "DOSHELP", "OFFICE"): pack = pack_by_id[pack_id] report = read(evidence_dir / f"quality_report_assistant_{pack.pack_id.lower()}.md") @@ -377,6 +398,21 @@ def verify_stress_logs(stress_log: Path, stress_compile_log: Path, stress_report require("Source counts:" in report, "stress_report_source_counts") +def verify_recall_logs(recall_log: Path, recall_compile_log: Path, recall_report: Path) -> None: + recall = read(recall_log) + compile_text = read(recall_compile_log) + report = read(recall_report) + require("ASSIST_COMPILE_OK" in compile_text, "recall_compile_marker_missing") + require("ASSIST_BEGIN|suite=recall-probe|version=1" in recall, "recall_begin_marker_missing") + require("ASSIST_END|suite=recall-probe|packs=5" in recall, "recall_end_marker_missing") + require(recall.count("ASSIST_RECALL|") == RECALL_CASE_COUNT, "recall_case_count_mismatch") + require("|query=" in recall and "|answer=" in recall, "recall_structured_answer_missing") + require("Status: `PASS`" in report, "recall_report_not_pass") + require(f"Recall case count: `{RECALL_CASE_COUNT}`" in report, "recall_report_case_count") + require("Average retrieval time:" in report, "recall_report_average_missing") + require("Recall modes:" in report, "recall_report_modes_missing") + + def main() -> None: parser = argparse.ArgumentParser() parser.add_argument("--pack-root", type=Path, default=DEFAULT_PACK_ROOT) @@ -386,6 +422,9 @@ def main() -> None: parser.add_argument("--stress-log", type=Path, default=DEFAULT_STRESS_LOG) parser.add_argument("--stress-compile-log", type=Path, default=DEFAULT_STRESS_COMPILE_LOG) parser.add_argument("--stress-report", type=Path, default=DEFAULT_STRESS_REPORT) + parser.add_argument("--recall-log", type=Path, default=DEFAULT_RECALL_LOG) + parser.add_argument("--recall-compile-log", type=Path, default=DEFAULT_RECALL_COMPILE_LOG) + parser.add_argument("--recall-report", type=Path, default=DEFAULT_RECALL_REPORT) parser.add_argument("--raw-prompt-report", type=Path, default=DEFAULT_RAW_PROMPT_REPORT) parser.add_argument("--generalist-prompt-report", type=Path, default=DEFAULT_GENERALIST_PROMPT_REPORT) parser.add_argument("--retrieval-report", type=Path, default=DEFAULT_RETRIEVAL_REPORT) @@ -407,9 +446,11 @@ def main() -> None: args.kdb_index_report, args.kdb_binary_report, args.kdb_term_index_report, + args.recall_report, ) verify_qemu_logs(packs, args.assistant_log, args.compile_log) verify_stress_logs(args.stress_log, args.stress_compile_log, args.stress_report) + verify_recall_logs(args.recall_log, args.recall_compile_log, args.recall_report) print(f"PROBE_OK assistant_pack_count={len(packs)}") print("PROBE_OK assistant_pack_loader=1") print("PROBE_OK assistant_pack_models=1") @@ -420,11 +461,13 @@ def main() -> None: print("PROBE_OK assistant_kdb_index_eval=1") print("PROBE_OK assistant_kdb_binary_eval=1") print("PROBE_OK assistant_kdb_term_index_eval=1") + print("PROBE_OK assistant_recall_benchmark=1") print("PROBE_OK assistant_model_switch=1") print("PROBE_OK assistant_structured_reply=1") print("PROBE_OK assistant_art_slots=1") print("PROBE_OK assistant_qemu_evidence=1") print("PROBE_OK assistant_stress_evidence=1") + print("PROBE_OK assistant_recall_evidence=1") if __name__ == "__main__": diff --git a/scripts/verify_hardware_capture.py b/scripts/verify_hardware_capture.py index 6881b0f..82d3373 100644 --- a/scripts/verify_hardware_capture.py +++ b/scripts/verify_hardware_capture.py @@ -10,8 +10,10 @@ try: from scripts import stress_assistant_behavior + from scripts import benchmark_assistant_recall except ImportError: # pragma: no cover - used when run as scripts/foo.py import stress_assistant_behavior # type: ignore + import benchmark_assistant_recall # type: ignore DEFAULT_FILES = { @@ -20,12 +22,14 @@ "perf": "PERF.LOG", "assistant": "ASSIST.LOG", "assistant_stress": "ASTRESS.LOG", + "assistant_recall": "ARECALL.LOG", "assistant_compile": "ASSISTC.LOG", "notes": "HWNOTES.TXT", } EXPECTED_ASSISTANT_PACKS = ("CHAT", "DOSHELP", "OFFICE", "DEV", "PORTABLE") EXPECTED_ASSISTANT_PACK_COUNT = len(EXPECTED_ASSISTANT_PACKS) EXPECTED_STRESS_REPLIES = len(stress_assistant_behavior.EXPECTED_CASES) +EXPECTED_RECALL_CASES = len(benchmark_assistant_recall.CASES) NOTE_FIELDS = ( "Machine key:", "CPU:", @@ -121,6 +125,29 @@ def verify_assistant_stress_log(path: Path, required: bool) -> int: return reply_count +def verify_assistant_recall_log(path: Path, required: bool) -> int: + text = read(path, required=required) + if not text: + return 0 + require("ASSIST_BEGIN|suite=recall-probe|version=1" in text, "assistant_recall_begin_missing") + require( + f"ASSIST_END|suite=recall-probe|packs={EXPECTED_ASSISTANT_PACK_COUNT}" in text, + "assistant_recall_end_missing", + ) + for pack_id in EXPECTED_ASSISTANT_PACKS: + require(f"ASSIST_PACK|id={pack_id}" in text, f"assistant_recall_pack_missing={pack_id}") + require(f"ASSIST_RECALL|pack={pack_id}" in text, f"assistant_recall_record_missing={pack_id}") + recall_count = count_matches(r"^ASSIST_RECALL\|", text) + require(recall_count == EXPECTED_RECALL_CASES, f"assistant_recall_count={recall_count}") + records = benchmark_assistant_recall.parse_records(text) + benchmark_assistant_recall.validate_records( + records, + benchmark_assistant_recall.DEFAULT_MAX_AVERAGE_MS, + benchmark_assistant_recall.DEFAULT_MAX_SINGLE_MS, + ) + return recall_count + + def verify_assistant_compile_log(path: Path, required: bool) -> bool: text = read(path, required=required) if not text: @@ -197,6 +224,24 @@ def self_test() -> None: "\n".join(stress_lines) + "\n", encoding="ascii", ) + recall_lines = [ + "ASSIST_BEGIN|suite=recall-probe|version=1", + *[f"ASSIST_PACK|id={pack_id}" for pack_id in EXPECTED_ASSISTANT_PACKS], + ] + for case in benchmark_assistant_recall.CASES: + recall_lines.append( + "ASSIST_RECALL|pack={pack}|query={query}|recall=kb2_term|" + "recall_score=99|t_retrieve_ms=3|answer={answer}".format( + pack=case.pack, + query=case.query, + answer=" ".join(case.terms) + ".", + ) + ) + recall_lines.append(f"ASSIST_END|suite=recall-probe|packs={EXPECTED_ASSISTANT_PACK_COUNT}") + (root / "ARECALL.LOG").write_text( + "\n".join(recall_lines) + "\n", + encoding="ascii", + ) (root / "ASSISTC.LOG").write_text("ASSIST_COMPILE_OK\n", encoding="ascii") (root / "HWNOTES.TXT").write_text( "Machine key: 486dx2_66_dos622\n" @@ -230,6 +275,10 @@ def verify_capture( capture_dir / DEFAULT_FILES["assistant_stress"], require_assistant, ) + assistant_recall_count = verify_assistant_recall_log( + capture_dir / DEFAULT_FILES["assistant_recall"], + require_assistant, + ) assistant_compiled = verify_assistant_compile_log(capture_dir / DEFAULT_FILES["assistant_compile"], require_assistant) notes_present = verify_notes( capture_dir / DEFAULT_FILES["notes"], @@ -244,6 +293,8 @@ def verify_capture( print(f"PROBE_OK hardware_assistant_log={DEFAULT_FILES['assistant']}") if require_assistant or assistant_stress_count: print(f"PROBE_OK hardware_assistant_stress_log={DEFAULT_FILES['assistant_stress']}") + if require_assistant or assistant_recall_count: + print(f"PROBE_OK hardware_assistant_recall_log={DEFAULT_FILES['assistant_recall']}") if require_assistant or assistant_compiled: print(f"PROBE_OK hardware_assistant_compile_log={DEFAULT_FILES['assistant_compile']}") if require_notes or notes_present: @@ -254,6 +305,8 @@ def verify_capture( print(f"PROBE_OK hardware_assistant_replies={assistant_count}") if require_assistant or assistant_stress_count: print(f"PROBE_OK hardware_assistant_stress_replies={assistant_stress_count}") + if require_assistant or assistant_recall_count: + print(f"PROBE_OK hardware_assistant_recall_cases={assistant_recall_count}") if require_assistant or assistant_compiled: print("PROBE_OK hardware_assistant_compile=1") if require_notes or notes_present: diff --git a/src/assistant.bas b/src/assistant.bas index 55891e0..862195b 100644 --- a/src/assistant.bas +++ b/src/assistant.bas @@ -160,6 +160,8 @@ DECLARE SUB AssistRenderReply(query AS STRING, use_generation AS INTEGER) DECLARE SUB AssistScriptedDemo() DECLARE SUB AssistGuardProbe() DECLARE SUB AssistStressProbe() +DECLARE SUB AssistEmitRecallCase(query AS STRING) +DECLARE SUB AssistRecallProbe() DECLARE SUB AssistInteractive() DECLARE SUB AssistMain() @@ -2303,6 +2305,85 @@ SUB AssistStressProbe() AssistShutdownModel END SUB +SUB AssistEmitRecallCase(query AS STRING) + DIM answer AS STRING + + answer = AssistRetrieve(g_assist_active_pack, AssistCanonicalQuery(query)) + PRINT "ASSIST_RECALL|pack=" + AssistTrimFixed(g_assist_packs(g_assist_active_pack).id) + _ + "|query=" + AssistSafeText(query) + _ + "|recall=" + AssistSafeText(g_assist_last_recall_mode) + _ + "|recall_score=" + LTRIM$(STR$(g_assist_last_recall_score)) + _ + "|t_retrieve_ms=" + LTRIM$(STR$(g_assist_last_retrieval_ms)) + _ + "|answer=" + AssistSafeText(answer) +END SUB + +SUB AssistRecallProbe() + AssistRenderFrame + PRINT "ASSIST_BEGIN|suite=recall-probe|version=1" + AssistPrintPackList + PRINT + + AssistSelectPack "CHAT" + AssistRenderPackStatus + AssistEmitRecallCase "how can i ask better questions" + AssistEmitRecallCase "what makes this intelligent on a small computer" + AssistEmitRecallCase "which pack should i use for writing" + AssistEmitRecallCase "can this work without the internet" + AssistEmitRecallCase "how do i recover from a bad answer" + AssistEmitRecallCase "what proof helps me trust this" + AssistEmitRecallCase "how should i compare options" + AssistEmitRecallCase "help me plan work in small steps" + AssistEmitRecallCase "what should a useful answer look like" + AssistEmitRecallCase "can you explain something simply" + AssistEmitRecallCase "what can you know without web access" + AssistEmitRecallCase "how do i show confidence in an answer" + + AssistSelectPack "DOSHELP" + AssistRenderPackStatus + AssistEmitRecallCase "what happens before autoexec bat runs" + AssistEmitRecallCase "why use 8.3 filenames in batches" + AssistEmitRecallCase "how should i prepare files for real hardware" + AssistEmitRecallCase "what should i do when cwsdpmi is missing" + AssistEmitRecallCase "how do i mount the dosbox bundle" + AssistEmitRecallCase "what if the fat image is full" + AssistEmitRecallCase "what logs matter from qemu" + AssistEmitRecallCase "how do i handle a dos memory error" + AssistEmitRecallCase "how should a batch menu work" + + AssistSelectPack "OFFICE" + AssistRenderPackStatus + AssistEmitRecallCase "how should i write a handoff note" + AssistEmitRecallCase "what belongs in a bug report" + AssistEmitRecallCase "make a compact release note" + AssistEmitRecallCase "what should meeting notes capture" + AssistEmitRecallCase "help me write a project plan" + AssistEmitRecallCase "how do i track risks" + AssistEmitRecallCase "what is a useful test plan" + AssistEmitRecallCase "how should i reply to a customer" + AssistEmitRecallCase "how do i write user docs" + + AssistSelectPack "DEV" + AssistRenderPackStatus + AssistEmitRecallCase "how can this feel modern on a 486" + AssistEmitRecallCase "what does retrieval first mean" + AssistEmitRecallCase "how do i author a pack" + AssistEmitRecallCase "what should i check before release" + AssistEmitRecallCase "how should we store fast recall data" + AssistEmitRecallCase "what should a failure record include" + + AssistSelectPack "PORTABLE" + AssistRenderPackStatus + AssistEmitRecallCase "what does portable intelligence mean" + AssistEmitRecallCase "why is basic useful for teaching ai" + AssistEmitRecallCase "how could this move to c or assembly" + AssistEmitRecallCase "why do hot swappable weights matter" + AssistEmitRecallCase "how should tiny machines store recall" + AssistEmitRecallCase "what proof shows this works on old hardware" + + PRINT "ASSIST_END|suite=recall-probe|packs=" + LTRIM$(STR$(g_assist_pack_count)) + AssistShutdownModel +END SUB + SUB AssistInteractive() DIM command_text AS STRING DIM query AS STRING @@ -2404,6 +2485,12 @@ SUB AssistMain() RETURN END IF + IF command_line = "--recall-probe" THEN + g_assist_emit_records = 1 + AssistRecallProbe + RETURN + END IF + g_assist_memory_persist = 1 AssistLoadMemoryFacts AssistInteractive diff --git a/tests/test_assistant_recall_benchmark.py b/tests/test_assistant_recall_benchmark.py new file mode 100644 index 0000000..5d77366 --- /dev/null +++ b/tests/test_assistant_recall_benchmark.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +import contextlib +import io +import unittest + +from scripts import benchmark_assistant_recall + + +class AssistantRecallBenchmarkTests(unittest.TestCase): + def test_self_test(self) -> None: + output = io.StringIO() + + with contextlib.redirect_stdout(output): + benchmark_assistant_recall.self_test() + + self.assertIn("PROBE_OK assistant_recall_benchmark_self_test=1", output.getvalue()) + + def test_rejects_slow_average_recall(self) -> None: + records = [ + benchmark_assistant_recall.RecallRecord( + case.pack, + case.query, + "kb2_term", + 99, + 10, + " ".join(case.terms) + ".", + ) + for case in benchmark_assistant_recall.CASES + ] + + with self.assertRaises(SystemExit) as raised: + benchmark_assistant_recall.validate_records(records, max_average_ms=1, max_single_ms=1500) + + self.assertIn("average_recall_ms", str(raised.exception)) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_stage_hardware_capture_evidence.py b/tests/test_stage_hardware_capture_evidence.py index df6bdd1..7ea4457 100644 --- a/tests/test_stage_hardware_capture_evidence.py +++ b/tests/test_stage_hardware_capture_evidence.py @@ -39,6 +39,7 @@ def test_stage_capture_writes_stable_evidence_files(self) -> None: "hardware_486dx2_66_dos622_perf.log", "hardware_486dx2_66_dos622_assistant.log", "hardware_486dx2_66_dos622_assistant_stress.log", + "hardware_486dx2_66_dos622_assistant_recall.log", "hardware_486dx2_66_dos622_assistant_compile.log", "hardware_486dx2_66_dos622_notes.md", "hardware_486dx2_66_dos622_manifest.md", diff --git a/tests/test_verify_hardware_capture.py b/tests/test_verify_hardware_capture.py index 6bbe74a..3bfb4ea 100644 --- a/tests/test_verify_hardware_capture.py +++ b/tests/test_verify_hardware_capture.py @@ -22,6 +22,7 @@ def test_verify_hardware_capture_self_test_artifacts(self) -> None: "PERF.LOG", "ASSIST.LOG", "ASTRESS.LOG", + "ARECALL.LOG", "ASSISTC.LOG", "HWNOTES.TXT", ] @@ -93,6 +94,7 @@ def test_hardware_transfer_return_instructions_are_dos_friendly(self) -> None: self.assertIn("PERF.LOG", text) self.assertIn("ASSIST.LOG", text) self.assertIn("ASTRESS.LOG", text) + self.assertIn("ARECALL.LOG", text) self.assertIn("ASSISTC.LOG", text) self.assertIn("HWNOTES.TXT", text) self.assertIn("--require-filled-notes", text)