diff --git a/devlog/2026-05-30_prefix-cache-fix/REQ.md b/devlog/2026-05-30_prefix-cache-fix/REQ.md new file mode 100644 index 0000000..083f791 --- /dev/null +++ b/devlog/2026-05-30_prefix-cache-fix/REQ.md @@ -0,0 +1,62 @@ +# REQ - Fix dynamic nudges breaking OpenAI Responses prefix cache + +- Task ID: `2026-05-30_prefix-cache-fix` +- Home Repo: `opencode-acp` +- Created: 2026-05-30 +- Status: InProgress +- Priority: P1 +- Owner: ranxianglei +- References: https://github.com/ranxianglei/opencode-acp/issues/5 + +## 1. Background & Problem Statement + +- **Context**: OpenAI Responses API uses prefix-based prompt caching. The token prefix must remain byte-stable between requests for cache hits to grow. +- **Current behavior (symptom)**: ACP injects dynamic per-turn metadata (`Context usage`, `Visible message IDs`, `Compressed block context`) into the **last user message** on every `chat.messages.transform` call. Because this user message is often early in the conversation (before many tool outputs), changing it invalidates the prefix for all subsequent content. Cache read tokens plateau at ~25.6K while total prompt tokens grow from 38K to 83K. +- **Expected behavior**: Dynamic ACP metadata should not rewrite historical messages. All per-turn dynamic content should be placed in a synthetic message at the END of the message list, so prefix cache can grow to cover all historical content. +- **Impact**: High — every turn wastes 30-50K+ tokens in non-cached input that should be cache hits. Directly increases API cost for OpenAI Responses users. + +## 2. Reproduction (if applicable) + +- **Environment**: + - OpenAI Responses API (`POST /v1/responses`) + - Model: `openai/gpt-5.4` + - ACP enabled with default config +- **Minimal reproduction steps**: + 1. Enable ACP plugin + 2. Start a session with OpenAI Responses model + 3. Send a message, then let the model make several tool calls (read, grep, etc.) + 4. Observe `cached_tokens` stops growing after ~2 turns +- **Relevant configuration**: Default ACP config (no special settings needed) + +## 3. Constraints & Non-Goals + +- **Constraints**: + - Backward compatibility: Must not change persisted state format or config schema + - Must not affect Anthropic/Gemini caching (different mechanism) + - Synthetic suffix message must not interfere with message ID assignment or compress tool +- **Non-Goals** (explicitly out of scope): + - Changing `injectMessageIds` tag injection (tags are stable once assigned per message, less impactful) + - Optimizing other prompt cache providers (focus on OpenAI Responses prefix cache) + +## 4. Acceptance Criteria (must be testable) + +- **Correctness**: + - [ ] `injectContextUsage`, `injectVisibleIdRange`, `buildCompressedBlockGuidance` write to a suffix message at the END of the message list + - [ ] Historical user messages are NOT modified by these functions + - [ ] Anchored nudges also write to the suffix message + - [ ] Compress tool still works correctly with suffix message present +- **Performance / Stability**: + - [ ] No new message ID refs assigned to the suffix message + - [ ] Suffix message does not appear in compress boundary resolution +- **Regression**: + - [ ] All 350 existing tests pass + - [ ] `npm run typecheck` passes + - [ ] `npm run build` passes + +## 5. Proposed Approach + +- **Affected modules & entry files**: + - `lib/messages/inject/inject.ts` — Add `createSuffixMessage()`, modify `injectContextUsage`, `injectVisibleIdRange`, and block guidance injection in `injectCompressNudges` + - `lib/messages/inject/utils.ts` — Add `suffixMessage` parameter to `applyAnchoredNudges` +- **Risks**: Low — changes are localized to injection targets, no data format changes +- **Rollback strategy**: Revert commit on this branch diff --git a/devlog/2026-05-30_prefix-cache-fix/WORKLOG.md b/devlog/2026-05-30_prefix-cache-fix/WORKLOG.md new file mode 100644 index 0000000..5b54c59 --- /dev/null +++ b/devlog/2026-05-30_prefix-cache-fix/WORKLOG.md @@ -0,0 +1,61 @@ +# WORKLOG - Fix dynamic nudges breaking OpenAI Responses prefix cache + +- Task ID: `2026-05-30_prefix-cache-fix` +- Home Repo: `opencode-acp` +- Status: Done +- Updated: 2026-05-30 19:30 + +## 1. Summary + +- **What was done**: Moved all dynamic ACP metadata injection (context usage, visible IDs, block guidance, anchored nudges) from historical user messages to a new synthetic suffix message at the end of the message list. Added `isSyntheticMessage()` to make synthetic messages invisible to query functions like `getLastUserMessage` and `findLastNonIgnoredMessage`. +- **Why**: ACP was modifying early user messages every turn, invalidating OpenAI Responses prefix cache for all subsequent content. Cache would plateau at ~25.6K tokens while total prompt grew to 83K+. +- **Behavior / compatibility changes**: No. Internal behavior change only — no config, state, or API changes. The same information is still injected, just at a different position. +- **Risk level**: Low + +## 2. Change Log + +### Key Files + +- `lib/messages/inject/inject.ts` — Added `createSuffixMessage()`, changed `injectContextUsage`, `injectVisibleIdRange`, and block guidance to write to suffix message instead of last user message +- `lib/messages/inject/utils.ts` — Added `suffixMessage` parameter to `applyAnchoredNudges()`, added `isSyntheticMessage` skip in `findLastNonIgnoredMessage()` +- `lib/messages/query.ts` — Added `isSyntheticMessage()` export, updated `getLastUserMessage()` to skip synthetic messages +- `tests/e2e-blocks-nudges.test.ts` — Updated 3 tests for new suffix message behavior +- `tests/e2e-message-transform.test.ts` — Updated 3 tests for new message counts + +## 3. Design & Implementation Notes + +- **Entry point**: `createSuffixMessage()` in `inject.ts` creates a synthetic user message with a stable seed (`"acp-dynamic-guidance"`), ensuring deterministic IDs that won't be assigned new mNNNNN refs +- **Prefix cache preservation**: All dynamic content goes to the suffix message at the END of the array, so historical messages remain byte-stable across turns +- **Invisibility**: `isSyntheticMessage()` checks for `msg_dcp_summary_` / `msg_dcp_text_` ID prefixes, consistent with `assignMessageRefs` which already skips these + +## 4. Testing & Verification + +### Build & Test Commands + +```sh +npm run typecheck # PASS +npm run build # PASS +npm run test # 350 tests, 350 pass, 0 fail +``` + +### Test Coverage + +- Modified test files: `tests/e2e-blocks-nudges.test.ts`, `tests/e2e-message-transform.test.ts` +- Test count: 350 total, 350 pass, 0 fail +- Key scenarios verified: + - Context usage injected into suffix message (not last user message) + - Visible ID range injected into suffix message + - Message counts include suffix message + - `stripStaleMetadata` works correctly with suffix message present + - Synthetic messages are invisible to `getLastUserMessage` and `findLastNonIgnoredMessage` + +## 5. Risk Assessment & Rollback + +- **Risk points**: Low — change is localized to injection targets, all 350 tests pass +- **Rollback method**: Revert all changes on this branch +- **Compatibility notes**: No data format or config changes + +## 6. Follow-ups (optional) + +- [ ] Monitor prefix cache hit rates in production with OpenAI Responses models +- [ ] Consider gating suffix message creation behind a config option if any edge cases emerge diff --git a/lib/messages/inject/inject.ts b/lib/messages/inject/inject.ts index d3f7164..19b8a27 100644 --- a/lib/messages/inject/inject.ts +++ b/lib/messages/inject/inject.ts @@ -17,6 +17,7 @@ import { appendToLastTextPart, appendToAllToolParts, createSyntheticTextPart, + createSyntheticUserMessage, hasContent, } from "../utils" import { @@ -31,6 +32,28 @@ import { } from "./utils" import { buildCompressedBlockGuidance } from "../../prompts/extensions/nudge" +/** + * Stable seed for the ACP dynamic guidance suffix message. + * Using a fixed seed ensures the synthetic message ID is deterministic, + * so it won't be assigned a new mNNNNN ref on each transform call. + */ +const ACP_SUFFIX_SEED = "acp-dynamic-guidance" + +/** + * Create a synthetic user message at the END of the messages array. + * All per-turn dynamic ACP content (context usage, visible IDs, nudges, etc.) + * is injected into this suffix message instead of historical user messages, + * preserving OpenAI Responses prefix cache stability. + */ +function createSuffixMessage(messages: WithParts[]): WithParts | null { + if (messages.length === 0) return null + // Use any user message as base for session/agent/model info + const base = messages.find((m) => m.info.role === "user") || messages[messages.length - 1] + const synthetic = createSyntheticUserMessage(base, "", ACP_SUFFIX_SEED) + messages.push(synthetic) + return synthetic +} + export const injectCompressNudges = ( state: SessionState, config: PluginConfig, @@ -136,19 +159,20 @@ export const injectCompressNudges = ( } } - applyAnchoredNudges(state, config, messages, prompts, compressionPriorities, currentTokens, modelContextLimit) + const suffixMessage = createSuffixMessage(messages) + + applyAnchoredNudges(state, config, messages, prompts, compressionPriorities, currentTokens, modelContextLimit, suffixMessage) - injectContextUsage(messages, currentTokens, modelContextLimit) + injectContextUsage(suffixMessage, currentTokens, modelContextLimit) if (config.compress.mode !== "message") { const blockGuidance = buildCompressedBlockGuidance(state, config.gc, { currentTokens, modelContextLimit }) - if (blockGuidance.trim()) { - const lastUser = getLastUserMessage(messages) - if (lastUser) appendToLastTextPart(lastUser, "\n\n" + blockGuidance) + if (blockGuidance.trim() && suffixMessage) { + appendToLastTextPart(suffixMessage, "\n\n" + blockGuidance) } } - injectVisibleIdRange(state, messages) + injectVisibleIdRange(state, messages, suffixMessage) if (anchorsChanged) { void saveSessionState(state, logger) @@ -156,30 +180,30 @@ export const injectCompressNudges = ( } function injectContextUsage( - messages: WithParts[], + target: WithParts | null, currentTokens?: number, modelContextLimit?: number, ): void { + if (!target) return if (currentTokens === undefined || modelContextLimit === undefined || modelContextLimit === 0) { return } - const lastUser = getLastUserMessage(messages) - if (!lastUser) return const percentage = ((currentTokens / modelContextLimit) * 100).toFixed(1) const formatK = (n: number) => (n >= 1000 ? `${(n / 1000).toFixed(1)}K` : String(n)) const usageTag = `\n\nContext usage: ${formatK(currentTokens)} / ${formatK(modelContextLimit)} tokens (${percentage}%). ACP (Active Context Pruning) threshold: 55%. You ARE the ACP agent — use the compress tool proactively to manage context quality.` - for (const part of lastUser.parts) { + for (const part of target.parts) { if (part.type === "text") { appendToTextPart(part, usageTag) return } } - lastUser.parts.push(createSyntheticTextPart(lastUser, usageTag)) + target.parts.push(createSyntheticTextPart(target, usageTag)) } -function injectVisibleIdRange(state: SessionState, messages: WithParts[]): void { +function injectVisibleIdRange(state: SessionState, messages: WithParts[], target: WithParts | null): void { + if (!target) return const visibleRefs: string[] = [] for (const message of messages) { const ref = state.messageIds.byRawId.get(message.info.id) @@ -195,16 +219,13 @@ function injectVisibleIdRange(state: SessionState, messages: WithParts[]): void const last = visibleRefs[visibleRefs.length - 1] const rangeTag = `\n\n[Visible message IDs: ${first} to ${last} (${visibleRefs.length} messages). Only use IDs in this range for compress.]` - const lastUser = getLastUserMessage(messages) - if (!lastUser) return - - for (const part of lastUser.parts) { + for (const part of target.parts) { if (part.type === "text") { appendToTextPart(part, rangeTag) return } } - lastUser.parts.push(createSyntheticTextPart(lastUser, rangeTag)) + target.parts.push(createSyntheticTextPart(target, rangeTag)) } export const injectMessageIds = ( diff --git a/lib/messages/inject/utils.ts b/lib/messages/inject/utils.ts index abdc3ca..ffecc60 100644 --- a/lib/messages/inject/utils.ts +++ b/lib/messages/inject/utils.ts @@ -18,7 +18,7 @@ import { createSyntheticTextPart, hasContent, } from "../utils" -import { getLastUserMessage, isIgnoredUserMessage } from "../query" +import { getLastUserMessage, isIgnoredUserMessage, isSyntheticMessage } from "../query" import { getCurrentTokenUsage } from "../../token-utils" import { getActiveSummaryTokenUsage } from "../../state/utils" @@ -62,6 +62,9 @@ export function findLastNonIgnoredMessage(messages: WithParts[]): LastNonIgnored if (isIgnoredUserMessage(message)) { continue } + if (isSyntheticMessage(message)) { + continue + } return { message, index: i } } @@ -371,11 +374,53 @@ export function applyAnchoredNudges( compressionPriorities?: CompressionPriorityMap, currentTokens?: number, modelContextLimit?: number, + suffixMessage?: WithParts | null, ): void { const contextUsageInfo = buildContextUsageInfo(currentTokens, modelContextLimit) const contextLimitNudgeWithUsage = prompts.contextLimitNudge + contextUsageInfo const turnNudgeAnchors = collectTurnNudgeAnchors(state, config, messages) + if (suffixMessage) { + const nudgeParts: string[] = [] + + if (config.compress.mode === "message") { + if (state.nudges.contextLimitAnchors.size > 0) { + for (const { index } of collectAnchoredMessages(state.nudges.contextLimitAnchors, messages)) { + const guidance = buildMessagePriorityGuidance(messages, compressionPriorities, index, MESSAGE_MODE_NUDGE_PRIORITY) + nudgeParts.push(appendGuidanceToDcpTag(contextLimitNudgeWithUsage, guidance)) + } + } + if (turnNudgeAnchors.size > 0) { + for (const { index } of collectAnchoredMessages(turnNudgeAnchors, messages)) { + const guidance = buildMessagePriorityGuidance(messages, compressionPriorities, index, MESSAGE_MODE_NUDGE_PRIORITY) + nudgeParts.push(appendGuidanceToDcpTag(prompts.turnNudge, guidance)) + } + } + if (state.nudges.iterationNudgeAnchors.size > 0) { + for (const { index } of collectAnchoredMessages(state.nudges.iterationNudgeAnchors, messages)) { + const guidance = buildMessagePriorityGuidance(messages, compressionPriorities, index, MESSAGE_MODE_NUDGE_PRIORITY) + nudgeParts.push(appendGuidanceToDcpTag(prompts.iterationNudge, guidance)) + } + } + } else { + if (state.nudges.contextLimitAnchors.size > 0) { + nudgeParts.push(contextLimitNudgeWithUsage) + } + if (turnNudgeAnchors.size > 0) { + nudgeParts.push(prompts.turnNudge) + } + if (state.nudges.iterationNudgeAnchors.size > 0) { + nudgeParts.push(prompts.iterationNudge) + } + } + + const combined = nudgeParts.join("\n\n") + if (combined.trim()) { + injectAnchoredNudge(suffixMessage, combined) + } + return + } + if (config.compress.mode === "message") { applyMessageModeAnchoredNudge( state.nudges.contextLimitAnchors, diff --git a/lib/messages/query.ts b/lib/messages/query.ts index 49a9f26..70096ca 100644 --- a/lib/messages/query.ts +++ b/lib/messages/query.ts @@ -2,6 +2,11 @@ import type { PluginConfig } from "../config" import type { WithParts } from "../state" import { isMessageWithInfo } from "./shape" +export function isSyntheticMessage(message: WithParts): boolean { + const id = message?.info?.id + return typeof id === "string" && (id.startsWith("msg_dcp_summary_") || id.startsWith("msg_dcp_text_")) +} + export const getLastUserMessage = ( messages: WithParts[], startIndex?: number, @@ -12,7 +17,7 @@ export const getLastUserMessage = ( if (!isMessageWithInfo(msg)) { continue } - if (msg.info.role === "user" && !isIgnoredUserMessage(msg)) { + if (msg.info.role === "user" && !isIgnoredUserMessage(msg) && !isSyntheticMessage(msg)) { return msg } } diff --git a/lib/update.ts b/lib/update.ts index 6340143..cc9f234 100644 --- a/lib/update.ts +++ b/lib/update.ts @@ -14,7 +14,7 @@ type UpdateResult = | { updated: false; error: "remove_failed"; name: string; current: string; latest: string } | { updated: false } -const PACKAGE_NAME = "@tarquinen/opencode-dcp" +const PACKAGE_NAME = "opencode-acp" export function startAutoUpdate(ctx: PluginInput, enabled: boolean): void { if (!enabled) return diff --git a/package.json b/package.json index 1f8d501..3db4d88 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "$schema": "https://json.schemastore.org/package.json", "name": "opencode-acp", - "version": "1.1.0", + "version": "1.1.1", "type": "module", "description": "Active Context Pruning — model-driven context management for OpenCode (hardened fork of DCP with 34 bug fixes)", "main": "./dist/index.js", diff --git a/tests/e2e-blocks-nudges.test.ts b/tests/e2e-blocks-nudges.test.ts index 4602f38..a80c110 100644 --- a/tests/e2e-blocks-nudges.test.ts +++ b/tests/e2e-blocks-nudges.test.ts @@ -15,6 +15,7 @@ import type { PluginConfig } from "../lib/config" import { createChatMessageTransformHandler } from "../lib/hooks" import { Logger } from "../lib/logger" import { createSessionState, type WithParts, type SessionState } from "../lib/state" +import { isSyntheticMessage } from "../lib/messages/query" import { mkdtempSync, rmSync } from "node:fs" import { join } from "node:path" import { tmpdir } from "node:os" @@ -205,9 +206,9 @@ test("nudge injection: context usage tag injected when modelContextLimit is set" await handler({}, output) - const lastUser = output.messages.find((m: WithParts) => m.info.id === "u2") - assert.ok(lastUser, "last user message should survive") - const textParts = lastUser!.parts.filter((p: any) => p.type === "text") + const suffixMessage = output.messages.find((m: WithParts) => isSyntheticMessage(m)) + assert.ok(suffixMessage, "suffix message should be created") + const textParts = suffixMessage!.parts.filter((p: any) => p.type === "text") const combinedText = textParts.map((p: any) => p.text).join("") assert.ok(combinedText.includes("Context usage:"), "should inject context usage tag") }) @@ -410,7 +411,7 @@ test("message ID injection: IDs are appended to tool parts", async () => { // ─── Test: Visible ID range injection ─────────────────────────────────────── -test("visible ID range: range tag injected into last user message", async () => { +test("visible ID range: range tag injected into suffix message", async () => { const { state, handler } = setupPipeline(SID_A, {}, { modelContextLimit: 200000, }) @@ -427,9 +428,9 @@ test("visible ID range: range tag injected into last user message", async () => await handler({}, output) - const lastUser = output.messages.find((m: WithParts) => m.info.id === "u3") - assert.ok(lastUser) - const textParts = lastUser!.parts.filter((p: any) => p.type === "text") + const suffixMessage = output.messages.find((m: WithParts) => isSyntheticMessage(m)) + assert.ok(suffixMessage, "suffix message should be created") + const textParts = suffixMessage!.parts.filter((p: any) => p.type === "text") const combinedText = textParts.map((p: any) => p.text).join("") assert.ok( combinedText.includes("[Visible message IDs:"), @@ -585,8 +586,8 @@ test("mixed messages: only valid messages survive, IDs assigned to survivors", a await handler({}, output) - assert.equal(output.messages.length, 3, "only 3 valid messages should survive") - const ids = output.messages.map((m: WithParts) => m.info.id) + assert.equal(output.messages.length, 4, "3 valid messages + 1 suffix message") + const ids = output.messages.filter((m: WithParts) => !isSyntheticMessage(m)).map((m: WithParts) => m.info.id) assert.deepEqual(ids, ["u1", "a1", "u2"]) assert.equal(state.messageIds.byRawId.get("u1"), "m00001") diff --git a/tests/e2e-message-transform.test.ts b/tests/e2e-message-transform.test.ts index 31c6dd6..7a0fe85 100644 --- a/tests/e2e-message-transform.test.ts +++ b/tests/e2e-message-transform.test.ts @@ -19,6 +19,7 @@ import type { PluginConfig } from "../lib/config" import { createChatMessageTransformHandler } from "../lib/hooks" import { Logger } from "../lib/logger" import { createSessionState, saveSessionState, type WithParts, type SessionState } from "../lib/state" +import { isSyntheticMessage } from "../lib/messages/query" import { mkdtempSync, rmSync } from "node:fs" import { join } from "node:path" import { tmpdir } from "node:os" @@ -213,10 +214,10 @@ test("basic pipeline: assigns message IDs and preserves all messages", async () await handler({}, output) - // All 5 messages should survive the pipeline - assert.equal(output.messages.length, 5) + // All 5 messages + 1 suffix message should survive the pipeline + assert.equal(output.messages.length, 6) - // Message IDs should be assigned + // Message IDs should be assigned (suffix message excluded from ref assignment) assert.equal(state.messageIds.byRawId.get("u1"), "m00001") assert.equal(state.messageIds.byRawId.get("a1"), "m00002") assert.equal(state.messageIds.byRawId.get("u2"), "m00003") @@ -281,10 +282,11 @@ test("filterMessagesInPlace: removes messages without valid info", async () => { await handler({}, output) - // Only 2 valid messages survive - assert.equal(output.messages.length, 2) - assert.equal(output.messages[0].info.id, "u1") - assert.equal(output.messages[1].info.id, "a1") + // Only 2 valid messages + 1 suffix message survive + assert.equal(output.messages.length, 3) + const realMessages = output.messages.filter((m: WithParts) => !isSyntheticMessage(m)) + assert.equal(realMessages[0].info.id, "u1") + assert.equal(realMessages[1].info.id, "a1") }) // ─── Test: Hallucinated tags are stripped ────────────────────────────────────