From 05097ea428abfbb0020daa19f52884954932189e Mon Sep 17 00:00:00 2001 From: Sanjays2402 <51058514+Sanjays2402@users.noreply.github.com> Date: Sat, 9 May 2026 12:19:44 -0700 Subject: [PATCH 1/2] fix(memos-local-plugin): salvage partially structured L3 abstraction drafts Closes #1668 Before: `world_model_generate` aborted with `LLM_OUTPUT_MALFORMED` whenever the LLM returned a slightly-off draft, e.g. - `title` empty / non-string - `inference` / `constraints` returned as a string or non-array - list entries returned as plain strings or `{ body: ... }` shapes - `domain_tags` returned as a comma-joined string This made the L3 pipeline unusable on smaller / non-strict structured-output providers (e.g. `glm-4-flashx-250414`) even though the payload had usable content downstream. After: the inline `validate` callback is removed and `normaliseDraft` salvages the wire format: - `toEntries` accepts: * arrays (canonical) * a single string (becomes one description-only entry) * a single object (wrapped in an array) * per-entry strings (`["foo"]` -> `[{label:"", description:"foo"}]`) * per-entry objects keyed by `body` / `text` / `content` / `detail` / `summary` / `value` instead of `description`, and `name` / `title` / `heading` / `key` instead of `label` * `evidence_ids` / `evidence` aliases, including comma-joined strings - `normaliseTags` accepts: * arrays of strings (canonical) * comma / semicolon / newline-joined strings ("a, b, c" -> ["a","b","c"]) * arrays of `{label|name|tag|value|key}` objects - `deriveTitle` falls back through: cleaned LLM title -> first non-empty inference / environment / constraints label or description -> first non-heading body line -> joined domain tags A soft floor (`assertDraftMinimallyUsable`) still rejects drafts that are fully empty after normalization, so downstream validators continue to get the final say. When the parser had to coerce the wire format, an `l3.abstract.draft_salvaged` info log captures which keys were non-canonical for observability. Tests: 12/12 in `tests/unit/memory/l3/abstract.test.ts` (added 7 new salvage cases covering each scenario from the issue). --- .../core/memory/l3/abstract.ts | 318 +++++++++++++++--- .../tests/unit/memory/l3/abstract.test.ts | 153 ++++++++- 2 files changed, 430 insertions(+), 41 deletions(-) diff --git a/apps/memos-local-plugin/core/memory/l3/abstract.ts b/apps/memos-local-plugin/core/memory/l3/abstract.ts index 0bde96fb..5f256f26 100644 --- a/apps/memos-local-plugin/core/memory/l3/abstract.ts +++ b/apps/memos-local-plugin/core/memory/l3/abstract.ts @@ -89,6 +89,16 @@ export async function abstractDraft( const evidenceLang = detectDominantLanguage(langSamples); try { + // We deliberately do *not* pass an inline `validate` to `completeJson` + // here. The LLM commonly returns *partially* structured drafts on + // smaller / non-strict providers (e.g. empty `title`, `inference` / + // `constraints` returned as strings or `{body}` shapes, comma-joined + // `domain_tags`). Our `normaliseDraft` below salvages those shapes + // into a `L3AbstractionDraft`. After normalization we run a *soft* + // floor check: only if even the salvaged draft is empty (no triple + // facets, no body, no domain tags) do we treat the response as + // unusable. Downstream validators still get the final say on whether + // the salvaged draft is good enough to persist. const rsp = await llm.completeJson>( [ { role: "system", content: L3_ABSTRACTION_PROMPT.system }, @@ -102,30 +112,32 @@ export async function abstractDraft( temperature: 0.15, malformedRetries: 1, schemaHint: `{"title":"...","domain_tags":["..."],"environment":[{"label":"...","description":"...","evidenceIds":["..."]}],"inference":[...],"constraints":[...],"body":"markdown","confidence":0..1,"supersedes_world_ids":[]}`, - validate: (v) => { - const o = v as Record; - if (typeof o.title !== "string" || !(o.title as string).trim()) { - throw new MemosError( - ERROR_CODES.LLM_OUTPUT_MALFORMED, - "l3.abstraction: 'title' must be a non-empty string", - { got: o.title }, - ); - } - const triple = ["environment", "inference", "constraints"]; - for (const k of triple) { - if (!Array.isArray(o[k])) { - throw new MemosError( - ERROR_CODES.LLM_OUTPUT_MALFORMED, - `l3.abstraction: '${k}' must be an array`, - { got: o[k] }, - ); - } - } - }, }, ); const draft = normaliseDraft(rsp.value); + assertDraftMinimallyUsable(draft, rsp.value); + if (draftWasSalvaged(draft, rsp.value)) { + log.info("l3.abstract.draft_salvaged", { + clusterKey: input.cluster.key, + rawTitleType: typeof (rsp.value as Record).title, + rawTitleEmpty: + typeof (rsp.value as Record).title !== "string" || + !((rsp.value as Record).title as string).trim(), + rawTagsType: Array.isArray((rsp.value as Record).domain_tags) + ? "array" + : typeof (rsp.value as Record).domain_tags, + environmentRawType: Array.isArray((rsp.value as Record).environment) + ? "array" + : typeof (rsp.value as Record).environment, + inferenceRawType: Array.isArray((rsp.value as Record).inference) + ? "array" + : typeof (rsp.value as Record).inference, + constraintsRawType: Array.isArray((rsp.value as Record).constraints) + ? "array" + : typeof (rsp.value as Record).constraints, + }); + } if (deps.validate) deps.validate(draft); return { ok: true, draft }; } catch (err) { @@ -260,13 +272,22 @@ function packPolicy( function normaliseDraft(value: Record): L3AbstractionDraft { const triple = pickTriple(value); + const body = typeof value.body === "string" ? sanitizeDerivedMarkdown(value.body) : ""; + const domainTags = normaliseTags(value.domain_tags); + const title = deriveTitle(value.title, { + environment: triple.environment, + inference: triple.inference, + constraints: triple.constraints, + body, + domainTags, + }); return { - title: sanitizeDerivedText(value.title), - domainTags: normaliseTags(value.domain_tags), + title, + domainTags, environment: triple.environment, inference: triple.inference, constraints: triple.constraints, - body: typeof value.body === "string" ? sanitizeDerivedMarkdown(value.body) : "", + body, confidence: clamp01(typeof value.confidence === "number" ? value.confidence : 0.5), supersedesWorldIds: Array.isArray(value.supersedes_world_ids) ? (value.supersedes_world_ids as unknown[]) @@ -276,6 +297,146 @@ function normaliseDraft(value: Record): L3AbstractionDraft { }; } +/** + * Derive a usable title when the LLM returned an empty / non-string one. + * Order of preference: + * 1. The cleaned LLM-provided title. + * 2. The first inference / environment / constraints entry's label or + * description (whichever is non-empty), trimmed to ~80 chars. + * 3. The first non-heading markdown line of `body`, trimmed. + * 4. A domain-tag joined fallback like `"docker, alpine, pip"`. + * 5. Empty string — caller (`assertDraftMinimallyUsable`) will reject if + * the rest of the draft is also empty. + */ +function deriveTitle( + raw: unknown, + ctx: { + environment: L3AbstractionDraftEntry[]; + inference: L3AbstractionDraftEntry[]; + constraints: L3AbstractionDraftEntry[]; + body: string; + domainTags: string[]; + }, +): string { + const cleaned = sanitizeDerivedText(raw); + if (cleaned) return cleaned; + + const firstEntryText = (entries: L3AbstractionDraftEntry[]): string => { + for (const e of entries) { + const candidate = e.label || stripMarkdownToText(e.description); + if (candidate) return candidate; + } + return ""; + }; + const fromInference = firstEntryText(ctx.inference); + if (fromInference) return shortenForTitle(fromInference); + const fromEnvironment = firstEntryText(ctx.environment); + if (fromEnvironment) return shortenForTitle(fromEnvironment); + const fromConstraints = firstEntryText(ctx.constraints); + if (fromConstraints) return shortenForTitle(fromConstraints); + + if (ctx.body) { + for (const line of ctx.body.split(/\r?\n/)) { + const stripped = line.replace(/^\s*(?:#+\s+|[-*+]\s+|\d+\.\s+)/, "").trim(); + if (stripped) return shortenForTitle(stripMarkdownToText(stripped)); + } + } + if (ctx.domainTags.length > 0) { + return shortenForTitle(ctx.domainTags.join(", ")); + } + return ""; +} + +function stripMarkdownToText(s: string): string { + return s + .replace(/[`*_~]+/g, "") + .replace(/!?\[([^\]]*)\]\([^)]*\)/g, "$1") + .replace(/\s+/g, " ") + .trim(); +} + +function shortenForTitle(s: string): string { + const flat = s.replace(/\s+/g, " ").trim(); + if (flat.length <= 80) return flat; + return flat.slice(0, 79) + "…"; +} + +/** + * Ensure the salvaged draft has *something* worth persisting. We accept a + * draft as long as it has at least one of: + * - a non-empty title (post-derivation), + * - any triple facet entry, + * - a non-empty body, + * - at least one domain tag. + * + * This keeps the parser permissive while preventing fully empty drafts + * (which downstream code would happily index as garbage) from sneaking + * through. Downstream validators still apply stricter checks. + */ +function assertDraftMinimallyUsable( + draft: L3AbstractionDraft, + raw: Record, +): void { + const hasTitle = draft.title.trim().length > 0; + const hasTripleEntry = + draft.environment.length > 0 || + draft.inference.length > 0 || + draft.constraints.length > 0; + const hasBody = draft.body.trim().length > 0; + const hasTags = draft.domainTags.length > 0; + if (hasTitle || hasTripleEntry || hasBody || hasTags) return; + throw new MemosError( + ERROR_CODES.LLM_OUTPUT_MALFORMED, + "l3.abstraction: draft is empty after normalization", + { + rawKeys: Object.keys(raw), + title: raw.title, + environment: Array.isArray(raw.environment) ? raw.environment.length : typeof raw.environment, + inference: Array.isArray(raw.inference) ? raw.inference.length : typeof raw.inference, + constraints: Array.isArray(raw.constraints) + ? raw.constraints.length + : typeof raw.constraints, + }, + ); +} + +/** + * True iff `normaliseDraft` had to coerce the raw payload — useful for + * an `info` log so operators can see when the parser is salvaging vs. + * accepting clean drafts. + */ +function draftWasSalvaged( + draft: L3AbstractionDraft, + raw: Record, +): boolean { + const rawTitleEmpty = + typeof raw.title !== "string" || !raw.title.trim(); + if (rawTitleEmpty && draft.title) return true; + const tripleKeys = ["environment", "inference", "constraints"] as const; + for (const k of tripleKeys) { + if (!Array.isArray(raw[k])) { + // Anything non-array on the wire that we still produced entries for + // (or even legitimately empty arrays for) counts as salvaged. + return true; + } + for (const entry of raw[k] as unknown[]) { + if (typeof entry === "string") return true; + if (entry && typeof entry === "object") { + const o = entry as Record; + // Drafts that only used `body` instead of `description`, or that + // omitted `label` entirely, were salvaged into the canonical + // `{label, description}` shape. + const hasCanonicalLabel = typeof o.label === "string"; + const hasCanonicalDescription = typeof o.description === "string"; + if (!hasCanonicalLabel || !hasCanonicalDescription) return true; + } + } + } + // String / non-array `domain_tags` that we still produced tags for. + if (!Array.isArray(raw.domain_tags) && draft.domainTags.length > 0) return true; + return false; +} + function pickTriple(value: Record): { environment: L3AbstractionDraftEntry[]; inference: L3AbstractionDraftEntry[]; @@ -289,23 +450,82 @@ function pickTriple(value: Record): { } function toEntries(raw: unknown): L3AbstractionDraftEntry[] { - if (!Array.isArray(raw)) return []; - return raw - .map((r): L3AbstractionDraftEntry | null => { - if (!r || typeof r !== "object") return null; - const o = r as Record; - const label = typeof o.label === "string" ? sanitizeDerivedText(o.label) : ""; - const description = typeof o.description === "string" ? sanitizeDerivedMarkdown(o.description) : ""; - if (!label && !description) return null; - const evidenceIds = Array.isArray(o.evidenceIds) - ? (o.evidenceIds as unknown[]).filter((s): s is string => typeof s === "string") - : undefined; - return { label, description, evidenceIds }; - }) + // Accept the canonical array shape, but also salvage common LLM mistakes: + // - whole field returned as a single string -> treat as one entry's body + // - whole field returned as a single object -> wrap in an array + // - per-entry strings -> treat as the entry's `description` + // - per-entry objects using `body` / `text` / `content` instead of + // `description`, or `name` / `title` instead of `label` + let arr: unknown[]; + if (Array.isArray(raw)) { + arr = raw; + } else if (typeof raw === "string") { + const cleaned = raw.trim(); + arr = cleaned ? [cleaned] : []; + } else if (raw && typeof raw === "object") { + arr = [raw]; + } else { + return []; + } + + return arr + .map((r): L3AbstractionDraftEntry | null => coerceEntry(r)) .filter((e): e is L3AbstractionDraftEntry => e !== null) .slice(0, 16); } +function coerceEntry(r: unknown): L3AbstractionDraftEntry | null { + if (typeof r === "string") { + const description = sanitizeDerivedMarkdown(r); + if (!description) return null; + return { label: "", description }; + } + if (!r || typeof r !== "object") return null; + const o = r as Record; + + const labelRaw = firstString(o.label, o.name, o.title, o.heading, o.key); + const descriptionRaw = firstString( + o.description, + o.body, + o.text, + o.content, + o.detail, + o.summary, + o.value, + ); + + const label = labelRaw ? sanitizeDerivedText(labelRaw) : ""; + const description = descriptionRaw ? sanitizeDerivedMarkdown(descriptionRaw) : ""; + if (!label && !description) return null; + + const evidenceIds = collectEvidenceIds(o); + return evidenceIds ? { label, description, evidenceIds } : { label, description }; +} + +function firstString(...candidates: unknown[]): string | undefined { + for (const c of candidates) { + if (typeof c === "string" && c.trim().length > 0) return c; + } + return undefined; +} + +function collectEvidenceIds(o: Record): string[] | undefined { + const raw = o.evidenceIds ?? o.evidence_ids ?? o.evidence; + if (Array.isArray(raw)) { + const ids = (raw as unknown[]).filter((s): s is string => typeof s === "string"); + return ids.length > 0 ? ids : undefined; + } + if (typeof raw === "string" && raw.trim().length > 0) { + // Allow `"po_1, po_2"` style strings for forgiving providers. + const ids = raw + .split(/[\s,]+/) + .map((s) => s.trim()) + .filter((s) => s.length > 0); + return ids.length > 0 ? ids : undefined; + } + return undefined; +} + function buildBody(draft: L3AbstractionDraft): string { if (draft.body && draft.body.length > 0) return draft.body; const lines: string[] = [`# ${draft.title}`, ""]; @@ -328,10 +548,30 @@ function buildBody(draft: L3AbstractionDraft): string { } function normaliseTags(raw: unknown): string[] { - if (!Array.isArray(raw)) return []; + // Canonical shape: array of strings. Also accept comma/whitespace + // separated strings (`"docker, alpine, pip"`) and arrays that mix + // strings and `{label}` / `{name}` / `{tag}` objects, since some + // providers return that shape under structured-output mode. + let candidates: unknown[]; + if (Array.isArray(raw)) { + candidates = raw as unknown[]; + } else if (typeof raw === "string") { + candidates = raw.split(/[,;\n]+/); + } else { + return []; + } + const flat: string[] = []; + for (const c of candidates) { + if (typeof c === "string") { + flat.push(c); + } else if (c && typeof c === "object") { + const o = c as Record; + const fromObj = firstString(o.label, o.name, o.tag, o.value, o.key); + if (fromObj) flat.push(fromObj); + } + } return dedupeStrings( - (raw as unknown[]) - .filter((s): s is string => typeof s === "string") + flat .map((s) => s.trim().toLowerCase()) .filter((s) => s.length > 0 && s.length < 24), ).slice(0, 6); diff --git a/apps/memos-local-plugin/tests/unit/memory/l3/abstract.test.ts b/apps/memos-local-plugin/tests/unit/memory/l3/abstract.test.ts index ff759b17..480e6eea 100644 --- a/apps/memos-local-plugin/tests/unit/memory/l3/abstract.test.ts +++ b/apps/memos-local-plugin/tests/unit/memory/l3/abstract.test.ts @@ -180,7 +180,7 @@ describe("memory/l3/abstract", () => { expect(res.detail).toContain("boom"); }); - it("returns llm_failed when the LLM returns missing triple", async () => { + it("salvages missing triple into an empty-but-titled draft instead of failing", async () => { const llm = fakeLlm({ completeJson: { [OP]: { @@ -193,10 +193,159 @@ describe("memory/l3/abstract", () => { { cluster: mkCluster(), evidenceByPolicy: new Map() }, { llm, log, config: cfg() }, ); + // Issue #1668: rather than aborting, the parser now returns a salvaged + // draft. The title alone is enough to keep the entry alive; downstream + // validators decide whether the (empty) facets are good enough to + // persist. + expect(res.ok).toBe(true); + if (!res.ok) return; + expect(res.draft.title).toBe("missing triple"); + expect(res.draft.environment).toEqual([]); + expect(res.draft.inference).toEqual([]); + expect(res.draft.constraints).toEqual([]); + }); + + it("returns llm_failed only when even normalisation can't recover anything", async () => { + const llm = fakeLlm({ + completeJson: { + [OP]: { + // no title, no triple, no body, no domain tags — truly empty. + }, + }, + }); + const res = await abstractDraft( + { cluster: mkCluster(), evidenceByPolicy: new Map() }, + { llm, log, config: cfg() }, + ); expect(res.ok).toBe(false); if (res.ok) return; expect(res.reason).toBe("llm_failed"); - expect(res.detail ?? "").toMatch(/environment|inference|constraints/); + expect(res.detail ?? "").toMatch(/empty after normalization/); + }); + + it("salvages string list entries into description-only entries (issue #1668)", async () => { + const llm = fakeLlm({ + completeJson: { + [OP]: { + title: "alpine", + domain_tags: ["alpine"], + environment: ["runs on musl libc"], + inference: ["binary wheels miss glibc"], + constraints: ["avoid binary install"], + confidence: 0.5, + }, + }, + }); + const res = await abstractDraft( + { cluster: mkCluster(), evidenceByPolicy: new Map() }, + { llm, log, config: cfg() }, + ); + expect(res.ok).toBe(true); + if (!res.ok) return; + expect(res.draft.environment).toEqual([ + { label: "", description: "runs on musl libc" }, + ]); + expect(res.draft.inference[0]!.description).toBe("binary wheels miss glibc"); + expect(res.draft.constraints[0]!.description).toBe("avoid binary install"); + }); + + it("salvages {body: ...} list entries to canonical {label, description}", async () => { + const llm = fakeLlm({ + completeJson: { + [OP]: { + title: "alpine", + domain_tags: ["alpine"], + environment: [{ label: "musl", body: "no glibc available" }], + inference: [{ body: "wheels need glibc" }], + constraints: [{ name: "no-binary", text: "pip install --no-binary" }], + confidence: 0.5, + }, + }, + }); + const res = await abstractDraft( + { cluster: mkCluster(), evidenceByPolicy: new Map() }, + { llm, log, config: cfg() }, + ); + expect(res.ok).toBe(true); + if (!res.ok) return; + expect(res.draft.environment).toEqual([ + { label: "musl", description: "no glibc available" }, + ]); + expect(res.draft.inference[0]).toEqual({ + label: "", + description: "wheels need glibc", + }); + expect(res.draft.constraints[0]).toEqual({ + label: "no-binary", + description: "pip install --no-binary", + }); + }); + + it("derives a title from inference when the LLM left it blank (issue #1668)", async () => { + const llm = fakeLlm({ + completeJson: { + [OP]: { + title: " ", + domain_tags: ["alpine", "pip"], + environment: [], + inference: [{ label: "Binary wheels fail on alpine", description: "musl" }], + constraints: [], + confidence: 0.5, + }, + }, + }); + const res = await abstractDraft( + { cluster: mkCluster(), evidenceByPolicy: new Map() }, + { llm, log, config: cfg() }, + ); + expect(res.ok).toBe(true); + if (!res.ok) return; + expect(res.draft.title).toBe("Binary wheels fail on alpine"); + }); + + it("falls back to domain tags when title and triple are unhelpful", async () => { + const llm = fakeLlm({ + completeJson: { + [OP]: { + title: null, + domain_tags: ["docker", "alpine", "pip"], + environment: [], + inference: [], + constraints: [], + body: "", + confidence: 0.4, + }, + }, + }); + const res = await abstractDraft( + { cluster: mkCluster(), evidenceByPolicy: new Map() }, + { llm, log, config: cfg() }, + ); + expect(res.ok).toBe(true); + if (!res.ok) return; + expect(res.draft.title).toBe("docker, alpine, pip"); + }); + + it("splits a comma-joined domain_tags string into an array (issue #1668)", async () => { + const llm = fakeLlm({ + completeJson: { + [OP]: { + title: "alpine", + domain_tags: "Alpine, Python, pip, ", + environment: [{ label: "musl", description: "x" }], + inference: [], + constraints: [], + confidence: 0.5, + }, + }, + }); + const res = await abstractDraft( + { cluster: mkCluster(), evidenceByPolicy: new Map() }, + { llm, log, config: cfg() }, + ); + expect(res.ok).toBe(true); + if (!res.ok) return; + expect(res.draft.domainTags).toEqual(["alpine", "python", "pip"]); }); it("buildWorldModelRow wires draft + cluster into a persist-ready row", () => { From e3142deb66896f891d962d8d133be8d87a30e2bc Mon Sep 17 00:00:00 2001 From: Sanjays2402 <51058514+Sanjays2402@users.noreply.github.com> Date: Sat, 9 May 2026 12:49:09 -0700 Subject: [PATCH 2/2] fix(memos-local-plugin): polish L3 draft salvage edges from PR review Address Copilot review on PR #1672: - collectEvidenceIds: trim each id and drop empty entries in the array branch so providers that return `["po_1 ", " tr_2"]` no longer break UI evidence-chip classification (`startsWith("po_")` was failing on leading whitespace). Mirrors the existing string-branch behavior. - buildBody: skip the empty bold prefix when an entry has no label, so string-only entries render as `- ` instead of `- **** \u2014 `. Now that string entries are explicitly supported via coerceEntry, this rendering is reachable. - deriveTitle JSDoc: describe the actual heading/list-prefix-stripping behavior instead of claiming we skip heading lines. - normaliseTags comment: tighten to "comma/semicolon/newline-separated" to match the regex; note that whitespace inside a tag is preserved so multi-word tags survive. Tests: - New: array evidence ids with leading/trailing whitespace and empty strings get trimmed and dropped. - New: a draft mixing labelled and string-only entries renders the string-only entry as a plain bullet (no empty bold, no em-dash). - All 14 abstract.test.ts tests pass; tsc --noEmit clean. --- .../core/memory/l3/abstract.ts | 25 +++++--- .../tests/unit/memory/l3/abstract.test.ts | 59 +++++++++++++++++++ 2 files changed, 75 insertions(+), 9 deletions(-) diff --git a/apps/memos-local-plugin/core/memory/l3/abstract.ts b/apps/memos-local-plugin/core/memory/l3/abstract.ts index 5f256f26..a7089d5a 100644 --- a/apps/memos-local-plugin/core/memory/l3/abstract.ts +++ b/apps/memos-local-plugin/core/memory/l3/abstract.ts @@ -303,7 +303,8 @@ function normaliseDraft(value: Record): L3AbstractionDraft { * 1. The cleaned LLM-provided title. * 2. The first inference / environment / constraints entry's label or * description (whichever is non-empty), trimmed to ~80 chars. - * 3. The first non-heading markdown line of `body`, trimmed. + * 3. The first non-empty markdown line of `body`, with leading + * heading/list prefixes (`#`, `-`, `*`, `+`, `1.`) stripped, trimmed. * 4. A domain-tag joined fallback like `"docker, alpine, pip"`. * 5. Empty string — caller (`assertDraftMinimallyUsable`) will reject if * the rest of the draft is also empty. @@ -512,7 +513,10 @@ function firstString(...candidates: unknown[]): string | undefined { function collectEvidenceIds(o: Record): string[] | undefined { const raw = o.evidenceIds ?? o.evidence_ids ?? o.evidence; if (Array.isArray(raw)) { - const ids = (raw as unknown[]).filter((s): s is string => typeof s === "string"); + const ids = (raw as unknown[]) + .filter((s): s is string => typeof s === "string") + .map((s) => s.trim()) + .filter((s) => s.length > 0); return ids.length > 0 ? ids : undefined; } if (typeof raw === "string" && raw.trim().length > 0) { @@ -529,29 +533,32 @@ function collectEvidenceIds(o: Record): string[] | undefined { function buildBody(draft: L3AbstractionDraft): string { if (draft.body && draft.body.length > 0) return draft.body; const lines: string[] = [`# ${draft.title}`, ""]; + const renderEntry = (e: L3AbstractionDraftEntry): string => + e.label ? `- **${e.label}** — ${e.description}` : `- ${e.description}`; if (draft.environment.length > 0) { lines.push("## Environment (ℰ)"); - for (const e of draft.environment) lines.push(`- **${e.label}** — ${e.description}`); + for (const e of draft.environment) lines.push(renderEntry(e)); lines.push(""); } if (draft.inference.length > 0) { lines.push("## Inference rules (ℐ)"); - for (const e of draft.inference) lines.push(`- **${e.label}** — ${e.description}`); + for (const e of draft.inference) lines.push(renderEntry(e)); lines.push(""); } if (draft.constraints.length > 0) { lines.push("## Constraints (C)"); - for (const e of draft.constraints) lines.push(`- **${e.label}** — ${e.description}`); + for (const e of draft.constraints) lines.push(renderEntry(e)); lines.push(""); } return lines.join("\n").trim(); } function normaliseTags(raw: unknown): string[] { - // Canonical shape: array of strings. Also accept comma/whitespace - // separated strings (`"docker, alpine, pip"`) and arrays that mix - // strings and `{label}` / `{name}` / `{tag}` objects, since some - // providers return that shape under structured-output mode. + // Canonical shape: array of strings. Also accept comma/semicolon/newline- + // separated strings (`"docker, alpine, pip"`; whitespace within a tag is + // preserved so multi-word tags survive) and arrays that mix strings and + // `{label}` / `{name}` / `{tag}` objects, since some providers return + // that shape under structured-output mode. let candidates: unknown[]; if (Array.isArray(raw)) { candidates = raw as unknown[]; diff --git a/apps/memos-local-plugin/tests/unit/memory/l3/abstract.test.ts b/apps/memos-local-plugin/tests/unit/memory/l3/abstract.test.ts index 480e6eea..b9ea1a14 100644 --- a/apps/memos-local-plugin/tests/unit/memory/l3/abstract.test.ts +++ b/apps/memos-local-plugin/tests/unit/memory/l3/abstract.test.ts @@ -348,6 +348,65 @@ describe("memory/l3/abstract", () => { expect(res.draft.domainTags).toEqual(["alpine", "python", "pip"]); }); + it("trims and drops empty evidence ids from array entries", async () => { + const llm = fakeLlm({ + completeJson: { + [OP]: { + title: "alpine", + domain_tags: ["alpine"], + environment: [ + { + label: "musl", + description: "x", + // Provider returned strings with leading/trailing whitespace + // and an empty entry; the UI evidence chip classifier relies + // on `id.startsWith("po_")` so trimming is required. + evidenceIds: ["po_1 ", " tr_2", "", " "], + }, + ], + inference: [], + constraints: [], + confidence: 0.5, + }, + }, + }); + const res = await abstractDraft( + { cluster: mkCluster(), evidenceByPolicy: new Map() }, + { llm, log, config: cfg() }, + ); + expect(res.ok).toBe(true); + if (!res.ok) return; + expect(res.draft.environment[0]!.evidenceIds).toEqual(["po_1", "tr_2"]); + }); + + it("renders string-only entries without an empty bold label in body", () => { + const cluster = mkCluster(); + const row = buildWorldModelRow({ + draft: { + title: "Alpine python deps", + domainTags: ["alpine"], + environment: [ + { label: "musl", description: "no glibc" }, + { label: "", description: "runs on musl libc" }, + ], + inference: [{ label: "", description: "binary wheels miss glibc" }], + constraints: [], + body: "", + confidence: 0.5, + }, + cluster, + episodeIds: ["ep_a"] as EpisodeId[], + inducedBy: OP, + now: NOW, + id: "wm_test" as Parameters[0]["id"], + }); + expect(row.body).toContain("- **musl** \u2014 no glibc"); + expect(row.body).toContain("- runs on musl libc"); + expect(row.body).toContain("- binary wheels miss glibc"); + expect(row.body).not.toContain("- **** \u2014"); + expect(row.body).not.toMatch(/-\s+\*\*\s*\*\*/); + }); + it("buildWorldModelRow wires draft + cluster into a persist-ready row", () => { const cluster = mkCluster(); const row = buildWorldModelRow({