diff --git a/apps/memos-local-openclaw/index.ts b/apps/memos-local-openclaw/index.ts
index 5e2245198..e99652a60 100644
--- a/apps/memos-local-openclaw/index.ts
+++ b/apps/memos-local-openclaw/index.ts
@@ -31,6 +31,7 @@ import { SkillInstaller } from "./src/skill/installer";
 import { Summarizer } from "./src/ingest/providers";
 import { MEMORY_GUIDE_SKILL_MD } from "./src/skill/bundled-memory-guide";
 import { Telemetry } from "./src/telemetry";
+import { withTimeout } from "./src/shared/with-timeout";
 
 /** Remove near-duplicate hits based on summary word overlap (>70%). Keeps first (highest-scored) hit. */
 
@@ -1895,7 +1896,25 @@ Groups: ${groupNames.length > 0 ? groupNames.join(", ") : "(none)"}`,
         .catch((err: any) => { ctx.log.debug(`auto-recall: hub search failed (${err})`); return { hits: [] as any[], meta: {} }; })
       : Promise.resolve({ hits: [] as any[], meta: {} });
 
-    const [result, arHubResult] = await Promise.all([arLocalP, arHubP]);
+    // #1452: hard timeout around the parallel recall fan-out so a slow
+    // embedder/LLM can never block the prompt-build critical path. On
+    // timeout we fail open with no candidates and the hook returns
+    // without injecting memories.
+    const autoRecallTimeoutMs =
+      ctx.config.recall?.autoRecallTimeoutMs ?? DEFAULTS.autoRecallTimeoutMs;
+    const phase1 = await withTimeout(
+      Promise.all([arLocalP, arHubP]),
+      autoRecallTimeoutMs,
+      "auto-recall.search",
+      ctx.log,
+    );
+    if (phase1 === null) {
+      const dur = performance.now() - recallT0;
+      store.recordToolCall("memory_search", dur, false);
+      try { store.recordApiLog("memory_search", { type: "auto_recall", query }, `timeout after ${autoRecallTimeoutMs}ms`, dur, false); } catch (_) { /* best-effort */ }
+      return;
+    }
+    const [result, arHubResult] = phase1;
 
     const localHits = result.hits.filter((h) => h.origin !== "hub-memory");
     const hubLocalHits = result.hits.filter((h) => h.origin === "hub-memory");
@@ -1986,7 +2005,16 @@ Groups: ${groupNames.length > 0 ? groupNames.join(", ") : "(none)"}`,
 
     let filteredHits = allRawHits;
     let sufficient = false;
-    const filterResult = await summarizer.filterRelevant(query, mergedForFilter);
+    // #1452: hard timeout around the recall LLM filter so a slow model
+    // can never block the prompt-build critical path. Fail open with the
+    // unfiltered candidate set; the deduper + later prompt size guards
+    // still apply.
+    const filterResult = await withTimeout(
+      summarizer.filterRelevant(query, mergedForFilter),
+      autoRecallTimeoutMs,
+      "auto-recall.filter",
+      ctx.log,
+    );
     if (filterResult !== null) {
       sufficient = filterResult.sufficient;
       if (filterResult.relevant.length > 0) {
diff --git a/apps/memos-local-openclaw/src/config.ts b/apps/memos-local-openclaw/src/config.ts
index 150b09cc4..e268e4e1f 100644
--- a/apps/memos-local-openclaw/src/config.ts
+++ b/apps/memos-local-openclaw/src/config.ts
@@ -66,6 +66,7 @@ export function resolveConfig(raw: Partial<MemosLocalConfig> | undefined, stateD
       mmrLambda: cfg.recall?.mmrLambda ?? DEFAULTS.mmrLambda,
       recencyHalfLifeDays: cfg.recall?.recencyHalfLifeDays ?? DEFAULTS.recencyHalfLifeDays,
       vectorSearchMaxChunks: cfg.recall?.vectorSearchMaxChunks ?? DEFAULTS.vectorSearchMaxChunks,
+      autoRecallTimeoutMs: cfg.recall?.autoRecallTimeoutMs ?? DEFAULTS.autoRecallTimeoutMs,
     },
     dedup: {
       similarityThreshold: cfg.dedup?.similarityThreshold ?? DEFAULTS.dedupSimilarityThreshold,
diff --git a/apps/memos-local-openclaw/src/shared/with-timeout.ts b/apps/memos-local-openclaw/src/shared/with-timeout.ts
new file mode 100644
index 000000000..5c7b12f75
--- /dev/null
+++ b/apps/memos-local-openclaw/src/shared/with-timeout.ts
@@ -0,0 +1,37 @@
+/**
+ * Race a promise against a timeout. Resolves to `null` on timeout instead of
+ * rejecting — clean fail-open semantics for best-effort work like auto-recall
+ * where a slow LLM should never block the critical path (#1452).
+ *
+ * The underlying promise is NOT cancelled (we can't cancel a fetch from here);
+ * we just stop waiting on it. Caller must treat the returned `null` as "give
+ * up, proceed without this result".
+ *
+ * @param p The promise to race.
+ * @param ms Timeout in milliseconds. Non-positive = no timeout (returns `p`).
+ * @param label Short label for the warn log on timeout.
+ * @param log Optional logger; logs a warning when the timeout fires.
+ */
+export function withTimeout<T>(
+  p: Promise<T>,
+  ms: number,
+  label: string,
+  log?: { warn: (msg: string) => void },
+): Promise<T | null> {
+  if (!Number.isFinite(ms) || ms <= 0) return p as Promise<T | null>;
+  let timer: ReturnType<typeof setTimeout> | undefined;
+  const timeout = new Promise<null>((resolve) => {
+    timer = setTimeout(() => {
+      log?.warn(`${label}: timed out after ${ms}ms; falling back`);
+      resolve(null);
+    }, ms);
+    // Don't keep the event loop alive solely for this timer.
+    if (typeof (timer as any)?.unref === "function") (timer as any).unref();
+  });
+  return Promise.race([
+    p.finally(() => {
+      if (timer !== undefined) clearTimeout(timer);
+    }),
+    timeout,
+  ]);
+}
diff --git a/apps/memos-local-openclaw/src/types.ts b/apps/memos-local-openclaw/src/types.ts
index cb08eb1cf..6472cb8f0 100644
--- a/apps/memos-local-openclaw/src/types.ts
+++ b/apps/memos-local-openclaw/src/types.ts
@@ -312,6 +312,17 @@ export interface MemosLocalConfig {
     recencyHalfLifeDays?: number;
     /** Cap vector search to this many most recent chunks. 0 = no cap (search all; may get slower with 200k+ chunks). If you set a cap for performance, use a large value (e.g. 200000–300000) so older memories are still in the window; FTS always searches all. */
     vectorSearchMaxChunks?: number;
+    /**
+     * Hard timeout (ms) for the auto-recall path inside `before_prompt_build`.
+     *
+     * Auto-recall is best-effort enrichment: if the recall LLM/embedding work
+     * takes longer than this, we abandon it and let the prompt build proceed
+     * with no auto-injected memories. Prevents a slow LLM from blocking
+     * gateway startup or first-turn long enough to trip health checks (#1452).
+     *
+     * Default: 8000 ms.
+     */
+    autoRecallTimeoutMs?: number;
   };
   dedup?: {
     similarityThreshold?: number;
@@ -360,6 +371,7 @@ export const DEFAULTS = {
   skillPreferUpgrade: true,
   skillRedactSensitive: true,
   taskAutoFinalizeHours: 4,
+  autoRecallTimeoutMs: 8000,
 } as const;
 
 // ─── Plugin Hooks (OpenClaw integration) ───
diff --git a/apps/memos-local-openclaw/tests/with-timeout.test.ts b/apps/memos-local-openclaw/tests/with-timeout.test.ts
new file mode 100644
index 000000000..c1fc83e16
--- /dev/null
+++ b/apps/memos-local-openclaw/tests/with-timeout.test.ts
@@ -0,0 +1,64 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import { withTimeout } from "../src/shared/with-timeout";
+
+describe("withTimeout", () => {
+  beforeEach(() => {
+    vi.useFakeTimers();
+  });
+
+  afterEach(() => {
+    vi.useRealTimers();
+  });
+
+  it("resolves with the underlying value when the promise wins the race", async () => {
+    const fast = new Promise<string>((resolve) => setTimeout(() => resolve("ok"), 5));
+    const racePromise = withTimeout(fast, 100, "test.fast");
+    await vi.advanceTimersByTimeAsync(5);
+    const result = await racePromise;
+    expect(result).toBe("ok");
+  });
+
+  it("returns null when the timeout fires first (fail-open semantics)", async () => {
+    const slow = new Promise<string>((resolve) => setTimeout(() => resolve("late"), 100));
+    const racePromise = withTimeout(slow, 10, "test.slow");
+    await vi.advanceTimersByTimeAsync(10);
+    const result = await racePromise;
+    expect(result).toBeNull();
+  });
+
+  it("logs a warning on timeout via the supplied logger", async () => {
+    const warn = vi.fn();
+    const slow = new Promise<string>((resolve) => setTimeout(() => resolve("late"), 100));
+    const racePromise = withTimeout(slow, 5, "test.warn", { warn });
+    await vi.advanceTimersByTimeAsync(5);
+    await racePromise;
+    expect(warn).toHaveBeenCalledTimes(1);
+    expect(warn.mock.calls[0][0]).toContain("test.warn");
+    expect(warn.mock.calls[0][0]).toContain("timed out");
+  });
+
+  it("does not time out when ms <= 0 (timeout disabled)", async () => {
+    const p = new Promise<string>((resolve) => setTimeout(() => resolve("done"), 5));
+    const racePromise = withTimeout(p, 0, "test.disabled");
+    await vi.advanceTimersByTimeAsync(5);
+    const result = await racePromise;
+    expect(result).toBe("done");
+  });
+
+  it("propagates rejections from the underlying promise unchanged", async () => {
+    const failing = Promise.reject(new Error("boom"));
+    await expect(withTimeout(failing, 100, "test.reject")).rejects.toThrow("boom");
+  });
+
+  it("simulates the auto-recall hang path: a 30s LLM call falls back well before completion", async () => {
+    // Mimic a slow recall LLM that would hang the gateway critical path.
+    const hangingLLM = new Promise<{ relevant: number[]; sufficient: boolean }>(
+      (resolve) => setTimeout(() => resolve({ relevant: [1, 2], sufficient: true }), 30_000),
+    );
+    const racePromise = withTimeout(hangingLLM, 8000, "auto-recall.filter");
+    // Advance just past the 8s timeout — the underlying 30s promise has not resolved yet.
+    await vi.advanceTimersByTimeAsync(8001);
+    const result = await racePromise;
+    expect(result).toBeNull();
+  });
+});