diff --git a/apps/memos-local-plugin/core/embedding/fetcher.ts b/apps/memos-local-plugin/core/embedding/fetcher.ts
index 40524aac7..de6202f56 100644
--- a/apps/memos-local-plugin/core/embedding/fetcher.ts
+++ b/apps/memos-local-plugin/core/embedding/fetcher.ts
@@ -8,8 +8,12 @@
*/
import { ERROR_CODES, MemosError } from "../../agent-contract/errors.js";
+import { parseRetryAfterMs } from "../util/retry-after.js";
import type { EmbeddingProviderName, ProviderLogger } from "./types.js";
+/** Hard ceiling on any single retry sleep — Retry-After or otherwise. */
+const MAX_RETRY_AFTER_MS = 60_000;
+
export interface HttpPostOpts
{
url: string;
body: TBody;
@@ -46,15 +50,21 @@ export async function httpPostJson(opts: HttpPostOpts): Promise<
if (!resp.ok) {
const text = await safeText(resp);
const transient = resp.status >= 500 || resp.status === 429;
+ const retryAfterMs =
+ resp.status === 429 || resp.status === 503
+ ? parseRetryAfterMs(resp, MAX_RETRY_AFTER_MS)
+ : null;
opts.log.warn("http.non_ok", {
url: opts.url,
status: resp.status,
attempt,
transient,
+ retryAfterMs,
durationMs: Date.now() - start,
});
if (transient && attempt <= maxRetries) {
- await backoff(attempt);
+ const sleepMs = Math.min(retryAfterMs ?? backoffMs(attempt), MAX_RETRY_AFTER_MS);
+ await sleep(sleepMs);
continue;
}
throw new MemosError(
@@ -83,7 +93,7 @@ export async function httpPostJson(opts: HttpPostOpts): Promise<
durationMs: Date.now() - start,
});
if (transient && attempt <= maxRetries) {
- await backoff(attempt);
+ await sleep(Math.min(backoffMs(attempt), MAX_RETRY_AFTER_MS));
continue;
}
throw new MemosError(
@@ -123,10 +133,13 @@ function isTransientError(err: unknown): boolean {
return false;
}
-async function backoff(attempt: number): Promise {
+function backoffMs(attempt: number): number {
const base = 200;
const jitter = Math.floor(Math.random() * 100);
- const ms = base * 2 ** (attempt - 1) + jitter;
+ return base * 2 ** (attempt - 1) + jitter;
+}
+
+async function sleep(ms: number): Promise {
await new Promise((r) => setTimeout(r, ms));
}
diff --git a/apps/memos-local-plugin/core/llm/fetcher.ts b/apps/memos-local-plugin/core/llm/fetcher.ts
index 5e8b0317e..1fc7fb5c5 100644
--- a/apps/memos-local-plugin/core/llm/fetcher.ts
+++ b/apps/memos-local-plugin/core/llm/fetcher.ts
@@ -11,8 +11,12 @@
*/
import { ERROR_CODES, MemosError } from "../../agent-contract/errors.js";
+import { parseRetryAfterMs } from "../util/retry-after.js";
import type { LlmProviderLogger, LlmProviderName } from "./types.js";
+/** Hard ceiling on any single retry sleep — Retry-After or otherwise. */
+const MAX_RETRY_AFTER_MS = 60_000;
+
export interface HttpPostOpts {
url: string;
body: TBody;
@@ -56,15 +60,21 @@ export async function httpPostJson(opts: HttpPostOpts): Promise<
if (!resp.ok) {
const text = await safeText(resp);
const transient = resp.status >= 500 || resp.status === 429;
+ const retryAfterMs =
+ resp.status === 429 || resp.status === 503
+ ? parseRetryAfterMs(resp, MAX_RETRY_AFTER_MS)
+ : null;
opts.log.warn("http.non_ok", {
status: resp.status,
attempt,
transient,
+ retryAfterMs,
durationMs: ms,
});
if (transient && attempt <= opts.maxRetries) {
opts.onRetry?.(attempt);
- await backoff(attempt);
+ const sleepMs = Math.min(retryAfterMs ?? backoffMs(attempt), MAX_RETRY_AFTER_MS);
+ await sleep(sleepMs);
continue;
}
throw new MemosError(
@@ -94,7 +104,7 @@ export async function httpPostJson(opts: HttpPostOpts): Promise<
});
if ((transient || timedOut) && attempt <= opts.maxRetries) {
opts.onRetry?.(attempt);
- await backoff(attempt);
+ await sleep(Math.min(backoffMs(attempt), MAX_RETRY_AFTER_MS));
continue;
}
if (timedOut) {
@@ -233,10 +243,13 @@ function isTimeout(err: unknown): boolean {
return false;
}
-async function backoff(attempt: number): Promise {
+function backoffMs(attempt: number): number {
const base = 250;
const jitter = Math.floor(Math.random() * 120);
- const ms = base * 2 ** (attempt - 1) + jitter;
+ return base * 2 ** (attempt - 1) + jitter;
+}
+
+async function sleep(ms: number): Promise {
await new Promise((r) => setTimeout(r, ms));
}
diff --git a/apps/memos-local-plugin/core/util/retry-after.ts b/apps/memos-local-plugin/core/util/retry-after.ts
new file mode 100644
index 000000000..22689ba55
--- /dev/null
+++ b/apps/memos-local-plugin/core/util/retry-after.ts
@@ -0,0 +1,36 @@
+/**
+ * Parse the `Retry-After` HTTP response header per RFC 7231 §7.1.3.
+ *
+ * The value can be either:
+ * - a non-negative integer of seconds (delta-seconds), or
+ * - an HTTP-date (e.g. `Wed, 21 Oct 2025 07:28:00 GMT`).
+ *
+ * Returns the wait duration in milliseconds, capped to `capMs` to prevent a
+ * hostile or buggy server from pinning the client indefinitely. Returns
+ * `null` when the header is absent, malformed, or in the past (HTTP-date
+ * already elapsed) — callers should fall back to their existing backoff
+ * strategy. A value of `0` (delta-seconds) is valid and means "retry
+ * immediately"; servers like GitHub do this briefly as a ratelimit window
+ * expires.
+ */
+export function parseRetryAfterMs(resp: Response, capMs: number): number | null {
+ const raw = resp.headers.get("retry-after");
+ if (raw == null) return null;
+ const trimmed = raw.trim();
+ if (trimmed.length === 0) return null;
+
+ // delta-seconds: a non-negative integer.
+ if (/^\d+$/.test(trimmed)) {
+ const seconds = Number(trimmed);
+ if (!Number.isFinite(seconds) || seconds < 0) return null;
+ const ms = seconds * 1000;
+ return Math.min(ms, capMs);
+ }
+
+ // HTTP-date.
+ const target = Date.parse(trimmed);
+ if (Number.isNaN(target)) return null;
+ const delta = target - Date.now();
+ if (delta <= 0) return null;
+ return Math.min(delta, capMs);
+}
diff --git a/apps/memos-local-plugin/tests/unit/embedding/fetcher.test.ts b/apps/memos-local-plugin/tests/unit/embedding/fetcher.test.ts
index 3b9ee5bf2..b6992bf7e 100644
--- a/apps/memos-local-plugin/tests/unit/embedding/fetcher.test.ts
+++ b/apps/memos-local-plugin/tests/unit/embedding/fetcher.test.ts
@@ -81,6 +81,64 @@ describe("embedding/fetcher", () => {
expect(f).toHaveBeenCalledTimes(2);
});
+ it("429 with Retry-After: sleeps at least that long before retrying", async () => {
+ const f = mockFetch([
+ new Response("slow", { status: 429, headers: { "Retry-After": "1" } }),
+ new Response(JSON.stringify({ ok: 1 }), { status: 200 }),
+ ]);
+ const start = Date.now();
+ await httpPostJson({
+ url: "https://x",
+ body: {},
+ provider: "openai_compatible",
+ log: nullLogger(),
+ maxRetries: 1,
+ });
+ const elapsed = Date.now() - start;
+ expect(f).toHaveBeenCalledTimes(2);
+ // Retry-After: 1 → ~1000ms; baseline backoff is 200 + jitter < 300ms.
+ expect(elapsed).toBeGreaterThanOrEqual(900);
+ });
+
+ it("429 with Retry-After: HTTP-date in the near future is honored", async () => {
+ const target = new Date(Date.now() + 1_200).toUTCString();
+ const f = mockFetch([
+ new Response("slow", { status: 429, headers: { "Retry-After": target } }),
+ new Response(JSON.stringify({ ok: 1 }), { status: 200 }),
+ ]);
+ const start = Date.now();
+ await httpPostJson({
+ url: "https://x",
+ body: {},
+ provider: "openai_compatible",
+ log: nullLogger(),
+ maxRetries: 1,
+ });
+ const elapsed = Date.now() - start;
+ expect(f).toHaveBeenCalledTimes(2);
+ expect(elapsed).toBeGreaterThanOrEqual(800);
+ expect(elapsed).toBeLessThan(5_000);
+ });
+
+ it("429 with malformed Retry-After falls back to existing backoff", async () => {
+ const f = mockFetch([
+ new Response("slow", { status: 429, headers: { "Retry-After": "not-a-thing" } }),
+ new Response(JSON.stringify({ ok: 1 }), { status: 200 }),
+ ]);
+ const start = Date.now();
+ await httpPostJson({
+ url: "https://x",
+ body: {},
+ provider: "openai_compatible",
+ log: nullLogger(),
+ maxRetries: 1,
+ });
+ const elapsed = Date.now() - start;
+ expect(f).toHaveBeenCalledTimes(2);
+ // Fallback backoff for attempt 1: 200ms base + jitter < 300ms.
+ expect(elapsed).toBeLessThan(900);
+ });
+
it("does not retry on 400", async () => {
mockFetch([new Response("bad", { status: 400 })]);
await expect(
diff --git a/apps/memos-local-plugin/tests/unit/llm/fetcher.test.ts b/apps/memos-local-plugin/tests/unit/llm/fetcher.test.ts
index 3609aa2d8..3af76ac7b 100644
--- a/apps/memos-local-plugin/tests/unit/llm/fetcher.test.ts
+++ b/apps/memos-local-plugin/tests/unit/llm/fetcher.test.ts
@@ -82,6 +82,68 @@ describe("llm/fetcher", () => {
}
});
+ it("429 with Retry-After: sleeps at least that long before retrying", async () => {
+ const f = mockFetch([
+ new Response("slow", { status: 429, headers: { "Retry-After": "1" } }),
+ new Response(JSON.stringify({ ok: 1 }), { status: 200 }),
+ ]);
+ const start = Date.now();
+ await httpPostJson({
+ url: "https://x",
+ body: {},
+ timeoutMs: 10_000,
+ maxRetries: 1,
+ provider: "openai_compatible",
+ log: nullLog(),
+ });
+ const elapsed = Date.now() - start;
+ expect(f).toHaveBeenCalledTimes(2);
+ // Retry-After: 1 → ~1000ms; baseline backoff for attempt 1 is 250 + jitter < 400ms.
+ expect(elapsed).toBeGreaterThanOrEqual(900);
+ });
+
+ it("429 with Retry-After: HTTP-date in the near future is honored", async () => {
+ const target = new Date(Date.now() + 1_200).toUTCString();
+ const f = mockFetch([
+ new Response("slow", { status: 429, headers: { "Retry-After": target } }),
+ new Response(JSON.stringify({ ok: 1 }), { status: 200 }),
+ ]);
+ const start = Date.now();
+ await httpPostJson({
+ url: "https://x",
+ body: {},
+ timeoutMs: 10_000,
+ maxRetries: 1,
+ provider: "openai_compatible",
+ log: nullLog(),
+ });
+ const elapsed = Date.now() - start;
+ expect(f).toHaveBeenCalledTimes(2);
+ // Roughly the diff (allow generous slack for clock + jitter).
+ expect(elapsed).toBeGreaterThanOrEqual(800);
+ expect(elapsed).toBeLessThan(5_000);
+ });
+
+ it("429 with malformed Retry-After falls back to existing backoff", async () => {
+ const f = mockFetch([
+ new Response("slow", { status: 429, headers: { "Retry-After": "not-a-thing" } }),
+ new Response(JSON.stringify({ ok: 1 }), { status: 200 }),
+ ]);
+ const start = Date.now();
+ await httpPostJson({
+ url: "https://x",
+ body: {},
+ timeoutMs: 10_000,
+ maxRetries: 1,
+ provider: "openai_compatible",
+ log: nullLog(),
+ });
+ const elapsed = Date.now() - start;
+ expect(f).toHaveBeenCalledTimes(2);
+ // Fallback backoff for attempt 1: 250ms base + jitter < 400ms; should NOT be ≥1s.
+ expect(elapsed).toBeLessThan(900);
+ });
+
it("4xx (non-429) does not retry → LLM_UNAVAILABLE", async () => {
const f = mockFetch([new Response("bad", { status: 400 })]);
try {
diff --git a/apps/memos-local-plugin/tests/unit/util/retry-after.test.ts b/apps/memos-local-plugin/tests/unit/util/retry-after.test.ts
new file mode 100644
index 000000000..33e7b9fe2
--- /dev/null
+++ b/apps/memos-local-plugin/tests/unit/util/retry-after.test.ts
@@ -0,0 +1,55 @@
+import { describe, expect, it } from "vitest";
+
+import { parseRetryAfterMs } from "../../../core/util/retry-after.js";
+
+function respWith(headerValue: string | null): Response {
+ const headers = new Headers();
+ if (headerValue !== null) headers.set("Retry-After", headerValue);
+ return new Response(null, { status: 429, headers });
+}
+
+describe("util/parseRetryAfterMs", () => {
+ it("returns null when header is absent", () => {
+ expect(parseRetryAfterMs(respWith(null), 60_000)).toBeNull();
+ });
+
+ it("parses delta-seconds (integer) into milliseconds", () => {
+ expect(parseRetryAfterMs(respWith("3"), 60_000)).toBe(3_000);
+ expect(parseRetryAfterMs(respWith("0"), 60_000)).toBe(0);
+ });
+
+ it("caps delta-seconds at capMs", () => {
+ expect(parseRetryAfterMs(respWith("9999"), 60_000)).toBe(60_000);
+ });
+
+ it("parses HTTP-date in the near future", () => {
+ const target = new Date(Date.now() + 2_000);
+ const ms = parseRetryAfterMs(respWith(target.toUTCString()), 60_000);
+ expect(ms).not.toBeNull();
+ // Allow generous slack for the elapsed parsing time.
+ expect(ms!).toBeGreaterThan(500);
+ expect(ms!).toBeLessThanOrEqual(2_500);
+ });
+
+ it("returns null when HTTP-date is in the past", () => {
+ const target = new Date(Date.now() - 60_000);
+ expect(parseRetryAfterMs(respWith(target.toUTCString()), 60_000)).toBeNull();
+ });
+
+ it("caps HTTP-date diff at capMs", () => {
+ const target = new Date(Date.now() + 10 * 60 * 1000);
+ expect(parseRetryAfterMs(respWith(target.toUTCString()), 60_000)).toBe(60_000);
+ });
+
+ it("returns null for malformed values", () => {
+ expect(parseRetryAfterMs(respWith("not-a-number"), 60_000)).toBeNull();
+ expect(parseRetryAfterMs(respWith(""), 60_000)).toBeNull();
+ expect(parseRetryAfterMs(respWith(" "), 60_000)).toBeNull();
+ });
+
+ it("rejects negative-looking values (no leading sign permitted)", () => {
+ // The integer regex `^\d+$` does not match `-1`, so it falls through to
+ // Date.parse which will return NaN for `-1` → null.
+ expect(parseRetryAfterMs(respWith("-1"), 60_000)).toBeNull();
+ });
+});