From 3220e1085e63f53eb3354ddcda9ae7b79c1dea34 Mon Sep 17 00:00:00 2001
From: Sanjay Santhanam <51058514+Sanjays2402@users.noreply.github.com>
Date: Sat, 9 May 2026 12:16:22 -0700
Subject: [PATCH 1/2] fix(fetchers): honor Retry-After header on 429/503
responses
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
LLM and embedding fetchers in memos-local-plugin classified 429 as
transient and retried with exponential backoff, but never inspected the
Retry-After response header. Under provider rate limiting, retries fired
before the upstream-requested cooldown, extending throttling and wasting
paid API calls.
Parse Retry-After per RFC 7231 §7.1.3 (delta-seconds or HTTP-date),
cap to a sane maximum to avoid hostile pinning, and prefer it over
exponential backoff on 429/503. Fall back to the existing backoff when
the header is absent or unparseable.
Fixes #1620
---
.../core/embedding/fetcher.ts | 20 ++++--
apps/memos-local-plugin/core/llm/fetcher.ts | 20 ++++--
.../core/util/retry-after.ts | 33 ++++++++++
.../tests/unit/embedding/fetcher.test.ts | 58 +++++++++++++++++
.../tests/unit/llm/fetcher.test.ts | 62 +++++++++++++++++++
.../tests/unit/util/retry-after.test.ts | 55 ++++++++++++++++
6 files changed, 240 insertions(+), 8 deletions(-)
create mode 100644 apps/memos-local-plugin/core/util/retry-after.ts
create mode 100644 apps/memos-local-plugin/tests/unit/util/retry-after.test.ts
diff --git a/apps/memos-local-plugin/core/embedding/fetcher.ts b/apps/memos-local-plugin/core/embedding/fetcher.ts
index 40524aac7..264b57d41 100644
--- a/apps/memos-local-plugin/core/embedding/fetcher.ts
+++ b/apps/memos-local-plugin/core/embedding/fetcher.ts
@@ -8,8 +8,12 @@
*/
import { ERROR_CODES, MemosError } from "../../agent-contract/errors.js";
+import { parseRetryAfterMs } from "../util/retry-after.js";
import type { EmbeddingProviderName, ProviderLogger } from "./types.js";
+/** Hard ceiling on any single retry sleep — Retry-After or otherwise. */
+const MAX_RETRY_AFTER_MS = 60_000;
+
export interface HttpPostOpts
{
url: string;
body: TBody;
@@ -46,15 +50,20 @@ export async function httpPostJson(opts: HttpPostOpts): Promise<
if (!resp.ok) {
const text = await safeText(resp);
const transient = resp.status >= 500 || resp.status === 429;
+ const retryAfterMs =
+ resp.status === 429 || resp.status === 503
+ ? parseRetryAfterMs(resp, MAX_RETRY_AFTER_MS)
+ : null;
opts.log.warn("http.non_ok", {
url: opts.url,
status: resp.status,
attempt,
transient,
+ retryAfterMs,
durationMs: Date.now() - start,
});
if (transient && attempt <= maxRetries) {
- await backoff(attempt);
+ await sleep(retryAfterMs ?? backoffMs(attempt));
continue;
}
throw new MemosError(
@@ -83,7 +92,7 @@ export async function httpPostJson(opts: HttpPostOpts): Promise<
durationMs: Date.now() - start,
});
if (transient && attempt <= maxRetries) {
- await backoff(attempt);
+ await sleep(backoffMs(attempt));
continue;
}
throw new MemosError(
@@ -123,10 +132,13 @@ function isTransientError(err: unknown): boolean {
return false;
}
-async function backoff(attempt: number): Promise {
+function backoffMs(attempt: number): number {
const base = 200;
const jitter = Math.floor(Math.random() * 100);
- const ms = base * 2 ** (attempt - 1) + jitter;
+ return base * 2 ** (attempt - 1) + jitter;
+}
+
+async function sleep(ms: number): Promise {
await new Promise((r) => setTimeout(r, ms));
}
diff --git a/apps/memos-local-plugin/core/llm/fetcher.ts b/apps/memos-local-plugin/core/llm/fetcher.ts
index 5e8b0317e..fe31d7464 100644
--- a/apps/memos-local-plugin/core/llm/fetcher.ts
+++ b/apps/memos-local-plugin/core/llm/fetcher.ts
@@ -11,8 +11,12 @@
*/
import { ERROR_CODES, MemosError } from "../../agent-contract/errors.js";
+import { parseRetryAfterMs } from "../util/retry-after.js";
import type { LlmProviderLogger, LlmProviderName } from "./types.js";
+/** Hard ceiling on any single retry sleep — Retry-After or otherwise. */
+const MAX_RETRY_AFTER_MS = 60_000;
+
export interface HttpPostOpts {
url: string;
body: TBody;
@@ -56,15 +60,20 @@ export async function httpPostJson(opts: HttpPostOpts): Promise<
if (!resp.ok) {
const text = await safeText(resp);
const transient = resp.status >= 500 || resp.status === 429;
+ const retryAfterMs =
+ resp.status === 429 || resp.status === 503
+ ? parseRetryAfterMs(resp, MAX_RETRY_AFTER_MS)
+ : null;
opts.log.warn("http.non_ok", {
status: resp.status,
attempt,
transient,
+ retryAfterMs,
durationMs: ms,
});
if (transient && attempt <= opts.maxRetries) {
opts.onRetry?.(attempt);
- await backoff(attempt);
+ await sleep(retryAfterMs ?? backoffMs(attempt));
continue;
}
throw new MemosError(
@@ -94,7 +103,7 @@ export async function httpPostJson(opts: HttpPostOpts): Promise<
});
if ((transient || timedOut) && attempt <= opts.maxRetries) {
opts.onRetry?.(attempt);
- await backoff(attempt);
+ await sleep(backoffMs(attempt));
continue;
}
if (timedOut) {
@@ -233,10 +242,13 @@ function isTimeout(err: unknown): boolean {
return false;
}
-async function backoff(attempt: number): Promise {
+function backoffMs(attempt: number): number {
const base = 250;
const jitter = Math.floor(Math.random() * 120);
- const ms = base * 2 ** (attempt - 1) + jitter;
+ return base * 2 ** (attempt - 1) + jitter;
+}
+
+async function sleep(ms: number): Promise {
await new Promise((r) => setTimeout(r, ms));
}
diff --git a/apps/memos-local-plugin/core/util/retry-after.ts b/apps/memos-local-plugin/core/util/retry-after.ts
new file mode 100644
index 000000000..bbe0da8e3
--- /dev/null
+++ b/apps/memos-local-plugin/core/util/retry-after.ts
@@ -0,0 +1,33 @@
+/**
+ * Parse the `Retry-After` HTTP response header per RFC 7231 §7.1.3.
+ *
+ * The value can be either:
+ * - a non-negative integer of seconds (delta-seconds), or
+ * - an HTTP-date (e.g. `Wed, 21 Oct 2025 07:28:00 GMT`).
+ *
+ * Returns the wait duration in milliseconds, capped to `capMs` to prevent a
+ * hostile or buggy server from pinning the client indefinitely. Returns
+ * `null` when the header is absent, malformed, or non-positive — callers
+ * should fall back to their existing backoff strategy.
+ */
+export function parseRetryAfterMs(resp: Response, capMs: number): number | null {
+ const raw = resp.headers.get("retry-after");
+ if (raw == null) return null;
+ const trimmed = raw.trim();
+ if (trimmed.length === 0) return null;
+
+ // delta-seconds: a non-negative integer.
+ if (/^\d+$/.test(trimmed)) {
+ const seconds = Number(trimmed);
+ if (!Number.isFinite(seconds) || seconds < 0) return null;
+ const ms = seconds * 1000;
+ return Math.min(ms, capMs);
+ }
+
+ // HTTP-date.
+ const target = Date.parse(trimmed);
+ if (Number.isNaN(target)) return null;
+ const delta = target - Date.now();
+ if (delta <= 0) return null;
+ return Math.min(delta, capMs);
+}
diff --git a/apps/memos-local-plugin/tests/unit/embedding/fetcher.test.ts b/apps/memos-local-plugin/tests/unit/embedding/fetcher.test.ts
index 3b9ee5bf2..b6992bf7e 100644
--- a/apps/memos-local-plugin/tests/unit/embedding/fetcher.test.ts
+++ b/apps/memos-local-plugin/tests/unit/embedding/fetcher.test.ts
@@ -81,6 +81,64 @@ describe("embedding/fetcher", () => {
expect(f).toHaveBeenCalledTimes(2);
});
+ it("429 with Retry-After: sleeps at least that long before retrying", async () => {
+ const f = mockFetch([
+ new Response("slow", { status: 429, headers: { "Retry-After": "1" } }),
+ new Response(JSON.stringify({ ok: 1 }), { status: 200 }),
+ ]);
+ const start = Date.now();
+ await httpPostJson({
+ url: "https://x",
+ body: {},
+ provider: "openai_compatible",
+ log: nullLogger(),
+ maxRetries: 1,
+ });
+ const elapsed = Date.now() - start;
+ expect(f).toHaveBeenCalledTimes(2);
+ // Retry-After: 1 → ~1000ms; baseline backoff is 200 + jitter < 300ms.
+ expect(elapsed).toBeGreaterThanOrEqual(900);
+ });
+
+ it("429 with Retry-After: HTTP-date in the near future is honored", async () => {
+ const target = new Date(Date.now() + 1_200).toUTCString();
+ const f = mockFetch([
+ new Response("slow", { status: 429, headers: { "Retry-After": target } }),
+ new Response(JSON.stringify({ ok: 1 }), { status: 200 }),
+ ]);
+ const start = Date.now();
+ await httpPostJson({
+ url: "https://x",
+ body: {},
+ provider: "openai_compatible",
+ log: nullLogger(),
+ maxRetries: 1,
+ });
+ const elapsed = Date.now() - start;
+ expect(f).toHaveBeenCalledTimes(2);
+ expect(elapsed).toBeGreaterThanOrEqual(800);
+ expect(elapsed).toBeLessThan(5_000);
+ });
+
+ it("429 with malformed Retry-After falls back to existing backoff", async () => {
+ const f = mockFetch([
+ new Response("slow", { status: 429, headers: { "Retry-After": "not-a-thing" } }),
+ new Response(JSON.stringify({ ok: 1 }), { status: 200 }),
+ ]);
+ const start = Date.now();
+ await httpPostJson({
+ url: "https://x",
+ body: {},
+ provider: "openai_compatible",
+ log: nullLogger(),
+ maxRetries: 1,
+ });
+ const elapsed = Date.now() - start;
+ expect(f).toHaveBeenCalledTimes(2);
+ // Fallback backoff for attempt 1: 200ms base + jitter < 300ms.
+ expect(elapsed).toBeLessThan(900);
+ });
+
it("does not retry on 400", async () => {
mockFetch([new Response("bad", { status: 400 })]);
await expect(
diff --git a/apps/memos-local-plugin/tests/unit/llm/fetcher.test.ts b/apps/memos-local-plugin/tests/unit/llm/fetcher.test.ts
index 3609aa2d8..3af76ac7b 100644
--- a/apps/memos-local-plugin/tests/unit/llm/fetcher.test.ts
+++ b/apps/memos-local-plugin/tests/unit/llm/fetcher.test.ts
@@ -82,6 +82,68 @@ describe("llm/fetcher", () => {
}
});
+ it("429 with Retry-After: sleeps at least that long before retrying", async () => {
+ const f = mockFetch([
+ new Response("slow", { status: 429, headers: { "Retry-After": "1" } }),
+ new Response(JSON.stringify({ ok: 1 }), { status: 200 }),
+ ]);
+ const start = Date.now();
+ await httpPostJson({
+ url: "https://x",
+ body: {},
+ timeoutMs: 10_000,
+ maxRetries: 1,
+ provider: "openai_compatible",
+ log: nullLog(),
+ });
+ const elapsed = Date.now() - start;
+ expect(f).toHaveBeenCalledTimes(2);
+ // Retry-After: 1 → ~1000ms; baseline backoff for attempt 1 is 250 + jitter < 400ms.
+ expect(elapsed).toBeGreaterThanOrEqual(900);
+ });
+
+ it("429 with Retry-After: HTTP-date in the near future is honored", async () => {
+ const target = new Date(Date.now() + 1_200).toUTCString();
+ const f = mockFetch([
+ new Response("slow", { status: 429, headers: { "Retry-After": target } }),
+ new Response(JSON.stringify({ ok: 1 }), { status: 200 }),
+ ]);
+ const start = Date.now();
+ await httpPostJson({
+ url: "https://x",
+ body: {},
+ timeoutMs: 10_000,
+ maxRetries: 1,
+ provider: "openai_compatible",
+ log: nullLog(),
+ });
+ const elapsed = Date.now() - start;
+ expect(f).toHaveBeenCalledTimes(2);
+ // Roughly the diff (allow generous slack for clock + jitter).
+ expect(elapsed).toBeGreaterThanOrEqual(800);
+ expect(elapsed).toBeLessThan(5_000);
+ });
+
+ it("429 with malformed Retry-After falls back to existing backoff", async () => {
+ const f = mockFetch([
+ new Response("slow", { status: 429, headers: { "Retry-After": "not-a-thing" } }),
+ new Response(JSON.stringify({ ok: 1 }), { status: 200 }),
+ ]);
+ const start = Date.now();
+ await httpPostJson({
+ url: "https://x",
+ body: {},
+ timeoutMs: 10_000,
+ maxRetries: 1,
+ provider: "openai_compatible",
+ log: nullLog(),
+ });
+ const elapsed = Date.now() - start;
+ expect(f).toHaveBeenCalledTimes(2);
+ // Fallback backoff for attempt 1: 250ms base + jitter < 400ms; should NOT be ≥1s.
+ expect(elapsed).toBeLessThan(900);
+ });
+
it("4xx (non-429) does not retry → LLM_UNAVAILABLE", async () => {
const f = mockFetch([new Response("bad", { status: 400 })]);
try {
diff --git a/apps/memos-local-plugin/tests/unit/util/retry-after.test.ts b/apps/memos-local-plugin/tests/unit/util/retry-after.test.ts
new file mode 100644
index 000000000..33e7b9fe2
--- /dev/null
+++ b/apps/memos-local-plugin/tests/unit/util/retry-after.test.ts
@@ -0,0 +1,55 @@
+import { describe, expect, it } from "vitest";
+
+import { parseRetryAfterMs } from "../../../core/util/retry-after.js";
+
+function respWith(headerValue: string | null): Response {
+ const headers = new Headers();
+ if (headerValue !== null) headers.set("Retry-After", headerValue);
+ return new Response(null, { status: 429, headers });
+}
+
+describe("util/parseRetryAfterMs", () => {
+ it("returns null when header is absent", () => {
+ expect(parseRetryAfterMs(respWith(null), 60_000)).toBeNull();
+ });
+
+ it("parses delta-seconds (integer) into milliseconds", () => {
+ expect(parseRetryAfterMs(respWith("3"), 60_000)).toBe(3_000);
+ expect(parseRetryAfterMs(respWith("0"), 60_000)).toBe(0);
+ });
+
+ it("caps delta-seconds at capMs", () => {
+ expect(parseRetryAfterMs(respWith("9999"), 60_000)).toBe(60_000);
+ });
+
+ it("parses HTTP-date in the near future", () => {
+ const target = new Date(Date.now() + 2_000);
+ const ms = parseRetryAfterMs(respWith(target.toUTCString()), 60_000);
+ expect(ms).not.toBeNull();
+ // Allow generous slack for the elapsed parsing time.
+ expect(ms!).toBeGreaterThan(500);
+ expect(ms!).toBeLessThanOrEqual(2_500);
+ });
+
+ it("returns null when HTTP-date is in the past", () => {
+ const target = new Date(Date.now() - 60_000);
+ expect(parseRetryAfterMs(respWith(target.toUTCString()), 60_000)).toBeNull();
+ });
+
+ it("caps HTTP-date diff at capMs", () => {
+ const target = new Date(Date.now() + 10 * 60 * 1000);
+ expect(parseRetryAfterMs(respWith(target.toUTCString()), 60_000)).toBe(60_000);
+ });
+
+ it("returns null for malformed values", () => {
+ expect(parseRetryAfterMs(respWith("not-a-number"), 60_000)).toBeNull();
+ expect(parseRetryAfterMs(respWith(""), 60_000)).toBeNull();
+ expect(parseRetryAfterMs(respWith(" "), 60_000)).toBeNull();
+ });
+
+ it("rejects negative-looking values (no leading sign permitted)", () => {
+ // The integer regex `^\d+$` does not match `-1`, so it falls through to
+ // Date.parse which will return NaN for `-1` → null.
+ expect(parseRetryAfterMs(respWith("-1"), 60_000)).toBeNull();
+ });
+});
From 95b40c1cea0581ca2ffc866347bf1475edc24bcd Mon Sep 17 00:00:00 2001
From: Sanjays2402 <51058514+Sanjays2402@users.noreply.github.com>
Date: Sat, 9 May 2026 12:46:07 -0700
Subject: [PATCH 2/2] fix(fetchers): apply MAX_RETRY_AFTER_MS to backoff path
too; clarify 0 is valid
Address Copilot review on #1670:
- parseRetryAfterMs JSDoc: document that 0 (retry immediately) is a
valid value; only negative or unparseable values return null.
- LLM and embedding fetchers: cap the final sleep duration with
MAX_RETRY_AFTER_MS regardless of whether it came from Retry-After
or exponential backoff. Previously backoff could exceed 60s on
high attempt counts (config allows up to 10 retries).
---
apps/memos-local-plugin/core/embedding/fetcher.ts | 5 +++--
apps/memos-local-plugin/core/llm/fetcher.ts | 5 +++--
apps/memos-local-plugin/core/util/retry-after.ts | 7 +++++--
3 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/apps/memos-local-plugin/core/embedding/fetcher.ts b/apps/memos-local-plugin/core/embedding/fetcher.ts
index 264b57d41..de6202f56 100644
--- a/apps/memos-local-plugin/core/embedding/fetcher.ts
+++ b/apps/memos-local-plugin/core/embedding/fetcher.ts
@@ -63,7 +63,8 @@ export async function httpPostJson(opts: HttpPostOpts): Promise<
durationMs: Date.now() - start,
});
if (transient && attempt <= maxRetries) {
- await sleep(retryAfterMs ?? backoffMs(attempt));
+ const sleepMs = Math.min(retryAfterMs ?? backoffMs(attempt), MAX_RETRY_AFTER_MS);
+ await sleep(sleepMs);
continue;
}
throw new MemosError(
@@ -92,7 +93,7 @@ export async function httpPostJson(opts: HttpPostOpts): Promise<
durationMs: Date.now() - start,
});
if (transient && attempt <= maxRetries) {
- await sleep(backoffMs(attempt));
+ await sleep(Math.min(backoffMs(attempt), MAX_RETRY_AFTER_MS));
continue;
}
throw new MemosError(
diff --git a/apps/memos-local-plugin/core/llm/fetcher.ts b/apps/memos-local-plugin/core/llm/fetcher.ts
index fe31d7464..1fc7fb5c5 100644
--- a/apps/memos-local-plugin/core/llm/fetcher.ts
+++ b/apps/memos-local-plugin/core/llm/fetcher.ts
@@ -73,7 +73,8 @@ export async function httpPostJson(opts: HttpPostOpts): Promise<
});
if (transient && attempt <= opts.maxRetries) {
opts.onRetry?.(attempt);
- await sleep(retryAfterMs ?? backoffMs(attempt));
+ const sleepMs = Math.min(retryAfterMs ?? backoffMs(attempt), MAX_RETRY_AFTER_MS);
+ await sleep(sleepMs);
continue;
}
throw new MemosError(
@@ -103,7 +104,7 @@ export async function httpPostJson(opts: HttpPostOpts): Promise<
});
if ((transient || timedOut) && attempt <= opts.maxRetries) {
opts.onRetry?.(attempt);
- await sleep(backoffMs(attempt));
+ await sleep(Math.min(backoffMs(attempt), MAX_RETRY_AFTER_MS));
continue;
}
if (timedOut) {
diff --git a/apps/memos-local-plugin/core/util/retry-after.ts b/apps/memos-local-plugin/core/util/retry-after.ts
index bbe0da8e3..22689ba55 100644
--- a/apps/memos-local-plugin/core/util/retry-after.ts
+++ b/apps/memos-local-plugin/core/util/retry-after.ts
@@ -7,8 +7,11 @@
*
* Returns the wait duration in milliseconds, capped to `capMs` to prevent a
* hostile or buggy server from pinning the client indefinitely. Returns
- * `null` when the header is absent, malformed, or non-positive — callers
- * should fall back to their existing backoff strategy.
+ * `null` when the header is absent, malformed, or in the past (HTTP-date
+ * already elapsed) — callers should fall back to their existing backoff
+ * strategy. A value of `0` (delta-seconds) is valid and means "retry
+ * immediately"; servers like GitHub do this briefly as a ratelimit window
+ * expires.
*/
export function parseRetryAfterMs(resp: Response, capMs: number): number | null {
const raw = resp.headers.get("retry-after");