From 3220e1085e63f53eb3354ddcda9ae7b79c1dea34 Mon Sep 17 00:00:00 2001 From: Sanjay Santhanam <51058514+Sanjays2402@users.noreply.github.com> Date: Sat, 9 May 2026 12:16:22 -0700 Subject: [PATCH 1/2] fix(fetchers): honor Retry-After header on 429/503 responses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LLM and embedding fetchers in memos-local-plugin classified 429 as transient and retried with exponential backoff, but never inspected the Retry-After response header. Under provider rate limiting, retries fired before the upstream-requested cooldown, extending throttling and wasting paid API calls. Parse Retry-After per RFC 7231 §7.1.3 (delta-seconds or HTTP-date), cap to a sane maximum to avoid hostile pinning, and prefer it over exponential backoff on 429/503. Fall back to the existing backoff when the header is absent or unparseable. Fixes #1620 --- .../core/embedding/fetcher.ts | 20 ++++-- apps/memos-local-plugin/core/llm/fetcher.ts | 20 ++++-- .../core/util/retry-after.ts | 33 ++++++++++ .../tests/unit/embedding/fetcher.test.ts | 58 +++++++++++++++++ .../tests/unit/llm/fetcher.test.ts | 62 +++++++++++++++++++ .../tests/unit/util/retry-after.test.ts | 55 ++++++++++++++++ 6 files changed, 240 insertions(+), 8 deletions(-) create mode 100644 apps/memos-local-plugin/core/util/retry-after.ts create mode 100644 apps/memos-local-plugin/tests/unit/util/retry-after.test.ts diff --git a/apps/memos-local-plugin/core/embedding/fetcher.ts b/apps/memos-local-plugin/core/embedding/fetcher.ts index 40524aac7..264b57d41 100644 --- a/apps/memos-local-plugin/core/embedding/fetcher.ts +++ b/apps/memos-local-plugin/core/embedding/fetcher.ts @@ -8,8 +8,12 @@ */ import { ERROR_CODES, MemosError } from "../../agent-contract/errors.js"; +import { parseRetryAfterMs } from "../util/retry-after.js"; import type { EmbeddingProviderName, ProviderLogger } from "./types.js"; +/** Hard ceiling on any single retry sleep — Retry-After or otherwise. */ +const MAX_RETRY_AFTER_MS = 60_000; + export interface HttpPostOpts { url: string; body: TBody; @@ -46,15 +50,20 @@ export async function httpPostJson(opts: HttpPostOpts): Promise< if (!resp.ok) { const text = await safeText(resp); const transient = resp.status >= 500 || resp.status === 429; + const retryAfterMs = + resp.status === 429 || resp.status === 503 + ? parseRetryAfterMs(resp, MAX_RETRY_AFTER_MS) + : null; opts.log.warn("http.non_ok", { url: opts.url, status: resp.status, attempt, transient, + retryAfterMs, durationMs: Date.now() - start, }); if (transient && attempt <= maxRetries) { - await backoff(attempt); + await sleep(retryAfterMs ?? backoffMs(attempt)); continue; } throw new MemosError( @@ -83,7 +92,7 @@ export async function httpPostJson(opts: HttpPostOpts): Promise< durationMs: Date.now() - start, }); if (transient && attempt <= maxRetries) { - await backoff(attempt); + await sleep(backoffMs(attempt)); continue; } throw new MemosError( @@ -123,10 +132,13 @@ function isTransientError(err: unknown): boolean { return false; } -async function backoff(attempt: number): Promise { +function backoffMs(attempt: number): number { const base = 200; const jitter = Math.floor(Math.random() * 100); - const ms = base * 2 ** (attempt - 1) + jitter; + return base * 2 ** (attempt - 1) + jitter; +} + +async function sleep(ms: number): Promise { await new Promise((r) => setTimeout(r, ms)); } diff --git a/apps/memos-local-plugin/core/llm/fetcher.ts b/apps/memos-local-plugin/core/llm/fetcher.ts index 5e8b0317e..fe31d7464 100644 --- a/apps/memos-local-plugin/core/llm/fetcher.ts +++ b/apps/memos-local-plugin/core/llm/fetcher.ts @@ -11,8 +11,12 @@ */ import { ERROR_CODES, MemosError } from "../../agent-contract/errors.js"; +import { parseRetryAfterMs } from "../util/retry-after.js"; import type { LlmProviderLogger, LlmProviderName } from "./types.js"; +/** Hard ceiling on any single retry sleep — Retry-After or otherwise. */ +const MAX_RETRY_AFTER_MS = 60_000; + export interface HttpPostOpts { url: string; body: TBody; @@ -56,15 +60,20 @@ export async function httpPostJson(opts: HttpPostOpts): Promise< if (!resp.ok) { const text = await safeText(resp); const transient = resp.status >= 500 || resp.status === 429; + const retryAfterMs = + resp.status === 429 || resp.status === 503 + ? parseRetryAfterMs(resp, MAX_RETRY_AFTER_MS) + : null; opts.log.warn("http.non_ok", { status: resp.status, attempt, transient, + retryAfterMs, durationMs: ms, }); if (transient && attempt <= opts.maxRetries) { opts.onRetry?.(attempt); - await backoff(attempt); + await sleep(retryAfterMs ?? backoffMs(attempt)); continue; } throw new MemosError( @@ -94,7 +103,7 @@ export async function httpPostJson(opts: HttpPostOpts): Promise< }); if ((transient || timedOut) && attempt <= opts.maxRetries) { opts.onRetry?.(attempt); - await backoff(attempt); + await sleep(backoffMs(attempt)); continue; } if (timedOut) { @@ -233,10 +242,13 @@ function isTimeout(err: unknown): boolean { return false; } -async function backoff(attempt: number): Promise { +function backoffMs(attempt: number): number { const base = 250; const jitter = Math.floor(Math.random() * 120); - const ms = base * 2 ** (attempt - 1) + jitter; + return base * 2 ** (attempt - 1) + jitter; +} + +async function sleep(ms: number): Promise { await new Promise((r) => setTimeout(r, ms)); } diff --git a/apps/memos-local-plugin/core/util/retry-after.ts b/apps/memos-local-plugin/core/util/retry-after.ts new file mode 100644 index 000000000..bbe0da8e3 --- /dev/null +++ b/apps/memos-local-plugin/core/util/retry-after.ts @@ -0,0 +1,33 @@ +/** + * Parse the `Retry-After` HTTP response header per RFC 7231 §7.1.3. + * + * The value can be either: + * - a non-negative integer of seconds (delta-seconds), or + * - an HTTP-date (e.g. `Wed, 21 Oct 2025 07:28:00 GMT`). + * + * Returns the wait duration in milliseconds, capped to `capMs` to prevent a + * hostile or buggy server from pinning the client indefinitely. Returns + * `null` when the header is absent, malformed, or non-positive — callers + * should fall back to their existing backoff strategy. + */ +export function parseRetryAfterMs(resp: Response, capMs: number): number | null { + const raw = resp.headers.get("retry-after"); + if (raw == null) return null; + const trimmed = raw.trim(); + if (trimmed.length === 0) return null; + + // delta-seconds: a non-negative integer. + if (/^\d+$/.test(trimmed)) { + const seconds = Number(trimmed); + if (!Number.isFinite(seconds) || seconds < 0) return null; + const ms = seconds * 1000; + return Math.min(ms, capMs); + } + + // HTTP-date. + const target = Date.parse(trimmed); + if (Number.isNaN(target)) return null; + const delta = target - Date.now(); + if (delta <= 0) return null; + return Math.min(delta, capMs); +} diff --git a/apps/memos-local-plugin/tests/unit/embedding/fetcher.test.ts b/apps/memos-local-plugin/tests/unit/embedding/fetcher.test.ts index 3b9ee5bf2..b6992bf7e 100644 --- a/apps/memos-local-plugin/tests/unit/embedding/fetcher.test.ts +++ b/apps/memos-local-plugin/tests/unit/embedding/fetcher.test.ts @@ -81,6 +81,64 @@ describe("embedding/fetcher", () => { expect(f).toHaveBeenCalledTimes(2); }); + it("429 with Retry-After: sleeps at least that long before retrying", async () => { + const f = mockFetch([ + new Response("slow", { status: 429, headers: { "Retry-After": "1" } }), + new Response(JSON.stringify({ ok: 1 }), { status: 200 }), + ]); + const start = Date.now(); + await httpPostJson({ + url: "https://x", + body: {}, + provider: "openai_compatible", + log: nullLogger(), + maxRetries: 1, + }); + const elapsed = Date.now() - start; + expect(f).toHaveBeenCalledTimes(2); + // Retry-After: 1 → ~1000ms; baseline backoff is 200 + jitter < 300ms. + expect(elapsed).toBeGreaterThanOrEqual(900); + }); + + it("429 with Retry-After: HTTP-date in the near future is honored", async () => { + const target = new Date(Date.now() + 1_200).toUTCString(); + const f = mockFetch([ + new Response("slow", { status: 429, headers: { "Retry-After": target } }), + new Response(JSON.stringify({ ok: 1 }), { status: 200 }), + ]); + const start = Date.now(); + await httpPostJson({ + url: "https://x", + body: {}, + provider: "openai_compatible", + log: nullLogger(), + maxRetries: 1, + }); + const elapsed = Date.now() - start; + expect(f).toHaveBeenCalledTimes(2); + expect(elapsed).toBeGreaterThanOrEqual(800); + expect(elapsed).toBeLessThan(5_000); + }); + + it("429 with malformed Retry-After falls back to existing backoff", async () => { + const f = mockFetch([ + new Response("slow", { status: 429, headers: { "Retry-After": "not-a-thing" } }), + new Response(JSON.stringify({ ok: 1 }), { status: 200 }), + ]); + const start = Date.now(); + await httpPostJson({ + url: "https://x", + body: {}, + provider: "openai_compatible", + log: nullLogger(), + maxRetries: 1, + }); + const elapsed = Date.now() - start; + expect(f).toHaveBeenCalledTimes(2); + // Fallback backoff for attempt 1: 200ms base + jitter < 300ms. + expect(elapsed).toBeLessThan(900); + }); + it("does not retry on 400", async () => { mockFetch([new Response("bad", { status: 400 })]); await expect( diff --git a/apps/memos-local-plugin/tests/unit/llm/fetcher.test.ts b/apps/memos-local-plugin/tests/unit/llm/fetcher.test.ts index 3609aa2d8..3af76ac7b 100644 --- a/apps/memos-local-plugin/tests/unit/llm/fetcher.test.ts +++ b/apps/memos-local-plugin/tests/unit/llm/fetcher.test.ts @@ -82,6 +82,68 @@ describe("llm/fetcher", () => { } }); + it("429 with Retry-After: sleeps at least that long before retrying", async () => { + const f = mockFetch([ + new Response("slow", { status: 429, headers: { "Retry-After": "1" } }), + new Response(JSON.stringify({ ok: 1 }), { status: 200 }), + ]); + const start = Date.now(); + await httpPostJson({ + url: "https://x", + body: {}, + timeoutMs: 10_000, + maxRetries: 1, + provider: "openai_compatible", + log: nullLog(), + }); + const elapsed = Date.now() - start; + expect(f).toHaveBeenCalledTimes(2); + // Retry-After: 1 → ~1000ms; baseline backoff for attempt 1 is 250 + jitter < 400ms. + expect(elapsed).toBeGreaterThanOrEqual(900); + }); + + it("429 with Retry-After: HTTP-date in the near future is honored", async () => { + const target = new Date(Date.now() + 1_200).toUTCString(); + const f = mockFetch([ + new Response("slow", { status: 429, headers: { "Retry-After": target } }), + new Response(JSON.stringify({ ok: 1 }), { status: 200 }), + ]); + const start = Date.now(); + await httpPostJson({ + url: "https://x", + body: {}, + timeoutMs: 10_000, + maxRetries: 1, + provider: "openai_compatible", + log: nullLog(), + }); + const elapsed = Date.now() - start; + expect(f).toHaveBeenCalledTimes(2); + // Roughly the diff (allow generous slack for clock + jitter). + expect(elapsed).toBeGreaterThanOrEqual(800); + expect(elapsed).toBeLessThan(5_000); + }); + + it("429 with malformed Retry-After falls back to existing backoff", async () => { + const f = mockFetch([ + new Response("slow", { status: 429, headers: { "Retry-After": "not-a-thing" } }), + new Response(JSON.stringify({ ok: 1 }), { status: 200 }), + ]); + const start = Date.now(); + await httpPostJson({ + url: "https://x", + body: {}, + timeoutMs: 10_000, + maxRetries: 1, + provider: "openai_compatible", + log: nullLog(), + }); + const elapsed = Date.now() - start; + expect(f).toHaveBeenCalledTimes(2); + // Fallback backoff for attempt 1: 250ms base + jitter < 400ms; should NOT be ≥1s. + expect(elapsed).toBeLessThan(900); + }); + it("4xx (non-429) does not retry → LLM_UNAVAILABLE", async () => { const f = mockFetch([new Response("bad", { status: 400 })]); try { diff --git a/apps/memos-local-plugin/tests/unit/util/retry-after.test.ts b/apps/memos-local-plugin/tests/unit/util/retry-after.test.ts new file mode 100644 index 000000000..33e7b9fe2 --- /dev/null +++ b/apps/memos-local-plugin/tests/unit/util/retry-after.test.ts @@ -0,0 +1,55 @@ +import { describe, expect, it } from "vitest"; + +import { parseRetryAfterMs } from "../../../core/util/retry-after.js"; + +function respWith(headerValue: string | null): Response { + const headers = new Headers(); + if (headerValue !== null) headers.set("Retry-After", headerValue); + return new Response(null, { status: 429, headers }); +} + +describe("util/parseRetryAfterMs", () => { + it("returns null when header is absent", () => { + expect(parseRetryAfterMs(respWith(null), 60_000)).toBeNull(); + }); + + it("parses delta-seconds (integer) into milliseconds", () => { + expect(parseRetryAfterMs(respWith("3"), 60_000)).toBe(3_000); + expect(parseRetryAfterMs(respWith("0"), 60_000)).toBe(0); + }); + + it("caps delta-seconds at capMs", () => { + expect(parseRetryAfterMs(respWith("9999"), 60_000)).toBe(60_000); + }); + + it("parses HTTP-date in the near future", () => { + const target = new Date(Date.now() + 2_000); + const ms = parseRetryAfterMs(respWith(target.toUTCString()), 60_000); + expect(ms).not.toBeNull(); + // Allow generous slack for the elapsed parsing time. + expect(ms!).toBeGreaterThan(500); + expect(ms!).toBeLessThanOrEqual(2_500); + }); + + it("returns null when HTTP-date is in the past", () => { + const target = new Date(Date.now() - 60_000); + expect(parseRetryAfterMs(respWith(target.toUTCString()), 60_000)).toBeNull(); + }); + + it("caps HTTP-date diff at capMs", () => { + const target = new Date(Date.now() + 10 * 60 * 1000); + expect(parseRetryAfterMs(respWith(target.toUTCString()), 60_000)).toBe(60_000); + }); + + it("returns null for malformed values", () => { + expect(parseRetryAfterMs(respWith("not-a-number"), 60_000)).toBeNull(); + expect(parseRetryAfterMs(respWith(""), 60_000)).toBeNull(); + expect(parseRetryAfterMs(respWith(" "), 60_000)).toBeNull(); + }); + + it("rejects negative-looking values (no leading sign permitted)", () => { + // The integer regex `^\d+$` does not match `-1`, so it falls through to + // Date.parse which will return NaN for `-1` → null. + expect(parseRetryAfterMs(respWith("-1"), 60_000)).toBeNull(); + }); +}); From 95b40c1cea0581ca2ffc866347bf1475edc24bcd Mon Sep 17 00:00:00 2001 From: Sanjays2402 <51058514+Sanjays2402@users.noreply.github.com> Date: Sat, 9 May 2026 12:46:07 -0700 Subject: [PATCH 2/2] fix(fetchers): apply MAX_RETRY_AFTER_MS to backoff path too; clarify 0 is valid Address Copilot review on #1670: - parseRetryAfterMs JSDoc: document that 0 (retry immediately) is a valid value; only negative or unparseable values return null. - LLM and embedding fetchers: cap the final sleep duration with MAX_RETRY_AFTER_MS regardless of whether it came from Retry-After or exponential backoff. Previously backoff could exceed 60s on high attempt counts (config allows up to 10 retries). --- apps/memos-local-plugin/core/embedding/fetcher.ts | 5 +++-- apps/memos-local-plugin/core/llm/fetcher.ts | 5 +++-- apps/memos-local-plugin/core/util/retry-after.ts | 7 +++++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/apps/memos-local-plugin/core/embedding/fetcher.ts b/apps/memos-local-plugin/core/embedding/fetcher.ts index 264b57d41..de6202f56 100644 --- a/apps/memos-local-plugin/core/embedding/fetcher.ts +++ b/apps/memos-local-plugin/core/embedding/fetcher.ts @@ -63,7 +63,8 @@ export async function httpPostJson(opts: HttpPostOpts): Promise< durationMs: Date.now() - start, }); if (transient && attempt <= maxRetries) { - await sleep(retryAfterMs ?? backoffMs(attempt)); + const sleepMs = Math.min(retryAfterMs ?? backoffMs(attempt), MAX_RETRY_AFTER_MS); + await sleep(sleepMs); continue; } throw new MemosError( @@ -92,7 +93,7 @@ export async function httpPostJson(opts: HttpPostOpts): Promise< durationMs: Date.now() - start, }); if (transient && attempt <= maxRetries) { - await sleep(backoffMs(attempt)); + await sleep(Math.min(backoffMs(attempt), MAX_RETRY_AFTER_MS)); continue; } throw new MemosError( diff --git a/apps/memos-local-plugin/core/llm/fetcher.ts b/apps/memos-local-plugin/core/llm/fetcher.ts index fe31d7464..1fc7fb5c5 100644 --- a/apps/memos-local-plugin/core/llm/fetcher.ts +++ b/apps/memos-local-plugin/core/llm/fetcher.ts @@ -73,7 +73,8 @@ export async function httpPostJson(opts: HttpPostOpts): Promise< }); if (transient && attempt <= opts.maxRetries) { opts.onRetry?.(attempt); - await sleep(retryAfterMs ?? backoffMs(attempt)); + const sleepMs = Math.min(retryAfterMs ?? backoffMs(attempt), MAX_RETRY_AFTER_MS); + await sleep(sleepMs); continue; } throw new MemosError( @@ -103,7 +104,7 @@ export async function httpPostJson(opts: HttpPostOpts): Promise< }); if ((transient || timedOut) && attempt <= opts.maxRetries) { opts.onRetry?.(attempt); - await sleep(backoffMs(attempt)); + await sleep(Math.min(backoffMs(attempt), MAX_RETRY_AFTER_MS)); continue; } if (timedOut) { diff --git a/apps/memos-local-plugin/core/util/retry-after.ts b/apps/memos-local-plugin/core/util/retry-after.ts index bbe0da8e3..22689ba55 100644 --- a/apps/memos-local-plugin/core/util/retry-after.ts +++ b/apps/memos-local-plugin/core/util/retry-after.ts @@ -7,8 +7,11 @@ * * Returns the wait duration in milliseconds, capped to `capMs` to prevent a * hostile or buggy server from pinning the client indefinitely. Returns - * `null` when the header is absent, malformed, or non-positive — callers - * should fall back to their existing backoff strategy. + * `null` when the header is absent, malformed, or in the past (HTTP-date + * already elapsed) — callers should fall back to their existing backoff + * strategy. A value of `0` (delta-seconds) is valid and means "retry + * immediately"; servers like GitHub do this briefly as a ratelimit window + * expires. */ export function parseRetryAfterMs(resp: Response, capMs: number): number | null { const raw = resp.headers.get("retry-after");