From 5cb711193c8eda88343b4822d4435e6eb2869401 Mon Sep 17 00:00:00 2001 From: Quaternions Date: Thu, 11 Jun 2026 17:08:41 +0200 Subject: [PATCH 1/2] fix: bound sdk.shutdown() so a hung telemetry flush can't block exit When the iii engine is already gone, the OTel exporter retries forever and sdk.shutdown() never resolves, leaving the process hanging on SIGTERM until it is force-killed (~90s under systemd). Race sdk.shutdown() against a 3s timeout so the handler always reaches process.exit(). Fixes #909 Signed-off-by: Quaternions --- src/index.ts | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/index.ts b/src/index.ts index 4233e8a67..2ac130f37 100644 --- a/src/index.ts +++ b/src/index.ts @@ -598,7 +598,16 @@ async function main() { await indexPersistence.save().catch((err) => { console.warn(`[agentmemory] Failed to save index on shutdown:`, err); }); - await sdk.shutdown(); + // #909 / iii-hq/iii#1835: when the iii engine is already gone, the OTel + // exporter retries forever and sdk.shutdown() never resolves, hanging the + // process until it's force-killed. Race it against a 3s timeout so we always + // reach process.exit(); any un-flushed telemetry is dropped on shutdown. + await Promise.race([ + sdk.shutdown(), + new Promise((resolve) => setTimeout(resolve, 3000)), + ]).catch((err) => { + console.warn(`[agentmemory] sdk.shutdown() timed out or errored:`, err); + }); clearWorkerPidfile(); process.exit(0); }; From de901b54c7a95e7ec3f90c612e540ee2d91da5aa Mon Sep 17 00:00:00 2001 From: Quaternions Date: Thu, 11 Jun 2026 17:45:12 +0200 Subject: [PATCH 2/2] refactor: separate timeout and error logging in shutdown handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address CodeRabbit review: the previous catch() only logged when sdk.shutdown() threw, so the timeout path — the defensive case this guards against — was silent. Log the timeout and shutdown-error cases separately, and trim the comment to the why. Signed-off-by: Quaternions --- src/index.ts | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/index.ts b/src/index.ts index 2ac130f37..406fede8f 100644 --- a/src/index.ts +++ b/src/index.ts @@ -598,16 +598,18 @@ async function main() { await indexPersistence.save().catch((err) => { console.warn(`[agentmemory] Failed to save index on shutdown:`, err); }); - // #909 / iii-hq/iii#1835: when the iii engine is already gone, the OTel - // exporter retries forever and sdk.shutdown() never resolves, hanging the - // process until it's force-killed. Race it against a 3s timeout so we always - // reach process.exit(); any un-flushed telemetry is dropped on shutdown. - await Promise.race([ - sdk.shutdown(), - new Promise((resolve) => setTimeout(resolve, 3000)), - ]).catch((err) => { - console.warn(`[agentmemory] sdk.shutdown() timed out or errored:`, err); + // #909 / iii-hq/iii#1835: defensive timeout prevents an indefinite hang when + // the OTel exporter is stuck retrying; un-flushed telemetry is dropped on exit. + const shutdownPromise = sdk.shutdown().catch((err) => { + console.warn(`[agentmemory] sdk.shutdown() errored:`, err); }); + const timeoutPromise = new Promise((resolve) => + setTimeout(() => { + console.warn(`[agentmemory] sdk.shutdown() exceeded 3s timeout, proceeding to exit`); + resolve(); + }, 3000), + ); + await Promise.race([shutdownPromise, timeoutPromise]); clearWorkerPidfile(); process.exit(0); };