diff --git a/CHANGELOG.md b/CHANGELOG.md index 6ad70b1..777eebe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Fixed +- Relay recovery loop could not reclaim stuck messages after a pod restart because `DefaultWebhookExpiry` (30s) was shorter than `pendingIdleTimeout` (60s), so a stuck message expired before it became eligible for reclaim. Fixed by raising `DefaultWebhookExpiry` to 2 minutes and lowering `pendingIdleTimeout` to 30s; the expiry now exceeds `pendingIdleTimeout` plus the 30s recovery interval, ensuring the recovery loop always has time to reclaim a stuck message before it expires. + ## [0.2.13] - 2026-06-11 ### Changed diff --git a/internal/relay/manager.go b/internal/relay/manager.go index cf81502..162768b 100644 --- a/internal/relay/manager.go +++ b/internal/relay/manager.go @@ -17,8 +17,10 @@ var ( ) const ( - // DefaultWebhookExpiry is the default time after which a queued webhook expires - DefaultWebhookExpiry = 30 * time.Second + // DefaultWebhookExpiry is the default time after which a queued webhook expires. + // Must be greater than pendingIdleTimeout + defaultRecoveryInterval so that the + // recovery loop can reclaim stuck messages before they expire. 
+ DefaultWebhookExpiry = 2 * time.Minute ) // Webhook represents a webhook request to be delivered via relay diff --git a/internal/relay/redis_manager.go b/internal/relay/redis_manager.go index 5ace163..2b22741 100644 --- a/internal/relay/redis_manager.go +++ b/internal/relay/redis_manager.go @@ -35,7 +35,7 @@ const ( // Recovery settings defaultRecoveryInterval = 30 * time.Second // How often to check for stuck messages - pendingIdleTimeout = 60 * time.Second // How long a message can be pending before reclaim + pendingIdleTimeout = 30 * time.Second // How long a message can be pending before reclaim maxDeliveryAttempts = 3 // Max retries before dead letter ) diff --git a/internal/relay/redis_manager_test.go b/internal/relay/redis_manager_test.go index 2e0feb9..f377562 100644 --- a/internal/relay/redis_manager_test.go +++ b/internal/relay/redis_manager_test.go @@ -908,9 +908,9 @@ func TestRedisManager_Deliver_SetsExpiry(t *testing.T) { if webhook.ExpiresAt == 0 { t.Error("expected ExpiresAt to be set") } - // Should be about 30 seconds in the future - expectedMin := time.Now().Add(25 * time.Second).Unix() - expectedMax := time.Now().Add(35 * time.Second).Unix() + // Should be about 2 minutes in the future + expectedMin := time.Now().Add(115 * time.Second).Unix() + expectedMax := time.Now().Add(125 * time.Second).Unix() if webhook.ExpiresAt < expectedMin || webhook.ExpiresAt > expectedMax { t.Errorf("ExpiresAt %d not in expected range [%d, %d]", webhook.ExpiresAt, expectedMin, expectedMax) }