From 608090efe099835314c16ec570982ddffd6353a9 Mon Sep 17 00:00:00 2001 From: Adrien Delorme Date: Wed, 27 May 2026 13:08:47 +0200 Subject: [PATCH] fix(memorypullcache): surface pull failures with the image ref `memorypullcache.Fetch` wrapped remote.Image, img.Size, and io.ReadAll errors in generic messages (e.g. `in remote.Image: %w`) without ever logging them or naming the image. Operators saw "Cache miss ref=..." followed by silence, even when the pull was failing (network, auth, or 404). The eventual user-visible error came from a higher layer with no ref attached, so the root cause was hard to identify. Wrap each error with the ref and log it at WARN. Same shape as the existing ategcs error-surfacing in #40. --- internal/memorypullcache/memorypullcache.go | 9 +++++--- .../memorypullcache/memorypullcache_test.go | 21 +++++++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/internal/memorypullcache/memorypullcache.go b/internal/memorypullcache/memorypullcache.go index 2250e68..a46962d 100644 --- a/internal/memorypullcache/memorypullcache.go +++ b/internal/memorypullcache/memorypullcache.go @@ -135,12 +135,14 @@ func (c *MemoryPullCache) Fetch(ctx context.Context, ref string) (io.ReadCloser, img, err := remote.Image(parsedRef, remoteOptions...) 
if err != nil { - return nil, fmt.Errorf("in remote.Image: %w", err) + slog.WarnContext(ctx, "Image pull failed", slog.String("ref", ref), slog.Any("error", err)) + return nil, fmt.Errorf("while pulling image %q: %w", ref, err) } size, err := img.Size() if err != nil { - return nil, fmt.Errorf("in img.Size(): %w", err) + slog.WarnContext(ctx, "Image size lookup failed", slog.String("ref", ref), slog.Any("error", err)) + return nil, fmt.Errorf("while reading size of image %q: %w", ref, err) } if size > 100*1024*1024 { slog.InfoContext(ctx, @@ -156,7 +158,8 @@ func (c *MemoryPullCache) Fetch(ctx context.Context, ref string) (io.ReadCloser, memData, err := io.ReadAll(tarData) if err != nil { - return nil, fmt.Errorf("while reading image: %w", err) + slog.WarnContext(ctx, "Image read failed", slog.String("ref", ref), slog.Any("error", err)) + return nil, fmt.Errorf("while reading image %q: %w", ref, err) } if digestWasIncluded { diff --git a/internal/memorypullcache/memorypullcache_test.go b/internal/memorypullcache/memorypullcache_test.go index 7811209..8c50aa5 100644 --- a/internal/memorypullcache/memorypullcache_test.go +++ b/internal/memorypullcache/memorypullcache_test.go @@ -15,6 +15,8 @@ package memorypullcache import ( + "context" + "strings" "testing" ) @@ -82,3 +84,22 @@ func TestRewriteLocalRegistry(t *testing.T) { }) } } + +// TestFetchErrorIncludesRef checks that pull failures wrap the error with the +// requested image ref, so operators can identify which image failed from the +// error chain alone (without having to correlate trace IDs across systems). +func TestFetchErrorIncludesRef(t *testing.T) { + // A registry that refuses connection so remote.Image returns an error. 
+ const ref = "127.0.0.1:1/missing/image:nope" + c, err := NewMemoryPullCache(context.Background(), nil, "") + if err != nil { + t.Fatalf("NewMemoryPullCache: %v", err) + } + _, err = c.Fetch(context.Background(), ref) + if err == nil { + t.Fatal("Fetch on an unreachable registry returned nil error") + } + if !strings.Contains(err.Error(), ref) { + t.Errorf("Fetch error %q does not contain ref %q", err, ref) + } +}