From 806a20e79e1611e5336f4536e3fb3dd7783785cf Mon Sep 17 00:00:00 2001 From: ysyneu Date: Tue, 9 Jun 2026 15:14:39 +0800 Subject: [PATCH] fix(webfetch): keep body read inside the timeout scope to stop "context canceled" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fetchURL created a per-request timeout context and deferred its cancel, then returned the *http.Response. The cancel fired the moment fetchURL returned — before WebFetch read the body via readResponseBody. net/http ties the body read to the request context, so any response still streaming when Do() returned aborted with "failed to read response: context canceled". Large/slow pages (e.g. Aliyun help docs) hit this every time because their body is still in flight when Do() returns; small pages got lucky only because their body was already buffered. The error is "context canceled", not "deadline exceeded" — it was never a real timeout, just a premature cancel. Merge the request and the body read into one timeout-scoped function (fetchBody), mirroring Safari's correct localWebFetch, so the cancel cannot fire before the read. Add a regression test that streams a slow body and asserts it is read in full (reproduces the exact error against the old structure). --- environment/webfetch.go | 55 ++++++++++++++++++++++++-------- environment/webfetch_test.go | 62 ++++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+), 14 deletions(-) create mode 100644 environment/webfetch_test.go diff --git a/environment/webfetch.go b/environment/webfetch.go index 646b29f..de5f70f 100644 --- a/environment/webfetch.go +++ b/environment/webfetch.go @@ -57,21 +57,15 @@ func (e *Environment) WebFetch(ctx context.Context, args *protocol.WebFetchArgs) format = "markdown" } - resp, err := e.fetchURL(ctx, args.URL, format, timeout) + page, err := e.fetchBody(ctx, args.URL, format, timeout) if err != nil { return nil, err } - defer func() { - _ = resp.Body.Close() - }() - body, err := readResponseBody(resp) - if err != nil { - return nil, err - } - - content := convertContent(string(body), format, resp.Header.Get("Content-Type")) + content := convertContent(string(page.body), format, page.contentType) processor := NewLargeOutputProcessor(e, DefaultLargeOutputConfig()) + // processor.Process runs on the outer ctx: the body is already fully read + // into memory, so it must not be bound to the per-fetch timeout. processed, err := processor.Process(ctx, content, "webfetch") if err != nil { return nil, err @@ -79,15 +73,33 @@ func (e *Environment) WebFetch(ctx context.Context, args *protocol.WebFetchArgs) return &protocol.WebFetchResult{ Content: processed.Content, - URL: resp.Request.URL.String(), + URL: page.finalURL, Truncated: processed.Truncated, FilePath: processed.FilePath, TotalSize: processed.TotalSize, }, nil } -// fetchURL performs the HTTP request via the netsafe-wrapped client. -func (e *Environment) fetchURL(ctx context.Context, url, format string, timeout time.Duration) (*http.Response, error) { +// fetchedPage is the raw result of a single HTTP fetch: the response body plus +// the metadata WebFetch still needs after the connection has been closed. +type fetchedPage struct { + body []byte + finalURL string + contentType string +} + +// fetchBody performs the HTTP request AND reads the full response body within +// a single timeout-scoped context. +// +// Keeping the body read inside this scope is load-bearing: net/http ties the +// lifetime of resp.Body to the request context, so a cancel() that fires +// before the read — e.g. a helper that returns the *http.Response and defers +// its cancel — aborts the read with "failed to read response: context +// canceled". Large or slow pages (e.g. Aliyun help docs) trip this every time +// because their body is still streaming when Do() returns; small pages got +// lucky only because their body was already buffered. Do() and the body read +// must therefore share one cancel scope. +func (e *Environment) fetchBody(ctx context.Context, url, format string, timeout time.Duration) (*fetchedPage, error) { httpCtx, cancel := context.WithTimeout(ctx, timeout) defer cancel() @@ -105,8 +117,23 @@ func (e *Environment) fetchURL(ctx context.Context, url, format string, timeout } return nil, fmt.Errorf("request failed: %w", err) } + defer func() { _ = resp.Body.Close() }() - return resp, nil + body, err := readResponseBody(resp) + if err != nil { + // The deadline can still be hit mid-read; surface it as a timeout + // rather than a raw "context deadline exceeded" read error. + if httpCtx.Err() == context.DeadlineExceeded { + return nil, fmt.Errorf("request timed out") + } + return nil, err + } + + return &fetchedPage{ + body: body, + finalURL: resp.Request.URL.String(), + contentType: resp.Header.Get("Content-Type"), + }, nil } // setRequestHeaders sets appropriate headers for the request. diff --git a/environment/webfetch_test.go b/environment/webfetch_test.go new file mode 100644 index 0000000..411dcd6 --- /dev/null +++ b/environment/webfetch_test.go @@ -0,0 +1,62 @@ +package environment + +import ( + "context" + "io" + "net/http" + "net/http/httptest" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +// TestFetchBody_ReadsSlowStreamedBody is the regression test for the +// "context canceled" bug. The old code split the fetch across the cancel +// boundary: fetchURL created a per-request timeout context, deferred its +// cancel, and returned the *http.Response — so cancel() fired the moment +// fetchURL returned, BEFORE WebFetch read the body. net/http ties the body +// read to the request context, so any response still streaming when Do() +// returned aborted with "failed to read response: context canceled". Large or +// slow pages (e.g. Aliyun help docs) hit this every time; small pages got lucky +// because their body was already buffered. +// +// The fix keeps Do() and the body read inside one cancel scope (fetchBody). +// This test streams a body slowly enough that it cannot be pre-buffered, then +// asserts the full body is still read. +func TestFetchBody_ReadsSlowStreamedBody(t *testing.T) { + const ( + chunk = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef\n" // 65 bytes + chunks = 400 + ) + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/plain") + w.WriteHeader(http.StatusOK) + flusher, ok := w.(http.Flusher) + require.True(t, ok) + flusher.Flush() // deliver headers so Do() returns before the body finishes + for i := 0; i < chunks; i++ { + if _, err := io.WriteString(w, chunk); err != nil { + return + } + flusher.Flush() + time.Sleep(time.Millisecond) // keep the body streaming, not pre-buffered + } + })) + t.Cleanup(srv.Close) + + // The production fetchClient dials through netsafe, which refuses loopback + // (see security_test.go). Swap in the test server's plain client so we can + // exercise the body-read / context-lifecycle path against a real stream; + // dial-time SSRF protection is orthogonal and covered separately. + orig := fetchClient + fetchClient = srv.Client() + t.Cleanup(func() { fetchClient = orig }) + + env := newTestEnvironment(t) + page, err := env.fetchBody(context.Background(), srv.URL, "text", 30*time.Second) + require.NoError(t, err) // pre-fix: "failed to read response: context canceled" + require.Equal(t, chunks*len(chunk), len(page.body)) + require.True(t, strings.HasPrefix(string(page.body), "0123456789abcdef")) +}