Skip to content

Commit 4e4cf6f

Browse files
authored
fix(webfetch): keep body read inside the timeout scope to stop "context canceled" (#59)
fetchURL created a per-request timeout context and deferred its cancel, then returned the *http.Response. The cancel fired the moment fetchURL returned — before WebFetch read the body via readResponseBody. net/http ties the body read to the request context, so any response still streaming when Do() returned aborted with "failed to read response: context canceled". Large/slow pages (e.g. Aliyun help docs) hit this every time because their body is still in flight when Do() returns; small pages got lucky only because their body was already buffered. The error is "context canceled", not "deadline exceeded" — it was never a real timeout, just a premature cancel. Merge the request and the body read into one timeout-scoped function (fetchBody), mirroring Safari's correct localWebFetch, so the cancel cannot fire before the read. Add a regression test that streams a slow body and asserts it is read in full (reproduces the exact error against the old structure).
1 parent 09ab85c commit 4e4cf6f

2 files changed

Lines changed: 103 additions & 14 deletions

File tree

environment/webfetch.go

Lines changed: 41 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -57,37 +57,49 @@ func (e *Environment) WebFetch(ctx context.Context, args *protocol.WebFetchArgs)
5757
format = "markdown"
5858
}
5959

60-
resp, err := e.fetchURL(ctx, args.URL, format, timeout)
60+
page, err := e.fetchBody(ctx, args.URL, format, timeout)
6161
if err != nil {
6262
return nil, err
6363
}
64-
defer func() {
65-
_ = resp.Body.Close()
66-
}()
6764

68-
body, err := readResponseBody(resp)
69-
if err != nil {
70-
return nil, err
71-
}
72-
73-
content := convertContent(string(body), format, resp.Header.Get("Content-Type"))
65+
content := convertContent(string(page.body), format, page.contentType)
7466
processor := NewLargeOutputProcessor(e, DefaultLargeOutputConfig())
67+
// processor.Process runs on the outer ctx: the body is already fully read
68+
// into memory, so it must not be bound to the per-fetch timeout.
7569
processed, err := processor.Process(ctx, content, "webfetch")
7670
if err != nil {
7771
return nil, err
7872
}
7973

8074
return &protocol.WebFetchResult{
8175
Content: processed.Content,
82-
URL: resp.Request.URL.String(),
76+
URL: page.finalURL,
8377
Truncated: processed.Truncated,
8478
FilePath: processed.FilePath,
8579
TotalSize: processed.TotalSize,
8680
}, nil
8781
}
8882

89-
// fetchURL performs the HTTP request via the netsafe-wrapped client.
90-
func (e *Environment) fetchURL(ctx context.Context, url, format string, timeout time.Duration) (*http.Response, error) {
83+
// fetchedPage is the raw result of a single HTTP fetch: the response body plus
84+
// the metadata WebFetch still needs after the connection has been closed.
85+
type fetchedPage struct {
86+
body []byte
87+
finalURL string
88+
contentType string
89+
}
90+
91+
// fetchBody performs the HTTP request AND reads the full response body within
92+
// a single timeout-scoped context.
93+
//
94+
// Keeping the body read inside this scope is load-bearing: net/http ties the
95+
// lifetime of resp.Body to the request context, so a cancel() that fires
96+
// before the read — e.g. a helper that returns the *http.Response and defers
97+
// its cancel — aborts the read with "failed to read response: context
98+
// canceled". Large or slow pages (e.g. Aliyun help docs) trip this every time
99+
// because their body is still streaming when Do() returns; small pages got
100+
// lucky only because their body was already buffered. Do() and the body read
101+
// must therefore share one cancel scope.
102+
func (e *Environment) fetchBody(ctx context.Context, url, format string, timeout time.Duration) (*fetchedPage, error) {
91103
httpCtx, cancel := context.WithTimeout(ctx, timeout)
92104
defer cancel()
93105

@@ -105,8 +117,23 @@ func (e *Environment) fetchURL(ctx context.Context, url, format string, timeout
105117
}
106118
return nil, fmt.Errorf("request failed: %w", err)
107119
}
120+
defer func() { _ = resp.Body.Close() }()
108121

109-
return resp, nil
122+
body, err := readResponseBody(resp)
123+
if err != nil {
124+
// The deadline can still be hit mid-read; surface it as a timeout
125+
// rather than a raw "context deadline exceeded" read error.
126+
if httpCtx.Err() == context.DeadlineExceeded {
127+
return nil, fmt.Errorf("request timed out")
128+
}
129+
return nil, err
130+
}
131+
132+
return &fetchedPage{
133+
body: body,
134+
finalURL: resp.Request.URL.String(),
135+
contentType: resp.Header.Get("Content-Type"),
136+
}, nil
110137
}
111138

112139
// setRequestHeaders sets appropriate headers for the request.

environment/webfetch_test.go

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
package environment
2+
3+
import (
4+
"context"
5+
"io"
6+
"net/http"
7+
"net/http/httptest"
8+
"strings"
9+
"testing"
10+
"time"
11+
12+
"github.com/stretchr/testify/require"
13+
)
14+
15+
// TestFetchBody_ReadsSlowStreamedBody is the regression test for the
16+
// "context canceled" bug. The old code split the fetch across the cancel
17+
// boundary: fetchURL created a per-request timeout context, deferred its
18+
// cancel, and returned the *http.Response — so cancel() fired the moment
19+
// fetchURL returned, BEFORE WebFetch read the body. net/http ties the body
20+
// read to the request context, so any response still streaming when Do()
21+
// returned aborted with "failed to read response: context canceled". Large or
22+
// slow pages (e.g. Aliyun help docs) hit this every time; small pages got lucky
23+
// because their body was already buffered.
24+
//
25+
// The fix keeps Do() and the body read inside one cancel scope (fetchBody).
26+
// This test streams a body slowly enough that it cannot be pre-buffered, then
27+
// asserts the full body is still read.
28+
func TestFetchBody_ReadsSlowStreamedBody(t *testing.T) {
29+
const (
30+
chunk = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef\n" // 65 bytes
31+
chunks = 400
32+
)
33+
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
34+
w.Header().Set("Content-Type", "text/plain")
35+
w.WriteHeader(http.StatusOK)
36+
flusher, ok := w.(http.Flusher)
37+
require.True(t, ok)
38+
flusher.Flush() // deliver headers so Do() returns before the body finishes
39+
for i := 0; i < chunks; i++ {
40+
if _, err := io.WriteString(w, chunk); err != nil {
41+
return
42+
}
43+
flusher.Flush()
44+
time.Sleep(time.Millisecond) // keep the body streaming, not pre-buffered
45+
}
46+
}))
47+
t.Cleanup(srv.Close)
48+
49+
// The production fetchClient dials through netsafe, which refuses loopback
50+
// (see security_test.go). Swap in the test server's plain client so we can
51+
// exercise the body-read / context-lifecycle path against a real stream;
52+
// dial-time SSRF protection is orthogonal and covered separately.
53+
orig := fetchClient
54+
fetchClient = srv.Client()
55+
t.Cleanup(func() { fetchClient = orig })
56+
57+
env := newTestEnvironment(t)
58+
page, err := env.fetchBody(context.Background(), srv.URL, "text", 30*time.Second)
59+
require.NoError(t, err) // pre-fix: "failed to read response: context canceled"
60+
require.Equal(t, chunks*len(chunk), len(page.body))
61+
require.True(t, strings.HasPrefix(string(page.body), "0123456789abcdef"))
62+
}

0 commit comments

Comments
 (0)