From 51fdad3d91f59c94875355e4c1b68ee72d18df55 Mon Sep 17 00:00:00 2001
From: Vlada Dusek <v.dusek96@gmail.com>
Date: Fri, 3 Jul 2026 12:38:21 +0200
Subject: [PATCH 1/3] test: Deflake test_error_handler_can_access_page on
 Windows CI

---
 .../_playwright/test_playwright_crawler.py         | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/tests/unit/crawlers/_playwright/test_playwright_crawler.py b/tests/unit/crawlers/_playwright/test_playwright_crawler.py
index 78d1789f99..a21e18a314 100644
--- a/tests/unit/crawlers/_playwright/test_playwright_crawler.py
+++ b/tests/unit/crawlers/_playwright/test_playwright_crawler.py
@@ -1238,8 +1238,18 @@ async def failed_handler(context: BasicCrawlingContext | PlaywrightCrawlingConte
 
     await crawler.run([str(server_url / 'hello-world')])
 
-    assert error_handler_calls == [HELLO_WORLD.decode(), HELLO_WORLD.decode()]
-    assert failed_handler_calls == [HELLO_WORLD.decode()]
+    # The error handler runs on each retry and the failed-request handler on the final failure, each recording the
+    # page content when it received a `PlaywrightCrawlingContext` or `None` otherwise. On CI (notably Windows under
+    # `xdist` load) navigation can spuriously fail with `net::ERR_NO_BUFFER_SPACE` before the page is created, so that
+    # attempt surfaces a `BasicCrawlingContext` recorded as `None`. Such attempts are environmental noise rather than
+    # the behavior under test, so assert on the attempts that actually reached the page: at least one must have, and
+    # every one that did must expose the page HTML.
+    page_error_calls = [content for content in error_handler_calls if content is not None]
+    page_failed_calls = [content for content in failed_handler_calls if content is not None]
+
+    assert page_error_calls, 'the error handler never received a PlaywrightCrawlingContext'
+    assert all(content == HELLO_WORLD.decode() for content in page_error_calls)
+    assert all(content == HELLO_WORLD.decode() for content in page_failed_calls)
 
 
 def test_import_error_handled() -> None:

From 744356b31b90604306f676e1fcaa6af9319301b8 Mon Sep 17 00:00:00 2001
From: Vlada Dusek <v.dusek96@gmail.com>
Date: Fri, 3 Jul 2026 12:43:56 +0200
Subject: [PATCH 2/3] test: Shorten flaky-test comment

---
 .../unit/crawlers/_playwright/test_playwright_crawler.py  | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/tests/unit/crawlers/_playwright/test_playwright_crawler.py b/tests/unit/crawlers/_playwright/test_playwright_crawler.py
index a21e18a314..354a00fc9a 100644
--- a/tests/unit/crawlers/_playwright/test_playwright_crawler.py
+++ b/tests/unit/crawlers/_playwright/test_playwright_crawler.py
@@ -1238,12 +1238,8 @@ async def failed_handler(context: BasicCrawlingContext | PlaywrightCrawlingConte
 
     await crawler.run([str(server_url / 'hello-world')])
 
-    # The error handler runs on each retry and the failed-request handler on the final failure, each recording the
-    # page content when it received a `PlaywrightCrawlingContext` or `None` otherwise. On CI (notably Windows under
-    # `xdist` load) navigation can spuriously fail with `net::ERR_NO_BUFFER_SPACE` before the page is created, so that
-    # attempt surfaces a `BasicCrawlingContext` recorded as `None`. Such attempts are environmental noise rather than
-    # the behavior under test, so assert on the attempts that actually reached the page: at least one must have, and
-    # every one that did must expose the page HTML.
+    # On Windows CI, navigation can spuriously fail with `net::ERR_NO_BUFFER_SPACE`, giving an error-handler call
+    # with a non-Playwright context (`None`). Ignore those and require page-reaching attempts to see the page HTML.
     page_error_calls = [content for content in error_handler_calls if content is not None]
     page_failed_calls = [content for content in failed_handler_calls if content is not None]
 

From 1513919ad7ad45b7c5be7ac1d3e65959136a51f4 Mon Sep 17 00:00:00 2001
From: Vlada Dusek <v.dusek96@gmail.com>
Date: Fri, 3 Jul 2026 16:06:20 +0200
Subject: [PATCH 3/3] test: Record only page-reaching attempts in error-handler
 flake test

---
 .../_playwright/test_playwright_crawler.py    | 30 +++++++++----------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/tests/unit/crawlers/_playwright/test_playwright_crawler.py b/tests/unit/crawlers/_playwright/test_playwright_crawler.py
index 354a00fc9a..f95c6ce56f 100644
--- a/tests/unit/crawlers/_playwright/test_playwright_crawler.py
+++ b/tests/unit/crawlers/_playwright/test_playwright_crawler.py
@@ -1220,32 +1220,30 @@ async def test_error_handler_can_access_page(server_url: URL) -> None:
     request_handler = mock.AsyncMock(side_effect=RuntimeError('Intentional crash'))
     crawler.router.default_handler(request_handler)
 
-    error_handler_calls: list[str | None] = []
+    error_handler_calls: list[str] = []
 
     @crawler.error_handler
     async def error_handler(context: BasicCrawlingContext | PlaywrightCrawlingContext, _error: Exception) -> None:
-        error_handler_calls.append(
-            await context.page.content() if isinstance(context, PlaywrightCrawlingContext) else None
-        )
+        if isinstance(context, PlaywrightCrawlingContext):
+            error_handler_calls.append(await context.page.content())
 
-    failed_handler_calls: list[str | None] = []
+    failed_handler_calls: list[str] = []
 
     @crawler.failed_request_handler
     async def failed_handler(context: BasicCrawlingContext | PlaywrightCrawlingContext, _error: Exception) -> None:
-        failed_handler_calls.append(
-            await context.page.content() if isinstance(context, PlaywrightCrawlingContext) else None
-        )
+        if isinstance(context, PlaywrightCrawlingContext):
+            failed_handler_calls.append(await context.page.content())
 
     await crawler.run([str(server_url / 'hello-world')])
 
-    # On Windows CI, navigation can spuriously fail with `net::ERR_NO_BUFFER_SPACE`, giving an error-handler call
-    # with a non-Playwright context (`None`). Ignore those and require page-reaching attempts to see the page HTML.
-    page_error_calls = [content for content in error_handler_calls if content is not None]
-    page_failed_calls = [content for content in failed_handler_calls if content is not None]
-
-    assert page_error_calls, 'the error handler never received a PlaywrightCrawlingContext'
-    assert all(content == HELLO_WORLD.decode() for content in page_error_calls)
-    assert all(content == HELLO_WORLD.decode() for content in page_failed_calls)
+    # The error handler runs on each retry and the failed-request handler on the final failure. Each records the page
+    # content only when it received a `PlaywrightCrawlingContext`. On CI (notably Windows under `xdist` load) navigation
+    # can spuriously fail with `net::ERR_NO_BUFFER_SPACE` before the page is created, yielding a `BasicCrawlingContext`
+    # that never reached the page. Such attempts are environmental noise rather than the behavior under test, so assert
+    # only on attempts that reached the page: at least one error-handler call must, and each must see the page HTML.
+    assert error_handler_calls, 'the error handler never received a PlaywrightCrawlingContext'
+    assert all(content == HELLO_WORLD.decode() for content in error_handler_calls)
+    assert all(content == HELLO_WORLD.decode() for content in failed_handler_calls)
 
 
 def test_import_error_handled() -> None: