From 51fdad3d91f59c94875355e4c1b68ee72d18df55 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Fri, 3 Jul 2026 12:38:21 +0200 Subject: [PATCH 1/3] test: Deflake test_error_handler_can_access_page on Windows CI --- .../_playwright/test_playwright_crawler.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/unit/crawlers/_playwright/test_playwright_crawler.py b/tests/unit/crawlers/_playwright/test_playwright_crawler.py index 78d1789f99..a21e18a314 100644 --- a/tests/unit/crawlers/_playwright/test_playwright_crawler.py +++ b/tests/unit/crawlers/_playwright/test_playwright_crawler.py @@ -1238,8 +1238,18 @@ async def failed_handler(context: BasicCrawlingContext | PlaywrightCrawlingConte await crawler.run([str(server_url / 'hello-world')]) - assert error_handler_calls == [HELLO_WORLD.decode(), HELLO_WORLD.decode()] - assert failed_handler_calls == [HELLO_WORLD.decode()] + # The error handler runs on each retry and the failed-request handler on the final failure, each recording the + # page content when it received a `PlaywrightCrawlingContext` or `None` otherwise. On CI (notably Windows under + # `xdist` load) navigation can spuriously fail with `net::ERR_NO_BUFFER_SPACE` before the page is created, so that + # attempt surfaces a `BasicCrawlingContext` recorded as `None`. Such attempts are environmental noise rather than + # the behavior under test, so assert on the attempts that actually reached the page: at least one must have, and + # every one that did must expose the page HTML. + page_error_calls = [content for content in error_handler_calls if content is not None] + page_failed_calls = [content for content in failed_handler_calls if content is not None] + + assert page_error_calls, 'the error handler never received a PlaywrightCrawlingContext' + assert all(content == HELLO_WORLD.decode() for content in page_error_calls) + assert all(content == HELLO_WORLD.decode() for content in page_failed_calls) def test_import_error_handled() -> None: From 744356b31b90604306f676e1fcaa6af9319301b8 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Fri, 3 Jul 2026 12:43:56 +0200 Subject: [PATCH 2/3] test: Shorten flaky-test comment --- .../unit/crawlers/_playwright/test_playwright_crawler.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/unit/crawlers/_playwright/test_playwright_crawler.py b/tests/unit/crawlers/_playwright/test_playwright_crawler.py index a21e18a314..354a00fc9a 100644 --- a/tests/unit/crawlers/_playwright/test_playwright_crawler.py +++ b/tests/unit/crawlers/_playwright/test_playwright_crawler.py @@ -1238,12 +1238,8 @@ async def failed_handler(context: BasicCrawlingContext | PlaywrightCrawlingConte await crawler.run([str(server_url / 'hello-world')]) - # The error handler runs on each retry and the failed-request handler on the final failure, each recording the - # page content when it received a `PlaywrightCrawlingContext` or `None` otherwise. On CI (notably Windows under - # `xdist` load) navigation can spuriously fail with `net::ERR_NO_BUFFER_SPACE` before the page is created, so that - # attempt surfaces a `BasicCrawlingContext` recorded as `None`. Such attempts are environmental noise rather than - # the behavior under test, so assert on the attempts that actually reached the page: at least one must have, and - # every one that did must expose the page HTML. + # On Windows CI, navigation can spuriously fail with `net::ERR_NO_BUFFER_SPACE`, giving an error-handler call + # with a non-Playwright context (`None`). Ignore those and require page-reaching attempts to see the page HTML. page_error_calls = [content for content in error_handler_calls if content is not None] page_failed_calls = [content for content in failed_handler_calls if content is not None] From 1513919ad7ad45b7c5be7ac1d3e65959136a51f4 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Fri, 3 Jul 2026 16:06:20 +0200 Subject: [PATCH 3/3] test: Record only page-reaching attempts in error-handler flake test --- .../_playwright/test_playwright_crawler.py | 30 +++++++++---------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/tests/unit/crawlers/_playwright/test_playwright_crawler.py b/tests/unit/crawlers/_playwright/test_playwright_crawler.py index 354a00fc9a..f95c6ce56f 100644 --- a/tests/unit/crawlers/_playwright/test_playwright_crawler.py +++ b/tests/unit/crawlers/_playwright/test_playwright_crawler.py @@ -1220,32 +1220,30 @@ async def test_error_handler_can_access_page(server_url: URL) -> None: request_handler = mock.AsyncMock(side_effect=RuntimeError('Intentional crash')) crawler.router.default_handler(request_handler) - error_handler_calls: list[str | None] = [] + error_handler_calls: list[str] = [] @crawler.error_handler async def error_handler(context: BasicCrawlingContext | PlaywrightCrawlingContext, _error: Exception) -> None: - error_handler_calls.append( - await context.page.content() if isinstance(context, PlaywrightCrawlingContext) else None - ) + if isinstance(context, PlaywrightCrawlingContext): + error_handler_calls.append(await context.page.content()) - failed_handler_calls: list[str | None] = [] + failed_handler_calls: list[str] = [] @crawler.failed_request_handler async def failed_handler(context: BasicCrawlingContext | PlaywrightCrawlingContext, _error: Exception) -> None: - failed_handler_calls.append( - await context.page.content() if isinstance(context, PlaywrightCrawlingContext) else None - ) + if isinstance(context, PlaywrightCrawlingContext): + failed_handler_calls.append(await context.page.content()) await crawler.run([str(server_url / 'hello-world')]) - # On Windows CI, navigation can spuriously fail with `net::ERR_NO_BUFFER_SPACE`, giving an error-handler call - # with a non-Playwright context (`None`). Ignore those and require page-reaching attempts to see the page HTML. - page_error_calls = [content for content in error_handler_calls if content is not None] - page_failed_calls = [content for content in failed_handler_calls if content is not None] - - assert page_error_calls, 'the error handler never received a PlaywrightCrawlingContext' - assert all(content == HELLO_WORLD.decode() for content in page_error_calls) - assert all(content == HELLO_WORLD.decode() for content in page_failed_calls) + # The error handler runs on each retry and the failed-request handler on the final failure. Each records the page + # content only when it received a `PlaywrightCrawlingContext`. On CI (notably Windows under `xdist` load) navigation + # can spuriously fail with `net::ERR_NO_BUFFER_SPACE` before the page is created, yielding a `BasicCrawlingContext` + # that never reached the page. Such attempts are environmental noise rather than the behavior under test, so assert + # only on the attempts that actually reached the page: at least one must have, and every one that did exposes it. + assert error_handler_calls, 'the error handler never received a PlaywrightCrawlingContext' + assert all(content == HELLO_WORLD.decode() for content in error_handler_calls) + assert all(content == HELLO_WORLD.decode() for content in failed_handler_calls) def test_import_error_handled() -> None: