From 94e32d0457bf7a0ab7ee9313c2f0c4b3c7033b22 Mon Sep 17 00:00:00 2001 From: gololdf1sh Date: Wed, 20 May 2026 15:27:08 +0300 Subject: [PATCH 1/2] fix(explorer): treat SPA "navigating and changing the content" as recoverable Playwright throws "page.content: Unable to retrieve content because the page is navigating and changing the content" on heavy SPAs whose client-side router rewrites the DOM mid-action (Ember, React Router, etc.). The explorer was catching only net::ERR_ABORTED / screenshot-timeout / waiting-for-fonts as recoverable; this new phrase fell through to FATAL_BROWSER_ERRORS and killed the whole crawl on the first navigation race. Add the phrase to RECOVERABLE_NAVIGATION_ERRORS so the explorer re-queues the action instead of aborting. Repro: collect docs against a Testomat.io page hosted in beta (Ember-based SPA). Without the fix, ~30% of pages fail with the fatal error on the first action. With the fix, those pages complete normally. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/explorer.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/explorer.ts b/src/explorer.ts index d93e045..f5ae674 100644 --- a/src/explorer.ts +++ b/src/explorer.ts @@ -40,7 +40,7 @@ declare namespace CodeceptJS { const debugLog = createDebug('explorbot:explorer'); const FATAL_BROWSER_ERRORS = /Frame was detached|Target closed|Execution context was destroyed|Protocol error|Session closed/i; -const RECOVERABLE_NAVIGATION_ERRORS = /net::ERR_ABORTED|page\.screenshot.*Timeout|waiting for fonts to load/i; +const RECOVERABLE_NAVIGATION_ERRORS = /net::ERR_ABORTED|page\.screenshot.*Timeout|waiting for fonts to load|navigating and changing the content/i; interface TabInfo { url: string; From 5c37bcd8dec48af10c640353ac4479c3dba6db63 Mon Sep 17 00:00:00 2001 From: gololdf1sh Date: Wed, 20 May 2026 15:27:08 +0300 Subject: [PATCH 2/2] fix(doc-collector): repopulate page state when framenavigated stripped it After a navigation completes, ExplorBot's 
framenavigated handler overwrites the full ActionResult (with html/links/aria) with a stripped-down WebPageState that has only { url, title, statusCode }. The doc-collector then reads getCurrentState() and gets a state where state.html is undefined and state.links is an empty array. Consequences: - Documentarian receives empty html -> page documentation degrades to a near-empty stub. - extractNextPaths() sees an empty links array -> the subtree crawl stops at the entry page even when many followable links exist. Two targeted fixes: 1. In the main collect loop, if state.html is falsy, force a fresh capturePageState() call (with screenshots if configured). This is cheap compared to the AI documentation step that follows. If the capture fails, the error is swallowed and the collector continues with whatever state is current, so a failed re-capture never aborts the crawl. 2. In extractNextPaths, if state.links is empty but state.html is present, fall back to extractLinks(state.html) so subtree traversal still finds child paths. Repro: collect against a Testomat.io project page. Before: "Pages documented: 1". After: full subtree (3-7 pages depending on the entry). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- boat/doc-collector/src/docbot.ts | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/boat/doc-collector/src/docbot.ts b/boat/doc-collector/src/docbot.ts index 70d03f0..cc4e34b 100644 --- a/boat/doc-collector/src/docbot.ts +++ b/boat/doc-collector/src/docbot.ts @@ -5,6 +5,7 @@ import type { Link, WebPageState } from '../../../src/state-manager.ts'; import { normalizeUrl } from '../../../src/state-manager.ts'; import { sanitizeFilename } from '../../../src/utils/strings.ts'; import { tag } from '../../../src/utils/logger.ts'; +import { extractLinks } from '../../../src/utils/html.ts'; import { Documentarian, type PageDocumentation } from './ai/documentarian.ts'; import { type DocbotConfig, DocbotConfigParser } from './config.ts'; import { type DocumentedPage, renderPageDocumentation, renderSpecIndex, type SkippedPage } from './docs-renderer.ts'; @@ -92,7 +93,7 @@ class DocBot { break; } - const state = this.explorBot.getCurrentState(); + let state = this.explorBot.getCurrentState(); if (!state) { skipped.push({ url: target, @@ -100,6 +101,14 @@ class DocBot { }); continue; } + // If the current state is a stripped basic WebPageState (no html — happens when + // framenavigated fires after visit's own capture), force a fresh capture so + // links / html / aria are available for downstream link enqueue and research. + if (!state.html) { + const action = this.explorBot.getExplorer().createAction(); + await action.capturePageState({ includeScreenshot: this.shouldUseScreenshots() }).catch(() => undefined); + state = this.explorBot.getCurrentState() ?? 
state; + } const pageKey = this.getPageKey(state.url || target); if (documented.has(pageKey)) { @@ -189,7 +198,15 @@ class DocBot { const paths: string[] = []; const seen = new Set(); - for (const link of state.links || []) { + // state.links may be empty when framenavigated overwrote a full ActionResult with a + // stripped-down basic state. Fall back to extracting from state.html so subtree crawl + // still discovers child paths. + let links = state.links ?? []; + if (links.length === 0 && state.html) { + links = extractLinks(state.html); + } + + for (const link of links) { const nextPath = this.resolveLink(link, baseUrl); if (!nextPath) { continue;