From 94e32d0457bf7a0ab7ee9313c2f0c4b3c7033b22 Mon Sep 17 00:00:00 2001
From: gololdf1sh <oleksandr.kiriukhin@gmail.com>
Date: Wed, 20 May 2026 15:27:08 +0300
Subject: [PATCH 1/2] fix(explorer): treat SPA "navigating and changing the
 content" as recoverable

Playwright throws "page.content: Unable to retrieve content because
the page is navigating and changing the content" on heavy SPAs whose
client-side router rewrites the DOM mid-action (Ember, React Router,
etc.). The explorer treated only net::ERR_ABORTED /
screenshot-timeout / waiting-for-fonts as recoverable; this message
matched none of them, so it was treated as fatal (like the
FATAL_BROWSER_ERRORS cases) and killed the whole crawl on the first
navigation race.

Add the phrase to RECOVERABLE_NAVIGATION_ERRORS so the explorer
re-queues the action instead of aborting.

Repro: collect docs against a Testomat.io page hosted in beta
(Ember-based SPA). Without the fix, ~30% of pages fail with the
fatal error on the first action. With the fix, those pages
complete normally.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/explorer.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/explorer.ts b/src/explorer.ts
index d93e045..f5ae674 100644
--- a/src/explorer.ts
+++ b/src/explorer.ts
@@ -40,7 +40,7 @@ declare namespace CodeceptJS {
 
 const debugLog = createDebug('explorbot:explorer');
 const FATAL_BROWSER_ERRORS = /Frame was detached|Target closed|Execution context was destroyed|Protocol error|Session closed/i;
-const RECOVERABLE_NAVIGATION_ERRORS = /net::ERR_ABORTED|page\.screenshot.*Timeout|waiting for fonts to load/i;
+const RECOVERABLE_NAVIGATION_ERRORS = /net::ERR_ABORTED|page\.screenshot.*Timeout|waiting for fonts to load|navigating and changing the content/i;
 
 interface TabInfo {
   url: string;

From 5c37bcd8dec48af10c640353ac4479c3dba6db63 Mon Sep 17 00:00:00 2001
From: gololdf1sh <oleksandr.kiriukhin@gmail.com>
Date: Wed, 20 May 2026 15:27:08 +0300
Subject: [PATCH 2/2] fix(doc-collector): repopulate page state when
 framenavigated stripped it

After a navigation completes, ExplorBot's framenavigated handler
overwrites the full ActionResult (with html/links/aria) with a
stripped-down WebPageState that has only { url, title, statusCode }.
The doc-collector then reads getCurrentState() and gets a state in
which state.html is undefined and state.links is missing or empty.

Consequences:
- Documentarian receives empty html -> page documentation degrades
  to a near-empty stub.
- extractNextPaths() sees an empty links array -> the subtree crawl
  stops at the entry page even when many followable links exist.

Two targeted fixes:
1. In the main collect loop, if state.html is falsy, force a
   capturePageState (with screenshots if configured). This is cheap
   compared to the AI documentation step that follows.
2. In extractNextPaths, if state.links is empty but state.html is
   present, fall back to extractLinks(state.html) so subtree
   traversal still finds child paths.

Repro: collect against a Testomat.io project page. Before:
"Pages documented: 1". After: full subtree (3-7 pages depending
on the entry).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 boat/doc-collector/src/docbot.ts | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/boat/doc-collector/src/docbot.ts b/boat/doc-collector/src/docbot.ts
index 70d03f0..cc4e34b 100644
--- a/boat/doc-collector/src/docbot.ts
+++ b/boat/doc-collector/src/docbot.ts
@@ -5,6 +5,7 @@ import type { Link, WebPageState } from '../../../src/state-manager.ts';
 import { normalizeUrl } from '../../../src/state-manager.ts';
 import { sanitizeFilename } from '../../../src/utils/strings.ts';
 import { tag } from '../../../src/utils/logger.ts';
+import { extractLinks } from '../../../src/utils/html.ts';
 import { Documentarian, type PageDocumentation } from './ai/documentarian.ts';
 import { type DocbotConfig, DocbotConfigParser } from './config.ts';
 import { type DocumentedPage, renderPageDocumentation, renderSpecIndex, type SkippedPage } from './docs-renderer.ts';
@@ -92,7 +93,7 @@ class DocBot {
           break;
         }
 
-        const state = this.explorBot.getCurrentState();
+        let state = this.explorBot.getCurrentState();
         if (!state) {
           skipped.push({
             url: target,
@@ -100,6 +101,14 @@ class DocBot {
           });
           continue;
         }
+        // If the current state is a stripped basic WebPageState (no html — happens when
+        // framenavigated fires after visit's own capture), force a fresh capture so
+        // links / html / aria are available for downstream link enqueue and research.
+        if (!state.html) {
+          const action = this.explorBot.getExplorer().createAction();
+          await action.capturePageState({ includeScreenshot: this.shouldUseScreenshots() }).catch(() => undefined);
+          state = this.explorBot.getCurrentState() ?? state;
+        }
 
         const pageKey = this.getPageKey(state.url || target);
         if (documented.has(pageKey)) {
@@ -189,7 +198,15 @@ class DocBot {
     const paths: string[] = [];
     const seen = new Set<string>();
 
-    for (const link of state.links || []) {
+    // state.links may be empty when framenavigated overwrote a full ActionResult with a
+    // stripped-down basic state. Fall back to extracting from state.html so subtree crawl
+    // still discovers child paths.
+    let links = state.links ?? [];
+    if (links.length === 0 && state.html) {
+      links = extractLinks(state.html);
+    }
+
+    for (const link of links) {
       const nextPath = this.resolveLink(link, baseUrl);
       if (!nextPath) {
         continue;