diff --git a/boat/doc-collector/src/docbot.ts b/boat/doc-collector/src/docbot.ts index 70d03f0..cc4e34b 100644 --- a/boat/doc-collector/src/docbot.ts +++ b/boat/doc-collector/src/docbot.ts @@ -5,6 +5,7 @@ import type { Link, WebPageState } from '../../../src/state-manager.ts'; import { normalizeUrl } from '../../../src/state-manager.ts'; import { sanitizeFilename } from '../../../src/utils/strings.ts'; import { tag } from '../../../src/utils/logger.ts'; +import { extractLinks } from '../../../src/utils/html.ts'; import { Documentarian, type PageDocumentation } from './ai/documentarian.ts'; import { type DocbotConfig, DocbotConfigParser } from './config.ts'; import { type DocumentedPage, renderPageDocumentation, renderSpecIndex, type SkippedPage } from './docs-renderer.ts'; @@ -92,7 +93,7 @@ class DocBot { break; } - const state = this.explorBot.getCurrentState(); + let state = this.explorBot.getCurrentState(); if (!state) { skipped.push({ url: target, @@ -100,6 +101,14 @@ class DocBot { }); continue; } + // If the current state is a stripped basic WebPageState (no html — happens when + // framenavigated fires after visit's own capture), force a fresh capture so + // links / html / aria are available for downstream link enqueue and research. + if (!state.html) { + const action = this.explorBot.getExplorer().createAction(); + await action.capturePageState({ includeScreenshot: this.shouldUseScreenshots() }).catch(() => undefined); + state = this.explorBot.getCurrentState() ?? state; + } const pageKey = this.getPageKey(state.url || target); if (documented.has(pageKey)) { @@ -189,7 +198,15 @@ class DocBot { const paths: string[] = []; const seen = new Set(); - for (const link of state.links || []) { + // state.links may be empty when framenavigated overwrote a full ActionResult with a + // stripped-down basic state. Fall back to extracting from state.html so subtree crawl + // still discovers child paths. + let links = state.links ?? []; + if (links.length === 0 && state.html) { + links = extractLinks(state.html); + } + + for (const link of links) { const nextPath = this.resolveLink(link, baseUrl); if (!nextPath) { continue; diff --git a/src/explorer.ts b/src/explorer.ts index d93e045..f5ae674 100644 --- a/src/explorer.ts +++ b/src/explorer.ts @@ -40,7 +40,7 @@ declare namespace CodeceptJS { const debugLog = createDebug('explorbot:explorer'); const FATAL_BROWSER_ERRORS = /Frame was detached|Target closed|Execution context was destroyed|Protocol error|Session closed/i; -const RECOVERABLE_NAVIGATION_ERRORS = /net::ERR_ABORTED|page\.screenshot.*Timeout|waiting for fonts to load/i; +const RECOVERABLE_NAVIGATION_ERRORS = /net::ERR_ABORTED|page\.screenshot.*Timeout|waiting for fonts to load|navigating and changing the content/i; interface TabInfo { url: string;