Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions boat/doc-collector/src/docbot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import type { Link, WebPageState } from '../../../src/state-manager.ts';
import { normalizeUrl } from '../../../src/state-manager.ts';
import { sanitizeFilename } from '../../../src/utils/strings.ts';
import { tag } from '../../../src/utils/logger.ts';
import { extractLinks } from '../../../src/utils/html.ts';
import { Documentarian, type PageDocumentation } from './ai/documentarian.ts';
import { type DocbotConfig, DocbotConfigParser } from './config.ts';
import { type DocumentedPage, renderPageDocumentation, renderSpecIndex, type SkippedPage } from './docs-renderer.ts';
Expand Down Expand Up @@ -92,14 +93,22 @@ class DocBot {
break;
}

const state = this.explorBot.getCurrentState();
let state = this.explorBot.getCurrentState();
if (!state) {
skipped.push({
url: target,
reason: 'page state was not captured after navigation',
});
continue;
}
// The current state may be a stripped-down basic WebPageState with no html; this happens
// when framenavigated fires after visit's own capture. In that case, force a fresh capture
// so that links / html / aria are available for downstream link enqueueing and research.
if (!state.html) {
const action = this.explorBot.getExplorer().createAction();
await action.capturePageState({ includeScreenshot: this.shouldUseScreenshots() }).catch(() => undefined);
state = this.explorBot.getCurrentState() ?? state;
}

const pageKey = this.getPageKey(state.url || target);
if (documented.has(pageKey)) {
Expand Down Expand Up @@ -189,7 +198,15 @@ class DocBot {
const paths: string[] = [];
const seen = new Set<string>();

for (const link of state.links || []) {
// state.links may be missing or empty when framenavigated has overwritten a full ActionResult
// with a stripped-down basic state. Fall back to extracting links from state.html so that the
// subtree crawl still discovers child paths.
let links = state.links ?? [];
if (links.length === 0 && state.html) {
links = extractLinks(state.html);
}

for (const link of links) {
const nextPath = this.resolveLink(link, baseUrl);
if (!nextPath) {
continue;
Expand Down
2 changes: 1 addition & 1 deletion src/explorer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ declare namespace CodeceptJS {

const debugLog = createDebug('explorbot:explorer');
const FATAL_BROWSER_ERRORS = /Frame was detached|Target closed|Execution context was destroyed|Protocol error|Session closed/i;
const RECOVERABLE_NAVIGATION_ERRORS = /net::ERR_ABORTED|page\.screenshot.*Timeout|waiting for fonts to load/i;
const RECOVERABLE_NAVIGATION_ERRORS = /net::ERR_ABORTED|page\.screenshot.*Timeout|waiting for fonts to load|navigating and changing the content/i;

interface TabInfo {
url: string;
Expand Down
Loading