Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
457 changes: 375 additions & 82 deletions boat/doc-collector/src/ai/documentarian.ts

Large diffs are not rendered by default.

766 changes: 766 additions & 0 deletions boat/doc-collector/src/ai/tools.ts

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions boat/doc-collector/src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ export function createDocsCommands(name = 'docs'): Command {
maxPages: 100,
output: 'docs',
screenshot: true,
interactive: false,
collapseDynamicPages: true,
scope: 'site',
includePaths: [],
Expand Down
2 changes: 2 additions & 0 deletions boat/doc-collector/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ class DocbotConfigParser {
maxPages: 100,
output: 'docs',
screenshot: true,
interactive: false,
collapseDynamicPages: true,
scope: 'site',
includePaths: [],
Expand Down Expand Up @@ -155,6 +156,7 @@ interface DocbotConfig {
deniedPathSegments?: string[];
minCanActions?: number;
minInteractiveElements?: number;
interactive?: boolean;
};
}

Expand Down
69 changes: 64 additions & 5 deletions boat/doc-collector/src/docbot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class DocBot {
config: this.options.docsConfig,
path: this.options.path,
});
this.documentarian = new Documentarian(this.explorBot.getProvider(), this.config);
this.documentarian = new Documentarian(this.explorBot.getProvider(), this.config, this.explorBot.getExplorer());
this.ensureDirectory(this.configParser.getOutputDir());
this.ensureDirectory(this.getPagesDir());
}
Expand Down Expand Up @@ -128,18 +128,22 @@ class DocBot {
summary: documentation.summary,
canCount: documentation.can.length,
mightCount: documentation.might.length,
interactionCount: (documentation.interactions || []).length,
canActions: documentation.can.map((item) => item.action),
mightActions: documentation.might.map((item) => item.action),
interactionActions: (documentation.interactions || []).map((item) => item.action),
qualityNotes: documentation.qualityNotes || [],
filePath,
});
documented.add(pageKey);

const nextPaths = this.extractNextPaths(state, baseUrl, research);
const nextPaths = this.extractNextPaths(state, baseUrl, research, documentation);
const interactionPriorityPaths = new Set(this.extractInteractionPaths(baseUrl, documentation));
for (const nextPath of nextPaths) {
if (documented.has(this.getPageKey(nextPath))) {
continue;
}
if (stateManager.hasVisitedState(nextPath)) {
if (!interactionPriorityPaths.has(nextPath) && stateManager.hasVisitedState(nextPath)) {
continue;
}
this.enqueuePath(nextPath, queue, queued);
Expand Down Expand Up @@ -185,10 +189,18 @@ class DocBot {
return true;
}

private extractNextPaths(state: WebPageState, baseUrl: string, research: string): string[] {
private extractNextPaths(state: WebPageState, baseUrl: string, research: string, documentation?: PageDocumentation): string[] {
const paths: string[] = [];
const seen = new Set<string>();

for (const interactionPath of this.extractInteractionPaths(baseUrl, documentation)) {
if (seen.has(interactionPath)) {
continue;
}
seen.add(interactionPath);
paths.push(interactionPath);
}

for (const link of state.links || []) {
const nextPath = this.resolveLink(link, baseUrl);
if (!nextPath) {
Expand Down Expand Up @@ -224,11 +236,58 @@ class DocBot {
return paths;
}

/**
 * Gathers the follow-up paths reported by a page's recorded interactions.
 *
 * For each interaction, its `targetUrl` (when truthy) is considered first,
 * followed by every entry of `discoveredUrls`. Each raw URL is resolved
 * against `baseUrl` via `resolveRawUrl`; unresolved results and paths rejected
 * by `isEligibleNextPath` are dropped.
 *
 * @param baseUrl - URL used as the base when resolving each raw URL.
 * @param documentation - Page documentation whose `interactions` are scanned;
 *   when it or its `interactions` is missing, the result is empty.
 * @returns Unique resolved paths in first-seen order.
 */
private extractInteractionPaths(baseUrl: string, documentation?: PageDocumentation): string[] {
  // A Set iterates in insertion order, so it provides both the de-duplication
  // and the first-seen ordering of the returned list.
  const collected = new Set<string>();

  for (const interaction of documentation?.interactions || []) {
    const rawUrls = [
      ...(interaction.targetUrl ? [interaction.targetUrl] : []),
      ...(interaction.discoveredUrls || []),
    ];

    for (const rawUrl of rawUrls) {
      const resolvedPath = this.resolveRawUrl(rawUrl, baseUrl);
      if (resolvedPath && this.isEligibleNextPath(resolvedPath)) {
        collected.add(resolvedPath);
      }
    }
  }

  return [...collected];
}

/**
 * Decides whether a resolved path may be queued for crawling.
 *
 * The path must be accepted by `shouldCrawlDocPath` for the current config and
 * by `isInScope`. The scope check only runs when the crawl check passes.
 *
 * @param nextPath - Resolved path to evaluate.
 * @returns `true` when both checks accept the path, otherwise `false`.
 */
private isEligibleNextPath(nextPath: string): boolean {
  // `&&` short-circuits, so `isInScope` is skipped for non-crawlable paths;
  // Boolean() keeps the result a strict true/false.
  return Boolean(shouldCrawlDocPath(nextPath, this.config) && this.isInScope(nextPath));
}

/**
 * Resolves a page link by passing its `url` to `resolveRawUrl`.
 *
 * @param link - Link whose `url` is resolved.
 * @param baseUrl - Base URL forwarded to `resolveRawUrl`.
 * @returns Whatever `resolveRawUrl` returns for `link.url`: a path string or `null`.
 */
private resolveLink(link: Link, baseUrl: string): string | null {
return this.resolveRawUrl(link.url, baseUrl);
}

private resolveRawUrl(rawUrl: string, baseUrl: string): string | null {
let resolved: URL;

try {
resolved = new URL(link.url, baseUrl);
resolved = new URL(rawUrl, baseUrl);
} catch {
return null;
}
Expand Down
58 changes: 56 additions & 2 deletions boat/doc-collector/src/docs-renderer.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import path from 'node:path';
import type { WebPageState } from '../../../src/state-manager.ts';
import type { PageDocumentation } from './ai/documentarian.ts';
import type { PageDocumentation, StateTransition } from './ai/documentarian.ts';

function renderPageDocumentation(state: WebPageState, documentation: PageDocumentation): string {
const lines: string[] = [];
Expand All @@ -16,6 +16,28 @@ function renderPageDocumentation(state: WebPageState, documentation: PageDocumen
lines.push('');
lines.push(ensureSentence(documentation.summary));
lines.push('');

const interactions = documentation.interactions;
if (interactions && interactions.length > 0) {
lines.push('## State Transitions');
lines.push('');
for (const transition of interactions) {
lines.push(`### ${transition.action}`);
lines.push('');
lines.push(`**Before:** ${transition.before}`);
lines.push('');
lines.push(`**After:** ${transition.after}`);
lines.push('');
if (transition.newCapabilities && transition.newCapabilities.length > 0) {
lines.push('**New capabilities discovered:**');
for (const cap of transition.newCapabilities) {
lines.push(`- ${cap}`);
}
lines.push('');
}
}
}

lines.push('## User Can');
lines.push('');

Expand Down Expand Up @@ -50,6 +72,16 @@ function renderPageDocumentation(state: WebPageState, documentation: PageDocumen
lines.push('');
}

const qualityNotes = documentation.qualityNotes;
if (qualityNotes && qualityNotes.length > 0) {
lines.push('## Coverage Notes');
lines.push('');
for (const note of qualityNotes) {
lines.push(`- ${ensureSentence(note)}`);
}
lines.push('');
}

return `${lines.join('\n').trimEnd()}\n`;
}

Expand Down Expand Up @@ -79,6 +111,9 @@ function renderSpecIndex(outputDir: string, startPath: string, pages: Documented
lines.push(`Purpose: ${ensureSentence(page.summary)}`);
lines.push(`Proven actions: ${page.canCount}`);
lines.push(`Possible actions: ${page.mightCount}`);
if (page.interactionCount > 0) {
lines.push(`Interactive transitions: ${page.interactionCount}`);
}
if (page.title) {
lines.push(`Title: ${normalizeInlineText(page.title)}`);
}
Expand All @@ -99,6 +134,22 @@ function renderSpecIndex(outputDir: string, startPath: string, pages: Documented
}
lines.push('');
}

if (page.interactionActions.length > 0) {
lines.push('Interactive Findings:');
for (const action of page.interactionActions.slice(0, 3)) {
lines.push(`- ${normalizeInlineText(action)}`);
}
lines.push('');
}

if (page.qualityNotes.length > 0) {
lines.push('Coverage Notes:');
for (const note of page.qualityNotes) {
lines.push(`- ${ensureSentence(note)}`);
}
lines.push('');
}
}

if (skipped.length > 0) {
Expand Down Expand Up @@ -173,8 +224,11 @@ interface DocumentedPage {
summary: string;
canCount: number;
mightCount: number;
interactionCount: number;
canActions: string[];
mightActions: string[];
interactionActions: string[];
qualityNotes: string[];
filePath: string;
}

Expand All @@ -184,4 +238,4 @@ interface SkippedPage {
}

export { renderPageDocumentation, renderSpecIndex, ensureSentence, normalizeAction };
export type { DocumentedPage, SkippedPage };
export type { DocumentedPage, SkippedPage, StateTransition };
75 changes: 67 additions & 8 deletions docs/doc-collector.md
Original file line number Diff line number Diff line change
@@ -1,16 +1,70 @@
# Documentation Collection
# Documentation Collection

`doc-collector` crawls pages and generates a lightweight spec:

- `output/docs/spec.md`
- `output/docs/pages/*.md`
- `output/research/*.md`
- `output/docs/spec.md` - Main index
- `output/docs/pages/*.md` - Individual page documentation
- `output/research/*.md` - Research data

Each page is summarized as:

- `Purpose`
- `User Can`
- `User Might`
- `User Can` (proven capabilities)
- `User Might` (assumed capabilities)
- `State Transitions` (when interactive mode is enabled and useful)

## Features

### Static Documentation (Default)

Analyzes pages without interaction:

- ✅ Researches page structure via Researcher agent
- ✅ Identifies UI elements and navigation
- ✅ Generates documentation from static analysis
- ✅ Fast and reliable

### Interactive Documentation

When `interactive: true` in config:

- ✅ Tries selected page interactions before final documentation
- ✅ Can capture state changes after clicking links, buttons, and tab-like controls
- ✅ Can document navigation caused by interaction
- ✅ Can enqueue URLs discovered from successful interactions
- ✅ Falls back to static documentation when interaction results are weak or unreliable

This mode is intended for cases where static research alone is not enough, for example:

- alternate page states such as tabs
- post-click behavior
- item/detail navigation
- documenting what changed after an interaction

When interaction results are useful, page docs may include:

- `State Transitions`
- `Before`
- `After`
- `New capabilities discovered`
- `Coverage Notes`

Example:

```markdown
## State Transitions

### Switched to tab: Merged
**Before:** 18 elements (tab:3, link:5, text:7)
**After:** Tab content: 21 elements (tab:3, link:8, text:7)

### Clicked "Save" button
**Before:** Form with 8 fields
**After:** Success message appeared, form cleared
**New capabilities discovered:**
- User can create new runs
- User can see run ID after creation
```

## Commands

Expand Down Expand Up @@ -74,7 +128,7 @@ export default {
deniedPathSegments: ['callback', 'callbacks', 'logout', 'signout', 'sign_out', 'destroy', 'delete', 'remove'],
minCanActions: 1,
minInteractiveElements: 3,
// prompt: 'Add domain-specific guidance here',
interactive: false,
},
};
```
Expand All @@ -84,6 +138,7 @@ export default {
| `maxPages` | `100` | Maximum pages to document |
| `output` | `'docs'` | Output folder inside `output/` |
| `screenshot` | `true` | Allow screenshot-assisted research |
| `interactive` | `false` | Enable interaction attempts before final documentation |
| `prompt` | unset | Extra instructions for the Documentarian |
| `collapseDynamicPages` | `true` | Collapse dynamic URLs like `/users/123` and `/users/456` into one crawl key |
| `scope` | `'site'` | Crawl breadth mode |
Expand Down Expand Up @@ -130,8 +185,12 @@ Softer boundary than `subtree`: keep the same scope root, its descendants, and c
- same-origin only
- visited pages are tracked through the state manager
- dead loops are stopped
- next targets are discovered from links and research navigation
- next targets are discovered from links, research navigation, and successful interaction results
- low-signal pages can be skipped
- interactive mode does not replace static documentation; it augments it
- static mode is unchanged when `interactive` is disabled
- if interaction-driven generation fails, the collector falls back to static documentation
- output quality still depends on research quality

## Related Docs

Expand Down
Loading
Loading