Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/agents/planner-executor/plan-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ export function parseAction(text: string): ParsedAction {

// Strip <think>...</think> tags (Qwen/DeepSeek reasoning output)
cleaned = cleaned.replace(/<think>[\s\S]*?<\/think>/gi, '').trim();
// Some local models leak reasoning without the opening tag but still close it before the answer.
const closingThinkIndex = cleaned.toLowerCase().lastIndexOf('</think>');
if (closingThinkIndex !== -1) {
cleaned = cleaned.slice(closingThinkIndex + '</think>'.length).trim();
}
// If <think> never closed, strip from first <think> to end
cleaned = cleaned.replace(/<think>[\s\S]*$/gi, '').trim();

Expand Down
66 changes: 66 additions & 0 deletions src/agents/planner-executor/planner-executor-agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1063,6 +1063,14 @@ export class PlannerExecutorAgent {
finalOutcome.status === StepStatus.SKIPPED ||
finalOutcome.status === StepStatus.VISION_FALLBACK
) {
if (
!success &&
finalOutcome.status === StepStatus.SUCCESS &&
(await this.isCartAdditionTerminal(runtime, task, plannerAction))
) {
success = true;
}

if (this.recoveryState && this.config.recovery.trackSuccessfulUrls && urlAfter) {
this.recoveryState.recordCheckpoint({
url: urlAfter,
Expand All @@ -1084,6 +1092,10 @@ export class PlannerExecutorAgent {
break;
}

if (success) {
break;
}

if (shouldContinue) {
continue;
}
Expand Down Expand Up @@ -2254,6 +2266,60 @@ export class PlannerExecutorAgent {
return false;
}

/**
 * Decides whether an "add to cart" task can be considered finished after the
 * current planner action.
 *
 * All of the following must hold for a `true` result:
 *  1. The task text mentions adding something to a cart (or "cart addition")
 *     and does not mention checkout, payment, placing an order or "buy now".
 *  2. The planner action's string fields (intent, input, goal, action), with
 *     underscores and hyphens turned into spaces, describe an add-to-cart
 *     step or a "cart contains" check.
 *  3. A fresh snapshot contains at least one element whose text, aria label
 *     or name matches a confirmation phrase such as "added to cart" or
 *     "cart contains 1 item".
 *
 * @param runtime Runtime used to take the confirmation snapshot.
 * @param task Natural-language task description.
 * @param plannerAction Planner response for the step that just ran.
 * @returns `true` only when a confirmation element is found; `false` in every
 *   other case, including when the snapshot is missing or taking it throws.
 */
private async isCartAdditionTerminal(
  runtime: AgentRuntime,
  task: string,
  plannerAction: StepwisePlannerResponse
): Promise<boolean> {
  const normalizedTask = task.toLowerCase();

  const mentionsCartAddition =
    /\badd(?:ed)?\b[\s\S]*\bcart\b|\bcart[_\s-]?addition\b/.test(normalizedTask);
  if (!mentionsCartAddition) {
    return false;
  }

  // Tasks that continue past the cart must not be cut short here.
  const continuesPastCart =
    /\bcheckout\b|\bcheck out\b|\bpayment\b|\bplace order\b|\bbuy now\b/.test(normalizedTask);
  if (continuesPastCart) {
    return false;
  }

  // Collect whichever descriptive fields of the planner action are strings.
  const actionParts: string[] = [];
  for (const part of [
    plannerAction.intent,
    plannerAction.input,
    plannerAction.goal,
    plannerAction.action,
  ]) {
    if (typeof part === 'string') {
      actionParts.push(part);
    }
  }
  // "add_to_cart" / "add-to-cart" become "add to cart" so \b matching works.
  const actionText = actionParts.join(' ').toLowerCase().replace(/[_-]+/g, ' ');

  const actionTargetsCart = /\badd(?:ed)?\b[\s\S]*\bcart\b|\bcart contains\b/.test(actionText);
  if (!actionTargetsCart) {
    return false;
  }

  // Phrases that count as on-page confirmation of a non-empty cart.
  const confirmationPatterns = [
    /\badded to (?:cart|bag|basket)\b/,
    /\bcart contains\s+[1-9]\d*\s+items?\b/,
    /\b[1-9]\d*\s+items?\s+in (?:your )?(?:cart|bag|basket)\b/,
  ];

  try {
    const snapshot = await runtime.snapshot({
      limit: this.config.snapshot.limitBase,
      screenshot: false,
      goal: 'cart addition confirmation',
    });
    if (!snapshot) {
      return false;
    }

    const elements = snapshot.elements || [];
    return elements.some(element => {
      const labelParts: string[] = [];
      for (const candidate of [element.text, element.ariaLabel, element.name]) {
        if (typeof candidate === 'string') {
          labelParts.push(candidate);
        }
      }
      const label = labelParts.join(' ').toLowerCase();
      return confirmationPatterns.some(pattern => pattern.test(label));
    });
  } catch {
    // Best effort: a failed snapshot simply means "not confirmed".
    return false;
  }
}

private async attemptRecovery(runtime: AgentRuntime): Promise<boolean> {
if (!this.recoveryState) {
return false;
Expand Down
44 changes: 33 additions & 11 deletions src/utils/trace-file-manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -104,21 +104,43 @@ export class TraceFileManager {
return;
}

stream.end(() => {
resolve();
});

stream.once('error', error => {
reject(error);
});

// Timeout after 5 seconds
setTimeout(() => {
if (!stream.destroyed) {
let settled = false;
const timeout = setTimeout(() => {
if (!settled) {
settled = true;
stream.destroy();
resolve();
}
}, 5000);
timeout.unref?.();

const cleanup = () => {
clearTimeout(timeout);
stream.removeListener('error', onError);
stream.removeListener('close', onClose);
};

const onClose = () => {
if (settled) {
return;
}
settled = true;
cleanup();
resolve();
};

const onError = (error: Error) => {
if (settled) {
return;
}
settled = true;
cleanup();
reject(error);
};

stream.once('close', onClose);
stream.once('error', onError);
stream.end();
});
}

Expand Down
4 changes: 2 additions & 2 deletions tests/actions.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ describe('Actions', () => {
await page.goto('https://example.com');
await page.waitForLoadState('networkidle', { timeout: 10000 });

patchSearchEnginePages(page);
await patchSearchEnginePages(page);

const result = await search(browser, 'sentience sdk', 'duckduckgo');
expect(result.success).toBe(true);
Expand Down Expand Up @@ -233,7 +233,7 @@ describe('Actions', () => {
try {
await browser.start();
const page = getPageOrThrow(browser);
patchExampleDotCom(page);
await patchExampleDotCom(page);
await page.goto('https://example.com');

await expect(search(browser, 'sentience sdk', 'duckduckgo')).rejects.toThrow(
Expand Down
53 changes: 53 additions & 0 deletions tests/agents/planner-executor/modal-flow.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import {

class ProviderStub extends LLMProvider {
private responses: string[];
public generateCalls = 0;

constructor(responses: string[] = []) {
super();
Expand All @@ -22,6 +23,7 @@ class ProviderStub extends LLMProvider {
}

async generate(): Promise<LLMResponse> {
this.generateCalls += 1;
const content = this.responses.length
? this.responses.shift()!
: JSON.stringify({ action: 'DONE' });
Expand Down Expand Up @@ -200,6 +202,57 @@ describe('PlannerExecutorAgent modal flow parity', () => {
expect(runtime.currentUrl).toContain('/checkout');
});

it('finishes an add-to-cart task when the cart count confirms success', async () => {
  // The planner is scripted with exactly one step: click the add-to-cart button.
  const addToCartStep = {
    action: 'CLICK',
    intent: 'add_to_cart',
    input: 'Add to Cart',
    verify: [],
    required: true,
  };
  const planner = new ProviderStub([JSON.stringify(addToCartStep)]);
  const executor = new ProviderStub(['CLICK(1)']);

  const productUrl = 'https://shop.test/product';
  // Flips to true once element 1 has been clicked; drives which snapshot is served.
  let cartConfirmed = false;

  const runtime = new RuntimeStub(
    productUrl,
    () =>
      cartConfirmed
        ? makeSnapshot(productUrl, [
            { id: 1, role: 'button', text: 'Add to Cart', clickable: true, importance: 100 },
            {
              id: 9,
              role: 'button',
              text: 'Cart contains 1 item Total $59.99',
              clickable: true,
              importance: 110,
            },
            { id: 10, role: 'text', text: 'Added to cart', importance: 90 },
          ])
        : makeSnapshot(productUrl, [
            { id: 1, role: 'button', text: 'Add to Cart', clickable: true, importance: 100 },
          ]),
    {
      onClick: elementId => {
        if (elementId === 1) {
          cartConfirmed = true;
        }
      },
    }
  );

  const agent = new PlannerExecutorAgent({ planner, executor });
  const result = await agent.runStepwise(runtime, {
    task: 'Search for running shoes and add the item to cart',
  });

  // One click, one planner call, and the run is reported as successful.
  expect(result.success).toBe(true);
  expect(runtime.clickCalls).toEqual([1]);
  expect(planner.generateCalls).toBe(1);
});

it('does not dismiss or auto-continue drawers with checkout or cart controls for unrelated clicks', async () => {
const planner = new ProviderStub([
JSON.stringify({ action: 'CLICK', intent: 'open shipping info', verify: [] }),
Expand Down
20 changes: 20 additions & 0 deletions tests/agents/planner-executor/plan-utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,26 @@ describe('parseAction', () => {
});
});

it('parses the final action after leaked thinking output', () => {
  // Reasoning text with no opening <think> tag, terminated by </think>,
  // followed by the real single-line answer.
  const leakedOutput = [
    'So we output exactly: TYPE(168, "noise cancelling earbuds")',
    '',
    'However, the problem says: "Return ONLY ONE line: TYPE(<id>, "text")"',
    '',
    'Output: TYPE(168, "noise cancelling earbuds")',
    '</think>',
    '',
    'TYPE(168, "noise cancelling earbuds")',
  ].join('\n');

  const parsed = parseAction(leakedOutput);

  expect(parsed).toEqual({
    action: 'TYPE',
    args: [168, 'noise cancelling earbuds'],
  });
});

it('does not treat action examples inside prose as executable output', () => {
expect(
parseAction(
Expand Down
4 changes: 2 additions & 2 deletions tests/browser.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ describe('Browser Proxy Support', () => {
if (!page) {
throw new Error('Browser page is not available');
}
patchExampleDotCom(page);
await patchExampleDotCom(page);
await page.goto('https://example.com', { waitUntil: 'domcontentloaded', timeout: 20000 });

const viewportSize = await page.evaluate(() => ({
Expand Down Expand Up @@ -295,7 +295,7 @@ describe('Browser Proxy Support', () => {
expect(sentienceBrowser.getContext()).toBe(context);

// Test that we can use it
patchExampleDotCom(page);
await patchExampleDotCom(page);
await page.goto('https://example.com');
await page.waitForLoadState('networkidle', { timeout: 10000 });

Expand Down
10 changes: 5 additions & 5 deletions tests/test-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ export async function createTestBrowser(headless?: boolean): Promise<SentienceBr
await browser.start();
const page = browser.getPage();
if (page) {
patchExampleDotCom(page);
await patchExampleDotCom(page);
}
return browser;
} catch (e: any) {
Expand Down Expand Up @@ -64,8 +64,8 @@ export async function setTestPageContent(page: Page, html?: string): Promise<voi
await page.setContent(html ?? DEFAULT_TEST_HTML, { waitUntil: 'domcontentloaded' });
}

export function patchExampleDotCom(page: Page): void {
void page.route(/https?:\/\/example\.com\/?.*/, async route => {
export async function patchExampleDotCom(page: Page): Promise<void> {
await page.route(/https?:\/\/example\.com\/?.*/, async route => {
await route.fulfill({
status: 200,
contentType: 'text/html',
Expand All @@ -88,8 +88,8 @@ const SEARCH_RESULTS_HTML = `<!doctype html>
</body>
</html>`;

export function patchSearchEnginePages(page: Page): void {
void page.route(
export async function patchSearchEnginePages(page: Page): Promise<void> {
await page.route(
/https?:\/\/(duckduckgo\.com|www\.google\.com|www\.bing\.com)\/.*/,
async route => {
await route.fulfill({
Expand Down
32 changes: 32 additions & 0 deletions tests/utils/trace-file-manager.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { EventEmitter } from 'events';
import { TraceFileManager } from '../../src/utils/trace-file-manager';
import { TraceEvent } from '../../src/tracing/types';

Expand Down Expand Up @@ -99,6 +100,37 @@ describe('TraceFileManager', () => {
await expect(TraceFileManager.closeStream(stream)).resolves.not.toThrow();
expect(stream.destroyed).toBe(true);
});

it('should wait for the close event before resolving', async () => {
  // Minimal stand-in for fs.WriteStream: end() reports completion immediately,
  // while 'close' is emitted only when destroy() is called explicitly.
  class DelayedCloseStream extends EventEmitter {
    destroyed = false;

    end(callback?: () => void): void {
      callback?.();
    }

    destroy(): void {
      this.destroyed = true;
      this.emit('close');
    }
  }

  const stream = new DelayedCloseStream();
  let resolved = false;
  const closePromise = TraceFileManager.closeStream(stream as unknown as fs.WriteStream).then(
    () => {
      resolved = true;
    }
  );

  // Yield a full macrotask turn rather than a single microtask tick. A
  // premature resolution that travels through several promise hops (for
  // example an async wrapper) would not yet be visible after one
  // `await Promise.resolve()`, letting a regression slip through.
  await new Promise<void>(resolve => setTimeout(resolve, 0));
  expect(resolved).toBe(false);

  // Emitting 'close' is the only thing that should settle closeStream().
  stream.destroy();
  await closePromise;

  expect(resolved).toBe(true);
});
});

describe('fileExists', () => {
Expand Down
Loading