From a26bd626a0c32c23b2e52ca58ce13ff57d0e0ae9 Mon Sep 17 00:00:00 2001 From: chris Date: Fri, 17 Apr 2026 15:56:49 -0700 Subject: [PATCH 1/9] added critic --- .../src/narada_core/actions/models.py | 22 ++++- .../narada-core/src/narada_core/models.py | 27 +++++- packages/narada-pyodide/src/narada/window.py | 93 ++++++++++++++++++- packages/narada/src/narada/__init__.py | 5 +- packages/narada/src/narada/window.py | 93 ++++++++++++++++++- 5 files changed, 235 insertions(+), 5 deletions(-) diff --git a/packages/narada-core/src/narada_core/actions/models.py b/packages/narada-core/src/narada_core/actions/models.py index 6e68db6..e8e50e9 100644 --- a/packages/narada-core/src/narada_core/actions/models.py +++ b/packages/narada-core/src/narada_core/actions/models.py @@ -12,7 +12,7 @@ override, ) -from pydantic import BaseModel, Field, TypeAdapter, ValidationError +from pydantic import BaseModel, ConfigDict, Field, TypeAdapter, ValidationError # There is no `AgentRequest` because the `agent` action delegates to the `dispatch_request` method # under the hood. @@ -275,6 +275,24 @@ class StructuredOutput(BaseModel, Generic[_StructuredOutputT]): content: _StructuredOutputT +class CriticResult(BaseModel): + """Result from a critic agent that evaluated the main agent's output.""" + + model_config = ConfigDict(arbitrary_types_allowed=True) + + validation_passed: bool + """Whether the critic determined the main agent successfully completed its task.""" + + text: str + """The critic's evaluation text.""" + + structured_output: Any + """Parsed instance of the output_schema passed in CriticConfig, or None if no schema was given.""" + + usage: AgentUsage + action_trace: ActionTrace | None = None + + class AgentResponse(BaseModel, Generic[_StructuredOutputT]): request_id: str status: Literal["success", "error", "input-required"] @@ -286,6 +304,8 @@ class AgentResponse(BaseModel, Generic[_StructuredOutputT]): ] usage: AgentUsage action_trace: ActionTrace | None = None + critic_result: CriticResult | None = None + """Result from the critic agent, populated when a CriticConfig is passed to agent().""" class AgenticSelectorClickAction(TypedDict): diff --git a/packages/narada-core/src/narada_core/models.py b/packages/narada-core/src/narada_core/models.py index 8e03292..007d56b 100644 --- a/packages/narada-core/src/narada_core/models.py +++ b/packages/narada-core/src/narada_core/models.py @@ -3,7 +3,7 @@ from enum import Enum, StrEnum from typing import Annotated, Generic, Literal, NotRequired, TypedDict, TypeVar -from pydantic import BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field class Agent(Enum): @@ -65,6 +65,31 @@ class McpServer(BaseModel): selectedTools: list[str] | None = None +class CriticConfig(BaseModel): + """Configuration for a critic agent that evaluates a main agent's output. + + When passed to `agent()`, the critic runs after the main agent completes and + evaluates whether the task was successfully performed, setting a boolean + variable to indicate pass or fail. + """ + + model_config = ConfigDict(arbitrary_types_allowed=True) + + prompt: str = ( + "Using your context about the actions and outcome of the previous agent, " + "determine whether its task was completed successfully." + ) + """Instructions for the critic on how to evaluate the main agent's output.""" + + output_schema: type[BaseModel] | None = None + """Optional Pydantic model defining additional structured fields for the critic to populate + alongside the boolean validation variable. The validation variable should not be included + here — it is always added automatically.""" + + mcp_servers: list[McpServer] | None = None + """Optional MCP servers available to the critic agent.""" + + class RemoteDispatchChatHistoryItem(TypedDict): role: Literal["user", "assistant"] content: str diff --git a/packages/narada-pyodide/src/narada/window.py b/packages/narada-pyodide/src/narada/window.py index df6bfa1..872bd69 100644 --- a/packages/narada-pyodide/src/narada/window.py +++ b/packages/narada-pyodide/src/narada/window.py @@ -2,6 +2,7 @@ import json import os import time +import uuid from abc import ABC from http import HTTPStatus from typing import IO, TYPE_CHECKING, Any, Literal, Optional, TypeVar, cast, overload @@ -17,6 +18,7 @@ AgentResponse, AgentUsage, CloseWindowRequest, + CriticResult, ExtensionActionRequest, ExtensionActionResponse, GetFullHtmlRequest, @@ -42,13 +44,14 @@ ) from narada_core.models import ( Agent, + CriticConfig, File, McpServer, RemoteDispatchChatHistoryItem, Response, UserResourceCredentials, ) -from pydantic import BaseModel +from pydantic import BaseModel, create_model from pyodide.ffi import JsProxy, create_once_callable from pyodide.http import pyfetch @@ -138,6 +141,7 @@ async def dispatch_request( mcp_servers: list[McpServer] | None = None, secret_variables: dict[str, str] | None = None, input_variables: dict[str, Any] | None = None, + critic_context: dict[str, Any] | None = None, callback_url: str | None = None, callback_secret: str | None = None, callback_headers: dict[str, Any] | None = None, @@ -161,6 +165,7 @@ async def dispatch_request( mcp_servers: list[McpServer] | None = None, secret_variables: dict[str, str] | None = None, input_variables: dict[str, Any] | None = None, + critic_context: dict[str, Any] | None = None, callback_url: str | None = None, callback_secret: str | None = None, callback_headers: dict[str, Any] | None = None, @@ -183,6 +188,7 @@ async def dispatch_request( mcp_servers: list[McpServer] | None = None, secret_variables: dict[str, str] | None = None, input_variables: dict[str, Any] | None = None, + critic_context: dict[str, Any] | None = None, callback_url: str | None = None, callback_secret: str | None = None, callback_headers: dict[str, Any] | None = None, @@ -239,6 +245,8 @@ async def dispatch_request( body["secretVariables"] = secret_variables if input_variables is not None: body["inputVariables"] = input_variables + if critic_context is not None: + body["criticContext"] = critic_context if callback_url is not None: body["callbackUrl"] = callback_url if callback_secret is not None: @@ -328,6 +336,7 @@ async def agent( mcp_servers: list[McpServer] | None = None, secret_variables: dict[str, str] | None = None, input_variables: dict[str, Any] | None = None, + critic: CriticConfig | None = None, timeout: int = 1000, ) -> AgentResponse[dict[str, Any]]: ... @@ -344,6 +353,7 @@ async def agent( mcp_servers: list[McpServer] | None = None, secret_variables: dict[str, str] | None = None, input_variables: dict[str, Any] | None = None, + critic: CriticConfig | None = None, timeout: int = 1000, ) -> AgentResponse[_StructuredOutput]: ... @@ -359,6 +369,7 @@ async def agent( mcp_servers: list[McpServer] | None = None, secret_variables: dict[str, str] | None = None, input_variables: dict[str, Any] | None = None, + critic: CriticConfig | None = None, timeout: int = 1000, ) -> AgentResponse: """Invokes an agent in the Narada extension side panel chat.""" @@ -384,6 +395,17 @@ async def agent( else None ) + critic_result: CriticResult | None = None + if critic is not None: + critic_result = await self._run_critic( + original_prompt=prompt, + response_content=response_content, + action_trace_raw=action_trace_raw, + critic=critic, + time_zone=time_zone, + timeout=timeout, + ) + return AgentResponse( request_id=remote_dispatch_response["requestId"], status=remote_dispatch_response["status"], @@ -392,6 +414,75 @@ async def agent( structured_output=response_content.get("structuredOutput"), usage=AgentUsage.model_validate(remote_dispatch_response["usage"]), action_trace=action_trace, + critic_result=critic_result, + ) + + async def _run_critic( + self, + *, + original_prompt: str, + response_content: dict[str, Any], + action_trace_raw: list[Any] | None, + critic: CriticConfig, + time_zone: str, + timeout: int, + ) -> CriticResult: + + if critic.output_schema is not None: + combined_fields: dict[str, Any] = { + name: (info.annotation, info) + for name, info in critic.output_schema.model_fields.items() + } + else: + combined_fields = {} + _VALIDATION_VAR = f"_narada_validation_passed_{uuid.uuid4().hex[:4]}" + combined_fields[_VALIDATION_VAR] = (bool, ...) + CriticOutputModel = create_model("CriticOutput", **combined_fields) + + critic_dispatch_response = await self.dispatch_request( + prompt=critic.prompt, + agent=Agent.PRODUCTIVITY, + output_schema=CriticOutputModel, + critic_context={ + "agentPrompt": original_prompt, + "agentOutput": response_content["text"], + "actionTrace": action_trace_raw or [], + "validationVariableName": _VALIDATION_VAR, + }, + mcp_servers=critic.mcp_servers, + time_zone=time_zone, + timeout=timeout, + ) + + critic_content = critic_dispatch_response["response"] + assert critic_content is not None + + combined_output = critic_content.get("structuredOutput") + validation_passed = ( + bool(getattr(combined_output, _VALIDATION_VAR, False)) + if combined_output is not None + else False + ) + + structured_output: BaseModel | None = None + if critic.output_schema is not None and combined_output is not None: + output_dict = combined_output.model_dump() + output_dict.pop(_VALIDATION_VAR, None) + structured_output = critic.output_schema.model_validate(output_dict) + + critic_action_trace_raw = critic_content.get("actionTrace") + critic_action_trace = ( + parse_action_trace(critic_action_trace_raw) + if critic_action_trace_raw is not None + else None + ) + + return CriticResult( + validation_passed=validation_passed, + text=critic_content["text"], + structured_output=structured_output, + usage=AgentUsage.model_validate(critic_dispatch_response["usage"]), + action_trace=critic_action_trace, ) async def agentic_selector( diff --git a/packages/narada/src/narada/__init__.py b/packages/narada/src/narada/__init__.py index 3957d7b..cd754d6 100644 --- a/packages/narada/src/narada/__init__.py +++ b/packages/narada/src/narada/__init__.py @@ -6,7 +6,8 @@ NaradaTimeoutError, NaradaUnsupportedBrowserError, ) -from narada_core.models import Agent, File, Response, ResponseContent +from narada_core.actions.models import CriticResult +from narada_core.models import Agent, CriticConfig, File, Response, ResponseContent from narada.client import Narada from narada.config import BrowserConfig, ProxyConfig @@ -19,6 +20,8 @@ "Agent", "BrowserConfig", "CloudBrowserWindow", + "CriticConfig", + "CriticResult", "download_file", "File", "LocalBrowserWindow", diff --git a/packages/narada/src/narada/window.py b/packages/narada/src/narada/window.py index 2b61390..67a8f32 100644 --- a/packages/narada/src/narada/window.py +++ b/packages/narada/src/narada/window.py @@ -2,6 +2,7 @@ import logging import os import time +import uuid from abc import ABC from dataclasses import dataclass from http import HTTPStatus @@ -20,6 +21,7 @@ AgentResponse, AgentUsage, CloseWindowRequest, + CriticResult, ExtensionActionRequest, ExtensionActionResponse, GetFullHtmlRequest, @@ -45,6 +47,7 @@ ) from narada_core.models import ( Agent, + CriticConfig, File, McpServer, RemoteDispatchChatHistoryItem, @@ -54,7 +57,7 @@ from playwright.async_api import ( BrowserContext, ) -from pydantic import BaseModel +from pydantic import BaseModel, create_model from narada.config import BrowserConfig @@ -226,6 +229,7 @@ async def dispatch_request( mcp_servers: list[McpServer] | None = None, secret_variables: dict[str, str] | None = None, input_variables: Mapping[str, Any] | None = None, + critic_context: dict[str, Any] | None = None, callback_url: str | None = None, callback_secret: str | None = None, callback_headers: Mapping[str, Any] | None = None, @@ -250,6 +254,7 @@ async def dispatch_request( mcp_servers: list[McpServer] | None = None, secret_variables: dict[str, str] | None = None, input_variables: Mapping[str, Any] | None = None, + critic_context: dict[str, Any] | None = None, callback_url: str | None = None, callback_secret: str | None = None, callback_headers: Mapping[str, Any] | None = None, @@ -273,6 +278,7 @@ async def dispatch_request( mcp_servers: list[McpServer] | None = None, secret_variables: dict[str, str] | None = None, input_variables: Mapping[str, Any] | None = None, + critic_context: dict[str, Any] | None = None, callback_url: str | None = None, callback_secret: str | None = None, callback_headers: Mapping[str, Any] | None = None, @@ -321,6 +327,8 @@ async def dispatch_request( body["inputVariables"] = await self._normalize_input_variables( input_variables=input_variables ) + if critic_context is not None: + body["criticContext"] = critic_context if callback_url is not None: body["callbackUrl"] = callback_url if callback_secret is not None: @@ -391,6 +399,7 @@ async def agent( mcp_servers: list[McpServer] | None = None, secret_variables: dict[str, str] | None = None, input_variables: Mapping[str, Any] | None = None, + critic: CriticConfig | None = None, timeout: int = 1000, ) -> AgentResponse[dict[str, Any]]: ... @@ -408,6 +417,7 @@ async def agent( mcp_servers: list[McpServer] | None = None, secret_variables: dict[str, str] | None = None, input_variables: Mapping[str, Any] | None = None, + critic: CriticConfig | None = None, timeout: int = 1000, ) -> AgentResponse[_StructuredOutput]: ... @@ -424,6 +434,7 @@ async def agent( mcp_servers: list[McpServer] | None = None, secret_variables: dict[str, str] | None = None, input_variables: Mapping[str, Any] | None = None, + critic: CriticConfig | None = None, timeout: int = 1000, ) -> AgentResponse: """Invokes an agent in the Narada extension side panel chat.""" @@ -450,6 +461,17 @@ async def agent( else None ) + critic_result: CriticResult | None = None + if critic is not None: + critic_result = await self._run_critic( + original_prompt=prompt, + response_content=response_content, + action_trace_raw=action_trace_raw, + critic=critic, + time_zone=time_zone, + timeout=timeout, + ) + return AgentResponse( request_id=remote_dispatch_response["requestId"], status=remote_dispatch_response["status"], @@ -458,6 +480,75 @@ async def agent( structured_output=response_content.get("structuredOutput"), usage=AgentUsage.model_validate(remote_dispatch_response["usage"]), action_trace=action_trace, + critic_result=critic_result, + ) + + async def _run_critic( + self, + *, + original_prompt: str, + response_content: dict[str, Any], + action_trace_raw: list[Any] | None, + critic: CriticConfig, + time_zone: str, + timeout: int, + ) -> CriticResult: + + if critic.output_schema is not None: + combined_fields: dict[str, Any] = { + name: (info.annotation, info) + for name, info in critic.output_schema.model_fields.items() + } + else: + combined_fields = {} + _VALIDATION_VAR = f"narada_validation_passed_{uuid.uuid4().hex[:4]}" + combined_fields[_VALIDATION_VAR] = (bool, ...) + CriticOutputModel = create_model("CriticOutput", **combined_fields) + + critic_dispatch_response = await self.dispatch_request( + prompt=critic.prompt, + agent=Agent.PRODUCTIVITY, + output_schema=CriticOutputModel, + critic_context={ + "agentPrompt": original_prompt, + "agentOutput": response_content["text"], + "actionTrace": action_trace_raw or [], + "validationVariableName": _VALIDATION_VAR, + }, + mcp_servers=critic.mcp_servers, + time_zone=time_zone, + timeout=timeout, + ) + + critic_content = critic_dispatch_response["response"] + assert critic_content is not None + + combined_output = critic_content.get("structuredOutput") + validation_passed = ( + bool(getattr(combined_output, _VALIDATION_VAR, False)) + if combined_output is not None + else False + ) + + structured_output: BaseModel | None = None + if critic.output_schema is not None and combined_output is not None: + output_dict = combined_output.model_dump() + output_dict.pop(_VALIDATION_VAR, None) + structured_output = critic.output_schema.model_validate(output_dict) + + critic_action_trace_raw = critic_content.get("actionTrace") + critic_action_trace = ( + parse_action_trace(critic_action_trace_raw) + if critic_action_trace_raw is not None + else None + ) + + return CriticResult( + validation_passed=validation_passed, + text=critic_content["text"], + structured_output=structured_output, + usage=AgentUsage.model_validate(critic_dispatch_response["usage"]), + action_trace=critic_action_trace, ) async def agentic_selector( From 7a4f51315d2c6a5a4017aa435e3afffef6966f91 Mon Sep 17 00:00:00 2001 From: chris Date: Fri, 17 Apr 2026 18:21:41 -0700 Subject: [PATCH 2/9] share functionality in core --- .../src/narada_core/actions/models.py | 74 +++++++++++++++++- packages/narada-pyodide/src/narada/window.py | 75 +------------------ packages/narada/src/narada/window.py | 74 +----------------- 3 files changed, 81 insertions(+), 142 deletions(-) diff --git a/packages/narada-core/src/narada_core/actions/models.py b/packages/narada-core/src/narada_core/actions/models.py index e8e50e9..0f39d8c 100644 --- a/packages/narada-core/src/narada_core/actions/models.py +++ b/packages/narada-core/src/narada_core/actions/models.py @@ -1,8 +1,11 @@ from __future__ import annotations +import uuid from typing import ( Annotated, Any, + Awaitable, + Callable, Generic, Literal, NotRequired, @@ -12,7 +15,8 @@ override, ) -from pydantic import BaseModel, ConfigDict, Field, TypeAdapter, ValidationError +from narada_core.models import Agent, CriticConfig +from pydantic import BaseModel, ConfigDict, Field, TypeAdapter, ValidationError, create_model # There is no `AgentRequest` because the `agent` action delegates to the `dispatch_request` method # under the hood. @@ -293,6 +297,74 @@ class CriticResult(BaseModel): action_trace: ActionTrace | None = None +async def run_critic( + *, + dispatch_request: Callable[..., Awaitable[Any]], + original_prompt: str, + response_content: dict[str, Any], + action_trace_raw: list[Any] | None, + critic: CriticConfig, + time_zone: str, + timeout: int, +) -> CriticResult: + if critic.output_schema is not None: + combined_fields: dict[str, Any] = { + name: (info.annotation, info) + for name, info in critic.output_schema.model_fields.items() + } + else: + combined_fields = {} + _VALIDATION_VAR = f"narada_validation_passed_{uuid.uuid4().hex[:4]}" + combined_fields[_VALIDATION_VAR] = (bool, ...) + CriticOutputModel = create_model("CriticOutput", **combined_fields) + + critic_dispatch_response = await dispatch_request( + prompt=critic.prompt, + agent=Agent.PRODUCTIVITY, + output_schema=CriticOutputModel, + critic_context={ + "agentPrompt": original_prompt, + "agentOutput": response_content["text"], + "actionTrace": action_trace_raw or [], + "validationVariableName": _VALIDATION_VAR, + }, + mcp_servers=critic.mcp_servers, + time_zone=time_zone, + timeout=timeout, + ) + + critic_content = critic_dispatch_response["response"] + assert critic_content is not None + + combined_output = critic_content.get("structuredOutput") + validation_passed = ( + bool(getattr(combined_output, _VALIDATION_VAR, False)) + if combined_output is not None + else False + ) + + structured_output: BaseModel | None = None + if critic.output_schema is not None and combined_output is not None: + output_dict = combined_output.model_dump() + output_dict.pop(_VALIDATION_VAR, None) + structured_output = critic.output_schema.model_validate(output_dict) + + critic_action_trace_raw = critic_content.get("actionTrace") + critic_action_trace = ( + parse_action_trace(critic_action_trace_raw) + if critic_action_trace_raw is not None + else None + ) + + return CriticResult( + validation_passed=validation_passed, + text=critic_content["text"], + structured_output=structured_output, + usage=AgentUsage.model_validate(critic_dispatch_response["usage"]), + action_trace=critic_action_trace, + ) + + class AgentResponse(BaseModel, Generic[_StructuredOutputT]): request_id: str status: Literal["success", "error", "input-required"] diff --git a/packages/narada-pyodide/src/narada/window.py b/packages/narada-pyodide/src/narada/window.py index 872bd69..1571618 100644 --- a/packages/narada-pyodide/src/narada/window.py +++ b/packages/narada-pyodide/src/narada/window.py @@ -2,7 +2,6 @@ import json import os import time -import uuid from abc import ABC from http import HTTPStatus from typing import IO, TYPE_CHECKING, Any, Literal, Optional, TypeVar, cast, overload @@ -36,6 +35,7 @@ RecordedClick, WriteGoogleSheetRequest, parse_action_trace, + run_critic, ) from narada_core.errors import ( NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE, @@ -51,7 +51,7 @@ Response, UserResourceCredentials, ) -from pydantic import BaseModel, create_model +from pydantic import BaseModel from pyodide.ffi import JsProxy, create_once_callable from pyodide.http import pyfetch @@ -397,7 +397,8 @@ async def agent( critic_result: CriticResult | None = None if critic is not None: - critic_result = await self._run_critic( + critic_result = await run_critic( + dispatch_request=self.dispatch_request, original_prompt=prompt, response_content=response_content, action_trace_raw=action_trace_raw, @@ -417,74 +418,6 @@ async def agent( critic_result=critic_result, ) - async def _run_critic( - self, - *, - original_prompt: str, - response_content: dict[str, Any], - action_trace_raw: list[Any] | None, - critic: CriticConfig, - time_zone: str, - timeout: int, - ) -> CriticResult: - - if critic.output_schema is not None: - combined_fields: dict[str, Any] = { - name: (info.annotation, info) - for name, info in critic.output_schema.model_fields.items() - } - else: - combined_fields = {} - _VALIDATION_VAR = f"_narada_validation_passed_{uuid.uuid4().hex[:4]}" - combined_fields[_VALIDATION_VAR] = (bool, ...) - CriticOutputModel = create_model("CriticOutput", **combined_fields) - - critic_dispatch_response = await self.dispatch_request( - prompt=critic.prompt, - agent=Agent.PRODUCTIVITY, - output_schema=CriticOutputModel, - critic_context={ - "agentPrompt": original_prompt, - "agentOutput": response_content["text"], - "actionTrace": action_trace_raw or [], - "validationVariableName": _VALIDATION_VAR, - }, - mcp_servers=critic.mcp_servers, - time_zone=time_zone, - timeout=timeout, - ) - - critic_content = critic_dispatch_response["response"] - assert critic_content is not None - - combined_output = critic_content.get("structuredOutput") - validation_passed = ( - bool(getattr(combined_output, _VALIDATION_VAR, False)) - if combined_output is not None - else False - ) - - structured_output: BaseModel | None = None - if critic.output_schema is not None and combined_output is not None: - output_dict = combined_output.model_dump() - output_dict.pop(_VALIDATION_VAR, None) - structured_output = critic.output_schema.model_validate(output_dict) - - critic_action_trace_raw = critic_content.get("actionTrace") - critic_action_trace = ( - parse_action_trace(critic_action_trace_raw) - if critic_action_trace_raw is not None - else None - ) - - return CriticResult( - validation_passed=validation_passed, - text=critic_content["text"], - structured_output=structured_output, - usage=AgentUsage.model_validate(critic_dispatch_response["usage"]), - action_trace=critic_action_trace, - ) - async def agentic_selector( self, *, diff --git a/packages/narada/src/narada/window.py b/packages/narada/src/narada/window.py index 67a8f32..8cdc3a3 100644 --- a/packages/narada/src/narada/window.py +++ b/packages/narada/src/narada/window.py @@ -39,6 +39,7 @@ RecordedClick, WriteGoogleSheetRequest, parse_action_trace, + run_critic, ) from narada_core.errors import ( NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE, @@ -57,7 +58,7 @@ from playwright.async_api import ( BrowserContext, ) -from pydantic import BaseModel, create_model +from pydantic import BaseModel from narada.config import BrowserConfig @@ -463,7 +464,8 @@ async def agent( critic_result: CriticResult | None = None if critic is not None: - critic_result = await self._run_critic( + critic_result = await run_critic( + dispatch_request=self.dispatch_request, original_prompt=prompt, response_content=response_content, action_trace_raw=action_trace_raw, @@ -483,74 +485,6 @@ async def agent( critic_result=critic_result, ) - async def _run_critic( - self, - *, - original_prompt: str, - response_content: dict[str, Any], - action_trace_raw: list[Any] | None, - critic: CriticConfig, - time_zone: str, - timeout: int, - ) -> CriticResult: - - if critic.output_schema is not None: - combined_fields: dict[str, Any] = { - name: (info.annotation, info) - for name, info in critic.output_schema.model_fields.items() - } - else: - combined_fields = {} - _VALIDATION_VAR = f"narada_validation_passed_{uuid.uuid4().hex[:4]}" - combined_fields[_VALIDATION_VAR] = (bool, ...) - CriticOutputModel = create_model("CriticOutput", **combined_fields) - - critic_dispatch_response = await self.dispatch_request( - prompt=critic.prompt, - agent=Agent.PRODUCTIVITY, - output_schema=CriticOutputModel, - critic_context={ - "agentPrompt": original_prompt, - "agentOutput": response_content["text"], - "actionTrace": action_trace_raw or [], - "validationVariableName": _VALIDATION_VAR, - }, - mcp_servers=critic.mcp_servers, - time_zone=time_zone, - timeout=timeout, - ) - - critic_content = critic_dispatch_response["response"] - assert critic_content is not None - - combined_output = critic_content.get("structuredOutput") - validation_passed = ( - bool(getattr(combined_output, _VALIDATION_VAR, False)) - if combined_output is not None - else False - ) - - structured_output: BaseModel | None = None - if critic.output_schema is not None and combined_output is not None: - output_dict = combined_output.model_dump() - output_dict.pop(_VALIDATION_VAR, None) - structured_output = critic.output_schema.model_validate(output_dict) - - critic_action_trace_raw = critic_content.get("actionTrace") - critic_action_trace = ( - parse_action_trace(critic_action_trace_raw) - if critic_action_trace_raw is not None - else None - ) - - return CriticResult( - validation_passed=validation_passed, - text=critic_content["text"], - structured_output=structured_output, - usage=AgentUsage.model_validate(critic_dispatch_response["usage"]), - action_trace=critic_action_trace, - ) - async def agentic_selector( self, *, From 5c943b9441e0c7783290ceea0dd655d53352a814 Mon Sep 17 00:00:00 2001 From: chris Date: Fri, 17 Apr 2026 18:33:35 -0700 Subject: [PATCH 3/9] style --- packages/narada-core/src/narada_core/actions/models.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/narada-core/src/narada_core/actions/models.py b/packages/narada-core/src/narada_core/actions/models.py index 0f39d8c..732d29a 100644 --- a/packages/narada-core/src/narada_core/actions/models.py +++ b/packages/narada-core/src/narada_core/actions/models.py @@ -16,7 +16,14 @@ ) from narada_core.models import Agent, CriticConfig -from pydantic import BaseModel, ConfigDict, Field, TypeAdapter, ValidationError, create_model +from pydantic import ( + BaseModel, + ConfigDict, + Field, + TypeAdapter, + ValidationError, + create_model, +) # There is no `AgentRequest` because the `agent` action delegates to the `dispatch_request` method # under the hood. From 14d49d6f9dcd7bc38e197ac2f4c7820089563bb4 Mon Sep 17 00:00:00 2001 From: chris Date: Wed, 22 Apr 2026 21:33:49 -0700 Subject: [PATCH 4/9] style --- .../src/narada_core/actions/critic.py | 78 +++++++++++++++++++ .../src/narada_core/actions/models.py | 76 ------------------ packages/narada-pyodide/src/narada/window.py | 2 +- packages/narada/src/narada/window.py | 3 +- 4 files changed, 80 insertions(+), 79 deletions(-) create mode 100644 packages/narada-core/src/narada_core/actions/critic.py diff --git a/packages/narada-core/src/narada_core/actions/critic.py b/packages/narada-core/src/narada_core/actions/critic.py new file mode 100644 index 0000000..5a21fff --- /dev/null +++ b/packages/narada-core/src/narada_core/actions/critic.py @@ -0,0 +1,78 @@ +from __future__ import annotations + +from typing import Any, Awaitable, Callable + +from narada_core.models import Agent, CriticConfig +from pydantic import BaseModel, create_model + +from narada_core.actions.models import AgentUsage, CriticResult, parse_action_trace + +_VALIDATION_VAR = "narada_validation_passed" + + +async def run_critic( + *, + dispatch_request: Callable[..., Awaitable[Any]], + original_prompt: str, + response_content: dict[str, Any], + action_trace_raw: list[Any] | None, + critic: CriticConfig, + time_zone: str, + timeout: int, +) -> CriticResult: + if critic.output_schema is not None: + combined_fields: dict[str, Any] = { + name: (info.annotation, info) + for name, info in critic.output_schema.model_fields.items() + } + else: + combined_fields = {} + combined_fields[_VALIDATION_VAR] = (bool, ...) + CriticOutputModel = create_model("CriticOutput", **combined_fields) + + critic_dispatch_response = await dispatch_request( + prompt=critic.prompt, + agent=Agent.PRODUCTIVITY, + output_schema=CriticOutputModel, + critic_context={ + "agentPrompt": original_prompt, + "agentOutput": response_content["text"], + "actionTrace": action_trace_raw or [], + "validationVariableName": _VALIDATION_VAR, + }, + mcp_servers=critic.mcp_servers, + time_zone=time_zone, + timeout=timeout, + ) + + critic_content = critic_dispatch_response["response"] + if critic_content is None: + raise ValueError("Critic dispatch returned no response") + + combined_output = critic_content.get("structuredOutput") + validation_passed = ( + bool(getattr(combined_output, _VALIDATION_VAR, False)) + if combined_output is not None + else False + ) + + structured_output: BaseModel | None = None + if critic.output_schema is not None and combined_output is not None: + output_dict = combined_output.model_dump() + output_dict.pop(_VALIDATION_VAR, None) + structured_output = critic.output_schema.model_validate(output_dict) + + critic_action_trace_raw = critic_content.get("actionTrace") + critic_action_trace = ( + parse_action_trace(critic_action_trace_raw) + if critic_action_trace_raw is not None + else None + ) + + return CriticResult( + validation_passed=validation_passed, + text=critic_content["text"], + structured_output=structured_output, + usage=AgentUsage.model_validate(critic_dispatch_response["usage"]), + action_trace=critic_action_trace, + ) diff --git a/packages/narada-core/src/narada_core/actions/models.py b/packages/narada-core/src/narada_core/actions/models.py index 732d29a..bbfbb82 100644 --- a/packages/narada-core/src/narada_core/actions/models.py +++ b/packages/narada-core/src/narada_core/actions/models.py @@ -1,11 +1,8 @@ from __future__ import annotations -import uuid from typing import ( Annotated, Any, - Awaitable, - Callable, Generic, Literal, NotRequired, @@ -15,14 +12,11 @@ override, ) -from narada_core.models import Agent, CriticConfig from pydantic import ( BaseModel, - ConfigDict, Field, TypeAdapter, ValidationError, - create_model, ) # There is no `AgentRequest` because the `agent` action delegates to the `dispatch_request` method @@ -289,8 +283,6 @@ class StructuredOutput(BaseModel, Generic[_StructuredOutputT]): class CriticResult(BaseModel): """Result from a critic agent that evaluated the main agent's output.""" - model_config = ConfigDict(arbitrary_types_allowed=True) - validation_passed: bool """Whether the critic determined the main agent successfully completed its task.""" @@ -304,74 +296,6 @@ class CriticResult(BaseModel): action_trace: ActionTrace | None = None -async def run_critic( - *, - dispatch_request: Callable[..., Awaitable[Any]], - original_prompt: str, - response_content: dict[str, Any], - action_trace_raw: list[Any] | None, - critic: CriticConfig, - time_zone: str, - timeout: int, -) -> CriticResult: - if critic.output_schema is not None: - combined_fields: dict[str, Any] = { - name: (info.annotation, info) - for name, info in critic.output_schema.model_fields.items() - } - else: - combined_fields = {} - _VALIDATION_VAR = f"narada_validation_passed_{uuid.uuid4().hex[:4]}" - combined_fields[_VALIDATION_VAR] = (bool, ...) - CriticOutputModel = create_model("CriticOutput", **combined_fields) - - critic_dispatch_response = await dispatch_request( - prompt=critic.prompt, - agent=Agent.PRODUCTIVITY, - output_schema=CriticOutputModel, - critic_context={ - "agentPrompt": original_prompt, - "agentOutput": response_content["text"], - "actionTrace": action_trace_raw or [], - "validationVariableName": _VALIDATION_VAR, - }, - mcp_servers=critic.mcp_servers, - time_zone=time_zone, - timeout=timeout, - ) - - critic_content = critic_dispatch_response["response"] - assert critic_content is not None - - combined_output = critic_content.get("structuredOutput") - validation_passed = ( - bool(getattr(combined_output, _VALIDATION_VAR, False)) - if combined_output is not None - else False - ) - - structured_output: BaseModel | None = None - if critic.output_schema is not None and combined_output is not None: - output_dict = combined_output.model_dump() - output_dict.pop(_VALIDATION_VAR, None) - structured_output = critic.output_schema.model_validate(output_dict) - - critic_action_trace_raw = critic_content.get("actionTrace") - critic_action_trace = ( - parse_action_trace(critic_action_trace_raw) - if critic_action_trace_raw is not None - else None - ) - - return CriticResult( - validation_passed=validation_passed, - text=critic_content["text"], - structured_output=structured_output, - usage=AgentUsage.model_validate(critic_dispatch_response["usage"]), - action_trace=critic_action_trace, - ) - - class AgentResponse(BaseModel, Generic[_StructuredOutputT]): request_id: str status: Literal["success", "error", "input-required"] diff --git a/packages/narada-pyodide/src/narada/window.py b/packages/narada-pyodide/src/narada/window.py index 1571618..875c199 100644 --- a/packages/narada-pyodide/src/narada/window.py +++ b/packages/narada-pyodide/src/narada/window.py @@ -7,6 +7,7 @@ from typing import IO, TYPE_CHECKING, Any, Literal, Optional, TypeVar, cast, overload from js import AbortController, setTimeout # type: ignore +from narada_core.actions.critic import run_critic from narada_core.actions.models import ( AgenticMouseAction, AgenticMouseActionRequest, @@ -35,7 +36,6 @@ RecordedClick, WriteGoogleSheetRequest, parse_action_trace, - run_critic, ) from narada_core.errors import ( NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE, diff --git a/packages/narada/src/narada/window.py b/packages/narada/src/narada/window.py index 8cdc3a3..6f1050c 100644 --- a/packages/narada/src/narada/window.py +++ b/packages/narada/src/narada/window.py @@ -2,7 +2,6 @@ import logging import os import time -import uuid from abc import ABC from dataclasses import dataclass from http import HTTPStatus @@ -11,6 +10,7 @@ from typing import IO, Any, Mapping, TypeGuard, TypeVar, overload, override import aiohttp +from narada_core.actions.critic import run_critic from narada_core.actions.models import ( AgenticMouseAction, AgenticMouseActionRequest, @@ -39,7 +39,6 @@ RecordedClick, WriteGoogleSheetRequest, parse_action_trace, - run_critic, ) from narada_core.errors import ( NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE, From 3e6c8cac364b2a33e1f96ae3b7c1801695d3e8d0 Mon Sep 17 00:00:00 2001 From: chris Date: Mon, 27 Apr 2026 10:59:32 -0700 Subject: [PATCH 5/9] example --- examples/critic.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 examples/critic.py diff --git a/examples/critic.py b/examples/critic.py new file mode 100644 index 0000000..145d444 --- /dev/null +++ b/examples/critic.py @@ -0,0 +1,39 @@ +import asyncio + +from narada import Narada, CriticConfig +from pydantic import BaseModel, Field + + +class SearchCriticOutput(BaseModel): + search_query_used: str = Field(description="The exact search query the agent used") + result_count: int = Field(description="The number of results the agent found") + + +async def main() -> None: + # Initialize the Narada client. + async with Narada() as narada: + window = await narada.open_and_initialize_browser_window() + + # Define a critic that verifies the agent completed the task and extracts + # additional structured information from the agent's actions. + critic = CriticConfig( + prompt=( + "Verify that the agent successfully searched Google and found results. " + "Extract the exact search query the agent used and the number of results found." + ), + output_schema=SearchCriticOutput, + ) + + # Run a task with the critic. After the main agent finishes, the critic + # evaluates whether the task was completed successfully. + response = await window.agent( + prompt='Search Google for "Narada AI" and tell me how many results were found.', + critic=critic, + ) + + print("Agent response:", response.text) + print("Critic result:", response.critic_result.validation_passed) + + +if __name__ == "__main__": + asyncio.run(main()) From 1f18780dc24f99f9c133f9652ff0f5b58f16d9f7 Mon Sep 17 00:00:00 2001 From: chris Date: Mon, 27 Apr 2026 11:02:41 -0700 Subject: [PATCH 6/9] imports for pyiodide --- packages/narada-pyodide/src/narada/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/narada-pyodide/src/narada/__init__.py b/packages/narada-pyodide/src/narada/__init__.py index e2144c1..0123e58 100644 --- a/packages/narada-pyodide/src/narada/__init__.py +++ b/packages/narada-pyodide/src/narada/__init__.py @@ -9,11 +9,14 @@ NaradaError, NaradaTimeoutError, ) -from narada_core.models import Agent, File, Response, ResponseContent +from narada_core.actions.models import CriticResult +from narada_core.models import Agent, CriticConfig, File, Response, ResponseContent __all__ = [ "__version__", "Agent", + "CriticConfig", + "CriticResult", "download_file", "File", "LocalBrowserWindow", From ff81c4f9deadbbafacacfc0e96efad7e95272c93 Mon Sep 17 00:00:00 2001 From: chris Date: Sun, 3 May 2026 21:53:28 -0700 Subject: [PATCH 7/9] style --- .../src/narada_core/actions/critic.py | 17 +++++++---- .../narada-core/src/narada_core/models.py | 29 ++++--------------- 2 files changed, 16 insertions(+), 30 deletions(-) diff --git a/packages/narada-core/src/narada_core/actions/critic.py b/packages/narada-core/src/narada_core/actions/critic.py index 5a21fff..183e839 100644 --- a/packages/narada-core/src/narada_core/actions/critic.py +++ b/packages/narada-core/src/narada_core/actions/critic.py @@ -8,6 +8,10 @@ from narada_core.actions.models import AgentUsage, CriticResult, parse_action_trace _VALIDATION_VAR = "narada_validation_passed" +_DEFAULT_CRITIC_PROMPT = ( + "Using your context about the actions and outcome of the previous agent, " + "determine whether its task was completed successfully." +) async def run_critic( @@ -20,10 +24,11 @@ async def run_critic( time_zone: str, timeout: int, ) -> CriticResult: - if critic.output_schema is not None: + output_schema = critic.get("output_schema") + if output_schema is not None: combined_fields: dict[str, Any] = { name: (info.annotation, info) - for name, info in critic.output_schema.model_fields.items() + for name, info in output_schema.model_fields.items() } else: combined_fields = {} @@ -31,7 +36,7 @@ async def run_critic( CriticOutputModel = create_model("CriticOutput", **combined_fields) critic_dispatch_response = await dispatch_request( - prompt=critic.prompt, + prompt=critic.get("prompt", _DEFAULT_CRITIC_PROMPT), agent=Agent.PRODUCTIVITY, output_schema=CriticOutputModel, critic_context={ @@ -40,7 +45,7 @@ async def run_critic( "actionTrace": action_trace_raw or [], "validationVariableName": _VALIDATION_VAR, }, - mcp_servers=critic.mcp_servers, + mcp_servers=critic.get("mcp_servers"), time_zone=time_zone, timeout=timeout, ) @@ -57,10 +62,10 @@ async def run_critic( ) structured_output: BaseModel | None = None - if critic.output_schema is not None and combined_output is not None: + if output_schema is not None and combined_output is not None: output_dict = combined_output.model_dump() output_dict.pop(_VALIDATION_VAR, None) - structured_output = critic.output_schema.model_validate(output_dict) + structured_output = output_schema.model_validate(output_dict) critic_action_trace_raw = critic_content.get("actionTrace") critic_action_trace = ( diff --git a/packages/narada-core/src/narada_core/models.py b/packages/narada-core/src/narada_core/models.py index 007d56b..a57b297 100644 --- a/packages/narada-core/src/narada_core/models.py +++ b/packages/narada-core/src/narada_core/models.py @@ -3,7 +3,7 @@ from enum import Enum, StrEnum from typing import Annotated, Generic, Literal, NotRequired, TypedDict, TypeVar -from pydantic import BaseModel, ConfigDict, Field +from pydantic import BaseModel, Field class Agent(Enum): @@ -65,29 +65,10 @@ class McpServer(BaseModel): selectedTools: list[str] | None = None -class CriticConfig(BaseModel): - """Configuration for a critic agent that evaluates a main agent's output. - - When passed to `agent()`, the critic runs after the main agent completes and - evaluates whether the task was successfully performed, setting a boolean - variable to indicate pass or fail. - """ - - model_config = ConfigDict(arbitrary_types_allowed=True) - - prompt: str = ( - "Using your context about the actions and outcome of the previous agent, " - "determine whether its task was completed successfully." - ) - """Instructions for the critic on how to evaluate the main agent's output.""" - - output_schema: type[BaseModel] | None = None - """Optional Pydantic model defining additional structured fields for the critic to populate - alongside the boolean validation variable. The validation variable should not be included - here — it is always added automatically.""" - - mcp_servers: list[McpServer] | None = None - """Optional MCP servers available to the critic agent.""" +class CriticConfig(TypedDict, total=False): + prompt: str + output_schema: type[BaseModel] + mcp_servers: list[McpServer] class RemoteDispatchChatHistoryItem(TypedDict): From 262ccafba02661b60714242828fe1bfa6ce4ee78 Mon Sep 17 00:00:00 2001 From: chris Date: Sun, 3 May 2026 22:46:54 -0700 Subject: [PATCH 8/9] fixes --- packages/narada-core/src/narada_core/actions/critic.py | 4 ++-- packages/narada-core/src/narada_core/actions/models.py | 5 ----- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/packages/narada-core/src/narada_core/actions/critic.py b/packages/narada-core/src/narada_core/actions/critic.py index 183e839..a8a3f23 100644 --- a/packages/narada-core/src/narada_core/actions/critic.py +++ b/packages/narada-core/src/narada_core/actions/critic.py @@ -5,7 +5,8 @@ from narada_core.models import Agent, CriticConfig from pydantic import BaseModel, create_model -from narada_core.actions.models import AgentUsage, CriticResult, parse_action_trace +from narada_core.actions.models import AgentUsage, CriticResult +from narada_core.tracing.model import parse_action_trace _VALIDATION_VAR = "narada_validation_passed" _DEFAULT_CRITIC_PROMPT = ( @@ -76,7 +77,6 @@ async def run_critic( return CriticResult( validation_passed=validation_passed, - text=critic_content["text"], structured_output=structured_output, usage=AgentUsage.model_validate(critic_dispatch_response["usage"]), action_trace=critic_action_trace, diff --git a/packages/narada-core/src/narada_core/actions/models.py b/packages/narada-core/src/narada_core/actions/models.py index d4524a8..f94d2c9 100644 --- a/packages/narada-core/src/narada_core/actions/models.py +++ b/packages/narada-core/src/narada_core/actions/models.py @@ -15,8 +15,6 @@ from pydantic import ( BaseModel, Field, - TypeAdapter, - ValidationError, ) from narada_core.tracing import model as tracing_model @@ -48,9 +46,6 @@ class CriticResult(BaseModel): validation_passed: bool """Whether the critic determined the main agent successfully completed its task.""" - text: str - """The critic's evaluation text.""" - structured_output: Any """Parsed instance of the output_schema passed in CriticConfig, or None if no schema was given.""" From 44edf16b562904faccc742b4756c4d37446c9fdd Mon Sep 17 00:00:00 2001 From: chris Date: Sun, 3 May 2026 22:51:57 -0700 Subject: [PATCH 9/9] style --- packages/narada-core/src/narada_core/actions/models.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/packages/narada-core/src/narada_core/actions/models.py b/packages/narada-core/src/narada_core/actions/models.py index f94d2c9..3b9dfbf 100644 --- a/packages/narada-core/src/narada_core/actions/models.py +++ b/packages/narada-core/src/narada_core/actions/models.py @@ -41,14 +41,8 @@ class StructuredOutput(BaseModel, Generic[_StructuredOutputT]): class CriticResult(BaseModel): - """Result from a critic agent that evaluated the main agent's output.""" - validation_passed: bool - """Whether the critic determined the main agent successfully completed its task.""" - structured_output: Any - """Parsed instance of the output_schema passed in CriticConfig, or None if no schema was given.""" - usage: AgentUsage action_trace: tracing_model.ActionTrace | None = None