diff --git a/examples/critic.py b/examples/critic.py
new file mode 100644
index 0000000..145d444
--- /dev/null
+++ b/examples/critic.py
@@ -0,0 +1,46 @@
+import asyncio
+
+from narada import CriticConfig, Narada
+from pydantic import BaseModel, Field
+
+
+class SearchCriticOutput(BaseModel):
+    """Structured fields the critic extracts in addition to its pass/fail verdict."""
+
+    search_query_used: str = Field(description="The exact search query the agent used")
+    result_count: int = Field(description="The number of results the agent found")
+
+
+async def main() -> None:
+    # Initialize the Narada client.
+    async with Narada() as narada:
+        window = await narada.open_and_initialize_browser_window()
+
+        # Define a critic that verifies the agent completed the task and extracts
+        # additional structured information from the agent's actions.
+        critic = CriticConfig(
+            prompt=(
+                "Verify that the agent successfully searched Google and found results. "
+                "Extract the exact search query the agent used and the number of results found."
+            ),
+            output_schema=SearchCriticOutput,
+        )
+
+        # Run a task with the critic. After the main agent finishes, the critic
+        # evaluates whether the task was completed successfully.
+        response = await window.agent(
+            prompt='Search Google for "Narada AI" and tell me how many results were found.',
+            critic=critic,
+        )
+
+        print("Agent response:", response.text)
+
+        # `critic_result` is typed as optional; it is only populated when a critic is passed.
+        critic_result = response.critic_result
+        if critic_result is not None:
+            print("Critic result:", critic_result.validation_passed)
+            print("Critic output:", critic_result.structured_output)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/packages/narada-core/src/narada_core/actions/critic.py b/packages/narada-core/src/narada_core/actions/critic.py
new file mode 100644
index 0000000..5a21fff
--- /dev/null
+++ b/packages/narada-core/src/narada_core/actions/critic.py
@@ -0,0 +1,102 @@
+from __future__ import annotations
+
+from collections.abc import Awaitable, Callable, Mapping
+from typing import Any
+
+from pydantic import BaseModel, create_model
+
+from narada_core.actions.models import AgentUsage, CriticResult, parse_action_trace
+from narada_core.models import Agent, CriticConfig
+
+_VALIDATION_VAR = "narada_validation_passed"
+
+
+def _structured_output_fields(raw: Any) -> dict[str, Any] | None:
+    """Return the critic's structured output as a plain dict, or None if there is none."""
+    if isinstance(raw, BaseModel):
+        return raw.model_dump()
+    if isinstance(raw, Mapping):
+        # Accept an unparsed JSON object too, so it is not misread as a failed validation.
+        return dict(raw)
+    return None
+
+
+async def run_critic(
+    *,
+    dispatch_request: Callable[..., Awaitable[Any]],
+    original_prompt: str,
+    response_content: dict[str, Any],
+    action_trace_raw: list[Any] | None,
+    critic: CriticConfig,
+    time_zone: str,
+    timeout: int,
+) -> CriticResult:
+    """Dispatch the critic agent for a finished agent run and collect its verdict.
+
+    The critic is sent to the productivity agent with the main agent's prompt, text output
+    and raw action trace as context. Its structured output is `critic.output_schema` (if
+    any) extended with a required boolean field named `_VALIDATION_VAR` for the verdict.
+
+    Raises:
+        ValueError: If `critic.output_schema` defines the reserved `_VALIDATION_VAR`
+            field, or if the critic dispatch returns no response.
+    """
+    combined_fields: dict[str, Any] = {}
+    if critic.output_schema is not None:
+        if _VALIDATION_VAR in critic.output_schema.model_fields:
+            # The verdict field is injected below and stripped from the parsed output, so a
+            # user field with the same name could never be populated.
+            raise ValueError(
+                "CriticConfig.output_schema must not define the reserved field "
+                f"{_VALIDATION_VAR!r}"
+            )
+        combined_fields.update(
+            (name, (info.annotation, info))
+            for name, info in critic.output_schema.model_fields.items()
+        )
+    combined_fields[_VALIDATION_VAR] = (bool, ...)
+    CriticOutputModel = create_model("CriticOutput", **combined_fields)
+
+    critic_dispatch_response = await dispatch_request(
+        prompt=critic.prompt,
+        agent=Agent.PRODUCTIVITY,
+        output_schema=CriticOutputModel,
+        critic_context={
+            "agentPrompt": original_prompt,
+            "agentOutput": response_content["text"],
+            "actionTrace": action_trace_raw or [],
+            "validationVariableName": _VALIDATION_VAR,
+        },
+        mcp_servers=critic.mcp_servers,
+        time_zone=time_zone,
+        timeout=timeout,
+    )
+
+    critic_content = critic_dispatch_response["response"]
+    if critic_content is None:
+        raise ValueError("Critic dispatch returned no response")
+
+    # A missing verdict (or no structured output at all) counts as a failed validation.
+    output_fields = _structured_output_fields(critic_content.get("structuredOutput"))
+    validation_passed = (
+        bool(output_fields.pop(_VALIDATION_VAR, False)) if output_fields is not None else False
+    )
+
+    structured_output: BaseModel | None = None
+    if critic.output_schema is not None and output_fields is not None:
+        structured_output = critic.output_schema.model_validate(output_fields)
+
+    critic_action_trace_raw = critic_content.get("actionTrace")
+    critic_action_trace = (
+        parse_action_trace(critic_action_trace_raw)
+        if critic_action_trace_raw is not None
+        else None
+    )
+
+    return CriticResult(
+        validation_passed=validation_passed,
+        text=critic_content["text"],
+        structured_output=structured_output,
+        usage=AgentUsage.model_validate(critic_dispatch_response["usage"]),
+        action_trace=critic_action_trace,
+    )
diff --git a/packages/narada-core/src/narada_core/actions/models.py b/packages/narada-core/src/narada_core/actions/models.py
index 0c456ef..7ff7aaf 100644
--- a/packages/narada-core/src/narada_core/actions/models.py
+++ b/packages/narada-core/src/narada_core/actions/models.py
@@ -12,7 +12,12 @@
     override,
 )
 
-from pydantic import BaseModel, Field, TypeAdapter, ValidationError
+from pydantic import (
+    BaseModel,
+    Field,
+    TypeAdapter,
+    ValidationError,
+)
 
 # There is
no `AgentRequest` because the `agent` action delegates to the `dispatch_request` method
 # under the hood.
@@ -275,6 +280,26 @@ class StructuredOutput(BaseModel, Generic[_StructuredOutputT]):
     content: _StructuredOutputT
 
 
+class CriticResult(BaseModel):
+    """Result from a critic agent that evaluated the main agent's output."""
+
+    validation_passed: bool
+    """Whether the critic determined the main agent successfully completed its task."""
+
+    text: str
+    """The critic's evaluation text."""
+
+    structured_output: Any = None
+    """Instance of `CriticConfig.output_schema` parsed from the critic's output. None when no
+    schema was given or the critic returned no structured output."""
+
+    usage: AgentUsage
+    """Usage reported for the critic's own dispatch."""
+
+    action_trace: ActionTrace | None = None
+    """The critic's own action trace, if one was returned."""
+
+
 class AgentResponse(BaseModel, Generic[_StructuredOutputT]):
     request_id: str
     status: Literal["success", "error", "input-required"]
@@ -286,6 +311,8 @@ class AgentResponse(BaseModel, Generic[_StructuredOutputT]):
     ]
     usage: AgentUsage
     action_trace: ActionTrace | None = None
+    critic_result: CriticResult | None = None
+    """Result from the critic agent, populated when a CriticConfig is passed to agent()."""
 
 
 class AgenticSelectorClickAction(TypedDict):
diff --git a/packages/narada-core/src/narada_core/models.py b/packages/narada-core/src/narada_core/models.py
index 8e03292..007d56b 100644
--- a/packages/narada-core/src/narada_core/models.py
+++ b/packages/narada-core/src/narada_core/models.py
@@ -3,7 +3,7 @@
 from enum import Enum, StrEnum
 from typing import Annotated, Generic, Literal, NotRequired, TypedDict, TypeVar
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
 
 
 class Agent(Enum):
@@ -65,6 +65,31 @@ class McpServer(BaseModel):
     selectedTools: list[str] | None = None
 
 
+class CriticConfig(BaseModel):
+    """Configuration for a critic agent that evaluates a main agent's output.
+
+    When passed to `agent()`, the critic runs after the main agent completes and
+    evaluates whether the task was successfully performed. Its verdict is returned
+    as `AgentResponse.critic_result.validation_passed`.
+    """
+
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+    prompt: str = (
+        "Using your context about the actions and outcome of the previous agent, "
+        "determine whether its task was completed successfully."
+    )
+    """Instructions for the critic on how to evaluate the main agent's output."""
+
+    output_schema: type[BaseModel] | None = None
+    """Optional Pydantic model defining additional structured fields for the critic to populate
+    alongside the boolean validation variable. The validation variable should not be included
+    here — it is always added automatically."""
+
+    mcp_servers: list[McpServer] | None = None
+    """Optional MCP servers available to the critic agent."""
+
+
 class RemoteDispatchChatHistoryItem(TypedDict):
     role: Literal["user", "assistant"]
     content: str
diff --git a/packages/narada-pyodide/src/narada/__init__.py b/packages/narada-pyodide/src/narada/__init__.py
index 544d452..1dc01d8 100644
--- a/packages/narada-pyodide/src/narada/__init__.py
+++ b/packages/narada-pyodide/src/narada/__init__.py
@@ -2,7 +2,8 @@
     NaradaError,
     NaradaTimeoutError,
 )
-from narada_core.models import Agent, File, Response, ResponseContent
+from narada_core.actions.models import CriticResult
+from narada_core.models import Agent, CriticConfig, File, Response, ResponseContent
 
 from narada.client import Narada
 from narada.utils import download_file, render_html
@@ -17,6 +18,8 @@
     "__version__",
     "Agent",
     "CloudBrowserWindow",
+    "CriticConfig",
+    "CriticResult",
     "download_file",
     "File",
     "LocalBrowserWindow",
diff --git a/packages/narada-pyodide/src/narada/window.py b/packages/narada-pyodide/src/narada/window.py
index 1e12251..00c46ac 100644
--- a/packages/narada-pyodide/src/narada/window.py
+++ b/packages/narada-pyodide/src/narada/window.py
@@ -20,6 +20,7 @@ from urllib.parse
import urlencode from js import AbortController, setTimeout # type: ignore +from narada_core.actions.critic import run_critic from narada_core.actions.models import ( AgenticMouseAction, AgenticMouseActionRequest, @@ -30,6 +31,7 @@ AgentResponse, AgentUsage, CloseWindowRequest, + CriticResult, ExtensionActionRequest, ExtensionActionResponse, GetFullHtmlRequest, @@ -61,6 +63,7 @@ ) from narada_core.models import ( Agent, + CriticConfig, File, McpServer, RemoteDispatchChatHistoryItem, @@ -211,6 +214,7 @@ async def dispatch_request( mcp_servers: list[McpServer] | None = None, secret_variables: dict[str, str] | None = None, input_variables: dict[str, Any] | None = None, + critic_context: dict[str, Any] | None = None, callback_url: str | None = None, callback_secret: str | None = None, callback_headers: dict[str, Any] | None = None, @@ -234,6 +238,7 @@ async def dispatch_request( mcp_servers: list[McpServer] | None = None, secret_variables: dict[str, str] | None = None, input_variables: dict[str, Any] | None = None, + critic_context: dict[str, Any] | None = None, callback_url: str | None = None, callback_secret: str | None = None, callback_headers: dict[str, Any] | None = None, @@ -256,6 +261,7 @@ async def dispatch_request( mcp_servers: list[McpServer] | None = None, secret_variables: dict[str, str] | None = None, input_variables: dict[str, Any] | None = None, + critic_context: dict[str, Any] | None = None, callback_url: str | None = None, callback_secret: str | None = None, callback_headers: dict[str, Any] | None = None, @@ -305,6 +311,8 @@ async def dispatch_request( body["secretVariables"] = secret_variables if input_variables is not None: body["inputVariables"] = input_variables + if critic_context is not None: + body["criticContext"] = critic_context if callback_url is not None: body["callbackUrl"] = callback_url if callback_secret is not None: @@ -394,6 +402,7 @@ async def agent( mcp_servers: list[McpServer] | None = None, secret_variables: dict[str, str] | None 
= None, input_variables: dict[str, Any] | None = None, + critic: CriticConfig | None = None, timeout: int = 1000, ) -> AgentResponse[dict[str, Any]]: ... @@ -410,6 +419,7 @@ async def agent( mcp_servers: list[McpServer] | None = None, secret_variables: dict[str, str] | None = None, input_variables: dict[str, Any] | None = None, + critic: CriticConfig | None = None, timeout: int = 1000, ) -> AgentResponse[_StructuredOutput]: ... @@ -425,6 +435,7 @@ async def agent( mcp_servers: list[McpServer] | None = None, secret_variables: dict[str, str] | None = None, input_variables: dict[str, Any] | None = None, + critic: CriticConfig | None = None, timeout: int = 1000, ) -> AgentResponse: """Invokes an agent in the Narada extension side panel chat.""" @@ -450,6 +461,18 @@ async def agent( else None ) + critic_result: CriticResult | None = None + if critic is not None: + critic_result = await run_critic( + dispatch_request=self.dispatch_request, + original_prompt=prompt, + response_content=response_content, + action_trace_raw=action_trace_raw, + critic=critic, + time_zone=time_zone, + timeout=timeout, + ) + return AgentResponse( request_id=remote_dispatch_response["requestId"], status=remote_dispatch_response["status"], @@ -458,6 +481,7 @@ async def agent( structured_output=response_content.get("structuredOutput"), usage=AgentUsage.model_validate(remote_dispatch_response["usage"]), action_trace=action_trace, + critic_result=critic_result, ) async def agentic_selector( diff --git a/packages/narada/src/narada/__init__.py b/packages/narada/src/narada/__init__.py index 3f6e9fe..452ff2c 100644 --- a/packages/narada/src/narada/__init__.py +++ b/packages/narada/src/narada/__init__.py @@ -7,7 +7,8 @@ NaradaUnsupportedBrowserError, UserAbortedError, ) -from narada_core.models import Agent, File, Response, ResponseContent +from narada_core.actions.models import CriticResult +from narada_core.models import Agent, CriticConfig, File, Response, ResponseContent from narada.client import 
Narada from narada.config import BrowserConfig, ProxyConfig @@ -20,6 +21,8 @@ "Agent", "BrowserConfig", "CloudBrowserWindow", + "CriticConfig", + "CriticResult", "download_file", "File", "LocalBrowserWindow", diff --git a/packages/narada/src/narada/window.py b/packages/narada/src/narada/window.py index 0a96337..8baadf9 100644 --- a/packages/narada/src/narada/window.py +++ b/packages/narada/src/narada/window.py @@ -21,6 +21,7 @@ ) import aiohttp +from narada_core.actions.critic import run_critic from narada_core.actions.models import ( AgenticMouseAction, AgenticMouseActionRequest, @@ -31,6 +32,7 @@ AgentResponse, AgentUsage, CloseWindowRequest, + CriticResult, ExtensionActionRequest, ExtensionActionResponse, GetFullHtmlRequest, @@ -62,6 +64,7 @@ ) from narada_core.models import ( Agent, + CriticConfig, File, McpServer, RemoteDispatchChatHistoryItem, @@ -251,6 +254,7 @@ async def dispatch_request( mcp_servers: list[McpServer] | None = None, secret_variables: dict[str, str] | None = None, input_variables: Mapping[str, Any] | None = None, + critic_context: dict[str, Any] | None = None, callback_url: str | None = None, callback_secret: str | None = None, callback_headers: Mapping[str, Any] | None = None, @@ -275,6 +279,7 @@ async def dispatch_request( mcp_servers: list[McpServer] | None = None, secret_variables: dict[str, str] | None = None, input_variables: Mapping[str, Any] | None = None, + critic_context: dict[str, Any] | None = None, callback_url: str | None = None, callback_secret: str | None = None, callback_headers: Mapping[str, Any] | None = None, @@ -298,6 +303,7 @@ async def dispatch_request( mcp_servers: list[McpServer] | None = None, secret_variables: dict[str, str] | None = None, input_variables: Mapping[str, Any] | None = None, + critic_context: dict[str, Any] | None = None, callback_url: str | None = None, callback_secret: str | None = None, callback_headers: Mapping[str, Any] | None = None, @@ -346,6 +352,8 @@ async def dispatch_request( 
body["inputVariables"] = await self._normalize_input_variables( input_variables=input_variables ) + if critic_context is not None: + body["criticContext"] = critic_context if callback_url is not None: body["callbackUrl"] = callback_url if callback_secret is not None: @@ -416,6 +424,7 @@ async def agent( mcp_servers: list[McpServer] | None = None, secret_variables: dict[str, str] | None = None, input_variables: Mapping[str, Any] | None = None, + critic: CriticConfig | None = None, timeout: int = 1000, ) -> AgentResponse[dict[str, Any]]: ... @@ -433,6 +442,7 @@ async def agent( mcp_servers: list[McpServer] | None = None, secret_variables: dict[str, str] | None = None, input_variables: Mapping[str, Any] | None = None, + critic: CriticConfig | None = None, timeout: int = 1000, ) -> AgentResponse[_StructuredOutput]: ... @@ -449,6 +459,7 @@ async def agent( mcp_servers: list[McpServer] | None = None, secret_variables: dict[str, str] | None = None, input_variables: Mapping[str, Any] | None = None, + critic: CriticConfig | None = None, timeout: int = 1000, ) -> AgentResponse: """Invokes an agent in the Narada extension side panel chat.""" @@ -475,6 +486,18 @@ async def agent( else None ) + critic_result: CriticResult | None = None + if critic is not None: + critic_result = await run_critic( + dispatch_request=self.dispatch_request, + original_prompt=prompt, + response_content=response_content, + action_trace_raw=action_trace_raw, + critic=critic, + time_zone=time_zone, + timeout=timeout, + ) + return AgentResponse( request_id=remote_dispatch_response["requestId"], status=remote_dispatch_response["status"], @@ -483,6 +506,7 @@ async def agent( structured_output=response_content.get("structuredOutput"), usage=AgentUsage.model_validate(remote_dispatch_response["usage"]), action_trace=action_trace, + critic_result=critic_result, ) async def agentic_selector(