Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions examples/critic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import asyncio

from narada import Narada, CriticConfig
from pydantic import BaseModel, Field


class SearchCriticOutput(BaseModel):
    # Extra structured fields the critic is asked to report about the agent's search.
    # Kept as a `#` comment (not a docstring) so the model's JSON schema is unaffected.

    search_query_used: str = Field(
        description="The exact search query the agent used",
    )
    result_count: int = Field(
        description="The number of results the agent found",
    )


async def main() -> None:
    """Run one agent task in a browser window and have a critic check the outcome."""
    task_prompt = 'Search Google for "Narada AI" and tell me how many results were found.'
    critic_instructions = (
        "Verify that the agent successfully searched Google and found results. "
        "Extract the exact search query the agent used and the number of results found."
    )

    # The Narada client is used as an async context manager for the whole run.
    async with Narada() as client:
        browser_window = await client.open_and_initialize_browser_window()

        # The critic receives free-form instructions plus a schema describing the
        # additional structured fields it should extract from the agent's actions.
        search_critic = CriticConfig(
            prompt=critic_instructions,
            output_schema=SearchCriticOutput,
        )

        # Passing `critic=` makes the critic evaluate the task once the main
        # agent has finished.
        result = await browser_window.agent(
            prompt=task_prompt,
            critic=search_critic,
        )

        print("Agent response:", result.text)
        print("Critic result:", result.critic_result.validation_passed)


if __name__ == "__main__":
    asyncio.run(main())
78 changes: 78 additions & 0 deletions packages/narada-core/src/narada_core/actions/critic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
from __future__ import annotations

from typing import Any, Awaitable, Callable

from narada_core.models import Agent, CriticConfig
from pydantic import BaseModel, create_model

from narada_core.actions.models import AgentUsage, CriticResult, parse_action_trace

_VALIDATION_VAR = "narada_validation_passed"


async def run_critic(
    *,
    dispatch_request: Callable[..., Awaitable[Any]],
    original_prompt: str,
    response_content: dict[str, Any],
    action_trace_raw: list[Any] | None,
    critic: CriticConfig,
    time_zone: str,
    timeout: int,
) -> CriticResult:
    """Dispatch a critic request about a finished agent run and package its verdict.

    Args:
        dispatch_request: Awaitable callable invoked with keyword arguments only
            (``prompt``, ``agent``, ``output_schema``, ``critic_context``,
            ``mcp_servers``, ``time_zone``, ``timeout``).
        original_prompt: Prompt given to the main agent; sent as ``agentPrompt``.
        response_content: Main agent's response content; its ``"text"`` entry is
            sent as ``agentOutput``.
        action_trace_raw: Raw action trace of the main agent; ``None`` (or any
            falsy value) is sent as an empty list.
        critic: Critic configuration (prompt, optional output schema, MCP servers).
        time_zone: Forwarded unchanged to ``dispatch_request``.
        timeout: Forwarded unchanged to ``dispatch_request``.

    Returns:
        A ``CriticResult`` with the validation flag, the critic's text, the
        optional structured output, usage, and the critic's own action trace.

    Raises:
        ValueError: If the dispatch result's ``"response"`` entry is ``None``.
    """
    user_schema = critic.output_schema

    # Start from the caller's schema fields (if any), then always add the boolean
    # validation field. Assigning it last means it wins over a same-named field.
    field_definitions: dict[str, Any] = {}
    if user_schema is not None:
        for field_name, field_info in user_schema.model_fields.items():
            field_definitions[field_name] = (field_info.annotation, field_info)
    field_definitions[_VALIDATION_VAR] = (bool, ...)
    CriticOutputModel = create_model("CriticOutput", **field_definitions)

    context_payload = {
        "agentPrompt": original_prompt,
        "agentOutput": response_content["text"],
        "actionTrace": action_trace_raw or [],
        "validationVariableName": _VALIDATION_VAR,
    }
    dispatch_result = await dispatch_request(
        prompt=critic.prompt,
        agent=Agent.PRODUCTIVITY,
        output_schema=CriticOutputModel,
        critic_context=context_payload,
        mcp_servers=critic.mcp_servers,
        time_zone=time_zone,
        timeout=timeout,
    )

    critic_content = dispatch_result["response"]
    if critic_content is None:
        raise ValueError("Critic dispatch returned no response")

    combined = critic_content.get("structuredOutput")
    # `getattr` with a default also covers `combined is None`: a missing
    # attribute (or a missing output altogether) counts as "not passed".
    passed = bool(getattr(combined, _VALIDATION_VAR, False))

    extracted: BaseModel | None = None
    if user_schema is not None and combined is not None:
        # Drop the injected validation field before re-validating against the
        # caller's own schema.
        payload = combined.model_dump()
        payload.pop(_VALIDATION_VAR, None)
        extracted = user_schema.model_validate(payload)

    raw_critic_trace = critic_content.get("actionTrace")
    critic_trace = None if raw_critic_trace is None else parse_action_trace(raw_critic_trace)

    return CriticResult(
        validation_passed=passed,
        text=critic_content["text"],
        structured_output=extracted,
        usage=AgentUsage.model_validate(dispatch_result["usage"]),
        action_trace=critic_trace,
    )
25 changes: 24 additions & 1 deletion packages/narada-core/src/narada_core/actions/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,12 @@
override,
)

from pydantic import BaseModel, Field, TypeAdapter, ValidationError
from pydantic import (
BaseModel,
Field,
TypeAdapter,
ValidationError,
)

# There is no `AgentRequest` because the `agent` action delegates to the `dispatch_request` method
# under the hood.
Expand Down Expand Up @@ -275,6 +280,22 @@ class StructuredOutput(BaseModel, Generic[_StructuredOutputT]):
content: _StructuredOutputT


class CriticResult(BaseModel):
    """Result from a critic agent that evaluated the main agent's output."""

    validation_passed: bool
    """True when the critic judged that the main agent completed its task."""

    text: str
    """Free-form evaluation text written by the critic."""

    structured_output: Any
    """Instance of the `output_schema` supplied in CriticConfig; None when no schema was supplied."""

    usage: AgentUsage
    """Usage figures attached to this critic result."""

    action_trace: ActionTrace | None = None
    """Action trace attached to this critic result, if any."""


class AgentResponse(BaseModel, Generic[_StructuredOutputT]):
request_id: str
status: Literal["success", "error", "input-required"]
Expand All @@ -286,6 +307,8 @@ class AgentResponse(BaseModel, Generic[_StructuredOutputT]):
]
usage: AgentUsage
action_trace: ActionTrace | None = None
critic_result: CriticResult | None = None
"""Result from the critic agent, populated when a CriticConfig is passed to agent()."""


class AgenticSelectorClickAction(TypedDict):
Expand Down
27 changes: 26 additions & 1 deletion packages/narada-core/src/narada_core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from enum import Enum, StrEnum
from typing import Annotated, Generic, Literal, NotRequired, TypedDict, TypeVar

from pydantic import BaseModel, Field
from pydantic import BaseModel, ConfigDict, Field


class Agent(Enum):
Expand Down Expand Up @@ -65,6 +65,31 @@ class McpServer(BaseModel):
selectedTools: list[str] | None = None


class CriticConfig(BaseModel):
    """Configuration for a critic agent that evaluates a main agent's output.

    When passed to `agent()`, the critic runs after the main agent completes and
    evaluates whether the task was successfully performed, setting a boolean
    variable to indicate pass or fail.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    prompt: str = (
        "Using your context about the actions and outcome of the previous agent, determine "
        "whether its task was completed successfully."
    )
    """How the critic should judge the main agent's output; the default asks for a plain
    success check."""

    output_schema: type[BaseModel] | None = None
    """Pydantic model class describing extra structured fields the critic should fill in, if any.
    Do not declare the boolean validation variable here: it is always added automatically."""

    mcp_servers: list[McpServer] | None = None
    """MCP servers made available to the critic agent, if any."""


class RemoteDispatchChatHistoryItem(TypedDict):
role: Literal["user", "assistant"]
content: str
Expand Down
5 changes: 4 additions & 1 deletion packages/narada-pyodide/src/narada/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
NaradaError,
NaradaTimeoutError,
)
from narada_core.models import Agent, File, Response, ResponseContent
from narada_core.actions.models import CriticResult
from narada_core.models import Agent, CriticConfig, File, Response, ResponseContent

from narada.client import Narada
from narada.utils import download_file, render_html
Expand All @@ -17,6 +18,8 @@
"__version__",
"Agent",
"CloudBrowserWindow",
"CriticConfig",
"CriticResult",
"download_file",
"File",
"LocalBrowserWindow",
Expand Down
24 changes: 24 additions & 0 deletions packages/narada-pyodide/src/narada/window.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from urllib.parse import urlencode

from js import AbortController, setTimeout # type: ignore
from narada_core.actions.critic import run_critic
from narada_core.actions.models import (
AgenticMouseAction,
AgenticMouseActionRequest,
Expand All @@ -30,6 +31,7 @@
AgentResponse,
AgentUsage,
CloseWindowRequest,
CriticResult,
ExtensionActionRequest,
ExtensionActionResponse,
GetFullHtmlRequest,
Expand Down Expand Up @@ -61,6 +63,7 @@
)
from narada_core.models import (
Agent,
CriticConfig,
File,
McpServer,
RemoteDispatchChatHistoryItem,
Expand Down Expand Up @@ -211,6 +214,7 @@ async def dispatch_request(
mcp_servers: list[McpServer] | None = None,
secret_variables: dict[str, str] | None = None,
input_variables: dict[str, Any] | None = None,
critic_context: dict[str, Any] | None = None,
callback_url: str | None = None,
callback_secret: str | None = None,
callback_headers: dict[str, Any] | None = None,
Expand All @@ -234,6 +238,7 @@ async def dispatch_request(
mcp_servers: list[McpServer] | None = None,
secret_variables: dict[str, str] | None = None,
input_variables: dict[str, Any] | None = None,
critic_context: dict[str, Any] | None = None,
callback_url: str | None = None,
callback_secret: str | None = None,
callback_headers: dict[str, Any] | None = None,
Expand All @@ -256,6 +261,7 @@ async def dispatch_request(
mcp_servers: list[McpServer] | None = None,
secret_variables: dict[str, str] | None = None,
input_variables: dict[str, Any] | None = None,
critic_context: dict[str, Any] | None = None,
callback_url: str | None = None,
callback_secret: str | None = None,
callback_headers: dict[str, Any] | None = None,
Expand Down Expand Up @@ -305,6 +311,8 @@ async def dispatch_request(
body["secretVariables"] = secret_variables
if input_variables is not None:
body["inputVariables"] = input_variables
if critic_context is not None:
body["criticContext"] = critic_context
if callback_url is not None:
body["callbackUrl"] = callback_url
if callback_secret is not None:
Expand Down Expand Up @@ -394,6 +402,7 @@ async def agent(
mcp_servers: list[McpServer] | None = None,
secret_variables: dict[str, str] | None = None,
input_variables: dict[str, Any] | None = None,
critic: CriticConfig | None = None,
timeout: int = 1000,
) -> AgentResponse[dict[str, Any]]: ...

Expand All @@ -410,6 +419,7 @@ async def agent(
mcp_servers: list[McpServer] | None = None,
secret_variables: dict[str, str] | None = None,
input_variables: dict[str, Any] | None = None,
critic: CriticConfig | None = None,
timeout: int = 1000,
) -> AgentResponse[_StructuredOutput]: ...

Expand All @@ -425,6 +435,7 @@ async def agent(
mcp_servers: list[McpServer] | None = None,
secret_variables: dict[str, str] | None = None,
input_variables: dict[str, Any] | None = None,
critic: CriticConfig | None = None,
timeout: int = 1000,
) -> AgentResponse:
"""Invokes an agent in the Narada extension side panel chat."""
Expand All @@ -450,6 +461,18 @@ async def agent(
else None
)

critic_result: CriticResult | None = None
if critic is not None:
critic_result = await run_critic(
dispatch_request=self.dispatch_request,
original_prompt=prompt,
response_content=response_content,
action_trace_raw=action_trace_raw,
critic=critic,
time_zone=time_zone,
timeout=timeout,
)

return AgentResponse(
request_id=remote_dispatch_response["requestId"],
status=remote_dispatch_response["status"],
Expand All @@ -458,6 +481,7 @@ async def agent(
structured_output=response_content.get("structuredOutput"),
usage=AgentUsage.model_validate(remote_dispatch_response["usage"]),
action_trace=action_trace,
critic_result=critic_result,
)

async def agentic_selector(
Expand Down
5 changes: 4 additions & 1 deletion packages/narada/src/narada/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
NaradaUnsupportedBrowserError,
UserAbortedError,
)
from narada_core.models import Agent, File, Response, ResponseContent
from narada_core.actions.models import CriticResult
from narada_core.models import Agent, CriticConfig, File, Response, ResponseContent

from narada.client import Narada
from narada.config import BrowserConfig, ProxyConfig
Expand All @@ -20,6 +21,8 @@
"Agent",
"BrowserConfig",
"CloudBrowserWindow",
"CriticConfig",
"CriticResult",
"download_file",
"File",
"LocalBrowserWindow",
Expand Down
Loading
Loading