diff --git a/packages/ai-providers/server-ai-langchain/src/ldai_langchain/langchain_agent_runner.py b/packages/ai-providers/server-ai-langchain/src/ldai_langchain/langchain_agent_runner.py index 1969ec75..7aef05dc 100644 --- a/packages/ai-providers/server-ai-langchain/src/ldai_langchain/langchain_agent_runner.py +++ b/packages/ai-providers/server-ai-langchain/src/ldai_langchain/langchain_agent_runner.py @@ -1,65 +1,83 @@ -from typing import Any +from typing import Any, Dict, List, Optional from ldai import log -from ldai.providers import AgentResult, AgentRunner -from ldai.providers.types import LDAIMetrics +from ldai.providers.types import LDAIMetrics, RunnerResult from ldai_langchain.langchain_helper import ( extract_last_message_content, + get_tool_calls_from_response, sum_token_usage_from_messages, ) -class LangChainAgentRunner(AgentRunner): +class LangChainAgentRunner: """ CAUTION: This feature is experimental and should NOT be considered ready for production use. It may change or be removed without notice and is not subject to backwards compatibility guarantees. - AgentRunner implementation for LangChain. + Runner implementation for a single LangChain agent. Wraps a compiled LangChain agent graph (from ``langchain.agents.create_agent``) and delegates execution to it. Tool calling and loop management are handled internally by the graph. - Returned by LangChainRunnerFactory.create_agent(config, tools). + Returned by ``LangChainRunnerFactory.create_agent(config, tools)``. + + Implements the unified :class:`~ldai.providers.runner.Runner` protocol. """ def __init__(self, agent: Any): self._agent = agent - async def run(self, input: Any) -> AgentResult: + async def run( + self, + input: Any, + output_type: Optional[Dict[str, Any]] = None, + ) -> RunnerResult: """ - Run the agent with the given input string. + Run the agent with the given input. Delegates to the compiled LangChain agent, which handles the tool-calling loop internally. :param input: The user prompt or input to the agent - :return: AgentResult with output, raw response, and aggregated metrics + :param output_type: Reserved for future structured output support; + currently ignored. + :return: :class:`RunnerResult` with ``content``, ``raw`` response, and + metrics including aggregated token usage and observed ``tool_calls``. 
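# ---------------------------------------------------------------------------
# Illustrative usage sketch (editor's addition, not part of this changeset):
# how a caller consumes the unified RunnerResult returned by
# LangChainAgentRunner.run(). `compiled_agent` is assumed to be a graph built
# with langchain.agents.create_agent, or obtained via the runner factory.
# ---------------------------------------------------------------------------
from ldai_langchain import LangChainAgentRunner


async def consume_agent_result(compiled_agent) -> None:
    runner = LangChainAgentRunner(compiled_agent)
    result = await runner.run("What is the answer?")

    if result.metrics.success:
        print(result.content)               # last message content from the graph
        print(result.metrics.usage)         # aggregated TokenUsage, or None
        print(result.metrics.tool_calls)    # observed tool names, or None
    # result.raw keeps the provider-native graph output for advanced consumers.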
""" try: result = await self._agent.ainvoke({ "messages": [{"role": "user", "content": str(input)}] }) - messages = result.get("messages", []) - output = extract_last_message_content(messages) - return AgentResult( - output=output, - raw=result, + messages: List[Any] = result.get("messages", []) + content = extract_last_message_content(messages) + tool_calls = self._extract_tool_calls(messages) + return RunnerResult( + content=content, metrics=LDAIMetrics( success=True, usage=sum_token_usage_from_messages(messages), + tool_calls=tool_calls if tool_calls else None, ), + raw=result, ) except Exception as error: log.warning(f"LangChain agent run failed: {error}") - return AgentResult( - output="", - raw=None, + return RunnerResult( + content="", metrics=LDAIMetrics(success=False, usage=None), ) + @staticmethod + def _extract_tool_calls(messages: List[Any]) -> List[str]: + """Collect tool call names from all messages in the agent output.""" + names: List[str] = [] + for msg in messages: + names.extend(get_tool_calls_from_response(msg)) + return names + def get_agent(self) -> Any: """Return the underlying compiled LangChain agent.""" return self._agent diff --git a/packages/ai-providers/server-ai-langchain/src/ldai_langchain/langchain_model_runner.py b/packages/ai-providers/server-ai-langchain/src/ldai_langchain/langchain_model_runner.py index d504030b..576f0f4c 100644 --- a/packages/ai-providers/server-ai-langchain/src/ldai_langchain/langchain_model_runner.py +++ b/packages/ai-providers/server-ai-langchain/src/ldai_langchain/langchain_model_runner.py @@ -1,10 +1,9 @@ -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional from langchain_core.language_models.chat_models import BaseChatModel from langchain_core.messages import BaseMessage from ldai import LDMessage, log -from ldai.providers.model_runner import ModelRunner -from ldai.providers.types import LDAIMetrics, ModelResponse, StructuredResponse +from ldai.providers.types import LDAIMetrics, RunnerResult from ldai_langchain.langchain_helper import ( convert_messages_to_langchain, @@ -13,12 +12,15 @@ ) -class LangChainModelRunner(ModelRunner): +class LangChainModelRunner: """ - ModelRunner implementation for LangChain. + Runner implementation for LangChain chat models. Holds a fully-configured BaseChatModel. - Returned by LangChainConnector.create_model(config). + Returned by ``LangChainRunnerFactory.create_model(config)``. + + Implements the unified :class:`~ldai.providers.runner.Runner` protocol via + :meth:`run`. """ def __init__(self, llm: BaseChatModel): @@ -32,13 +34,37 @@ def get_llm(self) -> BaseChatModel: """ return self._llm - async def invoke_model(self, messages: List[LDMessage]) -> ModelResponse: + async def run( + self, + input: Any, + output_type: Optional[Dict[str, Any]] = None, + ) -> RunnerResult: """ - Invoke the LangChain model with an array of messages. - - :param messages: Array of LDMessage objects representing the conversation - :return: ModelResponse containing the model's response and metrics + Run the LangChain model with the given input. + + :param input: A string prompt or a list of :class:`LDMessage` objects + :param output_type: Optional JSON schema dict requesting structured output. + When provided, ``parsed`` on the returned :class:`RunnerResult` is + populated with the structured data. + :return: :class:`RunnerResult` containing ``content``, ``metrics``, + ``raw`` and (when ``output_type`` is set) ``parsed``. 
""" + messages = self._coerce_input(input) + if output_type is not None: + return await self._run_structured(messages, output_type) + return await self._run_completion(messages) + + @staticmethod + def _coerce_input(input: Any) -> List[LDMessage]: + if isinstance(input, str): + return [LDMessage(role='user', content=input)] + if isinstance(input, list): + return input + raise TypeError( + f"Unsupported input type for LangChainModelRunner.run: {type(input).__name__}" + ) + + async def _run_completion(self, messages: List[LDMessage]) -> RunnerResult: try: langchain_messages = convert_messages_to_langchain(messages) response: BaseMessage = await self._llm.ainvoke(langchain_messages) @@ -52,36 +78,23 @@ async def invoke_model(self, messages: List[LDMessage]) -> ModelResponse: f'Multimodal response not supported, expecting a string. ' f'Content type: {type(response.content)}, Content: {response.content}' ) - metrics = LDAIMetrics(success=False, usage=metrics.usage) + return RunnerResult( + content='', + metrics=LDAIMetrics(success=False, usage=metrics.usage), + raw=response, + ) - return ModelResponse( - message=LDMessage(role='assistant', content=content), - metrics=metrics, - ) + return RunnerResult(content=content, metrics=metrics, raw=response) except Exception as error: log.warning(f'LangChain model invocation failed: {error}') - return ModelResponse( - message=LDMessage(role='assistant', content=''), + return RunnerResult( + content='', metrics=LDAIMetrics(success=False, usage=None), ) - async def invoke_structured_model( - self, - messages: List[LDMessage], - response_structure: Dict[str, Any], - ) -> StructuredResponse: - """ - Invoke the LangChain model with structured output support. - - :param messages: Array of LDMessage objects representing the conversation - :param response_structure: Dictionary defining the output structure - :return: StructuredResponse containing the structured data - """ - structured_response = StructuredResponse( - data={}, - raw_response='', - metrics=LDAIMetrics(success=False, usage=None), - ) + async def _run_structured( + self, messages: List[LDMessage], response_structure: Dict[str, Any] + ) -> RunnerResult: try: langchain_messages = convert_messages_to_langchain(messages) structured_llm = self._llm.with_structured_output(response_structure, include_raw=True) @@ -89,21 +102,34 @@ async def invoke_structured_model( if not isinstance(response, dict): log.warning(f'Structured output did not return a dict. 
Got: {type(response)}') - return structured_response + return RunnerResult( + content='', + metrics=LDAIMetrics(success=False, usage=None), + ) raw_response = response.get('raw') - if raw_response is not None: - if hasattr(raw_response, 'content'): - structured_response.raw_response = raw_response.content - structured_response.metrics.usage = get_ai_usage_from_response(raw_response) + usage = get_ai_usage_from_response(raw_response) if raw_response is not None else None + raw_content = raw_response.content if raw_response is not None and hasattr(raw_response, 'content') else '' if response.get('parsing_error'): log.warning('LangChain structured model invocation had a parsing error') - return structured_response + return RunnerResult( + content=raw_content, + metrics=LDAIMetrics(success=False, usage=usage), + raw=raw_response, + ) - structured_response.metrics.success = True - structured_response.data = response.get('parsed') or {} - return structured_response + parsed = response.get('parsed') or {} + return RunnerResult( + content=raw_content, + metrics=LDAIMetrics(success=True, usage=usage), + raw=raw_response, + parsed=parsed, + ) except Exception as error: log.warning(f'LangChain structured model invocation failed: {error}') - return structured_response + return RunnerResult( + content='', + metrics=LDAIMetrics(success=False, usage=None), + ) + diff --git a/packages/ai-providers/server-ai-langchain/src/ldai_langchain/langgraph_agent_graph_runner.py b/packages/ai-providers/server-ai-langchain/src/ldai_langchain/langgraph_agent_graph_runner.py index 9ecb2351..15eee41f 100644 --- a/packages/ai-providers/server-ai-langchain/src/ldai_langchain/langgraph_agent_graph_runner.py +++ b/packages/ai-providers/server-ai-langchain/src/ldai_langchain/langgraph_agent_graph_runner.py @@ -329,8 +329,10 @@ async def run(self, input: Any) -> AgentGraphResult: messages = result.get('messages', []) output = extract_last_message_content(messages) - # Flush per-node metrics to LD trackers - all_eval_results = await handler.flush(self._graph, pending_eval_tasks) + # Flush per-node metrics to LD trackers; eval results are tracked + # internally and intentionally not exposed on AgentGraphResult here + # — judge dispatch is the managed layer's responsibility. 
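# ---------------------------------------------------------------------------
# Illustrative usage sketch (editor's addition, not part of this changeset):
# the structured-output path. Passing output_type (a JSON schema dict) routes
# run() to _run_structured and populates `parsed` on the RunnerResult. The
# schema below is an arbitrary example; `runner` can be a LangChainModelRunner
# or an OpenAIModelRunner, since both expose the same unified signature.
# ---------------------------------------------------------------------------
PERSON_SCHEMA = {
    'type': 'object',
    'properties': {
        'name': {'type': 'string'},
        'age': {'type': 'integer'},
    },
    'required': ['name', 'age'],
}


async def run_structured(runner) -> None:
    result = await runner.run("Extract the person mentioned.", output_type=PERSON_SCHEMA)
    if result.metrics.success:
        print(result.parsed)       # e.g. {'name': 'Ada', 'age': 36}
    else:
        # On parsing errors, content still carries the model's raw text when
        # available, and metrics.usage is preserved.
        print(result.content)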
+ await handler.flush(self._graph, pending_eval_tasks) tracker.track_path(handler.path) tracker.track_duration(duration) @@ -341,7 +343,6 @@ async def run(self, input: Any) -> AgentGraphResult: output=output, raw=result, metrics=LDAIMetrics(success=True), - evaluations=all_eval_results, ) except Exception as exc: diff --git a/packages/ai-providers/server-ai-langchain/tests/test_langchain_provider.py b/packages/ai-providers/server-ai-langchain/tests/test_langchain_provider.py index 4018e7c3..9b8dd69d 100644 --- a/packages/ai-providers/server-ai-langchain/tests/test_langchain_provider.py +++ b/packages/ai-providers/server-ai-langchain/tests/test_langchain_provider.py @@ -219,8 +219,8 @@ def test_returns_provider_name_unchanged_for_unmapped_providers(self): assert map_provider('unknown') == 'unknown' -class TestInvokeModel: - """Tests for invoke_model instance method.""" +class TestRunCompletion: + """Tests for the unified run() method (chat-completion path).""" @pytest.fixture def mock_llm(self): @@ -235,10 +235,10 @@ async def test_returns_success_true_for_string_content(self, mock_llm): provider = LangChainModelRunner(mock_llm) messages = [LDMessage(role='user', content='Hello')] - result = await provider.invoke_model(messages) + result = await provider.run(messages) assert result.metrics.success is True - assert result.message.content == 'Test response' + assert result.content == 'Test response' @pytest.mark.asyncio async def test_returns_success_false_for_non_string_content_and_logs_warning(self, mock_llm): @@ -248,10 +248,10 @@ async def test_returns_success_false_for_non_string_content_and_logs_warning(sel provider = LangChainModelRunner(mock_llm) messages = [LDMessage(role='user', content='Hello')] - result = await provider.invoke_model(messages) + result = await provider.run(messages) assert result.metrics.success is False - assert result.message.content == '' + assert result.content == '' @pytest.mark.asyncio async def test_returns_success_false_when_model_invocation_throws_error(self, mock_llm): @@ -261,15 +261,14 @@ async def test_returns_success_false_when_model_invocation_throws_error(self, mo provider = LangChainModelRunner(mock_llm) messages = [LDMessage(role='user', content='Hello')] - result = await provider.invoke_model(messages) + result = await provider.run(messages) assert result.metrics.success is False - assert result.message.content == '' - assert result.message.role == 'assistant' + assert result.content == '' -class TestInvokeStructuredModel: - """Tests for invoke_structured_model instance method.""" +class TestRunStructured: + """Tests for the unified run() method (structured-output path).""" @pytest.fixture def mock_llm(self): @@ -288,10 +287,10 @@ async def test_returns_success_true_for_successful_invocation(self, mock_llm): messages = [LDMessage(role='user', content='Hello')] response_structure = {'type': 'object', 'properties': {}} - result = await provider.invoke_structured_model(messages, response_structure) + result = await provider.run(messages, output_type=response_structure) assert result.metrics.success is True - assert result.data == parsed_data + assert result.parsed == parsed_data @pytest.mark.asyncio async def test_returns_success_false_when_structured_model_invocation_throws_error(self, mock_llm): @@ -304,11 +303,11 @@ async def test_returns_success_false_when_structured_model_invocation_throws_err messages = [LDMessage(role='user', content='Hello')] response_structure = {'type': 'object', 'properties': {}} - result = await 
provider.invoke_structured_model(messages, response_structure) + result = await provider.run(messages, output_type=response_structure) assert result.metrics.success is False - assert result.data == {} - assert result.raw_response == '' + assert result.parsed is None + assert result.content == '' assert result.metrics.usage is None @@ -464,7 +463,7 @@ class TestLangChainAgentRunner: @pytest.mark.asyncio async def test_runs_agent_and_returns_result(self): - """Should return AgentResult with the last message content from the graph.""" + """Should return RunnerResult with the last message content from the graph.""" from ldai_langchain import LangChainAgentRunner final_msg = AIMessage(content="The answer is 42.") @@ -474,7 +473,7 @@ async def test_runs_agent_and_returns_result(self): runner = LangChainAgentRunner(mock_agent) result = await runner.run("What is the answer?") - assert result.output == "The answer is 42." + assert result.content == "The answer is 42." assert result.metrics.success is True mock_agent.ainvoke.assert_called_once_with( {"messages": [{"role": "user", "content": "What is the answer?"}]} @@ -496,7 +495,7 @@ async def test_aggregates_token_usage_across_messages(self): runner = LangChainAgentRunner(mock_agent) result = await runner.run("Hello") - assert result.output == "final answer" + assert result.content == "final answer" assert result.metrics.success is True assert result.metrics.usage is not None assert result.metrics.usage.total == 30 @@ -505,7 +504,7 @@ async def test_aggregates_token_usage_across_messages(self): @pytest.mark.asyncio async def test_returns_failure_when_exception_thrown(self): - """Should return unsuccessful AgentResult when exception is thrown.""" + """Should return unsuccessful RunnerResult when exception is thrown.""" from ldai_langchain import LangChainAgentRunner mock_agent = MagicMock() @@ -514,7 +513,7 @@ async def test_returns_failure_when_exception_thrown(self): runner = LangChainAgentRunner(mock_agent) result = await runner.run("Hello") - assert result.output == "" + assert result.content == "" assert result.metrics.success is False diff --git a/packages/ai-providers/server-ai-openai/src/ldai_openai/openai_agent_runner.py b/packages/ai-providers/server-ai-openai/src/ldai_openai/openai_agent_runner.py index 7e79c836..6af5a57c 100644 --- a/packages/ai-providers/server-ai-openai/src/ldai_openai/openai_agent_runner.py +++ b/packages/ai-providers/server-ai-openai/src/ldai_openai/openai_agent_runner.py @@ -1,28 +1,30 @@ -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional from ldai import log -from ldai.providers import AgentResult, AgentRunner, ToolRegistry +from ldai.providers import RunnerResult, ToolRegistry from ldai.providers.types import LDAIMetrics from ldai_openai.openai_helper import ( get_ai_usage_from_response, + get_tool_calls_from_run_items, registry_value_to_agent_tool, ) -class OpenAIAgentRunner(AgentRunner): +class OpenAIAgentRunner: """ CAUTION: This feature is experimental and should NOT be considered ready for production use. It may change or be removed without notice and is not subject to backwards compatibility guarantees. - AgentRunner implementation for OpenAI. + Runner implementation for a single OpenAI agent. Executes a single agent using the OpenAI Agents SDK (``openai-agents``). Tool calling and the agentic loop are handled internally by ``Runner.run``. - Returned by OpenAIRunnerFactory.create_agent(config, tools). + Returned by ``OpenAIRunnerFactory.create_agent(config, tools)``. 
+ Implements the unified :class:`~ldai.providers.runner.Runner` protocol. Requires ``openai-agents`` to be installed. """ @@ -40,15 +42,22 @@ def __init__( self._tool_definitions = tool_definitions self._tools = tools - async def run(self, input: Any) -> AgentResult: + async def run( + self, + input: Any, + output_type: Optional[Dict[str, Any]] = None, + ) -> RunnerResult: """ - Run the agent with the given input string. + Run the agent with the given input. Delegates to the OpenAI Agents SDK ``Runner.run``, which handles the tool-calling loop internally. :param input: The user prompt or input to the agent - :return: AgentResult with output, raw response, and aggregated metrics + :param output_type: Reserved for future structured output support; + currently ignored. + :return: :class:`RunnerResult` with ``content``, ``raw`` response, and + metrics including aggregated token usage and observed ``tool_calls``. """ try: from agents import Agent, Runner @@ -57,7 +66,10 @@ async def run(self, input: Any) -> AgentResult: "openai-agents is required for OpenAIAgentRunner. " "Install it with: pip install openai-agents" ) - return AgentResult(output="", raw=None, metrics=LDAIMetrics(success=False, usage=None)) + return RunnerResult( + content="", + metrics=LDAIMetrics(success=False, usage=None), + ) try: agent_tools = self._build_agent_tools() @@ -73,17 +85,26 @@ async def run(self, input: Any) -> AgentResult: result = await Runner.run(agent, str(input), max_turns=25) - return AgentResult( - output=str(result.final_output), - raw=result, + tool_calls = [ + tool_name + for _agent_name, tool_name in get_tool_calls_from_run_items(result.new_items) + ] + + return RunnerResult( + content=str(result.final_output), metrics=LDAIMetrics( success=True, usage=get_ai_usage_from_response(result), + tool_calls=tool_calls if tool_calls else None, ), + raw=result, ) except Exception as error: log.warning(f"OpenAI agent run failed: {error}") - return AgentResult(output="", raw=None, metrics=LDAIMetrics(success=False, usage=None)) + return RunnerResult( + content="", + metrics=LDAIMetrics(success=False, usage=None), + ) def _build_agent_tools(self) -> List[Any]: """Build tool instances from LD tool definitions and registry.""" diff --git a/packages/ai-providers/server-ai-openai/src/ldai_openai/openai_model_runner.py b/packages/ai-providers/server-ai-openai/src/ldai_openai/openai_model_runner.py index 9c4a34d8..34fcde2f 100644 --- a/packages/ai-providers/server-ai-openai/src/ldai_openai/openai_model_runner.py +++ b/packages/ai-providers/server-ai-openai/src/ldai_openai/openai_model_runner.py @@ -1,9 +1,8 @@ import json -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional from ldai import LDMessage, log -from ldai.providers.model_runner import ModelRunner -from ldai.providers.types import LDAIMetrics, ModelResponse, StructuredResponse +from ldai.providers.types import LDAIMetrics, RunnerResult from openai import AsyncOpenAI from ldai_openai.openai_helper import ( @@ -12,12 +11,15 @@ ) -class OpenAIModelRunner(ModelRunner): +class OpenAIModelRunner: """ - ModelRunner implementation for OpenAI. + Runner implementation for OpenAI chat completions. Holds a fully-configured AsyncOpenAI client, model name, and parameters. - Returned by OpenAIConnector.create_model(config). + Returned by ``OpenAIRunnerFactory.create_model(config)``. + + Implements the unified :class:`~ldai.providers.runner.Runner` protocol via + :meth:`run`. 
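# ---------------------------------------------------------------------------
# Illustrative usage sketch (editor's addition, not part of this changeset):
# consuming RunnerResult from OpenAIAgentRunner. In normal use the runner is
# returned by OpenAIRunnerFactory.create_agent(config, tools); the positional
# construction below mirrors the test suite and is for illustration only.
# The optional `openai-agents` package must be installed for a real run.
# ---------------------------------------------------------------------------
from ldai_openai import OpenAIAgentRunner


async def consume_openai_agent_result() -> None:
    runner = OpenAIAgentRunner('gpt-4', {}, 'You are helpful.', [], {})
    result = await runner.run("What is the weather in Paris?")

    print(result.content)              # str(final_output), or '' on failure
    print(result.metrics.success)      # False if openai-agents is missing or the run raised
    print(result.metrics.tool_calls)   # tool names collected from result.new_items, or None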
""" def __init__( @@ -30,13 +32,38 @@ def __init__( self._model_name = model_name self._parameters = parameters - async def invoke_model(self, messages: List[LDMessage]) -> ModelResponse: + async def run( + self, + input: Any, + output_type: Optional[Dict[str, Any]] = None, + ) -> RunnerResult: """ - Invoke the OpenAI model with an array of messages. - - :param messages: Array of LDMessage objects representing the conversation - :return: ModelResponse containing the model's response and metrics + Run the OpenAI model with the given input. + + :param input: A string prompt or a list of :class:`LDMessage` objects + :param output_type: Optional JSON schema dict requesting structured output. + When provided, ``parsed`` on the returned :class:`RunnerResult` is + populated with the parsed JSON document. + :return: :class:`RunnerResult` containing ``content``, ``metrics``, + ``raw`` and (when ``output_type`` is set) ``parsed``. """ + messages = self._coerce_input(input) + + if output_type is not None: + return await self._run_structured(messages, output_type) + return await self._run_completion(messages) + + @staticmethod + def _coerce_input(input: Any) -> List[LDMessage]: + if isinstance(input, str): + return [LDMessage(role='user', content=input)] + if isinstance(input, list): + return input + raise TypeError( + f"Unsupported input type for OpenAIModelRunner.run: {type(input).__name__}" + ) + + async def _run_completion(self, messages: List[LDMessage]) -> RunnerResult: try: response = await self._client.chat.completions.create( model=self._model_name, @@ -45,40 +72,29 @@ async def invoke_model(self, messages: List[LDMessage]) -> ModelResponse: ) metrics = get_ai_metrics_from_response(response) - - content = '' - if response.choices and len(response.choices) > 0: - message = response.choices[0].message - if message and message.content: - content = message.content + content = self._extract_content(response) if not content: log.warning('OpenAI response has no content available') - metrics = LDAIMetrics(success=False, usage=metrics.usage) + return RunnerResult( + content='', + metrics=LDAIMetrics(success=False, usage=metrics.usage), + raw=response, + ) - return ModelResponse( - message=LDMessage(role='assistant', content=content), - metrics=metrics, - ) + return RunnerResult(content=content, metrics=metrics, raw=response) except Exception as error: log.warning(f'OpenAI model invocation failed: {error}') - return ModelResponse( - message=LDMessage(role='assistant', content=''), + return RunnerResult( + content='', metrics=LDAIMetrics(success=False, usage=None), ) - async def invoke_structured_model( + async def _run_structured( self, messages: List[LDMessage], - response_structure: Dict[str, Any], - ) -> StructuredResponse: - """ - Invoke the OpenAI model with structured output support. 
- - :param messages: Array of LDMessage objects representing the conversation - :param response_structure: Dictionary defining the JSON schema for output structure - :return: StructuredResponse containing the structured data - """ + output_type: Dict[str, Any], + ) -> RunnerResult: try: response = await self._client.chat.completions.create( model=self._model_name, @@ -87,7 +103,7 @@ async def invoke_structured_model( 'type': 'json_schema', 'json_schema': { 'name': 'structured_output', - 'schema': response_structure, + 'schema': output_type, 'strict': True, }, }, @@ -95,35 +111,42 @@ async def invoke_structured_model( ) metrics = get_ai_metrics_from_response(response) - - content = '' - if response.choices and len(response.choices) > 0: - message = response.choices[0].message - if message and message.content: - content = message.content + content = self._extract_content(response) if not content: log.warning('OpenAI structured response has no content available') - return StructuredResponse( - data={}, - raw_response='', + return RunnerResult( + content='', metrics=LDAIMetrics(success=False, usage=metrics.usage), + raw=response, ) try: - data = json.loads(content) - return StructuredResponse(data=data, raw_response=content, metrics=metrics) + parsed = json.loads(content) + return RunnerResult( + content=content, + metrics=metrics, + raw=response, + parsed=parsed, + ) except json.JSONDecodeError as parse_error: log.warning(f'OpenAI structured response contains invalid JSON: {parse_error}') - return StructuredResponse( - data={}, - raw_response=content, + return RunnerResult( + content=content, metrics=LDAIMetrics(success=False, usage=metrics.usage), + raw=response, ) except Exception as error: log.warning(f'OpenAI structured model invocation failed: {error}') - return StructuredResponse( - data={}, - raw_response='', + return RunnerResult( + content='', metrics=LDAIMetrics(success=False, usage=None), ) + + @staticmethod + def _extract_content(response: Any) -> str: + if response.choices and len(response.choices) > 0: + message = response.choices[0].message + if message and message.content: + return message.content + return '' diff --git a/packages/ai-providers/server-ai-openai/tests/test_openai_provider.py b/packages/ai-providers/server-ai-openai/tests/test_openai_provider.py index 19d2cff7..3b69d3f6 100644 --- a/packages/ai-providers/server-ai-openai/tests/test_openai_provider.py +++ b/packages/ai-providers/server-ai-openai/tests/test_openai_provider.py @@ -120,8 +120,8 @@ def test_handles_partial_usage_data(self): assert result.usage.output == 0 -class TestInvokeModel: - """Tests for invoke_model instance method.""" +class TestRunCompletion: + """Tests for the unified run() method (chat-completion path).""" @pytest.fixture def mock_client(self): @@ -144,15 +144,14 @@ async def test_invokes_openai_chat_completions_and_returns_response(self, mock_c provider = OpenAIModelRunner(mock_client, 'gpt-3.5-turbo', {}) messages = [LDMessage(role='user', content='Hello!')] - result = await provider.invoke_model(messages) + result = await provider.run(messages) mock_client.chat.completions.create.assert_called_once_with( model='gpt-3.5-turbo', messages=[{'role': 'user', 'content': 'Hello!'}], ) - assert result.message.role == 'assistant' - assert result.message.content == 'Hello! How can I help you today?' + assert result.content == 'Hello! How can I help you today?' 
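# ---------------------------------------------------------------------------
# Illustrative migration sketch (editor's addition, not part of this changeset):
# the structured call site moves from invoke_structured_model() to
# run(..., output_type=schema); StructuredResponse.data becomes
# RunnerResult.parsed and raw_response becomes content. Schema values are
# arbitrary examples.
# ---------------------------------------------------------------------------
async def migrate_structured_call(runner) -> None:
    schema = {'type': 'object', 'properties': {'name': {'type': 'string'}}}

    # Before this change:
    #   sr = await runner.invoke_structured_model(messages, schema)
    #   data, raw_text = sr.data, sr.raw_response

    # After this change:
    result = await runner.run("Tell me about a person", output_type=schema)
    data, raw_text = result.parsed, result.content
    # On invalid JSON: metrics.success is False, parsed is None, and content
    # still carries the raw model text.
    print(data, raw_text)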
assert result.metrics.success is True assert result.metrics.usage is not None assert result.metrics.usage.total == 25 @@ -174,10 +173,9 @@ async def test_returns_unsuccessful_response_when_no_content(self, mock_client): provider = OpenAIModelRunner(mock_client, 'gpt-3.5-turbo', {}) messages = [LDMessage(role='user', content='Hello!')] - result = await provider.invoke_model(messages) + result = await provider.run(messages) - assert result.message.role == 'assistant' - assert result.message.content == '' + assert result.content == '' assert result.metrics.success is False @pytest.mark.asyncio @@ -193,10 +191,9 @@ async def test_returns_unsuccessful_response_when_choices_empty(self, mock_clien provider = OpenAIModelRunner(mock_client, 'gpt-3.5-turbo', {}) messages = [LDMessage(role='user', content='Hello!')] - result = await provider.invoke_model(messages) + result = await provider.run(messages) - assert result.message.role == 'assistant' - assert result.message.content == '' + assert result.content == '' assert result.metrics.success is False @pytest.mark.asyncio @@ -208,15 +205,14 @@ async def test_returns_unsuccessful_response_when_exception_thrown(self, mock_cl provider = OpenAIModelRunner(mock_client, 'gpt-3.5-turbo', {}) messages = [LDMessage(role='user', content='Hello!')] - result = await provider.invoke_model(messages) + result = await provider.run(messages) - assert result.message.role == 'assistant' - assert result.message.content == '' + assert result.content == '' assert result.metrics.success is False -class TestInvokeStructuredModel: - """Tests for invoke_structured_model instance method.""" +class TestRunStructured: + """Tests for the unified run() method (structured-output path).""" @pytest.fixture def mock_client(self): @@ -249,10 +245,10 @@ async def test_invokes_openai_with_structured_output(self, mock_client): 'required': ['name', 'age', 'city'], } - result = await provider.invoke_structured_model(messages, response_structure) + result = await provider.run(messages, output_type=response_structure) - assert result.data == {'name': 'John', 'age': 30, 'city': 'New York'} - assert result.raw_response == '{"name": "John", "age": 30, "city": "New York"}' + assert result.parsed == {'name': 'John', 'age': 30, 'city': 'New York'} + assert result.content == '{"name": "John", "age": 30, "city": "New York"}' assert result.metrics.success is True assert result.metrics.usage is not None assert result.metrics.usage.total == 30 @@ -276,10 +272,10 @@ async def test_returns_unsuccessful_when_no_content_in_structured_response(self, messages = [LDMessage(role='user', content='Tell me about a person')] response_structure = {'type': 'object'} - result = await provider.invoke_structured_model(messages, response_structure) + result = await provider.run(messages, output_type=response_structure) - assert result.data == {} - assert result.raw_response == '' + assert result.parsed is None + assert result.content == '' assert result.metrics.success is False @pytest.mark.asyncio @@ -300,10 +296,10 @@ async def test_handles_json_parsing_errors(self, mock_client): messages = [LDMessage(role='user', content='Tell me about a person')] response_structure = {'type': 'object'} - result = await provider.invoke_structured_model(messages, response_structure) + result = await provider.run(messages, output_type=response_structure) - assert result.data == {} - assert result.raw_response == 'invalid json content' + assert result.parsed is None + assert result.content == 'invalid json content' assert 
result.metrics.success is False assert result.metrics.usage is not None assert result.metrics.usage.total == 15 @@ -319,10 +315,10 @@ async def test_returns_unsuccessful_response_when_exception_thrown(self, mock_cl messages = [LDMessage(role='user', content='Tell me about a person')] response_structure = {'type': 'object'} - result = await provider.invoke_structured_model(messages, response_structure) + result = await provider.run(messages, output_type=response_structure) - assert result.data == {} - assert result.raw_response == '' + assert result.parsed is None + assert result.content == '' assert result.metrics.success is False @@ -465,19 +461,20 @@ def _make_run_result(self, output: str, total: int = 15, input_tokens: int = 10, @pytest.mark.asyncio async def test_runs_agent_and_returns_result_with_no_tool_calls(self): - """Should return AgentResult when Runner.run returns a final output.""" + """Should return RunnerResult when Runner.run returns a final output.""" import sys from ldai_openai import OpenAIAgentRunner mock_run_result = self._make_run_result("The answer is 42.", total=15, input_tokens=10, output_tokens=5) + mock_run_result.new_items = [] agents_mock, tc_mock = _make_agents_mock(AsyncMock(return_value=mock_run_result)) runner = OpenAIAgentRunner('gpt-4', {}, 'You are helpful.', [], {}) with patch.dict(sys.modules, {'agents': agents_mock, 'agents.tool_context': tc_mock}): result = await runner.run("What is the answer?") - assert result.output == "The answer is 42." + assert result.content == "The answer is 42." assert result.metrics.success is True assert result.metrics.usage is not None assert result.metrics.usage.total == 15 @@ -490,6 +487,7 @@ async def test_executes_tool_calls_and_returns_final_response(self): from ldai_openai import OpenAIAgentRunner mock_run_result = self._make_run_result("It is sunny in Paris.", total=43, input_tokens=30, output_tokens=13) + mock_run_result.new_items = [] agents_mock, tc_mock = _make_agents_mock(AsyncMock(return_value=mock_run_result)) weather_fn = MagicMock(return_value="Sunny, 25°C") @@ -501,13 +499,13 @@ async def test_executes_tool_calls_and_returns_final_response(self): with patch.dict(sys.modules, {'agents': agents_mock, 'agents.tool_context': tc_mock}): result = await runner.run("What is the weather in Paris?") - assert result.output == "It is sunny in Paris." + assert result.content == "It is sunny in Paris." 
assert result.metrics.success is True assert result.metrics.usage.total == 43 @pytest.mark.asyncio async def test_returns_failure_when_exception_thrown(self): - """Should return unsuccessful AgentResult when Runner.run raises.""" + """Should return unsuccessful RunnerResult when Runner.run raises.""" import sys from ldai_openai import OpenAIAgentRunner @@ -518,12 +516,12 @@ async def test_returns_failure_when_exception_thrown(self): with patch.dict(sys.modules, {'agents': agents_mock, 'agents.tool_context': tc_mock}): result = await runner.run("Hello") - assert result.output == "" + assert result.content == "" assert result.metrics.success is False @pytest.mark.asyncio async def test_returns_failure_when_openai_agents_not_installed(self): - """Should return unsuccessful AgentResult when openai-agents is not installed.""" + """Should return unsuccessful RunnerResult when openai-agents is not installed.""" import sys from ldai_openai import OpenAIAgentRunner @@ -532,5 +530,5 @@ async def test_returns_failure_when_openai_agents_not_installed(self): with patch.dict(sys.modules, {'agents': None}): result = await runner.run("Hello") - assert result.output == "" + assert result.content == "" assert result.metrics.success is False diff --git a/packages/sdk/server-ai/src/ldai/__init__.py b/packages/sdk/server-ai/src/ldai/__init__.py index f02cee30..56d780d3 100644 --- a/packages/sdk/server-ai/src/ldai/__init__.py +++ b/packages/sdk/server-ai/src/ldai/__init__.py @@ -34,8 +34,12 @@ from ldai.providers import ( AgentGraphResult, AgentGraphRunner, + AgentGraphRunnerResult, AgentResult, AgentRunner, + GraphMetrics, + GraphMetricSummary, + ManagedGraphResult, ManagedResult, Runner, RunnerResult, @@ -51,6 +55,10 @@ 'AgentGraphRunner', 'AgentResult', 'AgentGraphResult', + 'AgentGraphRunnerResult', + 'GraphMetrics', + 'GraphMetricSummary', + 'ManagedGraphResult', 'ManagedResult', 'Runner', 'RunnerResult', diff --git a/packages/sdk/server-ai/src/ldai/managed_agent_graph.py b/packages/sdk/server-ai/src/ldai/managed_agent_graph.py index a146e60e..50b3440e 100644 --- a/packages/sdk/server-ai/src/ldai/managed_agent_graph.py +++ b/packages/sdk/server-ai/src/ldai/managed_agent_graph.py @@ -1,17 +1,19 @@ """ManagedAgentGraph — LaunchDarkly managed wrapper for agent graph execution.""" -from typing import Any +import asyncio +from typing import Any, List from ldai.providers import AgentGraphResult, AgentGraphRunner +from ldai.providers.types import GraphMetricSummary, JudgeResult, ManagedGraphResult class ManagedAgentGraph: """ LaunchDarkly managed wrapper for AI agent graph execution. - Holds an AgentGraphRunner. Auto-tracking of path, - tool calls, handoffs, latency, and invocation success/failure is handled - by the runner implementation. + Holds an AgentGraphRunner. Wraps the runner result in a + :class:`~ldai.providers.types.ManagedGraphResult` and builds a + :class:`~ldai.providers.types.GraphMetricSummary` from the runner's metrics. Obtain an instance via ``LDAIClient.create_agent_graph()``. """ @@ -27,17 +29,37 @@ def __init__( """ self._runner = runner - async def run(self, input: Any) -> AgentGraphResult: + async def run(self, input: Any) -> ManagedGraphResult: """ Run the agent graph with the given input. - Delegates to the underlying AgentGraphRunner, which handles - execution and all auto-tracking internally. + Delegates to the underlying AgentGraphRunner, builds a + :class:`GraphMetricSummary` from the result, and wraps everything in a + :class:`ManagedGraphResult`. 
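# ---------------------------------------------------------------------------
# Illustrative usage sketch (editor's addition, not part of this changeset):
# reading the new ManagedGraphResult returned by ManagedAgentGraph.run().
# `managed` is assumed to come from LDAIClient.create_agent_graph(), as in
# test_managed_agent_graph.py later in this diff.
# ---------------------------------------------------------------------------
async def consume_managed_graph_result(managed) -> None:
    result = await managed.run("find restaurants")

    print(result.content)               # graph's final output text
    print(result.metrics.success)       # from the GraphMetricSummary built in run()
    print(result.metrics.usage)         # aggregate TokenUsage, or None
    # path/node_metrics stay empty and duration_ms may be None until graph
    # runners return GraphMetrics (noted in run() below).
    # result.evaluations is currently always None for graphs.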
:param input: The input prompt or structured input for the graph - :return: AgentGraphResult containing the output, raw response, and metrics + :return: ManagedGraphResult containing the content, metric summary, raw response, + and an optional evaluations task (currently always ``None`` for graphs — + per-graph evaluations will be added in a future PR). """ - return await self._runner.run(input) + result: AgentGraphResult = await self._runner.run(input) + + # Build a GraphMetricSummary from the runner result's LDAIMetrics. + # path and node_metrics will be populated once graph runners are migrated + # to return AgentGraphRunnerResult with GraphMetrics (PR 11). + metrics = result.metrics + summary = GraphMetricSummary( + success=metrics.success, + usage=metrics.usage, + duration_ms=getattr(metrics, 'duration_ms', None), + ) + + return ManagedGraphResult( + content=result.output, + metrics=summary, + raw=result.raw, + evaluations=None, + ) def get_agent_graph_runner(self) -> AgentGraphRunner: """ diff --git a/packages/sdk/server-ai/src/ldai/providers/__init__.py b/packages/sdk/server-ai/src/ldai/providers/__init__.py index 6f472c69..22dce784 100644 --- a/packages/sdk/server-ai/src/ldai/providers/__init__.py +++ b/packages/sdk/server-ai/src/ldai/providers/__init__.py @@ -6,9 +6,13 @@ from ldai.providers.runner_factory import RunnerFactory from ldai.providers.types import ( AgentGraphResult, + AgentGraphRunnerResult, AgentResult, + GraphMetrics, + GraphMetricSummary, JudgeResult, LDAIMetrics, + ManagedGraphResult, ManagedResult, ModelResponse, RunnerResult, @@ -20,10 +24,14 @@ 'AIProvider', 'AgentGraphResult', 'AgentGraphRunner', + 'AgentGraphRunnerResult', 'AgentResult', 'AgentRunner', + 'GraphMetrics', + 'GraphMetricSummary', 'JudgeResult', 'LDAIMetrics', + 'ManagedGraphResult', 'ManagedResult', 'ModelResponse', 'ModelRunner', diff --git a/packages/sdk/server-ai/src/ldai/providers/types.py b/packages/sdk/server-ai/src/ldai/providers/types.py index f5224e0e..5bb8ce47 100644 --- a/packages/sdk/server-ai/src/ldai/providers/types.py +++ b/packages/sdk/server-ai/src/ldai/providers/types.py @@ -3,7 +3,7 @@ from __future__ import annotations import asyncio -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Any, Callable, Dict, List, Optional from ldai.models import LDMessage @@ -114,6 +114,80 @@ class StructuredResponse: metrics: LDAIMetrics +@dataclass +class GraphMetrics: + """Contains raw metrics from a single agent graph run.""" + + success: bool + """Whether the graph run succeeded.""" + + path: List[str] = field(default_factory=list) + """Ordered list of node keys visited during the run.""" + + duration_ms: Optional[int] = None + """Wall-clock duration of the graph run in milliseconds.""" + + usage: Optional[TokenUsage] = None + """Optional aggregate token usage information across all nodes in the graph run.""" + + node_metrics: Dict[str, LDAIMetrics] = field(default_factory=dict) + """Per-node metrics keyed by node key.""" + + +@dataclass +class GraphMetricSummary: + """Contains a summary of metrics for an agent graph run.""" + + success: bool + """Whether the graph run succeeded.""" + + path: List[str] = field(default_factory=list) + """Ordered list of node keys visited during the run.""" + + duration_ms: Optional[int] = None + """Wall-clock duration of the graph run in milliseconds.""" + + usage: Optional[TokenUsage] = None + """Optional aggregate token usage information across all nodes in the graph run.""" + + node_metrics: Dict[str, 
LDAIMetrics] = field(default_factory=dict) + """Per-node metrics keyed by node key.""" + + resumption_token: Optional[str] = None + """Optional resumption token from the graph tracker for cross-process resumption.""" + + +@dataclass +class ManagedGraphResult: + """Contains the result of a managed agent graph run, including metrics and optional judge evaluations.""" + + content: str + """The graph's final output content.""" + + metrics: GraphMetricSummary + """Aggregated metric summary from the graph tracker for this run.""" + + raw: Optional[Any] = None + """Optional provider-native response object for advanced consumers.""" + + evaluations: Optional[asyncio.Task[List[JudgeResult]]] = None + """Optional asyncio Task that resolves to the list of :class:`JudgeResult` instances when awaited.""" + + +@dataclass +class AgentGraphRunnerResult: + """Contains the result of an agent graph runner invocation.""" + + content: str + """The graph's final output content.""" + + metrics: GraphMetrics + """Metrics from the graph run.""" + + raw: Optional[Any] = None + """Optional provider-native response object for advanced consumers.""" + + @dataclass class JudgeResult: """Contains the result of a single judge evaluation.""" diff --git a/packages/sdk/server-ai/src/ldai/tracker.py b/packages/sdk/server-ai/src/ldai/tracker.py index 608297d3..9c869133 100644 --- a/packages/sdk/server-ai/src/ldai/tracker.py +++ b/packages/sdk/server-ai/src/ldai/tracker.py @@ -410,17 +410,6 @@ def track_feedback(self, feedback: Dict[str, FeedbackKind]) -> None: 1, ) - def track_tool_calls(self, tool_calls: List[str]) -> None: - """ - Track the tool calls made during an AI operation. - - :param tool_calls: List of tool call names. - """ - if self._summary.tool_calls is not None: - log.warning("Tool calls have already been tracked for this execution. %s", self.__get_track_data()) - return - self._summary._tool_calls = list(tool_calls) - def track_success(self) -> None: """ Track a successful AI generation. @@ -560,9 +549,20 @@ def track_tool_calls(self, tool_keys: Iterable[str]) -> None: """ Track multiple tool invocations for this configuration. + Records the tool keys on :class:`LDAIMetricSummary` and emits a + ``$ld:ai:tool_call`` event for each one. + :param tool_keys: Tool identifiers (e.g. from a model response). """ - for tool_key in tool_keys: + if self._summary.tool_calls is not None: + log.warning( + "Tool calls have already been tracked for this execution. 
%s", + self.__get_track_data(), + ) + return + keys = list(tool_keys) + self._summary._tool_calls = keys + for tool_key in keys: self.track_tool_call(tool_key) def get_summary(self) -> LDAIMetricSummary: diff --git a/packages/sdk/server-ai/tests/test_managed_agent_graph.py b/packages/sdk/server-ai/tests/test_managed_agent_graph.py index 35be2766..9cdceaed 100644 --- a/packages/sdk/server-ai/tests/test_managed_agent_graph.py +++ b/packages/sdk/server-ai/tests/test_managed_agent_graph.py @@ -5,7 +5,7 @@ from ldclient import Config, Context, LDClient from ldclient.integrations.test_data import TestData -from ldai import LDAIClient, ManagedAgentGraph +from ldai import LDAIClient, ManagedAgentGraph, ManagedGraphResult from ldai.providers.types import LDAIMetrics from ldai.providers import AgentGraphResult, AgentGraphRunner, ToolRegistry @@ -31,7 +31,8 @@ async def test_managed_agent_graph_run_delegates_to_runner(): runner = StubAgentGraphRunner("hello world") managed = ManagedAgentGraph(runner) result = await managed.run("test input") - assert result.output == "hello world" + assert isinstance(result, ManagedGraphResult) + assert result.content == "hello world" assert result.metrics.success is True @@ -172,7 +173,8 @@ async def test_create_agent_graph_run_produces_result(ldai_client: LDAIClient): assert managed is not None result = await managed.run("find restaurants") - assert result.output == "final answer" + assert isinstance(result, ManagedGraphResult) + assert result.content == "final answer" assert result.metrics.success is True