From 3cee472a33c0fb562676f018ef21e59eb6977a0f Mon Sep 17 00:00:00 2001
From: radu-mocanu
Date: Sun, 29 Mar 2026 21:26:27 +0300
Subject: [PATCH] fix: trim eval run MCP output to scores and justifications
 only

Drops traces, inputs, outputs, and other large fields from run_eval_set and
get_eval_run responses to keep MCP tool output manageable.

Co-Authored-By: Claude Sonnet 4.6
---
 pyproject.toml                 |  2 +-
 src/uipath/dev/mcp/__init__.py | 31 ++++++++++++++++++++++++++++---
 uv.lock                        |  2 +-
 3 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 6145137..d389b19 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "uipath-dev"
-version = "0.0.76"
+version = "0.0.77"
 description = "UiPath Developer Console"
 readme = { file = "README.md", content-type = "text/markdown" }
 requires-python = ">=3.11"
diff --git a/src/uipath/dev/mcp/__init__.py b/src/uipath/dev/mcp/__init__.py
index 3ff0625..b5bb8c3 100644
--- a/src/uipath/dev/mcp/__init__.py
+++ b/src/uipath/dev/mcp/__init__.py
@@ -191,6 +191,27 @@ async def get_run_status(run_id: str) -> dict[str, Any]:
         return resp.json()
 
 
+def _summarize_eval_run(result: dict[str, Any]) -> dict[str, Any]:
+    """Return only scores and justifications per item — drop everything else."""
+    items = [
+        {
+            "name": item.get("name"),
+            "status": item.get("status"),
+            "scores": item.get("scores", {}),
+            "justifications": item.get("justifications", {}),
+            "overall_score": item.get("overall_score"),
+        }
+        for item in result.get("results", [])
+    ]
+    return {
+        "id": result.get("id"),
+        "status": result.get("status"),
+        "overall_score": result.get("overall_score"),
+        "evaluator_scores": result.get("evaluator_scores", {}),
+        "results": items,
+    }
+
+
 @mcp.tool()
 async def list_eval_sets() -> list[dict[str, Any]]:
     """List all evaluation sets.
@@ -276,7 +297,9 @@ async def run_eval_set(
     async with httpx.AsyncClient() as client:
         resp = await client.get(_api_url(f"/eval-runs/{run_id}"), timeout=10)
         resp.raise_for_status()
-        return resp.json()
+        result = resp.json()
+
+    return _summarize_eval_run(result)
 
 
 @mcp.tool()
@@ -300,13 +323,15 @@ async def get_eval_run(eval_run_id: str) -> dict[str, Any]:
     Args:
         eval_run_id: ID of the eval run.
 
-    Returns the run with all item results, scores, justifications, and traces.
+    Returns per-item evaluator scores and justifications.
     """
     await _report_tool_call("get_eval_run", {"eval_run_id": eval_run_id})
     async with httpx.AsyncClient() as client:
         resp = await client.get(_api_url(f"/eval-runs/{eval_run_id}"), timeout=10)
         resp.raise_for_status()
-        return resp.json()
+        result = resp.json()
+
+    return _summarize_eval_run(result)
 
 
 def main() -> None:
diff --git a/uv.lock b/uv.lock
index 322fbe7..2a27ebf 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2555,7 +2555,7 @@ wheels = [
 
 [[package]]
 name = "uipath-dev"
-version = "0.0.76"
+version = "0.0.77"
 source = { editable = "." }
 dependencies = [
     { name = "aiosqlite" },
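
Illustrative usage sketch (not part of the patch): a minimal example of the trimmed
shape these MCP tools now return, assuming the uipath-dev package from this repo is
installed locally so _summarize_eval_run can be imported from uipath.dev.mcp. The
payload below is invented sample data; the extra fields (traces, input, output)
stand in for the large fields the summary drops, per the commit message.

    from uipath.dev.mcp import _summarize_eval_run

    # Hypothetical raw eval-run payload as the local API might return it.
    raw_run = {
        "id": "run-123",
        "status": "completed",
        "overall_score": 0.85,
        "evaluator_scores": {"accuracy": 0.9, "tone": 0.8},
        "results": [
            {
                "name": "greeting-case",
                "status": "passed",
                "scores": {"accuracy": 0.9},
                "justifications": {"accuracy": "Matched the expected intent."},
                "overall_score": 0.9,
                "traces": ["...large span data..."],  # dropped by the summary
                "input": {"prompt": "hello"},         # dropped
                "output": {"text": "hi there"},       # dropped
            }
        ],
    }

    summary = _summarize_eval_run(raw_run)
    # summary["results"][0] keeps only name, status, scores,
    # justifications, and overall_score.
    print(summary)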