From 3cee472a33c0fb562676f018ef21e59eb6977a0f Mon Sep 17 00:00:00 2001
From: radu-mocanu
Date: Sun, 29 Mar 2026 21:26:27 +0300
Subject: [PATCH] fix: trim eval run MCP output to scores and justifications
 only

Drops traces, inputs, outputs, and other large fields from run_eval_set and
get_eval_run responses to keep MCP tool output manageable.

Co-Authored-By: Claude Sonnet 4.6
---
 pyproject.toml                 |  2 +-
 src/uipath/dev/mcp/__init__.py | 31 ++++++++++++++++++++++++++++---
 uv.lock                        |  2 +-
 3 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 6145137..d389b19 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "uipath-dev"
-version = "0.0.76"
+version = "0.0.77"
 description = "UiPath Developer Console"
 readme = { file = "README.md", content-type = "text/markdown" }
 requires-python = ">=3.11"
diff --git a/src/uipath/dev/mcp/__init__.py b/src/uipath/dev/mcp/__init__.py
index 3ff0625..b5bb8c3 100644
--- a/src/uipath/dev/mcp/__init__.py
+++ b/src/uipath/dev/mcp/__init__.py
@@ -191,6 +191,27 @@ async def get_run_status(run_id: str) -> dict[str, Any]:
         return resp.json()
 
 
+def _summarize_eval_run(result: dict[str, Any]) -> dict[str, Any]:
+    """Return only scores and justifications per item — drop everything else."""
+    items = [
+        {
+            "name": item.get("name"),
+            "status": item.get("status"),
+            "scores": item.get("scores", {}),
+            "justifications": item.get("justifications", {}),
+            "overall_score": item.get("overall_score"),
+        }
+        for item in result.get("results", [])
+    ]
+    return {
+        "id": result.get("id"),
+        "status": result.get("status"),
+        "overall_score": result.get("overall_score"),
+        "evaluator_scores": result.get("evaluator_scores", {}),
+        "results": items,
+    }
+
+
 @mcp.tool()
 async def list_eval_sets() -> list[dict[str, Any]]:
     """List all evaluation sets.
@@ -276,7 +297,9 @@ async def run_eval_set(
     async with httpx.AsyncClient() as client:
         resp = await client.get(_api_url(f"/eval-runs/{run_id}"), timeout=10)
         resp.raise_for_status()
-        return resp.json()
+        result = resp.json()
+
+    return _summarize_eval_run(result)
 
 
 @mcp.tool()
@@ -300,13 +323,15 @@ async def get_eval_run(eval_run_id: str) -> dict[str, Any]:
     Args:
         eval_run_id: ID of the eval run.
 
-    Returns the run with all item results, scores, justifications, and traces.
+    Returns per-item evaluator scores and justifications.
     """
     await _report_tool_call("get_eval_run", {"eval_run_id": eval_run_id})
     async with httpx.AsyncClient() as client:
         resp = await client.get(_api_url(f"/eval-runs/{eval_run_id}"), timeout=10)
         resp.raise_for_status()
-        return resp.json()
+        result = resp.json()
+
+    return _summarize_eval_run(result)
 
 
 def main() -> None:
diff --git a/uv.lock b/uv.lock
index 322fbe7..2a27ebf 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2555,7 +2555,7 @@ wheels = [
 
 [[package]]
 name = "uipath-dev"
-version = "0.0.76"
+version = "0.0.77"
 source = { editable = "." }
 dependencies = [
     { name = "aiosqlite" },
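
Illustrative usage sketch (not part of the patch): a minimal example of the trimmed
shape these MCP tools now return, assuming the uipath-dev package from this repo is
installed locally so _summarize_eval_run can be imported from uipath.dev.mcp. The
payload below is invented sample data; the extra fields (traces, input, output)
stand in for the large fields the summary drops, per the commit message.

    from uipath.dev.mcp import _summarize_eval_run

    # Hypothetical raw eval-run payload as the local API might return it.
    raw_run = {
        "id": "run-123",
        "status": "completed",
        "overall_score": 0.85,
        "evaluator_scores": {"accuracy": 0.9, "tone": 0.8},
        "results": [
            {
                "name": "greeting-case",
                "status": "passed",
                "scores": {"accuracy": 0.9},
                "justifications": {"accuracy": "Matched the expected intent."},
                "overall_score": 0.9,
                "traces": ["...large span data..."],  # dropped by the summary
                "input": {"prompt": "hello"},         # dropped
                "output": {"text": "hi there"},       # dropped
            }
        ],
    }

    summary = _summarize_eval_run(raw_run)
    # summary["results"][0] keeps only name, status, scores,
    # justifications, and overall_score.
    print(summary)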