From c1870b1f4c56fb7fea1df6c428e6edb23e757d17 Mon Sep 17 00:00:00 2001
From: hyeokjun32 <ksjm0417@naver.com>
Date: Thu, 11 Jun 2026 23:38:11 +0900
Subject: [PATCH] feat: surface stale drop evidence in Lab report

---
 .../agent_runtime_reliability_report.md       |  16 ++-
 .../aiguard_runtime_guard_analysis.json       |  55 +++++++-
 inferedgelab/commands/agent_runtime_report.py |   4 +
 inferedgelab/services/agent_runtime_report.py |  97 +++++++++++++
 tests/test_agent_runtime_report.py            | 131 +++++++++++++++++-
 5 files changed, 294 insertions(+), 9 deletions(-)

diff --git a/docs/portfolio/agent_runtime_reliability_report.md b/docs/portfolio/agent_runtime_reliability_report.md
index 590f0c4..0f949ad 100644
--- a/docs/portfolio/agent_runtime_reliability_report.md
+++ b/docs/portfolio/agent_runtime_reliability_report.md
@@ -106,6 +106,11 @@ Future hardening, not current completion:
 AIGuard `guard_analysis` also includes `sustained_overload_risk`, which Lab
 preserves as report evidence and reflects in the agent deployment decision
 context.
+Newer AIGuard guard analysis can also include `stale_frame_risk` or preserved
+`edgeenv_orchestrator_stale_drop_summary` evidence. Lab surfaces the stale-drop
+count, stale-drop rate, affected tasks, reason counts, and reason classes in
+the AIGuard Orchestrator Operation Evidence section as deployment review
+context.
 
 The report also preserves the Orchestrator operation-health fields added for
 runtime operation review:
@@ -151,8 +156,9 @@ runtime operation review:
 - AIGuard Orchestrator operation evidence, including
   `worker_health_degradation` and `scheduler_delay_pattern` when Orchestrator
   worker health or runtime event telemetry is analyzed by AIGuard.
-  Lab preserves health reasons, policy/drop reason counts, and scheduler delay
-  counts as deployment context without making AIGuard the final decision owner.
+  Lab preserves health reasons, policy/drop/stale-drop reason counts, scheduler
+  delay counts, stale-drop affected tasks, and stale-drop boundary markers as
+  deployment context without making AIGuard the final decision owner.
 
 These fields make the report path explicit:
 
@@ -204,8 +210,10 @@ The report also surfaces Orchestrator operation guard evidence as context. For
 example, `worker_health_degradation` shows degraded/constrained worker reasons
 such as fallback policy use or dropped frames, while `scheduler_delay_pattern`
 shows scheduler delay counts and related policy/drop reasons. These evidence
-items contribute through AIGuard's overall guard verdict and remain separate
-from Lab's final policy ownership.
+items sit alongside `stale_frame_risk`, which shows which tasks had
+stale/backlog drops and why. All of these evidence items contribute
+through AIGuard's overall guard verdict and remain separate from Lab's
+final policy ownership.
 
 ## Boundary
 
diff --git a/examples/agent_runtime/aiguard_runtime_guard_analysis.json b/examples/agent_runtime/aiguard_runtime_guard_analysis.json
index c034d8e..86080aa 100644
--- a/examples/agent_runtime/aiguard_runtime_guard_analysis.json
+++ b/examples/agent_runtime/aiguard_runtime_guard_analysis.json
@@ -55,21 +55,70 @@
         "queue_depth_sample_count": 1,
         "latency_sample_count": 1
       }
+    },
+    {
+      "type": "stale_frame_risk",
+      "metric_name": "stale_drop_rate",
+      "observed_value": 0.714,
+      "baseline_value": 0,
+      "threshold": 0.2,
+      "delta": null,
+      "delta_pct": null,
+      "increase_factor": null,
+      "severity": "high",
+      "status": "failed",
+      "explanation": "Orchestrator reported 5 stale/backlog drop events.",
+      "why_it_matters": "Stale frame or backlog drops can protect high-priority work, but they also show that lower-priority Vision or command workloads may lose fresh inputs under sustained multi-agent load.",
+      "suspected_causes": [
+        "load_shedding_context",
+        "stale_queue_overflow"
+      ],
+      "recommendation": "Review tasks_with_stale_drop, stale_drop_reasons, queue depth, producer rate, and fallback policy in Lab before treating the operation profile as stable.",
+      "raw_context": {
+        "stale_drop_count": 5,
+        "total_drop_count": 7,
+        "stale_drop_rate": 0.714,
+        "stale_drop_reasons": {
+          "load_shedding_backlog_threshold_exceeded": 3,
+          "queue_overflow_drop_oldest": 2
+        },
+        "stale_drop_reason_classes": [
+          "load_shedding_stale_backlog",
+          "stale_queue_overflow"
+        ],
+        "tasks_with_stale_drop": [
+          "vision_agent",
+          "voice_command_agent"
+        ],
+        "latest_stale_drop_event": {
+          "task": "voice_command_agent",
+          "agent_id": "voice_command_agent",
+          "reason": "queue_overflow_drop_oldest",
+          "stale_drop_class": "stale_queue_overflow"
+        },
+        "decision_owner": "lab",
+        "scheduler_owner": "orchestrator",
+        "not_a_deployment_decision": true
+      }
     }
   ],
   "suspected_causes": [
     "queue_backlog",
     "overload_load_shedding",
     "producer_rate_exceeds_runtime_capacity",
-    "sustained_multi_agent_overload"
+    "sustained_multi_agent_overload",
+    "stale_queue_overflow"
   ],
   "recommendations": [
     "Tune target FPS, queue size, drop policy, or fallback policy for affected agents.",
-    "Lower producer rate, tighten stale-frame drop policy, or move lower priority work behind a fallback path before deployment."
+    "Lower producer rate, tighten stale-frame drop policy, or move lower priority work behind a fallback path before deployment.",
+    "Review stale drop reasons and affected agent workloads in Lab."
   ],
   "thresholds": {
     "drop_rate_review": 0.2,
-    "drop_rate_blocked": 0.5
+    "drop_rate_blocked": 0.5,
+    "stale_drop_rate_review": 0.2,
+    "stale_drop_rate_blocked": 0.5
   },
   "baseline_summary": {},
   "candidate_summary": {
diff --git a/inferedgelab/commands/agent_runtime_report.py b/inferedgelab/commands/agent_runtime_report.py
index 73df0d8..5eed8c7 100644
--- a/inferedgelab/commands/agent_runtime_report.py
+++ b/inferedgelab/commands/agent_runtime_report.py
@@ -79,6 +79,7 @@ def _text_summary(report: dict) -> str:
     remote_runtime_event_summary = (
         remote_context.get("remote_runtime_event_summary") or {}
     )
+    orchestrator_guard = report.get("orchestrator_operation_guard_summary") or {}
     edgeenv_context = (
         report["agent_runtime_summary"].get("edgeenv_preservation_context") or {}
     )
@@ -113,6 +114,9 @@ def _text_summary(report: dict) -> str:
         f"remote_runtime_event_count: {remote_runtime_event_summary.get('runtime_event_count')}",
         f"remote_runtime_event_final_status: {remote_runtime_event_summary.get('final_status')}",
         f"remote_runtime_summary_boundary: {remote_runtime_event_summary.get('operation_boundary')}",
+        f"stale_drop_count: {orchestrator_guard.get('stale_drop_count')}",
+        f"stale_drop_rate: {orchestrator_guard.get('stale_drop_rate')}",
+        f"tasks_with_stale_drop: {', '.join(orchestrator_guard.get('tasks_with_stale_drop') or [])}",
         f"edgeenv_run_id: {edgeenv_context.get('run_id')}",
         f"edgeenv_runtime_operation_health: {edgeenv_context.get('runtime_operation_health_reason')}",
         f"edgeenv_runtime_operation_action: {edgeenv_context.get('runtime_operation_recommended_action')}",
diff --git a/inferedgelab/services/agent_runtime_report.py b/inferedgelab/services/agent_runtime_report.py
index b8a5fc5..9d5a8ba 100644
--- a/inferedgelab/services/agent_runtime_report.py
+++ b/inferedgelab/services/agent_runtime_report.py
@@ -30,6 +30,10 @@
 ORCHESTRATOR_OPERATION_GUARD_EVIDENCE_TYPES = {
     "worker_health_degradation",
     "scheduler_delay_pattern",
+    "operation_timeline_summary",
+    "stale_frame_risk",
+    "edgeenv_orchestrator_operation_timeline_summary",
+    "edgeenv_orchestrator_stale_drop_summary",
 }
 
 DEFAULT_AGENT_RUNTIME_THRESHOLDS = {
@@ -877,6 +881,11 @@ def build_agent_runtime_reliability_markdown(report: dict[str, Any]) -> str:
             f"| policy_decision_reasons | {_fmt_mapping(orchestrator_guard.get('policy_decision_reason_counts'))} |",
             f"| drop_reasons | {_fmt_mapping(orchestrator_guard.get('drop_reason_counts'))} |",
             f"| scheduler_delay_event_count | {_fmt_number(orchestrator_guard.get('scheduler_delay_event_count'))} |",
+            f"| stale_drop_count | {_fmt_number(orchestrator_guard.get('stale_drop_count'))} |",
+            f"| stale_drop_rate | {_fmt_number(orchestrator_guard.get('stale_drop_rate'))} |",
+            f"| tasks_with_stale_drop | {', '.join(orchestrator_guard.get('tasks_with_stale_drop') or []) or '-'} |",
+            f"| stale_drop_reasons | {_fmt_mapping(orchestrator_guard.get('stale_drop_reasons'))} |",
+            f"| stale_drop_reason_classes | {', '.join(orchestrator_guard.get('stale_drop_reason_classes') or []) or '-'} |",
             "",
             "Orchestrator operation guard evidence:",
             "",
@@ -1494,7 +1503,12 @@ def _orchestrator_operation_guard_summary(
     health_reason_counts: dict[str, int] = {}
     policy_decision_reason_counts: dict[str, int] = {}
     drop_reason_counts: dict[str, int] = {}
+    stale_drop_reason_counts: dict[str, int] = {}
+    stale_drop_reason_classes: list[str] = []
+    tasks_with_stale_drop: list[str] = []
     scheduler_delay_event_count = 0.0
+    stale_drop_count = 0.0
+    stale_drop_rate = 0.0
     for item in evidence:
         detail = _orchestrator_operation_guard_detail(item)
         health_reason_counts = _merge_count_maps(
@@ -1509,10 +1523,34 @@ def _orchestrator_operation_guard_summary(
             drop_reason_counts,
             detail.get("drop_reason_counts"),
         )
+        stale_drop_reason_counts = _merge_count_maps(
+            stale_drop_reason_counts,
+            detail.get("stale_drop_reasons"),
+        )
+        stale_drop_reason_classes = _unique_strings(
+            [
+                *stale_drop_reason_classes,
+                *_string_list(detail.get("stale_drop_reason_classes")),
+            ]
+        )
+        tasks_with_stale_drop = _unique_strings(
+            [
+                *tasks_with_stale_drop,
+                *_string_list(detail.get("tasks_with_stale_drop")),
+            ]
+        )
         scheduler_delay_event_count = max(
             scheduler_delay_event_count,
             _non_negative_number(detail.get("scheduler_delay_event_count")),
         )
+        stale_drop_count = max(
+            stale_drop_count,
+            _non_negative_number(detail.get("stale_drop_count")),
+        )
+        stale_drop_rate = max(
+            stale_drop_rate,
+            _non_negative_number(detail.get("stale_drop_rate")),
+        )
     metric_context = metrics if isinstance(metrics, dict) else {}
     if not policy_decision_reason_counts:
         policy_decision_reason_counts = _count_mapping(
@@ -1547,6 +1585,11 @@ def _orchestrator_operation_guard_summary(
         "policy_decision_reason_counts": policy_decision_reason_counts,
         "drop_reason_counts": drop_reason_counts,
         "scheduler_delay_event_count": scheduler_delay_event_count,
+        "stale_drop_count": stale_drop_count,
+        "stale_drop_rate": stale_drop_rate,
+        "stale_drop_reasons": stale_drop_reason_counts,
+        "stale_drop_reason_classes": stale_drop_reason_classes,
+        "tasks_with_stale_drop": tasks_with_stale_drop,
         "evidence": [
             {
                 "type": item.get("type"),
@@ -1575,6 +1618,7 @@ def _orchestrator_operation_guard_detail(
     health_reason_counts = {}
     if isinstance(worker_health, dict):
         health_reason_counts = _count_mapping(worker_health.get("health_reason_counts"))
+    stale_drop = _stale_drop_guard_context(raw_context)
     return {
         "health_reason_counts": health_reason_counts,
         "policy_decision_reason_counts": _count_mapping(
@@ -1587,6 +1631,59 @@ def _orchestrator_operation_guard_detail(
         "scheduler_delay_event_count": _non_negative_number(
             raw_context.get("scheduler_delay_event_count")
         ),
+        "stale_drop_count": stale_drop.get("stale_drop_count", 0.0),
+        "total_drop_count": stale_drop.get("total_drop_count", 0.0),
+        "stale_drop_rate": stale_drop.get("stale_drop_rate", 0.0),
+        "stale_drop_reasons": stale_drop.get("stale_drop_reasons", {}),
+        "stale_drop_reason_classes": stale_drop.get(
+            "stale_drop_reason_classes", []
+        ),
+        "tasks_with_stale_drop": stale_drop.get("tasks_with_stale_drop", []),
+        "latest_stale_drop_event": stale_drop.get("latest_stale_drop_event", {}),
+        "stale_drop_boundary_markers_valid": stale_drop.get(
+            "boundary_markers_valid"
+        ),
+        "stale_drop_decision_owner": stale_drop.get("decision_owner"),
+        "stale_drop_scheduler_owner": stale_drop.get("scheduler_owner"),
+        "stale_drop_not_a_deployment_decision": stale_drop.get(
+            "not_a_deployment_decision"
+        ),
+    }
+
+
+def _stale_drop_guard_context(raw_context: dict[str, Any]) -> dict[str, Any]:
+    candidate = raw_context.get("stale_drop_summary")  # optional nested payload
+    if not isinstance(candidate, dict):
+        candidate = {}  # treat a missing or non-dict payload as empty
+    summary = candidate.get("summary")
+    if isinstance(summary, dict):  # raw_context top-level keys are unused here
+        source = {**summary, **candidate}  # candidate keys override summary
+        source.pop("summary", None)  # drop the nested dict just flattened
+    else:
+        source = {**candidate, **raw_context}  # raw_context keys override
+    total_drop_count = _non_negative_number(source.get("total_drop_count"))
+    stale_drop_count = _non_negative_number(source.get("stale_drop_count"))
+    stale_drop_rate = _non_negative_number(source.get("stale_drop_rate"))
+    if stale_drop_rate <= 0 and total_drop_count > 0:  # no rate given: derive
+        stale_drop_rate = stale_drop_count / total_drop_count
+    return {
+        "stale_drop_count": stale_drop_count,
+        "total_drop_count": total_drop_count,
+        "stale_drop_rate": stale_drop_rate,
+        "stale_drop_reasons": _count_mapping(source.get("stale_drop_reasons")),
+        "stale_drop_reason_classes": _string_list(
+            source.get("stale_drop_reason_classes")
+        ),
+        "tasks_with_stale_drop": _string_list(source.get("tasks_with_stale_drop")),
+        "latest_stale_drop_event": (
+            dict(source.get("latest_stale_drop_event"))  # shallow copy
+            if isinstance(source.get("latest_stale_drop_event"), dict)
+            else {}
+        ),
+        # Ownership/boundary markers are passed through without validation.
+        "decision_owner": source.get("decision_owner"),
+        "scheduler_owner": source.get("scheduler_owner"),
+        "not_a_deployment_decision": source.get("not_a_deployment_decision"),
     }
 
 
diff --git a/tests/test_agent_runtime_report.py b/tests/test_agent_runtime_report.py
index 2079910..447bde9 100644
--- a/tests/test_agent_runtime_report.py
+++ b/tests/test_agent_runtime_report.py
@@ -460,16 +460,72 @@ def orchestrator_operation_guard_analysis() -> dict:
                     },
                 },
             },
+            {
+                "type": "stale_frame_risk",
+                "metric_name": "stale_drop_rate",
+                "observed_value": 0.714,
+                "baseline_value": 0,
+                "threshold": 0.2,
+                "delta": None,
+                "delta_pct": None,
+                "increase_factor": None,
+                "severity": "high",
+                "status": "failed",
+                "explanation": (
+                    "Orchestrator reported 5 stale/backlog drop events."
+                ),
+                "why_it_matters": (
+                    "Stale frame or backlog drops can protect high-priority "
+                    "work, but they also show that lower-priority workloads may "
+                    "lose fresh inputs."
+                ),
+                "suspected_causes": [
+                    "load_shedding_context",
+                    "stale_queue_overflow",
+                ],
+                "recommendation": (
+                    "Review tasks_with_stale_drop, stale_drop_reasons, queue "
+                    "depth, producer rate, and fallback policy in Lab."
+                ),
+                "raw_context": {
+                    "stale_drop_count": 5,
+                    "total_drop_count": 7,
+                    "stale_drop_rate": 0.714,
+                    "stale_drop_reasons": {
+                        "load_shedding_backlog_threshold_exceeded": 3,
+                        "queue_overflow_drop_oldest": 2,
+                    },
+                    "stale_drop_reason_classes": [
+                        "load_shedding_stale_backlog",
+                        "stale_queue_overflow",
+                    ],
+                    "tasks_with_stale_drop": [
+                        "vision_agent",
+                        "voice_command_agent",
+                    ],
+                    "latest_stale_drop_event": {
+                        "task": "voice_command_agent",
+                        "agent_id": "voice_command_agent",
+                        "reason": "queue_overflow_drop_oldest",
+                        "stale_drop_class": "stale_queue_overflow",
+                    },
+                    "decision_owner": "lab",
+                    "scheduler_owner": "orchestrator",
+                    "not_a_deployment_decision": True,
+                },
+            },
         ]
     )
     data["suspected_causes"] = [
         *data["suspected_causes"],
         "fallback_policy_used",
         "scheduler_queue_wait",
+        "stale_queue_overflow",
     ]
     data["recommendations"] = [
         *data["recommendations"],
         "Inspect worker health reasons and scheduler delay timeline.",
+        "Review stale drop reasons and affected agent workloads in Lab.",
     ]
     return data
 
@@ -1340,12 +1396,13 @@ def test_agent_runtime_report_summarizes_orchestrator_operation_guard_evidence()
     )
 
     orchestrator_guard = report["orchestrator_operation_guard_summary"]
-    assert orchestrator_guard["evidence_count"] == 2
-    assert orchestrator_guard["failed_count"] == 1
+    assert orchestrator_guard["evidence_count"] == 3
+    assert orchestrator_guard["failed_count"] == 2
     assert orchestrator_guard["warning_count"] == 1
     assert orchestrator_guard["evidence_types"] == [
         "worker_health_degradation",
         "scheduler_delay_pattern",
+        "stale_frame_risk",
     ]
     assert orchestrator_guard["health_reasons"] == [
         "fallback_policy_used",
@@ -1364,18 +1421,71 @@ def test_agent_runtime_report_summarizes_orchestrator_operation_guard_evidence()
         "load_shedding_backlog_threshold_exceeded": 1,
     }
     assert orchestrator_guard["scheduler_delay_event_count"] == 2
+    assert orchestrator_guard["stale_drop_count"] == 5
+    assert orchestrator_guard["stale_drop_rate"] == 0.714
+    assert orchestrator_guard["stale_drop_reasons"] == {
+        "load_shedding_backlog_threshold_exceeded": 3,
+        "queue_overflow_drop_oldest": 2,
+    }
+    assert orchestrator_guard["stale_drop_reason_classes"] == [
+        "load_shedding_stale_backlog",
+        "stale_queue_overflow",
+    ]
+    assert orchestrator_guard["tasks_with_stale_drop"] == [
+        "vision_agent",
+        "voice_command_agent",
+    ]
+
+
+def test_agent_runtime_report_command_text_surfaces_stale_drop(capsys):
+    agent_runtime_report_cmd(
+        orchestration_summary="examples/agent_runtime/agent_3_orchestration_summary.json",
+        guard_analysis="examples/agent_runtime/aiguard_runtime_guard_analysis.json",
+        runtime_result="",
+        remote_dispatch="",
+        edgeenv_run_show="",
+        format="text",
+        output="",
+    )
+
+    out = capsys.readouterr().out
+    assert "stale_drop_count: 5" in out
+    assert "stale_drop_rate: 0.714" in out
+    assert "tasks_with_stale_drop: vision_agent, voice_command_agent" in out
+
+    report = build_agent_runtime_reliability_report(
+        orchestration_summary=orchestration_summary(),
+        guard_analysis=orchestrator_operation_guard_analysis(),
+    )
+    orchestrator_guard = report["orchestrator_operation_guard_summary"]
     assert orchestrator_guard["evidence"][1]["runtime_event_reason_counts"] == {
         "scheduler_delay_observed": 2,
     }
+    assert orchestrator_guard["evidence"][2]["stale_drop_boundary_markers_valid"] is None
+    assert orchestrator_guard["evidence"][2]["stale_drop_decision_owner"] == "lab"
+    assert (
+        orchestrator_guard["evidence"][2]["stale_drop_scheduler_owner"]
+        == "orchestrator"
+    )
+    assert (
+        orchestrator_guard["evidence"][2]["stale_drop_not_a_deployment_decision"]
+        is True
+    )
 
     markdown = build_agent_runtime_reliability_markdown(report)
     assert "AIGuard Orchestrator Operation Evidence" in markdown
     assert "worker_health_degradation" in markdown
     assert "scheduler_delay_pattern" in markdown
+    assert "stale_frame_risk" in markdown
     assert "policy_decision_reasons" in markdown
     assert "queue_backlog_threshold_exceeded=1" in markdown
     assert "drop_reasons" in markdown
     assert "load_shedding_backlog_threshold_exceeded=1" in markdown
+    assert "stale_drop_count" in markdown
+    assert "stale_drop_rate" in markdown
+    assert "tasks_with_stale_drop" in markdown
+    assert "vision_agent, voice_command_agent" in markdown
+    assert "queue_overflow_drop_oldest=2" in markdown
 
 
 def test_agent_runtime_report_marks_runtime_timeout_as_review():
@@ -1509,6 +1619,9 @@ def test_agent_runtime_report_markdown_contains_sections():
     assert "AIGuard Orchestrator Operation Evidence" in markdown
     assert "worker_health_degradation" in markdown
     assert "scheduler_delay_pattern" in markdown
+    assert "stale_frame_risk" in markdown
+    assert "stale_drop_count" in markdown
+    assert "tasks_with_stale_drop" in markdown
     assert "Remote Dispatch Context" in markdown
     assert "Remote execution starter evidence" in markdown
     assert "jetson-nano-01" in markdown
@@ -1560,6 +1673,14 @@ def test_agent_runtime_report_loads_committed_fixtures():
         "path=agent_runtime_preservation"
     )
     assert "device_local_events=0" in context["preservation_details_label"]
+    orchestrator_guard = report["orchestrator_operation_guard_summary"]
+    assert "stale_frame_risk" in orchestrator_guard["evidence_types"]
+    assert orchestrator_guard["stale_drop_count"] == 5
+    assert orchestrator_guard["stale_drop_rate"] == 0.714
+    assert orchestrator_guard["tasks_with_stale_drop"] == [
+        "vision_agent",
+        "voice_command_agent",
+    ]
 
 
 def test_agent_runtime_report_surfaces_remote_execution_failure():
@@ -1706,3 +1827,9 @@ def test_agent_runtime_report_command_outputs_json(tmp_path, capsys):
     ]
     edgeenv_context = report["agent_runtime_summary"]["edgeenv_preservation_context"]
     assert edgeenv_context["run_id"] == "run-20260529-094714-0955a027"
+    orchestrator_guard = report["orchestrator_operation_guard_summary"]
+    assert orchestrator_guard["stale_drop_count"] == 5
+    assert orchestrator_guard["tasks_with_stale_drop"] == [
+        "vision_agent",
+        "voice_command_agent",
+    ]