From c1870b1f4c56fb7fea1df6c428e6edb23e757d17 Mon Sep 17 00:00:00 2001 From: hyeokjun32 Date: Thu, 11 Jun 2026 23:38:11 +0900 Subject: [PATCH] feat: surface stale drop evidence in Lab report --- .../agent_runtime_reliability_report.md | 16 ++- .../aiguard_runtime_guard_analysis.json | 55 +++++++- inferedgelab/commands/agent_runtime_report.py | 4 + inferedgelab/services/agent_runtime_report.py | 97 +++++++++++++ tests/test_agent_runtime_report.py | 131 +++++++++++++++++- 5 files changed, 294 insertions(+), 9 deletions(-) diff --git a/docs/portfolio/agent_runtime_reliability_report.md b/docs/portfolio/agent_runtime_reliability_report.md index 590f0c4..0f949ad 100644 --- a/docs/portfolio/agent_runtime_reliability_report.md +++ b/docs/portfolio/agent_runtime_reliability_report.md @@ -106,6 +106,11 @@ Future hardening, not current completion: AIGuard `guard_analysis` also includes `sustained_overload_risk`, which Lab preserves as report evidence and reflects in the agent deployment decision context. +Newer AIGuard guard analysis can also include `stale_frame_risk` or preserved +`edgeenv_orchestrator_stale_drop_summary` evidence. Lab surfaces the stale-drop +count, stale-drop rate, affected tasks, reason counts, and reason classes in +the AIGuard Orchestrator Operation Evidence section as deployment review +context. The report also preserves the Orchestrator operation-health fields added for runtime operation review: @@ -151,8 +156,9 @@ runtime operation review: - AIGuard Orchestrator operation evidence, including `worker_health_degradation` and `scheduler_delay_pattern` when Orchestrator worker health or runtime event telemetry is analyzed by AIGuard. - Lab preserves health reasons, policy/drop reason counts, and scheduler delay - counts as deployment context without making AIGuard the final decision owner. 
+ Lab preserves health reasons, policy/drop/stale-drop reason counts, scheduler + delay counts, stale-drop affected tasks, and stale-drop boundary markers as + deployment context without making AIGuard the final decision owner. These fields make the report path explicit: @@ -204,8 +210,10 @@ The report also surfaces Orchestrator operation guard evidence as context. For example, `worker_health_degradation` shows degraded/constrained worker reasons such as fallback policy use or dropped frames, while `scheduler_delay_pattern` shows scheduler delay counts and related policy/drop reasons. These evidence -items contribute through AIGuard's overall guard verdict and remain separate -from Lab's final policy ownership. +types cover worker health and scheduler delay. +`stale_frame_risk` shows which tasks had stale/backlog drops and why. These +evidence items contribute through AIGuard's overall guard verdict and remain +separate from Lab's final policy ownership. ## Boundary diff --git a/examples/agent_runtime/aiguard_runtime_guard_analysis.json b/examples/agent_runtime/aiguard_runtime_guard_analysis.json index c034d8e..86080aa 100644 --- a/examples/agent_runtime/aiguard_runtime_guard_analysis.json +++ b/examples/agent_runtime/aiguard_runtime_guard_analysis.json @@ -55,21 +55,70 @@ "queue_depth_sample_count": 1, "latency_sample_count": 1 } + }, + { + "type": "stale_frame_risk", + "metric_name": "stale_drop_rate", + "observed_value": 0.714, + "baseline_value": 0, + "threshold": 0.2, + "delta": null, + "delta_pct": null, + "increase_factor": null, + "severity": "high", + "status": "failed", + "explanation": "Orchestrator reported 5 stale/backlog drop events.", + "why_it_matters": "Stale frame or backlog drops can protect high-priority work, but they also show that lower-priority Vision or command workloads may lose fresh inputs under sustained multi-agent load.", + "suspected_causes": [ + "load_shedding_context", + "stale_queue_overflow" + ], + 
"recommendation": "Review tasks_with_stale_drop, stale_drop_reasons, queue depth, producer rate, and fallback policy in Lab before treating the operation profile as stable.", + "raw_context": { + "stale_drop_count": 5, + "total_drop_count": 7, + "stale_drop_rate": 0.714, + "stale_drop_reasons": { + "load_shedding_backlog_threshold_exceeded": 3, + "queue_overflow_drop_oldest": 2 + }, + "stale_drop_reason_classes": [ + "load_shedding_stale_backlog", + "stale_queue_overflow" + ], + "tasks_with_stale_drop": [ + "vision_agent", + "voice_command_agent" + ], + "latest_stale_drop_event": { + "task": "voice_command_agent", + "agent_id": "voice_command_agent", + "reason": "queue_overflow_drop_oldest", + "stale_drop_class": "stale_queue_overflow" + }, + "decision_owner": "lab", + "scheduler_owner": "orchestrator", + "not_a_deployment_decision": true + } } ], "suspected_causes": [ "queue_backlog", "overload_load_shedding", "producer_rate_exceeds_runtime_capacity", - "sustained_multi_agent_overload" + "sustained_multi_agent_overload", + "stale_queue_overflow" ], "recommendations": [ "Tune target FPS, queue size, drop policy, or fallback policy for affected agents.", - "Lower producer rate, tighten stale-frame drop policy, or move lower priority work behind a fallback path before deployment." + "Lower producer rate, tighten stale-frame drop policy, or move lower priority work behind a fallback path before deployment.", + "Review stale drop reasons and affected agent workloads in Lab." 
], "thresholds": { "drop_rate_review": 0.2, - "drop_rate_blocked": 0.5 + "drop_rate_blocked": 0.5, + "stale_drop_rate_review": 0.2, + "stale_drop_rate_blocked": 0.5 }, "baseline_summary": {}, "candidate_summary": { diff --git a/inferedgelab/commands/agent_runtime_report.py b/inferedgelab/commands/agent_runtime_report.py index 73df0d8..5eed8c7 100644 --- a/inferedgelab/commands/agent_runtime_report.py +++ b/inferedgelab/commands/agent_runtime_report.py @@ -79,6 +79,7 @@ def _text_summary(report: dict) -> str: remote_runtime_event_summary = ( remote_context.get("remote_runtime_event_summary") or {} ) + orchestrator_guard = report.get("orchestrator_operation_guard_summary") or {} edgeenv_context = ( report["agent_runtime_summary"].get("edgeenv_preservation_context") or {} ) @@ -113,6 +114,9 @@ def _text_summary(report: dict) -> str: f"remote_runtime_event_count: {remote_runtime_event_summary.get('runtime_event_count')}", f"remote_runtime_event_final_status: {remote_runtime_event_summary.get('final_status')}", f"remote_runtime_summary_boundary: {remote_runtime_event_summary.get('operation_boundary')}", + f"stale_drop_count: {orchestrator_guard.get('stale_drop_count')}", + f"stale_drop_rate: {orchestrator_guard.get('stale_drop_rate')}", + f"tasks_with_stale_drop: {', '.join(orchestrator_guard.get('tasks_with_stale_drop') or [])}", f"edgeenv_run_id: {edgeenv_context.get('run_id')}", f"edgeenv_runtime_operation_health: {edgeenv_context.get('runtime_operation_health_reason')}", f"edgeenv_runtime_operation_action: {edgeenv_context.get('runtime_operation_recommended_action')}", diff --git a/inferedgelab/services/agent_runtime_report.py b/inferedgelab/services/agent_runtime_report.py index b8a5fc5..9d5a8ba 100644 --- a/inferedgelab/services/agent_runtime_report.py +++ b/inferedgelab/services/agent_runtime_report.py @@ -30,6 +30,10 @@ ORCHESTRATOR_OPERATION_GUARD_EVIDENCE_TYPES = { "worker_health_degradation", "scheduler_delay_pattern", + "operation_timeline_summary", + 
"stale_frame_risk", + "edgeenv_orchestrator_operation_timeline_summary", + "edgeenv_orchestrator_stale_drop_summary", } DEFAULT_AGENT_RUNTIME_THRESHOLDS = { @@ -877,6 +881,11 @@ def build_agent_runtime_reliability_markdown(report: dict[str, Any]) -> str: f"| policy_decision_reasons | {_fmt_mapping(orchestrator_guard.get('policy_decision_reason_counts'))} |", f"| drop_reasons | {_fmt_mapping(orchestrator_guard.get('drop_reason_counts'))} |", f"| scheduler_delay_event_count | {_fmt_number(orchestrator_guard.get('scheduler_delay_event_count'))} |", + f"| stale_drop_count | {_fmt_number(orchestrator_guard.get('stale_drop_count'))} |", + f"| stale_drop_rate | {_fmt_number(orchestrator_guard.get('stale_drop_rate'))} |", + f"| tasks_with_stale_drop | {', '.join(orchestrator_guard.get('tasks_with_stale_drop') or []) or '-'} |", + f"| stale_drop_reasons | {_fmt_mapping(orchestrator_guard.get('stale_drop_reasons'))} |", + f"| stale_drop_reason_classes | {', '.join(orchestrator_guard.get('stale_drop_reason_classes') or []) or '-'} |", "", "Orchestrator operation guard evidence:", "", @@ -1494,7 +1503,12 @@ def _orchestrator_operation_guard_summary( health_reason_counts: dict[str, int] = {} policy_decision_reason_counts: dict[str, int] = {} drop_reason_counts: dict[str, int] = {} + stale_drop_reason_counts: dict[str, int] = {} + stale_drop_reason_classes: list[str] = [] + tasks_with_stale_drop: list[str] = [] scheduler_delay_event_count = 0.0 + stale_drop_count = 0.0 + stale_drop_rate = 0.0 for item in evidence: detail = _orchestrator_operation_guard_detail(item) health_reason_counts = _merge_count_maps( @@ -1509,10 +1523,34 @@ def _orchestrator_operation_guard_summary( drop_reason_counts, detail.get("drop_reason_counts"), ) + stale_drop_reason_counts = _merge_count_maps( + stale_drop_reason_counts, + detail.get("stale_drop_reasons"), + ) + stale_drop_reason_classes = _unique_strings( + [ + *stale_drop_reason_classes, + *_string_list(detail.get("stale_drop_reason_classes")), + 
] + ) + tasks_with_stale_drop = _unique_strings( + [ + *tasks_with_stale_drop, + *_string_list(detail.get("tasks_with_stale_drop")), + ] + ) scheduler_delay_event_count = max( scheduler_delay_event_count, _non_negative_number(detail.get("scheduler_delay_event_count")), ) + stale_drop_count = max( + stale_drop_count, + _non_negative_number(detail.get("stale_drop_count")), + ) + stale_drop_rate = max( + stale_drop_rate, + _non_negative_number(detail.get("stale_drop_rate")), + ) metric_context = metrics if isinstance(metrics, dict) else {} if not policy_decision_reason_counts: policy_decision_reason_counts = _count_mapping( @@ -1547,6 +1585,11 @@ def _orchestrator_operation_guard_summary( "policy_decision_reason_counts": policy_decision_reason_counts, "drop_reason_counts": drop_reason_counts, "scheduler_delay_event_count": scheduler_delay_event_count, + "stale_drop_count": stale_drop_count, + "stale_drop_rate": stale_drop_rate, + "stale_drop_reasons": stale_drop_reason_counts, + "stale_drop_reason_classes": stale_drop_reason_classes, + "tasks_with_stale_drop": tasks_with_stale_drop, "evidence": [ { "type": item.get("type"), @@ -1575,6 +1618,7 @@ def _orchestrator_operation_guard_detail( health_reason_counts = {} if isinstance(worker_health, dict): health_reason_counts = _count_mapping(worker_health.get("health_reason_counts")) + stale_drop = _stale_drop_guard_context(raw_context) return { "health_reason_counts": health_reason_counts, "policy_decision_reason_counts": _count_mapping( @@ -1587,6 +1631,59 @@ def _orchestrator_operation_guard_detail( "scheduler_delay_event_count": _non_negative_number( raw_context.get("scheduler_delay_event_count") ), + "stale_drop_count": stale_drop.get("stale_drop_count", 0.0), + "total_drop_count": stale_drop.get("total_drop_count", 0.0), + "stale_drop_rate": stale_drop.get("stale_drop_rate", 0.0), + "stale_drop_reasons": stale_drop.get("stale_drop_reasons", {}), + "stale_drop_reason_classes": stale_drop.get( + 
"stale_drop_reason_classes", [] + ), + "tasks_with_stale_drop": stale_drop.get("tasks_with_stale_drop", []), + "latest_stale_drop_event": stale_drop.get("latest_stale_drop_event", {}), + "stale_drop_boundary_markers_valid": stale_drop.get( + "boundary_markers_valid" + ), + "stale_drop_decision_owner": stale_drop.get("decision_owner"), + "stale_drop_scheduler_owner": stale_drop.get("scheduler_owner"), + "stale_drop_not_a_deployment_decision": stale_drop.get( + "not_a_deployment_decision" + ), + } + + +def _stale_drop_guard_context(raw_context: dict[str, Any]) -> dict[str, Any]: + candidate = raw_context.get("stale_drop_summary") + if not isinstance(candidate, dict): + candidate = {} + summary = candidate.get("summary") + if isinstance(summary, dict): + source = {**summary, **candidate} + source.pop("summary", None) + else: + source = {**candidate, **raw_context} + total_drop_count = _non_negative_number(source.get("total_drop_count")) + stale_drop_count = _non_negative_number(source.get("stale_drop_count")) + stale_drop_rate = _non_negative_number(source.get("stale_drop_rate")) + if stale_drop_rate <= 0 and total_drop_count > 0: + stale_drop_rate = stale_drop_count / total_drop_count + return { + "stale_drop_count": stale_drop_count, + "total_drop_count": total_drop_count, + "stale_drop_rate": stale_drop_rate, + "stale_drop_reasons": _count_mapping(source.get("stale_drop_reasons")), + "stale_drop_reason_classes": _string_list( + source.get("stale_drop_reason_classes") + ), + "tasks_with_stale_drop": _string_list(source.get("tasks_with_stale_drop")), + "latest_stale_drop_event": ( + dict(source.get("latest_stale_drop_event")) + if isinstance(source.get("latest_stale_drop_event"), dict) + else {} + ), + "boundary_markers_valid": source.get("boundary_markers_valid"), + "decision_owner": source.get("decision_owner"), + "scheduler_owner": source.get("scheduler_owner"), + "not_a_deployment_decision": source.get("not_a_deployment_decision"), } diff --git 
a/tests/test_agent_runtime_report.py b/tests/test_agent_runtime_report.py index 2079910..447bde9 100644 --- a/tests/test_agent_runtime_report.py +++ b/tests/test_agent_runtime_report.py @@ -460,16 +460,72 @@ def orchestrator_operation_guard_analysis() -> dict: }, }, }, + { + "type": "stale_frame_risk", + "metric_name": "stale_drop_rate", + "observed_value": 0.714, + "baseline_value": 0, + "threshold": 0.2, + "delta": None, + "delta_pct": None, + "increase_factor": None, + "severity": "high", + "status": "failed", + "explanation": ( + "Orchestrator reported 5 stale/backlog drop events." + ), + "why_it_matters": ( + "Stale frame or backlog drops can protect high-priority " + "work, but they also show that lower-priority workloads may " + "lose fresh inputs." + ), + "suspected_causes": [ + "load_shedding_context", + "stale_queue_overflow", + ], + "recommendation": ( + "Review tasks_with_stale_drop, stale_drop_reasons, queue " + "depth, producer rate, and fallback policy in Lab." + ), + "raw_context": { + "stale_drop_count": 5, + "total_drop_count": 7, + "stale_drop_rate": 0.714, + "stale_drop_reasons": { + "load_shedding_backlog_threshold_exceeded": 3, + "queue_overflow_drop_oldest": 2, + }, + "stale_drop_reason_classes": [ + "load_shedding_stale_backlog", + "stale_queue_overflow", + ], + "tasks_with_stale_drop": [ + "vision_agent", + "voice_command_agent", + ], + "latest_stale_drop_event": { + "task": "voice_command_agent", + "agent_id": "voice_command_agent", + "reason": "queue_overflow_drop_oldest", + "stale_drop_class": "stale_queue_overflow", + }, + "decision_owner": "lab", + "scheduler_owner": "orchestrator", + "not_a_deployment_decision": True, + }, + }, ] ) data["suspected_causes"] = [ *data["suspected_causes"], "fallback_policy_used", "scheduler_queue_wait", + "stale_queue_overflow", ] data["recommendations"] = [ *data["recommendations"], "Inspect worker health reasons and scheduler delay timeline.", + "Review stale drop reasons and affected agent workloads 
in Lab.", ] return data @@ -1340,12 +1396,13 @@ def test_agent_runtime_report_summarizes_orchestrator_operation_guard_evidence() ) orchestrator_guard = report["orchestrator_operation_guard_summary"] - assert orchestrator_guard["evidence_count"] == 2 - assert orchestrator_guard["failed_count"] == 1 + assert orchestrator_guard["evidence_count"] == 3 + assert orchestrator_guard["failed_count"] == 2 assert orchestrator_guard["warning_count"] == 1 assert orchestrator_guard["evidence_types"] == [ "worker_health_degradation", "scheduler_delay_pattern", + "stale_frame_risk", ] assert orchestrator_guard["health_reasons"] == [ "fallback_policy_used", @@ -1364,18 +1421,71 @@ def test_agent_runtime_report_summarizes_orchestrator_operation_guard_evidence() "load_shedding_backlog_threshold_exceeded": 1, } assert orchestrator_guard["scheduler_delay_event_count"] == 2 + assert orchestrator_guard["stale_drop_count"] == 5 + assert orchestrator_guard["stale_drop_rate"] == 0.714 + assert orchestrator_guard["stale_drop_reasons"] == { + "load_shedding_backlog_threshold_exceeded": 3, + "queue_overflow_drop_oldest": 2, + } + assert orchestrator_guard["stale_drop_reason_classes"] == [ + "load_shedding_stale_backlog", + "stale_queue_overflow", + ] + assert orchestrator_guard["tasks_with_stale_drop"] == [ + "vision_agent", + "voice_command_agent", + ] + + +def test_agent_runtime_report_command_text_surfaces_stale_drop(capsys): + agent_runtime_report_cmd( + orchestration_summary="examples/agent_runtime/agent_3_orchestration_summary.json", + guard_analysis="examples/agent_runtime/aiguard_runtime_guard_analysis.json", + runtime_result="", + remote_dispatch="", + edgeenv_run_show="", + format="text", + output="", + ) + + out = capsys.readouterr().out + assert "stale_drop_count: 5" in out + assert "stale_drop_rate: 0.714" in out + assert "tasks_with_stale_drop: vision_agent, voice_command_agent" in out + + report = build_agent_runtime_reliability_report( + 
orchestration_summary=orchestration_summary(), + guard_analysis=orchestrator_operation_guard_analysis(), + ) + orchestrator_guard = report["orchestrator_operation_guard_summary"] assert orchestrator_guard["evidence"][1]["runtime_event_reason_counts"] == { "scheduler_delay_observed": 2, } + assert orchestrator_guard["evidence"][2]["stale_drop_boundary_markers_valid"] is None + assert orchestrator_guard["evidence"][2]["stale_drop_decision_owner"] == "lab" + assert ( + orchestrator_guard["evidence"][2]["stale_drop_scheduler_owner"] + == "orchestrator" + ) + assert ( + orchestrator_guard["evidence"][2]["stale_drop_not_a_deployment_decision"] + is True + ) markdown = build_agent_runtime_reliability_markdown(report) assert "AIGuard Orchestrator Operation Evidence" in markdown assert "worker_health_degradation" in markdown assert "scheduler_delay_pattern" in markdown + assert "stale_frame_risk" in markdown assert "policy_decision_reasons" in markdown assert "queue_backlog_threshold_exceeded=1" in markdown assert "drop_reasons" in markdown assert "load_shedding_backlog_threshold_exceeded=1" in markdown + assert "stale_drop_count" in markdown + assert "stale_drop_rate" in markdown + assert "tasks_with_stale_drop" in markdown + assert "vision_agent, voice_command_agent" in markdown + assert "queue_overflow_drop_oldest=2" in markdown def test_agent_runtime_report_marks_runtime_timeout_as_review(): @@ -1509,6 +1619,9 @@ def test_agent_runtime_report_markdown_contains_sections(): assert "AIGuard Orchestrator Operation Evidence" in markdown assert "worker_health_degradation" in markdown assert "scheduler_delay_pattern" in markdown + assert "stale_frame_risk" in markdown + assert "stale_drop_count" in markdown + assert "tasks_with_stale_drop" in markdown assert "Remote Dispatch Context" in markdown assert "Remote execution starter evidence" in markdown assert "jetson-nano-01" in markdown @@ -1560,6 +1673,14 @@ def test_agent_runtime_report_loads_committed_fixtures(): 
"path=agent_runtime_preservation" ) assert "device_local_events=0" in context["preservation_details_label"] + orchestrator_guard = report["orchestrator_operation_guard_summary"] + assert "stale_frame_risk" in orchestrator_guard["evidence_types"] + assert orchestrator_guard["stale_drop_count"] == 5 + assert orchestrator_guard["stale_drop_rate"] == 0.714 + assert orchestrator_guard["tasks_with_stale_drop"] == [ + "vision_agent", + "voice_command_agent", + ] def test_agent_runtime_report_surfaces_remote_execution_failure(): @@ -1706,3 +1827,9 @@ def test_agent_runtime_report_command_outputs_json(tmp_path, capsys): ] edgeenv_context = report["agent_runtime_summary"]["edgeenv_preservation_context"] assert edgeenv_context["run_id"] == "run-20260529-094714-0955a027" + orchestrator_guard = report["orchestrator_operation_guard_summary"] + assert orchestrator_guard["stale_drop_count"] == 5 + assert orchestrator_guard["tasks_with_stale_drop"] == [ + "vision_agent", + "voice_command_agent", + ]