Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ agk trace mermaid > trace_flow.md
| `trace list` | List all captured trace runs. |
| `trace show` | Display summary of a specific run. |
| `trace view` | Open the interactive TUI trace explorer. |
| `trace diff` | Compare two trace runs (duration, tokens, cost, LLM calls). |
| `trace mermaid` | Generate Mermaid flowchart of trace execution. |

---
Expand Down
202 changes: 202 additions & 0 deletions cmd/trace_diff.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
package cmd

import (
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"text/tabwriter"
"time"

"github.com/fatih/color"
"github.com/spf13/cobra"
)

// diffMetric is a single comparable metric between two runs: the baseline
// value (A), the new value (B), and the rules for presenting their difference.
type diffMetric struct {
	// Label is the row name shown in the first column of the diff table.
	Label string

	// A is the baseline run's value and B is the new run's value.
	A, B float64

	// LowerBetter reports that a lower B than A is an improvement.
	LowerBetter bool

	// Colorize reports whether the delta should be colored good/bad; when it
	// is false the change is always treated as neutral.
	Colorize bool

	// Format renders a metric value (or the magnitude of a delta) for display.
	Format func(float64) string
}

// diffCmd is the "diff" command: it accepts at most two positional run IDs
// and hands them to runTraceDiff, which picks the runs to compare and prints
// the result.
var diffCmd = &cobra.Command{
	Use:   "diff [run-a] [run-b]",
	Short: "Compare two trace runs (duration, tokens, cost, ...)",
	// Zero, one, or two run IDs are valid; see the run selection rules below.
	Args: cobra.MaximumNArgs(2),
	// The command handle itself is not needed, only the positional arguments.
	RunE: func(_ *cobra.Command, runIDs []string) error {
		return runTraceDiff(runIDs)
	},
	Long: `Compare two trace runs and show the deltas for duration, spans, LLM calls,
tokens, and estimated cost.

Run selection:
agk trace diff # compare the two most recent runs
agk trace diff <run-a> # compare <run-a> (baseline) against the latest run
agk trace diff <run-a> <run-b> # compare two explicit runs

The first run is treated as the baseline (A); the second is the new run (B).
For duration, tokens, and cost, a lower value in B is shown as an improvement.`,
}

// init registers diffCmd on traceCmd via AddCommand at package
// initialization time.
func init() {
	traceCmd.AddCommand(diffCmd)
}

// runTraceDiff resolves the two runs selected by args, reads each run's
// manifest from its directory under runsDirName, and prints the comparison.
//
// It returns an error when the runs cannot be resolved, when a run directory
// does not exist or cannot be inspected, or when a manifest cannot be read.
func runTraceDiff(args []string) error {
	idA, idB, err := resolveDiffRuns(args)
	if err != nil {
		return err
	}

	// Check both run directories up front so a mistyped ID is reported by
	// name before any manifest is read.
	for _, id := range []string{idA, idB} {
		if _, err := os.Stat(filepath.Join(runsDirName, id)); err != nil {
			if os.IsNotExist(err) {
				return fmt.Errorf("trace not found: %s", id)
			}
			// Permission or I/O failures are not "not found"; keep the cause
			// so the user can see what actually went wrong.
			return fmt.Errorf("cannot access trace %s: %w", id, err)
		}
	}

	manifestA, err := readManifest(filepath.Join(runsDirName, idA))
	if err != nil {
		return fmt.Errorf("failed to read run %s: %w", idA, err)
	}
	manifestB, err := readManifest(filepath.Join(runsDirName, idB))
	if err != nil {
		return fmt.Errorf("failed to read run %s: %w", idB, err)
	}

	printDiff(manifestA, manifestB)
	return nil
}

// resolveDiffRuns picks the baseline and new run IDs, in that order, from the
// positional arguments:
//
//   - two args: both IDs are used exactly as given;
//   - one arg: the given ID is the baseline and the most recent other run
//     reported by recentRunIDs is the new run;
//   - otherwise: the two most recent runs are used, older one as baseline.
//
// An error is returned when there are not enough runs to form a pair.
func resolveDiffRuns(args []string) (string, string, error) {
	if len(args) == 2 {
		return args[0], args[1], nil
	}

	if len(args) == 1 {
		baseline := args[0]
		// recentRunIDs is newest-first, so the first ID that is not the
		// baseline itself is the latest run to compare against.
		for _, candidate := range recentRunIDs(runsDirName) {
			if candidate == baseline {
				continue
			}
			if candidate == "" {
				break
			}
			return baseline, candidate, nil
		}
		return "", "", fmt.Errorf("need a second run to diff against %s", baseline)
	}

	recent := recentRunIDs(runsDirName)
	if len(recent) < 2 {
		return "", "", fmt.Errorf("need at least two trace runs to diff (found %d)", len(recent))
	}
	// recent[0] is the newest run; the one just before it is the baseline.
	return recent[1], recent[0], nil
}

// recentRunIDs returns the names of the sub-directories of runsDir sorted
// newest-first by modification time.
//
// Entries that are not directories, or whose file info cannot be read, are
// skipped. Runs that share a modification time are ordered by name
// (descending) purely so the result is deterministic. It returns nil when
// runsDir itself cannot be read.
func recentRunIDs(runsDir string) []string {
	entries, err := os.ReadDir(runsDir)
	if err != nil {
		// Best effort: an unreadable runs directory is reported as "no runs".
		return nil
	}

	type run struct {
		name string
		mod  time.Time
	}
	runs := make([]run, 0, len(entries))
	for _, e := range entries {
		if !e.IsDir() {
			continue
		}
		info, err := e.Info()
		if err != nil {
			// The entry may have vanished since ReadDir; ignore it.
			continue
		}
		runs = append(runs, run{name: e.Name(), mod: info.ModTime()})
	}

	sort.Slice(runs, func(i, j int) bool {
		if !runs[i].mod.Equal(runs[j].mod) {
			return runs[i].mod.After(runs[j].mod)
		}
		// sort.Slice is not stable, so break timestamp ties explicitly.
		return runs[i].name > runs[j].name
	})

	ids := make([]string, len(runs))
	for i, r := range runs {
		ids[i] = r.name
	}
	return ids
}

// runDiffMetrics builds the ordered list of metrics compared between the
// baseline run a and the new run b. Duration, tokens, and estimated cost are
// colorized with "lower is better"; span and LLM call counts are reported
// without a good/bad judgement.
func runDiffMetrics(a, b TraceRun) []diffMetric {
	duration := diffMetric{
		Label:       "Duration",
		A:           a.Duration,
		B:           b.Duration,
		LowerBetter: true,
		Colorize:    true,
		Format:      fmtSeconds,
	}
	spans := diffMetric{
		Label:  "Spans",
		A:      float64(a.SpanCount),
		B:      float64(b.SpanCount),
		Format: fmtCount,
	}
	llmCalls := diffMetric{
		Label:  "LLM Calls",
		A:      float64(a.LLMCalls),
		B:      float64(b.LLMCalls),
		Format: fmtCount,
	}
	tokens := diffMetric{
		Label:       "Tokens",
		A:           float64(a.TotalTokens),
		B:           float64(b.TotalTokens),
		LowerBetter: true,
		Colorize:    true,
		Format:      fmtCount,
	}
	cost := diffMetric{
		Label:       "Est. Cost",
		A:           a.EstimatedCost,
		B:           b.EstimatedCost,
		LowerBetter: true,
		Colorize:    true,
		Format:      fmtUSD,
	}

	// Row order here is the row order of the printed table.
	return []diffMetric{duration, spans, llmCalls, tokens, cost}
}

// deltaDirection classifies the change from m.A to m.B: +1 when B improves on
// A, -1 when it regresses, and 0 when the metric is not colorized or the two
// values are equal.
func deltaDirection(m diffMetric) int {
	if !m.Colorize {
		return 0
	}

	change := m.B - m.A
	if change == 0 {
		return 0
	}

	// A decrease is good exactly when lower is better; otherwise flip it.
	improved := change < 0
	if !m.LowerBetter {
		improved = !improved
	}
	if improved {
		return 1
	}
	return -1
}

// printDiff writes a comparison of run a (baseline) and run b (new) to
// standard output: a title, both run IDs, a horizontal rule, and then one
// table row per metric from runDiffMetrics showing A, B, and the delta.
// The delta is colored green for an improvement and red for a regression, as
// decided by deltaDirection; neutral deltas are left uncolored.
func printDiff(a, b TraceRun) {
	fmt.Println()
	color.Cyan("📊 Trace Diff")
	fmt.Printf(" A (baseline): %s\n", a.RunID)
	fmt.Printf(" B (new): %s\n", b.RunID)
	fmt.Println(strings.Repeat("─", 64))

	w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
	fmt.Fprintln(w, "METRIC\tA (baseline)\tB (new)\tΔ")
	for _, m := range runDiffMetrics(a, b) {
		delta := formatDelta(m)
		// The colored delta is the last cell of the row, so the escape
		// sequences do not take part in column width calculation.
		switch deltaDirection(m) {
		case 1:
			delta = color.GreenString(delta)
		case -1:
			delta = color.RedString(delta)
		}
		fmt.Fprintf(w, "%s\t%s\t%s\t%s\n", m.Label, m.Format(m.A), m.Format(m.B), delta)
	}
	// A failed write to stdout cannot be reported anywhere more useful, and
	// this function has no error return, so the flush error is dropped.
	_ = w.Flush()
	fmt.Println()
}

// formatDelta renders the change from m.A to m.B as a signed magnitude
// (formatted with m.Format), a direction arrow, and, when the baseline is
// non-zero, the change as a whole-number percentage of the baseline. Equal
// values render as a lone dash placeholder.
func formatDelta(m diffMetric) string {
	change := m.B - m.A

	var sign, arrow string
	switch {
	case change == 0:
		return "β€”"
	case change < 0:
		sign, arrow = "-", "β–Ό"
	default:
		sign, arrow = "+", "β–²"
	}

	// The sign is printed separately, so only the magnitude is formatted.
	text := fmt.Sprintf("%s%s %s", sign, m.Format(absF(change)), arrow)
	if m.A == 0 {
		// No meaningful percentage against a zero baseline.
		return text
	}
	return text + fmt.Sprintf(" %.0f%%", change/m.A*100)
}

// fmtSeconds renders v with two decimal places and an "s" suffix.
func fmtSeconds(v float64) string {
	return fmt.Sprintf("%.2fs", v)
}
// fmtCount renders v with no fractional digits.
func fmtCount(v float64) string {
	return fmt.Sprintf("%.0f", v)
}
// fmtUSD renders v with a leading "$" and four decimal places.
func fmtUSD(v float64) string {
	return fmt.Sprintf("$%.4f", v)
}

// absF returns the magnitude of v: v itself unless v is below zero, in which
// case the negation of v.
func absF(v float64) float64 {
	if v >= 0 {
		return v
	}
	return -v
}
73 changes: 73 additions & 0 deletions cmd/trace_diff_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
package cmd

import "testing"

// TestDeltaDirection checks that deltaDirection reports improvement (+1),
// regression (-1), or neutral (0) for each combination of direction,
// LowerBetter, and Colorize listed in the table.
func TestDeltaDirection(t *testing.T) {
	type scenario struct {
		name   string
		metric diffMetric
		want   int
	}

	table := []scenario{
		{name: "lower-better improvement", metric: diffMetric{A: 10, B: 5, LowerBetter: true, Colorize: true}, want: 1},
		{name: "lower-better regression", metric: diffMetric{A: 5, B: 10, LowerBetter: true, Colorize: true}, want: -1},
		{name: "equal is neutral", metric: diffMetric{A: 7, B: 7, LowerBetter: true, Colorize: true}, want: 0},
		{name: "not colorized is neutral", metric: diffMetric{A: 10, B: 5, LowerBetter: true, Colorize: false}, want: 0},
		{name: "higher-better improvement", metric: diffMetric{A: 5, B: 10, LowerBetter: false, Colorize: true}, want: 1},
	}

	for i := range table {
		sc := table[i]
		t.Run(sc.name, func(t *testing.T) {
			got := deltaDirection(sc.metric)
			if got == sc.want {
				return
			}
			t.Errorf("deltaDirection = %d, want %d", got, sc.want)
		})
	}
}

// TestRunDiffMetrics checks that runDiffMetrics yields five metrics and that
// their direction is classified as expected when every value in the new run
// is lower than in the baseline.
func TestRunDiffMetrics(t *testing.T) {
	baseline := TraceRun{Duration: 2.0, SpanCount: 5, LLMCalls: 2, TotalTokens: 1000, EstimatedCost: 0.0100}
	updated := TraceRun{Duration: 1.0, SpanCount: 4, LLMCalls: 1, TotalTokens: 500, EstimatedCost: 0.0050}

	metrics := runDiffMetrics(baseline, updated)
	if n := len(metrics); n != 5 {
		t.Fatalf("expected 5 metrics, got %d", n)
	}

	// find returns the metric with the given label, or the zero diffMetric
	// when no metric carries that label.
	find := func(label string) diffMetric {
		var hit diffMetric
		for _, m := range metrics {
			if m.Label == label {
				hit = m
			}
		}
		return hit
	}

	// Tokens halved -> improvement.
	if dir := deltaDirection(find("Tokens")); dir != 1 {
		t.Errorf("Tokens direction = %d, want 1 (improvement)", dir)
	}
	// Cost halved -> improvement.
	if dir := deltaDirection(find("Est. Cost")); dir != 1 {
		t.Errorf("Cost direction = %d, want 1 (improvement)", dir)
	}
	// Spans is not colorized -> neutral even though the value changed.
	if dir := deltaDirection(find("Spans")); dir != 0 {
		t.Errorf("Spans direction = %d, want 0 (neutral)", dir)
	}
}

func TestFormatDelta(t *testing.T) {
if got := formatDelta(diffMetric{A: 10, B: 10, Format: fmtCount}); got != "β€”" {
t.Errorf("equal delta = %q, want em dash", got)
}
// 1000 -> 500 tokens: -500, 50% lower.
got := formatDelta(diffMetric{A: 1000, B: 500, Format: fmtCount})
if got != "-500 β–Ό -50%" {
t.Errorf("formatDelta = %q, want %q", got, "-500 β–Ό -50%")
}
}

// TestResolveDiffRunsExplicit checks that two explicit run IDs are returned
// unchanged and in the order given, with no error.
func TestResolveDiffRunsExplicit(t *testing.T) {
	explicit := []string{"run-1", "run-2"}

	a, b, err := resolveDiffRuns(explicit)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if matched := a == "run-1" && b == "run-2"; !matched {
		t.Errorf("got (%s, %s), want (run-1, run-2)", a, b)
	}
}
Loading