diff --git a/README.md b/README.md
index 375d8a7..2630bce 100644
--- a/README.md
+++ b/README.md
@@ -203,6 +203,7 @@ agk trace mermaid > trace_flow.md
 | `trace list` | List all captured trace runs. |
 | `trace show` | Display summary of a specific run. |
 | `trace view` | Open the interactive TUI trace explorer. |
+| `trace diff` | Compare two trace runs (duration, tokens, cost, LLM calls). |
 | `trace mermaid` | Generate Mermaid flowchart of trace execution. |
 
 ---
diff --git a/cmd/trace_diff.go b/cmd/trace_diff.go
new file mode 100644
index 0000000..4fc72d2
--- /dev/null
+++ b/cmd/trace_diff.go
@@ -0,0 +1,226 @@
+package cmd
+
+import (
+	"errors"
+	"fmt"
+	"io/fs"
+	"os"
+	"path/filepath"
+	"sort"
+	"strings"
+	"text/tabwriter"
+	"time"
+
+	"github.com/fatih/color"
+	"github.com/spf13/cobra"
+)
+
+// diffMetric is a single comparable metric between two runs. A holds the value
+// from the baseline run and B the value from the new run; Format renders a
+// value of this metric (or the size of the change between A and B) as text.
+type diffMetric struct {
+	Label       string
+	A, B        float64
+	LowerBetter bool // a lower B than A is an improvement
+	Colorize    bool // whether the delta should be colored good/bad
+	Format      func(float64) string
+}
+
+// diffCmd compares two trace runs to answer "did my change help?".
+// It takes at most two run IDs; resolveDiffRuns fills in any that are omitted.
+var diffCmd = &cobra.Command{
+	Use:   "diff [run-a] [run-b]",
+	Short: "Compare two trace runs (duration, tokens, cost, ...)",
+	Long: `Compare two trace runs and show the deltas for duration, spans, LLM calls,
+tokens, and estimated cost.
+
+Run selection:
+  agk trace diff                  # compare the two most recent runs
+  agk trace diff run-a            # compare run-a (baseline) against the latest run
+  agk trace diff run-a run-b      # compare two explicit runs
+
+The first run is treated as the baseline (A); the second is the new run (B).
+For duration, tokens, and cost, a lower value in B is shown as an improvement.`,
+	Args: cobra.MaximumNArgs(2),
+	RunE: func(cmd *cobra.Command, args []string) error {
+		return runTraceDiff(args)
+	},
+}
+
+// init registers diffCmd as a subcommand of traceCmd.
+func init() {
+	traceCmd.AddCommand(diffCmd)
+}
+
+// runTraceDiff resolves the two runs to compare from args, reads each run with
+// readManifest, and prints the comparison via printDiff. It returns an error if
+// the runs cannot be resolved, a run directory is missing or cannot be
+// inspected, or either run fails to load.
+func runTraceDiff(args []string) error {
+	idA, idB, err := resolveDiffRuns(args)
+	if err != nil {
+		return err
+	}
+
+	// Check both run directories first so a missing run is reported by ID
+	// before anything is read.
+	for _, id := range []string{idA, idB} {
+		_, err := os.Stat(filepath.Join(runsDirName, id))
+		switch {
+		case errors.Is(err, fs.ErrNotExist):
+			return fmt.Errorf("trace not found: %s", id)
+		case err != nil:
+			// Permission or I/O failures are not "not found"; keep the cause.
+			return fmt.Errorf("cannot access trace %s: %w", id, err)
+		}
+	}
+
+	manifestA, err := readManifest(filepath.Join(runsDirName, idA))
+	if err != nil {
+		return fmt.Errorf("failed to read run %s: %w", idA, err)
+	}
+	manifestB, err := readManifest(filepath.Join(runsDirName, idB))
+	if err != nil {
+		return fmt.Errorf("failed to read run %s: %w", idB, err)
+	}
+
+	printDiff(manifestA, manifestB)
+	return nil
+}
+
+// resolveDiffRuns determines the two run IDs to compare based on the args provided.
+// The first result is the baseline (A) and the second is the new run (B):
+//   - two args: both IDs are returned as given, without checking that they exist;
+//   - one arg: it is the baseline, and B is the most recent run other than it;
+//   - no args: the two most recent runs, with the older one as the baseline.
+//
+// An error is returned when too few runs exist to fill in the omitted IDs.
+func resolveDiffRuns(args []string) (string, string, error) {
+	switch len(args) {
+	case 2:
+		return args[0], args[1], nil
+	case 1:
+		// Skip the baseline itself so a run is never compared with itself.
+		latest := ""
+		for _, id := range recentRunIDs(runsDirName) {
+			if id != args[0] {
+				latest = id
+				break
+			}
+		}
+		if latest == "" {
+			return "", "", fmt.Errorf("need a second run to diff against %s", args[0])
+		}
+		return args[0], latest, nil
+	default: // 0 args
+		ids := recentRunIDs(runsDirName)
+		if len(ids) < 2 {
+			return "", "", fmt.Errorf("need at least two trace runs to diff (found %d)", len(ids))
+		}
+		// ids[0] is newest; baseline is the older of the two most recent.
+		return ids[1], ids[0], nil
+	}
+}
+
+// recentRunIDs returns run directory names sorted newest-first by modification time.
+func recentRunIDs(runsDir string) []string {
+	dirEntries, readErr := os.ReadDir(runsDir)
+	if readErr != nil {
+		return nil
+	}
+	// Collect the directory names together with their mtimes; entries that are
+	// not directories, or whose metadata cannot be read, are left out.
+	ids := make([]string, 0, len(dirEntries))
+	modified := make(map[string]time.Time, len(dirEntries))
+	for _, entry := range dirEntries {
+		if !entry.IsDir() {
+			continue
+		}
+		if info, infoErr := entry.Info(); infoErr == nil {
+			ids = append(ids, entry.Name())
+			modified[entry.Name()] = info.ModTime()
+		}
+	}
+	// Most recently modified first.
+	sort.Slice(ids, func(i, j int) bool { return modified[ids[i]].After(modified[ids[j]]) })
+	return ids
+}
+
+// runDiffMetrics builds the rows of the diff table, in display order, from the
+// baseline run a and the new run b. Duration, tokens and cost are colorized.
+func runDiffMetrics(a, b TraceRun) []diffMetric {
+	// scored builds a colorized row for which a lower B counts as an improvement.
+	scored := func(label string, x, y float64, render func(float64) string) diffMetric {
+		return diffMetric{Label: label, A: x, B: y, LowerBetter: true, Colorize: true, Format: render}
+	}
+	return []diffMetric{
+		scored("Duration", a.Duration, b.Duration, fmtSeconds),
+		{Label: "Spans", A: float64(a.SpanCount), B: float64(b.SpanCount), Format: fmtCount},
+		{Label: "LLM Calls", A: float64(a.LLMCalls), B: float64(b.LLMCalls), Format: fmtCount},
+		scored("Tokens", float64(a.TotalTokens), float64(b.TotalTokens), fmtCount),
+		scored("Est. Cost", a.EstimatedCost, b.EstimatedCost, fmtUSD),
+	}
+}
+
+// deltaDirection returns +1 if B is an improvement over A, -1 if a regression,
+// and 0 if unchanged or not colorized.
+func deltaDirection(m diffMetric) int {
+	switch d := m.B - m.A; {
+	case d == 0 || !m.Colorize:
+		return 0
+	case (d < 0) == m.LowerBetter:
+		return 1
+	}
+	return -1
+}
+
+// printDiff writes a header naming both runs and one table row per metric to stdout.
+func printDiff(a, b TraceRun) {
+	fmt.Println()
+	color.Cyan("📊 Trace Diff")
+	fmt.Printf(" A (baseline): %s\n", a.RunID)
+	fmt.Printf(" B (new): %s\n", b.RunID)
+	fmt.Println(strings.Repeat("─", 64))
+	w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
+	fmt.Fprintln(w, "METRIC\tA (baseline)\tB (new)\tΔ")
+	for _, m := range runDiffMetrics(a, b) {
+		delta := formatDelta(m)
+		if dir := deltaDirection(m); dir == 1 {
+			delta = color.GreenString(delta) // improvement
+		} else if dir == -1 {
+			delta = color.RedString(delta) // regression
+		}
+		fmt.Fprintf(w, "%s\t%s\t%s\t%s\n", m.Label, m.Format(m.A), m.Format(m.B), delta)
+	}
+	_ = w.Flush() // best effort: a failed write to stdout is not reported
+	fmt.Println()
+}
+
+// formatDelta renders the change from A to B as sign, amount, arrow and signed
+// percentage (e.g. "-500 ▼ -50%"), or "—" if unchanged; no percentage when A is zero.
+func formatDelta(m diffMetric) string {
+	d := m.B - m.A
+	if d == 0 {
+		return "—"
+	}
+	sign, arrow := "+", "▲"
+	if d < 0 {
+		sign, arrow = "-", "▼"
+	}
+	out := fmt.Sprintf("%s%s %s", sign, m.Format(absF(d)), arrow)
+	if m.A != 0 {
+		out += fmt.Sprintf(" %+.0f%%", d/m.A*100) // signed, so it agrees with the amount
+	}
+	return out
+}
+
+func fmtSeconds(v float64) string { return fmt.Sprintf("%.2fs", v) }
+func fmtCount(v float64) string   { return fmt.Sprintf("%.0f", v) }
+func fmtUSD(v float64) string     { return fmt.Sprintf("$%.4f", v) }
+
+func absF(v float64) float64 {
+	if v < 0 {
+		return -v
+	}
+	return v
+}
diff --git a/cmd/trace_diff_test.go b/cmd/trace_diff_test.go
new file mode 100644
index 0000000..117e09b
--- /dev/null
+++ b/cmd/trace_diff_test.go
@@ -0,0 +1,84 @@
+package cmd
+
+import "testing"
+
+func TestDeltaDirection(t *testing.T) {
+	cases := []struct {
+		name string
+		m    diffMetric
+		want int
+	}{
+		{"lower-better improvement", diffMetric{A: 10, B: 5, LowerBetter: true, Colorize: true}, 1},
+		{"lower-better regression", diffMetric{A: 5, B: 10, LowerBetter: true, Colorize: true}, -1},
+		{"equal is neutral", diffMetric{A: 7, B: 7, LowerBetter: true, Colorize: true}, 0},
+		{"not colorized is neutral", diffMetric{A: 10, B: 5, LowerBetter: true, Colorize: false}, 0},
+		{"higher-better improvement", diffMetric{A: 5, B: 10, LowerBetter: false, Colorize: true}, 1},
+		{"higher-better regression", diffMetric{A: 10, B: 5, LowerBetter: false, Colorize: true}, -1},
+	}
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			if got := deltaDirection(c.m); got != c.want {
+				t.Errorf("deltaDirection = %d, want %d", got, c.want)
+			}
+		})
+	}
+}
+
+func TestRunDiffMetrics(t *testing.T) {
+	a := TraceRun{Duration: 2.0, SpanCount: 5, LLMCalls: 2, TotalTokens: 1000, EstimatedCost: 0.0100}
+	b := TraceRun{Duration: 1.0, SpanCount: 4, LLMCalls: 1, TotalTokens: 500, EstimatedCost: 0.0050}
+
+	metrics := runDiffMetrics(a, b)
+	if len(metrics) != 5 {
+		t.Fatalf("expected 5 metrics, got %d", len(metrics))
+	}
+
+	byLabel := make(map[string]diffMetric, len(metrics))
+	for _, m := range metrics {
+		byLabel[m.Label] = m
+	}
+
+	// Tokens halved → improvement.
+	if dir := deltaDirection(byLabel["Tokens"]); dir != 1 {
+		t.Errorf("Tokens direction = %d, want 1 (improvement)", dir)
+	}
+	// Cost halved → improvement.
+	if dir := deltaDirection(byLabel["Est. Cost"]); dir != 1 {
+		t.Errorf("Cost direction = %d, want 1 (improvement)", dir)
+	}
+	// Spans is not colorized → neutral even though it changed.
+	if dir := deltaDirection(byLabel["Spans"]); dir != 0 {
+		t.Errorf("Spans direction = %d, want 0 (neutral)", dir)
+	}
+}
+
+func TestFormatDelta(t *testing.T) {
+	if got := formatDelta(diffMetric{A: 10, B: 10, Format: fmtCount}); got != "—" {
+		t.Errorf("equal delta = %q, want em dash", got)
+	}
+	// 1000 -> 500 tokens: -500, 50% lower.
+	got := formatDelta(diffMetric{A: 1000, B: 500, Format: fmtCount})
+	if got != "-500 ▼ -50%" {
+		t.Errorf("formatDelta = %q, want %q", got, "-500 ▼ -50%")
+	}
+	// 500 -> 1000 tokens: +500, 100% higher; the percentage carries its sign too.
+	got = formatDelta(diffMetric{A: 500, B: 1000, Format: fmtCount})
+	if got != "+500 ▲ +100%" {
+		t.Errorf("formatDelta = %q, want %q", got, "+500 ▲ +100%")
+	}
+	// With a zero baseline no percentage can be computed; only the amount is shown.
+	got = formatDelta(diffMetric{A: 0, B: 3, Format: fmtCount})
+	if got != "+3 ▲" {
+		t.Errorf("formatDelta = %q, want %q", got, "+3 ▲")
+	}
+}
+
+func TestResolveDiffRunsExplicit(t *testing.T) {
+	a, b, err := resolveDiffRuns([]string{"run-1", "run-2"})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if a != "run-1" || b != "run-2" {
+		t.Errorf("got (%s, %s), want (run-1, run-2)", a, b)
+	}
+}