Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 35 additions & 1 deletion cmd/eval.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ package cmd
import (
"fmt"
"os"
"os/signal"
"path/filepath"
"syscall"
"time"

"github.com/spf13/cobra"
Expand Down Expand Up @@ -39,6 +41,10 @@ var (
evalOutputFormat string
evalFailFast bool
evalReportFile string
evalServe bool
evalServeDir string
evalServeCmd string
evalServeWait int
)

func init() {
Expand All @@ -50,6 +56,10 @@ func init() {
evalCmd.Flags().StringVarP(&evalOutputFormat, "format", "f", "console", "Output format (console, json, junit, markdown)")
evalCmd.Flags().BoolVar(&evalFailFast, "fail-fast", false, "Stop on first test failure")
evalCmd.Flags().StringVarP(&evalReportFile, "report", "r", "", "Save detailed report to file (auto-generated if not specified)")
evalCmd.Flags().BoolVar(&evalServe, "serve", false, "Build & launch the project in EvalServer mode, run tests, then stop it")
evalCmd.Flags().StringVar(&evalServeDir, "serve-dir", ".", "Project directory to launch when --serve is set")
evalCmd.Flags().StringVar(&evalServeCmd, "serve-cmd", "", "Custom command to launch the server (default: go run .)")
evalCmd.Flags().IntVar(&evalServeWait, "serve-timeout", 90, "Seconds to wait for the server to become healthy")
}

func runEval(cmd *cobra.Command, args []string) error {
Expand Down Expand Up @@ -86,6 +96,26 @@ func runEval(cmd *cobra.Command, args []string) error {
return nil
}

// Optionally launch the project's EvalServer for the duration of the run.
var srv *evalServer
if evalServe {
s, err := launchAndWait(evalServeDir, evalServeCmd, suite.Target.URL, evalServeWait, evalVerbose)
if err != nil {
return err
}
srv = s
defer srv.Stop()

// Ensure the server is stopped if the user interrupts the run.
sigCh := make(chan os.Signal, 1)
signal.Notify(sigCh, os.Interrupt, syscall.SIGTERM)
go func() {
<-sigCh
srv.Stop()
os.Exit(130)
}()
}

// Create test runner
runner := eval.NewRunner(&eval.RunnerConfig{
Timeout: time.Duration(evalTimeout) * time.Second,
Expand Down Expand Up @@ -139,8 +169,12 @@ func runEval(cmd *cobra.Command, args []string) error {
}
}

// Exit with error code if tests failed
// Exit with error code if tests failed. os.Exit skips deferred calls, so stop
// the server explicitly first.
if !results.AllPassed() {
if srv != nil {
srv.Stop()
}
os.Exit(1)
}

Expand Down
167 changes: 167 additions & 0 deletions cmd/eval_serve.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
package cmd

import (
"bytes"
"fmt"
"io"
"net/http"
"os"
"os/exec"
"strings"
"sync"
"syscall"
"time"
)

// evalServer tracks one EvalServer child process started for the
// `agk eval --serve` workflow so it can be inspected and torn down once the
// test run is over.
type evalServer struct {
	// once guards the shutdown sequence in Stop so repeated calls are harmless.
	once sync.Once
	// cmd is the launched child command.
	cmd *exec.Cmd
	// output accumulates whatever the child wrote to stdout and stderr.
	output *syncBuffer
}

// syncBuffer wraps a bytes.Buffer with a mutex so child-process output can be
// appended and read back from different goroutines.
type syncBuffer struct {
	mu  sync.Mutex
	buf bytes.Buffer
}

// Write appends data to the buffer while holding the lock.
func (sb *syncBuffer) Write(data []byte) (n int, err error) {
	sb.mu.Lock()
	defer sb.mu.Unlock()

	n, err = sb.buf.Write(data)
	return n, err
}

// String returns a snapshot of everything written so far.
func (sb *syncBuffer) String() string {
	sb.mu.Lock()
	defer sb.mu.Unlock()

	snapshot := sb.buf.String()
	return snapshot
}

// startEvalServer launches the project in EvalServer mode (AGK_EVAL_MODE=true).
// The default command is `go run .` in dir; customCmd overrides it. The process is
// started in its own process group so `go run`'s compiled child can be reliably killed.
//
// Everything the child writes to stdout and stderr is captured in the returned
// evalServer's output buffer. When streamOutput is true the same bytes are also
// copied to os.Stderr behind a "[server] " prefix.
//
// It returns as soon as the process has been started; it does not wait for the
// server to become ready. An error is returned only if the command fails to start.
func startEvalServer(dir, customCmd string, streamOutput bool) (*evalServer, error) {
// Resolve the executable and its arguments (falls back to `go run .`).
name, args := parseServeCmd(customCmd)

c := exec.Command(name, args...) //nolint:gosec // command is user-provided by design
c.Dir = dir
// Inherit the caller's environment and add the EvalServer switch.
c.Env = append(os.Environ(), "AGK_EVAL_MODE=true")
// NOTE(review): Setpgid is not a field of syscall.SysProcAttr on Windows; the CI
// annotations captured with this line report the windows-latest build failing
// here. The platform-specific setup presumably needs to move into
// build-constrained files (e.g. //go:build !windows) — TODO confirm approach.
c.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}

Check failure on line 51 in cmd/eval_serve.go

View workflow job for this annotation

GitHub Actions / Test (windows-latest, 1.21)

unknown field Setpgid in struct literal of type "syscall".SysProcAttr

Check failure on line 51 in cmd/eval_serve.go

View workflow job for this annotation

GitHub Actions / Test (windows-latest, 1.22)

unknown field Setpgid in struct literal of type "syscall".SysProcAttr

// Always capture output; optionally tee it to stderr with a prefix.
out := &syncBuffer{}
var w io.Writer = out
if streamOutput {
w = io.MultiWriter(out, &prefixWriter{prefix: "[server] ", w: os.Stderr})
}
// Stdout and Stderr deliberately share one writer so both streams land in the
// same buffer in arrival order.
c.Stdout = w
c.Stderr = w

if err := c.Start(); err != nil {
return nil, fmt.Errorf("failed to start eval server (%s): %w", name, err)
}
return &evalServer{cmd: c, output: out}, nil
}

// parseServeCmd splits customCmd on whitespace into an executable name and its
// arguments. A blank customCmd yields the default launch command, `go run .`.
func parseServeCmd(customCmd string) (string, []string) {
	parts := strings.Fields(customCmd)
	if len(parts) == 0 {
		return "go", []string{"run", "."}
	}
	return parts[0], parts[1:]
}

// Stop shuts the server down: it sends SIGTERM, waits up to three seconds for
// the process to exit, and sends SIGKILL if it has not. Only the first call
// performs the shutdown; later calls do nothing.
func (s *evalServer) Stop() {
	shutdown := func() {
		// Nothing to stop if the process was never started.
		if s.cmd.Process == nil {
			return
		}
		s.signalGroup(syscall.SIGTERM)

		// Reap the child in the background so the wait can be bounded.
		exited := make(chan struct{})
		go func() {
			_ = s.cmd.Wait()
			close(exited)
		}()

		grace := time.NewTimer(3 * time.Second)
		select {
		case <-exited:
			return
		case <-grace.C:
		}

		// Grace period elapsed: force-kill, then wait for the reap to finish.
		s.signalGroup(syscall.SIGKILL)
		<-exited
	}
	s.once.Do(shutdown)
}

// signalGroup delivers sig to the server on a best-effort basis: errors from the
// signalling calls are deliberately discarded. When the child's process group ID
// can be looked up, the signal is sent to the negated pgid (i.e. the whole group);
// otherwise it is sent to the child process alone.
//
// The caller must ensure s.cmd.Process is non-nil.
//
// NOTE(review): syscall.Getpgid and syscall.Kill are not defined on Windows; the
// CI annotations captured inside this body report the windows-latest build
// failing on both calls. This presumably needs a build-constrained Unix
// implementation plus a Windows fallback — TODO confirm approach.
func (s *evalServer) signalGroup(sig syscall.Signal) {
if pgid, err := syscall.Getpgid(s.cmd.Process.Pid); err == nil {

Check failure on line 96 in cmd/eval_serve.go

View workflow job for this annotation

GitHub Actions / Test (windows-latest, 1.21)

undefined: syscall.Getpgid

Check failure on line 96 in cmd/eval_serve.go

View workflow job for this annotation

GitHub Actions / Test (windows-latest, 1.22)

undefined: syscall.Getpgid
_ = syscall.Kill(-pgid, sig)

Check failure on line 97 in cmd/eval_serve.go

View workflow job for this annotation

GitHub Actions / Test (windows-latest, 1.21)

undefined: syscall.Kill

Check failure on line 97 in cmd/eval_serve.go

View workflow job for this annotation

GitHub Actions / Test (windows-latest, 1.22)

undefined: syscall.Kill
} else {
// Group lookup failed; fall back to signalling only the child.
_ = s.cmd.Process.Signal(sig)
}
}

// Output reports the text captured from the server's stdout and stderr up to
// the moment of the call.
func (s *evalServer) Output() string {
	captured := s.output.String()
	return captured
}

// waitForHealthy polls url + "/health" until it answers HTTP 200 or timeout
// elapses. Trailing slashes on url are ignored.
//
// At least one probe is always made, even when timeout is zero or negative, so
// the returned error always wraps the outcome of a real attempt (either the
// transport error or the last non-200 status). Probes are spaced 500ms apart,
// but the wait is clipped to the time remaining so a short timeout is not
// overshot by a full polling interval. A single in-flight request may still
// run for up to the 3s client timeout past the deadline.
//
// It returns nil once the endpoint reports healthy, otherwise an error
// describing the timeout and the last failure seen.
func waitForHealthy(url string, timeout time.Duration) error {
	const pollInterval = 500 * time.Millisecond

	client := &http.Client{Timeout: 3 * time.Second}
	deadline := time.Now().Add(timeout)
	healthURL := strings.TrimRight(url, "/") + "/health"

	var lastErr error
	for {
		resp, err := client.Get(healthURL)
		if err != nil {
			lastErr = err
		} else {
			_ = resp.Body.Close() // only the status code matters
			if resp.StatusCode == http.StatusOK {
				return nil
			}
			lastErr = fmt.Errorf("health returned HTTP %d", resp.StatusCode)
		}

		remaining := time.Until(deadline)
		if remaining <= 0 {
			break
		}
		// Never sleep past the deadline; the loop then makes one final probe.
		pause := pollInterval
		if remaining < pause {
			pause = remaining
		}
		time.Sleep(pause)
	}
	return fmt.Errorf("server not healthy within %s: %w", timeout, lastErr)
}

// launchAndWait starts the eval server in dir (using customCmd, or the default
// launch command when it is blank) and blocks until targetURL's health endpoint
// reports healthy or waitSecs seconds have passed.
//
// Arguments are validated before anything is launched: targetURL must be
// non-empty and waitSecs must be positive. verbose streams the server's output
// to stderr while it runs.
//
// On success the running server is returned and the caller is responsible for
// calling Stop. On a failed health wait the server is stopped, its captured
// output is printed to stderr to aid debugging, and an error is returned.
func launchAndWait(dir, customCmd, targetURL string, waitSecs int, verbose bool) (*evalServer, error) {
	if targetURL == "" {
		return nil, fmt.Errorf("--serve requires a target URL in the test file")
	}
	if waitSecs <= 0 {
		// Fail before spawning a process that could never be given time to start.
		return nil, fmt.Errorf("--serve-timeout must be a positive number of seconds, got %d", waitSecs)
	}

	fmt.Printf("🚀 Launching EvalServer from %s (AGK_EVAL_MODE=true)...\n", dir)
	srv, err := startEvalServer(dir, customCmd, verbose)
	if err != nil {
		return nil, err
	}

	fmt.Printf("⏳ Waiting up to %ds for %s to become healthy...\n", waitSecs, targetURL)
	if err := waitForHealthy(targetURL, time.Duration(waitSecs)*time.Second); err != nil {
		// Stop first: Stop waits for the process, so reading the output afterwards
		// also includes anything the server printed while shutting down.
		srv.Stop()
		if out := srv.Output(); strings.TrimSpace(out) != "" {
			fmt.Fprintf(os.Stderr, "\n--- server output ---\n%s\n---------------------\n", out)
		}
		return nil, fmt.Errorf("eval server did not start: %w", err)
	}

	fmt.Println("✓ Server is healthy")
	return srv, nil
}

// prefixWriter tags every line written through it with a fixed label (used to
// mark streamed server output). The label is emitted at the start of each line
// rather than once per Write call, so a chunk holding several lines has each
// line tagged, and a line that arrives split across writes is tagged only once.
//
// It keeps state between calls and does no locking of its own, so it must not
// be written to from multiple goroutines at once.
type prefixWriter struct {
	prefix string
	w      io.Writer
	// midLine is true when the last byte written was not a newline, i.e. the
	// next write continues a line that already carries the prefix.
	midLine bool
}

// Write forwards b to the underlying writer one line at a time, inserting the
// prefix before the first byte of each line. The returned count covers only
// bytes of b (never prefix bytes), so a fully successful call returns len(b),
// as io.Writer requires. An empty b writes nothing.
func (p *prefixWriter) Write(b []byte) (int, error) {
	written := 0
	for len(b) > 0 {
		if !p.midLine {
			if _, err := io.WriteString(p.w, p.prefix); err != nil {
				return written, err
			}
			p.midLine = true
		}

		// Take up to and including the next newline, or the rest of b.
		line := b
		if i := bytes.IndexByte(b, '\n'); i >= 0 {
			line = b[:i+1]
		}

		n, err := p.w.Write(line)
		written += n
		if err != nil {
			return written, err
		}
		if n < len(line) {
			return written, io.ErrShortWrite
		}

		p.midLine = line[len(line)-1] != '\n'
		b = b[len(line):]
	}
	return written, nil
}
63 changes: 63 additions & 0 deletions cmd/eval_serve_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package cmd

import (
"net/http"
"net/http/httptest"
"reflect"
"sync/atomic"
"testing"
"time"
)

// TestParseServeCmd checks the default command, whitespace-only input, a bare
// executable, and commands with arguments.
func TestParseServeCmd(t *testing.T) {
	type testCase struct {
		in       string
		wantName string
		wantArgs []string
	}
	table := []testCase{
		{in: "", wantName: "go", wantArgs: []string{"run", "."}},
		{in: " ", wantName: "go", wantArgs: []string{"run", "."}},
		{in: "./server", wantName: "./server", wantArgs: []string{}},
		{in: "go run ./cmd/server", wantName: "go", wantArgs: []string{"run", "./cmd/server"}},
		{in: "mybin --eval --port 8787", wantName: "mybin", wantArgs: []string{"--eval", "--port", "8787"}},
	}

	for _, tc := range table {
		gotName, gotArgs := parseServeCmd(tc.in)
		if gotName == tc.wantName && reflect.DeepEqual(gotArgs, tc.wantArgs) {
			continue
		}
		t.Errorf("parseServeCmd(%q) = (%q, %v), want (%q, %v)", tc.in, gotName, gotArgs, tc.wantName, tc.wantArgs)
	}
}

// TestWaitForHealthyBecomesHealthy verifies that polling succeeds once a server
// that initially answers 503 starts answering 200 on /health.
func TestWaitForHealthyBecomesHealthy(t *testing.T) {
	var ready atomic.Bool

	handler := func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/health" || !ready.Load() {
			http.Error(w, "starting", http.StatusServiceUnavailable)
			return
		}
		w.WriteHeader(http.StatusOK)
	}
	server := httptest.NewServer(http.HandlerFunc(handler))
	defer server.Close()

	// Become healthy a little while after polling has begun.
	time.AfterFunc(300*time.Millisecond, func() { ready.Store(true) })

	err := waitForHealthy(server.URL, 5*time.Second)
	if err != nil {
		t.Fatalf("waitForHealthy returned error: %v", err)
	}
}

// TestWaitForHealthyTimesOut verifies that a server which never reports healthy
// makes waitForHealthy return an error once the timeout is reached.
func TestWaitForHealthyTimesOut(t *testing.T) {
	alwaysUnavailable := func(w http.ResponseWriter, r *http.Request) {
		http.Error(w, "never ready", http.StatusServiceUnavailable)
	}
	server := httptest.NewServer(http.HandlerFunc(alwaysUnavailable))
	defer server.Close()

	err := waitForHealthy(server.URL, 1*time.Second)
	if err == nil {
		t.Fatal("expected timeout error, got nil")
	}
}
28 changes: 25 additions & 3 deletions docs/EVAL.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,15 +135,37 @@ tests:

### 3. Run Tests

The simplest path is `--serve`, which builds and launches your project in EvalServer
mode, waits for it to become healthy, runs the tests, and stops it automatically:

```bash
# One command: launch the server, run tests, tear down
agk eval tests.yaml --serve

# View report
cat .agk/reports/eval-report-*.md
```

`--serve` options:

| Flag | Default | Description |
|------|---------|-------------|
| `--serve` | off | Launch the project in EvalServer mode for the run, then stop it |
| `--serve-dir` | `.` | Project directory to launch |
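| `--serve-cmd` | `go run .` | Custom launch command (e.g. a prebuilt binary). The value is split on whitespace and run directly, not through a shell, so quoting and arguments containing spaces are not supported |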
| `--serve-timeout` | `90` | Seconds to wait for the server to become healthy |

It sets `AGK_EVAL_MODE=true` in the launched process and polls `<target.url>/health`
(taken from the test file's `target.url`) until it returns HTTP 200 or `--serve-timeout`
elapses. Server output is captured and printed if startup fails.

Prefer to manage the server yourself? Run it in a separate terminal and omit `--serve`:

```bash
# Terminal 1: Start your workflow in EvalServer mode
AGK_EVAL_MODE=true ./myworkflow

# Terminal 2: Run tests
agk eval tests.yaml --timeout 200

# View report
cat .agk/reports/eval-report-*.md
```

---
Expand Down
Loading