Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions cmd/serve.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ func runServe() error {
workspaceRoot = filepath.Join(homeDir, ".flashduty")
}

// Surface the resolved workspace root at startup. When the cloud image and the
// runner disagree on where the workspace lives (the class of root-divergence
// bug that previously cost hours), this single line makes the truth obvious in
// the logs instead of leaving us to infer it from downstream path errors.
slog.Info("envd workspace root resolved", "workspace_root", workspaceRoot)

// Pin the runner home into the environment so environment.BundledToolsDir
// resolves the SAME <home>/bin the workspace uses (keeps install dir and bash
// PATH dir from diverging). No-op when FLASHDUTY_RUNNER_HOME was already set.
Expand Down
87 changes: 84 additions & 3 deletions envd/connect.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"errors"
"fmt"
"io"
"log/slog"
"net/http"
)

Expand All @@ -17,17 +18,97 @@ type connectError struct {

// writeUnary writes a Connect-RPC unary response (JSON body + 200 on success,
// 4xx/5xx + error envelope on failure).
//
// Invariant: the response body is ALWAYS a complete JSON value, never empty.
// The safari client decodes every unary response with json.Decode, and an empty
// body decodes to io.EOF — which surfaced to the agent as the opaque
// "remote write operation failed: EOF", hiding the real failure (see
// fix/envd-write-error-opacity). On success with no payload we emit `{}` so the
// client always gets a decodable envelope; on the error path we never drop the
// encode error silently.
func writeUnary(w http.ResponseWriter, v any, err error) {
w.Header().Set("Content-Type", "application/json")
if err != nil {
code, status := mapError(err)
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(connectError{Code: code, Message: err.Error()})
if encErr := json.NewEncoder(w).Encode(connectError{Code: code, Message: err.Error()}); encErr != nil {
// The status line + headers are already flushed, so we can't change
// the status now — but we must not leave the connection looking like a
// clean close with no body. Log loudly so the dropped envelope is
// visible in runner logs instead of silently becoming a client EOF.
slog.Error("envd: failed to encode error envelope",
"code", code, "status", status, "orig_error", err.Error(), "encode_error", encErr.Error())
}
return
}
w.WriteHeader(http.StatusOK)
if v != nil {
_ = json.NewEncoder(w).Encode(v)
// Always write a body. A nil payload (e.g. Write, which returns no value)
// must still serialize to `{}`, not an empty body, or the client's
// json.Decode hits io.EOF and reports a phantom failure.
if v == nil {
v = struct{}{}
}
if encErr := json.NewEncoder(w).Encode(v); encErr != nil {
slog.Error("envd: failed to encode success envelope", "encode_error", encErr.Error())
}
}

// recoverMiddleware turns a panic in any handler into a clean Connect-RPC
// internal-error envelope instead of letting the goroutine die and the
// connection close with no body. A dropped connection is exactly what the
// safari client reports as a bare io.EOF, hiding the real cause. We can only
// write a body if nothing has been flushed yet; we use a small interceptor to
// know whether WriteHeader was already called.
func recoverMiddleware(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
rw := &recordingWriter{ResponseWriter: w}
defer func() {
if rec := recover(); rec != nil {
// http.ErrAbortHandler is net/http's own sentinel for a
// deliberate connection abort (e.g. client disconnect on a
// streaming response). Re-panic so the server suppresses it the
// standard way — swallowing it would leak the handler goroutine,
// which would go on believing it can still write to a dead conn.
if rec == http.ErrAbortHandler {
panic(rec)
}
slog.Error("envd: handler panic recovered",
"path", r.URL.Path, "panic", fmt.Sprintf("%v", rec))
// Write through rw, not the raw w, so wroteHeader tracking stays
// consistent with the rest of the response path.
if !rw.wroteHeader {
writeUnary(rw, nil, fmt.Errorf("internal error: handler panicked: %v", rec))
}
// If a header was already written we can't change the status, but
// the log above ensures the panic is never silent.
}
}()
next.ServeHTTP(rw, r)
})
}

// recordingWriter tracks whether the wrapped handler has begun the response, so
// recoverMiddleware knows if it can still emit a clean error envelope.
type recordingWriter struct {
http.ResponseWriter
wroteHeader bool
}

func (rw *recordingWriter) WriteHeader(status int) {
rw.wroteHeader = true
rw.ResponseWriter.WriteHeader(status)
}

func (rw *recordingWriter) Write(b []byte) (int, error) {
rw.wroteHeader = true
return rw.ResponseWriter.Write(b)
}

// Flush proxies http.Flusher so streaming handlers (writeStream) keep working
// through the recording wrapper.
func (rw *recordingWriter) Flush() {
if f, ok := rw.ResponseWriter.(http.Flusher); ok {
f.Flush()
}
}

Expand Down
21 changes: 21 additions & 0 deletions envd/connect_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,27 @@ func TestWriteUnary_Error(t *testing.T) {
}
}

// TestWriteUnary_NilSuccessBodyIsNotEmpty pins the EOF-opacity fix: a successful
// unary response with no payload (e.g. Write) must serialize to a decodable JSON
// value, never an empty body. An empty body decodes to io.EOF on the safari
// client and surfaced as the opaque "remote write operation failed: EOF".
func TestWriteUnary_NilSuccessBodyIsNotEmpty(t *testing.T) {
w := httptest.NewRecorder()
writeUnary(w, nil, nil)
if w.Code != 200 {
t.Fatalf("code=%d", w.Code)
}
body := w.Body.Bytes()
if len(body) == 0 {
t.Fatal("nil-payload success wrote an empty body; client would see io.EOF")
}
// Must be valid JSON the client can Decode without hitting io.EOF.
var got json.RawMessage
if err := json.NewDecoder(w.Body).Decode(&got); err != nil {
t.Fatalf("success body is not decodable JSON: %v (body=%q)", err, string(body))
}
}

func TestReadUnary_Decodes(t *testing.T) {
body, _ := json.Marshal(map[string]int{"x": 7})
r := httptest.NewRequestWithContext(context.Background(), http.MethodPost, "/svc/m", bytes.NewReader(body))
Expand Down
16 changes: 15 additions & 1 deletion envd/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,13 @@ func (pr *processRegistry) add(p *procEntry) {
pr.all[p.cmd.Process.Pid] = p
pr.mu.Unlock()
}

func (pr *processRegistry) remove(pid int) {
pr.mu.Lock()
delete(pr.all, pid)
pr.mu.Unlock()
}

func (pr *processRegistry) get(pid int) *procEntry {
pr.mu.Lock()
defer pr.mu.Unlock()
Expand Down Expand Up @@ -89,8 +91,19 @@ func (s *Server) handleProcessStart(w http.ResponseWriter, r *http.Request) {
// that is the contract. Auth and sandbox isolation enforce the trust boundary;
// gosec's caller-controlled-input rule does not apply here.
cmd := exec.CommandContext(ctx, req.Process.Cmd, req.Process.Args...) //nolint:gosec // intentional
if req.Process.Cwd != "" {
// Default the working directory to the workspace root when the client omits
// cwd, so a bare `bash` lands in the SAME root the file tools (read/write/
// glob/sync_files) operate under. Without this the child inherits envd's own
// process cwd — the image WORKDIR (e.g. /home/runner) — which diverges from
// WorkspaceRoot (e.g. /home/runner/.flashduty): files written by bash become
// invisible to the file tools and absolute paths get rejected as "path
// escapes environment root". The BYOC exec path already defaults to the
// workspace root via resolveWorkdir; this brings the cloud process path in line.
switch {
case req.Process.Cwd != "":
cmd.Dir = req.Process.Cwd
case s.cfg.WorkspaceRoot != "":
cmd.Dir = s.cfg.WorkspaceRoot
}
if len(req.Process.Envs) > 0 {
env := make([]string, 0, len(req.Process.Envs))
Expand Down Expand Up @@ -226,6 +239,7 @@ func (s *Server) handleProcessList(w http.ResponseWriter, r *http.Request) {
func (s *Server) handleProcessSendInput(w http.ResponseWriter, _ *http.Request) {
writeUnary(w, nil, fmt.Errorf("%w: send_input", errUnimplemented))
}

func (s *Server) handleProcessConnect(w http.ResponseWriter, _ *http.Request) {
writeUnary(w, nil, fmt.Errorf("%w: connect", errUnimplemented))
}
52 changes: 52 additions & 0 deletions envd/process_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import (
"encoding/json"
"io"
"net/http"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
Expand Down Expand Up @@ -121,3 +123,53 @@ func TestProcessSendSignal_KillsRunning(t *testing.T) {
t.Fatal("start stream did not close after kill")
}
}

func TestProcessStart_DefaultsCwdToWorkspaceRoot(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("envd Process.Start uses POSIX /bin commands; serve mode targets the Linux sandbox container")
}
// A bare process (no cwd) must run in the workspace root, NOT envd's own
// process cwd — otherwise the agent's bash and the file tools (rooted at the
// workspace root) diverge, which is exactly the prod failure this fixes.
// Prove it: write a marker in the workspace root and read it back via a
// RELATIVE path, which only resolves if the child's cwd == WorkspaceRoot.
_, dir, hs := newTestServerWithWS(t)
const marker = "cwd-is-workspace-root"
if err := os.WriteFile(filepath.Join(dir, "marker.txt"), []byte(marker), 0o644); err != nil {
t.Fatal(err)
}

body := strings.NewReader(`{"process":{"cmd":"/bin/cat","args":["marker.txt"]}}`)
req, _ := http.NewRequestWithContext(context.Background(), http.MethodPost, hs.URL+"/process.Process/Start", body)
req.Header.Set("Content-Type", "application/json")
resp, err := http.DefaultClient.Do(req)
if err != nil {
t.Fatal(err)
}
defer func() { _ = resp.Body.Close() }()
if resp.StatusCode != 200 {
t.Fatalf("status=%d", resp.StatusCode)
}

dec := json.NewDecoder(resp.Body)
var stdout strings.Builder
for {
var frame map[string]any
if decErr := dec.Decode(&frame); decErr != nil {
if decErr == io.EOF {
break
}
t.Fatalf("decode: %v", decErr)
}
if ev, _ := frame["event"].(map[string]any); ev != nil {
if d, ok := ev["data"].(map[string]any); ok {
if s, _ := d["stdout"].(string); s != "" {
stdout.WriteString(s)
}
}
}
}
if got := strings.TrimSpace(stdout.String()); got != marker {
t.Fatalf("relative cat did not resolve against workspace root: stdout=%q want %q (cwd likely defaulted to envd process cwd, not WorkspaceRoot)", got, marker)
}
}
2 changes: 1 addition & 1 deletion envd/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ func (s *Server) registerRoutes() {
func (s *Server) Run(ctx context.Context) error {
srv := &http.Server{
Addr: s.cfg.Listen,
Handler: s.authMiddleware(s.mux),
Handler: recoverMiddleware(s.authMiddleware(s.mux)),
ReadHeaderTimeout: 10 * time.Second,
}
errCh := make(chan error, 1)
Expand Down
4 changes: 2 additions & 2 deletions envd/test_helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import (
func newTestServer(t *testing.T) (*Server, *httptest.Server) {
t.Helper()
s := NewServer(Config{Listen: ":0"})
hs := httptest.NewServer(s.authMiddleware(s.mux))
hs := httptest.NewServer(recoverMiddleware(s.authMiddleware(s.mux)))
t.Cleanup(hs.Close)
return s, hs
}
Expand All @@ -29,7 +29,7 @@ func newTestServerWithWS(t *testing.T) (*Server, string, *httptest.Server) {
t.Fatal(err)
}
s := NewServer(Config{Listen: ":0", Workspace: ws, WorkspaceRoot: dir})
hs := httptest.NewServer(s.authMiddleware(s.mux))
hs := httptest.NewServer(recoverMiddleware(s.authMiddleware(s.mux)))
t.Cleanup(hs.Close)
return s, dir, hs
}
Loading
Loading