From c7dc167688bfa056b6683095169875b001c4e2f8 Mon Sep 17 00:00:00 2001 From: Benjamin Elder Date: Wed, 27 May 2026 19:55:25 -0700 Subject: [PATCH] ateom: hash pod name to avoid unix socket path length limits pod names alone can easily exceed the max length add a symlink with the full pod name for debugging --- cmd/ateom-gvisor/main.go | 14 ++--- internal/ateompath/ateompath.go | 53 ++++++++++-------- internal/ateompath/ateompath_test.go | 84 ++++++++++++---------------- 3 files changed, 74 insertions(+), 77 deletions(-) diff --git a/cmd/ateom-gvisor/main.go b/cmd/ateom-gvisor/main.go index ce468df..53adb87 100644 --- a/cmd/ateom-gvisor/main.go +++ b/cmd/ateom-gvisor/main.go @@ -81,12 +81,18 @@ func do(ctx context.Context) error { } defer serverboot.ShutdownProvider("TracerProvider", tp.Shutdown) - // Create ateom dir + // Create ateom dir, this one hashes the pod name to avoid exceeding socket max length ateomDir := ateompath.AteomPath(*podNamespace, *podName) if err := os.MkdirAll(ateomDir, 0o700); err != nil { return fmt.Errorf("in os.MkdirAll(%q): %w", ateomDir, err) } + // Create a more predictable symlink for debugging purposes. + debugAteomDir := ateompath.DebugFriendlyAteomPath(*podNamespace, *podName) + if err := os.Symlink(ateomDir, debugAteomDir); err != nil { + return fmt.Errorf("in os.Symlink(%q, %q): %w", ateomDir, debugAteomDir, err) + } + // TODO: Consider whether we want to fork, so that we have an "init" process // as PID 1 that does nothing but reap processes that get reparented to it. // Then we won't have to mess about with locking the reaper while we do our @@ -94,12 +100,6 @@ func do(ctx context.Context) error { go reap.ReapChildren(nil, nil, nil, &reapLock) slog.InfoContext(ctx, "Child process reaper launched") - // Validate before opening the socket so the operator sees a clear - // message rather than the kernel's cryptic "bind: invalid argument". - if err := ateompath.ValidateAteomSocketPath(*podNamespace, *podName); err != nil { - return err - } - // Clean up any old socket. sockPath := ateompath.AteomSocketPath(*podNamespace, *podName) if err := os.RemoveAll(sockPath); err != nil { diff --git a/internal/ateompath/ateompath.go b/internal/ateompath/ateompath.go index 3fae572..0d118dc 100644 --- a/internal/ateompath/ateompath.go +++ b/internal/ateompath/ateompath.go @@ -16,15 +16,11 @@ package ateompath import ( - "fmt" + "encoding/base64" + "hash/fnv" "path/filepath" ) -// MaxUnixSocketPathLen is the practical Linux limit for unix-domain socket -// paths. The kernel's sockaddr_un.sun_path is 108 bytes including the trailing -// NUL, leaving 107 usable bytes. Bind fails with EINVAL above this. -const MaxUnixSocketPathLen = 107 - const ( // The base path. This is both the path of the root shared folder on the // host filesystem, and when it is mounted into ateom and atelet containers. @@ -41,6 +37,33 @@ func RunSCBinaryPath(sha256 string) string { } func AteomPath(ateomNamespace, ateomName string) string { + // This path needs to be safe for containing the aetom socket + // Sockets can only be 107 bytes (108 with the nul terminator) + // Namespaces are at most 63. + // Pod names can be up to 253. Clearly pod names are a problem. + // `/run/ateom-gvisor/ateoms/` is 25. + // One more for the separator `:` + // `/sock` is 6. + // 25 + 1 + 6 = 32. 107 - 32 - 63 = 12. + // 12 characters max for the pod hash. + // base64(fnv-64a(pod name)) hash is 11 bytes. + return filepath.Join( + BasePath, + "ateoms", + ateomNamespace+":"+hashPodName(ateomName), + ) +} + +func hashPodName(name string) string { + hasher := fnv.New64a() + hasher.Write([]byte(name)) + return base64.RawURLEncoding.EncodeToString(hasher.Sum(nil)) +} + +// DebugFriendlyAteomPath returns the path to the ateom directory using the pod name +// and namespace (without hashing), this is useful for debugging. +// Ateom will create a symlink at this path pointing to AteomPath. +func DebugFriendlyAteomPath(ateomNamespace, ateomName string) string { return filepath.Join( BasePath, "ateoms", @@ -51,26 +74,10 @@ func AteomPath(ateomNamespace, ateomName string) string { func AteomSocketPath(ateomNamespace, ateomName string) string { return filepath.Join( AteomPath(ateomNamespace, ateomName), - "ateom.sock", + "sock", ) } -// ValidateAteomSocketPath returns a descriptive error when the socket path -// derived from ateomNamespace and ateomName would exceed Linux's unix-socket -// limit. Calling net.Listen("unix", ...) with an over-limit path otherwise -// fails with the cryptic "bind: invalid argument". -func ValidateAteomSocketPath(ateomNamespace, ateomName string) error { - p := AteomSocketPath(ateomNamespace, ateomName) - if len(p) > MaxUnixSocketPathLen { - return fmt.Errorf( - "ateom socket path %q is %d bytes, exceeds Linux unix-socket limit of %d: shorten the namespace or pod name (%d + %d = %d chars used for namespace + name)", - p, len(p), MaxUnixSocketPathLen, - len(ateomNamespace), len(ateomName), len(ateomNamespace)+len(ateomName), - ) - } - return nil -} - func AteomNetNSName(ateomNamespace, ateomName string) string { return "ateom:" + ateomNamespace + ":" + ateomName } diff --git a/internal/ateompath/ateompath_test.go b/internal/ateompath/ateompath_test.go index 8cb8e9c..5361542 100644 --- a/internal/ateompath/ateompath_test.go +++ b/internal/ateompath/ateompath_test.go @@ -19,53 +19,43 @@ import ( "testing" ) -func TestValidateAteomSocketPath(t *testing.T) { - tests := []struct { - name string - namespace string - podName string - wantErr bool - }{ - { - name: "short names well under the limit", - namespace: "ate-demo-counter", - podName: "counter-deployment-abcd1234-xyzw1", - wantErr: false, - }, - { - name: "exactly at the limit", - namespace: strings.Repeat("a", 25), - podName: strings.Repeat("b", 45), - wantErr: false, - }, - { - name: "one byte over the limit", - namespace: strings.Repeat("a", 25), - podName: strings.Repeat("b", 46), - wantErr: true, - }, - { - name: "the reproducer from the original bug report", - namespace: "ate-demo-lovable-sandbox", - podName: "lovable-sandbox-pool-deployment-5797879cd7-2n7wb", - wantErr: true, - }, +func TestAteomSocketPathLimits(t *testing.T) { + // Kubernetes maximum lengths: + // Namespace: 63 characters + // Pod Name: 253 characters + maxNamespace := strings.Repeat("n", 63) + maxName := strings.Repeat("p", 253) + + sockPath := AteomSocketPath(maxNamespace, maxName) + + // Unix domain socket path limit is 107 bytes (108 with NUL terminator) + const maxUnixSocketLen = 107 + if len(sockPath) > maxUnixSocketLen { + t.Errorf("socket path length %d exceeds max allowed length %d: %q", len(sockPath), maxUnixSocketLen, sockPath) + } + + // Verify it is deterministic + sockPath2 := AteomSocketPath(maxNamespace, maxName) + if sockPath != sockPath2 { + t.Errorf("expected deterministic socket paths, got %q and %q", sockPath, sockPath2) + } +} + +func TestAteomPathUniqueness(t *testing.T) { + ns := "test-ns" + name1 := "pod-1" + name2 := "pod-2" + + path1 := AteomPath(ns, name1) + path2 := AteomPath(ns, name2) + + if path1 == path2 { + t.Errorf("expected different paths for different pod names, got %q", path1) } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - err := ValidateAteomSocketPath(tt.namespace, tt.podName) - if (err != nil) != tt.wantErr { - t.Errorf("ValidateAteomSocketPath(%q, %q) err=%v, wantErr=%v", - tt.namespace, tt.podName, err, tt.wantErr) - } - if tt.wantErr && err != nil { - // Error message should mention the limit so an operator can - // figure out by how much to shorten. - msg := err.Error() - if !strings.Contains(msg, "107") { - t.Errorf("error message %q does not reference the limit (107)", msg) - } - } - }) + + ns2 := "test-ns-2" + path3 := AteomPath(ns2, name1) + if path1 == path3 { + t.Errorf("expected different paths for different namespaces, got %q", path1) } }