diff --git a/cmd/obol/model.go b/cmd/obol/model.go
index 375885e9..2ef8ef4e 100644
--- a/cmd/obol/model.go
+++ b/cmd/obol/model.go
@@ -27,6 +27,7 @@ func modelCommand(cfg *config.Config) *cli.Command {
 			modelSyncCommand(cfg),
 			modelPullCommand(),
 			modelListCommand(cfg),
+			modelPreferCommand(cfg),
 			modelRemoveCommand(cfg),
 		},
 	}
@@ -194,12 +195,33 @@ func setupCloudProvider(cfg *config.Config, u *ui.UI, provider, apiKey string, m
 	}
 
 	if len(models) == 0 {
-		// Sensible defaults
+		// Per-provider defaults — kept in sync with what the providers
+		// document as their current chat-tuned flagship. Bumping these is a
+		// small follow-up PR when frontier models drop, and it isolates the
+		// "what's good today" maintenance to one place.
+		var defaultModel string
 		switch provider {
 		case "anthropic":
-			models = []string{"claude-sonnet-4-6"}
+			defaultModel = "claude-sonnet-4-6"
 		case "openai":
-			models = []string{"gpt-4.1"}
+			defaultModel = "gpt-5.5"
+		}
+
+		// Interactive: let the user override the default with a free-text
+		// entry. Non-interactive (no TTY): silently use the default — the
+		// caller can always pass --model to be explicit.
+		chosen := defaultModel
+		if defaultModel != "" && u.IsTTY() && !u.IsJSON() {
+			input, err := u.Input(fmt.Sprintf("Model for %s", provider), defaultModel)
+			if err != nil {
+				return err
+			}
+			if strings.TrimSpace(input) != "" {
+				chosen = strings.TrimSpace(input)
+			}
+		}
+		if chosen != "" {
+			models = []string{chosen}
+		}
 	}
 
@@ -493,6 +515,34 @@ func modelListCommand(cfg *config.Config) *cli.Command {
 	}
 }
 
+func modelPreferCommand(cfg *config.Config) *cli.Command {
+	return &cli.Command{
+		Name:      "prefer",
+		Usage:     "Pull one or more models to the head of the LiteLLM model_list (the head becomes the agent's primary)",
+		ArgsUsage: "<model> [<model> ...]",
+		Flags: []cli.Flag{
+			&cli.BoolFlag{Name: "no-sync", Usage: "Skip the agent model sync (batch with other model commands, then run `obol model sync` once)"},
+		},
+		Action: func(ctx context.Context, cmd *cli.Command) error {
+			u := getUI(cmd)
+
+			names := cmd.Args().Slice()
+			if len(names) == 0 {
+				return errors.New("at least one model name is required\n\nUsage: obol model prefer <model> [<model> ...]\n\nList configured models with: obol model list")
+			}
+
+			if err := model.PreferModels(cfg, u, names); err != nil {
+				return err
+			}
+
+			if cmd.Bool("no-sync") {
+				return nil
+			}
+			return syncAgentModels(cfg, u)
+		},
+	}
+}
+
 func modelRemoveCommand(cfg *config.Config) *cli.Command {
 	return &cli.Command{
 		Name:  "remove",
diff --git a/internal/embed/infrastructure/helmfile.yaml b/internal/embed/infrastructure/helmfile.yaml
index 05ff5f27..44d73ea8 100644
--- a/internal/embed/infrastructure/helmfile.yaml
+++ b/internal/embed/infrastructure/helmfile.yaml
@@ -2,6 +2,24 @@
 # Orchestrates core infrastructure components deployed with every stack
 # Uses Traefik with Gateway API for routing (replaces nginx-ingress)
 
+# Force helm to use Server-Side Apply with --force-conflicts on every release.
+# Without this, every `obol stack down`/`up` cycle hits a different SSA
+# ownership conflict on resources helm shares with other writers — for example
+# litellm-config.data["config.yaml"] (touched by `obol model setup` patches),
+# remote-signer-keystore-password.metadata.labels (helm 3 pre-SSA default
+# manager was kubectl-client-side-apply), or fields the apiserver synthesises
+# under "before-first-apply" when an Update-managed object first sees an
+# Apply call.
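+#
+# The conflict the apiserver reports has this shape (abridged):
+#   Apply failed with 1 conflict: conflict with "kubectl-client-side-apply"
+#   using v1: .data.config.yaml
+# and the competing managers on a live stack are visible with:
+#   kubectl get configmap litellm-config -n llm -o yaml --show-managed-fields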
--server-side=true switches helm to SSA semantics so it can +# reason about field ownership at all; --force-conflicts tells it to take +# ownership when it disagrees with another manager rather than aborting the +# upgrade. The `=true` form is required: helm's --server-side flag takes a +# value (auto|true|false), so a bare --server-side would consume the next +# arg as its value and reject --force-conflicts as an unknown apply method. +helmDefaults: + args: + - "--server-side=true" + - "--force-conflicts" + repositories: - name: traefik url: https://traefik.github.io/charts diff --git a/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl b/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl index ccb70e0e..2e5051ea 100644 --- a/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl +++ b/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl @@ -35,7 +35,7 @@ image: repository: obolnetwork/obol-stack-front-end pullPolicy: IfNotPresent - tag: "v0.1.17-rc.5" + tag: "v0.1.18" service: type: ClusterIP diff --git a/internal/hermes/hermes.go b/internal/hermes/hermes.go index 2abd2d7e..7bdc48b6 100644 --- a/internal/hermes/hermes.go +++ b/internal/hermes/hermes.go @@ -690,6 +690,19 @@ func writeDeploymentFiles(cfg *config.Config, id, deploymentDir, agentBaseURL st func generateHelmfile(namespace string) string { return fmt.Sprintf(`# Managed by obol agent +# Force helm to use Server-Side Apply with --force-conflicts on every +# release. Without this, every Hermes resync after a fresh install hits SSA +# ownership conflicts on shared fields — most often +# remote-signer-keystore-password.metadata.labels, where helm's pre-SSA +# install left field ownership under "kubectl-client-side-apply" and a +# subsequent helm upgrade with SSA can't reclaim them. =true is required +# because helm's --server-side flag takes a value (auto|true|false); a bare +# --server-side would consume the next arg and reject --force-conflicts. +helmDefaults: + args: + - "--server-side=true" + - "--force-conflicts" + repositories: - name: obol url: https://obolnetwork.github.io/helm-charts/ @@ -1260,15 +1273,14 @@ func litellmMasterKey(cfg *config.Config) string { return "sk-obol-" + strings.TrimSpace(string(data)) } -// rankModels delegates to model.Rank, which knows how to prefer larger local -// models and frontier cloud models. Kept as a thin wrapper so call sites -// don't need to import internal/model directly. +// rankModels delegates to model.Rank, which preserves configured LiteLLM model +// order and keeps known embedding-only models behind chat-capable models. Kept +// as a thin wrapper so call sites don't need to import internal/model directly. // -// IMPORTANT: do NOT pre-strip provider prefixes here. model.Rank strips -// internally for ranking heuristics but returns the ORIGINAL strings so the -// agent can round-trip them back to LiteLLM. Stripping at this layer would -// break that round-trip — that's exactly the double-strip bug that -// ca820c9 worked around for custom endpoints. +// IMPORTANT: do NOT strip provider prefixes here. model.Rank returns the +// original strings so the agent can round-trip them back to LiteLLM. Stripping +// at this layer would break that round-trip — that's exactly the double-strip +// bug that ca820c9 worked around for custom endpoints. 
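+//
+// Concretely (hypothetical list): Rank([]string{"anthropic/claude-opus-4-7"})
+// must hand back "anthropic/claude-opus-4-7" unchanged; a stripped
+// "claude-opus-4-7" would miss LiteLLM's model_name table and the agent's
+// next chat call would 400 with "no healthy deployments".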
func rankModels(models []string) (primary string, fallbacks []string) { return model.Rank(models) } diff --git a/internal/hermes/rankmodels_test.go b/internal/hermes/rankmodels_test.go index d7903e40..8cebfb32 100644 --- a/internal/hermes/rankmodels_test.go +++ b/internal/hermes/rankmodels_test.go @@ -2,42 +2,20 @@ package hermes import "testing" -// TestRankModels_HermesWrapper_PrefersLargerLocalModel encodes the regression -// from the colleague's screenshot: Hermes was deploying with `llama3.2:1b` as -// the default model, which then parroted its own tool list back on every -// "hello" prompt. The fix moved capability ranking into model.Rank; this test -// just confirms the Hermes-side wrapper still calls into it correctly. -// -// Contract: bare LiteLLM model_name strings come in, the SAME bare strings -// come back out — no provider-prefix stripping at this layer. The agent must -// be able to round-trip the returned primary back to LiteLLM without -// modification. -func TestRankModels_HermesWrapper_PrefersLargerLocalModel(t *testing.T) { +// Contract: LiteLLM model_name strings come in, the SAME strings come back +// out in configured order. The agent must be able to round-trip the returned +// primary back to LiteLLM without modification. +func TestRankModels_HermesWrapper_PreservesConfiguredOrder(t *testing.T) { primary, fallbacks := rankModels([]string{ "llama3.2:1b", - "llama3.1:8b", - "llama3.2:3b", - }) - if primary != "llama3.1:8b" { - t.Fatalf("primary: got %q, want llama3.1:8b", primary) - } - if len(fallbacks) != 2 || fallbacks[0] != "llama3.2:3b" || fallbacks[1] != "llama3.2:1b" { - t.Fatalf("fallbacks: got %v, want [llama3.2:3b llama3.2:1b]", fallbacks) - } -} - -// TestRankModels_HermesWrapper_PrefersClaudeOverLocal exercises the cloud -// tier. Cloud entries written by buildModelEntries are bare (e.g. -// `claude-opus-4-7`, not `anthropic/claude-opus-4-7`), and the wrapper must -// preserve that. -func TestRankModels_HermesWrapper_PrefersClaudeOverLocal(t *testing.T) { - primary, _ := rankModels([]string{ "llama3.1:8b", "claude-opus-4-7", - "llama3.2:1b", }) - if primary != "claude-opus-4-7" { - t.Fatalf("primary: got %q, want claude-opus-4-7", primary) + if primary != "llama3.2:1b" { + t.Fatalf("primary: got %q, want llama3.2:1b", primary) + } + if len(fallbacks) != 2 || fallbacks[0] != "llama3.1:8b" || fallbacks[1] != "claude-opus-4-7" { + t.Fatalf("fallbacks: got %v, want [llama3.1:8b claude-opus-4-7]", fallbacks) } } @@ -50,12 +28,12 @@ func TestRankModels_HermesWrapper_PrefersClaudeOverLocal(t *testing.T) { // reintroducing it. func TestRankModels_HermesWrapper_PreservesProviderPrefixIfPresent(t *testing.T) { primary, _ := rankModels([]string{ + "llama3.1:8b", "anthropic/claude-opus-4-7", "openai/gpt-4o", - "llama3.1:8b", }) - if primary != "anthropic/claude-opus-4-7" { - t.Fatalf("primary: got %q, want anthropic/claude-opus-4-7 (unstripped)", primary) + if primary != "llama3.1:8b" { + t.Fatalf("primary: got %q, want llama3.1:8b", primary) } } diff --git a/internal/model/model.go b/internal/model/model.go index 8433bdb2..ecf4a4b0 100644 --- a/internal/model/model.go +++ b/internal/model/model.go @@ -577,6 +577,130 @@ func hotDeleteModel(cfg *config.Config, u *ui.UI, modelName string) error { return nil } +// reorderModelList is the pure-function core of PreferModels. 
+// It moves the named entries to the head of the list (in the order given)
+// and returns the new slice, plus a boolean indicating whether the input was
+// already in the requested order (the caller should treat that as a no-op so
+// it can skip the kubectl patch + LiteLLM rollout). Unknown or duplicate
+// names produce an error so typos surface loudly.
+func reorderModelList(entries []ModelEntry, names []string) ([]ModelEntry, bool, error) {
+	indexByName := make(map[string]int, len(entries))
+	for i, entry := range entries {
+		indexByName[entry.ModelName] = i
+	}
+
+	var missing []string
+	picked := make(map[string]bool, len(names))
+	for _, name := range names {
+		if _, ok := indexByName[name]; !ok {
+			missing = append(missing, name)
+			continue
+		}
+		if picked[name] {
+			return nil, false, fmt.Errorf("duplicate model in prefer args: %q", name)
+		}
+		picked[name] = true
+	}
+	if len(missing) > 0 {
+		return nil, false, fmt.Errorf("model(s) not found in LiteLLM config: %s\n  Run 'obol model list' to see available entries", strings.Join(missing, ", "))
+	}
+
+	alreadyAtHead := true
+	for i, name := range names {
+		if i >= len(entries) || entries[i].ModelName != name {
+			alreadyAtHead = false
+			break
+		}
+	}
+
+	reordered := make([]ModelEntry, 0, len(entries))
+	for _, name := range names {
+		reordered = append(reordered, entries[indexByName[name]])
+	}
+	for _, entry := range entries {
+		if picked[entry.ModelName] {
+			continue
+		}
+		reordered = append(reordered, entry)
+	}
+	return reordered, alreadyAtHead, nil
+}
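+
+// For example, with a model_list of [anthropic/*, claude-sonnet-4-6, gpt-5.5]
+// (names borrowed from the tests below), preferring gpt-5.5 yields
+// [gpt-5.5, anthropic/*, claude-sonnet-4-6] with alreadyAtHead=false; running
+// the same prefer again reports alreadyAtHead=true, and the caller skips the
+// ConfigMap patch and LiteLLM rollout entirely.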
+
+// PreferModels reorders LiteLLM's model_list so the named entries appear at
+// the head, in the order given. Remaining entries keep their original
+// relative order. This is the operator-facing primitive that lets
+// model.Rank's "first chat-capable wins" rule pick a specific primary
+// without a remove/re-add cycle.
+//
+// Returns an error if any of the requested names is not present in the
+// current model_list — typos should be loud, not silent no-ops.
+//
+// LiteLLM has no model_list reorder API, so after the ConfigMap patch this
+// rolls the LiteLLM Deployment so the new order takes effect (the
+// /v1/models listing follows model_list order, and hermes/openclaw read
+// the ConfigMap directly via GetConfiguredModels for the agent primary).
+func PreferModels(cfg *config.Config, u *ui.UI, names []string) error {
+	if len(names) == 0 {
+		return errors.New("at least one model name is required")
+	}
+
+	kubectlBinary := filepath.Join(cfg.BinDir, "kubectl")
+	kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml")
+
+	if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) {
+		return errors.New("cluster not running. Run 'obol stack up' first")
+	}
+
+	raw, err := kubectl.Output(kubectlBinary, kubeconfigPath,
+		"get", "configmap", configMapName, "-n", namespace, "-o", "jsonpath={.data.config\\.yaml}")
+	if err != nil {
+		return fmt.Errorf("failed to read LiteLLM config: %w", err)
+	}
+
+	var litellmConfig LiteLLMConfig
+	if err := yaml.Unmarshal([]byte(raw), &litellmConfig); err != nil {
+		return fmt.Errorf("failed to parse config.yaml: %w", err)
+	}
+
+	reordered, alreadyAtHead, err := reorderModelList(litellmConfig.ModelList, names)
+	if err != nil {
+		return err
+	}
+	if alreadyAtHead {
+		u.Infof("Model(s) already at the head of the model_list, no change")
+		return nil
+	}
+	litellmConfig.ModelList = reordered
+
+	updated, err := yaml.Marshal(&litellmConfig)
+	if err != nil {
+		return fmt.Errorf("failed to marshal config: %w", err)
+	}
+	escapedYAML, err := json.Marshal(string(updated))
+	if err != nil {
+		return fmt.Errorf("failed to escape YAML: %w", err)
+	}
+	patchJSON := fmt.Sprintf(`{"data":{"config.yaml":%s}}`, escapedYAML)
+
+	u.Infof("Promoting %s to head of LiteLLM model_list", strings.Join(names, ", "))
+	if err := kubectl.Run(kubectlBinary, kubeconfigPath,
+		"patch", "configmap", configMapName, "-n", namespace,
+		"-p", patchJSON, "--type=merge", "--field-manager=helm"); err != nil {
+		return fmt.Errorf("failed to patch ConfigMap: %w", err)
+	}
+
+	// LiteLLM has no reorder API; restart the deployment so the new order
+	// takes effect (mostly cosmetic for /v1/models listings — agent primary
+	// is read from the ConfigMap directly via GetConfiguredModels, which is
+	// already correct after the patch above).
+	if err := RestartLiteLLM(cfg, u, "prefer"); err != nil {
+		u.Warnf("LiteLLM rollout failed: %v", err)
+		u.Dim("  The ConfigMap is updated; agent will pick up the new primary on next sync.")
+	}
+
+	return nil
+}
+
 // RemoveModel removes a model entry from the LiteLLM ConfigMap (persistence)
 // and hot-deletes it from the running router via the API (immediate effect).
 // No pod restart is required.
@@ -1123,16 +1247,10 @@ func buildModelEntries(provider string, models []string) []ModelEntry {
 		}
 	case ProviderAnthropic:
 		cachePoints := anthropicCacheControlPoints()
-		// Wildcard: routes any anthropic model without explicit registration
-		entries = append(entries, ModelEntry{
-			ModelName: "anthropic/*",
-			LiteLLMParams: LiteLLMParams{
-				Model:                       "anthropic/*",
-				APIKey:                      "os.environ/ANTHROPIC_API_KEY",
-				CacheControlInjectionPoints: cachePoints,
-			},
-		})
-		// Explicit entries for requested models (better /v1/models listing)
+		// Explicit entries first so the user-selected model is the primary
+		// under model.Rank's "first chat-capable wins" rule. Hermes cannot
+		// send `model: anthropic/*` literally (LiteLLM doesn't resolve a
+		// wildcard to a default), so the wildcard must never sit at index 0.
 		for _, m := range models {
 			entries = append(entries, ModelEntry{
 				ModelName: m,
@@ -1143,17 +1261,27 @@
 			},
 			})
 		}
-	case ProviderOpenAI:
+		// Wildcard: routes any anthropic model without explicit registration.
 		entries = append(entries, ModelEntry{
-			ModelName:     "openai/*",
-			LiteLLMParams: LiteLLMParams{Model: "openai/*", APIKey: "os.environ/OPENAI_API_KEY"},
+			ModelName: "anthropic/*",
+			LiteLLMParams: LiteLLMParams{
+				Model:                       "anthropic/*",
+				APIKey:                      "os.environ/ANTHROPIC_API_KEY",
+				CacheControlInjectionPoints: cachePoints,
+			},
 		})
+	case ProviderOpenAI:
+		// Explicit-before-wildcard, same rationale as Anthropic above.
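+		// e.g. models = ["gpt-4o"] produces [gpt-4o, openai/*], so model.Rank
+		// picks gpt-4o as primary while openai/* still catches ad-hoc requests
+		// for other OpenAI models.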
for _, m := range models { entries = append(entries, ModelEntry{ ModelName: m, LiteLLMParams: LiteLLMParams{Model: "openai/" + m, APIKey: "os.environ/OPENAI_API_KEY"}, }) } + entries = append(entries, ModelEntry{ + ModelName: "openai/*", + LiteLLMParams: LiteLLMParams{Model: "openai/*", APIKey: "os.environ/OPENAI_API_KEY"}, + }) default: for _, m := range models { entries = append(entries, ModelEntry{ diff --git a/internal/model/model_test.go b/internal/model/model_test.go index aadc53cb..03292f5a 100644 --- a/internal/model/model_test.go +++ b/internal/model/model_test.go @@ -31,41 +31,42 @@ func TestBuildModelEntries(t *testing.T) { } }) - t.Run("anthropic gets wildcard plus explicit entries", func(t *testing.T) { + t.Run("anthropic puts explicit before the wildcard", func(t *testing.T) { entries := buildModelEntries("anthropic", []string{"claude-sonnet-4-5-20250929"}) if len(entries) != 2 { - t.Fatalf("got %d entries, want 2 (wildcard + explicit)", len(entries)) + t.Fatalf("got %d entries, want 2 (explicit + wildcard)", len(entries)) } - // First entry is the wildcard - if entries[0].ModelName != "anthropic/*" { - t.Errorf("entries[0].ModelName = %q, want anthropic/*", entries[0].ModelName) + // Explicit first so model.Rank picks it as primary; Hermes can't + // send `model: anthropic/*` literally. + if entries[0].ModelName != "claude-sonnet-4-5-20250929" { + t.Errorf("entries[0].ModelName = %q, want explicit model", entries[0].ModelName) } - - if entries[0].LiteLLMParams.Model != "anthropic/*" { - t.Errorf("entries[0].Model = %q, want anthropic/*", entries[0].LiteLLMParams.Model) + if entries[0].LiteLLMParams.Model != "claude-sonnet-4-5-20250929" { + t.Errorf("entries[0].Model = %q", entries[0].LiteLLMParams.Model) } - // Second entry is the explicit model - if entries[1].ModelName != "claude-sonnet-4-5-20250929" { - t.Errorf("entries[1].ModelName = %q", entries[1].ModelName) + if entries[0].LiteLLMParams.APIKey != "os.environ/ANTHROPIC_API_KEY" { + t.Errorf("api_key = %q, want os.environ/ANTHROPIC_API_KEY", entries[0].LiteLLMParams.APIKey) } - - if entries[1].LiteLLMParams.APIKey != "os.environ/ANTHROPIC_API_KEY" { - t.Errorf("api_key = %q, want os.environ/ANTHROPIC_API_KEY", entries[1].LiteLLMParams.APIKey) + // Wildcard appended last as catch-all. 
+ if entries[1].ModelName != "anthropic/*" { + t.Errorf("entries[1].ModelName = %q, want anthropic/*", entries[1].ModelName) } }) - t.Run("openai gets wildcard plus explicit entries", func(t *testing.T) { + t.Run("openai puts explicit before the wildcard", func(t *testing.T) { entries := buildModelEntries("openai", []string{"gpt-4o"}) if len(entries) != 2 { - t.Fatalf("got %d entries, want 2 (wildcard + explicit)", len(entries)) + t.Fatalf("got %d entries, want 2 (explicit + wildcard)", len(entries)) } - if entries[0].ModelName != "openai/*" { - t.Errorf("entries[0].ModelName = %q, want openai/*", entries[0].ModelName) + if entries[0].ModelName != "gpt-4o" { + t.Errorf("entries[0].ModelName = %q, want gpt-4o", entries[0].ModelName) } - - if entries[1].LiteLLMParams.Model != "openai/gpt-4o" { - t.Errorf("entries[1].Model = %q, want openai/gpt-4o", entries[1].LiteLLMParams.Model) + if entries[0].LiteLLMParams.Model != "openai/gpt-4o" { + t.Errorf("entries[0].Model = %q, want openai/gpt-4o", entries[0].LiteLLMParams.Model) + } + if entries[1].ModelName != "openai/*" { + t.Errorf("entries[1].ModelName = %q, want openai/*", entries[1].ModelName) } }) @@ -750,3 +751,122 @@ func TestPullOllamaModel_MockServer(t *testing.T) { } }) } + +func TestReorderModelList(t *testing.T) { + entries := []ModelEntry{ + {ModelName: "anthropic/*"}, + {ModelName: "claude-sonnet-4-6"}, + {ModelName: "claude-opus-4-7"}, + {ModelName: "openai/*"}, + {ModelName: "gpt-5.5"}, + } + + t.Run("pulls a single model to the head", func(t *testing.T) { + got, already, err := reorderModelList(entries, []string{"claude-opus-4-7"}) + if err != nil { + t.Fatalf("reorderModelList: %v", err) + } + if already { + t.Fatalf("expected alreadyAtHead = false") + } + want := []string{"claude-opus-4-7", "anthropic/*", "claude-sonnet-4-6", "openai/*", "gpt-5.5"} + assertOrder(t, got, want) + }) + + t.Run("pulls multiple models to the head in the given order", func(t *testing.T) { + got, _, err := reorderModelList(entries, []string{"gpt-5.5", "claude-opus-4-7"}) + if err != nil { + t.Fatalf("reorderModelList: %v", err) + } + want := []string{"gpt-5.5", "claude-opus-4-7", "anthropic/*", "claude-sonnet-4-6", "openai/*"} + assertOrder(t, got, want) + }) + + t.Run("preserves relative order of unselected entries", func(t *testing.T) { + got, _, err := reorderModelList(entries, []string{"claude-opus-4-7"}) + if err != nil { + t.Fatalf("reorderModelList: %v", err) + } + // anthropic/*, claude-sonnet-4-6, openai/*, gpt-5.5 should stay in + // that relative order behind the promoted entry. 
+ want := []string{"claude-opus-4-7", "anthropic/*", "claude-sonnet-4-6", "openai/*", "gpt-5.5"} + assertOrder(t, got, want) + }) + + t.Run("flags already-at-head as no-op", func(t *testing.T) { + _, already, err := reorderModelList(entries, []string{"anthropic/*"}) + if err != nil { + t.Fatalf("reorderModelList: %v", err) + } + if !already { + t.Fatalf("expected alreadyAtHead = true when first entry is requested") + } + }) + + t.Run("flags multi-arg already-at-head as no-op", func(t *testing.T) { + _, already, err := reorderModelList(entries, []string{"anthropic/*", "claude-sonnet-4-6"}) + if err != nil { + t.Fatalf("reorderModelList: %v", err) + } + if !already { + t.Fatalf("expected alreadyAtHead = true when first two requested entries already lead") + } + }) + + t.Run("errors on missing names with full list", func(t *testing.T) { + _, _, err := reorderModelList(entries, []string{"claude-opus-4-7", "typo-1", "typo-2"}) + if err == nil { + t.Fatal("expected error for missing names") + } + if !strings.Contains(err.Error(), "typo-1") || !strings.Contains(err.Error(), "typo-2") { + t.Fatalf("error should list every missing name, got: %v", err) + } + }) + + t.Run("errors on duplicate args", func(t *testing.T) { + _, _, err := reorderModelList(entries, []string{"claude-opus-4-7", "claude-opus-4-7"}) + if err == nil { + t.Fatal("expected error for duplicate names") + } + if !strings.Contains(err.Error(), "duplicate") { + t.Fatalf("error should mention duplicate, got: %v", err) + } + }) + + t.Run("preserves ModelEntry contents not just names", func(t *testing.T) { + entriesWithParams := []ModelEntry{ + {ModelName: "a", LiteLLMParams: LiteLLMParams{Model: "openai/a", APIKey: "key-a"}}, + {ModelName: "b", LiteLLMParams: LiteLLMParams{Model: "openai/b", APIKey: "key-b"}}, + } + got, _, err := reorderModelList(entriesWithParams, []string{"b"}) + if err != nil { + t.Fatalf("reorderModelList: %v", err) + } + if got[0].LiteLLMParams.APIKey != "key-b" { + t.Fatalf("promoted entry lost its params: got %+v", got[0]) + } + if got[1].LiteLLMParams.APIKey != "key-a" { + t.Fatalf("trailing entry lost its params: got %+v", got[1]) + } + }) +} + +func assertOrder(t *testing.T, got []ModelEntry, want []string) { + t.Helper() + if len(got) != len(want) { + t.Fatalf("len = %d, want %d (got %v)", len(got), len(want), modelNames(got)) + } + for i, w := range want { + if got[i].ModelName != w { + t.Fatalf("entry[%d] = %q, want %q (full order: %v)", i, got[i].ModelName, w, modelNames(got)) + } + } +} + +func modelNames(entries []ModelEntry) []string { + out := make([]string, len(entries)) + for i, e := range entries { + out[i] = e.ModelName + } + return out +} diff --git a/internal/model/rank.go b/internal/model/rank.go index e5bb47a8..66953c34 100644 --- a/internal/model/rank.go +++ b/internal/model/rank.go @@ -1,221 +1,42 @@ package model -import ( - "regexp" - "sort" - "strconv" - "strings" -) +import "strings" -// Rank picks the strongest model from a list and demotes the rest to fallbacks. +// Rank selects the primary model from the configured model list and demotes the +// rest to fallbacks. // -// Selection order: +// Model ordering is configuration, not hidden product policy. LiteLLM's +// model_list order is the source of truth, so Rank preserves that order instead +// of guessing quality from provider names, parameter-count tags, or model-family +// aliases. 
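+// Concretely: Rank([]string{"llama3.2:1b", "claude-opus-4-7"}) now returns
+// "llama3.2:1b" as primary; an operator who wants the Claude model first
+// runs `obol model prefer claude-opus-4-7`, so the configuration, not a
+// heuristic, changes the answer.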
The only exception is known embedding-only entries: they are kept in +// the fallback list but moved behind chat-capable models so an embedding model +// does not become the default chat model when another option exists. // -// 1. Cloud models (Anthropic Claude, OpenAI GPT/o-series) outrank local Ollama -// models. The agent works far better against a frontier model when one is -// wired up. -// 2. Within the cloud tier, models are ranked by a known-quality table — -// newer/larger first (Opus > Sonnet > Haiku, gpt-5 > gpt-4.1, etc.). Names -// not in the table fall to the bottom of the cloud tier alphabetically. -// 3. Within the local tier, models are ranked by parameter count parsed from -// the model tag (e.g. `qwen3.5:9b` → 9, `mixtral:8x7b` → 56, `llama3.2:1b` -// → 1). Larger first. Untagged or "latest" models are ranked using the -// average size of their family if known, otherwise treated as unknown. -// -// The fallback list preserves cloud-then-local ordering so a controller using -// LiteLLM's fallback chain still tries cloud models before reaching for local -// ones if the primary fails. -// -// This used to be duplicated between internal/hermes and internal/openclaw, -// where each copy returned `local[0]` — i.e. whatever Ollama listed first. -// In practice that frequently picked llama3.2:1b on hosts that had recently -// pulled it, and a 1B model produces nonsense on the agent's tool-heavy -// system prompt (the typical symptom is the agent parroting its tool list -// back to the user instead of answering "hello"). Fixing the ranking here -// fixes both runtimes at once. +// The returned strings are the original inputs. Do not strip provider prefixes +// or normalize names here; Hermes/OpenClaw round-trip the returned primary back +// to LiteLLM as the chat-completions model field. func Rank(models []string) (primary string, fallbacks []string) { if len(models) == 0 { return "", nil } - var cloud, local []string + ordered := make([]string, 0, len(models)) + embeddingOnly := make([]string, 0) for _, m := range models { - if IsCloudModel(m) { - cloud = append(cloud, m) - } else { - local = append(local, m) + if isEmbeddingOnlyModel(m) { + embeddingOnly = append(embeddingOnly, m) + continue } + ordered = append(ordered, m) } + ordered = append(ordered, embeddingOnly...) - sort.Slice(cloud, func(i, j int) bool { - return cloudRank(cloud[i]) < cloudRank(cloud[j]) - }) - sort.Slice(local, func(i, j int) bool { - ip := localRank(local[i]) - jp := localRank(local[j]) - if ip != jp { - return ip > jp - } - return local[i] < local[j] - }) - - if len(cloud) > 0 { - primary = cloud[0] - fallbacks = append(append([]string{}, cloud[1:]...), local...) - } else { - primary = local[0] - fallbacks = local[1:] - } + primary = ordered[0] + fallbacks = ordered[1:] return primary, fallbacks } -// IsCloudModel reports whether a model name belongs to a frontier cloud -// provider (Anthropic Claude, OpenAI GPT or o-series). The check is by -// substring/prefix because LiteLLM model ids carry a provider prefix -// (`anthropic/claude-3-5-sonnet-latest`, `openai/gpt-4o`). -func IsCloudModel(name string) bool { - n := strings.ToLower(stripProviderPrefix(name)) - if strings.Contains(n, "claude") { - return true - } - if strings.HasPrefix(n, "gpt") || - strings.HasPrefix(n, "o1") || - strings.HasPrefix(n, "o3") || - strings.HasPrefix(n, "o4") || - strings.HasPrefix(n, "o5") { - return true - } - return false -} - -// stripProviderPrefix is an internal helper for ranking only. 
It is NOT -// exported and MUST NOT be used to mutate model identifiers that the agent -// will pass back to LiteLLM on chat-completion calls. -// -// LiteLLM `model_name` is bare (no provider prefix) — see the contract -// documented on AddCustomEndpoint and buildModelEntries. The only place a -// `provider/model` shape can sneak in is wildcard entries like `anthropic/*`, -// or legacy entries from older releases that namespaced custom endpoints as -// `custom//`. We strip those here so size/family parsing in -// IsCloudModel/cloudRank/localRank still works on tagged tokens, but the -// caller in Rank() returns the ORIGINAL string — never the stripped form. -func stripProviderPrefix(name string) string { - if idx := strings.Index(name, "/"); idx >= 0 { - return name[idx+1:] - } - return name -} - -// cloudRank returns a sort key for cloud model names — lower is better. The -// table lists representative substrings; the first match wins. -func cloudRank(name string) int { - n := strings.ToLower(stripProviderPrefix(name)) - for i, marker := range cloudPrecedence { - if strings.Contains(n, marker) { - return i - } - } - return len(cloudPrecedence) + 1 -} - -var cloudPrecedence = []string{ - "opus-4-7", "opus-4-6", "opus-4-5", "opus-4", "opus", - "sonnet-4-7", "sonnet-4-6", "sonnet-4-5", "sonnet-4", "sonnet-3-7", "sonnet", - "haiku-4-5", "haiku-4", "haiku", - "gpt-5", "gpt-4.1", "gpt-4o", "gpt-4", "gpt-3.5", - "o5", "o4", "o3", "o1", +func isEmbeddingOnlyModel(name string) bool { + n := strings.ToLower(name) + return strings.Contains(n, "embed") } - -// paramSizeRe matches the parameter-count tag in model names. Decimals are -// allowed so a `:0.6b` Ollama tag doesn't fall through to the family default -// and accidentally outrank a `:9b` peer. The captured groups are: -// -// llama3.2:1b → "1" → 10 (deci-billions) -// qwen3.5:9b → "9" → 90 -// qwen3:0.6b → "0.6" → 6 -// deepseek-r1:32b → "32" → 320 -// mixtral:8x7b → "8x7" → 560 (8 * 70) -// qwen3-vl:235b-cloud → "235" → 2350 -// -// localRank returns deci-billions (parameter count × 10) so 0.5b / 0.6b -// fractional sizes still produce distinct integer ranks without complicating -// the comparator. -var paramSizeRe = regexp.MustCompile(`(?i)(?::|-)(\d+(?:\.\d+)?(?:x\d+(?:\.\d+)?)?)b\b`) - -// localRank returns the parameter count (in deci-billions, i.e. params × 10) -// for a local model name. Decimal sizes (`0.5b`, `0.6b`) survive the int -// conversion intact. Untagged Ollama models fall back to a family-average -// lookup; truly unknown models return 0 (worst). Larger → higher rank. -func localRank(name string) int { - n := strings.ToLower(stripProviderPrefix(name)) - n = strings.TrimSuffix(n, ":latest") - - if m := paramSizeRe.FindStringSubmatch(n); m != nil { - raw := m[1] - if x := strings.Index(raw, "x"); x >= 0 { - a, errA := strconv.ParseFloat(raw[:x], 64) - b, errB := strconv.ParseFloat(raw[x+1:], 64) - if errA == nil && errB == nil { - return int(a * b * 10) - } - } - if v, err := strconv.ParseFloat(raw, 64); err == nil { - return int(v * 10) - } - } - - // No size in the tag — fall back to a family heuristic. These default - // values track the family's flagship "latest" tag at the time of writing - // and can be updated as new releases ship. We try longest prefixes first - // so `llama3.3` doesn't get the `llama3` default. 
- for _, prefix := range untaggedFamilyOrder { - if strings.HasPrefix(n, prefix) { - return untaggedFamilyDefaults[prefix] - } - } - return 0 -} - -// untaggedFamilyDefaults maps a model-family prefix to a typical parameter -// count expressed in deci-billions (params × 10), so the table shares units -// with localRank's tagged-parsing branch. The numbers don't have to be exact -// — the goal is "is this roughly bigger than that other model", not a -// precise sort. Untagged-model selection is rare; most Ollama users carry -// a size in the tag. -var untaggedFamilyDefaults = map[string]int{ - "qwen3.5": 90, - "qwen3": 140, - "qwen2.5": 70, - "llama3.3": 700, - "llama3.2": 30, - "llama3.1": 80, - "llama3": 80, - "deepseek-r1": 140, - "deepseek-coder": 60, - "deepseek-ocr": 70, - "mistral": 70, - "mixtral": 560, - "phi4": 140, - "phi3": 30, - "gemma3": 70, - "gemma2": 90, - "command-r": 350, - "nomic-embed": 0, // embedding model, never pick as agent default -} - -// untaggedFamilyOrder lists the keys of untaggedFamilyDefaults sorted by -// descending length so HasPrefix matches the most specific family first -// (e.g. `llama3.3` before `llama3`). -var untaggedFamilyOrder = func() []string { - keys := make([]string, 0, len(untaggedFamilyDefaults)) - for k := range untaggedFamilyDefaults { - keys = append(keys, k) - } - sort.Slice(keys, func(i, j int) bool { - if len(keys[i]) != len(keys[j]) { - return len(keys[i]) > len(keys[j]) - } - return keys[i] < keys[j] - }) - return keys -}() diff --git a/internal/model/rank_test.go b/internal/model/rank_test.go index 9dece8f9..676c717f 100644 --- a/internal/model/rank_test.go +++ b/internal/model/rank_test.go @@ -1,119 +1,88 @@ package model -import ( - "testing" -) +import "testing" -func TestRank_PrefersLargerLocalModelOver1B(t *testing.T) { - // The exact regression that produced "hello" → wall-of-text on the - // colleague's screenshot: a 1B model winning over qwen3.5:9b just because - // Ollama happened to list it first. 
+func TestRank_PreservesConfiguredOrder(t *testing.T) { primary, fallbacks := Rank([]string{ "llama3.2:1b", "qwen3.5:9b", - "llama3.2:3b", + "claude-opus-4-7", }) - if primary != "qwen3.5:9b" { - t.Fatalf("primary: got %q, want qwen3.5:9b", primary) + if primary != "llama3.2:1b" { + t.Fatalf("primary: got %q, want llama3.2:1b", primary) } - if len(fallbacks) != 2 || fallbacks[0] != "llama3.2:3b" || fallbacks[1] != "llama3.2:1b" { - t.Fatalf("fallbacks: got %v, want [llama3.2:3b llama3.2:1b]", fallbacks) + if len(fallbacks) != 2 || fallbacks[0] != "qwen3.5:9b" || fallbacks[1] != "claude-opus-4-7" { + t.Fatalf("fallbacks: got %v, want [qwen3.5:9b claude-opus-4-7]", fallbacks) } } -func TestRank_CloudOutranksLocal(t *testing.T) { +func TestRank_DoesNotInferProviderPrecedence(t *testing.T) { primary, fallbacks := Rank([]string{ - "qwen3.5:9b", + "gpt-4o", "claude-opus-4-7", - "llama3.2:1b", + "o3", }) - if primary != "claude-opus-4-7" { - t.Fatalf("primary: got %q, want claude-opus-4-7", primary) + if primary != "gpt-4o" { + t.Fatalf("primary: got %q, want gpt-4o", primary) } - // fallbacks: cloud-then-local order preserved - if len(fallbacks) != 2 || fallbacks[0] != "qwen3.5:9b" || fallbacks[1] != "llama3.2:1b" { - t.Fatalf("fallbacks: got %v", fallbacks) + if len(fallbacks) != 2 || fallbacks[0] != "claude-opus-4-7" || fallbacks[1] != "o3" { + t.Fatalf("fallbacks: got %v, want [claude-opus-4-7 o3]", fallbacks) } } -func TestRank_CloudInternalOrdering(t *testing.T) { - cases := []struct { - name string - in []string - want string - }{ - {"opus over sonnet", []string{"claude-sonnet-4-6", "claude-opus-4-7"}, "claude-opus-4-7"}, - {"sonnet over haiku", []string{"claude-haiku-4-5", "claude-sonnet-4-6"}, "claude-sonnet-4-6"}, - {"gpt-5 over gpt-4", []string{"gpt-4o", "gpt-5"}, "gpt-5"}, - {"opus over gpt", []string{"gpt-5", "claude-opus-4-7"}, "claude-opus-4-7"}, - {"provider prefix tolerated", []string{"openai/gpt-4o", "anthropic/claude-opus-4-7"}, "anthropic/claude-opus-4-7"}, +func TestRank_DoesNotInferSizePrecedence(t *testing.T) { + primary, fallbacks := Rank([]string{ + "llama3.2:1b", + "qwen3.5:9b", + "mixtral:8x7b", + }) + if primary != "llama3.2:1b" { + t.Fatalf("primary: got %q, want llama3.2:1b", primary) } - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - got, _ := Rank(tc.in) - if got != tc.want { - t.Fatalf("Rank(%v): got %q, want %q", tc.in, got, tc.want) - } - }) + if len(fallbacks) != 2 || fallbacks[0] != "qwen3.5:9b" || fallbacks[1] != "mixtral:8x7b" { + t.Fatalf("fallbacks: got %v, want [qwen3.5:9b mixtral:8x7b]", fallbacks) } } -func TestRank_LocalParameterParsing(t *testing.T) { - cases := []struct { - name string - in []string - want string - }{ - {"plain b suffix", []string{"llama3.2:1b", "llama3.2:3b"}, "llama3.2:3b"}, - {"two-digit count", []string{"qwen3:14b", "qwen3.5:9b"}, "qwen3:14b"}, - {"mixtral 8x7b", []string{"qwen3.5:9b", "mixtral:8x7b"}, "mixtral:8x7b"}, - {"235b cloud variant", []string{"qwen3.5:9b", "qwen3-vl:235b-cloud"}, "qwen3-vl:235b-cloud"}, - {"untagged family lookup", []string{"qwen3.5:9b", "llama3.3"}, "llama3.3"}, // family default 70 > 9 - // Regression on regression: a `:0.6b` Ollama tag must NOT fall through - // to the qwen3 family default (~14B) — that would mistakenly outrank - // qwen3.5:9b. The decimal-aware regex parses it as 0.6 directly. 
- {"decimal size below 1b", []string{"qwen3:0.6b", "qwen3.5:9b"}, "qwen3.5:9b"}, - {"decimal size 1.5b", []string{"qwen3.5:9b", "smol:1.5b"}, "qwen3.5:9b"}, - } - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - got, _ := Rank(tc.in) - if got != tc.want { - t.Fatalf("Rank(%v): got %q, want %q", tc.in, got, tc.want) - } - }) +func TestRank_EmbeddingModelLast(t *testing.T) { + primary, fallbacks := Rank([]string{ + "nomic-embed-text", + "llama3.2:1b", + "text-embedding-3-large", + "qwen3.5:9b", + }) + if primary != "llama3.2:1b" { + t.Fatalf("primary: got %q, want llama3.2:1b", primary) } -} - -func TestRank_DeterministicTiebreak(t *testing.T) { - // Two models the same size — must sort alphabetically so successive runs - // don't flip the primary. - primary1, _ := Rank([]string{"foo:7b", "bar:7b"}) - primary2, _ := Rank([]string{"bar:7b", "foo:7b"}) - if primary1 != primary2 { - t.Fatalf("non-deterministic: %q vs %q", primary1, primary2) + want := []string{"qwen3.5:9b", "nomic-embed-text", "text-embedding-3-large"} + if len(fallbacks) != len(want) { + t.Fatalf("fallbacks: got %v, want %v", fallbacks, want) } - if primary1 != "bar:7b" { - t.Fatalf("expected alphabetical tiebreak, got %q", primary1) + for i := range want { + if fallbacks[i] != want[i] { + t.Fatalf("fallbacks: got %v, want %v", fallbacks, want) + } } } -func TestRank_EmbeddingModelLast(t *testing.T) { - // nomic-embed-text isn't a chat model — must never become the agent's - // default if anything else is present. - primary, _ := Rank([]string{"nomic-embed-text", "llama3.2:1b"}) - if primary != "llama3.2:1b" { - t.Fatalf("primary: got %q, want llama3.2:1b (embedding model picked instead)", primary) +func TestRank_AllEmbeddingModelsPreserveOrder(t *testing.T) { + primary, fallbacks := Rank([]string{ + "nomic-embed-text", + "text-embedding-3-large", + }) + if primary != "nomic-embed-text" { + t.Fatalf("primary: got %q, want nomic-embed-text", primary) + } + if len(fallbacks) != 1 || fallbacks[0] != "text-embedding-3-large" { + t.Fatalf("fallbacks: got %v, want [text-embedding-3-large]", fallbacks) } } -// TestRank_PreservesProviderPrefixOnOutput documents the contract relied on -// by internal/hermes and internal/openclaw: Rank() may use the provider -// prefix internally for ranking heuristics (e.g. detecting "claude" in -// "anthropic/claude-opus-4-7"), but it MUST return the input strings -// UNCHANGED. The agent round-trips the returned primary back to LiteLLM as -// the `model` field on chat-completions; stripping here would mismatch the -// LiteLLM model_name and surface as 400 "no healthy deployments". +// TestRank_PreservesProviderPrefixOnOutput documents the contract relied on by +// internal/hermes and internal/openclaw: Rank returns input strings unchanged. +// The agent round-trips the returned primary back to LiteLLM as the `model` +// field on chat-completions; stripping here would mismatch LiteLLM model_name +// and surface as 400 "no healthy deployments". 
func TestRank_PreservesProviderPrefixOnOutput(t *testing.T) { cases := []struct { name string @@ -121,19 +90,19 @@ func TestRank_PreservesProviderPrefixOnOutput(t *testing.T) { want string }{ { - "anthropic/-prefixed wins over local", + "anthropic/-prefixed first", []string{"anthropic/claude-opus-4-7", "qwen3.5:9b"}, "anthropic/claude-opus-4-7", }, { - "openai/-prefixed wins over local", - []string{"qwen3.5:9b", "openai/gpt-4o"}, + "openai/-prefixed first", + []string{"openai/gpt-4o", "qwen3.5:9b"}, "openai/gpt-4o", }, { - "legacy custom// round-trips", - []string{"custom/spark1-vllm/qwen36-fast"}, - "custom/spark1-vllm/qwen36-fast", + "custom namespaced entry round-trips", + []string{"custom/qa-vllm/qwen36-fast"}, + "custom/qa-vllm/qwen36-fast", }, } for _, tc := range cases { @@ -152,18 +121,3 @@ func TestRank_Empty(t *testing.T) { t.Fatalf("Rank(nil): got %q,%v, want empty,nil", primary, fallbacks) } } - -func TestIsCloudModel(t *testing.T) { - cloud := []string{"claude-opus-4-7", "anthropic/claude-3-5-sonnet", "gpt-4o", "openai/gpt-5", "o1-preview", "o3-mini"} - local := []string{"llama3.2:1b", "qwen3.5:9b", "mixtral:8x7b", "deepseek-r1:14b", "nomic-embed-text"} - for _, n := range cloud { - if !IsCloudModel(n) { - t.Errorf("IsCloudModel(%q) = false, want true", n) - } - } - for _, n := range local { - if IsCloudModel(n) { - t.Errorf("IsCloudModel(%q) = true, want false", n) - } - } -} diff --git a/internal/openclaw/openclaw.go b/internal/openclaw/openclaw.go index aab93f0b..47d0c54a 100644 --- a/internal/openclaw/openclaw.go +++ b/internal/openclaw/openclaw.go @@ -1669,10 +1669,7 @@ func cliViaKubectlExec(cfg *config.Config, namespace string, args []string) erro // current LiteLLM model list. For each LiteLLM-routed instance it: // 1. Patches the overlay YAML model list (for helm consistency) // 2. Writes a clean per-agent models.json to the host PVC -// 3. Patches the openclaw-config ConfigMap with the best primary model -// -// Cloud models are promoted to primary (they're added because they're better -// than local defaults). The previous primary becomes a fallback. +// 3. Patches the openclaw-config ConfigMap with the configured primary model func SyncOverlayModels(cfg *config.Config, models []string, u *ui.UI) error { ids, err := ListInstanceIDs(cfg) if err != nil || len(ids) == 0 { @@ -1721,10 +1718,9 @@ func SyncOverlayModels(cfg *config.Config, models []string, u *ui.UI) error { return nil } -// rankModels delegates to model.Rank for capability-aware ranking, then -// prefixes every entry with `openai/` for LiteLLM routing. Both runtimes used -// to roll their own ranker that picked `local[0]` (whatever Ollama listed -// first), which produced the llama3.2:1b regression — see internal/model/rank.go. +// rankModels delegates to model.Rank for configured-order selection, then +// prefixes every entry with `openai/` for LiteLLM routing through OpenClaw's +// openai-compatible provider slot. 
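+// e.g. model.Rank picks "llama3.2:1b" → the wrapper returns
+// "openai/llama3.2:1b" as primary and prefixes each fallback the same way.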
func rankModels(models []string) (primary string, fallbacks []string) { primary, fallbacks = model.Rank(models) if primary != "" { diff --git a/internal/openclaw/rankmodels_test.go b/internal/openclaw/rankmodels_test.go index 5e9b5ae8..296d0def 100644 --- a/internal/openclaw/rankmodels_test.go +++ b/internal/openclaw/rankmodels_test.go @@ -2,30 +2,24 @@ package openclaw import "testing" -// TestRankModels_OpenClawWrapper_PrefersLargerLocalModel — same regression -// guard as the Hermes side, but exercising the openai/-prefix that OpenClaw -// adds for LiteLLM routing through the openai-compatible provider slot. -func TestRankModels_OpenClawWrapper_PrefersLargerLocalModel(t *testing.T) { +func TestRankModels_OpenClawWrapper_PreservesConfiguredOrder(t *testing.T) { primary, fallbacks := rankModels([]string{ "llama3.2:1b", "qwen3.5:9b", - "llama3.2:3b", + "claude-opus-4-7", }) - if primary != "openai/qwen3.5:9b" { - t.Fatalf("primary: got %q, want openai/qwen3.5:9b", primary) + if primary != "openai/llama3.2:1b" { + t.Fatalf("primary: got %q, want openai/llama3.2:1b", primary) } - if len(fallbacks) != 2 || fallbacks[0] != "openai/llama3.2:3b" || fallbacks[1] != "openai/llama3.2:1b" { + if len(fallbacks) != 2 || fallbacks[0] != "openai/qwen3.5:9b" || fallbacks[1] != "openai/claude-opus-4-7" { t.Fatalf("fallbacks: got %v", fallbacks) } } -func TestRankModels_OpenClawWrapper_KeepsOpenAIPrefixOnCloudPicks(t *testing.T) { - // Cloud models also get the openai/ prefix in OpenClaw because LiteLLM - // routes them through its openai-compatible adapter slot. The wrapper - // must wrap regardless of whether the underlying pick is cloud or local. +func TestRankModels_OpenClawWrapper_PrefixesConfiguredCloudPrimary(t *testing.T) { primary, _ := rankModels([]string{ - "qwen3.5:9b", "claude-opus-4-7", + "qwen3.5:9b", }) if primary != "openai/claude-opus-4-7" { t.Fatalf("primary: got %q, want openai/claude-opus-4-7", primary) diff --git a/internal/stack/backend_k3d.go b/internal/stack/backend_k3d.go index f530f0e3..f2c5c10e 100644 --- a/internal/stack/backend_k3d.go +++ b/internal/stack/backend_k3d.go @@ -103,6 +103,23 @@ func (b *K3dBackend) Up(cfg *config.Config, u *ui.UI, stackID string) ([]byte, e return nil, err } + // Ensure the dev registry caches are running on every Up, in BOTH the + // existing-cluster and fresh-create branches. `k3d cluster start` does + // not auto-restart standalone registry containers attached via + // `--registry-use` at create time — it only starts the cluster's own + // nodes. Without this call, every retry after a `cluster stop` (or after + // the failure-recovery Down() call in syncDefaults) falls back to direct + // upstream pulls and re-fetches every image, costing minutes per + // attempt. + if os.Getenv("OBOL_DEVELOPMENT") == "true" { + setup, setupErr := ensureDevRegistries(cfg, u) + if setupErr != nil { + u.Warnf("Dev registry cache unavailable, falling back to direct upstream pulls: %v", setupErr) + } else { + registrySetup = setup + } + } + if running { u.Warn("Cluster already exists, starting it") @@ -128,15 +145,6 @@ func (b *K3dBackend) Up(cfg *config.Config, u *ui.UI, stackID string) ([]byte, e // 'obol stack init' wrote the k3d config. 
ensureK3dPortsAvailable(k3dConfigPath, u) - if os.Getenv("OBOL_DEVELOPMENT") == "true" { - setup, setupErr := ensureDevRegistries(cfg, u) - if setupErr != nil { - u.Warnf("Dev registry cache unavailable, falling back to direct upstream pulls: %v", setupErr) - } else { - registrySetup = setup - } - } - createCmd := exec.Command( filepath.Join(cfg.BinDir, "k3d"), k3dCreateArgs(stackName, k3dConfigPath, registrySetup)..., diff --git a/internal/stack/dev_registry.go b/internal/stack/dev_registry.go index 91ed2f96..dfd70e03 100644 --- a/internal/stack/dev_registry.go +++ b/internal/stack/dev_registry.go @@ -79,11 +79,20 @@ func ensureDevRegistry(cfg *config.Config, k3dBinary string, mirror registryMirr return nil } - if err := runCommand(exec.Command("docker", "start", containerName)); err != nil { - return fmt.Errorf("start registry %s: %w", containerName, err) + // Container exists but is stopped. Try to start it. + if startErr := runCommand(exec.Command("docker", "start", containerName)); startErr == nil { + return nil } - return nil + // Start failed — most commonly because the k3d-obol-stack-* Docker + // network the registry was attached to has been removed (cluster + // purge or reclaimLeakedDevK3dNetworks). The container's stored + // network reference is now a dangling ID and `docker start` aborts + // with "network ... not found". Force-remove the container and + // fall through to recreate it. The cache content lives on the host + // bind-mount under registryCacheDir(mirror), so the new container + // picks up exactly the layers the old one had — no re-pull. + _ = runCommand(exec.Command("docker", "rm", "-f", containerName)) } createCmd := exec.Command( diff --git a/internal/stack/stack.go b/internal/stack/stack.go index b6704e61..90700775 100644 --- a/internal/stack/stack.go +++ b/internal/stack/stack.go @@ -362,18 +362,6 @@ func syncDefaults(cfg *config.Config, u *ui.UI, kubeconfigPath string, dataDir s u.Warnf("Failed to preserve LiteLLM config across Helm sync: %v", err) } - // Release runtime field ownership of litellm-config.data.config.yaml so the - // upcoming helm upgrade can reclaim it without an SSA conflict. Without - // this step, the second `obol stack up` after autoConfigureLLM/restore has - // claimed the field via SSA (manager=helm, op=Apply) fails with - // "conflict with helm using v1: .data.config.yaml" because helm registers - // a separate managedFields entry (manager=helm, op=Update) for the same - // field. The data is already snapshotted in previousLiteLLMConfig and gets - // re-applied by restoreLiteLLMConfig after helm runs. - if err := releaseLiteLLMConfigOwnership(cfg, kubeconfigPath); err != nil { - u.Warnf("Failed to release LiteLLM config field ownership: %v", err) - } - // Compatibility migration if err := migrateDefaultsHTTPRouteHostnames(helmfilePath); err != nil { u.Warnf("Failed to migrate defaults helmfile hostnames: %v", err) @@ -919,37 +907,6 @@ func preserveLiteLLMConfigForHelm(cfg *config.Config, kubeconfigPath string) (st return raw, nil } -// releaseLiteLLMConfigOwnership strips managedFields from the litellm-config -// ConfigMap so the next helm upgrade can claim ownership of every field -// without an SSA conflict. Helm tracks release ownership via the -// meta.helm.sh/release-name annotation, not managedFields, so clearing -// managedFields does not detach the resource from its release. -// -// The single empty entry [{}] is the documented apiserver idiom for clearing -// all field-ownership claims on a resource. 
-// See:
-// https://kubernetes.io/docs/reference/using-api/server-side-apply/#clearing-managedfields
-func releaseLiteLLMConfigOwnership(cfg *config.Config, kubeconfigPath string) error {
-	kubectlBinary := filepath.Join(cfg.BinDir, "kubectl")
-
-	// Skip if the configmap doesn't exist (first install).
-	if _, err := kubectl.Output(kubectlBinary, kubeconfigPath,
-		"get", "configmap", "litellm-config", "-n", "llm", "-o", "name"); err != nil {
-		return nil
-	}
-
-	cmd := exec.Command(kubectlBinary,
-		"patch", "configmap", "litellm-config",
-		"-n", "llm",
-		"--type=merge",
-		"--patch", `{"metadata":{"managedFields":[{}]}}`,
-	)
-	cmd.Env = append(os.Environ(), "KUBECONFIG="+kubeconfigPath)
-	if out, err := cmd.CombinedOutput(); err != nil {
-		return fmt.Errorf("kubectl patch managedFields: %w\n%s", err, string(out))
-	}
-	return nil
-}
-
 func restoreLiteLLMConfig(cfg *config.Config, kubeconfigPath, raw string) error {
 	if strings.TrimSpace(raw) == "" {
 		return nil
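
Taken together, the contract the new ranking tests pin down, with illustrative
model names borrowed from the tests above:

	primary, fallbacks := model.Rank([]string{
		"anthropic/claude-opus-4-7", // configured first: becomes primary, prefix intact
		"nomic-embed-text",          // embedding-only: demoted behind chat models
		"llama3.2:1b",
	})
	// primary   == "anthropic/claude-opus-4-7"
	// fallbacks == ["llama3.2:1b", "nomic-embed-text"]
	// To change the primary, reorder the config: obol model prefer llama3.2:1b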