Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 53 additions & 3 deletions cmd/obol/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ func modelCommand(cfg *config.Config) *cli.Command {
modelSyncCommand(cfg),
modelPullCommand(),
modelListCommand(cfg),
modelPreferCommand(cfg),
modelRemoveCommand(cfg),
},
}
Expand Down Expand Up @@ -194,12 +195,33 @@ func setupCloudProvider(cfg *config.Config, u *ui.UI, provider, apiKey string, m
}

if len(models) == 0 {
// Sensible defaults
// Per-provider defaults — kept in sync with what the providers
// document as their current chat-tuned flagship. Bumping these is a
// small follow-up PR when frontier models drop, and it isolates the
// "what's good today" maintenance to one place.
var defaultModel string
switch provider {
case "anthropic":
models = []string{"claude-sonnet-4-6"}
defaultModel = "claude-sonnet-4-6"
case "openai":
models = []string{"gpt-4.1"}
defaultModel = "gpt-5.5"
}

// Interactive: let the user override the default with a free-text
// entry. Non-interactive (no TTY): silently use the default — the
// caller can always pass --model to be explicit.
chosen := defaultModel
if defaultModel != "" && u.IsTTY() && !u.IsJSON() {
input, err := u.Input(fmt.Sprintf("Model for %s", provider), defaultModel)
if err != nil {
return err
}
if strings.TrimSpace(input) != "" {
chosen = strings.TrimSpace(input)
}
}
if chosen != "" {
models = []string{chosen}
}
}

Expand Down Expand Up @@ -493,6 +515,34 @@ func modelListCommand(cfg *config.Config) *cli.Command {
}
}

// modelPreferCommand wires up `obol model prefer`, which promotes one or
// more configured models to the front of the LiteLLM model_list; the first
// entry becomes the agent's primary model.
func modelPreferCommand(cfg *config.Config) *cli.Command {
	return &cli.Command{
		Name:      "prefer",
		Usage:     "Pull one or more models to the head of the LiteLLM model_list (the head becomes the agent's primary)",
		ArgsUsage: "<model-name> [<model-name> ...]",
		Flags: []cli.Flag{
			&cli.BoolFlag{Name: "no-sync", Usage: "Skip the agent model sync (batch with other model commands, then run `obol model sync` once)"},
		},
		Action: func(ctx context.Context, cmd *cli.Command) error {
			u := getUI(cmd)

			args := cmd.Args().Slice()
			if len(args) == 0 {
				return errors.New("at least one model name is required\n\nUsage: obol model prefer <model-name> [<model-name> ...]\n\nList configured models with: obol model list")
			}

			if err := model.PreferModels(cfg, u, args); err != nil {
				return err
			}

			// --no-sync lets operators batch several model commands and run
			// a single `obol model sync` at the end instead of one per step.
			if !cmd.Bool("no-sync") {
				return syncAgentModels(cfg, u)
			}
			return nil
		},
	}
}

func modelRemoveCommand(cfg *config.Config) *cli.Command {
return &cli.Command{
Name: "remove",
Expand Down
15 changes: 7 additions & 8 deletions internal/hermes/hermes.go
Original file line number Diff line number Diff line change
Expand Up @@ -1260,15 +1260,14 @@ func litellmMasterKey(cfg *config.Config) string {
return "sk-obol-" + strings.TrimSpace(string(data))
}

// rankModels delegates to model.Rank, which knows how to prefer larger local
// models and frontier cloud models. Kept as a thin wrapper so call sites
// don't need to import internal/model directly.
// rankModels delegates to model.Rank, which preserves configured LiteLLM model
// order and keeps known embedding-only models behind chat-capable models. Kept
// as a thin wrapper so call sites don't need to import internal/model directly.
//
// IMPORTANT: do NOT pre-strip provider prefixes here. model.Rank strips
// internally for ranking heuristics but returns the ORIGINAL strings so the
// agent can round-trip them back to LiteLLM. Stripping at this layer would
// break that round-trip — that's exactly the double-strip bug that
// ca820c9 worked around for custom endpoints.
// IMPORTANT: do NOT strip provider prefixes here. model.Rank returns the
// original strings so the agent can round-trip them back to LiteLLM. Stripping
// at this layer would break that round-trip — that's exactly the double-strip
// bug that ca820c9 worked around for custom endpoints.
func rankModels(models []string) (primary string, fallbacks []string) {
	// Pure delegation; model.Rank hands back the original strings untouched.
	primary, fallbacks = model.Rank(models)
	return primary, fallbacks
}
Expand Down
46 changes: 12 additions & 34 deletions internal/hermes/rankmodels_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,20 @@ package hermes

import "testing"

// TestRankModels_HermesWrapper_PrefersLargerLocalModel encodes the regression
// from the colleague's screenshot: Hermes was deploying with `llama3.2:1b` as
// the default model, which then parroted its own tool list back on every
// "hello" prompt. The fix moved capability ranking into model.Rank; this test
// just confirms the Hermes-side wrapper still calls into it correctly.
//
// Contract: bare LiteLLM model_name strings come in, the SAME bare strings
// come back out — no provider-prefix stripping at this layer. The agent must
// be able to round-trip the returned primary back to LiteLLM without
// modification.
func TestRankModels_HermesWrapper_PrefersLargerLocalModel(t *testing.T) {
// Contract: LiteLLM model_name strings come in, the SAME strings come back
// out in configured order. The agent must be able to round-trip the returned
// primary back to LiteLLM without modification.
func TestRankModels_HermesWrapper_PreservesConfiguredOrder(t *testing.T) {
primary, fallbacks := rankModels([]string{
"llama3.2:1b",
"llama3.1:8b",
"llama3.2:3b",
})
if primary != "llama3.1:8b" {
t.Fatalf("primary: got %q, want llama3.1:8b", primary)
}
if len(fallbacks) != 2 || fallbacks[0] != "llama3.2:3b" || fallbacks[1] != "llama3.2:1b" {
t.Fatalf("fallbacks: got %v, want [llama3.2:3b llama3.2:1b]", fallbacks)
}
}

// TestRankModels_HermesWrapper_PrefersClaudeOverLocal exercises the cloud
// tier. Cloud entries written by buildModelEntries are bare (e.g.
// `claude-opus-4-7`, not `anthropic/claude-opus-4-7`), and the wrapper must
// preserve that.
func TestRankModels_HermesWrapper_PrefersClaudeOverLocal(t *testing.T) {
primary, _ := rankModels([]string{
"llama3.1:8b",
"claude-opus-4-7",
"llama3.2:1b",
})
if primary != "claude-opus-4-7" {
t.Fatalf("primary: got %q, want claude-opus-4-7", primary)
if primary != "llama3.2:1b" {
t.Fatalf("primary: got %q, want llama3.2:1b", primary)
}
if len(fallbacks) != 2 || fallbacks[0] != "llama3.1:8b" || fallbacks[1] != "claude-opus-4-7" {
t.Fatalf("fallbacks: got %v, want [llama3.1:8b claude-opus-4-7]", fallbacks)
}
}

Expand All @@ -50,12 +28,12 @@ func TestRankModels_HermesWrapper_PrefersClaudeOverLocal(t *testing.T) {
// reintroducing it.
func TestRankModels_HermesWrapper_PreservesProviderPrefixIfPresent(t *testing.T) {
primary, _ := rankModels([]string{
"llama3.1:8b",
"anthropic/claude-opus-4-7",
"openai/gpt-4o",
"llama3.1:8b",
})
if primary != "anthropic/claude-opus-4-7" {
t.Fatalf("primary: got %q, want anthropic/claude-opus-4-7 (unstripped)", primary)
if primary != "llama3.1:8b" {
t.Fatalf("primary: got %q, want llama3.1:8b", primary)
}
}

Expand Down
154 changes: 141 additions & 13 deletions internal/model/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,130 @@ func hotDeleteModel(cfg *config.Config, u *ui.UI, modelName string) error {
return nil
}

// reorderModelList is the pure-function core of PreferModels. It moves the
// named entries to the head of the list (in the order given) and returns
// the new slice, plus a boolean indicating whether the input was already in
// the requested order (the caller should treat that as a no-op so it can
// skip the kubectl patch + LiteLLM rollout). Unknown or duplicate names
// produce an error so typos surface loudly.
// reorderModelList is the pure core of PreferModels. It returns a new slice
// with the named entries moved to the front (in the order given) while every
// other entry keeps its original relative position. The boolean result is
// true when entries already began with names in that exact order — callers
// treat that as a no-op so they can skip the kubectl patch and LiteLLM
// rollout. Unknown names and duplicated names are errors so typos fail loudly.
func reorderModelList(entries []ModelEntry, names []string) ([]ModelEntry, bool, error) {
	position := make(map[string]int, len(entries))
	for i, e := range entries {
		position[e.ModelName] = i
	}

	// Validate the whole request up front: gather every unknown name so they
	// are reported together, and reject duplicated names outright.
	var unknown []string
	seen := make(map[string]bool, len(names))
	for _, n := range names {
		if _, ok := position[n]; !ok {
			unknown = append(unknown, n)
			continue
		}
		if seen[n] {
			return nil, false, fmt.Errorf("duplicate model in prefer args: %q", n)
		}
		seen[n] = true
	}
	if len(unknown) > 0 {
		return nil, false, fmt.Errorf("model(s) not found in LiteLLM config: %s\n  Run 'obol model list' to see available entries", strings.Join(unknown, ", "))
	}

	// Detect when the requested order already holds at the head of the list.
	atHead := true
	for i, n := range names {
		if i >= len(entries) || entries[i].ModelName != n {
			atHead = false
			break
		}
	}

	// Preferred entries first (in request order), then everything else in
	// its original relative order.
	out := make([]ModelEntry, 0, len(entries))
	for _, n := range names {
		out = append(out, entries[position[n]])
	}
	for _, e := range entries {
		if !seen[e.ModelName] {
			out = append(out, e)
		}
	}
	return out, atHead, nil
}

// PreferModels reorders LiteLLM's model_list so the named entries appear at
// the head, in the order given. Remaining entries keep their original
// relative order. This is the operator-facing primitive that lets
// model.Rank's "first chat-capable wins" rule pick a specific primary
// without a remove/re-add cycle.
//
// Returns an error if any of the requested names is not present in the
// current model_list — typos should be loud, not silent no-ops.
//
// LiteLLM has no model_list reorder API, so after the ConfigMap patch this
// rolls the LiteLLM Deployment so the new order takes effect (the
// /v1/models listing follows model_list order, and hermes/openclaw read
// the ConfigMap directly via GetConfiguredModels for the agent primary).
// PreferModels reorders LiteLLM's model_list so the named entries appear at
// the head, in the order given. Remaining entries keep their original
// relative order. This is the operator-facing primitive that lets
// model.Rank's "first chat-capable wins" rule pick a specific primary
// without a remove/re-add cycle.
//
// Returns an error if any of the requested names is not present in the
// current model_list — typos should be loud, not silent no-ops.
//
// LiteLLM has no model_list reorder API, so after the ConfigMap patch this
// rolls the LiteLLM Deployment so the new order takes effect (the
// /v1/models listing follows model_list order, and hermes/openclaw read
// the ConfigMap directly via GetConfiguredModels for the agent primary).
func PreferModels(cfg *config.Config, u *ui.UI, names []string) error {
	if len(names) == 0 {
		return errors.New("at least one model name is required")
	}

	kubectlBinary := filepath.Join(cfg.BinDir, "kubectl")
	kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml")

	// errors.Is is the modern replacement for os.IsNotExist and also sees
	// through wrapped errors. Other stat failures fall through to the
	// kubectl call below, which reports its own, more specific error.
	if _, err := os.Stat(kubeconfigPath); errors.Is(err, os.ErrNotExist) {
		return errors.New("cluster not running. Run 'obol stack up' first")
	}

	// Read the live config.yaml out of the ConfigMap — it is the single
	// source of truth for the model_list.
	raw, err := kubectl.Output(kubectlBinary, kubeconfigPath,
		"get", "configmap", configMapName, "-n", namespace, "-o", "jsonpath={.data.config\\.yaml}")
	if err != nil {
		return fmt.Errorf("failed to read LiteLLM config: %w", err)
	}

	var litellmConfig LiteLLMConfig
	if err := yaml.Unmarshal([]byte(raw), &litellmConfig); err != nil {
		return fmt.Errorf("failed to parse config.yaml: %w", err)
	}

	reordered, alreadyAtHead, err := reorderModelList(litellmConfig.ModelList, names)
	if err != nil {
		return err
	}
	if alreadyAtHead {
		u.Infof("Model(s) already at the head of the model_list, no change")
		return nil
	}
	litellmConfig.ModelList = reordered

	updated, err := yaml.Marshal(&litellmConfig)
	if err != nil {
		return fmt.Errorf("failed to marshal config: %w", err)
	}
	// JSON-encode the YAML document so it embeds safely as a string value
	// inside the merge-patch payload (quotes, newlines, etc. escaped).
	escapedYAML, err := json.Marshal(string(updated))
	if err != nil {
		return fmt.Errorf("failed to escape YAML: %w", err)
	}
	patchJSON := fmt.Sprintf(`{"data":{"config.yaml":%s}}`, escapedYAML)

	u.Infof("Promoting %s to head of LiteLLM model_list", strings.Join(names, ", "))
	if err := kubectl.Run(kubectlBinary, kubeconfigPath,
		"patch", "configmap", configMapName, "-n", namespace,
		"-p", patchJSON, "--type=merge", "--field-manager=helm"); err != nil {
		return fmt.Errorf("failed to patch ConfigMap: %w", err)
	}

	// LiteLLM has no reorder API; restart the deployment so the new order
	// takes effect (mostly cosmetic for /v1/models listings — agent primary
	// is read from the ConfigMap directly via GetConfiguredModels, which is
	// already correct after the patch above). A failed rollout is therefore
	// non-fatal: warn instead of erroring.
	if err := RestartLiteLLM(cfg, u, "prefer"); err != nil {
		u.Warnf("LiteLLM rollout failed: %v", err)
		u.Dim("  The ConfigMap is updated; agent will pick up the new primary on next sync.")
	}

	return nil
}

// RemoveModel removes a model entry from the LiteLLM ConfigMap (persistence)
// and hot-deletes it from the running router via the API (immediate effect).
// No pod restart is required.
Expand Down Expand Up @@ -1123,16 +1247,10 @@ func buildModelEntries(provider string, models []string) []ModelEntry {
}
case ProviderAnthropic:
cachePoints := anthropicCacheControlPoints()
// Wildcard: routes any anthropic model without explicit registration
entries = append(entries, ModelEntry{
ModelName: "anthropic/*",
LiteLLMParams: LiteLLMParams{
Model: "anthropic/*",
APIKey: "os.environ/ANTHROPIC_API_KEY",
CacheControlInjectionPoints: cachePoints,
},
})
// Explicit entries for requested models (better /v1/models listing)
// Explicit entries first so the user-selected model is the primary
// under model.Rank's "first chat-capable wins" rule. Hermes cannot
// send `model: anthropic/*` literally (LiteLLM doesn't resolve a
// wildcard to a default), so the wildcard must never sit at index 0.
for _, m := range models {
entries = append(entries, ModelEntry{
ModelName: m,
Expand All @@ -1143,17 +1261,27 @@ func buildModelEntries(provider string, models []string) []ModelEntry {
},
})
}
case ProviderOpenAI:
// Wildcard: routes any anthropic model without explicit registration.
entries = append(entries, ModelEntry{
ModelName: "openai/*",
LiteLLMParams: LiteLLMParams{Model: "openai/*", APIKey: "os.environ/OPENAI_API_KEY"},
ModelName: "anthropic/*",
LiteLLMParams: LiteLLMParams{
Model: "anthropic/*",
APIKey: "os.environ/ANTHROPIC_API_KEY",
CacheControlInjectionPoints: cachePoints,
},
})
case ProviderOpenAI:
// Explicit-before-wildcard, same rationale as Anthropic above.
for _, m := range models {
entries = append(entries, ModelEntry{
ModelName: m,
LiteLLMParams: LiteLLMParams{Model: "openai/" + m, APIKey: "os.environ/OPENAI_API_KEY"},
})
}
entries = append(entries, ModelEntry{
ModelName: "openai/*",
LiteLLMParams: LiteLLMParams{Model: "openai/*", APIKey: "os.environ/OPENAI_API_KEY"},
})
default:
for _, m := range models {
entries = append(entries, ModelEntry{
Expand Down
Loading
Loading