diff --git a/cmd/obol/model.go b/cmd/obol/model.go
index 375885e9..2ef8ef4e 100644
--- a/cmd/obol/model.go
+++ b/cmd/obol/model.go
@@ -27,6 +27,7 @@ func modelCommand(cfg *config.Config) *cli.Command {
 			modelSyncCommand(cfg),
 			modelPullCommand(),
 			modelListCommand(cfg),
+			modelPreferCommand(cfg),
 			modelRemoveCommand(cfg),
 		},
 	}
@@ -194,12 +195,33 @@ func setupCloudProvider(cfg *config.Config, u *ui.UI, provider, apiKey string, m
 	}
 
 	if len(models) == 0 {
-		// Sensible defaults
+		// Per-provider defaults — kept in sync with what the providers
+		// document as their current chat-tuned flagship. Bumping these is a
+		// small follow-up PR when frontier models drop, and it isolates the
+		// "what's good today" maintenance to one place.
+		var defaultModel string
 		switch provider {
 		case "anthropic":
-			models = []string{"claude-sonnet-4-6"}
+			defaultModel = "claude-sonnet-4-6"
 		case "openai":
-			models = []string{"gpt-4.1"}
+			defaultModel = "gpt-5.5"
+		}
+
+		// Interactive: let the user override the default with a free-text
+		// entry. Non-interactive (no TTY): silently use the default — the
+		// caller can always pass --model to be explicit.
+		chosen := defaultModel
+		if defaultModel != "" && u.IsTTY() && !u.IsJSON() {
+			input, err := u.Input(fmt.Sprintf("Model for %s", provider), defaultModel)
+			if err != nil {
+				return err
+			}
+			if strings.TrimSpace(input) != "" {
+				chosen = strings.TrimSpace(input)
+			}
+		}
+		if chosen != "" {
+			models = []string{chosen}
+		}
 	}
 
@@ -493,6 +515,34 @@ func modelListCommand(cfg *config.Config) *cli.Command {
 	}
 }
 
+func modelPreferCommand(cfg *config.Config) *cli.Command {
+	return &cli.Command{
+		Name:      "prefer",
+		Usage:     "Pull one or more models to the head of the LiteLLM model_list (the head becomes the agent's primary)",
+		ArgsUsage: "<model> [<model> ...]",
+		Flags: []cli.Flag{
+			&cli.BoolFlag{Name: "no-sync", Usage: "Skip the agent model sync (batch with other model commands, then run `obol model sync` once)"},
+		},
+		Action: func(ctx context.Context, cmd *cli.Command) error {
+			u := getUI(cmd)
+
+			names := cmd.Args().Slice()
+			if len(names) == 0 {
+				return errors.New("at least one model name is required\n\nUsage: obol model prefer <model> [<model> ...]\n\nList configured models with: obol model list")
+			}
+
+			if err := model.PreferModels(cfg, u, names); err != nil {
+				return err
+			}
+
+			if cmd.Bool("no-sync") {
+				return nil
+			}
+			return syncAgentModels(cfg, u)
+		},
+	}
+}
+
 func modelRemoveCommand(cfg *config.Config) *cli.Command {
 	return &cli.Command{
 		Name:  "remove",
diff --git a/internal/embed/infrastructure/helmfile.yaml b/internal/embed/infrastructure/helmfile.yaml
index 05ff5f27..44d73ea8 100644
--- a/internal/embed/infrastructure/helmfile.yaml
+++ b/internal/embed/infrastructure/helmfile.yaml
@@ -2,6 +2,24 @@
 # Orchestrates core infrastructure components deployed with every stack
 # Uses Traefik with Gateway API for routing (replaces nginx-ingress)
 
+# Force helm to use Server-Side Apply with --force-conflicts on every release.
+# Without this, every `obol stack down`/`up` cycle hits a different SSA
+# ownership conflict on resources helm shares with other writers — for example
+# litellm-config.data["config.yaml"] (touched by `obol model setup` patches),
+# remote-signer-keystore-password.metadata.labels (helm 3 pre-SSA default
+# manager was kubectl-client-side-apply), or fields the apiserver synthesises
+# under "before-first-apply" when an Update-managed object first sees an
+# Apply call.
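+#
+# The conflict the apiserver reports has this shape (abridged):
+#   Apply failed with 1 conflict: conflict with "kubectl-client-side-apply"
+#   using v1: .data.config.yaml
+# and the competing managers on a live stack are visible with:
+#   kubectl get configmap litellm-config -n llm -o yaml --show-managed-fields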
--server-side=true switches helm to SSA semantics so it can +# reason about field ownership at all; --force-conflicts tells it to take +# ownership when it disagrees with another manager rather than aborting the +# upgrade. The `=true` form is required: helm's --server-side flag takes a +# value (auto|true|false), so a bare --server-side would consume the next +# arg as its value and reject --force-conflicts as an unknown apply method. +helmDefaults: + args: + - "--server-side=true" + - "--force-conflicts" + repositories: - name: traefik url: https://traefik.github.io/charts diff --git a/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl b/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl index ccb70e0e..2e5051ea 100644 --- a/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl +++ b/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl @@ -35,7 +35,7 @@ image: repository: obolnetwork/obol-stack-front-end pullPolicy: IfNotPresent - tag: "v0.1.17-rc.5" + tag: "v0.1.18" service: type: ClusterIP diff --git a/internal/hermes/hermes.go b/internal/hermes/hermes.go index 2abd2d7e..7bdc48b6 100644 --- a/internal/hermes/hermes.go +++ b/internal/hermes/hermes.go @@ -690,6 +690,19 @@ func writeDeploymentFiles(cfg *config.Config, id, deploymentDir, agentBaseURL st func generateHelmfile(namespace string) string { return fmt.Sprintf(`# Managed by obol agent +# Force helm to use Server-Side Apply with --force-conflicts on every +# release. Without this, every Hermes resync after a fresh install hits SSA +# ownership conflicts on shared fields — most often +# remote-signer-keystore-password.metadata.labels, where helm's pre-SSA +# install left field ownership under "kubectl-client-side-apply" and a +# subsequent helm upgrade with SSA can't reclaim them. =true is required +# because helm's --server-side flag takes a value (auto|true|false); a bare +# --server-side would consume the next arg and reject --force-conflicts. +helmDefaults: + args: + - "--server-side=true" + - "--force-conflicts" + repositories: - name: obol url: https://obolnetwork.github.io/helm-charts/ @@ -1260,15 +1273,14 @@ func litellmMasterKey(cfg *config.Config) string { return "sk-obol-" + strings.TrimSpace(string(data)) } -// rankModels delegates to model.Rank, which knows how to prefer larger local -// models and frontier cloud models. Kept as a thin wrapper so call sites -// don't need to import internal/model directly. +// rankModels delegates to model.Rank, which preserves configured LiteLLM model +// order and keeps known embedding-only models behind chat-capable models. Kept +// as a thin wrapper so call sites don't need to import internal/model directly. // -// IMPORTANT: do NOT pre-strip provider prefixes here. model.Rank strips -// internally for ranking heuristics but returns the ORIGINAL strings so the -// agent can round-trip them back to LiteLLM. Stripping at this layer would -// break that round-trip — that's exactly the double-strip bug that -// ca820c9 worked around for custom endpoints. +// IMPORTANT: do NOT strip provider prefixes here. model.Rank returns the +// original strings so the agent can round-trip them back to LiteLLM. Stripping +// at this layer would break that round-trip — that's exactly the double-strip +// bug that ca820c9 worked around for custom endpoints. 
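+//
+// Concretely (hypothetical list): Rank([]string{"anthropic/claude-opus-4-7"})
+// must hand back "anthropic/claude-opus-4-7" unchanged; a stripped
+// "claude-opus-4-7" would miss LiteLLM's model_name table and the agent's
+// next chat call would 400 with "no healthy deployments".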
func rankModels(models []string) (primary string, fallbacks []string) { return model.Rank(models) } diff --git a/internal/hermes/rankmodels_test.go b/internal/hermes/rankmodels_test.go index d7903e40..8cebfb32 100644 --- a/internal/hermes/rankmodels_test.go +++ b/internal/hermes/rankmodels_test.go @@ -2,42 +2,20 @@ package hermes import "testing" -// TestRankModels_HermesWrapper_PrefersLargerLocalModel encodes the regression -// from the colleague's screenshot: Hermes was deploying with `llama3.2:1b` as -// the default model, which then parroted its own tool list back on every -// "hello" prompt. The fix moved capability ranking into model.Rank; this test -// just confirms the Hermes-side wrapper still calls into it correctly. -// -// Contract: bare LiteLLM model_name strings come in, the SAME bare strings -// come back out — no provider-prefix stripping at this layer. The agent must -// be able to round-trip the returned primary back to LiteLLM without -// modification. -func TestRankModels_HermesWrapper_PrefersLargerLocalModel(t *testing.T) { +// Contract: LiteLLM model_name strings come in, the SAME strings come back +// out in configured order. The agent must be able to round-trip the returned +// primary back to LiteLLM without modification. +func TestRankModels_HermesWrapper_PreservesConfiguredOrder(t *testing.T) { primary, fallbacks := rankModels([]string{ "llama3.2:1b", - "llama3.1:8b", - "llama3.2:3b", - }) - if primary != "llama3.1:8b" { - t.Fatalf("primary: got %q, want llama3.1:8b", primary) - } - if len(fallbacks) != 2 || fallbacks[0] != "llama3.2:3b" || fallbacks[1] != "llama3.2:1b" { - t.Fatalf("fallbacks: got %v, want [llama3.2:3b llama3.2:1b]", fallbacks) - } -} - -// TestRankModels_HermesWrapper_PrefersClaudeOverLocal exercises the cloud -// tier. Cloud entries written by buildModelEntries are bare (e.g. -// `claude-opus-4-7`, not `anthropic/claude-opus-4-7`), and the wrapper must -// preserve that. -func TestRankModels_HermesWrapper_PrefersClaudeOverLocal(t *testing.T) { - primary, _ := rankModels([]string{ "llama3.1:8b", "claude-opus-4-7", - "llama3.2:1b", }) - if primary != "claude-opus-4-7" { - t.Fatalf("primary: got %q, want claude-opus-4-7", primary) + if primary != "llama3.2:1b" { + t.Fatalf("primary: got %q, want llama3.2:1b", primary) + } + if len(fallbacks) != 2 || fallbacks[0] != "llama3.1:8b" || fallbacks[1] != "claude-opus-4-7" { + t.Fatalf("fallbacks: got %v, want [llama3.1:8b claude-opus-4-7]", fallbacks) } } @@ -50,12 +28,12 @@ func TestRankModels_HermesWrapper_PrefersClaudeOverLocal(t *testing.T) { // reintroducing it. func TestRankModels_HermesWrapper_PreservesProviderPrefixIfPresent(t *testing.T) { primary, _ := rankModels([]string{ + "llama3.1:8b", "anthropic/claude-opus-4-7", "openai/gpt-4o", - "llama3.1:8b", }) - if primary != "anthropic/claude-opus-4-7" { - t.Fatalf("primary: got %q, want anthropic/claude-opus-4-7 (unstripped)", primary) + if primary != "llama3.1:8b" { + t.Fatalf("primary: got %q, want llama3.1:8b", primary) } } diff --git a/internal/model/model.go b/internal/model/model.go index 8433bdb2..ecf4a4b0 100644 --- a/internal/model/model.go +++ b/internal/model/model.go @@ -577,6 +577,130 @@ func hotDeleteModel(cfg *config.Config, u *ui.UI, modelName string) error { return nil } +// reorderModelList is the pure-function core of PreferModels. 
+// It moves the named entries to the head of the list (in the order given)
+// and returns the new slice, plus a boolean indicating whether the input was
+// already in the requested order (the caller should treat that as a no-op so
+// it can skip the kubectl patch + LiteLLM rollout). Unknown or duplicate
+// names produce an error so typos surface loudly.
+func reorderModelList(entries []ModelEntry, names []string) ([]ModelEntry, bool, error) {
+	indexByName := make(map[string]int, len(entries))
+	for i, entry := range entries {
+		indexByName[entry.ModelName] = i
+	}
+
+	var missing []string
+	picked := make(map[string]bool, len(names))
+	for _, name := range names {
+		if _, ok := indexByName[name]; !ok {
+			missing = append(missing, name)
+			continue
+		}
+		if picked[name] {
+			return nil, false, fmt.Errorf("duplicate model in prefer args: %q", name)
+		}
+		picked[name] = true
+	}
+	if len(missing) > 0 {
+		return nil, false, fmt.Errorf("model(s) not found in LiteLLM config: %s\n  Run 'obol model list' to see available entries", strings.Join(missing, ", "))
+	}
+
+	alreadyAtHead := true
+	for i, name := range names {
+		if i >= len(entries) || entries[i].ModelName != name {
+			alreadyAtHead = false
+			break
+		}
+	}
+
+	reordered := make([]ModelEntry, 0, len(entries))
+	for _, name := range names {
+		reordered = append(reordered, entries[indexByName[name]])
+	}
+	for _, entry := range entries {
+		if picked[entry.ModelName] {
+			continue
+		}
+		reordered = append(reordered, entry)
+	}
+	return reordered, alreadyAtHead, nil
+}
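+
+// For example, with a model_list of [anthropic/*, claude-sonnet-4-6, gpt-5.5]
+// (names borrowed from the tests below), preferring gpt-5.5 yields
+// [gpt-5.5, anthropic/*, claude-sonnet-4-6] with alreadyAtHead=false; running
+// the same prefer again reports alreadyAtHead=true, and the caller skips the
+// ConfigMap patch and LiteLLM rollout entirely.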
+
+// PreferModels reorders LiteLLM's model_list so the named entries appear at
+// the head, in the order given. Remaining entries keep their original
+// relative order. This is the operator-facing primitive that lets
+// model.Rank's "first chat-capable wins" rule pick a specific primary
+// without a remove/re-add cycle.
+//
+// Returns an error if any of the requested names is not present in the
+// current model_list — typos should be loud, not silent no-ops.
+//
+// LiteLLM has no model_list reorder API, so after the ConfigMap patch this
+// rolls the LiteLLM Deployment so the new order takes effect (the
+// /v1/models listing follows model_list order, and hermes/openclaw read
+// the ConfigMap directly via GetConfiguredModels for the agent primary).
+func PreferModels(cfg *config.Config, u *ui.UI, names []string) error {
+	if len(names) == 0 {
+		return errors.New("at least one model name is required")
+	}
+
+	kubectlBinary := filepath.Join(cfg.BinDir, "kubectl")
+	kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml")
+
+	if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) {
+		return errors.New("cluster not running. Run 'obol stack up' first")
+	}
+
+	raw, err := kubectl.Output(kubectlBinary, kubeconfigPath,
+		"get", "configmap", configMapName, "-n", namespace, "-o", "jsonpath={.data.config\\.yaml}")
+	if err != nil {
+		return fmt.Errorf("failed to read LiteLLM config: %w", err)
+	}
+
+	var litellmConfig LiteLLMConfig
+	if err := yaml.Unmarshal([]byte(raw), &litellmConfig); err != nil {
+		return fmt.Errorf("failed to parse config.yaml: %w", err)
+	}
+
+	reordered, alreadyAtHead, err := reorderModelList(litellmConfig.ModelList, names)
+	if err != nil {
+		return err
+	}
+	if alreadyAtHead {
+		u.Infof("Model(s) already at the head of the model_list, no change")
+		return nil
+	}
+	litellmConfig.ModelList = reordered
+
+	updated, err := yaml.Marshal(&litellmConfig)
+	if err != nil {
+		return fmt.Errorf("failed to marshal config: %w", err)
+	}
+	escapedYAML, err := json.Marshal(string(updated))
+	if err != nil {
+		return fmt.Errorf("failed to escape YAML: %w", err)
+	}
+	patchJSON := fmt.Sprintf(`{"data":{"config.yaml":%s}}`, escapedYAML)
+
+	u.Infof("Promoting %s to head of LiteLLM model_list", strings.Join(names, ", "))
+	if err := kubectl.Run(kubectlBinary, kubeconfigPath,
+		"patch", "configmap", configMapName, "-n", namespace,
+		"-p", patchJSON, "--type=merge", "--field-manager=helm"); err != nil {
+		return fmt.Errorf("failed to patch ConfigMap: %w", err)
+	}
+
+	// LiteLLM has no reorder API; restart the deployment so the new order
+	// takes effect (mostly cosmetic for /v1/models listings — agent primary
+	// is read from the ConfigMap directly via GetConfiguredModels, which is
+	// already correct after the patch above).
+	if err := RestartLiteLLM(cfg, u, "prefer"); err != nil {
+		u.Warnf("LiteLLM rollout failed: %v", err)
+		u.Dim("  The ConfigMap is updated; agent will pick up the new primary on next sync.")
+	}
+
+	return nil
+}
+
 // RemoveModel removes a model entry from the LiteLLM ConfigMap (persistence)
 // and hot-deletes it from the running router via the API (immediate effect).
 // No pod restart is required.
@@ -1123,16 +1247,10 @@ func buildModelEntries(provider string, models []string) []ModelEntry {
 		}
 	case ProviderAnthropic:
 		cachePoints := anthropicCacheControlPoints()
-		// Wildcard: routes any anthropic model without explicit registration
-		entries = append(entries, ModelEntry{
-			ModelName: "anthropic/*",
-			LiteLLMParams: LiteLLMParams{
-				Model:                       "anthropic/*",
-				APIKey:                      "os.environ/ANTHROPIC_API_KEY",
-				CacheControlInjectionPoints: cachePoints,
-			},
-		})
-		// Explicit entries for requested models (better /v1/models listing)
+		// Explicit entries first so the user-selected model is the primary
+		// under model.Rank's "first chat-capable wins" rule. Hermes cannot
+		// send `model: anthropic/*` literally (LiteLLM doesn't resolve a
+		// wildcard to a default), so the wildcard must never sit at index 0.
 		for _, m := range models {
 			entries = append(entries, ModelEntry{
 				ModelName: m,
@@ -1143,17 +1261,27 @@
 			},
 			})
 		}
-	case ProviderOpenAI:
+		// Wildcard: routes any anthropic model without explicit registration.
 		entries = append(entries, ModelEntry{
-			ModelName:     "openai/*",
-			LiteLLMParams: LiteLLMParams{Model: "openai/*", APIKey: "os.environ/OPENAI_API_KEY"},
+			ModelName: "anthropic/*",
+			LiteLLMParams: LiteLLMParams{
+				Model:                       "anthropic/*",
+				APIKey:                      "os.environ/ANTHROPIC_API_KEY",
+				CacheControlInjectionPoints: cachePoints,
+			},
 		})
+	case ProviderOpenAI:
+		// Explicit-before-wildcard, same rationale as Anthropic above.
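+		// e.g. models = ["gpt-4o"] produces [gpt-4o, openai/*], so model.Rank
+		// picks gpt-4o as primary while openai/* still catches ad-hoc requests
+		// for other OpenAI models.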
for _, m := range models { entries = append(entries, ModelEntry{ ModelName: m, LiteLLMParams: LiteLLMParams{Model: "openai/" + m, APIKey: "os.environ/OPENAI_API_KEY"}, }) } + entries = append(entries, ModelEntry{ + ModelName: "openai/*", + LiteLLMParams: LiteLLMParams{Model: "openai/*", APIKey: "os.environ/OPENAI_API_KEY"}, + }) default: for _, m := range models { entries = append(entries, ModelEntry{ diff --git a/internal/model/model_test.go b/internal/model/model_test.go index aadc53cb..03292f5a 100644 --- a/internal/model/model_test.go +++ b/internal/model/model_test.go @@ -31,41 +31,42 @@ func TestBuildModelEntries(t *testing.T) { } }) - t.Run("anthropic gets wildcard plus explicit entries", func(t *testing.T) { + t.Run("anthropic puts explicit before the wildcard", func(t *testing.T) { entries := buildModelEntries("anthropic", []string{"claude-sonnet-4-5-20250929"}) if len(entries) != 2 { - t.Fatalf("got %d entries, want 2 (wildcard + explicit)", len(entries)) + t.Fatalf("got %d entries, want 2 (explicit + wildcard)", len(entries)) } - // First entry is the wildcard - if entries[0].ModelName != "anthropic/*" { - t.Errorf("entries[0].ModelName = %q, want anthropic/*", entries[0].ModelName) + // Explicit first so model.Rank picks it as primary; Hermes can't + // send `model: anthropic/*` literally. + if entries[0].ModelName != "claude-sonnet-4-5-20250929" { + t.Errorf("entries[0].ModelName = %q, want explicit model", entries[0].ModelName) } - - if entries[0].LiteLLMParams.Model != "anthropic/*" { - t.Errorf("entries[0].Model = %q, want anthropic/*", entries[0].LiteLLMParams.Model) + if entries[0].LiteLLMParams.Model != "claude-sonnet-4-5-20250929" { + t.Errorf("entries[0].Model = %q", entries[0].LiteLLMParams.Model) } - // Second entry is the explicit model - if entries[1].ModelName != "claude-sonnet-4-5-20250929" { - t.Errorf("entries[1].ModelName = %q", entries[1].ModelName) + if entries[0].LiteLLMParams.APIKey != "os.environ/ANTHROPIC_API_KEY" { + t.Errorf("api_key = %q, want os.environ/ANTHROPIC_API_KEY", entries[0].LiteLLMParams.APIKey) } - - if entries[1].LiteLLMParams.APIKey != "os.environ/ANTHROPIC_API_KEY" { - t.Errorf("api_key = %q, want os.environ/ANTHROPIC_API_KEY", entries[1].LiteLLMParams.APIKey) + // Wildcard appended last as catch-all. 
+ if entries[1].ModelName != "anthropic/*" { + t.Errorf("entries[1].ModelName = %q, want anthropic/*", entries[1].ModelName) } }) - t.Run("openai gets wildcard plus explicit entries", func(t *testing.T) { + t.Run("openai puts explicit before the wildcard", func(t *testing.T) { entries := buildModelEntries("openai", []string{"gpt-4o"}) if len(entries) != 2 { - t.Fatalf("got %d entries, want 2 (wildcard + explicit)", len(entries)) + t.Fatalf("got %d entries, want 2 (explicit + wildcard)", len(entries)) } - if entries[0].ModelName != "openai/*" { - t.Errorf("entries[0].ModelName = %q, want openai/*", entries[0].ModelName) + if entries[0].ModelName != "gpt-4o" { + t.Errorf("entries[0].ModelName = %q, want gpt-4o", entries[0].ModelName) } - - if entries[1].LiteLLMParams.Model != "openai/gpt-4o" { - t.Errorf("entries[1].Model = %q, want openai/gpt-4o", entries[1].LiteLLMParams.Model) + if entries[0].LiteLLMParams.Model != "openai/gpt-4o" { + t.Errorf("entries[0].Model = %q, want openai/gpt-4o", entries[0].LiteLLMParams.Model) + } + if entries[1].ModelName != "openai/*" { + t.Errorf("entries[1].ModelName = %q, want openai/*", entries[1].ModelName) } }) @@ -750,3 +751,122 @@ func TestPullOllamaModel_MockServer(t *testing.T) { } }) } + +func TestReorderModelList(t *testing.T) { + entries := []ModelEntry{ + {ModelName: "anthropic/*"}, + {ModelName: "claude-sonnet-4-6"}, + {ModelName: "claude-opus-4-7"}, + {ModelName: "openai/*"}, + {ModelName: "gpt-5.5"}, + } + + t.Run("pulls a single model to the head", func(t *testing.T) { + got, already, err := reorderModelList(entries, []string{"claude-opus-4-7"}) + if err != nil { + t.Fatalf("reorderModelList: %v", err) + } + if already { + t.Fatalf("expected alreadyAtHead = false") + } + want := []string{"claude-opus-4-7", "anthropic/*", "claude-sonnet-4-6", "openai/*", "gpt-5.5"} + assertOrder(t, got, want) + }) + + t.Run("pulls multiple models to the head in the given order", func(t *testing.T) { + got, _, err := reorderModelList(entries, []string{"gpt-5.5", "claude-opus-4-7"}) + if err != nil { + t.Fatalf("reorderModelList: %v", err) + } + want := []string{"gpt-5.5", "claude-opus-4-7", "anthropic/*", "claude-sonnet-4-6", "openai/*"} + assertOrder(t, got, want) + }) + + t.Run("preserves relative order of unselected entries", func(t *testing.T) { + got, _, err := reorderModelList(entries, []string{"claude-opus-4-7"}) + if err != nil { + t.Fatalf("reorderModelList: %v", err) + } + // anthropic/*, claude-sonnet-4-6, openai/*, gpt-5.5 should stay in + // that relative order behind the promoted entry. 
+ want := []string{"claude-opus-4-7", "anthropic/*", "claude-sonnet-4-6", "openai/*", "gpt-5.5"} + assertOrder(t, got, want) + }) + + t.Run("flags already-at-head as no-op", func(t *testing.T) { + _, already, err := reorderModelList(entries, []string{"anthropic/*"}) + if err != nil { + t.Fatalf("reorderModelList: %v", err) + } + if !already { + t.Fatalf("expected alreadyAtHead = true when first entry is requested") + } + }) + + t.Run("flags multi-arg already-at-head as no-op", func(t *testing.T) { + _, already, err := reorderModelList(entries, []string{"anthropic/*", "claude-sonnet-4-6"}) + if err != nil { + t.Fatalf("reorderModelList: %v", err) + } + if !already { + t.Fatalf("expected alreadyAtHead = true when first two requested entries already lead") + } + }) + + t.Run("errors on missing names with full list", func(t *testing.T) { + _, _, err := reorderModelList(entries, []string{"claude-opus-4-7", "typo-1", "typo-2"}) + if err == nil { + t.Fatal("expected error for missing names") + } + if !strings.Contains(err.Error(), "typo-1") || !strings.Contains(err.Error(), "typo-2") { + t.Fatalf("error should list every missing name, got: %v", err) + } + }) + + t.Run("errors on duplicate args", func(t *testing.T) { + _, _, err := reorderModelList(entries, []string{"claude-opus-4-7", "claude-opus-4-7"}) + if err == nil { + t.Fatal("expected error for duplicate names") + } + if !strings.Contains(err.Error(), "duplicate") { + t.Fatalf("error should mention duplicate, got: %v", err) + } + }) + + t.Run("preserves ModelEntry contents not just names", func(t *testing.T) { + entriesWithParams := []ModelEntry{ + {ModelName: "a", LiteLLMParams: LiteLLMParams{Model: "openai/a", APIKey: "key-a"}}, + {ModelName: "b", LiteLLMParams: LiteLLMParams{Model: "openai/b", APIKey: "key-b"}}, + } + got, _, err := reorderModelList(entriesWithParams, []string{"b"}) + if err != nil { + t.Fatalf("reorderModelList: %v", err) + } + if got[0].LiteLLMParams.APIKey != "key-b" { + t.Fatalf("promoted entry lost its params: got %+v", got[0]) + } + if got[1].LiteLLMParams.APIKey != "key-a" { + t.Fatalf("trailing entry lost its params: got %+v", got[1]) + } + }) +} + +func assertOrder(t *testing.T, got []ModelEntry, want []string) { + t.Helper() + if len(got) != len(want) { + t.Fatalf("len = %d, want %d (got %v)", len(got), len(want), modelNames(got)) + } + for i, w := range want { + if got[i].ModelName != w { + t.Fatalf("entry[%d] = %q, want %q (full order: %v)", i, got[i].ModelName, w, modelNames(got)) + } + } +} + +func modelNames(entries []ModelEntry) []string { + out := make([]string, len(entries)) + for i, e := range entries { + out[i] = e.ModelName + } + return out +} diff --git a/internal/model/rank.go b/internal/model/rank.go index e5bb47a8..66953c34 100644 --- a/internal/model/rank.go +++ b/internal/model/rank.go @@ -1,221 +1,42 @@ package model -import ( - "regexp" - "sort" - "strconv" - "strings" -) +import "strings" -// Rank picks the strongest model from a list and demotes the rest to fallbacks. +// Rank selects the primary model from the configured model list and demotes the +// rest to fallbacks. // -// Selection order: +// Model ordering is configuration, not hidden product policy. LiteLLM's +// model_list order is the source of truth, so Rank preserves that order instead +// of guessing quality from provider names, parameter-count tags, or model-family +// aliases. 
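+// Concretely: Rank([]string{"llama3.2:1b", "claude-opus-4-7"}) now returns
+// "llama3.2:1b" as primary; an operator who wants the Claude model first
+// runs `obol model prefer claude-opus-4-7`, so the configuration, not a
+// heuristic, changes the answer.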
The only exception is known embedding-only entries: they are kept in +// the fallback list but moved behind chat-capable models so an embedding model +// does not become the default chat model when another option exists. // -// 1. Cloud models (Anthropic Claude, OpenAI GPT/o-series) outrank local Ollama -// models. The agent works far better against a frontier model when one is -// wired up. -// 2. Within the cloud tier, models are ranked by a known-quality table — -// newer/larger first (Opus > Sonnet > Haiku, gpt-5 > gpt-4.1, etc.). Names -// not in the table fall to the bottom of the cloud tier alphabetically. -// 3. Within the local tier, models are ranked by parameter count parsed from -// the model tag (e.g. `qwen3.5:9b` → 9, `mixtral:8x7b` → 56, `llama3.2:1b` -// → 1). Larger first. Untagged or "latest" models are ranked using the -// average size of their family if known, otherwise treated as unknown. -// -// The fallback list preserves cloud-then-local ordering so a controller using -// LiteLLM's fallback chain still tries cloud models before reaching for local -// ones if the primary fails. -// -// This used to be duplicated between internal/hermes and internal/openclaw, -// where each copy returned `local[0]` — i.e. whatever Ollama listed first. -// In practice that frequently picked llama3.2:1b on hosts that had recently -// pulled it, and a 1B model produces nonsense on the agent's tool-heavy -// system prompt (the typical symptom is the agent parroting its tool list -// back to the user instead of answering "hello"). Fixing the ranking here -// fixes both runtimes at once. +// The returned strings are the original inputs. Do not strip provider prefixes +// or normalize names here; Hermes/OpenClaw round-trip the returned primary back +// to LiteLLM as the chat-completions model field. func Rank(models []string) (primary string, fallbacks []string) { if len(models) == 0 { return "", nil } - var cloud, local []string + ordered := make([]string, 0, len(models)) + embeddingOnly := make([]string, 0) for _, m := range models { - if IsCloudModel(m) { - cloud = append(cloud, m) - } else { - local = append(local, m) + if isEmbeddingOnlyModel(m) { + embeddingOnly = append(embeddingOnly, m) + continue } + ordered = append(ordered, m) } + ordered = append(ordered, embeddingOnly...) - sort.Slice(cloud, func(i, j int) bool { - return cloudRank(cloud[i]) < cloudRank(cloud[j]) - }) - sort.Slice(local, func(i, j int) bool { - ip := localRank(local[i]) - jp := localRank(local[j]) - if ip != jp { - return ip > jp - } - return local[i] < local[j] - }) - - if len(cloud) > 0 { - primary = cloud[0] - fallbacks = append(append([]string{}, cloud[1:]...), local...) - } else { - primary = local[0] - fallbacks = local[1:] - } + primary = ordered[0] + fallbacks = ordered[1:] return primary, fallbacks } -// IsCloudModel reports whether a model name belongs to a frontier cloud -// provider (Anthropic Claude, OpenAI GPT or o-series). The check is by -// substring/prefix because LiteLLM model ids carry a provider prefix -// (`anthropic/claude-3-5-sonnet-latest`, `openai/gpt-4o`). -func IsCloudModel(name string) bool { - n := strings.ToLower(stripProviderPrefix(name)) - if strings.Contains(n, "claude") { - return true - } - if strings.HasPrefix(n, "gpt") || - strings.HasPrefix(n, "o1") || - strings.HasPrefix(n, "o3") || - strings.HasPrefix(n, "o4") || - strings.HasPrefix(n, "o5") { - return true - } - return false -} - -// stripProviderPrefix is an internal helper for ranking only. 
It is NOT -// exported and MUST NOT be used to mutate model identifiers that the agent -// will pass back to LiteLLM on chat-completion calls. -// -// LiteLLM `model_name` is bare (no provider prefix) — see the contract -// documented on AddCustomEndpoint and buildModelEntries. The only place a -// `provider/model` shape can sneak in is wildcard entries like `anthropic/*`, -// or legacy entries from older releases that namespaced custom endpoints as -// `custom//`. We strip those here so size/family parsing in -// IsCloudModel/cloudRank/localRank still works on tagged tokens, but the -// caller in Rank() returns the ORIGINAL string — never the stripped form. -func stripProviderPrefix(name string) string { - if idx := strings.Index(name, "/"); idx >= 0 { - return name[idx+1:] - } - return name -} - -// cloudRank returns a sort key for cloud model names — lower is better. The -// table lists representative substrings; the first match wins. -func cloudRank(name string) int { - n := strings.ToLower(stripProviderPrefix(name)) - for i, marker := range cloudPrecedence { - if strings.Contains(n, marker) { - return i - } - } - return len(cloudPrecedence) + 1 -} - -var cloudPrecedence = []string{ - "opus-4-7", "opus-4-6", "opus-4-5", "opus-4", "opus", - "sonnet-4-7", "sonnet-4-6", "sonnet-4-5", "sonnet-4", "sonnet-3-7", "sonnet", - "haiku-4-5", "haiku-4", "haiku", - "gpt-5", "gpt-4.1", "gpt-4o", "gpt-4", "gpt-3.5", - "o5", "o4", "o3", "o1", +func isEmbeddingOnlyModel(name string) bool { + n := strings.ToLower(name) + return strings.Contains(n, "embed") } - -// paramSizeRe matches the parameter-count tag in model names. Decimals are -// allowed so a `:0.6b` Ollama tag doesn't fall through to the family default -// and accidentally outrank a `:9b` peer. The captured groups are: -// -// llama3.2:1b → "1" → 10 (deci-billions) -// qwen3.5:9b → "9" → 90 -// qwen3:0.6b → "0.6" → 6 -// deepseek-r1:32b → "32" → 320 -// mixtral:8x7b → "8x7" → 560 (8 * 70) -// qwen3-vl:235b-cloud → "235" → 2350 -// -// localRank returns deci-billions (parameter count × 10) so 0.5b / 0.6b -// fractional sizes still produce distinct integer ranks without complicating -// the comparator. -var paramSizeRe = regexp.MustCompile(`(?i)(?::|-)(\d+(?:\.\d+)?(?:x\d+(?:\.\d+)?)?)b\b`) - -// localRank returns the parameter count (in deci-billions, i.e. params × 10) -// for a local model name. Decimal sizes (`0.5b`, `0.6b`) survive the int -// conversion intact. Untagged Ollama models fall back to a family-average -// lookup; truly unknown models return 0 (worst). Larger → higher rank. -func localRank(name string) int { - n := strings.ToLower(stripProviderPrefix(name)) - n = strings.TrimSuffix(n, ":latest") - - if m := paramSizeRe.FindStringSubmatch(n); m != nil { - raw := m[1] - if x := strings.Index(raw, "x"); x >= 0 { - a, errA := strconv.ParseFloat(raw[:x], 64) - b, errB := strconv.ParseFloat(raw[x+1:], 64) - if errA == nil && errB == nil { - return int(a * b * 10) - } - } - if v, err := strconv.ParseFloat(raw, 64); err == nil { - return int(v * 10) - } - } - - // No size in the tag — fall back to a family heuristic. These default - // values track the family's flagship "latest" tag at the time of writing - // and can be updated as new releases ship. We try longest prefixes first - // so `llama3.3` doesn't get the `llama3` default. 
- for _, prefix := range untaggedFamilyOrder { - if strings.HasPrefix(n, prefix) { - return untaggedFamilyDefaults[prefix] - } - } - return 0 -} - -// untaggedFamilyDefaults maps a model-family prefix to a typical parameter -// count expressed in deci-billions (params × 10), so the table shares units -// with localRank's tagged-parsing branch. The numbers don't have to be exact -// — the goal is "is this roughly bigger than that other model", not a -// precise sort. Untagged-model selection is rare; most Ollama users carry -// a size in the tag. -var untaggedFamilyDefaults = map[string]int{ - "qwen3.5": 90, - "qwen3": 140, - "qwen2.5": 70, - "llama3.3": 700, - "llama3.2": 30, - "llama3.1": 80, - "llama3": 80, - "deepseek-r1": 140, - "deepseek-coder": 60, - "deepseek-ocr": 70, - "mistral": 70, - "mixtral": 560, - "phi4": 140, - "phi3": 30, - "gemma3": 70, - "gemma2": 90, - "command-r": 350, - "nomic-embed": 0, // embedding model, never pick as agent default -} - -// untaggedFamilyOrder lists the keys of untaggedFamilyDefaults sorted by -// descending length so HasPrefix matches the most specific family first -// (e.g. `llama3.3` before `llama3`). -var untaggedFamilyOrder = func() []string { - keys := make([]string, 0, len(untaggedFamilyDefaults)) - for k := range untaggedFamilyDefaults { - keys = append(keys, k) - } - sort.Slice(keys, func(i, j int) bool { - if len(keys[i]) != len(keys[j]) { - return len(keys[i]) > len(keys[j]) - } - return keys[i] < keys[j] - }) - return keys -}() diff --git a/internal/model/rank_test.go b/internal/model/rank_test.go index 9dece8f9..676c717f 100644 --- a/internal/model/rank_test.go +++ b/internal/model/rank_test.go @@ -1,119 +1,88 @@ package model -import ( - "testing" -) +import "testing" -func TestRank_PrefersLargerLocalModelOver1B(t *testing.T) { - // The exact regression that produced "hello" → wall-of-text on the - // colleague's screenshot: a 1B model winning over qwen3.5:9b just because - // Ollama happened to list it first. 
+func TestRank_PreservesConfiguredOrder(t *testing.T) { primary, fallbacks := Rank([]string{ "llama3.2:1b", "qwen3.5:9b", - "llama3.2:3b", + "claude-opus-4-7", }) - if primary != "qwen3.5:9b" { - t.Fatalf("primary: got %q, want qwen3.5:9b", primary) + if primary != "llama3.2:1b" { + t.Fatalf("primary: got %q, want llama3.2:1b", primary) } - if len(fallbacks) != 2 || fallbacks[0] != "llama3.2:3b" || fallbacks[1] != "llama3.2:1b" { - t.Fatalf("fallbacks: got %v, want [llama3.2:3b llama3.2:1b]", fallbacks) + if len(fallbacks) != 2 || fallbacks[0] != "qwen3.5:9b" || fallbacks[1] != "claude-opus-4-7" { + t.Fatalf("fallbacks: got %v, want [qwen3.5:9b claude-opus-4-7]", fallbacks) } } -func TestRank_CloudOutranksLocal(t *testing.T) { +func TestRank_DoesNotInferProviderPrecedence(t *testing.T) { primary, fallbacks := Rank([]string{ - "qwen3.5:9b", + "gpt-4o", "claude-opus-4-7", - "llama3.2:1b", + "o3", }) - if primary != "claude-opus-4-7" { - t.Fatalf("primary: got %q, want claude-opus-4-7", primary) + if primary != "gpt-4o" { + t.Fatalf("primary: got %q, want gpt-4o", primary) } - // fallbacks: cloud-then-local order preserved - if len(fallbacks) != 2 || fallbacks[0] != "qwen3.5:9b" || fallbacks[1] != "llama3.2:1b" { - t.Fatalf("fallbacks: got %v", fallbacks) + if len(fallbacks) != 2 || fallbacks[0] != "claude-opus-4-7" || fallbacks[1] != "o3" { + t.Fatalf("fallbacks: got %v, want [claude-opus-4-7 o3]", fallbacks) } } -func TestRank_CloudInternalOrdering(t *testing.T) { - cases := []struct { - name string - in []string - want string - }{ - {"opus over sonnet", []string{"claude-sonnet-4-6", "claude-opus-4-7"}, "claude-opus-4-7"}, - {"sonnet over haiku", []string{"claude-haiku-4-5", "claude-sonnet-4-6"}, "claude-sonnet-4-6"}, - {"gpt-5 over gpt-4", []string{"gpt-4o", "gpt-5"}, "gpt-5"}, - {"opus over gpt", []string{"gpt-5", "claude-opus-4-7"}, "claude-opus-4-7"}, - {"provider prefix tolerated", []string{"openai/gpt-4o", "anthropic/claude-opus-4-7"}, "anthropic/claude-opus-4-7"}, +func TestRank_DoesNotInferSizePrecedence(t *testing.T) { + primary, fallbacks := Rank([]string{ + "llama3.2:1b", + "qwen3.5:9b", + "mixtral:8x7b", + }) + if primary != "llama3.2:1b" { + t.Fatalf("primary: got %q, want llama3.2:1b", primary) } - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - got, _ := Rank(tc.in) - if got != tc.want { - t.Fatalf("Rank(%v): got %q, want %q", tc.in, got, tc.want) - } - }) + if len(fallbacks) != 2 || fallbacks[0] != "qwen3.5:9b" || fallbacks[1] != "mixtral:8x7b" { + t.Fatalf("fallbacks: got %v, want [qwen3.5:9b mixtral:8x7b]", fallbacks) } } -func TestRank_LocalParameterParsing(t *testing.T) { - cases := []struct { - name string - in []string - want string - }{ - {"plain b suffix", []string{"llama3.2:1b", "llama3.2:3b"}, "llama3.2:3b"}, - {"two-digit count", []string{"qwen3:14b", "qwen3.5:9b"}, "qwen3:14b"}, - {"mixtral 8x7b", []string{"qwen3.5:9b", "mixtral:8x7b"}, "mixtral:8x7b"}, - {"235b cloud variant", []string{"qwen3.5:9b", "qwen3-vl:235b-cloud"}, "qwen3-vl:235b-cloud"}, - {"untagged family lookup", []string{"qwen3.5:9b", "llama3.3"}, "llama3.3"}, // family default 70 > 9 - // Regression on regression: a `:0.6b` Ollama tag must NOT fall through - // to the qwen3 family default (~14B) — that would mistakenly outrank - // qwen3.5:9b. The decimal-aware regex parses it as 0.6 directly. 
- {"decimal size below 1b", []string{"qwen3:0.6b", "qwen3.5:9b"}, "qwen3.5:9b"}, - {"decimal size 1.5b", []string{"qwen3.5:9b", "smol:1.5b"}, "qwen3.5:9b"}, - } - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - got, _ := Rank(tc.in) - if got != tc.want { - t.Fatalf("Rank(%v): got %q, want %q", tc.in, got, tc.want) - } - }) +func TestRank_EmbeddingModelLast(t *testing.T) { + primary, fallbacks := Rank([]string{ + "nomic-embed-text", + "llama3.2:1b", + "text-embedding-3-large", + "qwen3.5:9b", + }) + if primary != "llama3.2:1b" { + t.Fatalf("primary: got %q, want llama3.2:1b", primary) } -} - -func TestRank_DeterministicTiebreak(t *testing.T) { - // Two models the same size — must sort alphabetically so successive runs - // don't flip the primary. - primary1, _ := Rank([]string{"foo:7b", "bar:7b"}) - primary2, _ := Rank([]string{"bar:7b", "foo:7b"}) - if primary1 != primary2 { - t.Fatalf("non-deterministic: %q vs %q", primary1, primary2) + want := []string{"qwen3.5:9b", "nomic-embed-text", "text-embedding-3-large"} + if len(fallbacks) != len(want) { + t.Fatalf("fallbacks: got %v, want %v", fallbacks, want) } - if primary1 != "bar:7b" { - t.Fatalf("expected alphabetical tiebreak, got %q", primary1) + for i := range want { + if fallbacks[i] != want[i] { + t.Fatalf("fallbacks: got %v, want %v", fallbacks, want) + } } } -func TestRank_EmbeddingModelLast(t *testing.T) { - // nomic-embed-text isn't a chat model — must never become the agent's - // default if anything else is present. - primary, _ := Rank([]string{"nomic-embed-text", "llama3.2:1b"}) - if primary != "llama3.2:1b" { - t.Fatalf("primary: got %q, want llama3.2:1b (embedding model picked instead)", primary) +func TestRank_AllEmbeddingModelsPreserveOrder(t *testing.T) { + primary, fallbacks := Rank([]string{ + "nomic-embed-text", + "text-embedding-3-large", + }) + if primary != "nomic-embed-text" { + t.Fatalf("primary: got %q, want nomic-embed-text", primary) + } + if len(fallbacks) != 1 || fallbacks[0] != "text-embedding-3-large" { + t.Fatalf("fallbacks: got %v, want [text-embedding-3-large]", fallbacks) } } -// TestRank_PreservesProviderPrefixOnOutput documents the contract relied on -// by internal/hermes and internal/openclaw: Rank() may use the provider -// prefix internally for ranking heuristics (e.g. detecting "claude" in -// "anthropic/claude-opus-4-7"), but it MUST return the input strings -// UNCHANGED. The agent round-trips the returned primary back to LiteLLM as -// the `model` field on chat-completions; stripping here would mismatch the -// LiteLLM model_name and surface as 400 "no healthy deployments". +// TestRank_PreservesProviderPrefixOnOutput documents the contract relied on by +// internal/hermes and internal/openclaw: Rank returns input strings unchanged. +// The agent round-trips the returned primary back to LiteLLM as the `model` +// field on chat-completions; stripping here would mismatch LiteLLM model_name +// and surface as 400 "no healthy deployments". 
func TestRank_PreservesProviderPrefixOnOutput(t *testing.T) { cases := []struct { name string @@ -121,19 +90,19 @@ func TestRank_PreservesProviderPrefixOnOutput(t *testing.T) { want string }{ { - "anthropic/-prefixed wins over local", + "anthropic/-prefixed first", []string{"anthropic/claude-opus-4-7", "qwen3.5:9b"}, "anthropic/claude-opus-4-7", }, { - "openai/-prefixed wins over local", - []string{"qwen3.5:9b", "openai/gpt-4o"}, + "openai/-prefixed first", + []string{"openai/gpt-4o", "qwen3.5:9b"}, "openai/gpt-4o", }, { - "legacy custom// round-trips", - []string{"custom/spark1-vllm/qwen36-fast"}, - "custom/spark1-vllm/qwen36-fast", + "custom namespaced entry round-trips", + []string{"custom/qa-vllm/qwen36-fast"}, + "custom/qa-vllm/qwen36-fast", }, } for _, tc := range cases { @@ -152,18 +121,3 @@ func TestRank_Empty(t *testing.T) { t.Fatalf("Rank(nil): got %q,%v, want empty,nil", primary, fallbacks) } } - -func TestIsCloudModel(t *testing.T) { - cloud := []string{"claude-opus-4-7", "anthropic/claude-3-5-sonnet", "gpt-4o", "openai/gpt-5", "o1-preview", "o3-mini"} - local := []string{"llama3.2:1b", "qwen3.5:9b", "mixtral:8x7b", "deepseek-r1:14b", "nomic-embed-text"} - for _, n := range cloud { - if !IsCloudModel(n) { - t.Errorf("IsCloudModel(%q) = false, want true", n) - } - } - for _, n := range local { - if IsCloudModel(n) { - t.Errorf("IsCloudModel(%q) = true, want false", n) - } - } -} diff --git a/internal/openclaw/openclaw.go b/internal/openclaw/openclaw.go index aab93f0b..47d0c54a 100644 --- a/internal/openclaw/openclaw.go +++ b/internal/openclaw/openclaw.go @@ -1669,10 +1669,7 @@ func cliViaKubectlExec(cfg *config.Config, namespace string, args []string) erro // current LiteLLM model list. For each LiteLLM-routed instance it: // 1. Patches the overlay YAML model list (for helm consistency) // 2. Writes a clean per-agent models.json to the host PVC -// 3. Patches the openclaw-config ConfigMap with the best primary model -// -// Cloud models are promoted to primary (they're added because they're better -// than local defaults). The previous primary becomes a fallback. +// 3. Patches the openclaw-config ConfigMap with the configured primary model func SyncOverlayModels(cfg *config.Config, models []string, u *ui.UI) error { ids, err := ListInstanceIDs(cfg) if err != nil || len(ids) == 0 { @@ -1721,10 +1718,9 @@ func SyncOverlayModels(cfg *config.Config, models []string, u *ui.UI) error { return nil } -// rankModels delegates to model.Rank for capability-aware ranking, then -// prefixes every entry with `openai/` for LiteLLM routing. Both runtimes used -// to roll their own ranker that picked `local[0]` (whatever Ollama listed -// first), which produced the llama3.2:1b regression — see internal/model/rank.go. +// rankModels delegates to model.Rank for configured-order selection, then +// prefixes every entry with `openai/` for LiteLLM routing through OpenClaw's +// openai-compatible provider slot. 
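+// e.g. model.Rank picks "llama3.2:1b" → the wrapper returns
+// "openai/llama3.2:1b" as primary and prefixes each fallback the same way.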
func rankModels(models []string) (primary string, fallbacks []string) { primary, fallbacks = model.Rank(models) if primary != "" { diff --git a/internal/openclaw/rankmodels_test.go b/internal/openclaw/rankmodels_test.go index 5e9b5ae8..296d0def 100644 --- a/internal/openclaw/rankmodels_test.go +++ b/internal/openclaw/rankmodels_test.go @@ -2,30 +2,24 @@ package openclaw import "testing" -// TestRankModels_OpenClawWrapper_PrefersLargerLocalModel — same regression -// guard as the Hermes side, but exercising the openai/-prefix that OpenClaw -// adds for LiteLLM routing through the openai-compatible provider slot. -func TestRankModels_OpenClawWrapper_PrefersLargerLocalModel(t *testing.T) { +func TestRankModels_OpenClawWrapper_PreservesConfiguredOrder(t *testing.T) { primary, fallbacks := rankModels([]string{ "llama3.2:1b", "qwen3.5:9b", - "llama3.2:3b", + "claude-opus-4-7", }) - if primary != "openai/qwen3.5:9b" { - t.Fatalf("primary: got %q, want openai/qwen3.5:9b", primary) + if primary != "openai/llama3.2:1b" { + t.Fatalf("primary: got %q, want openai/llama3.2:1b", primary) } - if len(fallbacks) != 2 || fallbacks[0] != "openai/llama3.2:3b" || fallbacks[1] != "openai/llama3.2:1b" { + if len(fallbacks) != 2 || fallbacks[0] != "openai/qwen3.5:9b" || fallbacks[1] != "openai/claude-opus-4-7" { t.Fatalf("fallbacks: got %v", fallbacks) } } -func TestRankModels_OpenClawWrapper_KeepsOpenAIPrefixOnCloudPicks(t *testing.T) { - // Cloud models also get the openai/ prefix in OpenClaw because LiteLLM - // routes them through its openai-compatible adapter slot. The wrapper - // must wrap regardless of whether the underlying pick is cloud or local. +func TestRankModels_OpenClawWrapper_PrefixesConfiguredCloudPrimary(t *testing.T) { primary, _ := rankModels([]string{ - "qwen3.5:9b", "claude-opus-4-7", + "qwen3.5:9b", }) if primary != "openai/claude-opus-4-7" { t.Fatalf("primary: got %q, want openai/claude-opus-4-7", primary) diff --git a/internal/stack/backend_k3d.go b/internal/stack/backend_k3d.go index f530f0e3..f2c5c10e 100644 --- a/internal/stack/backend_k3d.go +++ b/internal/stack/backend_k3d.go @@ -103,6 +103,23 @@ func (b *K3dBackend) Up(cfg *config.Config, u *ui.UI, stackID string) ([]byte, e return nil, err } + // Ensure the dev registry caches are running on every Up, in BOTH the + // existing-cluster and fresh-create branches. `k3d cluster start` does + // not auto-restart standalone registry containers attached via + // `--registry-use` at create time — it only starts the cluster's own + // nodes. Without this call, every retry after a `cluster stop` (or after + // the failure-recovery Down() call in syncDefaults) falls back to direct + // upstream pulls and re-fetches every image, costing minutes per + // attempt. + if os.Getenv("OBOL_DEVELOPMENT") == "true" { + setup, setupErr := ensureDevRegistries(cfg, u) + if setupErr != nil { + u.Warnf("Dev registry cache unavailable, falling back to direct upstream pulls: %v", setupErr) + } else { + registrySetup = setup + } + } + if running { u.Warn("Cluster already exists, starting it") @@ -128,15 +145,6 @@ func (b *K3dBackend) Up(cfg *config.Config, u *ui.UI, stackID string) ([]byte, e // 'obol stack init' wrote the k3d config. 
ensureK3dPortsAvailable(k3dConfigPath, u) - if os.Getenv("OBOL_DEVELOPMENT") == "true" { - setup, setupErr := ensureDevRegistries(cfg, u) - if setupErr != nil { - u.Warnf("Dev registry cache unavailable, falling back to direct upstream pulls: %v", setupErr) - } else { - registrySetup = setup - } - } - createCmd := exec.Command( filepath.Join(cfg.BinDir, "k3d"), k3dCreateArgs(stackName, k3dConfigPath, registrySetup)..., diff --git a/internal/stack/dev_registry.go b/internal/stack/dev_registry.go index 91ed2f96..dfd70e03 100644 --- a/internal/stack/dev_registry.go +++ b/internal/stack/dev_registry.go @@ -79,11 +79,20 @@ func ensureDevRegistry(cfg *config.Config, k3dBinary string, mirror registryMirr return nil } - if err := runCommand(exec.Command("docker", "start", containerName)); err != nil { - return fmt.Errorf("start registry %s: %w", containerName, err) + // Container exists but is stopped. Try to start it. + if startErr := runCommand(exec.Command("docker", "start", containerName)); startErr == nil { + return nil } - return nil + // Start failed — most commonly because the k3d-obol-stack-* Docker + // network the registry was attached to has been removed (cluster + // purge or reclaimLeakedDevK3dNetworks). The container's stored + // network reference is now a dangling ID and `docker start` aborts + // with "network ... not found". Force-remove the container and + // fall through to recreate it. The cache content lives on the host + // bind-mount under registryCacheDir(mirror), so the new container + // picks up exactly the layers the old one had — no re-pull. + _ = runCommand(exec.Command("docker", "rm", "-f", containerName)) } createCmd := exec.Command( diff --git a/internal/stack/stack.go b/internal/stack/stack.go index b6704e61..90700775 100644 --- a/internal/stack/stack.go +++ b/internal/stack/stack.go @@ -362,18 +362,6 @@ func syncDefaults(cfg *config.Config, u *ui.UI, kubeconfigPath string, dataDir s u.Warnf("Failed to preserve LiteLLM config across Helm sync: %v", err) } - // Release runtime field ownership of litellm-config.data.config.yaml so the - // upcoming helm upgrade can reclaim it without an SSA conflict. Without - // this step, the second `obol stack up` after autoConfigureLLM/restore has - // claimed the field via SSA (manager=helm, op=Apply) fails with - // "conflict with helm using v1: .data.config.yaml" because helm registers - // a separate managedFields entry (manager=helm, op=Update) for the same - // field. The data is already snapshotted in previousLiteLLMConfig and gets - // re-applied by restoreLiteLLMConfig after helm runs. - if err := releaseLiteLLMConfigOwnership(cfg, kubeconfigPath); err != nil { - u.Warnf("Failed to release LiteLLM config field ownership: %v", err) - } - // Compatibility migration if err := migrateDefaultsHTTPRouteHostnames(helmfilePath); err != nil { u.Warnf("Failed to migrate defaults helmfile hostnames: %v", err) @@ -919,37 +907,6 @@ func preserveLiteLLMConfigForHelm(cfg *config.Config, kubeconfigPath string) (st return raw, nil } -// releaseLiteLLMConfigOwnership strips managedFields from the litellm-config -// ConfigMap so the next helm upgrade can claim ownership of every field -// without an SSA conflict. Helm tracks release ownership via the -// meta.helm.sh/release-name annotation, not managedFields, so clearing -// managedFields does not detach the resource from its release. -// -// The single empty entry [{}] is the documented apiserver idiom for clearing -// all field-ownership claims on a resource. 
-// See:
-// https://kubernetes.io/docs/reference/using-api/server-side-apply/#clearing-managedfields
-func releaseLiteLLMConfigOwnership(cfg *config.Config, kubeconfigPath string) error {
-	kubectlBinary := filepath.Join(cfg.BinDir, "kubectl")
-
-	// Skip if the configmap doesn't exist (first install).
-	if _, err := kubectl.Output(kubectlBinary, kubeconfigPath,
-		"get", "configmap", "litellm-config", "-n", "llm", "-o", "name"); err != nil {
-		return nil
-	}
-
-	cmd := exec.Command(kubectlBinary,
-		"patch", "configmap", "litellm-config",
-		"-n", "llm",
-		"--type=merge",
-		"--patch", `{"metadata":{"managedFields":[{}]}}`,
-	)
-	cmd.Env = append(os.Environ(), "KUBECONFIG="+kubeconfigPath)
-	if out, err := cmd.CombinedOutput(); err != nil {
-		return fmt.Errorf("kubectl patch managedFields: %w\n%s", err, string(out))
-	}
-	return nil
-}
-
 func restoreLiteLLMConfig(cfg *config.Config, kubeconfigPath, raw string) error {
 	if strings.TrimSpace(raw) == "" {
 		return nil
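
Taken together, the contract the new ranking tests pin down, with illustrative
model names borrowed from the tests above:

	primary, fallbacks := model.Rank([]string{
		"anthropic/claude-opus-4-7", // configured first: becomes primary, prefix intact
		"nomic-embed-text",          // embedding-only: demoted behind chat models
		"llama3.2:1b",
	})
	// primary   == "anthropic/claude-opus-4-7"
	// fallbacks == ["llama3.2:1b", "nomic-embed-text"]
	// To change the primary, reorder the config: obol model prefer llama3.2:1b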