Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 53 additions & 3 deletions cmd/obol/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ func modelCommand(cfg *config.Config) *cli.Command {
modelSyncCommand(cfg),
modelPullCommand(),
modelListCommand(cfg),
modelPreferCommand(cfg),
modelRemoveCommand(cfg),
},
}
Expand Down Expand Up @@ -194,12 +195,33 @@ func setupCloudProvider(cfg *config.Config, u *ui.UI, provider, apiKey string, m
}

if len(models) == 0 {
// Sensible defaults
// Per-provider defaults — kept in sync with what the providers
// document as their current chat-tuned flagship. Bumping these is a
// small follow-up PR when frontier models drop, and it isolates the
// "what's good today" maintenance to one place.
var defaultModel string
switch provider {
case "anthropic":
models = []string{"claude-sonnet-4-6"}
defaultModel = "claude-sonnet-4-6"
case "openai":
models = []string{"gpt-4.1"}
defaultModel = "gpt-5.5"
}

// Interactive: let the user override the default with a free-text
// entry. Non-interactive (no TTY): silently use the default — the
// caller can always pass --model to be explicit.
chosen := defaultModel
if defaultModel != "" && u.IsTTY() && !u.IsJSON() {
input, err := u.Input(fmt.Sprintf("Model for %s", provider), defaultModel)
if err != nil {
return err
}
if strings.TrimSpace(input) != "" {
chosen = strings.TrimSpace(input)
}
}
if chosen != "" {
models = []string{chosen}
}
}

Expand Down Expand Up @@ -493,6 +515,34 @@ func modelListCommand(cfg *config.Config) *cli.Command {
}
}

// modelPreferCommand wires up `obol model prefer`, which promotes one or
// more configured models to the front of the LiteLLM model_list; the first
// entry becomes the agent's primary model.
func modelPreferCommand(cfg *config.Config) *cli.Command {
	return &cli.Command{
		Name:      "prefer",
		Usage:     "Pull one or more models to the head of the LiteLLM model_list (the head becomes the agent's primary)",
		ArgsUsage: "<model-name> [<model-name> ...]",
		Flags: []cli.Flag{
			&cli.BoolFlag{Name: "no-sync", Usage: "Skip the agent model sync (batch with other model commands, then run `obol model sync` once)"},
		},
		Action: func(ctx context.Context, cmd *cli.Command) error {
			u := getUI(cmd)

			args := cmd.Args().Slice()
			if len(args) == 0 {
				return errors.New("at least one model name is required\n\nUsage: obol model prefer <model-name> [<model-name> ...]\n\nList configured models with: obol model list")
			}

			if err := model.PreferModels(cfg, u, args); err != nil {
				return err
			}

			// --no-sync lets operators batch several model commands and run
			// a single `obol model sync` at the end instead of one per step.
			if !cmd.Bool("no-sync") {
				return syncAgentModels(cfg, u)
			}
			return nil
		},
	}
}

func modelRemoveCommand(cfg *config.Config) *cli.Command {
return &cli.Command{
Name: "remove",
Expand Down
15 changes: 7 additions & 8 deletions internal/hermes/hermes.go
Original file line number Diff line number Diff line change
Expand Up @@ -1260,15 +1260,14 @@ func litellmMasterKey(cfg *config.Config) string {
return "sk-obol-" + strings.TrimSpace(string(data))
}

// rankModels delegates to model.Rank, which knows how to prefer larger local
// models and frontier cloud models. Kept as a thin wrapper so call sites
// don't need to import internal/model directly.
// rankModels delegates to model.Rank, which preserves configured LiteLLM model
// order and keeps known embedding-only models behind chat-capable models. Kept
// as a thin wrapper so call sites don't need to import internal/model directly.
//
// IMPORTANT: do NOT pre-strip provider prefixes here. model.Rank strips
// internally for ranking heuristics but returns the ORIGINAL strings so the
// agent can round-trip them back to LiteLLM. Stripping at this layer would
// break that round-trip — that's exactly the double-strip bug that
// ca820c9 worked around for custom endpoints.
// IMPORTANT: do NOT strip provider prefixes here. model.Rank returns the
// original strings so the agent can round-trip them back to LiteLLM. Stripping
// at this layer would break that round-trip — that's exactly the double-strip
// bug that ca820c9 worked around for custom endpoints.
func rankModels(models []string) (primary string, fallbacks []string) {
	// Pure delegation; model.Rank hands back the original strings untouched.
	primary, fallbacks = model.Rank(models)
	return primary, fallbacks
}
Expand Down
46 changes: 12 additions & 34 deletions internal/hermes/rankmodels_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,20 @@ package hermes

import "testing"

// TestRankModels_HermesWrapper_PrefersLargerLocalModel encodes the regression
// from the colleague's screenshot: Hermes was deploying with `llama3.2:1b` as
// the default model, which then parroted its own tool list back on every
// "hello" prompt. The fix moved capability ranking into model.Rank; this test
// just confirms the Hermes-side wrapper still calls into it correctly.
//
// Contract: bare LiteLLM model_name strings come in, the SAME bare strings
// come back out — no provider-prefix stripping at this layer. The agent must
// be able to round-trip the returned primary back to LiteLLM without
// modification.
func TestRankModels_HermesWrapper_PrefersLargerLocalModel(t *testing.T) {
// Contract: LiteLLM model_name strings come in, the SAME strings come back
// out in configured order. The agent must be able to round-trip the returned
// primary back to LiteLLM without modification.
func TestRankModels_HermesWrapper_PreservesConfiguredOrder(t *testing.T) {
primary, fallbacks := rankModels([]string{
"llama3.2:1b",
"llama3.1:8b",
"llama3.2:3b",
})
if primary != "llama3.1:8b" {
t.Fatalf("primary: got %q, want llama3.1:8b", primary)
}
if len(fallbacks) != 2 || fallbacks[0] != "llama3.2:3b" || fallbacks[1] != "llama3.2:1b" {
t.Fatalf("fallbacks: got %v, want [llama3.2:3b llama3.2:1b]", fallbacks)
}
}

// TestRankModels_HermesWrapper_PrefersClaudeOverLocal exercises the cloud
// tier. Cloud entries written by buildModelEntries are bare (e.g.
// `claude-opus-4-7`, not `anthropic/claude-opus-4-7`), and the wrapper must
// preserve that.
func TestRankModels_HermesWrapper_PrefersClaudeOverLocal(t *testing.T) {
primary, _ := rankModels([]string{
"llama3.1:8b",
"claude-opus-4-7",
"llama3.2:1b",
})
if primary != "claude-opus-4-7" {
t.Fatalf("primary: got %q, want claude-opus-4-7", primary)
if primary != "llama3.2:1b" {
t.Fatalf("primary: got %q, want llama3.2:1b", primary)
}
if len(fallbacks) != 2 || fallbacks[0] != "llama3.1:8b" || fallbacks[1] != "claude-opus-4-7" {
t.Fatalf("fallbacks: got %v, want [llama3.1:8b claude-opus-4-7]", fallbacks)
}
}

Expand All @@ -50,12 +28,12 @@ func TestRankModels_HermesWrapper_PrefersClaudeOverLocal(t *testing.T) {
// reintroducing it.
func TestRankModels_HermesWrapper_PreservesProviderPrefixIfPresent(t *testing.T) {
primary, _ := rankModels([]string{
"llama3.1:8b",
"anthropic/claude-opus-4-7",
"openai/gpt-4o",
"llama3.1:8b",
})
if primary != "anthropic/claude-opus-4-7" {
t.Fatalf("primary: got %q, want anthropic/claude-opus-4-7 (unstripped)", primary)
if primary != "llama3.1:8b" {
t.Fatalf("primary: got %q, want llama3.1:8b", primary)
}
}

Expand Down
154 changes: 141 additions & 13 deletions internal/model/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,130 @@ func hotDeleteModel(cfg *config.Config, u *ui.UI, modelName string) error {
return nil
}

// reorderModelList is the pure-function core of PreferModels. It moves the
// named entries to the head of the list (in the order given) and returns
// the new slice, plus a boolean indicating whether the input was already in
// the requested order (the caller should treat that as a no-op so it can
// skip the kubectl patch + LiteLLM rollout). Unknown or duplicate names
// produce an error so typos surface loudly.
// reorderModelList is the pure core of PreferModels. It returns a new slice
// with the named entries moved to the front (in the order given) while every
// other entry keeps its original relative position. The boolean result is
// true when entries already began with names in that exact order — callers
// treat that as a no-op so they can skip the kubectl patch and LiteLLM
// rollout. Unknown names and duplicated names are errors so typos fail loudly.
func reorderModelList(entries []ModelEntry, names []string) ([]ModelEntry, bool, error) {
	position := make(map[string]int, len(entries))
	for i, e := range entries {
		position[e.ModelName] = i
	}

	// Validate the whole request up front: gather every unknown name so they
	// are reported together, and reject duplicated names outright.
	var unknown []string
	seen := make(map[string]bool, len(names))
	for _, n := range names {
		if _, ok := position[n]; !ok {
			unknown = append(unknown, n)
			continue
		}
		if seen[n] {
			return nil, false, fmt.Errorf("duplicate model in prefer args: %q", n)
		}
		seen[n] = true
	}
	if len(unknown) > 0 {
		return nil, false, fmt.Errorf("model(s) not found in LiteLLM config: %s\n  Run 'obol model list' to see available entries", strings.Join(unknown, ", "))
	}

	// Detect when the requested order already holds at the head of the list.
	atHead := true
	for i, n := range names {
		if i >= len(entries) || entries[i].ModelName != n {
			atHead = false
			break
		}
	}

	// Preferred entries first (in request order), then everything else in
	// its original relative order.
	out := make([]ModelEntry, 0, len(entries))
	for _, n := range names {
		out = append(out, entries[position[n]])
	}
	for _, e := range entries {
		if !seen[e.ModelName] {
			out = append(out, e)
		}
	}
	return out, atHead, nil
}

// PreferModels reorders LiteLLM's model_list so the named entries appear at
// the head, in the order given. Remaining entries keep their original
// relative order. This is the operator-facing primitive that lets
// model.Rank's "first chat-capable wins" rule pick a specific primary
// without a remove/re-add cycle.
//
// Returns an error if any of the requested names is not present in the
// current model_list — typos should be loud, not silent no-ops.
//
// LiteLLM has no model_list reorder API, so after the ConfigMap patch this
// rolls the LiteLLM Deployment so the new order takes effect (the
// /v1/models listing follows model_list order, and hermes/openclaw read
// the ConfigMap directly via GetConfiguredModels for the agent primary).
// PreferModels reorders LiteLLM's model_list so the named entries appear at
// the head, in the order given. Remaining entries keep their original
// relative order. This is the operator-facing primitive that lets
// model.Rank's "first chat-capable wins" rule pick a specific primary
// without a remove/re-add cycle.
//
// Returns an error if any of the requested names is not present in the
// current model_list — typos should be loud, not silent no-ops.
//
// LiteLLM has no model_list reorder API, so after the ConfigMap patch this
// rolls the LiteLLM Deployment so the new order takes effect (the
// /v1/models listing follows model_list order, and hermes/openclaw read
// the ConfigMap directly via GetConfiguredModels for the agent primary).
func PreferModels(cfg *config.Config, u *ui.UI, names []string) error {
	if len(names) == 0 {
		return errors.New("at least one model name is required")
	}

	kubectlBinary := filepath.Join(cfg.BinDir, "kubectl")
	kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml")

	// errors.Is is the modern replacement for os.IsNotExist and also sees
	// through wrapped errors. Other stat failures fall through to the
	// kubectl call below, which reports its own, more specific error.
	if _, err := os.Stat(kubeconfigPath); errors.Is(err, os.ErrNotExist) {
		return errors.New("cluster not running. Run 'obol stack up' first")
	}

	// Read the live config.yaml out of the ConfigMap — it is the single
	// source of truth for the model_list.
	raw, err := kubectl.Output(kubectlBinary, kubeconfigPath,
		"get", "configmap", configMapName, "-n", namespace, "-o", "jsonpath={.data.config\\.yaml}")
	if err != nil {
		return fmt.Errorf("failed to read LiteLLM config: %w", err)
	}

	var litellmConfig LiteLLMConfig
	if err := yaml.Unmarshal([]byte(raw), &litellmConfig); err != nil {
		return fmt.Errorf("failed to parse config.yaml: %w", err)
	}

	reordered, alreadyAtHead, err := reorderModelList(litellmConfig.ModelList, names)
	if err != nil {
		return err
	}
	if alreadyAtHead {
		u.Infof("Model(s) already at the head of the model_list, no change")
		return nil
	}
	litellmConfig.ModelList = reordered

	updated, err := yaml.Marshal(&litellmConfig)
	if err != nil {
		return fmt.Errorf("failed to marshal config: %w", err)
	}
	// JSON-encode the YAML document so it embeds safely as a string value
	// inside the merge-patch payload (quotes, newlines, etc. escaped).
	escapedYAML, err := json.Marshal(string(updated))
	if err != nil {
		return fmt.Errorf("failed to escape YAML: %w", err)
	}
	patchJSON := fmt.Sprintf(`{"data":{"config.yaml":%s}}`, escapedYAML)

	u.Infof("Promoting %s to head of LiteLLM model_list", strings.Join(names, ", "))
	if err := kubectl.Run(kubectlBinary, kubeconfigPath,
		"patch", "configmap", configMapName, "-n", namespace,
		"-p", patchJSON, "--type=merge", "--field-manager=helm"); err != nil {
		return fmt.Errorf("failed to patch ConfigMap: %w", err)
	}

	// LiteLLM has no reorder API; restart the deployment so the new order
	// takes effect (mostly cosmetic for /v1/models listings — agent primary
	// is read from the ConfigMap directly via GetConfiguredModels, which is
	// already correct after the patch above). A failed rollout is therefore
	// non-fatal: warn instead of erroring.
	if err := RestartLiteLLM(cfg, u, "prefer"); err != nil {
		u.Warnf("LiteLLM rollout failed: %v", err)
		u.Dim("  The ConfigMap is updated; agent will pick up the new primary on next sync.")
	}

	return nil
}

// RemoveModel removes a model entry from the LiteLLM ConfigMap (persistence)
// and hot-deletes it from the running router via the API (immediate effect).
// No pod restart is required.
Expand Down Expand Up @@ -1123,16 +1247,10 @@ func buildModelEntries(provider string, models []string) []ModelEntry {
}
case ProviderAnthropic:
cachePoints := anthropicCacheControlPoints()
// Wildcard: routes any anthropic model without explicit registration
entries = append(entries, ModelEntry{
ModelName: "anthropic/*",
LiteLLMParams: LiteLLMParams{
Model: "anthropic/*",
APIKey: "os.environ/ANTHROPIC_API_KEY",
CacheControlInjectionPoints: cachePoints,
},
})
// Explicit entries for requested models (better /v1/models listing)
// Explicit entries first so the user-selected model is the primary
// under model.Rank's "first chat-capable wins" rule. Hermes cannot
// send `model: anthropic/*` literally (LiteLLM doesn't resolve a
// wildcard to a default), so the wildcard must never sit at index 0.
for _, m := range models {
entries = append(entries, ModelEntry{
ModelName: m,
Expand All @@ -1143,17 +1261,27 @@ func buildModelEntries(provider string, models []string) []ModelEntry {
},
})
}
case ProviderOpenAI:
// Wildcard: routes any anthropic model without explicit registration.
entries = append(entries, ModelEntry{
ModelName: "openai/*",
LiteLLMParams: LiteLLMParams{Model: "openai/*", APIKey: "os.environ/OPENAI_API_KEY"},
ModelName: "anthropic/*",
LiteLLMParams: LiteLLMParams{
Model: "anthropic/*",
APIKey: "os.environ/ANTHROPIC_API_KEY",
CacheControlInjectionPoints: cachePoints,
},
})
case ProviderOpenAI:
// Explicit-before-wildcard, same rationale as Anthropic above.
for _, m := range models {
entries = append(entries, ModelEntry{
ModelName: m,
LiteLLMParams: LiteLLMParams{Model: "openai/" + m, APIKey: "os.environ/OPENAI_API_KEY"},
})
}
entries = append(entries, ModelEntry{
ModelName: "openai/*",
LiteLLMParams: LiteLLMParams{Model: "openai/*", APIKey: "os.environ/OPENAI_API_KEY"},
})
default:
for _, m := range models {
entries = append(entries, ModelEntry{
Expand Down
Loading
Loading