diff --git a/config.example.yaml b/config.example.yaml index 5fd7e09..3d3c617 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -11,25 +11,6 @@ timezone: America/New_York # aws_region: us-east-1 # AWS region for Bedrock # # aws_profile: "" # Optional: AWS SSO profile name -# Local proxy (CLIProxyAPI) — optional. Routes Anthropic API calls through -# Claude Code OAuth, and is the Anthropic↔OpenAI translation layer that local -# Ollama models are reached through. Required for the ollama block below. -# proxy: -# enabled: false -# port: 8317 -# host: 127.0.0.1 - -# Local Ollama — expose locally-installed Ollama models as selectable chat -# models in the web composer's model picker. Ollama speaks an OpenAI-compatible -# API, so this requires proxy.enabled: true (the proxy translates the -# Anthropic requests the SDK emits). Models are auto-discovered at runtime -# from the running Ollama server (GET /api/tags) — whatever you've pulled -# locally shows up automatically, no need to list models here. -# ollama: -# enabled: false -# host: 127.0.0.1 -# port: 11434 - # Agent agent: model: claude-opus-4-8 # Primary model for conversations @@ -39,7 +20,13 @@ agent: title_model: claude-haiku-4-5-20251001 # Session title generation max_turns: 50 # Max agentic turns per request max_concurrent: 4 # Max concurrent agent sessions - background_agent_permissions: true # Background sub-agents (Agent run_in_background) get the same tool permissions as foreground; false denies their Write/Edit/Bash + thinking: max # Thinking budget for the main model + effort: max # Reasoning effort for the main model + # Note: cron/hook sessions run on cron_model (Sonnet). Under Claude OAuth + # (subscription) Sonnet rejects thinking/effort="max" with + # level "max" not supported, valid levels: low, medium, high + # Nerve detects OAuth via `proxy.enabled` and automatically caps + # cron/hook sessions at "high" in that case. API users keep "max". # First-prompt rewrite — the web UI can refine the opening message of a # new chat with a fast model, preview it, and send only after approval. prompt_rewrite: diff --git a/docs/config.md b/docs/config.md index f24c5b3..9537450 100644 --- a/docs/config.md +++ b/docs/config.md @@ -40,6 +40,8 @@ from any working directory: | `agent.cron_model` | string | `claude-sonnet-4-6` | Model for cron jobs (cheaper) | | `agent.max_turns` | int | `50` | Max agentic turns per request | | `agent.max_concurrent` | int | `4` | Max concurrent agent sessions | +| `agent.thinking` | string | `max` | Thinking budget for the main model: `max` / `high` / `medium` / `low` / `disabled` / `adaptive` / explicit token count. Automatically capped at `high` for cron and hook sessions when `proxy.enabled` is true (Claude OAuth subscription rejects `max` on non-flagship models like Sonnet). | +| `agent.effort` | string | `max` | Reasoning effort for the main model: `max` / `high` / `medium` / `low`. Same OAuth+cron cap as `thinking`. | | `agent.prompt_rewrite.enabled` | bool | `true` | Offer the first-prompt rewrite feature in the web UI (per-user toggle lives in the composer) | | `agent.prompt_rewrite.model` | string | `""` | Model for prompt rewriting (empty = `agent.model`, the chat model) | | `agent.prompt_rewrite.max_tokens` | int | `1024` | Max tokens for the rewritten prompt | diff --git a/nerve/agent/engine.py b/nerve/agent/engine.py index 3dcdf96..bef2f71 100644 --- a/nerve/agent/engine.py +++ b/nerve/agent/engine.py @@ -70,6 +70,35 @@ _SURROGATE_RE = re.compile(r"[\ud800-\udfff]") +_OAUTH_CRON_CAP = "high" + + +def _select_thinking_effort(config: Any, source: str) -> tuple[str, str]: + """Pick (thinking, effort) for a session, downgrading only under OAuth. + + Cron and hook sessions run on ``agent.cron_model`` (typically Sonnet). + Under Claude OAuth (subscription) Sonnet caps thinking/effort at + ``high`` and the CLI rejects ``max`` with:: + + API Error: 400 level "max" not supported, valid levels: low, medium, high + + OAuth mode in nerve is gated by the local cli-proxy-api wrapping + the user's subscription credentials — i.e. ``config.proxy.enabled``. + When that's on AND the session is cron/hook, cap both knobs to + ``high``. API users (no proxy) and interactive sessions keep the + user's configured value unchanged — defaults stay ``max`` for + everyone, only the narrow OAuth+cron path is downgraded. + """ + thinking = config.agent.thinking + effort = config.agent.effort + if source in ("cron", "hook") and config.proxy.enabled: + if thinking == "max": + thinking = _OAUTH_CRON_CAP + if effort == "max": + effort = _OAUTH_CRON_CAP + return thinking, effort + + # Anthropic API image limits _MAX_IMAGE_BYTES = 5 * 1024 * 1024 # 5 MB _IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".webp"} @@ -315,22 +344,10 @@ def __init__(self, config: NerveConfig, db: Database): # task_started / task_updated / task_notification system messages: # session_id -> task_id -> {task_id, label, tool, status}. self._bg_task_registry: dict[str, dict[str, dict[str, Any]]] = {} - # Per-session dynamic-workflow registry: session_id -> tool_use_id -> - # {name, snapshot}. The tool_use_id is captured when a ``Workflow`` - # tool call streams; later task_* system messages carrying a - # ``workflow_progress`` tree are matched back to it so the UI can - # render a live phase/agent panel. The last snapshot is cached so the - # terminal task_notification (which omits the tree) can still settle - # the panel and persist the final state. - self._workflows: dict[str, dict[str, dict[str, Any]]] = {} # Per-session active channel — set on run() entry, cleared on exit. # Read by session-scoped tools (send_file) to avoid dispatching via # stale router context from a prior inbound channel. self._active_channel: dict[str, str] = {} - # Resolved model bound to each session's live SDK client. Used to - # detect mid-session model switches (the CLI fixes its model at - # connect time, so a change requires recreating the client). - self._session_models: dict[str, str] = {} self._router = None # ChannelRouter — lazy-initialized via .router property self._mcp_servers_cache = list(config.mcp_servers) # hot-reloadable self._claude_code_plugins: list[dict[str, str]] = [] # plugin dirs @@ -977,28 +994,25 @@ def _build_options( else: system_prompt = system_prompt_str - # Local Ollama models are reached through the proxy and speak the - # OpenAI-translated API — Anthropic-only knobs (extended thinking, - # effort, the context-1m beta) don't apply and may break translation, - # so suppress them for non-Claude models. - selected_model = model or self.config.agent.model - is_ollama_model = ( - self.config.ollama.enabled and "claude" not in selected_model.lower() + # Pick raw thinking/effort by source (cron/hook → cron_* overrides, + # interactive → main settings), then cap each to what the resolved + # model actually supports. + thinking_value, effort_value = _select_thinking_effort( + self.config, source, ) - - thinking_config = ( - None if is_ollama_model - else self._parse_thinking_config(self.config.agent.thinking, selected_model) + thinking_config = self._parse_thinking_config( + thinking_value, + model or self.config.agent.model, ) - effort = ( - None if is_ollama_model - else self._effective_effort(self.config.agent.effort, selected_model) + effort = self._effective_effort( + effort_value, + model or self.config.agent.model, ) # Some subscriptions reject the context-1m beta for specific models # (e.g. claude-sonnet-4-6) — skip the beta header for those. betas = ( ["context-1m-2025-08-07"] - if not is_ollama_model and self.config.agent.context_1m_enabled_for(model) + if self.config.agent.context_1m_enabled_for(model) else [] ) @@ -1192,7 +1206,7 @@ def _build_hooks(self, session_id: str) -> dict: through ``engine.run(..., source="wakeup")`` (the CLI's own autonomous firing is suppressed — see ``_build_env``). """ - from nerve.agent.interactive import INTERACTIVE_TOOLS, _read_file_safe + from nerve.agent.interactive import _read_file_safe captured_files: set[str] = set() @@ -1265,58 +1279,17 @@ async def _capture_wakeup_hook(hook_input, tool_use_id, context): ) return {"hookSpecificOutput": {"hookEventName": "PostToolUse"}} - async def _grant_permission_hook(hook_input, tool_use_id, context): - """PreToolUse hook: pre-approve non-interactive tools. - - Background sub-agents (the Agent tool with run_in_background) run - detached and non-blocking, so the CLI never surfaces an approval - prompt for their nested tool calls — the ``can_use_tool`` callback - is never invoked for them and the CLI denies their Write/Edit/Bash - by default. A PreToolUse hook, however, DOES fire for those nested - calls (it is a programmatic callback, not a user-facing prompt), so - returning ``permissionDecision: "allow"`` here grants the same - auto-approval foreground agents already get via ``can_use_tool``. - - Interactive tools and Read are left untouched: interactive tools - defer to ``can_use_tool`` (pause / inject answers / deny), and Read - defers to the image validator above plus the CLI's read-only - auto-allow. This keeps the web pause-for-input flow intact while - giving background sub-agents permission parity with the foreground. - """ - tool_name = hook_input.get("tool_name", "") - if tool_name in INTERACTIVE_TOOLS or tool_name == "Read": - return {"hookSpecificOutput": {"hookEventName": "PreToolUse"}} - return { - "hookSpecificOutput": { - "hookEventName": "PreToolUse", - "permissionDecision": "allow", - "permissionDecisionReason": ( - "nerve: auto-approved (background-agent permission parity)" - ), - } - } - - pre_tool_use = [ - HookMatcher( - matcher="Edit|Write|NotebookEdit", - hooks=[_snapshot_hook], - ), - HookMatcher( - matcher="Read", - hooks=[_validate_image_hook], - ), - ] - # Catch-all permission grant so background sub-agents (whose nested - # tool calls never reach can_use_tool) inherit foreground's tool - # permissions. Registered last so the snapshot/validator hooks still - # run for their tools; a deny from the validator wins over this allow. - if self.config.agent.background_agent_permissions: - pre_tool_use.append( - HookMatcher(matcher=None, hooks=[_grant_permission_hook]) - ) - return { - "PreToolUse": pre_tool_use, + "PreToolUse": [ + HookMatcher( + matcher="Edit|Write|NotebookEdit", + hooks=[_snapshot_hook], + ), + HookMatcher( + matcher="Read", + hooks=[_validate_image_hook], + ), + ], "PostToolUse": [ HookMatcher( matcher="ScheduleWakeup", @@ -1522,9 +1495,7 @@ async def _get_or_create_client( lock = self.sessions.get_lock(session_id) async with lock: client = self.sessions.get_client(session_id) - requested_model = model or self.config.agent.model if client is not None: - bound_model = self._session_models.get(session_id) # Health check: verify the underlying CLI process is still alive if self._is_client_dead(client): logger.warning( @@ -1536,20 +1507,6 @@ async def _get_or_create_client( unregister_handler(session_id) await self._safe_disconnect(client) client = None - elif bound_model is not None and bound_model != requested_model: - # Model switched mid-session (e.g. the composer's picker - # moved from the Anthropic default to a local Ollama - # model). The CLI binds its model at connect time, so - # tear the client down and recreate it below. - logger.info( - "Session %s model changed (%s → %s), recreating client", - session_id, bound_model, requested_model, - ) - self._stop_idle_watcher(session_id) - self.sessions.remove_client(session_id) - unregister_handler(session_id) - await self._safe_disconnect(client) - client = None else: return client @@ -1638,7 +1595,6 @@ async def _get_or_create_client( # Record connected_at and the resolved model resolved_model = options.model - self._session_models[session_id] = resolved_model now = datetime.now(timezone.utc).isoformat() connected_at = session.get("connected_at") if session and sdk_resume_id else now await self.sessions.mark_active( @@ -1952,16 +1908,6 @@ async def _process_sdk_message( description=str(tool_input.get("description", "")), model=str(tool_input.get("model", "")) or None, ) - # Track dynamic workflows. A ``Workflow`` tool call spawns - # a background runtime; later task_* system messages carry - # its progress tree keyed by this tool_use_id. Register it - # now so _handle_system_message can recognize those events - # even before the first ``workflow_progress`` payload. - if tool_name == "Workflow" and tool_use_id: - self._workflows.setdefault(session_id, {})[tool_use_id] = { - "name": self._derive_workflow_name(tool_input), - "snapshot": None, - } st.tool_calls_log.append({ "tool": tool_name, "input": tool_input, @@ -2084,33 +2030,6 @@ async def _handle_system_message( if not entry["label"]: entry["label"] = data.get("summary") or task_id - # Dynamic-workflow progress. A workflow task is recognized either by - # its tool_use_id (captured when the ``Workflow`` tool streamed) or by - # the presence of a ``workflow_progress`` tree on the message. We emit - # a dedicated event so the UI can render a live phase/agent panel — - # independent of the coarse background-task chip above. - tool_use_id = data.get("tool_use_id") or getattr(message, "tool_use_id", None) - wf_reg = self._workflows.get(session_id) or {} - wp = data.get("workflow_progress") - task_type = str(data.get("task_type") or "") - is_workflow = bool(tool_use_id) and ( - tool_use_id in wf_reg - or (isinstance(wp, list) and len(wp) > 0) - or "workflow" in task_type - ) - if is_workflow: - entry["tool"] = "Workflow" - # The CLI reports the workflow name on task_started — authoritative - # (and better than the tool-input guess for inline scripts). - wf_name = data.get("workflow_name") - if wf_name: - self._workflows.setdefault(session_id, {}).setdefault( - tool_use_id, {"name": "Workflow", "snapshot": None}, - )["name"] = str(wf_name) - await self._emit_workflow_progress( - session_id, tool_use_id, subtype, data, message, wp, - ) - if changed: await broadcaster.broadcast(session_id, { "type": "background_tasks_update", @@ -2118,140 +2037,6 @@ async def _handle_system_message( "tasks": list(registry.values()), }) - async def _emit_workflow_progress( - self, - session_id: str, - tool_use_id: str, - subtype: str, - data: dict, - message: Any, - wp: Any, - ) -> None: - """Build, cache, broadcast (and on terminal, persist) a workflow - progress snapshot for the ``Workflow`` call ``tool_use_id``.""" - reg = self._workflows.setdefault(session_id, {}) - cached = reg.setdefault(tool_use_id, {"name": "Workflow", "snapshot": None}) - - # task_progress carries the full tree; task_notification omits it, so - # fall back to the last cached snapshot to settle the panel. - if isinstance(wp, list) and wp: - snapshot = self._build_workflow_snapshot(wp) - else: - prev = cached.get("snapshot") or {} - snapshot = { - "phases": prev.get("phases", []), - "agents": prev.get("agents", []), - "totalTokens": prev.get("totalTokens", 0), - "totalToolCalls": prev.get("totalToolCalls", 0), - "agentCount": prev.get("agentCount", 0), - } - - status = self._workflow_status(subtype, data, message) - snapshot["name"] = cached.get("name") or "Workflow" - snapshot["status"] = status - summary = ( - data.get("summary") - or data.get("description") - or getattr(message, "summary", "") - or getattr(message, "description", "") - ) - if summary: - snapshot["summary"] = str(summary)[:2000] - - cached["snapshot"] = snapshot - await broadcaster.broadcast_workflow_progress(session_id, tool_use_id, snapshot) - - if status in ("completed", "failed", "stopped"): - try: - await self.db.merge_workflow_into_call(session_id, tool_use_id, snapshot) - except Exception as e: # persistence is best-effort - logger.debug("merge_workflow_into_call failed for %s: %s", tool_use_id, e) - - @staticmethod - def _workflow_status(subtype: str, data: dict, message: Any) -> str: - """Map a task_* system message to a workflow status string - (running / completed / failed / stopped).""" - if subtype in ("task_started", "task_progress"): - return "running" - if subtype == "task_updated": - patch = data.get("patch") or {} - s = str(patch.get("status") or "") - if s == "killed": - return "stopped" - return s or "running" - if subtype == "task_notification": - return str( - data.get("status") or getattr(message, "status", "") or "completed" - ) - return "running" - - @staticmethod - def _derive_workflow_name(tool_input: Any) -> str: - """Best-effort workflow name: the ``name`` arg for a named workflow, - else ``meta.name`` parsed from an inline script, else "Workflow".""" - if not isinstance(tool_input, dict): - return "Workflow" - name = tool_input.get("name") - if isinstance(name, str) and name.strip(): - return name.strip() - script = tool_input.get("script") - if isinstance(script, str): - m = re.search(r"name\s*:\s*['\"]([^'\"]+)['\"]", script) - if m: - return m.group(1) - return "Workflow" - - @staticmethod - def _fold_workflow_snapshots( - ordered_blocks: list | None, wf_reg: dict | None, - ) -> None: - """Attach cached workflow snapshots onto their ``Workflow`` tool_call - blocks (in place), so a settled-within-turn workflow persists its tree.""" - if not wf_reg or not ordered_blocks: - return - for ob in ordered_blocks: - if not isinstance(ob, dict) or ob.get("type") != "tool_call": - continue - snap = (wf_reg.get(ob.get("tool_use_id")) or {}).get("snapshot") - if snap: - ob["workflow"] = snap - - @staticmethod - def _build_workflow_snapshot(wp: list) -> dict: - """Normalize the CLI's flat ``workflow_progress`` list into a - {phases, agents, totals} snapshot for the UI.""" - phases: list[dict] = [] - agents: list[dict] = [] - for e in wp: - if not isinstance(e, dict): - continue - etype = e.get("type") - if etype == "workflow_phase": - phases.append({"index": e.get("index"), "title": e.get("title")}) - elif etype == "workflow_agent": - summary = e.get("lastToolSummary") - agents.append({ - "label": e.get("label"), - "phaseIndex": e.get("phaseIndex"), - "phaseTitle": e.get("phaseTitle"), - "state": e.get("state"), - "model": e.get("model"), - "tokens": e.get("tokens"), - "toolCalls": e.get("toolCalls"), - "lastToolName": e.get("lastToolName"), - "lastToolSummary": str(summary)[:200] if summary else None, - "durationMs": e.get("durationMs"), - }) - total_tokens = sum(int(a.get("tokens") or 0) for a in agents) - total_tool_calls = sum(int(a.get("toolCalls") or 0) for a in agents) - return { - "phases": phases, - "agents": agents, - "totalTokens": total_tokens, - "totalToolCalls": total_tool_calls, - "agentCount": len(agents), - } - def _prune_bg_tasks(self, session_id: str) -> None: """Drop settled background tasks from the registry. @@ -2259,24 +2044,12 @@ def _prune_bg_tasks(self, session_id: str) -> None: accumulate forever. Running tasks are kept. """ registry = self._bg_task_registry.get(session_id) - if registry: - for tid in [t for t, e in registry.items() if e.get("status") != "running"]: - del registry[tid] - if not registry: - self._bg_task_registry.pop(session_id, None) - - # Drop settled workflows too (terminal snapshot already broadcast + - # persisted); keep running ones so late progress still maps back. - wf_reg = self._workflows.get(session_id) - if wf_reg: - terminal = {"completed", "failed", "stopped"} - for tuid in [ - t for t, e in wf_reg.items() - if (e.get("snapshot") or {}).get("status") in terminal - ]: - del wf_reg[tuid] - if not wf_reg: - self._workflows.pop(session_id, None) + if not registry: + return + for tid in [t for t, e in registry.items() if e.get("status") != "running"]: + del registry[tid] + if not registry: + self._bg_task_registry.pop(session_id, None) async def _finalize_turn( self, session_id: str, st: _TurnState, channel: str | None, @@ -2291,13 +2064,6 @@ async def _finalize_turn( # Merge tool results into tool_calls_log self._merge_tool_results(st.tool_calls_log, st.tool_results_map) - # Fold the latest dynamic-workflow snapshot onto its ``Workflow`` block - # so the panel reconstructs after reload. This covers workflows that - # settle *within* the launching turn — before the message row exists, - # so the out-of-band merge_workflow_into_call has nothing to patch. - # Longer workflows that settle after finalize are handled by that merge. - self._fold_workflow_snapshots(st.ordered_blocks, self._workflows.get(session_id)) - # Store assistant message in DB await self.sessions.add_message( session_id, "assistant", st.full_response_text, @@ -3310,44 +3076,6 @@ async def _idle_stream_watcher( # Cron / Hook runs # # ------------------------------------------------------------------ # - async def _teardown_oneshot_client( - self, session_id: str, *, keepalive_if_bg: bool = True, - ) -> None: - """Tear down a one-shot (cron / hook) run's SDK client. - - One-shot runs normally discard the client immediately to avoid leaking - claude CLI subprocesses. The exception is a run that yields while a - ``run_in_background`` task is still live: discarding here kills the - subprocess and the idle-stream watcher that delivers the task's - completion turn, so the agent would never resume to finish its work - (the fix-worker "strand" failure). In that case keep the client alive — - exactly as an interactive/web session does — and let - ``run_idle_client_sweep`` reap it once the task settles (it already - skips live-background-task sessions for the same reason). - - ``keepalive_if_bg`` MUST be False for runs whose ``session_id`` is - reused across runs (``run_persistent_cron``'s stable ``cron:{job_id}``): - parking such a client would let the NEXT scheduled run reuse the same - client/conversation while the prior run's background task is still in - flight, interleaving the two. Keep-alive is only safe for the - unique-per-run isolated paths (``run_cron`` / ``run_hook``). - """ - # Optimistic check: a task that settles between the watcher's last drain - # and here still reads as live, parking a client whose work is actually - # done — harmless, the next idle sweep reaps it. - if keepalive_if_bg and self._has_live_background_tasks(session_id): - logger.info( - "One-shot session %s parked on a live background task — keeping " - "client alive so its completion turn can resume the run; the " - "idle sweep reaps it once the task settles.", - session_id, - ) - return - # background_memorize: returning promptly closes the run log and frees - # APScheduler to fire the next run — memorization queues on a global - # lock and must not gate the run lifecycle. - await self._discard_client(session_id, background_memorize=True) - async def run_cron( self, job_id: str, @@ -3357,11 +3085,8 @@ async def run_cron( ) -> str: """Run an agent turn for a cron job in an isolated session. - The SDK client is normally discarded immediately after the run - completes to avoid leaking claude CLI subprocesses for one-shot jobs — - unless the run yielded with a live ``run_in_background`` task, in which - case it is kept alive so the agent can resume when the task completes - (see ``_teardown_oneshot_client``). + The SDK client is discarded immediately after the run completes + to avoid leaking claude CLI subprocesses for one-shot jobs. """ if run_id is None: run_id = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S") @@ -3375,7 +3100,10 @@ async def run_cron( model=model or self.config.agent.cron_model, ) finally: - await self._teardown_oneshot_client(session_id) + # background_memorize: returning promptly closes the cron run + # log and frees APScheduler to fire the next run — memorization + # queues on a global lock and must not gate the run lifecycle. + await self._discard_client(session_id, background_memorize=True) async def run_persistent_cron( self, @@ -3386,13 +3114,9 @@ async def run_persistent_cron( """Run a persistent cron job that maintains context across runs. Uses a stable session_id (cron:{job_id}) so the SDK resumes - conversation context on subsequent triggers. The client is discarded - after each run to free the subprocess (sdk_session_id is preserved for - the next resume). Unlike the isolated one-shot paths it does NOT keep - the client alive for a live background task: the stable session is - reused by the next run, which would collide with the parked task — so a - persistent-cron background task that outlives its run is not resumed - (use an isolated cron for long background work). + conversation context on subsequent triggers. The client is + discarded after each run to free the subprocess, but + sdk_session_id is preserved for the next resume. """ session_id = f"cron:{job_id}" await self.sessions.get_or_create( @@ -3406,10 +3130,8 @@ async def run_persistent_cron( model=model or self.config.agent.cron_model, ) finally: - # Stable session_id is reused by the next run, which would collide - # with a parked background task — so persistent crons always discard - # (no keep-alive). See _teardown_oneshot_client. - await self._teardown_oneshot_client(session_id, keepalive_if_bg=False) + # See run_cron: memorization must not gate the run lifecycle. + await self._discard_client(session_id, background_memorize=True) async def run_hook( self, @@ -3420,10 +3142,7 @@ async def run_hook( ) -> str: """Run an agent turn for a webhook in an isolated session. - The SDK client is normally discarded immediately after the run - completes — unless the run yielded with a live ``run_in_background`` - task, in which case it is kept alive so the agent can resume when the - task completes (see ``_teardown_oneshot_client``). + The SDK client is discarded immediately after the run completes. """ session = await self.sessions.create_hook_session(hook_name, hook_id) session_id = session["id"] @@ -3435,7 +3154,8 @@ async def run_hook( model=model or self.config.agent.cron_model, ) finally: - await self._teardown_oneshot_client(session_id) + # See run_cron: memorization must not gate the run lifecycle. + await self._discard_client(session_id, background_memorize=True) # ------------------------------------------------------------------ # # Idle client sweep # diff --git a/nerve/config.py b/nerve/config.py index 1488733..263b86a 100644 --- a/nerve/config.py +++ b/nerve/config.py @@ -151,17 +151,6 @@ class AgentConfig: # behaviour: turns can hang forever). 900s comfortably covers a 10-min # Bash tool call plus SDK round-trips while still catching real hangs. cli_idle_timeout_seconds: int = 900 - # When True, background sub-agents (the Agent tool with run_in_background, or - # background Bash) get the SAME auto-approved tool permissions as foreground - # agents, via a PreToolUse hook that pre-approves all non-interactive tools. - # Background tasks are detached and non-blocking, so the CLI never surfaces an - # approval prompt for them — the can_use_tool callback is never invoked for - # their nested Write/Edit/Bash calls, and the CLI denies them by default. - # A PreToolUse hook DOES fire for those nested calls (it is a programmatic - # callback, not a user prompt), so returning permissionDecision="allow" there - # grants the permission. Set False to restore the CLI default (background - # sub-agent writes denied; build/write agents must then run in foreground). - background_agent_permissions: bool = True prompt_rewrite: PromptRewriteConfig = field(default_factory=PromptRewriteConfig) @classmethod @@ -179,9 +168,6 @@ def from_dict(cls, d: dict) -> AgentConfig: d.get("context_1m_excluded_models", []) or [] ), cli_idle_timeout_seconds=int(d.get("cli_idle_timeout_seconds", 900)), - background_agent_permissions=bool( - d.get("background_agent_permissions", True) - ), prompt_rewrite=PromptRewriteConfig.from_dict(d.get("prompt_rewrite") or {}), ) @@ -305,14 +291,6 @@ class GitHubSyncConfig: # pass); deny_repos is a denylist and takes precedence over allow_repos. allow_repos: list[str] = field(default_factory=list) deny_repos: list[str] = field(default_factory=list) - # Actor guardrails — limit which GitHub logins can land a notification in - # the inbox, matched on the "actors" metadata key (every login involved in - # the notification: issue/PR author, assignees, comment & review authors). - # Same semantics as allow_repos/deny_repos — case-insensitive globs, deny - # wins, and a non-empty allow_actors is fail-closed (a notification with no - # matching actor is dropped before it reaches the inbox). Empty = all pass. - allow_actors: list[str] = field(default_factory=list) - deny_actors: list[str] = field(default_factory=list) @classmethod def from_dict(cls, d: dict) -> GitHubSyncConfig: @@ -326,8 +304,6 @@ def from_dict(cls, d: dict) -> GitHubSyncConfig: condense=d.get("condense", False), allow_repos=d.get("allow_repos", []), deny_repos=d.get("deny_repos", []), - allow_actors=d.get("allow_actors", []), - deny_actors=d.get("deny_actors", []), ) @@ -547,16 +523,12 @@ def from_dict(cls, d: dict) -> MemoryConfig: class CronConfig: jobs_file: Path = field(default_factory=lambda: Path("~/.nerve/cron/jobs.yaml")) system_file: Path = field(default_factory=lambda: Path("~/.nerve/cron/system.yaml")) - # Directory scanned at startup for drop-in custom gate plugins (.py files - # defining CronGate subclasses). See nerve/cron/gate_plugins.py. - gate_plugins_dir: Path = field(default_factory=lambda: Path("~/.nerve/cron/gates")) @classmethod def from_dict(cls, d: dict) -> CronConfig: return cls( jobs_file=_expand_path(d.get("jobs_file", "~/.nerve/cron/jobs.yaml")) or Path("~/.nerve/cron/jobs.yaml"), system_file=_expand_path(d.get("system_file", "~/.nerve/cron/system.yaml")) or Path("~/.nerve/cron/system.yaml"), - gate_plugins_dir=_expand_path(d.get("gate_plugins_dir", "~/.nerve/cron/gates")) or Path("~/.nerve/cron/gates"), ) @@ -731,48 +703,6 @@ def from_dict(cls, d: dict) -> ProxyConfig: ) -@dataclass -class OllamaConfig: - """Local Ollama server — exposes its models as selectable chat models. - - Ollama speaks an OpenAI-compatible API (``/v1``), not the Anthropic - Messages API the Claude Agent SDK uses. So Ollama models are routed - through the bundled CLIProxyAPI, which translates Anthropic ↔ OpenAI - and is registered with Ollama as an ``openai-compatibility`` upstream. - - Requirement: this only takes effect when the proxy is also enabled - (``proxy.enabled: true``) — the proxy is the translation layer. When - ``enabled`` is true but the proxy is off, Ollama models are not offered - (a warning is logged at startup). - - Models are auto-discovered at runtime from Ollama's native - ``GET /api/tags`` endpoint, so whatever you have pulled locally shows - up in the model picker with no extra config. - """ - - enabled: bool = False - host: str = "127.0.0.1" - port: int = 11434 - - @property - def base_url(self) -> str: - """Native Ollama base URL (used for ``/api/tags`` discovery).""" - return f"http://{self.host}:{self.port}" - - @property - def openai_base_url(self) -> str: - """OpenAI-compatible base URL (registered as a proxy upstream).""" - return f"http://{self.host}:{self.port}/v1" - - @classmethod - def from_dict(cls, d: dict) -> OllamaConfig: - return cls( - enabled=bool(d.get("enabled", False)), - host=d.get("host", "127.0.0.1"), - port=int(d.get("port", 11434)), - ) - - @dataclass class McpEndpointConfig: """Nerve's own MCP server endpoint (Nerve-as-MCP-server). @@ -1119,7 +1049,6 @@ class NerveConfig: notifications: NotificationsConfig = field(default_factory=NotificationsConfig) docker: DockerConfig = field(default_factory=DockerConfig) proxy: ProxyConfig = field(default_factory=ProxyConfig) - ollama: OllamaConfig = field(default_factory=OllamaConfig) houseofagents: HouseOfAgentsConfig = field(default_factory=HouseOfAgentsConfig) langfuse: LangfuseConfig = field(default_factory=LangfuseConfig) xmemory: XmemoryConfig = field(default_factory=XmemoryConfig) @@ -1155,15 +1084,6 @@ def effective_api_key(self) -> str: return self.proxy.api_key return self.anthropic_api_key - @property - def ollama_routable(self) -> bool: - """True when Ollama models can actually be served. - - Requires both Ollama enabled and the proxy running (the proxy is - the Anthropic↔OpenAI translation layer Ollama is reached through). - """ - return self.ollama.enabled and self.proxy.enabled - def create_anthropic_client(self, timeout: float = 60.0) -> Any: """Create an Anthropic client based on the configured provider. @@ -1246,7 +1166,6 @@ def from_dict(cls, d: dict) -> NerveConfig: notifications=NotificationsConfig.from_dict(d.get("notifications", {})), docker=DockerConfig.from_dict(d.get("docker", {})), proxy=ProxyConfig.from_dict(d.get("proxy", {})), - ollama=OllamaConfig.from_dict(d.get("ollama", {})), houseofagents=HouseOfAgentsConfig.from_dict(d.get("houseofagents", {})), langfuse=LangfuseConfig.from_dict(d.get("langfuse", {})), xmemory=XmemoryConfig.from_dict(d.get("xmemory", {})), diff --git a/tests/test_engine_options.py b/tests/test_engine_options.py new file mode 100644 index 0000000..25e7493 --- /dev/null +++ b/tests/test_engine_options.py @@ -0,0 +1,143 @@ +"""Tests for engine option helpers — OAuth-conditional thinking/effort cap. + +Regression for the issue where every cron run failed with +``API Error: 400 level "max" not supported, valid levels: low, medium, high`` +because the global ``effort=max`` / ``thinking=max`` settings were applied +to cron sessions running on ``cron_model`` (Sonnet) under Claude OAuth, +which caps non-flagship models at ``high``. + +The fix downgrades ``thinking`` and ``effort`` to ``high`` for cron and +hook sessions **only when OAuth is in use** (``config.proxy.enabled``). +API users keep ``max`` for every session, and interactive sessions +(web/Telegram/Discord/...) keep ``max`` even under OAuth — only the +narrow OAuth+cron path is touched. +""" + +from __future__ import annotations + +import pytest + +from nerve.agent.engine import _select_thinking_effort +from nerve.config import AgentConfig, NerveConfig, ProxyConfig + + +def _make_config( + *, + thinking: str = "max", + effort: str = "max", + proxy_enabled: bool = False, +) -> NerveConfig: + """Build a minimal NerveConfig for testing _select_thinking_effort. + + Only the ``agent`` and ``proxy`` fields are read by the helper; the + rest can stay at their defaults. + """ + return NerveConfig( + agent=AgentConfig(thinking=thinking, effort=effort), + proxy=ProxyConfig(enabled=proxy_enabled), + ) + + +class TestSelectThinkingEffort: + """``_select_thinking_effort`` downgrades only when OAuth + cron/hook.""" + + # ------------------------------------------------------------------ # + # OAuth on, cron-like source: must downgrade max -> high # + # ------------------------------------------------------------------ # + + @pytest.mark.parametrize("source", ["cron", "hook"]) + def test_oauth_caps_cron_max_to_high(self, source: str): + """The exact bug being fixed: OAuth + cron + max -> 'high'.""" + config = _make_config(thinking="max", effort="max", proxy_enabled=True) + assert _select_thinking_effort(config, source) == ("high", "high") + + @pytest.mark.parametrize("source", ["cron", "hook"]) + def test_oauth_does_not_upgrade_lower_values(self, source: str): + """The cap is a max-only cap, not a forced value. Lower settings pass through.""" + config = _make_config( + thinking="medium", effort="low", proxy_enabled=True, + ) + assert _select_thinking_effort(config, source) == ("medium", "low") + + @pytest.mark.parametrize("source", ["cron", "hook"]) + def test_oauth_caps_individually(self, source: str): + """Each knob is capped independently if it's at 'max'.""" + config = _make_config( + thinking="max", effort="medium", proxy_enabled=True, + ) + assert _select_thinking_effort(config, source) == ("high", "medium") + + config = _make_config( + thinking="medium", effort="max", proxy_enabled=True, + ) + assert _select_thinking_effort(config, source) == ("medium", "high") + + # ------------------------------------------------------------------ # + # OAuth on, interactive source: must NOT downgrade # + # ------------------------------------------------------------------ # + + @pytest.mark.parametrize("source", ["web", "telegram", "discord", "api", ""]) + def test_oauth_does_not_downgrade_interactive_sources(self, source: str): + """Interactive sessions run on agent.model (Opus by default) which + accepts max under OAuth. Don't touch them.""" + config = _make_config(thinking="max", effort="max", proxy_enabled=True) + assert _select_thinking_effort(config, source) == ("max", "max") + + # ------------------------------------------------------------------ # + # OAuth off (API key): never downgrade — Artem's requirement # + # ------------------------------------------------------------------ # + + @pytest.mark.parametrize("source", ["cron", "hook", "web", "telegram", ""]) + def test_api_users_never_downgraded(self, source: str): + """Without the local proxy (i.e. user has a real Anthropic API key), + every source keeps the configured value. This is exactly what Artem + asked for on ClickHouse/nerve#129 — don't change behavior for API + users.""" + config = _make_config(thinking="max", effort="max", proxy_enabled=False) + assert _select_thinking_effort(config, source) == ("max", "max") + + @pytest.mark.parametrize("source", ["cron", "hook"]) + def test_api_users_keep_custom_values_for_cron(self, source: str): + """API users can pick any value for cron sessions too — no special + treatment.""" + config = _make_config( + thinking="medium", effort="low", proxy_enabled=False, + ) + assert _select_thinking_effort(config, source) == ("medium", "low") + + # ------------------------------------------------------------------ # + # Defaults sanity check # + # ------------------------------------------------------------------ # + + def test_defaults_match_documented_values(self): + """Defaults: max/max for everyone; no special cron knobs anymore.""" + cfg = AgentConfig() + assert cfg.thinking == "max" + assert cfg.effort == "max" + # The cron_thinking / cron_effort knobs were removed — keep them + # gone so we don't reintroduce the unconditional downgrade. + assert not hasattr(cfg, "cron_thinking") + assert not hasattr(cfg, "cron_effort") + + +class TestAgentConfigFromDict: + """``AgentConfig.from_dict`` no longer reads cron_thinking/cron_effort.""" + + def test_from_dict_ignores_legacy_cron_keys(self): + """Old configs with cron_thinking/cron_effort load cleanly — the + keys are silently ignored. Backwards-compatible: nothing crashes, + the keys just don't do anything anymore (their behavior moved to + the engine's OAuth check).""" + cfg = AgentConfig.from_dict({ + "thinking": "max", + "effort": "max", + "cron_thinking": "high", # legacy, ignored + "cron_effort": "high", # legacy, ignored + }) + assert cfg.thinking == "max" + assert cfg.effort == "max" + + def test_from_dict_empty_uses_defaults(self): + cfg = AgentConfig.from_dict({}) + assert cfg.thinking == "max" + assert cfg.effort == "max"