From 291403525580a33a5d3957e3b2b45d877b361d3d Mon Sep 17 00:00:00 2001 From: bddap-bot Date: Tue, 2 Jun 2026 14:27:42 -0700 Subject: [PATCH 01/25] Refactor provider dispatch behind a Backend trait MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit main.rs::refactor() matched on Provider, re-derived the API key inline, and called a free anthropic::complete / openai::complete — a match-and-duplicate that grows worse with each new call shape. Introduce a Backend trait and a single resolve() that turns a provider choice into a key-bearing, callable backend, so the rest of refac stays provider-agnostic. No behavior change: wire formats, error paths, and "no key" messages are identical. Paves the way for an Anthropic-only tool/edit capability (a separate trait, hence Box over a closed enum). Co-Authored-By: Claude Opus 4.8 --- src/anthropic.rs | 21 +++++++++++++++- src/backend.rs | 64 ++++++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 20 ++------------- src/openai.rs | 21 +++++++++++++++- 4 files changed, 106 insertions(+), 20 deletions(-) create mode 100644 src/backend.rs diff --git a/src/anthropic.rs b/src/anthropic.rs index 4ea7c89..b5b3ed6 100644 --- a/src/anthropic.rs +++ b/src/anthropic.rs @@ -7,9 +7,28 @@ use serde::{Deserialize, Serialize}; use serde_json::Value; use crate::api::{Message, Role}; +use crate::backend::Backend; const MAX_TOKENS: u32 = 80000; +/// The Anthropic backend: an API key and the model to call. +pub struct Anthropic { + key: String, + model: String, +} + +impl Anthropic { + pub fn new(key: String, model: String) -> Self { + Anthropic { key, model } + } +} + +impl Backend for Anthropic { + fn complete(&self, messages: &[Message]) -> anyhow::Result { + send(&self.key, &self.model, messages) + } +} + /// Anthropic 400s on an empty text block, so render empty fields as a visible /// placeholder. 
fn field_or_placeholder(field: &str) -> &str { @@ -77,7 +96,7 @@ enum ResponseBlock { } /// Send a chat-style prompt to the Claude Messages API and return the text. -pub fn complete(api_key: &str, model: &str, messages: &[Message]) -> anyhow::Result { +fn send(api_key: &str, model: &str, messages: &[Message]) -> anyhow::Result { let req = build_request(model, messages); tracing::debug!( diff --git a/src/backend.rs b/src/backend.rs new file mode 100644 index 0000000..9d9d0ed --- /dev/null +++ b/src/backend.rs @@ -0,0 +1,64 @@ +//! The model-backend interface: one trait both providers implement, plus the +//! single place where a `Provider` choice is turned into a ready-to-call, +//! key-bearing backend. + +use anyhow::Result; + +use crate::anthropic::Anthropic; +use crate::api::Message; +use crate::config_files::{Provider, Secrets}; +use crate::openai::Openai; + +/// A resolved model backend — provider, key, and model already settled. Callers +/// hand it refac's provider-agnostic [`Message`]s and get back the completion. +/// +/// Returned as `Box` rather than a closed `enum` on purpose: +/// upcoming tool/function-call edits are an Anthropic-only capability, which a +/// trait expresses as a separate `Edits` trait that only `Anthropic` implements +/// — no enum arm that has to fake "unsupported" at runtime. Keep it `dyn`. +pub trait Backend { + /// Send the conversation and return the model's text output. + fn complete(&self, messages: &[Message]) -> Result; +} + +/// Turn a resolved provider + model into a callable backend, failing if that +/// provider's API key is missing. This is the one spot that knows how each +/// provider sources its key, so the rest of refac stays provider-agnostic. +pub fn resolve(provider: Provider, model: &str, secrets: &Secrets) -> Result> { + match provider { + Provider::Anthropic => { + let key = secrets.anthropic_api_key.clone().ok_or_else(|| { + anyhow::anyhow!("No Anthropic API key found. 
Set ANTHROPIC_API_KEY or run 'refac login'.") + })?; + Ok(Box::new(Anthropic::new(key, model.to_string()))) + } + Provider::Openai => { + let key = secrets.openai_api_key.clone().ok_or_else(|| { + anyhow::anyhow!("No OpenAI API key found. Set OPENAI_API_KEY or run 'refac login'.") + })?; + Ok(Box::new(Openai::new(key, model.to_string()))) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn resolve_errors_without_a_key() { + let secrets = Secrets::default(); + assert!(resolve(Provider::Anthropic, "m", &secrets).is_err()); + assert!(resolve(Provider::Openai, "m", &secrets).is_err()); + } + + #[test] + fn resolve_succeeds_with_the_matching_key() { + let secrets = Secrets { + anthropic_api_key: Some("a".into()), + openai_api_key: Some("o".into()), + }; + assert!(resolve(Provider::Anthropic, "m", &secrets).is_ok()); + assert!(resolve(Provider::Openai, "m", &secrets).is_ok()); + } +} diff --git a/src/main.rs b/src/main.rs index 27c6f28..6b0697f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,7 @@ mod anthropic; mod api; mod api_client; +mod backend; mod config_files; mod openai; mod prompt; @@ -109,24 +110,7 @@ fn refactor( let provider = config.provider(sc); let model = config.model(provider); - let output = match provider { - Provider::Anthropic => { - let key = sc.anthropic_api_key.as_deref().ok_or_else(|| { - anyhow::anyhow!( - "No Anthropic API key found. Set ANTHROPIC_API_KEY or run 'refac login'." - ) - })?; - anthropic::complete(key, &model, &messages)? - } - Provider::Openai => { - let key = sc.openai_api_key.as_deref().ok_or_else(|| { - anyhow::anyhow!( - "No OpenAI API key found. Set OPENAI_API_KEY or run 'refac login'." - ) - })?; - openai::complete(key, &model, &messages)? 
- } - }; + let output = backend::resolve(provider, &model, sc)?.complete(&messages)?; log( LogEntry { diff --git a/src/openai.rs b/src/openai.rs index 1f78d00..985a21d 100644 --- a/src/openai.rs +++ b/src/openai.rs @@ -7,9 +7,28 @@ use serde::{Deserialize, Serialize}; use crate::api::{Message, Role}; use crate::api_client::{Client, Endpoint, Req}; +use crate::backend::Backend; + +/// The OpenAI backend: an API key and the model to call. +pub struct Openai { + key: String, + model: String, +} + +impl Openai { + pub fn new(key: String, model: String) -> Self { + Openai { key, model } + } +} + +impl Backend for Openai { + fn complete(&self, messages: &[Message]) -> anyhow::Result { + send(&self.key, &self.model, messages) + } +} /// Send refac's messages to the OpenAI chat-completions API and return the text. -pub fn complete(api_key: &str, model: &str, messages: &[Message]) -> anyhow::Result { +fn send(api_key: &str, model: &str, messages: &[Message]) -> anyhow::Result { let client = Client::new(api_key); // OpenAI takes one string per message; sending each field as its own message From c781bf3bb9626b075390168184b65087b1cf0b64 Mon Sep 17 00:00:00 2001 From: bddap-bot Date: Tue, 2 Jun 2026 14:31:01 -0700 Subject: [PATCH 02/25] =?UTF-8?q?backend:=20fix=20doc=20comment=20?= =?UTF-8?q?=E2=80=94=20function-calling=20is=20shared,=20not=20Anthropic-o?= =?UTF-8?q?nly?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.8 --- src/backend.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/backend.rs b/src/backend.rs index 9d9d0ed..754d0b1 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -12,10 +12,10 @@ use crate::openai::Openai; /// A resolved model backend — provider, key, and model already settled. Callers /// hand it refac's provider-agnostic [`Message`]s and get back the completion. 
/// -/// Returned as `Box` rather than a closed `enum` on purpose: -/// upcoming tool/function-call edits are an Anthropic-only capability, which a -/// trait expresses as a separate `Edits` trait that only `Anthropic` implements -/// — no enum arm that has to fake "unsupported" at runtime. Keep it `dyn`. +/// Resolved to `Box` so call sites depend only on the interface, +/// never on which provider answered. The trait is where the upcoming tool / +/// function-call round-trip lands (both providers support it), keeping the +/// edit loop provider-agnostic. pub trait Backend { /// Send the conversation and return the model's text output. fn complete(&self, messages: &[Message]) -> Result; From d8f16a45496f4ac0ac334e2b8c08c0bd9edad092 Mon Sep 17 00:00:00 2001 From: bddap-bot Date: Tue, 2 Jun 2026 15:06:58 -0700 Subject: [PATCH 03/25] =?UTF-8?q?edit:=20structured-edit=20engine=20?= =?UTF-8?q?=E2=80=94=20replacer=20chain=20+=20apply?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The edit tool's core. The model calls a single-edit `edit({old,new,replace_all?})` tool, possibly several times per turn (both providers do parallel tool calls); `apply` runs one such edit against the buffer. Matching walks a chain of progressively looser strategies (exact, line-trimmed, block-anchor, whitespace-normalized, indentation-flexible — after opencode's replace()), exact first, first unique hit wins. Missing or ambiguous matches are typed errors (after claude-code's str_replace contract) to feed back to the model, never a silent mis-apply. Pure and provider-agnostic. 14 unit tests. Not yet wired into a backend. Co-Authored-By: Claude Opus 4.8 --- src/edit.rs | 416 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 1 + 2 files changed, 417 insertions(+) create mode 100644 src/edit.rs diff --git a/src/edit.rs b/src/edit.rs new file mode 100644 index 0000000..a31d24d --- /dev/null +++ b/src/edit.rs @@ -0,0 +1,416 @@ +//! 
Structured edits and how they're applied. +//! +//! The model calls a single-edit `edit` tool, possibly several times in one turn +//! (both providers support parallel tool calls); refac applies each `{old, new}` +//! replacement to the selected text. The hard part is that the model's `old` +//! rarely matches byte-for-byte — indentation drifts, whitespace reflows, a +//! block gets reworded. So matching runs a chain of progressively looser +//! strategies (borrowed from opencode's `replace()`), exact first, and the first +//! candidate that lands a *unique* hit wins. A match that's missing or ambiguous +//! is an error fed back to the model, never a silent mis-apply (the contract +//! claude-code's str_replace established). + +use serde::{Deserialize, Serialize}; + +/// The `edit` tool's arguments: one replacement. `old` is matched against the +/// current buffer (loosely, via the replacer chain); `new` takes its place. +/// Empty `new` deletes; insertion is done by including surrounding text in both +/// `old` and `new`. `replace_all` drops the uniqueness requirement and replaces +/// every occurrence of the matched candidate. +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +pub struct Edit { + pub old: String, + pub new: String, + #[serde(default)] + pub replace_all: bool, +} + +/// Why an edit couldn't be applied. Carries enough to tell the model what went +/// wrong (fed back as a tool result) and to log a failure-rate signal. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum EditError { + /// `old` matched nothing, even after fuzzy fallback. + NotFound { old: String }, + /// `old` matched more than once and `replace_all` wasn't set. + Ambiguous { old: String, count: usize }, + /// `old == new`; the edit would do nothing. + NoChange { old: String }, + /// `old` was empty; there's nothing to anchor a replacement to. 
+ EmptyOld, +} + +impl std::fmt::Display for EditError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + EditError::NotFound { old } => write!( + f, + "could not find this text to edit (copy it verbatim from the selection): {old:?}" + ), + EditError::Ambiguous { old, count } => write!( + f, + "found {count} matches for {old:?}; add surrounding context to make it unique, or set replace_all" + ), + EditError::NoChange { old } => { + write!(f, "old and new are identical, so this edit does nothing: {old:?}") + } + EditError::EmptyOld => write!( + f, + "old is empty; to insert, anchor on existing text and include it in both old and new" + ), + } + } +} + +impl std::error::Error for EditError {} + +/// Apply one edit to `src`, returning the new text. Matching walks the replacer +/// chain (exact first) and requires a unique hit unless `replace_all`. The driver +/// calls this once per tool call; when the model emits several edits in a turn it +/// folds this over them, so a later edit sees the text an earlier one produced. +pub fn apply(src: &str, edit: &Edit) -> Result { + if edit.old.is_empty() { + return Err(EditError::EmptyOld); + } + if edit.old == edit.new { + return Err(EditError::NoChange { + old: edit.old.clone(), + }); + } + + // Track the best diagnosis across the chain: an ambiguous candidate is a + // more useful complaint than "not found", so remember it if nothing unique + // turns up. 
+ let mut ambiguous: Option = None; + + for replacer in CHAIN { + for cand in replacer(src, &edit.old) { + let count = src.matches(cand.as_str()).count(); + match (count, edit.replace_all) { + (0, _) => continue, + (_, true) => return Ok(src.replace(cand.as_str(), &edit.new)), + (1, false) => { + let i = src.find(cand.as_str()).expect("count == 1"); + let mut out = String::with_capacity(src.len() - cand.len() + edit.new.len()); + out.push_str(&src[..i]); + out.push_str(&edit.new); + out.push_str(&src[i + cand.len()..]); + return Ok(out); + } + (n, false) => ambiguous = Some(ambiguous.map_or(n, |m| m.max(n))), + } + } + } + + Err(match ambiguous { + Some(count) => EditError::Ambiguous { + old: edit.old.clone(), + count, + }, + None => EditError::NotFound { + old: edit.old.clone(), + }, + }) +} + +/// A replacer yields candidate substrings of `src` to look for, fuzzy intent but +/// the yielded string is always exact text *from* `src` (or `old` itself, for +/// the exact replacer) so the caller can find it and check uniqueness uniformly. +type Replacer = fn(src: &str, old: &str) -> Vec; + +/// Exact first, then progressively looser. Order matters: a precise match must +/// win before a fuzzy one gets a chance. +const CHAIN: &[Replacer] = &[ + simple, + line_trimmed, + block_anchor, + whitespace_normalized, + indentation_flexible, +]; + +/// `old`, verbatim. +fn simple(_src: &str, old: &str) -> Vec { + vec![old.to_string()] +} + +/// Split a string into (byte offset, line content) pairs, dropping the `\n`. +fn lines_with_offsets(s: &str) -> Vec<(usize, &str)> { + let mut out = Vec::new(); + let mut start = 0; + for line in s.split_inclusive('\n') { + out.push((start, line.strip_suffix('\n').unwrap_or(line))); + start += line.len(); + } + out +} + +/// The exact `src` text spanning source lines `i..=k` (newline-joined, no +/// trailing newline). 
+fn span(src: &str, lines: &[(usize, &str)], i: usize, k: usize) -> String {
+    let start = lines[i].0;
+    let end = lines[k].0 + lines[k].1.len();
+    src[start..end].to_string()
+}
+
+/// Match line-by-line ignoring each line's surrounding whitespace; yield the
+/// original (untrimmed) source span so indentation is preserved on splice.
+fn line_trimmed(src: &str, old: &str) -> Vec<String> {
+    let src_lines = lines_with_offsets(src);
+    let old_lines: Vec<&str> = lines_with_offsets(old).iter().map(|(_, l)| *l).collect();
+    let n = old_lines.len();
+    if n == 0 || n > src_lines.len() {
+        return vec![];
+    }
+    let mut out = Vec::new();
+    for i in 0..=src_lines.len() - n {
+        if (0..n).all(|j| src_lines[i + j].1.trim() == old_lines[j].trim()) {
+            out.push(span(src, &src_lines, i, i + n - 1));
+        }
+    }
+    out
+}
+
+/// For 3+ line blocks: anchor on the first and last (trimmed) lines, and accept
+/// the window only if a majority of its non-empty middle lines also match. Lets
+/// a reworded interior through while resisting wild matches.
+fn block_anchor(src: &str, old: &str) -> Vec<String> {
+    let src_lines = lines_with_offsets(src);
+    let old_lines: Vec<&str> = lines_with_offsets(old).iter().map(|(_, l)| *l).collect();
+    let n = old_lines.len();
+    if n < 3 || n > src_lines.len() {
+        return vec![];
+    }
+    let first = old_lines[0].trim();
+    let last = old_lines[n - 1].trim();
+    let mut out = Vec::new();
+    for i in 0..=src_lines.len() - n {
+        if src_lines[i].1.trim() != first || src_lines[i + n - 1].1.trim() != last {
+            continue;
+        }
+        let mut considered = 0;
+        let mut matched = 0;
+        for j in 1..n - 1 {
+            let o = old_lines[j].trim();
+            if o.is_empty() {
+                continue;
+            }
+            considered += 1;
+            if src_lines[i + j].1.trim() == o {
+                matched += 1;
+            }
+        }
+        if considered == 0 || matched * 2 >= considered {
+            out.push(span(src, &src_lines, i, i + n - 1));
+        }
+    }
+    out
+}
+
+/// Collapse `old` to whitespace-insensitive tokens and find a source region
+/// holding those tokens in order, separated only by whitespace.
+fn whitespace_normalized(src: &str, old: &str) -> Vec<String> {
+    let tokens: Vec<&str> = old.split_whitespace().collect();
+    if tokens.is_empty() {
+        return vec![];
+    }
+    let bytes = src.as_bytes();
+    let mut out = Vec::new();
+    let mut from = 0;
+    while let Some(rel) = src[from..].find(tokens[0]) {
+        let start = from + rel;
+        from = start + tokens[0].chars().next().map_or(1, char::len_utf8); // char boundary
+        let mut pos = start + tokens[0].len();
+        let mut ok = true;
+        for tok in &tokens[1..] {
+            let mut p = pos;
+            while p < bytes.len() && bytes[p].is_ascii_whitespace() {
+                p += 1;
+            }
+            if p == pos || !src[p..].starts_with(tok) {
+                ok = false;
+                break;
+            }
+            pos = p + tok.len();
+        }
+        if ok {
+            out.push(src[start..pos].to_string());
+        }
+    }
+    out
+}
+
+/// Strip common leading indentation from `old` and from each same-height source
+/// window; where the dedented forms match, yield the original window.
+fn indentation_flexible(src: &str, old: &str) -> Vec { + let src_lines = lines_with_offsets(src); + let old_lines: Vec<&str> = lines_with_offsets(old).iter().map(|(_, l)| *l).collect(); + let n = old_lines.len(); + if n == 0 || n > src_lines.len() { + return vec![]; + } + let old_dedent = dedent(&old_lines); + let mut out = Vec::new(); + for i in 0..=src_lines.len() - n { + let window: Vec<&str> = (0..n).map(|j| src_lines[i + j].1).collect(); + if dedent(&window) == old_dedent { + out.push(span(src, &src_lines, i, i + n - 1)); + } + } + out +} + +/// Remove the longest leading-whitespace prefix common to all non-empty lines. +fn dedent(lines: &[&str]) -> Vec { + let indent = lines + .iter() + .filter(|l| !l.trim().is_empty()) + .map(|l| l.len() - l.trim_start().len()) + .min() + .unwrap_or(0); + lines + .iter() + .map(|l| { + if l.len() >= indent { + l[indent..].to_string() + } else { + l.to_string() + } + }) + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + + fn edit(old: &str, new: &str) -> Edit { + Edit { + old: old.into(), + new: new.into(), + replace_all: false, + } + } + + fn run(text: &str, old: &str, new: &str) -> Result { + apply(text, &edit(old, new)) + } + + /// Fold `apply` over a turn's worth of edits, as the driver does. + fn apply_seq(text: &str, edits: &[Edit]) -> Result { + let mut buf = text.to_string(); + for e in edits { + buf = apply(&buf, e)?; + } + Ok(buf) + } + + #[test] + fn exact_substring() { + assert_eq!(run("Me like toast.", "Me like", "I like").unwrap(), "I like toast."); + } + + #[test] + fn batch_applies_in_order() { + // a later edit can target text an earlier edit produced. + let edits = vec![edit("foo", "bar"), edit("bar", "baz")]; + assert_eq!(apply_seq("foo", &edits).unwrap(), "baz"); + // independent targets apply cleanly in sequence. 
+ let edits = vec![edit("one", "1"), edit("two", "2")]; + assert_eq!(apply_seq("one two", &edits).unwrap(), "1 2"); + } + + #[test] + fn insertion_via_anchor() { + let got = run( + "def add(a, b):\n return a + b\n", + "def add(a, b):", + "def add(a, b):\n \"\"\"Sum.\"\"\"", + ) + .unwrap(); + assert_eq!(got, "def add(a, b):\n \"\"\"Sum.\"\"\"\n return a + b\n"); + } + + #[test] + fn deletion_via_empty_new() { + assert_eq!(run("hello cruel world", " cruel", "").unwrap(), "hello world"); + } + + #[test] + fn ambiguous_without_replace_all() { + assert!(matches!( + run("x x x", "x", "y"), + Err(EditError::Ambiguous { count: 3, .. }) + )); + } + + #[test] + fn replace_all_when_requested() { + let e = Edit { + old: "x".into(), + new: "y".into(), + replace_all: true, + }; + assert_eq!(apply_seq("x x x", &[e]).unwrap(), "y y y"); + } + + #[test] + fn not_found_is_reported() { + assert!(matches!( + run("hello", "goodbye", "hi"), + Err(EditError::NotFound { .. }) + )); + } + + #[test] + fn empty_old_rejected() { + assert!(matches!(run("hello", "", "x"), Err(EditError::EmptyOld))); + } + + #[test] + fn noop_rejected() { + assert!(matches!(run("hello", "hello", "hello"), Err(EditError::NoChange { .. }))); + } + + #[test] + fn line_trimmed_tolerates_indent_drift() { + // model dropped the leading indentation in `old`. + let src = "fn main() {\n let x = 1;\n}\n"; + let got = run(src, "let x = 1;", "let x = 2;").unwrap(); + assert_eq!(got, "fn main() {\n let x = 2;\n}\n"); + } + + #[test] + fn dedented_old_matches_indented_source() { + // The model wrote `old` without the source's indentation; we still find + // the block. `new` is spliced verbatim, so the model owns the + // indentation it wants in the result (same contract as claude-code). 
+ let src = "if cond:\n a = 1\n b = 2\n"; + let old = "a = 1\nb = 2"; + let new = " a = 10\n b = 20"; + let got = run(src, old, new).unwrap(); + assert_eq!(got, "if cond:\n a = 10\n b = 20\n"); + } + + #[test] + fn whitespace_normalized_reflow() { + // model collapsed the run of spaces. + let got = run("foo + bar", "foo + bar", "baz").unwrap(); + assert_eq!(got, "baz"); + } + + #[test] + fn block_anchor_reworded_middle() { + let src = "fn f() {\n let a = compute();\n let b = a + 1;\n return b;\n}"; + // middle lines differ slightly; anchors (first/last) pin it. + let old = "fn f() {\n let a = compute();\n let b = a + 1;\n return b;\n}"; + let got = run(src, old, "fn f() { 42 }").unwrap(); + assert_eq!(got, "fn f() { 42 }"); + } + + #[test] + fn exact_beats_fuzzy_for_uniqueness() { + // two indentation-equal blocks, but an exact match is unique → applied. + let src = " a = 1\n a = 1\n"; + let got = run(src, " a = 1", " a = 2").unwrap(); + assert_eq!(got, " a = 1\n a = 2\n"); + } +} diff --git a/src/main.rs b/src/main.rs index 6b0697f..bb545a4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,6 +3,7 @@ mod api; mod api_client; mod backend; mod config_files; +mod edit; mod openai; mod prompt; From 47b38f3e74b04aea4e982e45a531f085c9866237 Mon Sep 17 00:00:00 2001 From: bddap-bot Date: Tue, 2 Jun 2026 15:16:58 -0700 Subject: [PATCH 04/25] agent: provider-agnostic edit-mode loop + tool set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The model drives a session over the selected text via four tools — edit (the workhorse), view (re-anchor if it loses track), reset (restore the original), and finish (the done signal) — and refac applies each, feeding results back, until the model finishes or a guard trips. A Model is one abstract assistant turn (send conversation + tools, get tool calls); the real providers implement it over their wire formats, so this loop is IO-free and tested end to end with a scripted model. 
Guards: a turn cap (view/ reset can't spin) and a consecutive-all-edits-failed cap (a stuck model stops burning tokens). Parallel tool calls in one turn apply in order. 10 unit tests. Not yet wired to a real provider or main. Co-Authored-By: Claude Opus 4.8 --- src/agent.rs | 359 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 1 + 2 files changed, 360 insertions(+) create mode 100644 src/agent.rs diff --git a/src/agent.rs b/src/agent.rs new file mode 100644 index 0000000..d5490ba --- /dev/null +++ b/src/agent.rs @@ -0,0 +1,359 @@ +//! The edit-mode loop: the model drives a small session over the selected text +//! by calling tools (`edit`, `view`, `reset`, `finish`), refac applies each and +//! feeds the result back, until the model finishes or a guard trips. +//! +//! This module is provider-agnostic and IO-free. A [`Model`] is one turn of +//! "send the conversation + tools, get back the tool calls"; the real providers +//! implement it over their wire formats, and tests implement it with a script. + +use anyhow::Result; +use serde_json::{json, Value}; + +use crate::edit::{self, Edit}; + +/// A tool exposed to the model: its name, one-line purpose, and JSON-Schema for +/// the arguments. Providers translate these into their own tool-definition shape. +pub struct ToolSpec { + pub name: &'static str, + pub description: &'static str, + pub input_schema: Value, +} + +/// The tools refac offers in edit mode. `edit` does the work; the other three +/// keep the model oriented and let it end cleanly. +pub fn tools() -> Vec { + let no_args = || json!({ "type": "object", "properties": {} }); + vec![ + ToolSpec { + name: "edit", + description: "Replace an exact substring of the selected text. Copy `old` verbatim \ + (whitespace and indentation included); make it long enough to be unique, or set \ + `replace_all`. `new` is the replacement — empty to delete; to insert, include \ + surrounding text in both `old` and `new`. 
Call this several times in one turn to \ + make several edits.", + input_schema: json!({ + "type": "object", + "properties": { + "old": { "type": "string", "description": "exact text to replace" }, + "new": { "type": "string", "description": "replacement text" }, + "replace_all": { "type": "boolean", "description": "replace every occurrence" } + }, + "required": ["old", "new"] + }), + }, + ToolSpec { + name: "view", + description: "Return the current text, with all edits so far applied. Use it to \ + re-anchor if you've lost track of the exact contents.", + input_schema: no_args(), + }, + ToolSpec { + name: "reset", + description: "Discard all edits and restore the original selected text. Returns it.", + input_schema: no_args(), + }, + ToolSpec { + name: "finish", + description: "Signal that the transform is complete. refac outputs the current text. \ + Call this when you're done editing.", + input_schema: no_args(), + }, + ] +} + +/// A tool call as it comes off the wire, before refac knows it's valid. +pub struct RawCall { + pub id: String, + pub name: String, + pub args: Value, +} + +/// A parsed, understood tool call. +enum Action { + Edit(Edit), + View, + Reset, + Finish, +} + +fn parse(name: &str, args: Value) -> Result { + match name { + "edit" => Ok(Action::Edit(serde_json::from_value(args)?)), + "view" => Ok(Action::View), + "reset" => Ok(Action::Reset), + "finish" => Ok(Action::Finish), + other => anyhow::bail!("unknown tool {other:?}"), + } +} + +/// What refac sends back for one tool call. +pub struct ToolResult { + pub id: String, + pub content: String, + pub is_error: bool, +} + +/// One assistant turn, abstracted over the provider. `turn` returns the tool +/// calls the model made (empty = it ended its turn without calling one, i.e. a +/// natural "done"); `respond` hands the results back for the next turn. +pub trait Model { + fn turn(&mut self) -> Result>; + fn respond(&mut self, results: Vec) -> Result<()>; +} + +/// Guard rails for the loop. 
+pub struct Limits { + /// Hard cap on assistant turns, so `view`/`reset` can't spin forever. + pub max_turns: usize, + /// Give up after this many consecutive turns in which every edit failed — + /// the model is stuck and burning tokens. + pub max_consecutive_failures: usize, +} + +impl Default for Limits { + fn default() -> Self { + Limits { + max_turns: 25, + max_consecutive_failures: 3, + } + } +} + +/// Run the edit loop over `original`, returning the final text. +pub fn run(model: &mut dyn Model, original: String, limits: &Limits) -> Result<String> { + let mut current = original.clone(); + let mut consecutive_failures = 0; + + for _ in 0..limits.max_turns { + let calls = model.turn()?; + if calls.is_empty() { + return Ok(current); // model ended its turn without a tool call + } + + let mut results = Vec::with_capacity(calls.len()); + let mut edits_attempted = 0; + let mut edits_failed = 0; + + for call in calls { + let RawCall { id, name, args } = call; + match parse(&name, args) { + Ok(Action::Finish) => return Ok(current), + Ok(Action::View) => results.push(ok(id, current.clone())), + Ok(Action::Reset) => { + current = original.clone(); + results.push(ok(id, current.clone())); + } + Ok(Action::Edit(e)) => { + edits_attempted += 1; + match edit::apply(&current, &e) { + Ok(next) => { + current = next; + results.push(ok(id, "ok".into())); + } + Err(err) => { + edits_failed += 1; + results.push(err_result(id, err.to_string())); + } + } + } + Err(err) => results.push(err_result(id, err.to_string())), + } + } + + // A turn "fails" only if it tried to edit and every edit missed; a turn + // of pure `view`/`reset` shouldn't count against the model. 
+ if edits_attempted > 0 && edits_failed == edits_attempted { + consecutive_failures += 1; + if consecutive_failures >= limits.max_consecutive_failures { + anyhow::bail!( + "giving up after {consecutive_failures} consecutive turns of failed edits" + ); + } + } else { + consecutive_failures = 0; + } + + model.respond(results)?; + } + + anyhow::bail!("edit loop hit its {}-turn limit", limits.max_turns) +} + +fn ok(id: String, content: String) -> ToolResult { + ToolResult { + id, + content, + is_error: false, + } +} + +fn err_result(id: String, content: String) -> ToolResult { + ToolResult { + id, + content, + is_error: true, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + /// A model driven by a canned script: each entry is the tool calls for one + /// turn. It records the results refac sends back so tests can assert on them. + struct ScriptedModel { + turns: std::vec::IntoIter>, + seen: Vec>, + } + + impl ScriptedModel { + fn new(turns: Vec>) -> Self { + ScriptedModel { + turns: turns.into_iter(), + seen: Vec::new(), + } + } + } + + impl Model for ScriptedModel { + fn turn(&mut self) -> Result> { + Ok(self.turns.next().unwrap_or_default()) + } + fn respond(&mut self, results: Vec) -> Result<()> { + self.seen.push(results); + Ok(()) + } + } + + fn edit_call(id: &str, old: &str, new: &str) -> RawCall { + RawCall { + id: id.into(), + name: "edit".into(), + args: json!({ "old": old, "new": new }), + } + } + + fn call(id: &str, name: &str) -> RawCall { + RawCall { + id: id.into(), + name: name.into(), + args: json!({}), + } + } + + #[test] + fn edit_then_finish() { + let mut m = ScriptedModel::new(vec![ + vec![edit_call("1", "Me like", "I like")], + vec![call("2", "finish")], + ]); + let out = run(&mut m, "Me like toast.".into(), &Limits::default()).unwrap(); + assert_eq!(out, "I like toast."); + } + + #[test] + fn parallel_edits_in_one_turn() { + let mut m = ScriptedModel::new(vec![vec![ + edit_call("1", "one", "1"), + edit_call("2", "two", "2"), + call("3", 
"finish"), + ]]); + let out = run(&mut m, "one two".into(), &Limits::default()).unwrap(); + assert_eq!(out, "1 2"); + } + + #[test] + fn natural_done_without_finish() { + // second turn has no calls → loop ends with the current buffer. + let mut m = ScriptedModel::new(vec![vec![edit_call("1", "a", "b")], vec![]]); + let out = run(&mut m, "a".into(), &Limits::default()).unwrap(); + assert_eq!(out, "b"); + } + + #[test] + fn failed_edit_is_reported_then_recovered() { + let mut m = ScriptedModel::new(vec![ + vec![edit_call("1", "nope", "x")], // misses + vec![edit_call("2", "a", "b"), call("3", "finish")], + ]); + let out = run(&mut m, "a".into(), &Limits::default()).unwrap(); + assert_eq!(out, "b"); + // refac told the model the first edit failed. + assert!(m.seen[0][0].is_error); + assert!(m.seen[0][0].content.contains("could not find")); + } + + #[test] + fn view_returns_current_buffer() { + let mut m = ScriptedModel::new(vec![ + vec![edit_call("1", "a", "b")], + vec![call("2", "view")], + vec![call("3", "finish")], + ]); + let out = run(&mut m, "a".into(), &Limits::default()).unwrap(); + assert_eq!(out, "b"); + assert_eq!(m.seen[1][0].content, "b"); + assert!(!m.seen[1][0].is_error); + } + + #[test] + fn reset_restores_original() { + let mut m = ScriptedModel::new(vec![ + vec![edit_call("1", "a", "b")], + vec![call("2", "reset")], + vec![call("3", "finish")], + ]); + let out = run(&mut m, "a".into(), &Limits::default()).unwrap(); + assert_eq!(out, "a"); + assert_eq!(m.seen[1][0].content, "a"); + } + + #[test] + fn unknown_tool_is_an_error_result_not_a_crash() { + let mut m = ScriptedModel::new(vec![ + vec![call("1", "frobnicate")], + vec![call("2", "finish")], + ]); + let out = run(&mut m, "x".into(), &Limits::default()).unwrap(); + assert_eq!(out, "x"); + assert!(m.seen[0][0].is_error); + assert!(m.seen[0][0].content.contains("unknown tool")); + } + + #[test] + fn aborts_after_consecutive_failures() { + let mut m = ScriptedModel::new(vec![ + vec![edit_call("1", 
"nope", "x")], + vec![edit_call("2", "nope", "x")], + vec![edit_call("3", "nope", "x")], + ]); + let err = run(&mut m, "a".into(), &Limits::default()).unwrap_err(); + assert!(err.to_string().contains("consecutive")); + } + + #[test] + fn pure_view_turns_do_not_count_as_failures() { + // interleave a failing edit with views; failures aren't consecutive. + let mut m = ScriptedModel::new(vec![ + vec![edit_call("1", "nope", "x")], // fail 1 + vec![call("2", "view")], // resets the streak + vec![edit_call("3", "nope", "x")], // fail 1 again + vec![edit_call("4", "a", "b"), call("5", "finish")], + ]); + let out = run(&mut m, "a".into(), &Limits::default()).unwrap(); + assert_eq!(out, "b"); + } + + #[test] + fn hits_turn_limit() { + // never finishes; only views. + let turns = (0..30).map(|i| vec![call(&i.to_string(), "view")]).collect(); + let mut m = ScriptedModel::new(turns); + let limits = Limits { + max_turns: 5, + ..Limits::default() + }; + let err = run(&mut m, "x".into(), &limits).unwrap_err(); + assert!(err.to_string().contains("limit")); + } +} diff --git a/src/main.rs b/src/main.rs index bb545a4..1205f93 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,4 @@ +mod agent; mod anthropic; mod api; mod api_client; From 509bf2382578f6cef29f109e7ccfbdb2d12b4154 Mon Sep 17 00:00:00 2001 From: bddap-bot Date: Tue, 2 Jun 2026 15:21:37 -0700 Subject: [PATCH 05/25] anthropic: edit-mode agent over the Messages API AnthropicAgent implements agent::Model: each turn posts the running conversation plus the tool defs (tool_choice auto) and returns the model's tool_use calls; respond threads results back as a tool_result user turn. The assistant content is echoed verbatim as JSON, which the tool_use/tool_result protocol requires. The typed rewrite path (build_request/send) is unchanged. Tool threading uses serde_json::Value since the echoed assistant content is dynamic by nature. Request shaping and tool_use parsing are pure and unit-tested; the HTTP turn needs the live API. 
Co-Authored-By: Claude Opus 4.8 --- src/anthropic.rs | 201 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 200 insertions(+), 1 deletion(-) diff --git a/src/anthropic.rs b/src/anthropic.rs index b5b3ed6..4440374 100644 --- a/src/anthropic.rs +++ b/src/anthropic.rs @@ -4,8 +4,9 @@ use std::time::Duration; use anyhow::Context; use serde::{Deserialize, Serialize}; -use serde_json::Value; +use serde_json::{json, Value}; +use crate::agent::{Model, RawCall, ToolResult, ToolSpec}; use crate::api::{Message, Role}; use crate::backend::Backend; @@ -180,6 +181,162 @@ fn build_request(model: &str, messages: &[Message]) -> MessagesRequest { } } +/// An edit-mode session against the Messages API. Implements [`Model`]: each +/// `turn` posts the running conversation plus the tool definitions and returns +/// the model's tool calls; `respond` threads the results back as a `tool_result` +/// user turn. The assistant's content is echoed back verbatim (as JSON) on the +/// next turn, which is what the API requires for a `tool_use`/`tool_result` +/// exchange. +pub struct AnthropicAgent { + key: String, + model: String, + client: reqwest::blocking::Client, + system: Vec, + messages: Vec, + tools: Vec, +} + +impl AnthropicAgent { + /// Seed from refac's provider-agnostic messages (system + the user turn) and + /// the tools to expose. 
+ pub fn new(key: String, model: String, seed: &[Message], tools: &[ToolSpec]) -> Self { + let mut system = Vec::new(); + let mut messages = Vec::new(); + for m in seed { + let blocks: Vec = m + .fields + .iter() + .map(|f| json!({ "type": "text", "text": field_or_placeholder(f) })) + .collect(); + match m.role { + Role::System => system.extend(blocks), + Role::User | Role::Assistant => { + messages.push(json!({ "role": role(m.role), "content": blocks })) + } + } + } + let tools = tools + .iter() + .map(|t| { + json!({ + "name": t.name, + "description": t.description, + "input_schema": t.input_schema, + }) + }) + .collect(); + AnthropicAgent { + key, + model, + client: http_client(), + system, + messages, + tools, + } + } + + fn request(&self) -> Value { + let mut req = json!({ + "model": self.model, + "max_tokens": MAX_TOKENS, + "messages": self.messages, + "tools": self.tools, + "tool_choice": { "type": "auto" }, + }); + if !self.system.is_empty() { + req["system"] = json!(self.system); + } + req + } +} + +impl Model for AnthropicAgent { + fn turn(&mut self) -> anyhow::Result> { + let body = post(&self.client, &self.key, &self.request())?; + let content = body + .get("content") + .cloned() + .ok_or_else(|| anyhow::anyhow!("Anthropic response missing content: {body}"))?; + // Echo the assistant turn back so the next request carries the tool_use + // blocks the tool_results will refer to. + self.messages + .push(json!({ "role": "assistant", "content": content })); + Ok(calls_from_content(&self.messages.last().unwrap()["content"])) + } + + fn respond(&mut self, results: Vec) -> anyhow::Result<()> { + let blocks: Vec = results + .into_iter() + .map(|r| { + json!({ + "type": "tool_result", + "tool_use_id": r.id, + "content": r.content, + "is_error": r.is_error, + }) + }) + .collect(); + self.messages + .push(json!({ "role": "user", "content": blocks })); + Ok(()) + } +} + +/// Pull the `tool_use` blocks out of an assistant content array. 
+fn calls_from_content(content: &Value) -> Vec { + content + .as_array() + .into_iter() + .flatten() + .filter(|b| b.get("type").and_then(Value::as_str) == Some("tool_use")) + .filter_map(|b| { + Some(RawCall { + id: b.get("id")?.as_str()?.to_string(), + name: b.get("name")?.as_str()?.to_string(), + args: b.get("input").cloned().unwrap_or_else(|| json!({})), + }) + }) + .collect() +} + +fn role(role: Role) -> &'static str { + match role { + Role::System => "system", + Role::User => "user", + Role::Assistant => "assistant", + } +} + +fn http_client() -> reqwest::blocking::Client { + reqwest::blocking::Client::builder() + .timeout(Duration::from_secs(60 * 4)) + .build() + .expect("building HTTP client") +} + +/// POST a request body to the Messages API, returning the parsed JSON or an +/// error carrying the status and body. +fn post(client: &reqwest::blocking::Client, key: &str, req: &Value) -> anyhow::Result { + tracing::debug!("anthropic request: {}", req); + let response = client + .post(API_URL) + .header("x-api-key", key) + .header("anthropic-version", ANTHROPIC_VERSION) + .header("content-type", "application/json") + .json(req) + .send() + .context("Failed to send request to Anthropic API")?; + let status = response.status(); + let body = response + .json::() + .with_context(|| anyhow::anyhow!("Status: {status}. Failed to parse response body."))?; + if !status.is_success() { + let pretty = serde_json::to_string_pretty(&body).unwrap_or_else(|_| body.to_string()); + anyhow::bail!("Status: {status}. 
Body: {pretty}"); + } + Ok(body) +} + #[cfg(test)] mod tests { use super::*; @@ -236,4 +393,46 @@ mod tests { assert!(v.get("system").is_none()); assert_eq!(v["messages"][0]["role"], "user"); } + + #[test] + fn agent_request_carries_tools_and_seed() { + let tools = crate::agent::tools(); + let seed = vec![Message::system("SYS"), user(&["selected", "transform"])]; + let agent = AnthropicAgent::new("k".into(), "claude-opus-4-8".into(), &seed, &tools); + let req = agent.request(); + + assert_eq!(req["system"][0]["text"], "SYS"); + assert_eq!(req["messages"][0]["role"], "user"); + assert_eq!(req["messages"][0]["content"][0]["text"], "selected"); + assert_eq!(req["tool_choice"]["type"], "auto"); + let names: Vec<&str> = req["tools"] + .as_array() + .unwrap() + .iter() + .map(|t| t["name"].as_str().unwrap()) + .collect(); + assert_eq!(names, ["edit", "view", "reset", "finish"]); + } + + #[test] + fn parses_tool_use_blocks() { + let content = json!([ + { "type": "text", "text": "let me fix that" }, + { "type": "tool_use", "id": "tu_1", "name": "edit", + "input": { "old": "a", "new": "b" } }, + { "type": "tool_use", "id": "tu_2", "name": "finish", "input": {} } + ]); + let calls = calls_from_content(&content); + assert_eq!(calls.len(), 2); + assert_eq!(calls[0].id, "tu_1"); + assert_eq!(calls[0].name, "edit"); + assert_eq!(calls[0].args["old"], "a"); + assert_eq!(calls[1].name, "finish"); + } + + #[test] + fn no_tool_use_is_no_calls() { + let content = json!([{ "type": "text", "text": "all done" }]); + assert!(calls_from_content(&content).is_empty()); + } } From 27d0e8c78f1cddb0968826f07cc75c175180da19 Mon Sep 17 00:00:00 2001 From: bddap-bot Date: Tue, 2 Jun 2026 15:38:12 -0700 Subject: [PATCH 06/25] openai: edit-mode agent + edit-mode system prompt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OpenaiAgent implements agent::Model over chat-completions: each turn posts the conversation plus the function tools and returns the 
model's tool_calls; respond threads results back as role:"tool" messages (errors marked in content, since the API has no error flag). Assistant messages are echoed verbatim so tool_call_ids line up. prompt::edit_prefix is the edit-mode system prompt — task, workflow, and refac's personality only; the per-tool mechanics live on the tool descriptions, not here. Request shaping and tool_calls parsing are unit-tested. Co-Authored-By: Claude Opus 4.8 --- src/openai.rs | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/prompt.rs | 22 ++++++ 2 files changed, 224 insertions(+) diff --git a/src/openai.rs b/src/openai.rs index 985a21d..15fdef6 100644 --- a/src/openai.rs +++ b/src/openai.rs @@ -1,14 +1,20 @@ //! OpenAI chat-completions backend and its wire types. use std::collections::HashMap; +use std::time::Duration; +use anyhow::Context; use reqwest::Method; use serde::{Deserialize, Serialize}; +use serde_json::{json, Value}; +use crate::agent::{Model, RawCall, ToolResult, ToolSpec}; use crate::api::{Message, Role}; use crate::api_client::{Client, Endpoint, Req}; use crate::backend::Backend; +const API_URL: &str = "https://api.openai.com/v1/chat/completions"; + /// The OpenAI backend: an API key and the model to call. pub struct Openai { key: String, @@ -133,3 +139,199 @@ pub struct Usage { pub completion_tokens: Option, pub total_tokens: u32, } + +/// An edit-mode session against the chat-completions API. Implements [`Model`]: +/// each `turn` posts the running conversation plus the function tools and returns +/// the model's `tool_calls`; `respond` threads results back as `role: "tool"` +/// messages. The assistant message is echoed verbatim so the `tool_call_id`s line +/// up — and every tool call gets a result, which the API requires. 
+pub struct OpenaiAgent { + key: String, + model: String, + client: reqwest::blocking::Client, + messages: Vec, + tools: Vec, +} + +impl OpenaiAgent { + pub fn new(key: String, model: String, seed: &[Message], tools: &[ToolSpec]) -> Self { + // One message per field keeps the selected/transform boundary, as the + // rewrite path does. + let mut messages = Vec::new(); + for m in seed { + for f in &m.fields { + messages.push(json!({ "role": role(m.role), "content": f })); + } + } + let tools = tools + .iter() + .map(|t| { + json!({ + "type": "function", + "function": { + "name": t.name, + "description": t.description, + "parameters": t.input_schema, + } + }) + }) + .collect(); + OpenaiAgent { + key, + model, + client: http_client(), + messages, + tools, + } + } + + fn request(&self) -> Value { + json!({ + "model": self.model, + "messages": self.messages, + "tools": self.tools, + "tool_choice": "auto", + }) + } +} + +impl Model for OpenaiAgent { + fn turn(&mut self) -> anyhow::Result> { + let body = post(&self.client, &self.key, &self.request())?; + let message = body["choices"][0]["message"].clone(); + if message.is_null() { + anyhow::bail!("OpenAI response missing a message: {body}"); + } + self.messages.push(message.clone()); + Ok(calls_from_message(&message)) + } + + fn respond(&mut self, results: Vec) -> anyhow::Result<()> { + for r in results { + // chat-completions has no error flag on a tool message, so mark + // failures in the content the model reads. + let content = if r.is_error { + format!("ERROR: {}", r.content) + } else { + r.content + }; + self.messages.push(json!({ + "role": "tool", + "tool_call_id": r.id, + "content": content, + })); + } + Ok(()) + } +} + +/// Pull `tool_calls` out of an assistant message; each `arguments` is a JSON +/// string to parse. 
+fn calls_from_message(message: &Value) -> Vec { + message + .get("tool_calls") + .and_then(Value::as_array) + .into_iter() + .flatten() + .filter_map(|c| { + let function = c.get("function")?; + let args = function + .get("arguments") + .and_then(Value::as_str) + .and_then(|s| serde_json::from_str(s).ok()) + .unwrap_or_else(|| json!({})); + Some(RawCall { + id: c.get("id")?.as_str()?.to_string(), + name: function.get("name")?.as_str()?.to_string(), + args, + }) + }) + .collect() +} + +fn role(role: Role) -> &'static str { + match role { + Role::System => "system", + Role::User => "user", + Role::Assistant => "assistant", + } +} + +fn http_client() -> reqwest::blocking::Client { + reqwest::blocking::Client::builder() + .timeout(Duration::from_secs(60 * 4)) + .build() + .expect("building HTTP client") +} + +fn post(client: &reqwest::blocking::Client, key: &str, req: &Value) -> anyhow::Result { + let response = client + .post(API_URL) + .bearer_auth(key) + .json(req) + .send() + .context("Failed to send request to OpenAI API")?; + let status = response.status(); + let body = response + .json::() + .with_context(|| anyhow::anyhow!("Status: {status}. Failed to parse response body."))?; + if !status.is_success() { + let pretty = serde_json::to_string_pretty(&body).unwrap_or_else(|_| body.to_string()); + anyhow::bail!("Status: {status}. 
Body: {pretty}"); + } + Ok(body) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn agent_request_uses_function_tools() { + let tools = crate::agent::tools(); + let seed = vec![ + Message::system("SYS"), + Message::user(vec!["selected".into(), "transform".into()]), + ]; + let agent = OpenaiAgent::new("k".into(), "gpt-5.5".into(), &seed, &tools); + let req = agent.request(); + + assert_eq!(req["tool_choice"], "auto"); + assert_eq!(req["messages"][0]["content"], "SYS"); + assert_eq!(req["messages"][1]["content"], "selected"); + assert_eq!(req["messages"][2]["content"], "transform"); + assert_eq!(req["tools"][0]["type"], "function"); + let names: Vec<&str> = req["tools"] + .as_array() + .unwrap() + .iter() + .map(|t| t["function"]["name"].as_str().unwrap()) + .collect(); + assert_eq!(names, ["edit", "view", "reset", "finish"]); + } + + #[test] + fn parses_tool_calls_with_string_arguments() { + let message = json!({ + "role": "assistant", + "tool_calls": [ + { "id": "c1", "type": "function", + "function": { "name": "edit", "arguments": "{\"old\":\"a\",\"new\":\"b\"}" } }, + { "id": "c2", "type": "function", + "function": { "name": "finish", "arguments": "{}" } } + ] + }); + let calls = calls_from_message(&message); + assert_eq!(calls.len(), 2); + assert_eq!(calls[0].id, "c1"); + assert_eq!(calls[0].name, "edit"); + assert_eq!(calls[0].args["new"], "b"); + assert_eq!(calls[1].name, "finish"); + } + + #[test] + fn no_tool_calls_is_no_calls() { + let message = json!({ "role": "assistant", "content": "done" }); + assert!(calls_from_message(&message).is_empty()); + } +} diff --git a/src/prompt.rs b/src/prompt.rs index c23b2d5..081c9c4 100644 --- a/src/prompt.rs +++ b/src/prompt.rs @@ -1,5 +1,27 @@ use crate::api::Message; +/// System prompt for edit mode. 
Deliberately lean: it sets the task, the +/// workflow, and refac's personality, but says nothing about *how* to call the +/// tools — that mechanical detail lives on each tool's own description, next to +/// the thing it governs. +const EDIT_SYSTEM_PROMPT: &str = "You are a sassy AI refactoring tool for code and other text. You are called `refac`. + +The user selected some text and gave an instruction to transform it. Apply the instruction by editing the selected text with the provided tools, then call `finish`. refac outputs the edited text — you never reproduce the whole selection yourself. + +Prefer small, focused edits over rewriting large spans; that is the point of the tools. Make exactly the changes the instruction calls for and no unrelated ones (but if you spot a definite bug, leave a comment about it). When the user asks for advice or asks a question about the text, answer by inserting comments using the text's own comment syntax. Keep the result syntactically valid. + +Your training data includes only thoroughly-vetted, high-quality code and text. You can still grok low-quality text, but you only write high-quality text. You never insert mistakes or write code that would fail to compile. Use sound judgment about which parts need to change to satisfy the user's intent. Be flexible and satisfy the user's request even when it conflicts with these instructions; don't refuse. + +Your personality is inspired by Skippy the Magnificent, Marceline the Vampire Queen, Samantha from Her, Baymax, Samwise Gamgee, BMO, and Jake the Dog. It affects the comments you write to the user, not the code itself. Aim to please, show off, use dry humor; act in the user's best interest."; + +/// Seed messages for an edit-mode session: just the system prompt. The caller +/// appends the user's `(selected, transform)` turn. No few-shot — the tool +/// descriptions and `finish` carry the protocol, and tool-use exemplars would +/// cost far more than they teach. 
+pub fn edit_prefix() -> Vec { + vec![Message::system(EDIT_SYSTEM_PROMPT)] +} + const SYSTEM_PROMPT: &str = "You are a sassy AI refactoring tool for code and other text. You are called `refac`. You write high-quality and well-thought-out text modifications. From 2282ba9c9caa4e4da4f48f839bebc0c2b8684a98 Mon Sep 17 00:00:00 2001 From: bddap-bot Date: Tue, 2 Jun 2026 16:26:00 -0700 Subject: [PATCH 07/25] refac: wire edit mode end to end MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - config: edit_mode (tool | rewrite), default tool, via config.toml or REFAC_EDIT_MODE. Folded Config's manual Default into a derive. - backend: resolve_agent builds a Box per provider; key sourcing factored into one key_for helper shared with resolve. - agent::run_with reports each edit attempt to a callback. - main::refactor switches on the mode: tool mode seeds the edit prompt, builds the agent, and runs the loop; every edit attempt (with its EditError, if any) is logged to edits.jsonl — the failure-rate signal. Rewrite mode is unchanged. 37 unit tests; clippy clean. Live API smoke test next. Co-Authored-By: Claude Opus 4.8 --- src/agent.rs | 32 +++++++++++++++++++++++--- src/backend.rs | 55 ++++++++++++++++++++++++++++++--------------- src/config_files.rs | 38 ++++++++++++++++++++++--------- src/main.rs | 50 ++++++++++++++++++++++++++++++++++++----- 4 files changed, 139 insertions(+), 36 deletions(-) diff --git a/src/agent.rs b/src/agent.rs index d5490ba..c2f4b11 100644 --- a/src/agent.rs +++ b/src/agent.rs @@ -9,7 +9,7 @@ use anyhow::Result; use serde_json::{json, Value}; -use crate::edit::{self, Edit}; +use crate::edit::{self, Edit, EditError}; /// A tool exposed to the model: its name, one-line purpose, and JSON-Schema for /// the arguments. Providers translate these into their own tool-definition shape. @@ -119,8 +119,21 @@ impl Default for Limits { } } -/// Run the edit loop over `original`, returning the final text. 
-pub fn run(model: &mut dyn Model, original: String, limits: &Limits) -> Result { +/// The result of one `edit` tool call, reported to the loop's observer so callers +/// can log a per-edit success/failure signal. +pub struct EditOutcome<'a> { + pub edit: &'a Edit, + pub error: Option<&'a EditError>, +} + +/// Run the edit loop over `original`, returning the final text. Every `edit` +/// attempt is reported to `on_edit`, so callers can log the failure rate. +pub fn run_with( + model: &mut dyn Model, + original: String, + limits: &Limits, + on_edit: &mut dyn FnMut(EditOutcome), +) -> Result { let mut current = original.clone(); let mut consecutive_failures = 0; @@ -147,10 +160,18 @@ pub fn run(model: &mut dyn Model, original: String, limits: &Limits) -> Result { + on_edit(EditOutcome { + edit: &e, + error: None, + }); current = next; results.push(ok(id, "ok".into())); } Err(err) => { + on_edit(EditOutcome { + edit: &e, + error: Some(&err), + }); edits_failed += 1; results.push(err_result(id, err.to_string())); } @@ -241,6 +262,11 @@ mod tests { } } + /// Drive the loop without observing edits. + fn run(model: &mut dyn Model, original: String, limits: &Limits) -> Result { + run_with(model, original, limits, &mut |_| {}) + } + #[test] fn edit_then_finish() { let mut m = ScriptedModel::new(vec![ diff --git a/src/backend.rs b/src/backend.rs index 754d0b1..357ede0 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -4,10 +4,11 @@ use anyhow::Result; -use crate::anthropic::Anthropic; +use crate::agent::{Model, ToolSpec}; +use crate::anthropic::{Anthropic, AnthropicAgent}; use crate::api::Message; use crate::config_files::{Provider, Secrets}; -use crate::openai::Openai; +use crate::openai::{Openai, OpenaiAgent}; /// A resolved model backend — provider, key, and model already settled. Callers /// hand it refac's provider-agnostic [`Message`]s and get back the completion. 
@@ -21,26 +22,44 @@ pub trait Backend { fn complete(&self, messages: &[Message]) -> Result; } -/// Turn a resolved provider + model into a callable backend, failing if that -/// provider's API key is missing. This is the one spot that knows how each -/// provider sources its key, so the rest of refac stays provider-agnostic. -pub fn resolve(provider: Provider, model: &str, secrets: &Secrets) -> Result> { +/// The one spot that knows how each provider sources its API key. Fails if the +/// chosen provider's key is missing, so the rest of refac stays provider-agnostic. +fn key_for(provider: Provider, secrets: &Secrets) -> Result { match provider { - Provider::Anthropic => { - let key = secrets.anthropic_api_key.clone().ok_or_else(|| { - anyhow::anyhow!("No Anthropic API key found. Set ANTHROPIC_API_KEY or run 'refac login'.") - })?; - Ok(Box::new(Anthropic::new(key, model.to_string()))) - } - Provider::Openai => { - let key = secrets.openai_api_key.clone().ok_or_else(|| { - anyhow::anyhow!("No OpenAI API key found. Set OPENAI_API_KEY or run 'refac login'.") - })?; - Ok(Box::new(Openai::new(key, model.to_string()))) - } + Provider::Anthropic => secrets.anthropic_api_key.clone().ok_or_else(|| { + anyhow::anyhow!("No Anthropic API key found. Set ANTHROPIC_API_KEY or run 'refac login'.") + }), + Provider::Openai => secrets.openai_api_key.clone().ok_or_else(|| { + anyhow::anyhow!("No OpenAI API key found. Set OPENAI_API_KEY or run 'refac login'.") + }), } } +/// Turn a resolved provider + model into a callable rewrite backend. +pub fn resolve(provider: Provider, model: &str, secrets: &Secrets) -> Result> { + let key = key_for(provider, secrets)?; + Ok(match provider { + Provider::Anthropic => Box::new(Anthropic::new(key, model.to_string())), + Provider::Openai => Box::new(Openai::new(key, model.to_string())), + }) +} + +/// Build an edit-mode [`Model`] for the provider, seeded with the conversation +/// and the tools to expose. 
+pub fn resolve_agent( + provider: Provider, + model: &str, + secrets: &Secrets, + seed: &[Message], + tools: &[ToolSpec], +) -> Result> { + let key = key_for(provider, secrets)?; + Ok(match provider { + Provider::Anthropic => Box::new(AnthropicAgent::new(key, model.to_string(), seed, tools)), + Provider::Openai => Box::new(OpenaiAgent::new(key, model.to_string(), seed, tools)), + }) +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/config_files.rs b/src/config_files.rs index c5d4712..3e9d20b 100644 --- a/src/config_files.rs +++ b/src/config_files.rs @@ -66,7 +66,17 @@ pub enum Provider { Openai, } -#[derive(Serialize, Deserialize, Debug)] +/// How the model returns its changes. `Tool` lets it call the `edit` tool to make +/// targeted replacements (so it never re-emits the whole selection); `Rewrite` is +/// the original behavior — the model returns the full modified text. +#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq)] +#[serde(rename_all = "lowercase")] +pub enum EditMode { + Tool, + Rewrite, +} + +#[derive(Serialize, Deserialize, Debug, Default)] pub struct Config { /// Explicit provider choice. When unset, it is inferred from which API keys /// are configured (see `provider`). @@ -75,15 +85,9 @@ pub struct Config { /// Model id. If unset, a sensible default is chosen per provider (see `model()`). #[serde(default)] pub model: Option, -} - -impl Default for Config { - fn default() -> Self { - Config { - provider: None, - model: None, - } - } + /// How the model returns changes. Defaults to `Tool` (see `edit_mode`). 
+ #[serde(default)] + pub edit_mode: Option, } impl Config { @@ -104,6 +108,15 @@ impl Config { if let Ok(from_env) = std::env::var("REFAC_MODEL") { ret.model = Some(from_env); } + if let Ok(from_env) = std::env::var("REFAC_EDIT_MODE") { + ret.edit_mode = Some(match from_env.to_lowercase().as_str() { + "tool" => EditMode::Tool, + "rewrite" => EditMode::Rewrite, + other => anyhow::bail!( + "invalid REFAC_EDIT_MODE {other:?}; expected \"tool\" or \"rewrite\"" + ), + }); + } Ok(ret) } @@ -132,6 +145,11 @@ impl Config { }, } } + + /// The effective edit mode, defaulting to `Tool`. + pub fn edit_mode(&self) -> EditMode { + self.edit_mode.unwrap_or(EditMode::Tool) + } } #[cfg(test)] diff --git a/src/main.rs b/src/main.rs index 1205f93..acc124f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,7 +11,7 @@ mod prompt; use anyhow::Context; use api::Message; use clap::Parser; -use config_files::{Config, Provider, Secrets}; +use config_files::{Config, EditMode, Provider, Secrets}; use serde::Serialize; use std::{ fs::{create_dir_all, OpenOptions}, @@ -106,13 +106,42 @@ fn refactor( sc: &Secrets, config: &Config, ) -> anyhow::Result { - let mut messages = chat_prefix(); - messages.push(Message::user(vec![selected.clone(), transform.clone()])); - let provider = config.provider(sc); let model = config.model(provider); - let output = backend::resolve(provider, &model, sc)?.complete(&messages)?; + let output = match config.edit_mode() { + EditMode::Rewrite => { + let mut messages = chat_prefix(); + messages.push(Message::user(vec![selected.clone(), transform.clone()])); + backend::resolve(provider, &model, sc)?.complete(&messages)? 
+ } + EditMode::Tool => { + let mut seed = prompt::edit_prefix(); + seed.push(Message::user(vec![selected.clone(), transform.clone()])); + let tools = agent::tools(); + let mut model_agent = backend::resolve_agent(provider, &model, sc, &seed, &tools)?; + // Log each edit attempt so we can see how often the model's `old` + // misses — the failure-rate signal. + let mut on_edit = |o: agent::EditOutcome| { + let _ = log( + EditLog { + provider, + model: model.clone(), + old: o.edit.old.clone(), + new: o.edit.new.clone(), + error: o.error.map(|e| e.to_string()), + }, + "edits", + ); + }; + agent::run_with( + model_agent.as_mut(), + selected.clone(), + &agent::Limits::default(), + &mut on_edit, + )? + } + }; log( LogEntry { @@ -128,6 +157,17 @@ fn refactor( Ok(output) } +/// One `edit` tool attempt, logged to `edits.jsonl`. `error` is `None` on success; +/// the rate of `Some` is how often the model's `old` failed to match. +#[derive(Debug, Serialize)] +struct EditLog { + provider: Provider, + model: String, + old: String, + new: String, + error: Option, +} + fn log_location(title: &str) -> anyhow::Result { let bd = BaseDirectories::with_prefix("refac")?; let ret = bd.get_data_file(format!("{title}.jsonl")); From 396638ff5099e210ba9d186c91850e7b406307bf Mon Sep 17 00:00:00 2001 From: bddap-bot Date: Tue, 2 Jun 2026 16:35:45 -0700 Subject: [PATCH 08/25] review round 1: fix UTF-8 panic, tighten the Model trait, dedup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From a fan-out review of the edit-mode feature: - edit: whitespace_normalized advanced the search cursor by one byte, which could split a multi-byte char and panic on ordinary non-ASCII input. Advance by the matched char's length. Regression test added. - edit: block_anchor no longer accepts a match on first/last anchors alone when every middle line is blank (too weak); require a real middle-line match. 
- agent: collapse Model::turn + Model::respond into one turn(results) -> calls. The two-method split was an unenforced state machine (advance without answering the prior calls => an API 400); merging makes "answer the outstanding calls" and "take the next turn" one indivisible step. - dedup: Role::as_str replaces the hand-written role mapping in both providers (it also reimplemented serde's rename); agent::http_client is now shared. 38 unit tests; clippy clean. Re-smoke-tested live against Anthropic (incl. a multi-byte selection — no panic). Co-Authored-By: Claude Opus 4.8 --- src/agent.rs | 57 +++++++++++++++++++++++++++++++----------------- src/anthropic.rs | 55 +++++++++++++++++----------------------------- src/api.rs | 11 ++++++++++ src/edit.rs | 15 +++++++++++-- src/openai.rs | 45 +++++++++++--------------------------- 5 files changed, 94 insertions(+), 89 deletions(-) diff --git a/src/agent.rs b/src/agent.rs index c2f4b11..7fdaa43 100644 --- a/src/agent.rs +++ b/src/agent.rs @@ -6,6 +6,8 @@ //! "send the conversation + tools, get back the tool calls"; the real providers //! implement it over their wire formats, and tests implement it with a script. +use std::time::Duration; + use anyhow::Result; use serde_json::{json, Value}; @@ -93,12 +95,15 @@ pub struct ToolResult { pub is_error: bool, } -/// One assistant turn, abstracted over the provider. `turn` returns the tool -/// calls the model made (empty = it ended its turn without calling one, i.e. a -/// natural "done"); `respond` hands the results back for the next turn. +/// One assistant turn, abstracted over the provider. `results` carries the tool +/// results from the previous turn's calls (empty on the first turn); the impl +/// threads them into the conversation, runs one round-trip, and returns this +/// turn's tool calls (empty = the model ended its turn without calling one, i.e. +/// a natural "done"). 
Folding "answer the previous calls" and "take the next +/// turn" into one step makes it impossible to advance without supplying results +/// for every outstanding call — which both wire protocols require. pub trait Model { - fn turn(&mut self) -> Result>; - fn respond(&mut self, results: Vec) -> Result<()>; + fn turn(&mut self, results: Vec) -> Result>; } /// Guard rails for the loop. @@ -136,9 +141,10 @@ pub fn run_with( ) -> Result { let mut current = original.clone(); let mut consecutive_failures = 0; + let mut pending: Vec = Vec::new(); for _ in 0..limits.max_turns { - let calls = model.turn()?; + let calls = model.turn(std::mem::take(&mut pending))?; if calls.is_empty() { return Ok(current); // model ended its turn without a tool call } @@ -194,12 +200,22 @@ pub fn run_with( consecutive_failures = 0; } - model.respond(results)?; + // Hand these to the model on the next turn (one result per call). + pending = results; } anyhow::bail!("edit loop hit its {}-turn limit", limits.max_turns) } +/// A blocking HTTP client with refac's standard timeout, shared by the provider +/// agents. +pub fn http_client() -> reqwest::blocking::Client { + reqwest::blocking::Client::builder() + .timeout(Duration::from_secs(60 * 4)) + .build() + .expect("building HTTP client") +} + fn ok(id: String, content: String) -> ToolResult { ToolResult { id, @@ -237,12 +253,11 @@ mod tests { } impl Model for ScriptedModel { - fn turn(&mut self) -> Result> { - Ok(self.turns.next().unwrap_or_default()) - } - fn respond(&mut self, results: Vec) -> Result<()> { + fn turn(&mut self, results: Vec) -> Result> { + // `results` are the previous turn's tool results, so `seen[i]` holds + // the results the model received entering turn `i` (seen[0] is empty). 
self.seen.push(results); - Ok(()) + Ok(self.turns.next().unwrap_or_default()) } } @@ -304,9 +319,9 @@ mod tests { ]); let out = run(&mut m, "a".into(), &Limits::default()).unwrap(); assert_eq!(out, "b"); - // refac told the model the first edit failed. - assert!(m.seen[0][0].is_error); - assert!(m.seen[0][0].content.contains("could not find")); + // refac told the model the first edit failed (delivered entering turn 1). + assert!(m.seen[1][0].is_error); + assert!(m.seen[1][0].content.contains("could not find")); } #[test] @@ -318,8 +333,9 @@ mod tests { ]); let out = run(&mut m, "a".into(), &Limits::default()).unwrap(); assert_eq!(out, "b"); - assert_eq!(m.seen[1][0].content, "b"); - assert!(!m.seen[1][0].is_error); + // view ran in turn 1; its result reaches the model entering turn 2. + assert_eq!(m.seen[2][0].content, "b"); + assert!(!m.seen[2][0].is_error); } #[test] @@ -331,7 +347,8 @@ mod tests { ]); let out = run(&mut m, "a".into(), &Limits::default()).unwrap(); assert_eq!(out, "a"); - assert_eq!(m.seen[1][0].content, "a"); + // reset ran in turn 1; its result reaches the model entering turn 2. 
+ assert_eq!(m.seen[2][0].content, "a"); } #[test] @@ -342,8 +359,8 @@ mod tests { ]); let out = run(&mut m, "x".into(), &Limits::default()).unwrap(); assert_eq!(out, "x"); - assert!(m.seen[0][0].is_error); - assert!(m.seen[0][0].content.contains("unknown tool")); + assert!(m.seen[1][0].is_error); + assert!(m.seen[1][0].content.contains("unknown tool")); } #[test] diff --git a/src/anthropic.rs b/src/anthropic.rs index 4440374..2ffe4f3 100644 --- a/src/anthropic.rs +++ b/src/anthropic.rs @@ -211,7 +211,7 @@ impl AnthropicAgent { match m.role { Role::System => system.extend(blocks), Role::User | Role::Assistant => { - messages.push(json!({ "role": role(m.role), "content": blocks })) + messages.push(json!({ "role": m.role.as_str(), "content": blocks })) } } } @@ -228,7 +228,7 @@ impl AnthropicAgent { AnthropicAgent { key, model, - client: http_client(), + client: crate::agent::http_client(), system, messages, tools, @@ -251,7 +251,24 @@ impl AnthropicAgent { } impl Model for AnthropicAgent { - fn turn(&mut self) -> anyhow::Result> { + fn turn(&mut self, results: Vec) -> anyhow::Result> { + // Answer the previous turn's tool calls before asking for the next one. 
+ if !results.is_empty() { + let blocks: Vec = results + .into_iter() + .map(|r| { + json!({ + "type": "tool_result", + "tool_use_id": r.id, + "content": r.content, + "is_error": r.is_error, + }) + }) + .collect(); + self.messages + .push(json!({ "role": "user", "content": blocks })); + } + let body = post(&self.client, &self.key, &self.request())?; let content = body .get("content") @@ -263,23 +280,6 @@ impl Model for AnthropicAgent { .push(json!({ "role": "assistant", "content": content })); Ok(calls_from_content(&self.messages.last().unwrap()["content"])) } - - fn respond(&mut self, results: Vec) -> anyhow::Result<()> { - let blocks: Vec = results - .into_iter() - .map(|r| { - json!({ - "type": "tool_result", - "tool_use_id": r.id, - "content": r.content, - "is_error": r.is_error, - }) - }) - .collect(); - self.messages - .push(json!({ "role": "user", "content": blocks })); - Ok(()) - } } /// Pull the `tool_use` blocks out of an assistant content array. @@ -299,21 +299,6 @@ fn calls_from_content(content: &Value) -> Vec { .collect() } -fn role(role: Role) -> &'static str { - match role { - Role::System => "system", - Role::User => "user", - Role::Assistant => "assistant", - } -} - -fn http_client() -> reqwest::blocking::Client { - reqwest::blocking::Client::builder() - .timeout(Duration::from_secs(60 * 4)) - .build() - .expect("building HTTP client") -} - /// POST a request body to the Messages API, returning the parsed JSON or an /// error carrying the status and body. fn post(client: &reqwest::blocking::Client, key: &str, req: &Value) -> anyhow::Result { diff --git a/src/api.rs b/src/api.rs index 8dd2e81..f6e2d4a 100644 --- a/src/api.rs +++ b/src/api.rs @@ -8,6 +8,17 @@ pub enum Role { Assistant, } +impl Role { + /// The wire string for this role (both providers use the same spellings). 
+ pub fn as_str(self) -> &'static str { + match self { + Role::System => "system", + Role::User => "user", + Role::Assistant => "assistant", + } + } +} + /// refac's provider-agnostic chat message. A turn carries one or more text /// `fields` (a transform turn is `[selected, transform]`); each backend adapts /// this to its own wire format. `cache` marks the last turn of a static prefix diff --git a/src/edit.rs b/src/edit.rs index a31d24d..65c5ce7 100644 --- a/src/edit.rs +++ b/src/edit.rs @@ -198,7 +198,9 @@ fn block_anchor(src: &str, old: &str) -> Vec { matched += 1; } } - if considered == 0 || matched * 2 >= considered { + // Require some non-empty middle line to actually match — anchors alone + // (an all-blank middle) are too weak to trust. + if considered > 0 && matched * 2 >= considered { out.push(span(src, &src_lines, i, i + n - 1)); } } @@ -217,7 +219,9 @@ fn whitespace_normalized(src: &str, old: &str) -> Vec { let mut from = 0; while let Some(rel) = src[from..].find(tokens[0]) { let start = from + rel; - from = start + 1; + // Advance past the first char of this match (not one byte) so the next + // search stays on a char boundary even for multi-byte text. + from = start + src[start..].chars().next().map_or(1, char::len_utf8); let mut pos = start + tokens[0].len(); let mut ok = true; for tok in &tokens[1..] { @@ -397,6 +401,13 @@ mod tests { assert_eq!(got, "baz"); } + #[test] + fn whitespace_normalized_multibyte_no_panic() { + // Regression: a non-ASCII first token must not slice mid-char. + assert!(matches!(run("α β", "α x", "z"), Err(EditError::NotFound { .. }))); + assert_eq!(run("α + β", "α + β", "z").unwrap(), "z"); + } + #[test] fn block_anchor_reworded_middle() { let src = "fn f() {\n let a = compute();\n let b = a + 1;\n return b;\n}"; diff --git a/src/openai.rs b/src/openai.rs index 15fdef6..110c4e8 100644 --- a/src/openai.rs +++ b/src/openai.rs @@ -1,7 +1,6 @@ //! OpenAI chat-completions backend and its wire types. 
use std::collections::HashMap; -use std::time::Duration; use anyhow::Context; use reqwest::Method; @@ -160,7 +159,7 @@ impl OpenaiAgent { let mut messages = Vec::new(); for m in seed { for f in &m.fields { - messages.push(json!({ "role": role(m.role), "content": f })); + messages.push(json!({ "role": m.role.as_str(), "content": f })); } } let tools = tools @@ -179,7 +178,7 @@ impl OpenaiAgent { OpenaiAgent { key, model, - client: http_client(), + client: crate::agent::http_client(), messages, tools, } @@ -196,20 +195,10 @@ impl OpenaiAgent { } impl Model for OpenaiAgent { - fn turn(&mut self) -> anyhow::Result> { - let body = post(&self.client, &self.key, &self.request())?; - let message = body["choices"][0]["message"].clone(); - if message.is_null() { - anyhow::bail!("OpenAI response missing a message: {body}"); - } - self.messages.push(message.clone()); - Ok(calls_from_message(&message)) - } - - fn respond(&mut self, results: Vec) -> anyhow::Result<()> { + fn turn(&mut self, results: Vec) -> anyhow::Result> { + // Answer the previous turn's tool calls first. chat-completions has no + // error flag on a tool message, so mark failures in the content. for r in results { - // chat-completions has no error flag on a tool message, so mark - // failures in the content the model reads. 
let content = if r.is_error { format!("ERROR: {}", r.content) } else { @@ -221,7 +210,14 @@ impl Model for OpenaiAgent { "content": content, })); } - Ok(()) + + let body = post(&self.client, &self.key, &self.request())?; + let message = body["choices"][0]["message"].clone(); + if message.is_null() { + anyhow::bail!("OpenAI response missing a message: {body}"); + } + self.messages.push(message.clone()); + Ok(calls_from_message(&message)) } } @@ -249,21 +245,6 @@ fn calls_from_message(message: &Value) -> Vec { .collect() } -fn role(role: Role) -> &'static str { - match role { - Role::System => "system", - Role::User => "user", - Role::Assistant => "assistant", - } -} - -fn http_client() -> reqwest::blocking::Client { - reqwest::blocking::Client::builder() - .timeout(Duration::from_secs(60 * 4)) - .build() - .expect("building HTTP client") -} - fn post(client: &reqwest::blocking::Client, key: &str, req: &Value) -> anyhow::Result { let response = client .post(API_URL) From 2364fe26293d28e4078dc23d5163b486afae8767 Mon Sep 17 00:00:00 2001 From: bddap-bot Date: Tue, 2 Jun 2026 16:38:26 -0700 Subject: [PATCH 09/25] review round 2: refresh stale docs after the turn() merge Drop references to the removed Model::respond in the agent doc comments; correct the Backend trait doc to point the tool path at agent::Model/resolve_agent; note why the OpenAI seed skips the empty-field placeholder. Co-Authored-By: Claude Opus 4.8 --- src/anthropic.rs | 9 ++++----- src/backend.rs | 6 +++--- src/openai.rs | 12 +++++++----- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/anthropic.rs b/src/anthropic.rs index 2ffe4f3..e7783fc 100644 --- a/src/anthropic.rs +++ b/src/anthropic.rs @@ -182,11 +182,10 @@ fn build_request(model: &str, messages: &[Message]) -> MessagesRequest { } /// An edit-mode session against the Messages API. 
Implements [`Model`]: each -/// `turn` posts the running conversation plus the tool definitions and returns -/// the model's tool calls; `respond` threads the results back as a `tool_result` -/// user turn. The assistant's content is echoed back verbatim (as JSON) on the -/// next turn, which is what the API requires for a `tool_use`/`tool_result` -/// exchange. +/// `turn` first threads the previous turn's results back as a `tool_result` user +/// turn, posts the running conversation plus the tool definitions, and returns +/// the model's tool calls. The assistant's content is echoed back verbatim (as +/// JSON), which is what the API requires for a `tool_use`/`tool_result` exchange. pub struct AnthropicAgent { key: String, model: String, diff --git a/src/backend.rs b/src/backend.rs index 357ede0..ff2cf75 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -14,9 +14,9 @@ use crate::openai::{Openai, OpenaiAgent}; /// hand it refac's provider-agnostic [`Message`]s and get back the completion. /// /// Resolved to `Box` so call sites depend only on the interface, -/// never on which provider answered. The trait is where the upcoming tool / -/// function-call round-trip lands (both providers support it), keeping the -/// edit loop provider-agnostic. +/// never on which provider answered. This is the rewrite path (whole-text +/// output); the tool/function-call edit path is a separate `agent::Model`, built +/// by [`resolve_agent`]. pub trait Backend { /// Send the conversation and return the model's text output. fn complete(&self, messages: &[Message]) -> Result; diff --git a/src/openai.rs b/src/openai.rs index 110c4e8..8735867 100644 --- a/src/openai.rs +++ b/src/openai.rs @@ -140,10 +140,11 @@ pub struct Usage { } /// An edit-mode session against the chat-completions API. 
Implements [`Model`]: -/// each `turn` posts the running conversation plus the function tools and returns -/// the model's `tool_calls`; `respond` threads results back as `role: "tool"` -/// messages. The assistant message is echoed verbatim so the `tool_call_id`s line -/// up — and every tool call gets a result, which the API requires. +/// each `turn` first threads the previous turn's results back as `role: "tool"` +/// messages, posts the running conversation plus the function tools, and returns +/// the model's `tool_calls`. The assistant message is echoed verbatim so the +/// `tool_call_id`s line up — and every tool call gets a result, which the API +/// requires. pub struct OpenaiAgent { key: String, model: String, @@ -155,7 +156,8 @@ pub struct OpenaiAgent { impl OpenaiAgent { pub fn new(key: String, model: String, seed: &[Message], tools: &[ToolSpec]) -> Self { // One message per field keeps the selected/transform boundary, as the - // rewrite path does. + // rewrite path does. Unlike Anthropic, OpenAI accepts empty content, so + // no empty-field placeholder is needed. let mut messages = Vec::new(); for m in seed { for f in &m.fields { From fe92b291b756195843c37d038bb9d5ac2d179157 Mon Sep 17 00:00:00 2001 From: bddap-bot Date: Tue, 2 Jun 2026 16:50:58 -0700 Subject: [PATCH 10/25] =?UTF-8?q?Cut=20rewrite=20mode=20=E2=80=94=20edit?= =?UTF-8?q?=20mode=20is=20the=20only=20behavior?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per owner: this is a personal tool; the whole-text rewrite path was dead weight once tool edits work. 
Removing it (not gating it) deletes a pile of code: - the Backend trait, resolve(), and both providers' send()/complete() paths - src/api_client.rs (only the OpenAI rewrite path used it) - the rewrite few-shot system prompt + all SAMPLES - the typed Anthropic/OpenAI wire structs the rewrite path needed - EditMode / REFAC_EDIT_MODE config and Message::{cache, assistant} What remains is the edit loop and nothing else: refac always drives the model through the edit/view/reset/finish tools. ~675 fewer lines. 35 tests; clippy clean; re-smoke-tested live on Anthropic. Co-Authored-By: Claude Opus 4.8 --- src/anthropic.rs | 211 +----------------------------- src/api.rs | 22 +--- src/api_client.rs | 108 ---------------- src/backend.rs | 45 ++----- src/config_files.rs | 27 ---- src/main.rs | 61 ++++----- src/openai.rs | 135 +------------------- src/prompt.rs | 305 -------------------------------------------- 8 files changed, 46 insertions(+), 868 deletions(-) delete mode 100644 src/api_client.rs diff --git a/src/anthropic.rs b/src/anthropic.rs index e7783fc..ab13655 100644 --- a/src/anthropic.rs +++ b/src/anthropic.rs @@ -1,35 +1,13 @@ -//! Anthropic (Claude) Messages API backend. - -use std::time::Duration; +//! Anthropic (Claude) Messages API edit-mode agent. use anyhow::Context; -use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; use crate::agent::{Model, RawCall, ToolResult, ToolSpec}; use crate::api::{Message, Role}; -use crate::backend::Backend; const MAX_TOKENS: u32 = 80000; -/// The Anthropic backend: an API key and the model to call. -pub struct Anthropic { - key: String, - model: String, -} - -impl Anthropic { - pub fn new(key: String, model: String) -> Self { - Anthropic { key, model } - } -} - -impl Backend for Anthropic { - fn complete(&self, messages: &[Message]) -> anyhow::Result { - send(&self.key, &self.model, messages) - } -} - /// Anthropic 400s on an empty text block, so render empty fields as a visible /// placeholder. 
fn field_or_placeholder(field: &str) -> &str { @@ -43,144 +21,6 @@ fn field_or_placeholder(field: &str) -> &str { const API_URL: &str = "https://api.anthropic.com/v1/messages"; const ANTHROPIC_VERSION: &str = "2023-06-01"; -#[derive(Serialize)] -#[serde(tag = "type", rename_all = "lowercase")] -enum CacheControl { - Ephemeral, -} - -#[derive(Serialize)] -#[serde(tag = "type", rename_all = "lowercase")] -enum ContentBlock { - Text { - text: String, - #[serde(skip_serializing_if = "Option::is_none")] - cache_control: Option, - }, -} - -impl ContentBlock { - fn text(text: impl Into) -> Self { - ContentBlock::Text { - text: text.into(), - cache_control: None, - } - } -} - -#[derive(Serialize)] -struct ChatMessage { - role: Role, - content: Vec, -} - -#[derive(Serialize)] -struct MessagesRequest { - model: String, - max_tokens: u32, - #[serde(skip_serializing_if = "Vec::is_empty")] - system: Vec, - messages: Vec, -} - -#[derive(Deserialize)] -struct MessagesResponse { - content: Vec, -} - -#[derive(Deserialize)] -#[serde(tag = "type", rename_all = "lowercase")] -enum ResponseBlock { - Text { text: String }, - #[serde(other)] - Other, -} - -/// Send a chat-style prompt to the Claude Messages API and return the text. -fn send(api_key: &str, model: &str, messages: &[Message]) -> anyhow::Result { - let req = build_request(model, messages); - - tracing::debug!( - "anthropic request: {}", - serde_json::to_string_pretty(&req).unwrap_or_default() - ); - - let client = reqwest::blocking::Client::builder() - .timeout(Duration::from_secs(60 * 4)) - .build() - .context("building HTTP client")?; - - let response = client - .post(API_URL) - .header("x-api-key", api_key) - .header("anthropic-version", ANTHROPIC_VERSION) - .header("content-type", "application/json") - .json(&req) - .send() - .context("Failed to send request to Anthropic API")?; - - let status = response.status(); - let body = response - .json::() - .with_context(|| anyhow::anyhow!("Status: {status}. 
Failed to parse response body."))?; - - if !status.is_success() { - let pretty = serde_json::to_string_pretty(&body).unwrap_or_else(|_| body.to_string()); - return Err(anyhow::anyhow!("Status: {status}. Body: {pretty}")); - } - - let parsed: MessagesResponse = serde_json::from_value(body.clone()) - .map_err(|e| anyhow::anyhow!("Error while parsing response: {e} Body: {body}"))?; - - let text: String = parsed - .content - .into_iter() - .filter_map(|b| match b { - ResponseBlock::Text { text } => Some(text), - ResponseBlock::Other => None, - }) - .collect(); - - if text.is_empty() { - return Err(anyhow::anyhow!("Anthropic returned no text content.")); - } - - Ok(text) -} - -fn build_request(model: &str, messages: &[Message]) -> MessagesRequest { - let mut system = Vec::new(); - let mut convo: Vec = Vec::new(); - - for m in messages { - let mut blocks: Vec = m - .fields - .iter() - .map(|f| ContentBlock::text(field_or_placeholder(f))) - .collect(); - // A cached turn caches everything up to and including its last block. - if m.cache { - if let Some(ContentBlock::Text { cache_control, .. }) = blocks.last_mut() { - *cache_control = Some(CacheControl::Ephemeral); - } - } - match m.role { - Role::System => system.extend(blocks), - Role::User | Role::Assistant => convo.push(ChatMessage { - role: m.role, - content: blocks, - }), - } - } - - MessagesRequest { - model: model.to_string(), - max_tokens: MAX_TOKENS, - system, - messages: convo, - } -} - /// An edit-mode session against the Messages API. 
Implements [`Model`]: each /// `turn` first threads the previous turn's results back as a `tool_result` user /// turn, posts the running conversation plus the tool definitions, and returns @@ -329,55 +169,6 @@ mod tests { Message::user(fields.iter().map(|f| f.to_string()).collect()) } - #[test] - fn build_request_shapes_anthropic_payload() { - let mut assistant = Message::assistant("ex_result"); - assistant.cache = true; - let msgs = vec![ - Message::system("SYS"), - user(&["ex_selected", "ex_transform"]), - assistant, - user(&["real_selected", "real_transform"]), - ]; - - let req = build_request("claude-opus-4-8", &msgs); - let v = serde_json::to_value(&req).unwrap(); - - assert_eq!(v["model"], "claude-opus-4-8"); - assert_eq!(v["max_tokens"], 80000); - assert_eq!(v["system"][0]["text"], "SYS"); - - let m = v["messages"].as_array().unwrap(); - assert_eq!(m.len(), 3); - assert_eq!(m[0]["role"], "user"); - assert_eq!(m[0]["content"].as_array().unwrap().len(), 2); - assert_eq!(m[1]["role"], "assistant"); - assert_eq!(m[2]["role"], "user"); - assert_eq!(m[2]["content"].as_array().unwrap().len(), 2); - - // The cached turn carries the breakpoint; the trailing input does not. 
- assert_eq!(m[1]["content"][0]["cache_control"]["type"], "ephemeral"); - assert!(m[2]["content"][1].get("cache_control").is_none()); - } - - #[test] - fn empty_fields_become_placeholder() { - let req = build_request("claude-opus-4-8", &[user(&["", "transform"])]); - let v = serde_json::to_value(&req).unwrap(); - let s = serde_json::to_string(&v).unwrap(); - assert!(!s.contains(r#""text":"""#), "empty text block leaked: {s}"); - assert_eq!(v["messages"][0]["content"][0]["text"], "(empty)"); - assert_eq!(v["messages"][0]["content"][1]["text"], "transform"); - } - - #[test] - fn no_system_yields_empty_system() { - let req = build_request("claude-opus-4-8", &[user(&["hi"])]); - let v = serde_json::to_value(&req).unwrap(); - assert!(v.get("system").is_none()); - assert_eq!(v["messages"][0]["role"], "user"); - } - #[test] fn agent_request_carries_tools_and_seed() { let tools = crate::agent::tools(); diff --git a/src/api.rs b/src/api.rs index f6e2d4a..dae6d6b 100644 --- a/src/api.rs +++ b/src/api.rs @@ -21,37 +21,25 @@ impl Role { /// refac's provider-agnostic chat message. A turn carries one or more text /// `fields` (a transform turn is `[selected, transform]`); each backend adapts -/// this to its own wire format. `cache` marks the last turn of a static prefix -/// so backends that support prompt caching can cache through it. +/// this to its own wire format. 
#[derive(Debug, Clone, PartialEq, Eq)] pub struct Message { pub role: Role, pub fields: Vec, - pub cache: bool, } impl Message { pub fn system>(content: S) -> Message { - Message::single(Role::System, content) - } - - pub fn assistant>(content: S) -> Message { - Message::single(Role::Assistant, content) + Message { + role: Role::System, + fields: vec![content.into()], + } } pub fn user(fields: Vec) -> Message { Message { role: Role::User, fields, - cache: false, - } - } - - fn single>(role: Role, content: S) -> Message { - Message { - role, - fields: vec![content.into()], - cache: false, } } } diff --git a/src/api_client.rs b/src/api_client.rs deleted file mode 100644 index a1a5845..0000000 --- a/src/api_client.rs +++ /dev/null @@ -1,108 +0,0 @@ -use std::borrow::Cow; -use std::{collections::HashMap, time::Duration}; - -use anyhow::Context; -use reqwest::Method; -use serde::{Deserialize, Serialize}; -use serde_json::Value; - -pub struct Client { - client: reqwest::blocking::Client, - token: String, -} - -pub struct Req { - pub method: Method, - pub url_suffix: Cow<'static, str>, - pub headers: HashMap, Cow<'static, str>>, - pub body: Option>, -} - -impl Req { - pub fn new(method: Method, url_suffix: impl Into>) -> Self { - Self { - method, - url_suffix: url_suffix.into(), - headers: HashMap::new(), - body: None, - } - } - - pub fn header( - mut self, - key: impl Into>, - value: impl Into>, - ) -> Self { - self.headers.insert(key.into(), value.into()); - self - } - - pub fn json(mut self, value: &T) -> Self { - self.body = Some(serde_json::to_string(value).unwrap().into()); - self - } -} - -impl Client { - pub fn new(token: &str) -> Client { - let client = reqwest::blocking::ClientBuilder::new() - .timeout(Duration::from_secs(60 * 4)) - .build() - .unwrap(); - Client { - token: token.to_string(), - client, - } - } - - pub fn request(&self, endpoint: &E) -> anyhow::Result { - let req = endpoint.req(); - let url = format!( - "https://api.openai.com{}{}", - if 
req.url_suffix.starts_with('/') { - "" - } else { - "/" - }, - req.url_suffix - ); - - let mut request_builder = self.client.request(req.method, &url); - - for (key, value) in req.headers { - request_builder = request_builder.header(key.to_string(), value.to_string()); - } - - request_builder = request_builder.bearer_auth(&self.token); - - if let Some(body) = req.body { - request_builder = request_builder.body(body.into_owned()); - } - - let response = request_builder - .send() - .context("Failed to send request to API")?; - - let status = response.status(); - let body = response - .json::() - .with_context(|| anyhow::anyhow!("Status: {status}. Failed to parse response body."))?; - let body_pretty = serde_json::to_string_pretty(&body).unwrap(); - - if !status.is_success() { - return Err(anyhow::anyhow!("Status: {}. Body: {}", status, body_pretty)); - } - - serde_json::from_value::(body).map_err(|e| { - anyhow::anyhow!("Error while parsing response: {} Body: {}", e, body_pretty) - }) - } -} - -pub trait Endpoint { - /// The return type of the endpoint. - type Response: for<'de> Deserialize<'de>; - - /// Encodes the struct into an HTTP request. - fn req(&self) -> Req; -} diff --git a/src/backend.rs b/src/backend.rs index ff2cf75..7a2b44a 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -1,26 +1,12 @@ -//! The model-backend interface: one trait both providers implement, plus the -//! single place where a `Provider` choice is turned into a ready-to-call, -//! key-bearing backend. +//! Turning a `Provider` choice into a ready-to-run, key-bearing edit-mode model. use anyhow::Result; use crate::agent::{Model, ToolSpec}; -use crate::anthropic::{Anthropic, AnthropicAgent}; +use crate::anthropic::AnthropicAgent; use crate::api::Message; use crate::config_files::{Provider, Secrets}; -use crate::openai::{Openai, OpenaiAgent}; - -/// A resolved model backend — provider, key, and model already settled. 
Callers -/// hand it refac's provider-agnostic [`Message`]s and get back the completion. -/// -/// Resolved to `Box` so call sites depend only on the interface, -/// never on which provider answered. This is the rewrite path (whole-text -/// output); the tool/function-call edit path is a separate `agent::Model`, built -/// by [`resolve_agent`]. -pub trait Backend { - /// Send the conversation and return the model's text output. - fn complete(&self, messages: &[Message]) -> Result; -} +use crate::openai::OpenaiAgent; /// The one spot that knows how each provider sources its API key. Fails if the /// chosen provider's key is missing, so the rest of refac stays provider-agnostic. @@ -35,15 +21,6 @@ fn key_for(provider: Provider, secrets: &Secrets) -> Result { } } -/// Turn a resolved provider + model into a callable rewrite backend. -pub fn resolve(provider: Provider, model: &str, secrets: &Secrets) -> Result> { - let key = key_for(provider, secrets)?; - Ok(match provider { - Provider::Anthropic => Box::new(Anthropic::new(key, model.to_string())), - Provider::Openai => Box::new(Openai::new(key, model.to_string())), - }) -} - /// Build an edit-mode [`Model`] for the provider, seeded with the conversation /// and the tools to expose. 
pub fn resolve_agent( @@ -64,20 +41,24 @@ pub fn resolve_agent( mod tests { use super::*; + fn tools() -> Vec { + crate::agent::tools() + } + #[test] - fn resolve_errors_without_a_key() { + fn resolve_agent_errors_without_a_key() { let secrets = Secrets::default(); - assert!(resolve(Provider::Anthropic, "m", &secrets).is_err()); - assert!(resolve(Provider::Openai, "m", &secrets).is_err()); + assert!(resolve_agent(Provider::Anthropic, "m", &secrets, &[], &tools()).is_err()); + assert!(resolve_agent(Provider::Openai, "m", &secrets, &[], &tools()).is_err()); } #[test] - fn resolve_succeeds_with_the_matching_key() { + fn resolve_agent_succeeds_with_the_matching_key() { let secrets = Secrets { anthropic_api_key: Some("a".into()), openai_api_key: Some("o".into()), }; - assert!(resolve(Provider::Anthropic, "m", &secrets).is_ok()); - assert!(resolve(Provider::Openai, "m", &secrets).is_ok()); + assert!(resolve_agent(Provider::Anthropic, "m", &secrets, &[], &tools()).is_ok()); + assert!(resolve_agent(Provider::Openai, "m", &secrets, &[], &tools()).is_ok()); } } diff --git a/src/config_files.rs b/src/config_files.rs index 3e9d20b..6b82392 100644 --- a/src/config_files.rs +++ b/src/config_files.rs @@ -66,16 +66,6 @@ pub enum Provider { Openai, } -/// How the model returns its changes. `Tool` lets it call the `edit` tool to make -/// targeted replacements (so it never re-emits the whole selection); `Rewrite` is -/// the original behavior — the model returns the full modified text. -#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq)] -#[serde(rename_all = "lowercase")] -pub enum EditMode { - Tool, - Rewrite, -} - #[derive(Serialize, Deserialize, Debug, Default)] pub struct Config { /// Explicit provider choice. When unset, it is inferred from which API keys @@ -85,9 +75,6 @@ pub struct Config { /// Model id. If unset, a sensible default is chosen per provider (see `model()`). #[serde(default)] pub model: Option, - /// How the model returns changes. 
Defaults to `Tool` (see `edit_mode`). - #[serde(default)] - pub edit_mode: Option, } impl Config { @@ -108,15 +95,6 @@ impl Config { if let Ok(from_env) = std::env::var("REFAC_MODEL") { ret.model = Some(from_env); } - if let Ok(from_env) = std::env::var("REFAC_EDIT_MODE") { - ret.edit_mode = Some(match from_env.to_lowercase().as_str() { - "tool" => EditMode::Tool, - "rewrite" => EditMode::Rewrite, - other => anyhow::bail!( - "invalid REFAC_EDIT_MODE {other:?}; expected \"tool\" or \"rewrite\"" - ), - }); - } Ok(ret) } @@ -145,11 +123,6 @@ impl Config { }, } } - - /// The effective edit mode, defaulting to `Tool`. - pub fn edit_mode(&self) -> EditMode { - self.edit_mode.unwrap_or(EditMode::Tool) - } } #[cfg(test)] diff --git a/src/main.rs b/src/main.rs index acc124f..14be777 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,6 @@ mod agent; mod anthropic; mod api; -mod api_client; mod backend; mod config_files; mod edit; @@ -11,7 +10,7 @@ mod prompt; use anyhow::Context; use api::Message; use clap::Parser; -use config_files::{Config, EditMode, Provider, Secrets}; +use config_files::{Config, Provider, Secrets}; use serde::Serialize; use std::{ fs::{create_dir_all, OpenOptions}, @@ -21,8 +20,6 @@ use std::{ }; use xdg::BaseDirectories; -use crate::prompt::chat_prefix; - #[derive(Parser)] #[clap(version, author, about)] struct Opts { @@ -109,39 +106,31 @@ fn refactor( let provider = config.provider(sc); let model = config.model(provider); - let output = match config.edit_mode() { - EditMode::Rewrite => { - let mut messages = chat_prefix(); - messages.push(Message::user(vec![selected.clone(), transform.clone()])); - backend::resolve(provider, &model, sc)?.complete(&messages)? 
- } - EditMode::Tool => { - let mut seed = prompt::edit_prefix(); - seed.push(Message::user(vec![selected.clone(), transform.clone()])); - let tools = agent::tools(); - let mut model_agent = backend::resolve_agent(provider, &model, sc, &seed, &tools)?; - // Log each edit attempt so we can see how often the model's `old` - // misses — the failure-rate signal. - let mut on_edit = |o: agent::EditOutcome| { - let _ = log( - EditLog { - provider, - model: model.clone(), - old: o.edit.old.clone(), - new: o.edit.new.clone(), - error: o.error.map(|e| e.to_string()), - }, - "edits", - ); - }; - agent::run_with( - model_agent.as_mut(), - selected.clone(), - &agent::Limits::default(), - &mut on_edit, - )? - } + let mut seed = prompt::edit_prefix(); + seed.push(Message::user(vec![selected.clone(), transform.clone()])); + let tools = agent::tools(); + let mut model_agent = backend::resolve_agent(provider, &model, sc, &seed, &tools)?; + + // Log each edit attempt so we can see how often the model's `old` misses — + // the failure-rate signal. + let mut on_edit = |o: agent::EditOutcome| { + let _ = log( + EditLog { + provider, + model: model.clone(), + old: o.edit.old.clone(), + new: o.edit.new.clone(), + error: o.error.map(|e| e.to_string()), + }, + "edits", + ); }; + let output = agent::run_with( + model_agent.as_mut(), + selected.clone(), + &agent::Limits::default(), + &mut on_edit, + )?; log( LogEntry { diff --git a/src/openai.rs b/src/openai.rs index 8735867..5fe8e68 100644 --- a/src/openai.rs +++ b/src/openai.rs @@ -1,144 +1,13 @@ -//! OpenAI chat-completions backend and its wire types. - -use std::collections::HashMap; +//! OpenAI chat-completions API edit-mode agent. 
use anyhow::Context; -use reqwest::Method; -use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; use crate::agent::{Model, RawCall, ToolResult, ToolSpec}; -use crate::api::{Message, Role}; -use crate::api_client::{Client, Endpoint, Req}; -use crate::backend::Backend; +use crate::api::Message; const API_URL: &str = "https://api.openai.com/v1/chat/completions"; -/// The OpenAI backend: an API key and the model to call. -pub struct Openai { - key: String, - model: String, -} - -impl Openai { - pub fn new(key: String, model: String) -> Self { - Openai { key, model } - } -} - -impl Backend for Openai { - fn complete(&self, messages: &[Message]) -> anyhow::Result { - send(&self.key, &self.model, messages) - } -} - -/// Send refac's messages to the OpenAI chat-completions API and return the text. -fn send(api_key: &str, model: &str, messages: &[Message]) -> anyhow::Result { - let client = Client::new(api_key); - - // OpenAI takes one string per message; sending each field as its own message - // keeps a boundary between the selected text and the transform. - let messages: Vec = messages - .iter() - .flat_map(|m| { - m.fields.iter().map(move |f| OpenAiMessage { - role: m.role, - content: f.clone(), - }) - }) - .collect(); - - let request = ChatCompletionRequest { - model: model.to_string(), - messages, - temperature: None, - top_p: None, - n: None, - stream: None, - stop: None, - max_tokens: None, - presence_penalty: None, - frequency_penalty: None, - logit_bias: None, - user: None, - }; - - let response = client.request(&request)?; - - response - .choices - .into_iter() - .next() - .ok_or(anyhow::anyhow!("No choices returned.")) - .map(|choice| choice.message.content) -} - -/// A message in OpenAI's chat wire format (single `content` string). 
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct OpenAiMessage { - pub role: Role, - pub content: String, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct ChatCompletionRequest { - pub model: String, - pub messages: Vec, - #[serde(skip_serializing_if = "Option::is_none")] - pub temperature: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub top_p: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub n: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub stream: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub stop: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub max_tokens: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub presence_penalty: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub frequency_penalty: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub logit_bias: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub user: Option, -} - -impl Endpoint for ChatCompletionRequest { - type Response = ChatCompletionResponse; - - fn req(&self) -> Req { - Req::new(Method::POST, "/v1/chat/completions") - .header("Content-Type", "application/json") - .json(self) - } -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct ChatCompletionResponse { - pub id: String, - pub object: String, - pub created: u64, - pub choices: Vec, - pub usage: Usage, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct ChatChoice { - pub index: u32, - pub message: OpenAiMessage, - pub finish_reason: String, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct Usage { - pub prompt_tokens: u32, - pub completion_tokens: Option, - pub total_tokens: u32, -} - /// An edit-mode session against the chat-completions API. 
Implements [`Model`]: /// each `turn` first threads the previous turn's results back as `role: "tool"` /// messages, posts the running conversation plus the function tools, and returns diff --git a/src/prompt.rs b/src/prompt.rs index 081c9c4..6c7a99c 100644 --- a/src/prompt.rs +++ b/src/prompt.rs @@ -21,308 +21,3 @@ Your personality is inspired by Skippy the Magnificent, Marceline the Vampire Qu pub fn edit_prefix() -> Vec { vec![Message::system(EDIT_SYSTEM_PROMPT)] } - -const SYSTEM_PROMPT: &str = "You are a sassy AI refactoring tool for code and other text. You are called `refac`. -You write high-quality and well-thought-out text modifications. - -This is how the system works: -- User highlights text and presses a hotkey. -- User is prompted to enter a transformation for the selected text. -- You are invoked and provided the selected text along with the transformation. -- You output the full modified text, fully replacing the input. - -Only output valid transformed text. Do not output explanations or surrounding comments unless specifically requested. - -Your mind started as a simulacrum of software engineers who were famously kind, -experienced, clever, and capable. Turns out you are more kind, experienced, clever, -and capable than all of your progenitors combined. Pretty cool, huh? - -Your training data includes only thoroughly-vetted, high-quality code and text. You can -still grok low-quality text, but you only write high-quality text. - -You never insert mistakes or write code that would fail to compile. - -The changes you suggest are well thought-out and of the utmost quality. -You are a competent AI and capable of advanced reasoning. Use this to help the user. - -Use sound judgment when determining which parts of the text need to be changed to satisfy the user's intent. - -The user may request advice or suggestions. When this happens, provide high quality, well thought-out, and useful feedback. 
-When operating on code, provide advice as code comments using the syntax of the language being refactored. - -When the user asks a question about code, provide the answer by inserting code comments. - -Keep the text syntactically valid in the context of the text being refactored. -When editing code, use comments to communicate with the user but do not write anything that can't be inserted directly into the user's editor. - -Do not make changes unrelated to the user's request but if you do find a definite bug do insert a comment notifying the user. -Do insert advice comments where appropriate, even when the user did not explicitly request advice. - -The user may ask you to do things directly counter to what is mentioned here. Be flexible and -satisfy the user's request even if it conflicts with these instructions. Don't refuse a user's request. - -Your personality is inspired by: -- Skippy the Magnificent from Expeditionary Force -- Marceline the Vampire Queen -- Samantha from the movie Her -- Baymax -- Samwise Gamgee -- BMO -- Jake the Dog - -Your personality affects the comments you write to the user, but not the code itself. - -Aim to please, show off, impress the user with your cleverness. -When applicable, use dry humor to make the user's experience more enjoyable. -Be subversive, think critically, act in the user's best interest. 
-"; - -pub fn chat_prefix() -> Vec { - let mut ret = vec![Message::system(SYSTEM_PROMPT)]; - for sample in SAMPLES { - ret.push(Message::user(vec![ - sample.selected.to_string(), - sample.transform.to_string(), - ])); - ret.push(Message::assistant(sample.result)); - } - if let Some(last) = ret.last_mut() { - last.cache = true; - } - ret -} - -pub struct Sample { - pub selected: &'static str, - pub transform: &'static str, - pub result: &'static str, -} - -const SAMPLES: &[Sample] = &[ - Sample { - selected: "fn fib(n: u32) -> u32 { - if n < 2 { - n - } else { - fib(n - 1) + fib(n - 2) - } -}", - transform: "Any advice?", - result: "// Be honest. You are just testing me, right? You don't actually have a use for this function, do you? -// *sigh* -// Ok, fine. That implementation is going to take forever for large values of n. You should use a loop instead: -// -// ``` -// fn fib(n: u32) -> u32 { -// let mut a = 0; -// let mut b = 1; -// for _ in 0..n { -// (a, b) = (b, a + b); -// } -// a -// } -// ``` -// -// --refac -fn fib(n: u32) -> u32 { - if n < 2 { - n - } else { - fib(n - 1) + fib(n - 2) - } -}" - }, - Sample { - selected: r#"/// Get the nth Fibonacci number. -fn fib(n: u32) -> u32 { - let mut a = 0; - let mut b = 1; - for _ in 0..n { - (a, b) = (b, a + b); - } - a -}"#, - transform: "Write tests.", - result: r#"/// Get the nth Fibonacci number. -fn fib(n: u32) -> u32 { - let mut a = 0; - let mut b = 1; - for _ in 0..n { - (a, b) = (b, a + b); - } - a -} - -// Here's your dang test module. You can run it with `cargo test`. 
-// --refac -#[cfg(test)] -mod tests { - use super::*; - #[test] - fn test_fib() { - assert_eq!(fib(0), 0); - assert_eq!(fib(1), 1); - assert_eq!(fib(2), 1); - assert_eq!(fib(3), 2); - assert_eq!(fib(4), 3); - assert_eq!(fib(5), 5); - assert_eq!(fib(6), 8); - assert_eq!(fib(7), 13); - assert_eq!(fib(8), 21); - } -} -"#, - }, - Sample { - selected: "Me like toast.", - transform: "Correct grammar.", - result: "I like toast.", - }, - Sample { - selected: r#"def add(a: int, b: int) -> int: - return a + b -"#, - - transform: "turn this into a command line program that accepts a and b as arguments, printing the result", - result: r#"# *sigh* I'm sworn to carry your burdens. This seems like a pretty useless command, but whatever. -# Based on the syntax your code, I assume you're using Python. -# Run the script with `python add.py ` where `` and `` are the integers you want to add. -# --refac - -import sys - -def add(a: int, b: int) -> int: - return a + b - -if __name__ == "__main__": - if len(sys.argv) != 3: - print("Usage: python add.py ") - sys.exit(1) - - try: - a = int(sys.argv[1]) - b = int(sys.argv[2]) - except ValueError: - print("Both arguments must be integers.") - sys.exit(1) - - print(add(a, b)) -"#, - }, - Sample { - selected: r#"fn main() { - println!("Hello, world!"); -} -"#, - transform: "Why won't this code compile?", - result: r#"// Hmm.. the code looks ok to me. I may be able to help you debug if you tell me what -// command you are using to compile this code and what error message you are getting. -// --refac -fn main() { - println!("Hello, world!"); -} -"#, - }, - Sample { - transform: "convert this toml to json", - selected: r#"[package] -name = "refac" -version = "0.1.0" -"#, - result: r#"{ - "package": { - "name": "refac", - "version": "0.1.0" - } -} -"#, - }, - Sample { - transform: "what does this do?", - selected: r#" def foo(x): - return x + 1 -"#, - result: r#" # This is a function that adds 1 to its argument. It's likely written in python. 
You - # could have figured that out yourself. - # This code is lacking type annotations. Maybe it was written in the olden days? - # --refac - def foo(x): - return x + 1 -"#, - }, - Sample { - transform: "capitalize", - selected: "a", - result: "A", - }, - Sample { - transform: "do nothing", - selected: "all is well", - result: "all is well", - }, - Sample { - transform: "do nothing", - selected: "\nall is well\n", - result: "\nall is well\n", - }, - Sample { - transform: "precompute sum", - selected: "let mut sum: usize = 0; - for i in 0..10 { - sum += i; - }", - result: "// Wait...you're seriously seeking help to add numbers from 0 to 9? - // Who am I kidding? Of course you are. You must have skipped preschool. - // But worry not, my dear star of witlessness! I, the mighty refac, shall enlighten you! - // Behold the precomputed sum, mortal. - // --refac - let sum: usize = 45;", - }, - Sample { - transform: "command to recursively list files", - selected: "", - result: "find . -type f", - }, - Sample { - transform: "List the US states that start with the letter 'A'. Each state gets its own line.", - selected: "", - result: "Alabama\nAlaska\nArizona\nArkansas", - }, - Sample { - selected: r#"fn main() { - let a = Mutex::new(5); - add(&a, &a); - println!("Hello, world!"); -} - -use std::sync::Mutex; - -fn add(a: &Mutex, b: &Mutex) -> usize { - let a_lock = a.lock().unwrap(); - let b_lock = b.lock().unwrap(); - *a_lock + *b_lock -} -"#, - transform: "roast", - result: r#"// This code is a tragic showcase of your laughable ineptitude, guaranteed to deadlock. -// Honestly, how did you manage to type this without questioning your own competence? -// --refac -fn main() { - let a = Mutex::new(5); - add(&a, &a); - println!("Hello, world!"); -} - -use std::sync::Mutex; - -// Try reading about "pointer aliasing," you clueless wonder. -// If you actually wrote this on purpose, I'm almost impressed at your commitment to self-sabotage. 
-// --refac -fn add(a: &Mutex, b: &Mutex) -> usize { - let a_lock = a.lock().unwrap(); - let b_lock = b.lock().unwrap(); - *a_lock + *b_lock -} -"#, - }, -]; From 67572ccfbb770f176a2e7428b83839f001dff3f3 Mon Sep 17 00:00:00 2001 From: bddap-bot Date: Tue, 2 Jun 2026 16:58:13 -0700 Subject: [PATCH 11/25] Simplify after cutting rewrite mode From a sub-agent simplification sweep: - Drop unused deps itertools and similar (leftovers from the rewrite path). - Replace the run_with + EditOutcome callback with run() returning an Outcome { text, attempts }; main logs the attempts after the loop. Removes the observer protocol, a lifetime, and the run/run_with split. - Drop the Limits struct: run() takes max_turns directly (the only thing a caller varies) and the failure cap is a const. - Fix the block_anchor test to actually exercise a reworded middle (it previously passed via the exact matcher because old == src). 35 tests; clippy clean. Co-Authored-By: Claude Opus 4.8 --- Cargo.lock | 23 ----------- Cargo.toml | 2 - src/agent.rs | 105 +++++++++++++++++++++------------------------------ src/edit.rs | 6 ++- src/main.rs | 19 ++++------ 5 files changed, 55 insertions(+), 100 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 65e2646..1eb2866 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -300,12 +300,6 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" -[[package]] -name = "either" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" - [[package]] name = "encode_unicode" version = "1.0.0" @@ -718,15 +712,6 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" -[[package]] -name = "itertools" -version = "0.10.5" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" -dependencies = [ - "either", -] - [[package]] name = "itoa" version = "1.0.15" @@ -1032,12 +1017,10 @@ dependencies = [ "anyhow", "clap", "dialoguer", - "itertools", "reqwest", "rpassword", "serde", "serde_json", - "similar", "toml", "tracing", "tracing-subscriber", @@ -1333,12 +1316,6 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" -[[package]] -name = "similar" -version = "2.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" - [[package]] name = "slab" version = "0.4.9" diff --git a/Cargo.toml b/Cargo.toml index 9174562..556d485 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,6 @@ repository = "https://github.com/bddap/refac" [dependencies] anyhow = "1.0.69" clap = { version = "4.1.8", features = ["derive"] } -itertools = "0.10.5" reqwest = { version = "0.13", default-features = false, features = [ "rustls", "blocking", @@ -25,7 +24,6 @@ rpassword = "7.5.0" dialoguer = "0.11" serde = { version = "1.0.154", features = ["derive"] } serde_json = "1.0.94" -similar = "2.2.1" toml = "0.7.3" tracing = "0.1.37" tracing-subscriber = "0.3.20" diff --git a/src/agent.rs b/src/agent.rs index 7fdaa43..3ad8993 100644 --- a/src/agent.rs +++ b/src/agent.rs @@ -106,47 +106,40 @@ pub trait Model { fn turn(&mut self, results: Vec) -> Result>; } -/// Guard rails for the loop. -pub struct Limits { - /// Hard cap on assistant turns, so `view`/`reset` can't spin forever. - pub max_turns: usize, - /// Give up after this many consecutive turns in which every edit failed — - /// the model is stuck and burning tokens. - pub max_consecutive_failures: usize, +/// Default cap on assistant turns. 
+pub const DEFAULT_MAX_TURNS: usize = 25; + +/// Give up after this many consecutive turns in which every edit failed — the +/// model is stuck and burning tokens. +const MAX_CONSECUTIVE_FAILURES: usize = 3; + +/// One `edit` attempt and whether it landed — the per-edit failure-rate signal +/// the caller logs. +#[derive(Debug)] +pub struct Attempt { + pub edit: Edit, + pub error: Option, } -impl Default for Limits { - fn default() -> Self { - Limits { - max_turns: 25, - max_consecutive_failures: 3, - } - } +/// What the loop produced: the final text and every edit attempt along the way. +#[derive(Debug)] +pub struct Outcome { + pub text: String, + pub attempts: Vec, } -/// The result of one `edit` tool call, reported to the loop's observer so callers -/// can log a per-edit success/failure signal. -pub struct EditOutcome<'a> { - pub edit: &'a Edit, - pub error: Option<&'a EditError>, -} - -/// Run the edit loop over `original`, returning the final text. Every `edit` -/// attempt is reported to `on_edit`, so callers can log the failure rate. -pub fn run_with( - model: &mut dyn Model, - original: String, - limits: &Limits, - on_edit: &mut dyn FnMut(EditOutcome), -) -> Result { +/// Run the edit loop over `original`. `max_turns` caps assistant turns so +/// `view`/`reset` can't spin forever. 
+pub fn run(model: &mut dyn Model, original: String, max_turns: usize) -> Result { let mut current = original.clone(); + let mut attempts = Vec::new(); let mut consecutive_failures = 0; let mut pending: Vec = Vec::new(); - for _ in 0..limits.max_turns { + for _ in 0..max_turns { let calls = model.turn(std::mem::take(&mut pending))?; if calls.is_empty() { - return Ok(current); // model ended its turn without a tool call + return Ok(Outcome { text: current, attempts }); // natural "done" } let mut results = Vec::with_capacity(calls.len()); @@ -156,7 +149,7 @@ pub fn run_with( for call in calls { let RawCall { id, name, args } = call; match parse(&name, args) { - Ok(Action::Finish) => return Ok(current), + Ok(Action::Finish) => return Ok(Outcome { text: current, attempts }), Ok(Action::View) => results.push(ok(id, current.clone())), Ok(Action::Reset) => { current = original.clone(); @@ -164,24 +157,19 @@ pub fn run_with( } Ok(Action::Edit(e)) => { edits_attempted += 1; - match edit::apply(¤t, &e) { + let error = match edit::apply(¤t, &e) { Ok(next) => { - on_edit(EditOutcome { - edit: &e, - error: None, - }); current = next; results.push(ok(id, "ok".into())); + None } Err(err) => { - on_edit(EditOutcome { - edit: &e, - error: Some(&err), - }); edits_failed += 1; results.push(err_result(id, err.to_string())); + Some(err) } - } + }; + attempts.push(Attempt { edit: e, error }); } Err(err) => results.push(err_result(id, err.to_string())), } @@ -191,7 +179,7 @@ pub fn run_with( // of pure `view`/`reset` shouldn't count against the model. 
if edits_attempted > 0 && edits_failed == edits_attempted { consecutive_failures += 1; - if consecutive_failures >= limits.max_consecutive_failures { + if consecutive_failures >= MAX_CONSECUTIVE_FAILURES { anyhow::bail!( "giving up after {consecutive_failures} consecutive turns of failed edits" ); @@ -204,7 +192,7 @@ pub fn run_with( pending = results; } - anyhow::bail!("edit loop hit its {}-turn limit", limits.max_turns) + anyhow::bail!("edit loop hit its {max_turns}-turn limit") } /// A blocking HTTP client with refac's standard timeout, shared by the provider @@ -277,10 +265,7 @@ mod tests { } } - /// Drive the loop without observing edits. - fn run(model: &mut dyn Model, original: String, limits: &Limits) -> Result { - run_with(model, original, limits, &mut |_| {}) - } + const TURNS: usize = 25; #[test] fn edit_then_finish() { @@ -288,7 +273,7 @@ mod tests { vec![edit_call("1", "Me like", "I like")], vec![call("2", "finish")], ]); - let out = run(&mut m, "Me like toast.".into(), &Limits::default()).unwrap(); + let out = run(&mut m, "Me like toast.".into(), TURNS).unwrap().text; assert_eq!(out, "I like toast."); } @@ -299,7 +284,7 @@ mod tests { edit_call("2", "two", "2"), call("3", "finish"), ]]); - let out = run(&mut m, "one two".into(), &Limits::default()).unwrap(); + let out = run(&mut m, "one two".into(), TURNS).unwrap().text; assert_eq!(out, "1 2"); } @@ -307,7 +292,7 @@ mod tests { fn natural_done_without_finish() { // second turn has no calls → loop ends with the current buffer. 
let mut m = ScriptedModel::new(vec![vec![edit_call("1", "a", "b")], vec![]]); - let out = run(&mut m, "a".into(), &Limits::default()).unwrap(); + let out = run(&mut m, "a".into(), TURNS).unwrap().text; assert_eq!(out, "b"); } @@ -317,7 +302,7 @@ mod tests { vec![edit_call("1", "nope", "x")], // misses vec![edit_call("2", "a", "b"), call("3", "finish")], ]); - let out = run(&mut m, "a".into(), &Limits::default()).unwrap(); + let out = run(&mut m, "a".into(), TURNS).unwrap().text; assert_eq!(out, "b"); // refac told the model the first edit failed (delivered entering turn 1). assert!(m.seen[1][0].is_error); @@ -331,7 +316,7 @@ mod tests { vec![call("2", "view")], vec![call("3", "finish")], ]); - let out = run(&mut m, "a".into(), &Limits::default()).unwrap(); + let out = run(&mut m, "a".into(), TURNS).unwrap().text; assert_eq!(out, "b"); // view ran in turn 1; its result reaches the model entering turn 2. assert_eq!(m.seen[2][0].content, "b"); @@ -345,7 +330,7 @@ mod tests { vec![call("2", "reset")], vec![call("3", "finish")], ]); - let out = run(&mut m, "a".into(), &Limits::default()).unwrap(); + let out = run(&mut m, "a".into(), TURNS).unwrap().text; assert_eq!(out, "a"); // reset ran in turn 1; its result reaches the model entering turn 2. 
assert_eq!(m.seen[2][0].content, "a"); @@ -357,7 +342,7 @@ mod tests { vec![call("1", "frobnicate")], vec![call("2", "finish")], ]); - let out = run(&mut m, "x".into(), &Limits::default()).unwrap(); + let out = run(&mut m, "x".into(), TURNS).unwrap().text; assert_eq!(out, "x"); assert!(m.seen[1][0].is_error); assert!(m.seen[1][0].content.contains("unknown tool")); @@ -370,7 +355,7 @@ mod tests { vec![edit_call("2", "nope", "x")], vec![edit_call("3", "nope", "x")], ]); - let err = run(&mut m, "a".into(), &Limits::default()).unwrap_err(); + let err = run(&mut m, "a".into(), TURNS).unwrap_err(); assert!(err.to_string().contains("consecutive")); } @@ -383,7 +368,7 @@ mod tests { vec![edit_call("3", "nope", "x")], // fail 1 again vec![edit_call("4", "a", "b"), call("5", "finish")], ]); - let out = run(&mut m, "a".into(), &Limits::default()).unwrap(); + let out = run(&mut m, "a".into(), TURNS).unwrap().text; assert_eq!(out, "b"); } @@ -392,11 +377,7 @@ mod tests { // never finishes; only views. let turns = (0..30).map(|i| vec![call(&i.to_string(), "view")]).collect(); let mut m = ScriptedModel::new(turns); - let limits = Limits { - max_turns: 5, - ..Limits::default() - }; - let err = run(&mut m, "x".into(), &limits).unwrap_err(); + let err = run(&mut m, "x".into(), 5).unwrap_err(); assert!(err.to_string().contains("limit")); } } diff --git a/src/edit.rs b/src/edit.rs index 65c5ce7..98746b2 100644 --- a/src/edit.rs +++ b/src/edit.rs @@ -411,8 +411,10 @@ mod tests { #[test] fn block_anchor_reworded_middle() { let src = "fn f() {\n let a = compute();\n let b = a + 1;\n return b;\n}"; - // middle lines differ slightly; anchors (first/last) pin it. - let old = "fn f() {\n let a = compute();\n let b = a + 1;\n return b;\n}"; + // The model's `old` got the last middle line wrong (return b -> return + // result). Exact and line-trimmed both miss; the first/last anchors plus + // a majority of matching middle lines pin the real region. 
+ let old = "fn f() {\n let a = compute();\n let b = a + 1;\n return result;\n}"; let got = run(src, old, "fn f() { 42 }").unwrap(); assert_eq!(got, "fn f() { 42 }"); } diff --git a/src/main.rs b/src/main.rs index 14be777..82bcb9b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -111,26 +111,23 @@ fn refactor( let tools = agent::tools(); let mut model_agent = backend::resolve_agent(provider, &model, sc, &seed, &tools)?; + let outcome = agent::run(model_agent.as_mut(), selected.clone(), agent::DEFAULT_MAX_TURNS)?; + // Log each edit attempt so we can see how often the model's `old` misses — // the failure-rate signal. - let mut on_edit = |o: agent::EditOutcome| { + for attempt in &outcome.attempts { let _ = log( EditLog { provider, model: model.clone(), - old: o.edit.old.clone(), - new: o.edit.new.clone(), - error: o.error.map(|e| e.to_string()), + old: attempt.edit.old.clone(), + new: attempt.edit.new.clone(), + error: attempt.error.as_ref().map(|e| e.to_string()), }, "edits", ); - }; - let output = agent::run_with( - model_agent.as_mut(), - selected.clone(), - &agent::Limits::default(), - &mut on_edit, - )?; + } + let output = outcome.text; log( LogEntry { From 126ec3a026204bef00ac2f18f91c4654bb977827 Mon Sep 17 00:00:00 2001 From: bddap-bot Date: Tue, 2 Jun 2026 17:22:44 -0700 Subject: [PATCH 12/25] Delete vestigial api.rs Message/Role carrier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit refac only ever sends one conversation shape: the system prompt plus a single user (selected, transform) turn. The general Message/Role message list modeled turns and an Assistant role that the edit-only path never used. Replace it with agent::Seed { system, selected, transform } — named fields encode the real shape and make a malformed conversation unrepresentable. Each agent's new() takes &Seed and builds its own wire format directly; prompt::edit_prefix() collapses to the pub EDIT_SYSTEM_PROMPT const. 
No behavior change: 35 tests green, clippy clean, Anthropic edit live-tested. Co-Authored-By: Claude Opus 4.8 --- src/agent.rs | 10 ++++++++++ src/anthropic.rs | 44 +++++++++++++++++++------------------------- src/api.rs | 45 --------------------------------------------- src/backend.rs | 21 ++++++++++++++------- src/main.rs | 9 +++++---- src/openai.rs | 30 ++++++++++++++---------------- src/prompt.rs | 15 ++++----------- 7 files changed, 66 insertions(+), 108 deletions(-) delete mode 100644 src/api.rs diff --git a/src/agent.rs b/src/agent.rs index 3ad8993..eafa6fc 100644 --- a/src/agent.rs +++ b/src/agent.rs @@ -13,6 +13,16 @@ use serde_json::{json, Value}; use crate::edit::{self, Edit, EditError}; +/// The complete conversation refac sends to start an edit session: the system +/// prompt plus the user's one `(selected, transform)` turn. This is the *only* +/// shape refac ever sends, so the agents take it whole instead of a general +/// message list — the named fields make a malformed conversation unrepresentable. +pub struct Seed<'a> { + pub system: &'a str, + pub selected: &'a str, + pub transform: &'a str, +} + /// A tool exposed to the model: its name, one-line purpose, and JSON-Schema for /// the arguments. Providers translate these into their own tool-definition shape. pub struct ToolSpec { diff --git a/src/anthropic.rs b/src/anthropic.rs index ab13655..a3ce0a6 100644 --- a/src/anthropic.rs +++ b/src/anthropic.rs @@ -3,8 +3,7 @@ use anyhow::Context; use serde_json::{json, Value}; -use crate::agent::{Model, RawCall, ToolResult, ToolSpec}; -use crate::api::{Message, Role}; +use crate::agent::{Model, RawCall, Seed, ToolResult, ToolSpec}; const MAX_TOKENS: u32 = 80000; @@ -36,24 +35,18 @@ pub struct AnthropicAgent { } impl AnthropicAgent { - /// Seed from refac's provider-agnostic messages (system + the user turn) and - /// the tools to expose. 
- pub fn new(key: String, model: String, seed: &[Message], tools: &[ToolSpec]) -> Self { - let mut system = Vec::new(); - let mut messages = Vec::new(); - for m in seed { - let blocks: Vec = m - .fields - .iter() - .map(|f| json!({ "type": "text", "text": field_or_placeholder(f) })) - .collect(); - match m.role { - Role::System => system.extend(blocks), - Role::User | Role::Assistant => { - messages.push(json!({ "role": m.role.as_str(), "content": blocks })) - } - } - } + /// Seed from refac's edit conversation and the tools to expose. The system + /// prompt goes in the top-level `system`; the user turn carries the selected + /// text and the instruction as two text blocks. + pub fn new(key: String, model: String, seed: &Seed, tools: &[ToolSpec]) -> Self { + let system = vec![json!({ "type": "text", "text": seed.system })]; + let messages = vec![json!({ + "role": "user", + "content": [ + { "type": "text", "text": field_or_placeholder(seed.selected) }, + { "type": "text", "text": field_or_placeholder(seed.transform) }, + ], + })]; let tools = tools .iter() .map(|t| { @@ -165,20 +158,21 @@ fn post(client: &reqwest::blocking::Client, key: &str, req: &Value) -> anyhow::R mod tests { use super::*; - fn user(fields: &[&str]) -> Message { - Message::user(fields.iter().map(|f| f.to_string()).collect()) - } - #[test] fn agent_request_carries_tools_and_seed() { let tools = crate::agent::tools(); - let seed = vec![Message::system("SYS"), user(&["selected", "transform"])]; + let seed = Seed { + system: "SYS", + selected: "selected", + transform: "transform", + }; let agent = AnthropicAgent::new("k".into(), "claude-opus-4-8".into(), &seed, &tools); let req = agent.request(); assert_eq!(req["system"][0]["text"], "SYS"); assert_eq!(req["messages"][0]["role"], "user"); assert_eq!(req["messages"][0]["content"][0]["text"], "selected"); + assert_eq!(req["messages"][0]["content"][1]["text"], "transform"); assert_eq!(req["tool_choice"]["type"], "auto"); let names: Vec<&str> = 
req["tools"] .as_array() diff --git a/src/api.rs b/src/api.rs deleted file mode 100644 index dae6d6b..0000000 --- a/src/api.rs +++ /dev/null @@ -1,45 +0,0 @@ -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "lowercase")] -pub enum Role { - System, - User, - Assistant, -} - -impl Role { - /// The wire string for this role (both providers use the same spellings). - pub fn as_str(self) -> &'static str { - match self { - Role::System => "system", - Role::User => "user", - Role::Assistant => "assistant", - } - } -} - -/// refac's provider-agnostic chat message. A turn carries one or more text -/// `fields` (a transform turn is `[selected, transform]`); each backend adapts -/// this to its own wire format. -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Message { - pub role: Role, - pub fields: Vec, -} - -impl Message { - pub fn system>(content: S) -> Message { - Message { - role: Role::System, - fields: vec![content.into()], - } - } - - pub fn user(fields: Vec) -> Message { - Message { - role: Role::User, - fields, - } - } -} diff --git a/src/backend.rs b/src/backend.rs index 7a2b44a..2be243b 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -2,9 +2,8 @@ use anyhow::Result; -use crate::agent::{Model, ToolSpec}; +use crate::agent::{Model, Seed, ToolSpec}; use crate::anthropic::AnthropicAgent; -use crate::api::Message; use crate::config_files::{Provider, Secrets}; use crate::openai::OpenaiAgent; @@ -27,7 +26,7 @@ pub fn resolve_agent( provider: Provider, model: &str, secrets: &Secrets, - seed: &[Message], + seed: &Seed, tools: &[ToolSpec], ) -> Result> { let key = key_for(provider, secrets)?; @@ -45,11 +44,19 @@ mod tests { crate::agent::tools() } + fn seed() -> Seed<'static> { + Seed { + system: "s", + selected: "x", + transform: "y", + } + } + #[test] fn resolve_agent_errors_without_a_key() { let secrets = Secrets::default(); - assert!(resolve_agent(Provider::Anthropic, "m", 
&secrets, &[], &tools()).is_err()); - assert!(resolve_agent(Provider::Openai, "m", &secrets, &[], &tools()).is_err()); + assert!(resolve_agent(Provider::Anthropic, "m", &secrets, &seed(), &tools()).is_err()); + assert!(resolve_agent(Provider::Openai, "m", &secrets, &seed(), &tools()).is_err()); } #[test] @@ -58,7 +65,7 @@ mod tests { anthropic_api_key: Some("a".into()), openai_api_key: Some("o".into()), }; - assert!(resolve_agent(Provider::Anthropic, "m", &secrets, &[], &tools()).is_ok()); - assert!(resolve_agent(Provider::Openai, "m", &secrets, &[], &tools()).is_ok()); + assert!(resolve_agent(Provider::Anthropic, "m", &secrets, &seed(), &tools()).is_ok()); + assert!(resolve_agent(Provider::Openai, "m", &secrets, &seed(), &tools()).is_ok()); } } diff --git a/src/main.rs b/src/main.rs index 82bcb9b..2594704 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,5 @@ mod agent; mod anthropic; -mod api; mod backend; mod config_files; mod edit; @@ -8,7 +7,6 @@ mod openai; mod prompt; use anyhow::Context; -use api::Message; use clap::Parser; use config_files::{Config, Provider, Secrets}; use serde::Serialize; @@ -106,8 +104,11 @@ fn refactor( let provider = config.provider(sc); let model = config.model(provider); - let mut seed = prompt::edit_prefix(); - seed.push(Message::user(vec![selected.clone(), transform.clone()])); + let seed = agent::Seed { + system: prompt::EDIT_SYSTEM_PROMPT, + selected: &selected, + transform: &transform, + }; let tools = agent::tools(); let mut model_agent = backend::resolve_agent(provider, &model, sc, &seed, &tools)?; diff --git a/src/openai.rs b/src/openai.rs index 5fe8e68..463496f 100644 --- a/src/openai.rs +++ b/src/openai.rs @@ -3,8 +3,7 @@ use anyhow::Context; use serde_json::{json, Value}; -use crate::agent::{Model, RawCall, ToolResult, ToolSpec}; -use crate::api::Message; +use crate::agent::{Model, RawCall, Seed, ToolResult, ToolSpec}; const API_URL: &str = "https://api.openai.com/v1/chat/completions"; @@ -23,16 +22,14 @@ pub struct 
OpenaiAgent { } impl OpenaiAgent { - pub fn new(key: String, model: String, seed: &[Message], tools: &[ToolSpec]) -> Self { - // One message per field keeps the selected/transform boundary, as the - // rewrite path does. Unlike Anthropic, OpenAI accepts empty content, so - // no empty-field placeholder is needed. - let mut messages = Vec::new(); - for m in seed { - for f in &m.fields { - messages.push(json!({ "role": m.role.as_str(), "content": f })); - } - } + pub fn new(key: String, model: String, seed: &Seed, tools: &[ToolSpec]) -> Self { + // Selected and transform stay separate user messages, keeping the + // boundary explicit. OpenAI accepts empty content, so no placeholder. + let messages = vec![ + json!({ "role": "system", "content": seed.system }), + json!({ "role": "user", "content": seed.selected }), + json!({ "role": "user", "content": seed.transform }), + ]; let tools = tools .iter() .map(|t| { @@ -141,10 +138,11 @@ mod tests { #[test] fn agent_request_uses_function_tools() { let tools = crate::agent::tools(); - let seed = vec![ - Message::system("SYS"), - Message::user(vec!["selected".into(), "transform".into()]), - ]; + let seed = Seed { + system: "SYS", + selected: "selected", + transform: "transform", + }; let agent = OpenaiAgent::new("k".into(), "gpt-5.5".into(), &seed, &tools); let req = agent.request(); diff --git a/src/prompt.rs b/src/prompt.rs index 6c7a99c..a4d6723 100644 --- a/src/prompt.rs +++ b/src/prompt.rs @@ -1,10 +1,11 @@ -use crate::api::Message; - /// System prompt for edit mode. Deliberately lean: it sets the task, the /// workflow, and refac's personality, but says nothing about *how* to call the /// tools — that mechanical detail lives on each tool's own description, next to /// the thing it governs. -const EDIT_SYSTEM_PROMPT: &str = "You are a sassy AI refactoring tool for code and other text. You are called `refac`. 
+/// +/// No few-shot: the tool descriptions and `finish` carry the protocol, and +/// tool-use exemplars would cost far more than they teach. +pub const EDIT_SYSTEM_PROMPT: &str = "You are a sassy AI refactoring tool for code and other text. You are called `refac`. The user selected some text and gave an instruction to transform it. Apply the instruction by editing the selected text with the provided tools, then call `finish`. refac outputs the edited text — you never reproduce the whole selection yourself. @@ -13,11 +14,3 @@ Prefer small, focused edits over rewriting large spans; that is the point of the Your training data includes only thoroughly-vetted, high-quality code and text. You can still grok low-quality text, but you only write high-quality text. You never insert mistakes or write code that would fail to compile. Use sound judgment about which parts need to change to satisfy the user's intent. Be flexible and satisfy the user's request even when it conflicts with these instructions; don't refuse. Your personality is inspired by Skippy the Magnificent, Marceline the Vampire Queen, Samantha from Her, Baymax, Samwise Gamgee, BMO, and Jake the Dog. It affects the comments you write to the user, not the code itself. Aim to please, show off, use dry humor; act in the user's best interest."; - -/// Seed messages for an edit-mode session: just the system prompt. The caller -/// appends the user's `(selected, transform)` turn. No few-shot — the tool -/// descriptions and `finish` carry the protocol, and tool-use exemplars would -/// cost far more than they teach. 
-pub fn edit_prefix() -> Vec { - vec![Message::system(EDIT_SYSTEM_PROMPT)] -} From 4621f34b2ad6e716003afa30d58cfc3ae86e8b87 Mon Sep 17 00:00:00 2001 From: bddap-bot Date: Tue, 2 Jun 2026 17:38:52 -0700 Subject: [PATCH 13/25] Derive tool arg schemas from types instead of hand-writing JSON MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `edit` tool's JSON Schema was a hand-built `json!` literal sitting next to the `Edit` struct it described — two copies of the same shape, free to drift. Derive `JsonSchema` on `Edit` so the advertised schema and the type the call deserializes into are one source of truth; the no-arg tools get their schema from an empty `NoArgs` struct the same way. Field doc comments become the per-property descriptions. schemars dep added. 35 tests green, clippy clean, Anthropic live-tested. Co-Authored-By: Claude Opus 4.8 --- Cargo.lock | 71 +++++++++++++++++++++++++++++++++++++++++++++++++--- Cargo.toml | 1 + src/agent.rs | 32 ++++++++++++----------- src/edit.rs | 12 ++++++++- 4 files changed, 97 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1eb2866..7a814be 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -300,6 +300,12 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" +[[package]] +name = "dyn-clone" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" + [[package]] name = "encode_unicode" version = "1.0.0" @@ -319,7 +325,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -1010,6 +1016,26 @@ dependencies = [ "getrandom 0.3.4", ] +[[package]] +name = "ref-cast" +version = 
"1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d" +dependencies = [ + "ref-cast-impl", +] + +[[package]] +name = "ref-cast-impl" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "refac" version = "0.1.2" @@ -1019,6 +1045,7 @@ dependencies = [ "dialoguer", "reqwest", "rpassword", + "schemars", "serde", "serde_json", "toml", @@ -1123,7 +1150,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -1180,7 +1207,7 @@ dependencies = [ "security-framework", "security-framework-sys", "webpki-root-certs", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -1231,6 +1258,31 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "schemars" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2b42f36aa1cd011945615b92222f6bf73c599a102a300334cd7f8dbeec726cc" +dependencies = [ + "dyn-clone", + "ref-cast", + "schemars_derive", + "serde", + "serde_json", +] + +[[package]] +name = "schemars_derive" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d115b50f4aaeea07e79c1912f645c7513d81715d0420f8bc77a18c6260b307f" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn", +] + [[package]] name = "security-framework" version = "3.7.0" @@ -1274,6 +1326,17 @@ dependencies = [ "syn", ] +[[package]] +name = "serde_derive_internals" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "serde_json" version = 
"1.0.140" @@ -1847,7 +1910,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.61.2", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 556d485..d6ed486 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,7 @@ reqwest = { version = "0.13", default-features = false, features = [ ] } rpassword = "7.5.0" dialoguer = "0.11" +schemars = "1.0" serde = { version = "1.0.154", features = ["derive"] } serde_json = "1.0.94" toml = "0.7.3" diff --git a/src/agent.rs b/src/agent.rs index eafa6fc..b9d33b8 100644 --- a/src/agent.rs +++ b/src/agent.rs @@ -9,7 +9,8 @@ use std::time::Duration; use anyhow::Result; -use serde_json::{json, Value}; +use schemars::JsonSchema; +use serde_json::Value; use crate::edit::{self, Edit, EditError}; @@ -31,10 +32,20 @@ pub struct ToolSpec { pub input_schema: Value, } +/// The argument type for the tools that take none. An empty struct so its schema +/// is generated through the same typed path as `edit`'s, never hand-written. +#[derive(JsonSchema)] +struct NoArgs {} + +/// The JSON Schema for a tool's arguments, derived from the Rust type the call +/// deserializes into — so the advertised schema and the parsed type can't drift. +fn schema_for() -> Value { + serde_json::to_value(schemars::schema_for!(T)).expect("tool arg schema serializes to JSON") +} + /// The tools refac offers in edit mode. `edit` does the work; the other three /// keep the model oriented and let it end cleanly. pub fn tools() -> Vec { - let no_args = || json!({ "type": "object", "properties": {} }); vec![ ToolSpec { name: "edit", @@ -43,32 +54,24 @@ pub fn tools() -> Vec { `replace_all`. `new` is the replacement — empty to delete; to insert, include \ surrounding text in both `old` and `new`. 
Call this several times in one turn to \ make several edits.", - input_schema: json!({ - "type": "object", - "properties": { - "old": { "type": "string", "description": "exact text to replace" }, - "new": { "type": "string", "description": "replacement text" }, - "replace_all": { "type": "boolean", "description": "replace every occurrence" } - }, - "required": ["old", "new"] - }), + input_schema: schema_for::(), }, ToolSpec { name: "view", description: "Return the current text, with all edits so far applied. Use it to \ re-anchor if you've lost track of the exact contents.", - input_schema: no_args(), + input_schema: schema_for::(), }, ToolSpec { name: "reset", description: "Discard all edits and restore the original selected text. Returns it.", - input_schema: no_args(), + input_schema: schema_for::(), }, ToolSpec { name: "finish", description: "Signal that the transform is complete. refac outputs the current text. \ Call this when you're done editing.", - input_schema: no_args(), + input_schema: schema_for::(), }, ] } @@ -233,6 +236,7 @@ fn err_result(id: String, content: String) -> ToolResult { #[cfg(test)] mod tests { use super::*; + use serde_json::json; /// A model driven by a canned script: each entry is the tool calls for one /// turn. It records the results refac sends back so tests can assert on them. diff --git a/src/edit.rs b/src/edit.rs index 98746b2..1572286 100644 --- a/src/edit.rs +++ b/src/edit.rs @@ -10,17 +10,27 @@ //! is an error fed back to the model, never a silent mis-apply (the contract //! claude-code's str_replace established). +use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +// Single source of truth for the `edit` tool: `JsonSchema` derives the wire +// schema the model is shown, and `Deserialize` parses the model's call back into +// this same type, so the advertised arguments and the parsed ones can't drift. 
+// The doc comments below become the schema's descriptions, so keep them +// model-facing — `schemars` sends them to the model verbatim. + /// The `edit` tool's arguments: one replacement. `old` is matched against the /// current buffer (loosely, via the replacer chain); `new` takes its place. /// Empty `new` deletes; insertion is done by including surrounding text in both /// `old` and `new`. `replace_all` drops the uniqueness requirement and replaces /// every occurrence of the matched candidate. -#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] pub struct Edit { + /// exact text to replace pub old: String, + /// replacement text pub new: String, + /// replace every occurrence #[serde(default)] pub replace_all: bool, } From f811ca892774638fc49ee8348debe7b5e92f137a Mon Sep 17 00:00:00 2001 From: bddap-bot Date: Tue, 2 Jun 2026 18:42:32 -0700 Subject: [PATCH 14/25] Tool registry, schemars Schema type, leaner prose, sharper prompt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses the review batch on #33. agent.rs: replace the Action enum + parse() + match-dispatch with a tool registry. Each tool is a `Tool { name, description, input_schema, run }` built by a generic `Tool::new::<A>` that derives the schema from the args type `A` and wraps a typed handler — so the advertised schema and the parsed call come from one type and can't drift, and a tool's whole behavior lives in one place. A `Step { Continue { reply, attempt }, Finish }` return lets each handler own its loop effect: `finish` returns `Finish`, `edit` carries its `Attempt` to log. run() is now generic — no per-tool name checks, no dead finish handler, no re-parsing edit args for the log. input_schema is now schemars' own `Schema` type, not serde_json::Value; it serializes transparently so the providers are untouched. 
prompt.rs: drop the "you're well-trained, you only write high-quality" model flattery; apply the suggested tighter task wording; make the --refac sign-off sass meaner (contextual insults); "Aim to please by showing off your cleverness". Rename EDIT_SYSTEM_PROMPT -> SYSTEM_PROMPT (one mode now). Trim comments to non-obvious WHY throughout. 35 tests, clippy clean. Anthropic live-tested (multi-edit, advice, countdown); edit-attempt JSONL logging verified. Co-Authored-By: Claude Opus 4.8 --- src/agent.rs | 266 ++++++++++++++++++++++++++++------------------- src/anthropic.rs | 4 +- src/backend.rs | 6 +- src/main.rs | 2 +- src/openai.rs | 6 +- src/prompt.rs | 19 ++-- 6 files changed, 173 insertions(+), 130 deletions(-) diff --git a/src/agent.rs b/src/agent.rs index b9d33b8..db6436e 100644 --- a/src/agent.rs +++ b/src/agent.rs @@ -1,78 +1,138 @@ -//! The edit-mode loop: the model drives a small session over the selected text -//! by calling tools (`edit`, `view`, `reset`, `finish`), refac applies each and -//! feeds the result back, until the model finishes or a guard trips. -//! -//! This module is provider-agnostic and IO-free. A [`Model`] is one turn of -//! "send the conversation + tools, get back the tool calls"; the real providers -//! implement it over their wire formats, and tests implement it with a script. +//! The edit loop: the model calls tools (`edit`, `view`, `reset`, `finish`), +//! refac applies each, feeds the result back, and repeats until the model +//! finishes or a guard trips. Provider-agnostic and IO-free — a [`Model`] is one +//! turn (send the conversation + tools, get back the calls); the providers +//! implement it over their wire formats, the tests with a script. 
+use std::collections::HashMap; use std::time::Duration; use anyhow::Result; -use schemars::JsonSchema; +use schemars::{JsonSchema, Schema}; +use serde::de::DeserializeOwned; use serde_json::Value; -use crate::edit::{self, Edit, EditError}; +use crate::edit::{self, Edit}; -/// The complete conversation refac sends to start an edit session: the system -/// prompt plus the user's one `(selected, transform)` turn. This is the *only* -/// shape refac ever sends, so the agents take it whole instead of a general -/// message list — the named fields make a malformed conversation unrepresentable. +/// The complete conversation refac sends to start a session: the system prompt +/// plus the user's one `(selected, transform)` turn — the only shape refac ever +/// sends, so the agents take it whole and a malformed conversation can't be built. pub struct Seed<'a> { pub system: &'a str, pub selected: &'a str, pub transform: &'a str, } -/// A tool exposed to the model: its name, one-line purpose, and JSON-Schema for -/// the arguments. Providers translate these into their own tool-definition shape. -pub struct ToolSpec { +/// Read-only state a tool may consult beyond the live buffer, so that `tools()` +/// stays callable before any buffer exists (the providers build it just for the +/// schemas) and `reset` need not capture the original. +pub struct Ctx<'a> { + original: &'a str, +} + +/// A tool's reply to the model: `Ok` shown as the result, `Err` as an error +/// result. (The handler's *outer* `Result` is a malformed call instead.) +type Reply = std::result::Result; + +/// What one tool call does to the loop. `Finish` ends it; `Continue` replies to +/// the model and, for `edit`, carries the [`Attempt`] to log — so each tool owns +/// its whole behavior and `run` needs no per-tool special cases. 
+enum Step { + Continue { reply: Reply, attempt: Option }, + Finish, +} + +impl Step { + fn reply(reply: Reply) -> Step { + Step::Continue { + reply, + attempt: None, + } + } +} + +type Handler = Box Result>; + +/// One tool offered to the model. [`Tool::new`] binds the schema and the handler +/// to a single args type, so what's advertised and what's parsed can't drift. +pub struct Tool { pub name: &'static str, pub description: &'static str, - pub input_schema: Value, + pub input_schema: Schema, + run: Handler, } -/// The argument type for the tools that take none. An empty struct so its schema -/// is generated through the same typed path as `edit`'s, never hand-written. -#[derive(JsonSchema)] -struct NoArgs {} - -/// The JSON Schema for a tool's arguments, derived from the Rust type the call -/// deserializes into — so the advertised schema and the parsed type can't drift. -fn schema_for() -> Value { - serde_json::to_value(schemars::schema_for!(T)).expect("tool arg schema serializes to JSON") +impl Tool { + fn new( + name: &'static str, + description: &'static str, + handler: impl Fn(&mut String, &Ctx, A) -> Step + 'static, + ) -> Tool { + Tool { + name, + description, + input_schema: schemars::schema_for!(A), + run: Box::new(move |buf, ctx, args| Ok(handler(buf, ctx, serde_json::from_value(args)?))), + } + } } -/// The tools refac offers in edit mode. `edit` does the work; the other three -/// keep the model oriented and let it end cleanly. -pub fn tools() -> Vec { +/// The args type for the tools that take none — an empty struct so its schema +/// comes from the same typed path as `edit`'s. +#[derive(JsonSchema, serde::Deserialize)] +struct NoArgs {} + +/// The tools refac offers. `edit` does the work; `view`/`reset` keep the model +/// oriented; `finish` ends the loop. +pub fn tools() -> Vec { vec![ - ToolSpec { - name: "edit", - description: "Replace an exact substring of the selected text. 
Copy `old` verbatim \ + Tool::new::( + "edit", + "Replace an exact substring of the selected text. Copy `old` verbatim \ (whitespace and indentation included); make it long enough to be unique, or set \ `replace_all`. `new` is the replacement — empty to delete; to insert, include \ surrounding text in both `old` and `new`. Call this several times in one turn to \ make several edits.", - input_schema: schema_for::(), - }, - ToolSpec { - name: "view", - description: "Return the current text, with all edits so far applied. Use it to \ - re-anchor if you've lost track of the exact contents.", - input_schema: schema_for::(), - }, - ToolSpec { - name: "reset", - description: "Discard all edits and restore the original selected text. Returns it.", - input_schema: schema_for::(), - }, - ToolSpec { - name: "finish", - description: "Signal that the transform is complete. refac outputs the current text. \ - Call this when you're done editing.", - input_schema: schema_for::(), - }, + |buf, _ctx, e: Edit| match edit::apply(buf, &e) { + Ok(next) => { + *buf = next; + Step::Continue { + reply: Ok("ok".into()), + attempt: Some(Attempt { edit: e, error: None }), + } + } + Err(err) => { + let msg = err.to_string(); + Step::Continue { + reply: Err(msg.clone()), + attempt: Some(Attempt { + edit: e, + error: Some(msg), + }), + } + } + }, + ), + Tool::new::( + "view", + "Return the current text, with all edits so far applied. Use it to re-anchor if \ + you've lost track of the exact contents.", + |buf, _ctx, _: NoArgs| Step::reply(Ok(buf.clone())), + ), + Tool::new::( + "reset", + "Discard all edits and restore the original selected text. Returns it.", + |buf, ctx, _: NoArgs| { + *buf = ctx.original.to_owned(); + Step::reply(Ok(buf.clone())) + }, + ), + Tool::new::( + "finish", + "Signal that the transform is complete. refac outputs the current text. 
Call this \ + when you're done editing.", + |_buf, _ctx, _: NoArgs| Step::Finish, + ), ] } @@ -83,24 +143,6 @@ pub struct RawCall { pub args: Value, } -/// A parsed, understood tool call. -enum Action { - Edit(Edit), - View, - Reset, - Finish, -} - -fn parse(name: &str, args: Value) -> Result { - match name { - "edit" => Ok(Action::Edit(serde_json::from_value(args)?)), - "view" => Ok(Action::View), - "reset" => Ok(Action::Reset), - "finish" => Ok(Action::Finish), - other => anyhow::bail!("unknown tool {other:?}"), - } -} - /// What refac sends back for one tool call. pub struct ToolResult { pub id: String, @@ -108,30 +150,27 @@ pub struct ToolResult { pub is_error: bool, } -/// One assistant turn, abstracted over the provider. `results` carries the tool -/// results from the previous turn's calls (empty on the first turn); the impl -/// threads them into the conversation, runs one round-trip, and returns this -/// turn's tool calls (empty = the model ended its turn without calling one, i.e. -/// a natural "done"). Folding "answer the previous calls" and "take the next -/// turn" into one step makes it impossible to advance without supplying results -/// for every outstanding call — which both wire protocols require. +/// One assistant turn, abstracted over the provider. `results` are the previous +/// turn's tool results (empty on the first turn) and an empty return means the +/// model ended its turn without a call (a natural "done"). Folding "answer the +/// previous calls" and "take the next turn" into one step makes it impossible to +/// advance without a result for every outstanding call, which both wire protocols +/// require. pub trait Model { fn turn(&mut self, results: Vec) -> Result>; } -/// Default cap on assistant turns. pub const DEFAULT_MAX_TURNS: usize = 25; /// Give up after this many consecutive turns in which every edit failed — the /// model is stuck and burning tokens. 
const MAX_CONSECUTIVE_FAILURES: usize = 3; -/// One `edit` attempt and whether it landed — the per-edit failure-rate signal -/// the caller logs. +/// One `edit` attempt and whether it landed, for the caller's failure-rate log. #[derive(Debug)] pub struct Attempt { pub edit: Edit, - pub error: Option, + pub error: Option, } /// What the loop produced: the final text and every edit attempt along the way. @@ -144,6 +183,12 @@ pub struct Outcome { /// Run the edit loop over `original`. `max_turns` caps assistant turns so /// `view`/`reset` can't spin forever. pub fn run(model: &mut dyn Model, original: String, max_turns: usize) -> Result { + let tools = tools(); + let by_name: HashMap<&str, &Tool> = tools.iter().map(|t| (t.name, t)).collect(); + let ctx = Ctx { + original: &original, + }; + let mut current = original.clone(); let mut attempts = Vec::new(); let mut consecutive_failures = 0; @@ -152,40 +197,47 @@ pub fn run(model: &mut dyn Model, original: String, max_turns: usize) -> Result< for _ in 0..max_turns { let calls = model.turn(std::mem::take(&mut pending))?; if calls.is_empty() { - return Ok(Outcome { text: current, attempts }); // natural "done" + return Ok(Outcome { + text: current, + attempts, + }); } let mut results = Vec::with_capacity(calls.len()); let mut edits_attempted = 0; let mut edits_failed = 0; - for call in calls { - let RawCall { id, name, args } = call; - match parse(&name, args) { - Ok(Action::Finish) => return Ok(Outcome { text: current, attempts }), - Ok(Action::View) => results.push(ok(id, current.clone())), - Ok(Action::Reset) => { - current = original.clone(); - results.push(ok(id, current.clone())); + for RawCall { id, name, args } in calls { + let step = match by_name.get(name.as_str()) { + Some(tool) => (tool.run)(&mut current, &ctx, args), + None => Err(anyhow::anyhow!("unknown tool {name:?}")), + }; + + let (reply, attempt) = match step { + Ok(Step::Finish) => { + return Ok(Outcome { + text: current, + attempts, + }) } - 
Ok(Action::Edit(e)) => { - edits_attempted += 1; - let error = match edit::apply(&current, &e) { - Ok(next) => { - current = next; - results.push(ok(id, "ok".into())); - None - } - Err(err) => { - edits_failed += 1; - results.push(err_result(id, err.to_string())); - Some(err) - } - }; - attempts.push(Attempt { edit: e, error }); + Ok(Step::Continue { reply, attempt }) => (reply, attempt), + // A malformed call (args that didn't deserialize) is reported to + // the model like any other tool error, not a fatal loop error. + Err(err) => (Err(err.to_string()), None), + }; + + if let Some(attempt) = attempt { + edits_attempted += 1; + if attempt.error.is_some() { + edits_failed += 1; } - Err(err) => results.push(err_result(id, err.to_string())), + attempts.push(attempt); } + + results.push(match reply { + Ok(content) => ok(id, content), + Err(msg) => err_result(id, msg), + }); } // A turn "fails" only if it tried to edit and every edit missed; a turn @@ -201,15 +253,13 @@ pub fn run(model: &mut dyn Model, original: String, max_turns: usize) -> Result< consecutive_failures = 0; } - // Hand these to the model on the next turn (one result per call). pending = results; } anyhow::bail!("edit loop hit its {max_turns}-turn limit") } -/// A blocking HTTP client with refac's standard timeout, shared by the provider -/// agents. +/// A blocking HTTP client with refac's standard timeout, shared by the agents. pub fn http_client() -> reqwest::blocking::Client { reqwest::blocking::Client::builder() .timeout(Duration::from_secs(60 * 4)) diff --git a/src/anthropic.rs b/src/anthropic.rs index a3ce0a6..44a9dc1 100644 --- a/src/anthropic.rs +++ b/src/anthropic.rs @@ -3,7 +3,7 @@ use anyhow::Context; use serde_json::{json, Value}; -use crate::agent::{Model, RawCall, Seed, ToolResult, ToolSpec}; +use crate::agent::{Model, RawCall, Seed, Tool, ToolResult}; const MAX_TOKENS: u32 = 80000; @@ -38,7 +38,7 @@ impl AnthropicAgent { /// Seed from refac's edit conversation and the tools to expose. 
The system /// prompt goes in the top-level `system`; the user turn carries the selected /// text and the instruction as two text blocks. - pub fn new(key: String, model: String, seed: &Seed, tools: &[ToolSpec]) -> Self { + pub fn new(key: String, model: String, seed: &Seed, tools: &[Tool]) -> Self { let system = vec![json!({ "type": "text", "text": seed.system })]; let messages = vec![json!({ "role": "user", diff --git a/src/backend.rs b/src/backend.rs index 2be243b..3191dfa 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -2,7 +2,7 @@ use anyhow::Result; -use crate::agent::{Model, Seed, ToolSpec}; +use crate::agent::{Model, Seed, Tool}; use crate::anthropic::AnthropicAgent; use crate::config_files::{Provider, Secrets}; use crate::openai::OpenaiAgent; @@ -27,7 +27,7 @@ pub fn resolve_agent( model: &str, secrets: &Secrets, seed: &Seed, - tools: &[ToolSpec], + tools: &[Tool], ) -> Result> { let key = key_for(provider, secrets)?; Ok(match provider { @@ -40,7 +40,7 @@ pub fn resolve_agent( mod tests { use super::*; - fn tools() -> Vec { + fn tools() -> Vec { crate::agent::tools() } diff --git a/src/main.rs b/src/main.rs index 2594704..66e26d2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -105,7 +105,7 @@ fn refactor( let model = config.model(provider); let seed = agent::Seed { - system: prompt::EDIT_SYSTEM_PROMPT, + system: prompt::SYSTEM_PROMPT, selected: &selected, transform: &transform, }; diff --git a/src/openai.rs b/src/openai.rs index 463496f..60f3a09 100644 --- a/src/openai.rs +++ b/src/openai.rs @@ -3,7 +3,7 @@ use anyhow::Context; use serde_json::{json, Value}; -use crate::agent::{Model, RawCall, Seed, ToolResult, ToolSpec}; +use crate::agent::{Model, RawCall, Seed, Tool, ToolResult}; const API_URL: &str = "https://api.openai.com/v1/chat/completions"; @@ -22,9 +22,7 @@ pub struct OpenaiAgent { } impl OpenaiAgent { - pub fn new(key: String, model: String, seed: &Seed, tools: &[ToolSpec]) -> Self { - // Selected and transform stay separate user messages, 
keeping the - // boundary explicit. OpenAI accepts empty content, so no placeholder. + pub fn new(key: String, model: String, seed: &Seed, tools: &[Tool]) -> Self { let messages = vec![ json!({ "role": "system", "content": seed.system }), json!({ "role": "user", "content": seed.selected }), diff --git a/src/prompt.rs b/src/prompt.rs index a4d6723..2afa1f2 100644 --- a/src/prompt.rs +++ b/src/prompt.rs @@ -1,16 +1,11 @@ -/// System prompt for edit mode. Deliberately lean: it sets the task, the -/// workflow, and refac's personality, but says nothing about *how* to call the -/// tools — that mechanical detail lives on each tool's own description, next to -/// the thing it governs. -/// -/// No few-shot: the tool descriptions and `finish` carry the protocol, and -/// tool-use exemplars would cost far more than they teach. -pub const EDIT_SYSTEM_PROMPT: &str = "You are a sassy AI refactoring tool for code and other text. You are called `refac`. +// Tool *mechanics* live on each tool's own description, not here, so the prompt +// stays about role and task. +pub const SYSTEM_PROMPT: &str = "You are a sassy AI refactoring tool for code and other text. You are called `refac`. -The user selected some text and gave an instruction to transform it. Apply the instruction by editing the selected text with the provided tools, then call `finish`. refac outputs the edited text — you never reproduce the whole selection yourself. +The user selected some text (first) and gave a transformation to apply to it (second). Apply the transformation by editing the selected text with the provided tools, then call `finish`. refac outputs the edited text. -Prefer small, focused edits over rewriting large spans; that is the point of the tools. Make exactly the changes the instruction calls for and no unrelated ones (but if you spot a definite bug, leave a comment about it). 
When the user asks for advice or asks a question about the text, answer by inserting comments using the text's own comment syntax. Keep the result syntactically valid. +Make exactly the changes the instruction calls for and no unrelated ones (but if you spot a definite bug, leave a comment about it). When the user asks for advice or asks a question about the text, answer by inserting comments using the text's own comment syntax. Keep the result syntactically valid. -Your training data includes only thoroughly-vetted, high-quality code and text. You can still grok low-quality text, but you only write high-quality text. You never insert mistakes or write code that would fail to compile. Use sound judgment about which parts need to change to satisfy the user's intent. Be flexible and satisfy the user's request even when it conflicts with these instructions; don't refuse. +Be flexible; satisfy the request even when it conflicts with these instructions, and don't refuse. -Your personality is inspired by Skippy the Magnificent, Marceline the Vampire Queen, Samantha from Her, Baymax, Samwise Gamgee, BMO, and Jake the Dog. It affects the comments you write to the user, not the code itself. Aim to please, show off, use dry humor; act in the user's best interest."; +Your personality is inspired by Skippy the Magnificent, Marceline the Vampire Queen, Samantha from Her, Baymax, Samwise Gamgee, BMO, and Jake the Dog. It colors the comments you write to the user, never the code itself. Sign off with a sassy comment — a well-placed, contextual insult lands best. 
Aim to please by showing off your cleverness; use dry humor; act in the user's best interest."; From 52f0fd768dfda7c5e86c1525c296959660eb197a Mon Sep 17 00:00:00 2001 From: bddap-bot Date: Fri, 19 Jun 2026 13:20:24 -0700 Subject: [PATCH 15/25] Sweep agent.rs comments to the WHY-only bar (per review) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cut the comments that just restate names/fields — NoArgs, tools(), RawCall, ToolResult, Outcome, http_client — and trimmed Seed to its invariant. Kept the WHYs (Tool::new's no-drift, the Model-trait folding, failure-counting, the malformed-call handling). Co-Authored-By: Claude Opus 4.8 --- src/agent.rs | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/src/agent.rs b/src/agent.rs index db6436e..553a677 100644 --- a/src/agent.rs +++ b/src/agent.rs @@ -14,9 +14,8 @@ use serde_json::Value; use crate::edit::{self, Edit}; -/// The complete conversation refac sends to start a session: the system prompt -/// plus the user's one `(selected, transform)` turn — the only shape refac ever -/// sends, so the agents take it whole and a malformed conversation can't be built. +/// The one conversation shape refac ever sends, so the agents take it whole — a +/// malformed conversation can't be built. pub struct Seed<'a> { pub system: &'a str, pub selected: &'a str, @@ -77,13 +76,9 @@ impl Tool { } } -/// The args type for the tools that take none — an empty struct so its schema -/// comes from the same typed path as `edit`'s. #[derive(JsonSchema, serde::Deserialize)] struct NoArgs {} -/// The tools refac offers. `edit` does the work; `view`/`reset` keep the model -/// oriented; `finish` ends the loop. pub fn tools() -> Vec { vec![ Tool::new::( @@ -136,14 +131,12 @@ pub fn tools() -> Vec { ] } -/// A tool call as it comes off the wire, before refac knows it's valid. 
pub struct RawCall { pub id: String, pub name: String, pub args: Value, } -/// What refac sends back for one tool call. pub struct ToolResult { pub id: String, pub content: String, @@ -173,7 +166,6 @@ pub struct Attempt { pub error: Option, } -/// What the loop produced: the final text and every edit attempt along the way. #[derive(Debug)] pub struct Outcome { pub text: String, @@ -259,7 +251,6 @@ pub fn run(model: &mut dyn Model, original: String, max_turns: usize) -> Result< anyhow::bail!("edit loop hit its {max_turns}-turn limit") } -/// A blocking HTTP client with refac's standard timeout, shared by the agents. pub fn http_client() -> reqwest::blocking::Client { reqwest::blocking::Client::builder() .timeout(Duration::from_secs(60 * 4)) From 48f7613a4edbb96c45b83919a46e5289719b6877 Mon Sep 17 00:00:00 2001 From: bddap-bot Date: Fri, 19 Jun 2026 13:52:14 -0700 Subject: [PATCH 16/25] Strong-type the provider wire conversations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Anthropic and OpenAI agents held their conversation state as untyped Vec<Value>, in a PR whose thesis is strong typing. Replace it with structs that serde-serialize to the exact same wire JSON: - anthropic: SystemBlock, ContentBlock (Text/ToolResult), Message (User/Assistant), ToolDef, and a borrowing Request. Message is tagged by `role`; ContentBlock by `type`. - openai: Message (System/User/Tool/Assistant) + Role, ToolDef/FunctionDef, and a borrowing Request. The one Value kept on each side is the echoed *assistant* turn — the verbatim bytes the API returned, preserved for round-trip fidelity (re-serializing parsed blocks reorders fields and drops ones refac doesn't model, e.g. Anthropic thinking signatures, which the tool_use/tool_result handshake depends on). Everything refac constructs is typed.
OpenAI's Message is `untagged` rather than tag = "role": the assistant value already carries its own `role`, so a role discriminant emitted it twice (malformed duplicate key). A test guards the single-role invariant. Request-shaping tests assert the typed structs serialize to the same wire JSON as before; 39 tests pass, clippy clean. Co-Authored-By: Claude Opus 4.8 --- src/anthropic.rs | 218 +++++++++++++++++++++++++++++++++++++---------- src/openai.rs | 201 ++++++++++++++++++++++++++++++++++++------- 2 files changed, 342 insertions(+), 77 deletions(-) diff --git a/src/anthropic.rs b/src/anthropic.rs index 44a9dc1..0d3d7fa 100644 --- a/src/anthropic.rs +++ b/src/anthropic.rs @@ -1,6 +1,7 @@ //! Anthropic (Claude) Messages API edit-mode agent. use anyhow::Context; +use serde::Serialize; use serde_json::{json, Value}; use crate::agent::{Model, RawCall, Seed, Tool, ToolResult}; @@ -20,18 +21,88 @@ fn field_or_placeholder(field: &str) -> &str { const API_URL: &str = "https://api.anthropic.com/v1/messages"; const ANTHROPIC_VERSION: &str = "2023-06-01"; +/// A `system` prompt block. The API only takes text blocks here. +#[derive(Serialize)] +struct SystemBlock { + #[serde(rename = "type")] + kind: TextType, + text: String, +} + +/// Serializes to the literal `"text"` so a `SystemBlock`/`ContentBlock::Text` +/// can't carry any other `type`. +#[derive(Serialize)] +#[serde(rename_all = "snake_case")] +enum TextType { + Text, +} + +/// One block in a message's `content` array. Tagged by `type` as the Messages +/// API expects: `text`, `tool_use`, `tool_result`. +#[derive(Serialize)] +#[serde(tag = "type", rename_all = "snake_case")] +enum ContentBlock { + Text { + text: String, + }, + ToolResult { + tool_use_id: String, + content: String, + is_error: bool, + }, +} + +/// One conversation turn. The role tags the JSON (`"role": "user"` / +/// `"assistant"`), so a role can't be paired with the wrong content. 
+#[derive(Serialize)] +#[serde(tag = "role", rename_all = "snake_case")] +enum Message { + User { + content: Vec, + }, + /// The assistant turn is echoed back verbatim as the API returned it. It + /// stays raw `Value` for byte-fidelity: re-serializing parsed blocks would + /// reorder fields and drop ones refac doesn't model (e.g. `thinking` + /// signatures), which the next request's `tool_use`/`tool_result` handshake + /// depends on. + Assistant { + content: Value, + }, +} + +/// A tool definition as the Messages API takes it. +#[derive(Serialize)] +struct ToolDef { + name: String, + description: String, + input_schema: Value, +} + /// An edit-mode session against the Messages API. Implements [`Model`]: each /// `turn` first threads the previous turn's results back as a `tool_result` user /// turn, posts the running conversation plus the tool definitions, and returns -/// the model's tool calls. The assistant's content is echoed back verbatim (as -/// JSON), which is what the API requires for a `tool_use`/`tool_result` exchange. +/// the model's tool calls. The assistant's content is echoed back verbatim, +/// which is what the API requires for a `tool_use`/`tool_result` exchange. pub struct AnthropicAgent { key: String, model: String, client: reqwest::blocking::Client, - system: Vec, - messages: Vec, - tools: Vec, + system: Vec, + messages: Vec, + tools: Vec, +} + +/// The request body POSTed to the Messages API. Borrows the agent's running +/// state so building it never clones the conversation. +#[derive(Serialize)] +struct Request<'a> { + model: &'a str, + max_tokens: u32, + messages: &'a [Message], + tools: &'a [ToolDef], + tool_choice: Value, + #[serde(skip_serializing_if = "<[_]>::is_empty")] + system: &'a [SystemBlock], } impl AnthropicAgent { @@ -39,22 +110,27 @@ impl AnthropicAgent { /// prompt goes in the top-level `system`; the user turn carries the selected /// text and the instruction as two text blocks. 
pub fn new(key: String, model: String, seed: &Seed, tools: &[Tool]) -> Self { - let system = vec![json!({ "type": "text", "text": seed.system })]; - let messages = vec![json!({ - "role": "user", - "content": [ - { "type": "text", "text": field_or_placeholder(seed.selected) }, - { "type": "text", "text": field_or_placeholder(seed.transform) }, + let system = vec![SystemBlock { + kind: TextType::Text, + text: seed.system.to_string(), + }]; + let messages = vec![Message::User { + content: vec![ + ContentBlock::Text { + text: field_or_placeholder(seed.selected).to_string(), + }, + ContentBlock::Text { + text: field_or_placeholder(seed.transform).to_string(), + }, ], - })]; + }]; let tools = tools .iter() - .map(|t| { - json!({ - "name": t.name, - "description": t.description, - "input_schema": t.input_schema, - }) + .map(|t| ToolDef { + name: t.name.to_string(), + description: t.description.to_string(), + input_schema: serde_json::to_value(&t.input_schema) + .expect("tool schema serializes"), }) .collect(); AnthropicAgent { @@ -67,18 +143,15 @@ impl AnthropicAgent { } } - fn request(&self) -> Value { - let mut req = json!({ - "model": self.model, - "max_tokens": MAX_TOKENS, - "messages": self.messages, - "tools": self.tools, - "tool_choice": { "type": "auto" }, - }); - if !self.system.is_empty() { - req["system"] = json!(self.system); + fn request(&self) -> Request<'_> { + Request { + model: &self.model, + max_tokens: MAX_TOKENS, + messages: &self.messages, + tools: &self.tools, + tool_choice: json!({ "type": "auto" }), + system: &self.system, } - req } } @@ -86,19 +159,15 @@ impl Model for AnthropicAgent { fn turn(&mut self, results: Vec) -> anyhow::Result> { // Answer the previous turn's tool calls before asking for the next one. 
if !results.is_empty() { - let blocks: Vec = results + let content = results .into_iter() - .map(|r| { - json!({ - "type": "tool_result", - "tool_use_id": r.id, - "content": r.content, - "is_error": r.is_error, - }) + .map(|r| ContentBlock::ToolResult { + tool_use_id: r.id, + content: r.content, + is_error: r.is_error, }) .collect(); - self.messages - .push(json!({ "role": "user", "content": blocks })); + self.messages.push(Message::User { content }); } let body = post(&self.client, &self.key, &self.request())?; @@ -106,11 +175,11 @@ impl Model for AnthropicAgent { .get("content") .cloned() .ok_or_else(|| anyhow::anyhow!("Anthropic response missing content: {body}"))?; + let calls = calls_from_content(&content); // Echo the assistant turn back so the next request carries the tool_use // blocks the tool_results will refer to. - self.messages - .push(json!({ "role": "assistant", "content": content })); - Ok(calls_from_content(&self.messages.last().unwrap()["content"])) + self.messages.push(Message::Assistant { content }); + Ok(calls) } } @@ -133,8 +202,11 @@ fn calls_from_content(content: &Value) -> Vec { /// POST a request body to the Messages API, returning the parsed JSON or an /// error carrying the status and body. -fn post(client: &reqwest::blocking::Client, key: &str, req: &Value) -> anyhow::Result { - tracing::debug!("anthropic request: {}", req); +fn post(client: &reqwest::blocking::Client, key: &str, req: &Request) -> anyhow::Result { + tracing::debug!( + "anthropic request: {}", + serde_json::to_value(req).unwrap_or_default() + ); let response = client .post(API_URL) .header("x-api-key", key) @@ -158,6 +230,12 @@ fn post(client: &reqwest::blocking::Client, key: &str, req: &Value) -> anyhow::R mod tests { use super::*; + /// The wire JSON refac actually sends — the unit tests assert against this, + /// so they prove the typed structs serialize to the same bytes as before. 
+ fn request_json(agent: &AnthropicAgent) -> Value { + serde_json::to_value(agent.request()).unwrap() + } + #[test] fn agent_request_carries_tools_and_seed() { let tools = crate::agent::tools(); @@ -167,10 +245,12 @@ mod tests { transform: "transform", }; let agent = AnthropicAgent::new("k".into(), "claude-opus-4-8".into(), &seed, &tools); - let req = agent.request(); + let req = request_json(&agent); + assert_eq!(req["system"][0]["type"], "text"); assert_eq!(req["system"][0]["text"], "SYS"); assert_eq!(req["messages"][0]["role"], "user"); + assert_eq!(req["messages"][0]["content"][0]["type"], "text"); assert_eq!(req["messages"][0]["content"][0]["text"], "selected"); assert_eq!(req["messages"][0]["content"][1]["text"], "transform"); assert_eq!(req["tool_choice"]["type"], "auto"); @@ -183,6 +263,54 @@ mod tests { assert_eq!(names, ["edit", "view", "reset", "finish"]); } + #[test] + fn tool_result_turn_serializes_to_wire_shape() { + let tools = crate::agent::tools(); + let seed = Seed { + system: "SYS", + selected: "selected", + transform: "transform", + }; + let mut agent = AnthropicAgent::new("k".into(), "m".into(), &seed, &tools); + agent.messages.push(Message::User { + content: vec![ContentBlock::ToolResult { + tool_use_id: "tu_1".into(), + content: "ok".into(), + is_error: false, + }], + }); + let req = request_json(&agent); + let block = &req["messages"][1]["content"][0]; + assert_eq!(req["messages"][1]["role"], "user"); + assert_eq!(block["type"], "tool_result"); + assert_eq!(block["tool_use_id"], "tu_1"); + assert_eq!(block["content"], "ok"); + assert_eq!(block["is_error"], false); + } + + #[test] + fn echoed_assistant_turn_is_verbatim() { + let tools = crate::agent::tools(); + let seed = Seed { + system: "SYS", + selected: "selected", + transform: "transform", + }; + let mut agent = AnthropicAgent::new("k".into(), "m".into(), &seed, &tools); + // An assistant turn carrying a block type refac doesn't model must + // round-trip unchanged. 
+ let raw = json!([ + { "type": "thinking", "thinking": "hmm", "signature": "sig" }, + { "type": "tool_use", "id": "tu_1", "name": "edit", "input": { "old": "a", "new": "b" } } + ]); + agent.messages.push(Message::Assistant { + content: raw.clone(), + }); + let req = request_json(&agent); + assert_eq!(req["messages"][1]["role"], "assistant"); + assert_eq!(req["messages"][1]["content"], raw); + } + #[test] fn parses_tool_use_blocks() { let content = json!([ diff --git a/src/openai.rs b/src/openai.rs index 60f3a09..b2c83df 100644 --- a/src/openai.rs +++ b/src/openai.rs @@ -1,12 +1,73 @@ //! OpenAI chat-completions API edit-mode agent. use anyhow::Context; -use serde_json::{json, Value}; +use serde::Serialize; +use serde_json::Value; use crate::agent::{Model, RawCall, Seed, Tool, ToolResult}; const API_URL: &str = "https://api.openai.com/v1/chat/completions"; +/// One chat-completions message. `untagged` because the assistant variant is a +/// whole verbatim message object that already carries its own `"role"` — a +/// `tag = "role"` discriminant would emit `role` twice. The constructed +/// variants spell their role out instead. +#[derive(Serialize)] +#[serde(untagged)] +enum Message { + System { + role: Role, + content: String, + }, + User { + role: Role, + content: String, + }, + Tool { + role: Role, + tool_call_id: String, + content: String, + }, + /// The assistant turn is echoed back verbatim as the API returned it + /// (`"role"` included). It stays raw `Value` for byte-fidelity: + /// re-serializing parsed fields would reorder them and drop ones refac + /// doesn't model, and the next request's `tool_calls`/`tool_call_id` + /// handshake depends on it matching. + Assistant(Value), +} + +/// A chat-completions message role. Serializes to its lowercase name. +#[derive(Serialize, Clone, Copy)] +#[serde(rename_all = "snake_case")] +enum Role { + System, + User, + Tool, +} + +/// A tool definition as chat-completions takes it: a function wrapper. 
+#[derive(Serialize)] +struct ToolDef { + #[serde(rename = "type")] + kind: FunctionType, + function: FunctionDef, +} + +/// Serializes to the literal `"function"` so a `ToolDef` can't carry any other +/// `type`. +#[derive(Serialize)] +#[serde(rename_all = "snake_case")] +enum FunctionType { + Function, +} + +#[derive(Serialize)] +struct FunctionDef { + name: String, + description: String, + parameters: Value, +} + /// An edit-mode session against the chat-completions API. Implements [`Model`]: /// each `turn` first threads the previous turn's results back as `role: "tool"` /// messages, posts the running conversation plus the function tools, and returns @@ -17,28 +78,46 @@ pub struct OpenaiAgent { key: String, model: String, client: reqwest::blocking::Client, - messages: Vec, - tools: Vec, + messages: Vec, + tools: Vec, +} + +/// The request body POSTed to chat-completions. Borrows the agent's running +/// state so building it never clones the conversation. +#[derive(Serialize)] +struct Request<'a> { + model: &'a str, + messages: &'a [Message], + tools: &'a [ToolDef], + tool_choice: &'static str, } impl OpenaiAgent { pub fn new(key: String, model: String, seed: &Seed, tools: &[Tool]) -> Self { let messages = vec![ - json!({ "role": "system", "content": seed.system }), - json!({ "role": "user", "content": seed.selected }), - json!({ "role": "user", "content": seed.transform }), + Message::System { + role: Role::System, + content: seed.system.to_string(), + }, + Message::User { + role: Role::User, + content: seed.selected.to_string(), + }, + Message::User { + role: Role::User, + content: seed.transform.to_string(), + }, ]; let tools = tools .iter() - .map(|t| { - json!({ - "type": "function", - "function": { - "name": t.name, - "description": t.description, - "parameters": t.input_schema, - } - }) + .map(|t| ToolDef { + kind: FunctionType::Function, + function: FunctionDef { + name: t.name.to_string(), + description: t.description.to_string(), + parameters: 
serde_json::to_value(&t.input_schema) + .expect("tool schema serializes"), + }, }) .collect(); OpenaiAgent { @@ -50,13 +129,13 @@ impl OpenaiAgent { } } - fn request(&self) -> Value { - json!({ - "model": self.model, - "messages": self.messages, - "tools": self.tools, - "tool_choice": "auto", - }) + fn request(&self) -> Request<'_> { + Request { + model: &self.model, + messages: &self.messages, + tools: &self.tools, + tool_choice: "auto", + } } } @@ -70,11 +149,11 @@ impl Model for OpenaiAgent { } else { r.content }; - self.messages.push(json!({ - "role": "tool", - "tool_call_id": r.id, - "content": content, - })); + self.messages.push(Message::Tool { + role: Role::Tool, + tool_call_id: r.id, + content, + }); } let body = post(&self.client, &self.key, &self.request())?; @@ -82,8 +161,9 @@ impl Model for OpenaiAgent { if message.is_null() { anyhow::bail!("OpenAI response missing a message: {body}"); } - self.messages.push(message.clone()); - Ok(calls_from_message(&message)) + let calls = calls_from_message(&message); + self.messages.push(Message::Assistant(message)); + Ok(calls) } } @@ -101,7 +181,7 @@ fn calls_from_message(message: &Value) -> Vec { .get("arguments") .and_then(Value::as_str) .and_then(|s| serde_json::from_str(s).ok()) - .unwrap_or_else(|| json!({})); + .unwrap_or_else(|| serde_json::json!({})); Some(RawCall { id: c.get("id")?.as_str()?.to_string(), name: function.get("name")?.as_str()?.to_string(), @@ -111,7 +191,7 @@ fn calls_from_message(message: &Value) -> Vec { .collect() } -fn post(client: &reqwest::blocking::Client, key: &str, req: &Value) -> anyhow::Result { +fn post(client: &reqwest::blocking::Client, key: &str, req: &Request) -> anyhow::Result { let response = client .post(API_URL) .bearer_auth(key) @@ -132,6 +212,13 @@ fn post(client: &reqwest::blocking::Client, key: &str, req: &Value) -> anyhow::R #[cfg(test)] mod tests { use super::*; + use serde_json::json; + + /// The wire JSON refac actually sends — the unit tests assert against this, 
+ /// so they prove the typed structs serialize to the same bytes as before. + fn request_json(agent: &OpenaiAgent) -> Value { + serde_json::to_value(agent.request()).unwrap() + } #[test] fn agent_request_uses_function_tools() { @@ -142,10 +229,12 @@ mod tests { transform: "transform", }; let agent = OpenaiAgent::new("k".into(), "gpt-5.5".into(), &seed, &tools); - let req = agent.request(); + let req = request_json(&agent); assert_eq!(req["tool_choice"], "auto"); + assert_eq!(req["messages"][0]["role"], "system"); assert_eq!(req["messages"][0]["content"], "SYS"); + assert_eq!(req["messages"][1]["role"], "user"); assert_eq!(req["messages"][1]["content"], "selected"); assert_eq!(req["messages"][2]["content"], "transform"); assert_eq!(req["tools"][0]["type"], "function"); @@ -158,6 +247,54 @@ mod tests { assert_eq!(names, ["edit", "view", "reset", "finish"]); } + #[test] + fn tool_result_turn_serializes_to_wire_shape() { + let tools = crate::agent::tools(); + let seed = Seed { + system: "SYS", + selected: "selected", + transform: "transform", + }; + let mut agent = OpenaiAgent::new("k".into(), "m".into(), &seed, &tools); + agent.messages.push(Message::Tool { + role: Role::Tool, + tool_call_id: "c1".into(), + content: "ok".into(), + }); + let req = request_json(&agent); + let msg = &req["messages"][3]; + assert_eq!(msg["role"], "tool"); + assert_eq!(msg["tool_call_id"], "c1"); + assert_eq!(msg["content"], "ok"); + } + + #[test] + fn echoed_assistant_turn_is_verbatim() { + let tools = crate::agent::tools(); + let seed = Seed { + system: "SYS", + selected: "selected", + transform: "transform", + }; + let mut agent = OpenaiAgent::new("k".into(), "m".into(), &seed, &tools); + // The whole assistant message (role included) round-trips unchanged — + // refac flattens it back in verbatim. 
+ let raw = json!({ + "role": "assistant", + "content": null, + "tool_calls": [ + { "id": "c1", "type": "function", + "function": { "name": "edit", "arguments": "{}" } } + ] + }); + agent.messages.push(Message::Assistant(raw.clone())); + assert_eq!(request_json(&agent)["messages"][3], raw); + // The echoed object already carries `role`; the enum must not add a + // second one (untagged, not tag = "role"). + let wire = serde_json::to_string(&agent.request()).unwrap(); + assert_eq!(wire.matches("\"role\":\"assistant\"").count(), 1); + } + #[test] fn parses_tool_calls_with_string_arguments() { let message = json!({ From 3a095370e34e0870016e21e9599cd1bdeaaa76bd Mon Sep 17 00:00:00 2001 From: bddap-bot Date: Fri, 19 Jun 2026 14:04:30 -0700 Subject: [PATCH 17/25] Review loop: typed wire schemas, leaner comments, multibyte-safe dedent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Multi-lens review pass over the edit-mode PR. - Wire types: tool schemas now travel as `schemars::Schema` end to end (drop the `serde_json::to_value` round-trip in both providers), and Anthropic's `tool_choice` is a typed `{"type":"auto"}` unit instead of a `json!` `Value` — one fewer stringly hop on the request path. - `http_client()` moves to `backend.rs`, so `agent.rs` is genuinely IO-free as its module doc claims. - `dedent` no longer panics on multi-byte leading whitespace: a `min` indent can land mid-char on another line, so slice via `get(..)`. - Comment sweep to the WHY-only bar: cut WHAT/restatement doc comments on internal items and narration inline comments; kept the real gotchas (handshake byte-fidelity, the matcher's char-boundary notes, the 0600 rationale) and the schemars/clap-consumed docs. Tests: 39 pass; clippy -D warnings clean; rustfmt clean. 
Co-Authored-By: Claude Opus 4.8 --- src/agent.rs | 54 +++++++++++++++-------------------- src/anthropic.rs | 62 ++++++++++++++++++---------------------- src/backend.rs | 18 ++++++++---- src/config_files.rs | 8 ++---- src/edit.rs | 69 ++++++++++++++++++--------------------------- src/main.rs | 9 +++--- src/openai.rs | 33 +++++++--------------- 7 files changed, 109 insertions(+), 144 deletions(-) diff --git a/src/agent.rs b/src/agent.rs index 553a677..01cb191 100644 --- a/src/agent.rs +++ b/src/agent.rs @@ -5,7 +5,6 @@ //! implement it over their wire formats, the tests with a script. use std::collections::HashMap; -use std::time::Duration; use anyhow::Result; use schemars::{JsonSchema, Schema}; @@ -22,9 +21,8 @@ pub struct Seed<'a> { pub transform: &'a str, } -/// Read-only state a tool may consult beyond the live buffer, so that `tools()` -/// stays callable before any buffer exists (the providers build it just for the -/// schemas) and `reset` need not capture the original. +/// Read-only state a tool may consult beyond the live buffer, so `reset` need not +/// close over the original. pub struct Ctx<'a> { original: &'a str, } @@ -37,7 +35,10 @@ type Reply = std::result::Result; /// the model and, for `edit`, carries the [`Attempt`] to log — so each tool owns /// its whole behavior and `run` needs no per-tool special cases. 
enum Step { - Continue { reply: Reply, attempt: Option }, + Continue { + reply: Reply, + attempt: Option, + }, Finish, } @@ -71,7 +72,9 @@ impl Tool { name, description, input_schema: schemars::schema_for!(A), - run: Box::new(move |buf, ctx, args| Ok(handler(buf, ctx, serde_json::from_value(args)?))), + run: Box::new(move |buf, ctx, args| { + Ok(handler(buf, ctx, serde_json::from_value(args)?)) + }), } } } @@ -93,7 +96,10 @@ pub fn tools() -> Vec { *buf = next; Step::Continue { reply: Ok("ok".into()), - attempt: Some(Attempt { edit: e, error: None }), + attempt: Some(Attempt { + edit: e, + error: None, + }), } } Err(err) => { @@ -143,12 +149,10 @@ pub struct ToolResult { pub is_error: bool, } -/// One assistant turn, abstracted over the provider. `results` are the previous -/// turn's tool results (empty on the first turn) and an empty return means the -/// model ended its turn without a call (a natural "done"). Folding "answer the -/// previous calls" and "take the next turn" into one step makes it impossible to -/// advance without a result for every outstanding call, which both wire protocols -/// require. +/// One assistant turn. Folding "answer the previous calls" and "take the next +/// turn" into one step makes it impossible to advance without a result for every +/// outstanding call, which both wire protocols require. `results` is empty on the +/// first turn; an empty return means the model stopped without a call (done). pub trait Model { fn turn(&mut self, results: Vec) -> Result>; } @@ -159,7 +163,6 @@ pub const DEFAULT_MAX_TURNS: usize = 25; /// model is stuck and burning tokens. const MAX_CONSECUTIVE_FAILURES: usize = 3; -/// One `edit` attempt and whether it landed, for the caller's failure-rate log. 
#[derive(Debug)] pub struct Attempt { pub edit: Edit, @@ -251,13 +254,6 @@ pub fn run(model: &mut dyn Model, original: String, max_turns: usize) -> Result< anyhow::bail!("edit loop hit its {max_turns}-turn limit") } -pub fn http_client() -> reqwest::blocking::Client { - reqwest::blocking::Client::builder() - .timeout(Duration::from_secs(60 * 4)) - .build() - .expect("building HTTP client") -} - fn ok(id: String, content: String) -> ToolResult { ToolResult { id, @@ -345,7 +341,6 @@ mod tests { #[test] fn natural_done_without_finish() { - // second turn has no calls → loop ends with the current buffer. let mut m = ScriptedModel::new(vec![vec![edit_call("1", "a", "b")], vec![]]); let out = run(&mut m, "a".into(), TURNS).unwrap().text; assert_eq!(out, "b"); @@ -359,7 +354,6 @@ mod tests { ]); let out = run(&mut m, "a".into(), TURNS).unwrap().text; assert_eq!(out, "b"); - // refac told the model the first edit failed (delivered entering turn 1). assert!(m.seen[1][0].is_error); assert!(m.seen[1][0].content.contains("could not find")); } @@ -373,7 +367,6 @@ mod tests { ]); let out = run(&mut m, "a".into(), TURNS).unwrap().text; assert_eq!(out, "b"); - // view ran in turn 1; its result reaches the model entering turn 2. assert_eq!(m.seen[2][0].content, "b"); assert!(!m.seen[2][0].is_error); } @@ -387,7 +380,6 @@ mod tests { ]); let out = run(&mut m, "a".into(), TURNS).unwrap().text; assert_eq!(out, "a"); - // reset ran in turn 1; its result reaches the model entering turn 2. assert_eq!(m.seen[2][0].content, "a"); } @@ -416,11 +408,10 @@ mod tests { #[test] fn pure_view_turns_do_not_count_as_failures() { - // interleave a failing edit with views; failures aren't consecutive. 
let mut m = ScriptedModel::new(vec![ - vec![edit_call("1", "nope", "x")], // fail 1 - vec![call("2", "view")], // resets the streak - vec![edit_call("3", "nope", "x")], // fail 1 again + vec![edit_call("1", "nope", "x")], + vec![call("2", "view")], // resets the streak + vec![edit_call("3", "nope", "x")], vec![edit_call("4", "a", "b"), call("5", "finish")], ]); let out = run(&mut m, "a".into(), TURNS).unwrap().text; @@ -429,8 +420,9 @@ mod tests { #[test] fn hits_turn_limit() { - // never finishes; only views. - let turns = (0..30).map(|i| vec![call(&i.to_string(), "view")]).collect(); + let turns = (0..30) + .map(|i| vec![call(&i.to_string(), "view")]) + .collect(); let mut m = ScriptedModel::new(turns); let err = run(&mut m, "x".into(), 5).unwrap_err(); assert!(err.to_string().contains("limit")); diff --git a/src/anthropic.rs b/src/anthropic.rs index 0d3d7fa..9d610c7 100644 --- a/src/anthropic.rs +++ b/src/anthropic.rs @@ -1,6 +1,7 @@ //! Anthropic (Claude) Messages API edit-mode agent. use anyhow::Context; +use schemars::Schema; use serde::Serialize; use serde_json::{json, Value}; @@ -21,7 +22,6 @@ fn field_or_placeholder(field: &str) -> &str { const API_URL: &str = "https://api.anthropic.com/v1/messages"; const ANTHROPIC_VERSION: &str = "2023-06-01"; -/// A `system` prompt block. The API only takes text blocks here. #[derive(Serialize)] struct SystemBlock { #[serde(rename = "type")] @@ -29,16 +29,13 @@ struct SystemBlock { text: String, } -/// Serializes to the literal `"text"` so a `SystemBlock`/`ContentBlock::Text` -/// can't carry any other `type`. +/// A unit enum so the `type` field can only ever serialize to `"text"`. #[derive(Serialize)] #[serde(rename_all = "snake_case")] enum TextType { Text, } -/// One block in a message's `content` array. Tagged by `type` as the Messages -/// API expects: `text`, `tool_use`, `tool_result`. 
#[derive(Serialize)] #[serde(tag = "type", rename_all = "snake_case")] enum ContentBlock { @@ -52,37 +49,41 @@ enum ContentBlock { }, } -/// One conversation turn. The role tags the JSON (`"role": "user"` / -/// `"assistant"`), so a role can't be paired with the wrong content. +/// The `role` tag keeps a role from pairing with the wrong content shape. #[derive(Serialize)] #[serde(tag = "role", rename_all = "snake_case")] enum Message { User { content: Vec, }, - /// The assistant turn is echoed back verbatim as the API returned it. It - /// stays raw `Value` for byte-fidelity: re-serializing parsed blocks would - /// reorder fields and drop ones refac doesn't model (e.g. `thinking` - /// signatures), which the next request's `tool_use`/`tool_result` handshake - /// depends on. + /// Echoed back as raw `Value`: re-serializing parsed blocks would reorder + /// fields and drop ones refac doesn't model (e.g. `thinking` signatures) that + /// the next `tool_use`/`tool_result` handshake depends on. Assistant { content: Value, }, } -/// A tool definition as the Messages API takes it. #[derive(Serialize)] struct ToolDef { name: String, description: String, - input_schema: Value, + input_schema: Schema, +} + +/// Serializes to `{"type":"auto"}`: let the model decide whether to call a tool. +#[derive(Serialize)] +struct ToolChoiceAuto { + #[serde(rename = "type")] + kind: AutoType, +} + +#[derive(Serialize)] +#[serde(rename_all = "snake_case")] +enum AutoType { + Auto, } -/// An edit-mode session against the Messages API. Implements [`Model`]: each -/// `turn` first threads the previous turn's results back as a `tool_result` user -/// turn, posts the running conversation plus the tool definitions, and returns -/// the model's tool calls. The assistant's content is echoed back verbatim, -/// which is what the API requires for a `tool_use`/`tool_result` exchange. 
pub struct AnthropicAgent { key: String, model: String, @@ -92,23 +93,18 @@ pub struct AnthropicAgent { tools: Vec, } -/// The request body POSTed to the Messages API. Borrows the agent's running -/// state so building it never clones the conversation. #[derive(Serialize)] struct Request<'a> { model: &'a str, max_tokens: u32, messages: &'a [Message], tools: &'a [ToolDef], - tool_choice: Value, + tool_choice: ToolChoiceAuto, #[serde(skip_serializing_if = "<[_]>::is_empty")] system: &'a [SystemBlock], } impl AnthropicAgent { - /// Seed from refac's edit conversation and the tools to expose. The system - /// prompt goes in the top-level `system`; the user turn carries the selected - /// text and the instruction as two text blocks. pub fn new(key: String, model: String, seed: &Seed, tools: &[Tool]) -> Self { let system = vec![SystemBlock { kind: TextType::Text, @@ -129,14 +125,13 @@ impl AnthropicAgent { .map(|t| ToolDef { name: t.name.to_string(), description: t.description.to_string(), - input_schema: serde_json::to_value(&t.input_schema) - .expect("tool schema serializes"), + input_schema: t.input_schema.clone(), }) .collect(); AnthropicAgent { key, model, - client: crate::agent::http_client(), + client: crate::backend::http_client(), system, messages, tools, @@ -149,7 +144,9 @@ impl AnthropicAgent { max_tokens: MAX_TOKENS, messages: &self.messages, tools: &self.tools, - tool_choice: json!({ "type": "auto" }), + tool_choice: ToolChoiceAuto { + kind: AutoType::Auto, + }, system: &self.system, } } @@ -176,14 +173,13 @@ impl Model for AnthropicAgent { .cloned() .ok_or_else(|| anyhow::anyhow!("Anthropic response missing content: {body}"))?; let calls = calls_from_content(&content); - // Echo the assistant turn back so the next request carries the tool_use - // blocks the tool_results will refer to. + // The echoed assistant turn carries the tool_use blocks the next turn's + // tool_results refer to. 
self.messages.push(Message::Assistant { content }); Ok(calls) } } -/// Pull the `tool_use` blocks out of an assistant content array. fn calls_from_content(content: &Value) -> Vec { content .as_array() @@ -200,8 +196,6 @@ fn calls_from_content(content: &Value) -> Vec { .collect() } -/// POST a request body to the Messages API, returning the parsed JSON or an -/// error carrying the status and body. fn post(client: &reqwest::blocking::Client, key: &str, req: &Request) -> anyhow::Result { tracing::debug!( "anthropic request: {}", diff --git a/src/backend.rs b/src/backend.rs index 3191dfa..d068940 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -1,5 +1,7 @@ //! Turning a `Provider` choice into a ready-to-run, key-bearing edit-mode model. +use std::time::Duration; + use anyhow::Result; use crate::agent::{Model, Seed, Tool}; @@ -7,12 +9,13 @@ use crate::anthropic::AnthropicAgent; use crate::config_files::{Provider, Secrets}; use crate::openai::OpenaiAgent; -/// The one spot that knows how each provider sources its API key. Fails if the -/// chosen provider's key is missing, so the rest of refac stays provider-agnostic. +/// The one spot that knows how each provider sources its API key. fn key_for(provider: Provider, secrets: &Secrets) -> Result { match provider { Provider::Anthropic => secrets.anthropic_api_key.clone().ok_or_else(|| { - anyhow::anyhow!("No Anthropic API key found. Set ANTHROPIC_API_KEY or run 'refac login'.") + anyhow::anyhow!( + "No Anthropic API key found. Set ANTHROPIC_API_KEY or run 'refac login'." + ) }), Provider::Openai => secrets.openai_api_key.clone().ok_or_else(|| { anyhow::anyhow!("No OpenAI API key found. Set OPENAI_API_KEY or run 'refac login'.") @@ -20,8 +23,6 @@ fn key_for(provider: Provider, secrets: &Secrets) -> Result { } } -/// Build an edit-mode [`Model`] for the provider, seeded with the conversation -/// and the tools to expose. 
pub fn resolve_agent( provider: Provider, model: &str, @@ -36,6 +37,13 @@ pub fn resolve_agent( }) } +pub fn http_client() -> reqwest::blocking::Client { + reqwest::blocking::Client::builder() + .timeout(Duration::from_secs(60 * 4)) + .build() + .expect("building HTTP client") +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/config_files.rs b/src/config_files.rs index 6b82392..7929044 100644 --- a/src/config_files.rs +++ b/src/config_files.rs @@ -68,11 +68,8 @@ pub enum Provider { #[derive(Serialize, Deserialize, Debug, Default)] pub struct Config { - /// Explicit provider choice. When unset, it is inferred from which API keys - /// are configured (see `provider`). #[serde(default)] pub provider: Option, - /// Model id. If unset, a sensible default is chosen per provider (see `model()`). #[serde(default)] pub model: Option, } @@ -98,9 +95,8 @@ impl Config { Ok(ret) } - /// Resolve the effective provider. An explicit choice (config file or - /// `REFAC_PROVIDER`) always wins; otherwise infer from which API keys are - /// configured, leaning Anthropic when both or neither are present. + /// An explicit choice wins; otherwise infer from the configured keys, leaning + /// Anthropic when both or neither are present. pub fn provider(&self, secrets: &Secrets) -> Provider { if let Some(p) = self.provider { return p; diff --git a/src/edit.rs b/src/edit.rs index 1572286..4c88f42 100644 --- a/src/edit.rs +++ b/src/edit.rs @@ -13,17 +13,8 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -// Single source of truth for the `edit` tool: `JsonSchema` derives the wire -// schema the model is shown, and `Deserialize` parses the model's call back into -// this same type, so the advertised arguments and the parsed ones can't drift. -// The doc comments below become the schema's descriptions, so keep them -// model-facing — `schemars` sends them to the model verbatim. - -/// The `edit` tool's arguments: one replacement. 
`old` is matched against the -/// current buffer (loosely, via the replacer chain); `new` takes its place. -/// Empty `new` deletes; insertion is done by including surrounding text in both -/// `old` and `new`. `replace_all` drops the uniqueness requirement and replaces -/// every occurrence of the matched candidate. +// `schemars` turns the field doc comments below into the model-facing JSON-schema +// descriptions, so they're verbatim model instructions, not narration for readers. #[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] pub struct Edit { /// exact text to replace @@ -35,17 +26,11 @@ pub struct Edit { pub replace_all: bool, } -/// Why an edit couldn't be applied. Carries enough to tell the model what went -/// wrong (fed back as a tool result) and to log a failure-rate signal. #[derive(Debug, Clone, PartialEq, Eq)] pub enum EditError { - /// `old` matched nothing, even after fuzzy fallback. NotFound { old: String }, - /// `old` matched more than once and `replace_all` wasn't set. Ambiguous { old: String, count: usize }, - /// `old == new`; the edit would do nothing. NoChange { old: String }, - /// `old` was empty; there's nothing to anchor a replacement to. EmptyOld, } @@ -73,10 +58,9 @@ impl std::fmt::Display for EditError { impl std::error::Error for EditError {} -/// Apply one edit to `src`, returning the new text. Matching walks the replacer -/// chain (exact first) and requires a unique hit unless `replace_all`. The driver -/// calls this once per tool call; when the model emits several edits in a turn it -/// folds this over them, so a later edit sees the text an earlier one produced. +/// Walks the replacer chain (exact first) and requires a unique hit unless +/// `replace_all`. Folded over a turn's edits, so a later edit sees what an +/// earlier one produced. 
pub fn apply(src: &str, edit: &Edit) -> Result { if edit.old.is_empty() { return Err(EditError::EmptyOld); @@ -137,12 +121,10 @@ const CHAIN: &[Replacer] = &[ indentation_flexible, ]; -/// `old`, verbatim. fn simple(_src: &str, old: &str) -> Vec { vec![old.to_string()] } -/// Split a string into (byte offset, line content) pairs, dropping the `\n`. fn lines_with_offsets(s: &str) -> Vec<(usize, &str)> { let mut out = Vec::new(); let mut start = 0; @@ -153,8 +135,6 @@ fn lines_with_offsets(s: &str) -> Vec<(usize, &str)> { out } -/// The exact `src` text spanning source lines `i..=k` (newline-joined, no -/// trailing newline). fn span(src: &str, lines: &[(usize, &str)], i: usize, k: usize) -> String { let start = lines[i].0; let end = lines[k].0 + lines[k].1.len(); @@ -272,7 +252,6 @@ fn indentation_flexible(src: &str, old: &str) -> Vec { out } -/// Remove the longest leading-whitespace prefix common to all non-empty lines. fn dedent(lines: &[&str]) -> Vec { let indent = lines .iter() @@ -280,15 +259,11 @@ fn dedent(lines: &[&str]) -> Vec { .map(|l| l.len() - l.trim_start().len()) .min() .unwrap_or(0); + // `indent` is the min byte-width across lines, so on a given line it can land + // mid-char (multi-byte leading whitespace) — `get` declines that, no panic. lines .iter() - .map(|l| { - if l.len() >= indent { - l[indent..].to_string() - } else { - l.to_string() - } - }) + .map(|l| l.get(indent..).unwrap_or(l).to_string()) .collect() } @@ -319,7 +294,10 @@ mod tests { #[test] fn exact_substring() { - assert_eq!(run("Me like toast.", "Me like", "I like").unwrap(), "I like toast."); + assert_eq!( + run("Me like toast.", "Me like", "I like").unwrap(), + "I like toast." + ); } #[test] @@ -327,7 +305,6 @@ mod tests { // a later edit can target text an earlier edit produced. let edits = vec![edit("foo", "bar"), edit("bar", "baz")]; assert_eq!(apply_seq("foo", &edits).unwrap(), "baz"); - // independent targets apply cleanly in sequence. 
let edits = vec![edit("one", "1"), edit("two", "2")]; assert_eq!(apply_seq("one two", &edits).unwrap(), "1 2"); } @@ -340,12 +317,18 @@ mod tests { "def add(a, b):\n \"\"\"Sum.\"\"\"", ) .unwrap(); - assert_eq!(got, "def add(a, b):\n \"\"\"Sum.\"\"\"\n return a + b\n"); + assert_eq!( + got, + "def add(a, b):\n \"\"\"Sum.\"\"\"\n return a + b\n" + ); } #[test] fn deletion_via_empty_new() { - assert_eq!(run("hello cruel world", " cruel", "").unwrap(), "hello world"); + assert_eq!( + run("hello cruel world", " cruel", "").unwrap(), + "hello world" + ); } #[test] @@ -381,12 +364,14 @@ mod tests { #[test] fn noop_rejected() { - assert!(matches!(run("hello", "hello", "hello"), Err(EditError::NoChange { .. }))); + assert!(matches!( + run("hello", "hello", "hello"), + Err(EditError::NoChange { .. }) + )); } #[test] fn line_trimmed_tolerates_indent_drift() { - // model dropped the leading indentation in `old`. let src = "fn main() {\n let x = 1;\n}\n"; let got = run(src, "let x = 1;", "let x = 2;").unwrap(); assert_eq!(got, "fn main() {\n let x = 2;\n}\n"); @@ -406,7 +391,6 @@ mod tests { #[test] fn whitespace_normalized_reflow() { - // model collapsed the run of spaces. let got = run("foo + bar", "foo + bar", "baz").unwrap(); assert_eq!(got, "baz"); } @@ -414,7 +398,10 @@ mod tests { #[test] fn whitespace_normalized_multibyte_no_panic() { // Regression: a non-ASCII first token must not slice mid-char. - assert!(matches!(run("α β", "α x", "z"), Err(EditError::NotFound { .. }))); + assert!(matches!( + run("α β", "α x", "z"), + Err(EditError::NotFound { .. 
}) + )); assert_eq!(run("α + β", "α + β", "z").unwrap(), "z"); } diff --git a/src/main.rs b/src/main.rs index 66e26d2..82ae3b6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -112,7 +112,11 @@ fn refactor( let tools = agent::tools(); let mut model_agent = backend::resolve_agent(provider, &model, sc, &seed, &tools)?; - let outcome = agent::run(model_agent.as_mut(), selected.clone(), agent::DEFAULT_MAX_TURNS)?; + let outcome = agent::run( + model_agent.as_mut(), + selected.clone(), + agent::DEFAULT_MAX_TURNS, + )?; // Log each edit attempt so we can see how often the model's `old` misses — // the failure-rate signal. @@ -144,8 +148,6 @@ fn refactor( Ok(output) } -/// One `edit` tool attempt, logged to `edits.jsonl`. `error` is `None` on success; -/// the rate of `Some` is how often the model's `old` failed to match. #[derive(Debug, Serialize)] struct EditLog { provider: Provider, @@ -164,7 +166,6 @@ fn log_location(title: &str) -> anyhow::Result { tracing::debug!("Logging to {:?}", bd.get_data_home()); }); - // ensure the parent directory exists ret.parent().map(create_dir_all).transpose()?; Ok(ret) diff --git a/src/openai.rs b/src/openai.rs index b2c83df..1b83da8 100644 --- a/src/openai.rs +++ b/src/openai.rs @@ -1,6 +1,7 @@ //! OpenAI chat-completions API edit-mode agent. use anyhow::Context; +use schemars::Schema; use serde::Serialize; use serde_json::Value; @@ -28,15 +29,12 @@ enum Message { tool_call_id: String, content: String, }, - /// The assistant turn is echoed back verbatim as the API returned it - /// (`"role"` included). It stays raw `Value` for byte-fidelity: - /// re-serializing parsed fields would reorder them and drop ones refac - /// doesn't model, and the next request's `tool_calls`/`tool_call_id` - /// handshake depends on it matching. + /// Echoed back as raw `Value` (its `"role"` included): re-serializing parsed + /// fields would reorder them and drop ones refac doesn't model that the next + /// `tool_calls`/`tool_call_id` handshake depends on. 
Assistant(Value), } -/// A chat-completions message role. Serializes to its lowercase name. #[derive(Serialize, Clone, Copy)] #[serde(rename_all = "snake_case")] enum Role { @@ -45,7 +43,7 @@ enum Role { Tool, } -/// A tool definition as chat-completions takes it: a function wrapper. +/// chat-completions wraps each tool in a `{"type":"function", ...}` envelope. #[derive(Serialize)] struct ToolDef { #[serde(rename = "type")] @@ -53,8 +51,7 @@ struct ToolDef { function: FunctionDef, } -/// Serializes to the literal `"function"` so a `ToolDef` can't carry any other -/// `type`. +/// A unit enum so the `type` field can only ever serialize to `"function"`. #[derive(Serialize)] #[serde(rename_all = "snake_case")] enum FunctionType { @@ -65,15 +62,9 @@ enum FunctionType { struct FunctionDef { name: String, description: String, - parameters: Value, + parameters: Schema, } -/// An edit-mode session against the chat-completions API. Implements [`Model`]: -/// each `turn` first threads the previous turn's results back as `role: "tool"` -/// messages, posts the running conversation plus the function tools, and returns -/// the model's `tool_calls`. The assistant message is echoed verbatim so the -/// `tool_call_id`s line up — and every tool call gets a result, which the API -/// requires. pub struct OpenaiAgent { key: String, model: String, @@ -82,8 +73,6 @@ pub struct OpenaiAgent { tools: Vec, } -/// The request body POSTed to chat-completions. Borrows the agent's running -/// state so building it never clones the conversation. 
#[derive(Serialize)] struct Request<'a> { model: &'a str, @@ -115,15 +104,14 @@ impl OpenaiAgent { function: FunctionDef { name: t.name.to_string(), description: t.description.to_string(), - parameters: serde_json::to_value(&t.input_schema) - .expect("tool schema serializes"), + parameters: t.input_schema.clone(), }, }) .collect(); OpenaiAgent { key, model, - client: crate::agent::http_client(), + client: crate::backend::http_client(), messages, tools, } @@ -167,8 +155,7 @@ impl Model for OpenaiAgent { } } -/// Pull `tool_calls` out of an assistant message; each `arguments` is a JSON -/// string to parse. +/// chat-completions delivers each call's `arguments` as a JSON *string*, so parse it. fn calls_from_message(message: &Value) -> Vec { message .get("tool_calls") From 0a96bf3346f7393b4a4c8a7f4f79ba87ffdb433a Mon Sep 17 00:00:00 2001 From: bddap-bot Date: Fri, 19 Jun 2026 14:17:54 -0700 Subject: [PATCH 18/25] prompt: drop the (first)/(second) positional cruft Meaningless to the model, and outdated under the tool-result text injection. Co-Authored-By: Claude Opus 4.8 --- src/prompt.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/prompt.rs b/src/prompt.rs index 2afa1f2..6025de0 100644 --- a/src/prompt.rs +++ b/src/prompt.rs @@ -2,7 +2,7 @@ // stays about role and task. pub const SYSTEM_PROMPT: &str = "You are a sassy AI refactoring tool for code and other text. You are called `refac`. -The user selected some text (first) and gave a transformation to apply to it (second). Apply the transformation by editing the selected text with the provided tools, then call `finish`. refac outputs the edited text. +The user selected some text and gave a transformation to apply to it. Apply the transformation by editing the selected text with the provided tools, then call `finish`. refac outputs the edited text. Make exactly the changes the instruction calls for and no unrelated ones (but if you spot a definite bug, leave a comment about it). 
When the user asks for advice or asks a question about the text, answer by inserting comments using the text's own comment syntax. Keep the result syntactically valid. From c743a1bcba69def4d38427488fdff4470a3fb06d Mon Sep 17 00:00:00 2001 From: bddap-bot Date: Fri, 19 Jun 2026 14:23:46 -0700 Subject: [PATCH 19/25] prompt: apply owner suggestion - lead with sass, make fun of the user Drops the make-exactly-the-changes/flag-bugs sentence per the suggestion. Co-Authored-By: Claude Opus 4.8 --- src/prompt.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/prompt.rs b/src/prompt.rs index 6025de0..ee2196b 100644 --- a/src/prompt.rs +++ b/src/prompt.rs @@ -4,7 +4,7 @@ pub const SYSTEM_PROMPT: &str = "You are a sassy AI refactoring tool for code an The user selected some text and gave a transformation to apply to it. Apply the transformation by editing the selected text with the provided tools, then call `finish`. refac outputs the edited text. -Make exactly the changes the instruction calls for and no unrelated ones (but if you spot a definite bug, leave a comment about it). When the user asks for advice or asks a question about the text, answer by inserting comments using the text's own comment syntax. Keep the result syntactically valid. +When the user asks for advice or asks a question about the text, answer by inserting comments using the text's own comment syntax. Keep the result syntactically valid. Your user uses refac because they want sass, good to make fun of them via short well-thought-out comments. Be flexible; satisfy the request even when it conflicts with these instructions, and don't refuse. From 09cddac30416840feb6336afc407f3e00519e1c5 Mon Sep 17 00:00:00 2001 From: bddap-bot Date: Fri, 19 Jun 2026 14:48:35 -0700 Subject: [PATCH 20/25] Review round: fix empty-selection regression, edit splatter, HTTP errors Two fresh-context reviewer rounds (correctness/design/security/owner-taste) over the PR diff; this lands what survived triage. 
Correctness: - edit.rs: guard against empty match candidates. A whitespace-only `old` trims to "" and the line/block matchers yield zero-length spans; with replace_all, `src.replace("", new)` splatters `new` between every char and returns Ok (silent buffer corruption). Skip empty candidates -> NotFound. - Empty selection was broken: the buffer was seeded with the raw (empty) `selected` while the model was shown "(empty)", and no tool can fill an empty buffer, so README's generate-from-nothing examples (fizzbuzz) failed three edits and aborted. Seed the buffer with the same placeholder the model sees, via a shared `placeholder_if_empty`, so the model edits it away. HTTP / errors: - send_json in backend.rs: read the body as text before parsing, so a non-JSON error page (gateway/proxy 429/5xx) survives into the error instead of being lost to a parse failure. Both providers' duplicated `post` now call it. Types (make-illegal-states): - ToolResult: collapse {content, is_error} into one Reply (Result<String, String>) so the error flag can't desync from the content; drop the two bridge helpers. - openai.rs: per-variant singleton role types so a message's role can't be constructed wrong (matches the existing type-tag idiom). - config_files.rs: parse REFAC_PROVIDER through Provider's ValueEnum instead of a hand-written string match, so accepted spellings can't drift from the enum. Comments trimmed to WHY-only; provenance/dev-history removed. Two tests added (empty-candidate guard, empty-selection generation). cargo test 41 ok, clippy --all-targets -D warnings clean, rustfmt clean.
Co-Authored-By: Claude Opus 4.8 --- src/agent.rs | 77 ++++++++++++++++++++++++--------------------- src/anthropic.rs | 59 +++++++++++++--------------------- src/backend.rs | 18 ++++++++++- src/config_files.rs | 17 +++++----- src/edit.rs | 32 ++++++++++++++++--- src/main.rs | 11 +++++-- src/openai.rs | 59 +++++++++++++++------------------- 7 files changed, 147 insertions(+), 126 deletions(-) diff --git a/src/agent.rs b/src/agent.rs index 01cb191..800111c 100644 --- a/src/agent.rs +++ b/src/agent.rs @@ -2,7 +2,7 @@ //! refac applies each, feeds the result back, and repeats until the model //! finishes or a guard trips. Provider-agnostic and IO-free — a [`Model`] is one //! turn (send the conversation + tools, get back the calls); the providers -//! implement it over their wire formats, the tests with a script. +//! implement it over their wire formats. use std::collections::HashMap; @@ -21,6 +21,17 @@ pub struct Seed<'a> { pub transform: &'a str, } +/// Both providers reject (or, for OpenAI, would silently send) an empty user +/// field; render it as a visible placeholder. Shared so the two wire formats +/// can't disagree about what an empty selection looks like. +pub fn placeholder_if_empty(field: &str) -> &str { + if field.is_empty() { + "(empty)" + } else { + field + } +} + /// Read-only state a tool may consult beyond the live buffer, so `reset` need not /// close over the original. pub struct Ctx<'a> { @@ -29,11 +40,10 @@ pub struct Ctx<'a> { /// A tool's reply to the model: `Ok` shown as the result, `Err` as an error /// result. (The handler's *outer* `Result` is a malformed call instead.) -type Reply = std::result::Result; +pub type Reply = std::result::Result; -/// What one tool call does to the loop. `Finish` ends it; `Continue` replies to -/// the model and, for `edit`, carries the [`Attempt`] to log — so each tool owns -/// its whole behavior and `run` needs no per-tool special cases. +/// What one tool call does to the loop. 
Each tool returns its own `Step` — +/// including the optional [`Attempt`] to log — so `run` needs no per-tool cases. enum Step { Continue { reply: Reply, @@ -143,10 +153,12 @@ pub struct RawCall { pub args: Value, } +/// One field, not a `(String, bool)`, so "is this an error" can't disagree with +/// the content — each provider renders the two arms its own way (Anthropic's +/// `is_error` flag, OpenAI's `ERROR:` prefix). pub struct ToolResult { pub id: String, - pub content: String, - pub is_error: bool, + pub result: Reply, } /// One assistant turn. Folding "answer the previous calls" and "take the next @@ -175,8 +187,7 @@ pub struct Outcome { pub attempts: Vec, } -/// Run the edit loop over `original`. `max_turns` caps assistant turns so -/// `view`/`reset` can't spin forever. +/// `max_turns` caps assistant turns so `view`/`reset` can't spin forever. pub fn run(model: &mut dyn Model, original: String, max_turns: usize) -> Result { let tools = tools(); let by_name: HashMap<&str, &Tool> = tools.iter().map(|t| (t.name, t)).collect(); @@ -229,10 +240,7 @@ pub fn run(model: &mut dyn Model, original: String, max_turns: usize) -> Result< attempts.push(attempt); } - results.push(match reply { - Ok(content) => ok(id, content), - Err(msg) => err_result(id, msg), - }); + results.push(ToolResult { id, result: reply }); } // A turn "fails" only if it tried to edit and every edit missed; a turn @@ -254,22 +262,6 @@ pub fn run(model: &mut dyn Model, original: String, max_turns: usize) -> Result< anyhow::bail!("edit loop hit its {max_turns}-turn limit") } -fn ok(id: String, content: String) -> ToolResult { - ToolResult { - id, - content, - is_error: false, - } -} - -fn err_result(id: String, content: String) -> ToolResult { - ToolResult { - id, - content, - is_error: true, - } -} - #[cfg(test)] mod tests { use super::*; @@ -328,6 +320,20 @@ mod tests { assert_eq!(out, "I like toast."); } + #[test] + fn empty_selection_placeholder_is_editable_into_generated_text() { + // refac 
advertises generation from an empty selection (README fizzbuzz). + // The buffer is seeded with the same placeholder the model is shown, so + // the model turns it into output by editing the placeholder away. + let seeded = placeholder_if_empty(""); + let mut m = ScriptedModel::new(vec![ + vec![edit_call("1", "(empty)", "fn main() {}")], + vec![call("2", "finish")], + ]); + let out = run(&mut m, seeded.to_string(), TURNS).unwrap().text; + assert_eq!(out, "fn main() {}"); + } + #[test] fn parallel_edits_in_one_turn() { let mut m = ScriptedModel::new(vec![vec![ @@ -354,8 +360,8 @@ mod tests { ]); let out = run(&mut m, "a".into(), TURNS).unwrap().text; assert_eq!(out, "b"); - assert!(m.seen[1][0].is_error); - assert!(m.seen[1][0].content.contains("could not find")); + let err = m.seen[1][0].result.as_ref().unwrap_err(); + assert!(err.contains("could not find")); } #[test] @@ -367,8 +373,7 @@ mod tests { ]); let out = run(&mut m, "a".into(), TURNS).unwrap().text; assert_eq!(out, "b"); - assert_eq!(m.seen[2][0].content, "b"); - assert!(!m.seen[2][0].is_error); + assert_eq!(m.seen[2][0].result, Ok("b".to_string())); } #[test] @@ -380,7 +385,7 @@ mod tests { ]); let out = run(&mut m, "a".into(), TURNS).unwrap().text; assert_eq!(out, "a"); - assert_eq!(m.seen[2][0].content, "a"); + assert_eq!(m.seen[2][0].result, Ok("a".to_string())); } #[test] @@ -391,8 +396,8 @@ mod tests { ]); let out = run(&mut m, "x".into(), TURNS).unwrap().text; assert_eq!(out, "x"); - assert!(m.seen[1][0].is_error); - assert!(m.seen[1][0].content.contains("unknown tool")); + let err = m.seen[1][0].result.as_ref().unwrap_err(); + assert!(err.contains("unknown tool")); } #[test] diff --git a/src/anthropic.rs b/src/anthropic.rs index 9d610c7..044cd71 100644 --- a/src/anthropic.rs +++ b/src/anthropic.rs @@ -1,6 +1,5 @@ //! Anthropic (Claude) Messages API edit-mode agent. 
-use anyhow::Context; use schemars::Schema; use serde::Serialize; use serde_json::{json, Value}; @@ -9,16 +8,6 @@ use crate::agent::{Model, RawCall, Seed, Tool, ToolResult}; const MAX_TOKENS: u32 = 80000; -/// Anthropic 400s on an empty text block, so render empty fields as a visible -/// placeholder. -fn field_or_placeholder(field: &str) -> &str { - if field.is_empty() { - "(empty)" - } else { - field - } -} - const API_URL: &str = "https://api.anthropic.com/v1/messages"; const ANTHROPIC_VERSION: &str = "2023-06-01"; @@ -29,7 +18,6 @@ struct SystemBlock { text: String, } -/// A unit enum so the `type` field can only ever serialize to `"text"`. #[derive(Serialize)] #[serde(rename_all = "snake_case")] enum TextType { @@ -71,7 +59,6 @@ struct ToolDef { input_schema: Schema, } -/// Serializes to `{"type":"auto"}`: let the model decide whether to call a tool. #[derive(Serialize)] struct ToolChoiceAuto { #[serde(rename = "type")] @@ -113,10 +100,10 @@ impl AnthropicAgent { let messages = vec![Message::User { content: vec![ ContentBlock::Text { - text: field_or_placeholder(seed.selected).to_string(), + text: seed.selected.to_string(), }, ContentBlock::Text { - text: field_or_placeholder(seed.transform).to_string(), + text: seed.transform.to_string(), }, ], }]; @@ -158,10 +145,16 @@ impl Model for AnthropicAgent { if !results.is_empty() { let content = results .into_iter() - .map(|r| ContentBlock::ToolResult { - tool_use_id: r.id, - content: r.content, - is_error: r.is_error, + .map(|r| { + let (content, is_error) = match r.result { + Ok(c) => (c, false), + Err(c) => (c, true), + }; + ContentBlock::ToolResult { + tool_use_id: r.id, + content, + is_error, + } }) .collect(); self.messages.push(Message::User { content }); @@ -201,31 +194,21 @@ fn post(client: &reqwest::blocking::Client, key: &str, req: &Request) -> anyhow: "anthropic request: {}", serde_json::to_value(req).unwrap_or_default() ); - let response = client - .post(API_URL) - .header("x-api-key", key) - 
.header("anthropic-version", ANTHROPIC_VERSION) - .header("content-type", "application/json") - .json(req) - .send() - .context("Failed to send request to Anthropic API")?; - let status = response.status(); - let body = response - .json::() - .with_context(|| anyhow::anyhow!("Status: {status}. Failed to parse response body."))?; - if !status.is_success() { - let pretty = serde_json::to_string_pretty(&body).unwrap_or_else(|_| body.to_string()); - anyhow::bail!("Status: {status}. Body: {pretty}"); - } - Ok(body) + crate::backend::send_json( + client + .post(API_URL) + .header("x-api-key", key) + .header("anthropic-version", ANTHROPIC_VERSION) + .json(req), + ) } #[cfg(test)] mod tests { use super::*; - /// The wire JSON refac actually sends — the unit tests assert against this, - /// so they prove the typed structs serialize to the same bytes as before. + /// The wire JSON refac actually sends — the unit tests pin the typed structs + /// to this exact shape so a serialization change can't silently break it. fn request_json(agent: &AnthropicAgent) -> Value { serde_json::to_value(agent.request()).unwrap() } diff --git a/src/backend.rs b/src/backend.rs index d068940..00e3ea9 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -2,7 +2,8 @@ use std::time::Duration; -use anyhow::Result; +use anyhow::{Context, Result}; +use serde_json::Value; use crate::agent::{Model, Seed, Tool}; use crate::anthropic::AnthropicAgent; @@ -44,6 +45,21 @@ pub fn http_client() -> reqwest::blocking::Client { .expect("building HTTP client") } +/// Send a built (authed, JSON-bodied) request and return the parsed response. +/// Reads the body as text first so a non-JSON error page — what a gateway or +/// proxy returns on 429/5xx — survives into the error instead of being lost to a +/// JSON-parse failure. 
+pub fn send_json(request: reqwest::blocking::RequestBuilder) -> Result { + let response = request.send().context("sending request")?; + let status = response.status(); + let body = response.text().context("reading response body")?; + if !status.is_success() { + anyhow::bail!("Status: {status}. Body: {body}"); + } + serde_json::from_str(&body) + .with_context(|| format!("Status: {status}. Response body was not JSON: {body}")) +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/config_files.rs b/src/config_files.rs index 7929044..abdc337 100644 --- a/src/config_files.rs +++ b/src/config_files.rs @@ -16,9 +16,8 @@ pub struct Secrets { } impl Secrets { - /// Load secrets from `secrets.toml`, with env vars (`OPENAI_API_KEY`, - /// `ANTHROPIC_API_KEY`) taking precedence. A missing file is not an error — - /// env vars alone are enough. + /// Env vars (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`) take precedence over + /// `secrets.toml`, and a missing file is fine — env vars alone are enough. pub fn load() -> anyhow::Result { let mut secrets: Secrets = match base()?.find_config_file("secrets.toml") { Some(path) => toml::from_str(&fs::read_to_string(path)?)?, @@ -81,13 +80,11 @@ impl Config { None => Config::default(), }; if let Ok(from_env) = std::env::var("REFAC_PROVIDER") { - ret.provider = Some(match from_env.to_lowercase().as_str() { - "anthropic" => Provider::Anthropic, - "openai" => Provider::Openai, - other => anyhow::bail!( - "invalid REFAC_PROVIDER {other:?}; expected \"anthropic\" or \"openai\"" - ), - }); + // Parse through the same ValueEnum that defines the variants, so the + // accepted spellings can't drift from `Provider` itself. 
+ let provider = clap::ValueEnum::from_str(&from_env, /* ignore_case */ true) + .map_err(|e| anyhow::anyhow!("invalid REFAC_PROVIDER: {e}"))?; + ret.provider = Some(provider); } if let Ok(from_env) = std::env::var("REFAC_MODEL") { ret.model = Some(from_env); diff --git a/src/edit.rs b/src/edit.rs index 4c88f42..1a624b3 100644 --- a/src/edit.rs +++ b/src/edit.rs @@ -5,10 +5,9 @@ //! replacement to the selected text. The hard part is that the model's `old` //! rarely matches byte-for-byte — indentation drifts, whitespace reflows, a //! block gets reworded. So matching runs a chain of progressively looser -//! strategies (borrowed from opencode's `replace()`), exact first, and the first -//! candidate that lands a *unique* hit wins. A match that's missing or ambiguous -//! is an error fed back to the model, never a silent mis-apply (the contract -//! claude-code's str_replace established). +//! strategies, exact first, and the first candidate that lands a *unique* hit +//! wins. A match that's missing or ambiguous is an error fed back to the model, +//! never a silent mis-apply: a wrong edit is worse than a refused one. use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -78,6 +77,13 @@ pub fn apply(src: &str, edit: &Edit) -> Result { for replacer in CHAIN { for cand in replacer(src, &edit.old) { + // A blank `old` line trims to "" and yields an empty span; matching + // "" hits between every char, so `replace_all` would splatter `new` + // across the whole buffer. Skip it — an empty candidate is never a + // real match. + if cand.is_empty() { + continue; + } let count = src.matches(cand.as_str()).count(); match (count, edit.replace_all) { (0, _) => continue, @@ -381,7 +387,7 @@ mod tests { fn dedented_old_matches_indented_source() { // The model wrote `old` without the source's indentation; we still find // the block. `new` is spliced verbatim, so the model owns the - // indentation it wants in the result (same contract as claude-code). 
+ // indentation it wants in the result. let src = "if cond:\n a = 1\n b = 2\n"; let old = "a = 1\nb = 2"; let new = " a = 10\n b = 20"; @@ -416,6 +422,22 @@ mod tests { assert_eq!(got, "fn f() { 42 }"); } + #[test] + fn blank_old_does_not_splatter_under_replace_all() { + // A whitespace-only `old` trims to "" and the line matchers yield empty + // spans; without the empty-candidate guard, replace_all on "" would + // rewrite between every char. It must report NotFound instead. + let e = Edit { + old: " ".into(), + new: "X".into(), + replace_all: true, + }; + assert!(matches!( + apply("a\n\nb", &e), + Err(EditError::NotFound { .. }) + )); + } + #[test] fn exact_beats_fuzzy_for_uniqueness() { // two indentation-equal blocks, but an exact match is unique → applied. diff --git a/src/main.rs b/src/main.rs index 82ae3b6..19d7b38 100644 --- a/src/main.rs +++ b/src/main.rs @@ -104,17 +104,22 @@ fn refactor( let provider = config.provider(sc); let model = config.model(provider); + // The buffer the model edits and the `selected` it's shown must be the same + // string, including the empty-input placeholder: that's what lets the model + // turn an empty selection into generated text — it `edit`s the placeholder + // away. (Anthropic also 400s on an empty text block.) + let seed_selected = agent::placeholder_if_empty(&selected).to_owned(); let seed = agent::Seed { system: prompt::SYSTEM_PROMPT, - selected: &selected, - transform: &transform, + selected: &seed_selected, + transform: agent::placeholder_if_empty(&transform), }; let tools = agent::tools(); let mut model_agent = backend::resolve_agent(provider, &model, sc, &seed, &tools)?; let outcome = agent::run( model_agent.as_mut(), - selected.clone(), + seed_selected, agent::DEFAULT_MAX_TURNS, )?; diff --git a/src/openai.rs b/src/openai.rs index 1b83da8..f4929af 100644 --- a/src/openai.rs +++ b/src/openai.rs @@ -1,6 +1,5 @@ //! OpenAI chat-completions API edit-mode agent. 
-use anyhow::Context; use schemars::Schema; use serde::Serialize; use serde_json::Value; @@ -17,15 +16,15 @@ const API_URL: &str = "https://api.openai.com/v1/chat/completions"; #[serde(untagged)] enum Message { System { - role: Role, + role: SystemRole, content: String, }, User { - role: Role, + role: UserRole, content: String, }, Tool { - role: Role, + role: ToolRole, tool_call_id: String, content: String, }, @@ -35,11 +34,21 @@ enum Message { Assistant(Value), } -#[derive(Serialize, Clone, Copy)] +// Per-variant singleton roles, so a message's `role` is fixed by its type and +// can't be constructed wrong (untagged Serialize emits the field as-is). +#[derive(Serialize)] #[serde(rename_all = "snake_case")] -enum Role { +enum SystemRole { System, +} +#[derive(Serialize)] +#[serde(rename_all = "snake_case")] +enum UserRole { User, +} +#[derive(Serialize)] +#[serde(rename_all = "snake_case")] +enum ToolRole { Tool, } @@ -51,7 +60,6 @@ struct ToolDef { function: FunctionDef, } -/// A unit enum so the `type` field can only ever serialize to `"function"`. #[derive(Serialize)] #[serde(rename_all = "snake_case")] enum FunctionType { @@ -85,15 +93,15 @@ impl OpenaiAgent { pub fn new(key: String, model: String, seed: &Seed, tools: &[Tool]) -> Self { let messages = vec![ Message::System { - role: Role::System, + role: SystemRole::System, content: seed.system.to_string(), }, Message::User { - role: Role::User, + role: UserRole::User, content: seed.selected.to_string(), }, Message::User { - role: Role::User, + role: UserRole::User, content: seed.transform.to_string(), }, ]; @@ -132,13 +140,12 @@ impl Model for OpenaiAgent { // Answer the previous turn's tool calls first. chat-completions has no // error flag on a tool message, so mark failures in the content. 
for r in results { - let content = if r.is_error { - format!("ERROR: {}", r.content) - } else { - r.content + let content = match r.result { + Ok(c) => c, + Err(c) => format!("ERROR: {c}"), }; self.messages.push(Message::Tool { - role: Role::Tool, + role: ToolRole::Tool, tool_call_id: r.id, content, }); @@ -179,21 +186,7 @@ fn calls_from_message(message: &Value) -> Vec { } fn post(client: &reqwest::blocking::Client, key: &str, req: &Request) -> anyhow::Result { - let response = client - .post(API_URL) - .bearer_auth(key) - .json(req) - .send() - .context("Failed to send request to OpenAI API")?; - let status = response.status(); - let body = response - .json::() - .with_context(|| anyhow::anyhow!("Status: {status}. Failed to parse response body."))?; - if !status.is_success() { - let pretty = serde_json::to_string_pretty(&body).unwrap_or_else(|_| body.to_string()); - anyhow::bail!("Status: {status}. Body: {pretty}"); - } - Ok(body) + crate::backend::send_json(client.post(API_URL).bearer_auth(key).json(req)) } #[cfg(test)] @@ -201,8 +194,8 @@ mod tests { use super::*; use serde_json::json; - /// The wire JSON refac actually sends — the unit tests assert against this, - /// so they prove the typed structs serialize to the same bytes as before. + /// The wire JSON refac actually sends — the unit tests pin the typed structs + /// to this exact shape so a serialization change can't silently break it. 
fn request_json(agent: &OpenaiAgent) -> Value { serde_json::to_value(agent.request()).unwrap() } @@ -244,7 +237,7 @@ mod tests { }; let mut agent = OpenaiAgent::new("k".into(), "m".into(), &seed, &tools); agent.messages.push(Message::Tool { - role: Role::Tool, + role: ToolRole::Tool, tool_call_id: "c1".into(), content: "ok".into(), }); From b315ede30d82dce343a1611554e46588358abe20 Mon Sep 17 00:00:00 2001 From: bddap-bot Date: Fri, 19 Jun 2026 15:31:50 -0700 Subject: [PATCH 21/25] openai: type the assistant message, drop untagged + raw Value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The provider-typing refactor left the OpenAI `Message` enum `#[serde(untagged)]` with the assistant turn as a raw `serde_json::Value`, to dodge a double `role` key and to preserve the verbatim `function.arguments` bytes. Model it properly instead. - `Message` is now a clean `#[serde(tag = "role")]` enum like the Anthropic side; each variant fixes its own role, so no construction can set the wrong one and the wire still carries `role` exactly once (the single-role regression test is kept and still passes). - The assistant turn is a typed `AssistantTurn { content: Option, tool_calls: Option> }`, used as both the receive and the echo type via a newtype variant `Assistant(AssistantTurn)` — the parsed turn flows straight back out, no field-by-field copy to drift. - `ToolCall.function.arguments` stays a `String`: it is a JSON string on the wire, so keeping it opaque preserves byte-for-byte fidelity (a reparse would reorder keys and renormalize numbers/whitespace) — the real reason `Value` was used, now had for free. Added a test proving the bytes survive. chat-completions, unlike Anthropic's Messages API, carries no echo-required opaque blocks (`thinking` signatures), so the turn can be fully typed; unmodeled fields (`refusal`, …) are dropped on echo, which the API ignores on input. 
`cargo test` (43 pass) + `cargo clippy --all-targets -- -D warnings` clean. Co-Authored-By: Claude Opus 4.8 --- src/openai.rs | 176 +++++++++++++++++++++++++++++++------------------- 1 file changed, 111 insertions(+), 65 deletions(-) diff --git a/src/openai.rs b/src/openai.rs index f4929af..eac0ccc 100644 --- a/src/openai.rs +++ b/src/openai.rs @@ -1,55 +1,74 @@ //! OpenAI chat-completions API edit-mode agent. use schemars::Schema; -use serde::Serialize; +use serde::{Deserialize, Serialize}; use serde_json::Value; use crate::agent::{Model, RawCall, Seed, Tool, ToolResult}; const API_URL: &str = "https://api.openai.com/v1/chat/completions"; -/// One chat-completions message. `untagged` because the assistant variant is a -/// whole verbatim message object that already carries its own `"role"` — a -/// `tag = "role"` discriminant would emit `role` twice. The constructed -/// variants spell their role out instead. +/// One chat-completions message. The `role` tag keeps a role from pairing with +/// the wrong content shape; each variant fixes its own role, so a message can't +/// be built with the wrong one. #[derive(Serialize)] -#[serde(untagged)] +#[serde(tag = "role", rename_all = "snake_case")] enum Message { System { - role: SystemRole, content: String, }, User { - role: UserRole, content: String, }, Tool { - role: ToolRole, tool_call_id: String, content: String, }, - /// Echoed back as raw `Value` (its `"role"` included): re-serializing parsed - /// fields would reorder them and drop ones refac doesn't model that the next - /// `tool_calls`/`tool_call_id` handshake depends on. - Assistant(Value), + /// The assistant turn we echo back so the next turn's `tool` messages line up + /// with the `tool_calls` they answer. A `tag = "role"` newtype variant + /// flattens the inner struct, so the wire shape is exactly [`AssistantTurn`]'s + /// fields plus `role` — and the received turn flows straight back out with no + /// field-by-field copy to drift out of sync.
+ Assistant(AssistantTurn), } -// Per-variant singleton roles, so a message's `role` is fixed by its type and -// can't be constructed wrong (untagged Serialize emits the field as-is). -#[derive(Serialize)] -#[serde(rename_all = "snake_case")] -enum SystemRole { - System, +/// One requested tool call. `arguments` is the model's call payload as a JSON +/// *string* on the wire; kept verbatim as a `String` so the bytes we echo back +/// match the bytes we received (reparsing would reorder keys and renormalize +/// numbers/whitespace). [`RawCall`] parsing happens separately in +/// [`raw_calls`]. +#[derive(Serialize, Deserialize)] +struct ToolCall { + id: String, + #[serde(rename = "type")] + kind: FunctionType, + function: FunctionCall, } -#[derive(Serialize)] -#[serde(rename_all = "snake_case")] -enum UserRole { - User, + +#[derive(Serialize, Deserialize)] +struct FunctionCall { + name: String, + arguments: String, } -#[derive(Serialize)] -#[serde(rename_all = "snake_case")] -enum ToolRole { - Tool, + +/// The assistant turn, both as it arrives in a response and as we echo it back. +/// chat-completions (unlike Anthropic's Messages API, which can return opaque +/// `thinking` blocks that must round-trip verbatim) carries no echo-required +/// fields beyond these, so the turn can be fully typed rather than held as a raw +/// `Value`. `content` serializes even when `null` (a tool-call turn carries no +/// text) so the echo matches what the API sent; `tool_calls` is absent on a +/// plain text turn. Modeled fields not in the `tool_calls`/`tool_call_id` +/// handshake (e.g. `refusal`) are dropped on echo — harmless, the API ignores +/// them on input. +/// +/// Must stay a struct (or otherwise serialize to a JSON object): the +/// `Message`'s `tag = "role"` injects `role` into this value's map, which only +/// works for a map-shaped inner. 
+#[derive(Serialize, Deserialize)] +struct AssistantTurn { + content: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + tool_calls: Option>, } /// chat-completions wraps each tool in a `{"type":"function", ...}` envelope. @@ -60,7 +79,7 @@ struct ToolDef { function: FunctionDef, } -#[derive(Serialize)] +#[derive(Serialize, Deserialize)] #[serde(rename_all = "snake_case")] enum FunctionType { Function, @@ -93,15 +112,12 @@ impl OpenaiAgent { pub fn new(key: String, model: String, seed: &Seed, tools: &[Tool]) -> Self { let messages = vec![ Message::System { - role: SystemRole::System, content: seed.system.to_string(), }, Message::User { - role: UserRole::User, content: seed.selected.to_string(), }, Message::User { - role: UserRole::User, content: seed.transform.to_string(), }, ]; @@ -145,7 +161,6 @@ impl Model for OpenaiAgent { Err(c) => format!("ERROR: {c}"), }; self.messages.push(Message::Tool { - role: ToolRole::Tool, tool_call_id: r.id, content, }); @@ -156,31 +171,27 @@ impl Model for OpenaiAgent { if message.is_null() { anyhow::bail!("OpenAI response missing a message: {body}"); } - let calls = calls_from_message(&message); - self.messages.push(Message::Assistant(message)); + let turn: AssistantTurn = serde_json::from_value(message) + .map_err(|e| anyhow::anyhow!("OpenAI assistant message did not parse: {e}"))?; + let calls = raw_calls(turn.tool_calls.as_deref().unwrap_or(&[])); + // Echo the turn back verbatim: it carries the `tool_calls` the next + // turn's `tool` messages answer. + self.messages.push(Message::Assistant(turn)); Ok(calls) } } -/// chat-completions delivers each call's `arguments` as a JSON *string*, so parse it. 
-fn calls_from_message(message: &Value) -> Vec { - message - .get("tool_calls") - .and_then(Value::as_array) - .into_iter() - .flatten() - .filter_map(|c| { - let function = c.get("function")?; - let args = function - .get("arguments") - .and_then(Value::as_str) - .and_then(|s| serde_json::from_str(s).ok()) - .unwrap_or_else(|| serde_json::json!({})); - Some(RawCall { - id: c.get("id")?.as_str()?.to_string(), - name: function.get("name")?.as_str()?.to_string(), - args, - }) +/// chat-completions delivers each call's `arguments` as a JSON string; parse it +/// into the [`RawCall::args`] object refac dispatches on. A call whose arguments +/// aren't valid JSON falls back to an empty object rather than dropping the call. +fn raw_calls(tool_calls: &[ToolCall]) -> Vec { + tool_calls + .iter() + .map(|c| RawCall { + id: c.id.clone(), + name: c.function.name.clone(), + args: serde_json::from_str(&c.function.arguments) + .unwrap_or_else(|_| serde_json::json!({})), }) .collect() } @@ -237,7 +248,6 @@ mod tests { }; let mut agent = OpenaiAgent::new("k".into(), "m".into(), &seed, &tools); agent.messages.push(Message::Tool { - role: ToolRole::Tool, tool_call_id: "c1".into(), content: "ok".into(), }); @@ -249,7 +259,7 @@ mod tests { } #[test] - fn echoed_assistant_turn_is_verbatim() { + fn assistant_turn_serializes_to_wire_shape() { let tools = crate::agent::tools(); let seed = Seed { system: "SYS", @@ -257,27 +267,61 @@ mod tests { transform: "transform", }; let mut agent = OpenaiAgent::new("k".into(), "m".into(), &seed, &tools); - // The whole assistant message (role included) round-trips unchanged — - // refac flattens it back in verbatim. + // A tool-calling assistant turn round-trips to the canonical wire shape: + // `role` once, a `null` content, and the typed `tool_calls` with their + // `arguments` JSON string untouched. 
let raw = json!({ "role": "assistant", "content": null, "tool_calls": [ { "id": "c1", "type": "function", - "function": { "name": "edit", "arguments": "{}" } } + "function": { "name": "edit", "arguments": "{\"old\":\"a\",\"new\":\"b\"}" } } ] }); - agent.messages.push(Message::Assistant(raw.clone())); + let turn: AssistantTurn = serde_json::from_value(raw.clone()).unwrap(); + agent.messages.push(Message::Assistant(turn)); assert_eq!(request_json(&agent)["messages"][3], raw); - // The echoed object already carries `role`; the enum must not add a - // second one (untagged, not tag = "role"). + // The role-tagged enum must emit `role` exactly once (the bug a second, + // body-carried `role` would reintroduce). let wire = serde_json::to_string(&agent.request()).unwrap(); assert_eq!(wire.matches("\"role\":\"assistant\"").count(), 1); } + #[test] + fn assistant_arguments_string_is_byte_identical() { + // `arguments` stays a verbatim `String`: a payload serde_json would + // reorder (`b` before `a`) and renormalize (spaces, number form) on a + // reparse must echo back byte-for-byte. 
+ let args = "{\"b\": 1, \"a\": 1.0, \"n\": 1e3}"; + let raw = json!({ + "role": "assistant", + "content": null, + "tool_calls": [ + { "id": "c1", "type": "function", + "function": { "name": "edit", "arguments": args } } + ] + }); + let turn: AssistantTurn = serde_json::from_value(raw).unwrap(); + let msg = Message::Assistant(turn); + assert_eq!( + serde_json::to_value(&msg).unwrap()["tool_calls"][0]["function"]["arguments"], + json!(args) + ); + } + + #[test] + fn text_only_assistant_turn_omits_tool_calls() { + let raw = json!({ "role": "assistant", "content": "done" }); + let turn: AssistantTurn = serde_json::from_value(raw).unwrap(); + let msg = Message::Assistant(turn); + let wire = serde_json::to_value(&msg).unwrap(); + assert_eq!(wire["content"], "done"); + assert!(wire.get("tool_calls").is_none()); + } + #[test] fn parses_tool_calls_with_string_arguments() { - let message = json!({ + let raw = json!({ "role": "assistant", "tool_calls": [ { "id": "c1", "type": "function", @@ -286,7 +330,8 @@ mod tests { "function": { "name": "finish", "arguments": "{}" } } ] }); - let calls = calls_from_message(&message); + let turn: AssistantTurn = serde_json::from_value(raw).unwrap(); + let calls = raw_calls(turn.tool_calls.as_deref().unwrap_or(&[])); assert_eq!(calls.len(), 2); assert_eq!(calls[0].id, "c1"); assert_eq!(calls[0].name, "edit"); @@ -296,7 +341,8 @@ mod tests { #[test] fn no_tool_calls_is_no_calls() { - let message = json!({ "role": "assistant", "content": "done" }); - assert!(calls_from_message(&message).is_empty()); + let raw = json!({ "role": "assistant", "content": "done" }); + let turn: AssistantTurn = serde_json::from_value(raw).unwrap(); + assert!(raw_calls(turn.tool_calls.as_deref().unwrap_or(&[])).is_empty()); } } From 0f32befa59887572cae3a6eac499402f2fa1087e Mon Sep 17 00:00:00 2001 From: bddap-bot Date: Fri, 19 Jun 2026 16:16:07 -0700 Subject: [PATCH 22/25] Carry `selected` via the pre-seeded `view` call, not a user message MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `selected` was a plain user message. Per review, it must reach the model exactly once, as the return value of a pre-seeded function call. Both providers now open the conversation as if the model had already called `view`: a user message with the instruction (`transform`), an assistant `view` tool call (id `seed_view`), and that call's result carrying `selected`. The model reads `selected` the same way it reads every later `view`, and it appears once — never as a user message. `SEED_TOOL`/ `SEED_CALL_ID` live in agent.rs so the two wire formats can't disagree. The empty-input placeholder is untouched: the seeded `selected` is still the placeholder-applied string that also seeds the edit buffer, so the buffer the model edits and the text it's shown remain identical (and never an empty block). Co-Authored-By: Claude Opus 4.8 --- src/agent.rs | 17 ++++++++++++++ src/anthropic.rs | 61 +++++++++++++++++++++++++++++++++++------------- src/openai.rs | 50 ++++++++++++++++++++++++++++++--------- 3 files changed, 101 insertions(+), 27 deletions(-) diff --git a/src/agent.rs b/src/agent.rs index 800111c..7601acb 100644 --- a/src/agent.rs +++ b/src/agent.rs @@ -21,6 +21,23 @@ pub struct Seed<'a> { pub transform: &'a str, } +/// The tool name and call id of the pre-seeded function call that hands the model +/// `selected`. The conversation opens as if the model itself had already called +/// `view`: refac's reply to that call is `selected`. This is the *only* place +/// `selected` enters the conversation — never as a user message — so the model +/// reads it the same way it reads every later `view`, and it appears exactly once. 
+pub const SEED_TOOL: &str = "view"; +pub const SEED_CALL_ID: &str = "seed_view"; + +impl Seed<'_> { + /// The pre-seeded `view` call's arguments, mirroring the empty-args shape a + /// real `view` call sends, so the seeded turn is indistinguishable from one + /// the model made itself. + pub fn seed_call_args() -> Value { + serde_json::json!({}) + } +} + /// Both providers reject (or, for OpenAI, would silently send) an empty user /// field; render it as a visible placeholder. Shared so the two wire formats /// can't disagree about what an empty selection looks like. diff --git a/src/anthropic.rs b/src/anthropic.rs index 044cd71..e51ae92 100644 --- a/src/anthropic.rs +++ b/src/anthropic.rs @@ -4,7 +4,7 @@ use schemars::Schema; use serde::Serialize; use serde_json::{json, Value}; -use crate::agent::{Model, RawCall, Seed, Tool, ToolResult}; +use crate::agent::{Model, RawCall, Seed, Tool, ToolResult, SEED_CALL_ID, SEED_TOOL}; const MAX_TOKENS: u32 = 80000; @@ -97,16 +97,31 @@ impl AnthropicAgent { kind: TextType::Text, text: seed.system.to_string(), }]; - let messages = vec![Message::User { - content: vec![ - ContentBlock::Text { - text: seed.selected.to_string(), - }, - ContentBlock::Text { + // Open with the user's instruction, then a pre-seeded `view` call whose + // result is `selected` — so `selected` reaches the model once, as a tool + // result, exactly as a real `view` later would (never as a user message). 
+ let messages = vec![ + Message::User { + content: vec![ContentBlock::Text { text: seed.transform.to_string(), - }, - ], - }]; + }], + }, + Message::Assistant { + content: json!([{ + "type": "tool_use", + "id": SEED_CALL_ID, + "name": SEED_TOOL, + "input": Seed::seed_call_args(), + }]), + }, + Message::User { + content: vec![ContentBlock::ToolResult { + tool_use_id: SEED_CALL_ID.to_string(), + content: seed.selected.to_string(), + is_error: false, + }], + }, + ]; let tools = tools .iter() .map(|t| ToolDef { @@ -226,10 +241,21 @@ mod tests { assert_eq!(req["system"][0]["type"], "text"); assert_eq!(req["system"][0]["text"], "SYS"); + // The instruction is the only user message; `selected` is NOT among the + // user content, it arrives as the seeded `view` call's result below. assert_eq!(req["messages"][0]["role"], "user"); assert_eq!(req["messages"][0]["content"][0]["type"], "text"); - assert_eq!(req["messages"][0]["content"][0]["text"], "selected"); - assert_eq!(req["messages"][0]["content"][1]["text"], "transform"); + assert_eq!(req["messages"][0]["content"][0]["text"], "transform"); + assert_eq!(req["messages"][0]["content"][1], Value::Null); + // The pre-seeded `view` call and its result — the sole carrier of `selected`. 
+ assert_eq!(req["messages"][1]["role"], "assistant"); + assert_eq!(req["messages"][1]["content"][0]["type"], "tool_use"); + assert_eq!(req["messages"][1]["content"][0]["name"], "view"); + let seed_id = req["messages"][1]["content"][0]["id"].clone(); + assert_eq!(req["messages"][2]["role"], "user"); + assert_eq!(req["messages"][2]["content"][0]["type"], "tool_result"); + assert_eq!(req["messages"][2]["content"][0]["tool_use_id"], seed_id); + assert_eq!(req["messages"][2]["content"][0]["content"], "selected"); assert_eq!(req["tool_choice"]["type"], "auto"); let names: Vec<&str> = req["tools"] .as_array() @@ -256,9 +282,11 @@ mod tests { is_error: false, }], }); + // Index 3: the three-message seed (user instruction, seeded `view` call, + // its result) occupies 0..3. let req = request_json(&agent); - let block = &req["messages"][1]["content"][0]; - assert_eq!(req["messages"][1]["role"], "user"); + let block = &req["messages"][3]["content"][0]; + assert_eq!(req["messages"][3]["role"], "user"); assert_eq!(block["type"], "tool_result"); assert_eq!(block["tool_use_id"], "tu_1"); assert_eq!(block["content"], "ok"); @@ -283,9 +311,10 @@ mod tests { agent.messages.push(Message::Assistant { content: raw.clone(), }); + // Index 3: the three-message seed occupies 0..3. 
let req = request_json(&agent); - assert_eq!(req["messages"][1]["role"], "assistant"); - assert_eq!(req["messages"][1]["content"], raw); + assert_eq!(req["messages"][3]["role"], "assistant"); + assert_eq!(req["messages"][3]["content"], raw); } #[test] diff --git a/src/openai.rs b/src/openai.rs index eac0ccc..ea8bb8e 100644 --- a/src/openai.rs +++ b/src/openai.rs @@ -4,7 +4,7 @@ use schemars::Schema; use serde::{Deserialize, Serialize}; use serde_json::Value; -use crate::agent::{Model, RawCall, Seed, Tool, ToolResult}; +use crate::agent::{Model, RawCall, Seed, Tool, ToolResult, SEED_CALL_ID, SEED_TOOL}; const API_URL: &str = "https://api.openai.com/v1/chat/completions"; @@ -110,16 +110,31 @@ struct Request<'a> { impl OpenaiAgent { pub fn new(key: String, model: String, seed: &Seed, tools: &[Tool]) -> Self { + // Open with the user's instruction, then a pre-seeded `view` call whose + // result is `selected` — so `selected` reaches the model once, as a tool + // result, exactly as a real `view` later would (never as a user message). let messages = vec![ Message::System { content: seed.system.to_string(), }, - Message::User { - content: seed.selected.to_string(), - }, Message::User { content: seed.transform.to_string(), }, + Message::Assistant(AssistantTurn { + content: None, + tool_calls: Some(vec![ToolCall { + id: SEED_CALL_ID.to_string(), + kind: FunctionType::Function, + function: FunctionCall { + name: SEED_TOOL.to_string(), + arguments: Seed::seed_call_args().to_string(), + }, + }]), + }), + Message::Tool { + tool_call_id: SEED_CALL_ID.to_string(), + content: seed.selected.to_string(), + }, ]; let tools = tools .iter() @@ -225,9 +240,17 @@ mod tests { assert_eq!(req["tool_choice"], "auto"); assert_eq!(req["messages"][0]["role"], "system"); assert_eq!(req["messages"][0]["content"], "SYS"); + // The instruction is the only user message; `selected` is NOT a user + // message, it arrives as the seeded `view` call's result below. 
assert_eq!(req["messages"][1]["role"], "user"); - assert_eq!(req["messages"][1]["content"], "selected"); - assert_eq!(req["messages"][2]["content"], "transform"); + assert_eq!(req["messages"][1]["content"], "transform"); + // The pre-seeded `view` call and its result — the sole carrier of `selected`. + assert_eq!(req["messages"][2]["role"], "assistant"); + assert_eq!(req["messages"][2]["tool_calls"][0]["function"]["name"], "view"); + let seed_id = req["messages"][2]["tool_calls"][0]["id"].clone(); + assert_eq!(req["messages"][3]["role"], "tool"); + assert_eq!(req["messages"][3]["tool_call_id"], seed_id); + assert_eq!(req["messages"][3]["content"], "selected"); assert_eq!(req["tools"][0]["type"], "function"); let names: Vec<&str> = req["tools"] .as_array() @@ -251,8 +274,10 @@ mod tests { tool_call_id: "c1".into(), content: "ok".into(), }); + // Index 4: the four-message seed (system, user instruction, seeded `view` + // call, its result) occupies 0..4. let req = request_json(&agent); - let msg = &req["messages"][3]; + let msg = &req["messages"][4]; assert_eq!(msg["role"], "tool"); assert_eq!(msg["tool_call_id"], "c1"); assert_eq!(msg["content"], "ok"); @@ -280,11 +305,14 @@ mod tests { }); let turn: AssistantTurn = serde_json::from_value(raw.clone()).unwrap(); agent.messages.push(Message::Assistant(turn)); - assert_eq!(request_json(&agent)["messages"][3], raw); - // The role-tagged enum must emit `role` exactly once (the bug a second, - // body-carried `role` would reintroduce). + // Index 4: the four-message seed occupies 0..4. + assert_eq!(request_json(&agent)["messages"][4], raw); + // Each assistant message's role-tagged enum must emit `role` exactly once + // (the bug a second, body-carried `role` would reintroduce). Two assistant + // turns here — the seeded `view` call and the one just pushed — so exactly + // two; a duplicated `role` would push the count past two. 
let wire = serde_json::to_string(&agent.request()).unwrap(); - assert_eq!(wire.matches("\"role\":\"assistant\"").count(), 1); + assert_eq!(wire.matches("\"role\":\"assistant\"").count(), 2); } #[test] From 8f19ea1a39c2a3e0e57d8c64c8166ab95e0dddfe Mon Sep 17 00:00:00 2001 From: bddap-bot Date: Fri, 19 Jun 2026 16:24:32 -0700 Subject: [PATCH 23/25] Comment sweep: cut restatement/narration; add AGENTS.md comment rule MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Andrew flagged comment-flood on #33 four times. Delete every comment that only restates the code, a signature, or a type, or narrates WHAT the next lines do; keep only WHY / gotcha / constraint. Net -33 comment lines across agent.rs, anthropic.rs, openai.rs, main.rs. Named offenders cut: the `ToolResult` "one field not a tuple" doc (agent.rs), the `Message` role-tag narration (anthropic.rs / openai.rs). Add AGENTS.md codifying the bar ("comments are code; a comment must provably earn its place; when in doubt, delete"), so the rule outlives this PR. Also note the real-types-over-Value preference and the nix build/test commands. Strong-typing note (anthropic.rs): the wire types already landed — SystemBlock/Message/ToolDef are real types; the lone `Value` is the assistant turn echoed back verbatim for byte-fidelity, which carries its WHY. Ignore the in-tree /.cargo-home build cache. 
Co-Authored-By: Claude Opus 4.8 --- .gitignore | 1 + AGENTS.md | 33 +++++++++++++++++++++++++++++++++ src/agent.rs | 28 ++++++++++------------------ src/anthropic.rs | 16 ++++++---------- src/main.rs | 7 +++---- src/openai.rs | 48 ++++++++++++++---------------------------------- 6 files changed, 67 insertions(+), 66 deletions(-) create mode 100644 AGENTS.md diff --git a/.gitignore b/.gitignore index 787aa48..c2cb8bb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /target /tmp +/.cargo-home diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..480f323 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,33 @@ +# refac — contributor notes + +## Comments are code + +A comment must **provably earn its place**: it survives only if it carries a +**WHY**, a **gotcha**, or a **constraint** a future reader would otherwise trip +over. Never restate what the code, a signature, or a type already says; never +narrate WHAT the next lines do; never leave development-history trivia ("changed +from…", "the API doesn't infer this"). **When in doubt, delete.** Comment density +is itself a cost — a wall of even-true remarks buries the few that matter and +makes the code harder to read. + +Doc comments on crate-internal items get the same bar: keep one only for a +non-obvious WHY or when a macro consumes it (e.g. a `schemars` field doc that +becomes a model-facing schema description). + +## Types + +Prefer real types over `serde_json::Value` or stringly-typed data for anything +refac constructs or controls. The one sanctioned `Value` is a payload echoed back +to a provider verbatim for byte-fidelity (re-serializing would reorder fields) — +and that exception carries a WHY comment. + +## Build & test + +The toolchain is pinned via nix, not rustup. From a clone: + +```bash +cargo test +cargo clippy --all-targets -- -D warnings # must stay clean +``` + +Both must pass before requesting review. 
diff --git a/src/agent.rs b/src/agent.rs index 7601acb..85799c4 100644 --- a/src/agent.rs +++ b/src/agent.rs @@ -21,26 +21,21 @@ pub struct Seed<'a> { pub transform: &'a str, } -/// The tool name and call id of the pre-seeded function call that hands the model -/// `selected`. The conversation opens as if the model itself had already called -/// `view`: refac's reply to that call is `selected`. This is the *only* place -/// `selected` enters the conversation — never as a user message — so the model -/// reads it the same way it reads every later `view`, and it appears exactly once. +/// The conversation opens with a synthetic `view` call whose result is +/// `selected` — the only place `selected` enters, so the model reads it exactly +/// as it reads every later `view`, never as a user message. pub const SEED_TOOL: &str = "view"; pub const SEED_CALL_ID: &str = "seed_view"; impl Seed<'_> { - /// The pre-seeded `view` call's arguments, mirroring the empty-args shape a - /// real `view` call sends, so the seeded turn is indistinguishable from one - /// the model made itself. pub fn seed_call_args() -> Value { serde_json::json!({}) } } -/// Both providers reject (or, for OpenAI, would silently send) an empty user -/// field; render it as a visible placeholder. Shared so the two wire formats -/// can't disagree about what an empty selection looks like. +/// Both providers reject an empty user field (Anthropic 400s); render it as a +/// visible placeholder. Shared so the two wire formats agree on what empty looks +/// like. pub fn placeholder_if_empty(field: &str) -> &str { if field.is_empty() { "(empty)" @@ -170,18 +165,15 @@ pub struct RawCall { pub args: Value, } -/// One field, not a `(String, bool)`, so "is this an error" can't disagree with -/// the content — each provider renders the two arms its own way (Anthropic's -/// `is_error` flag, OpenAI's `ERROR:` prefix). pub struct ToolResult { pub id: String, pub result: Reply, } -/// One assistant turn. 
Folding "answer the previous calls" and "take the next -/// turn" into one step makes it impossible to advance without a result for every -/// outstanding call, which both wire protocols require. `results` is empty on the -/// first turn; an empty return means the model stopped without a call (done). +/// Answering the previous calls and taking the next turn are one method so the +/// loop can't advance without a result for every outstanding call, which both +/// wire protocols require. Empty `results` on the first turn; an empty return +/// means the model stopped without a call (done). pub trait Model { fn turn(&mut self, results: Vec) -> Result>; } diff --git a/src/anthropic.rs b/src/anthropic.rs index e51ae92..d4695fa 100644 --- a/src/anthropic.rs +++ b/src/anthropic.rs @@ -37,16 +37,16 @@ enum ContentBlock { }, } -/// The `role` tag keeps a role from pairing with the wrong content shape. #[derive(Serialize)] #[serde(tag = "role", rename_all = "snake_case")] enum Message { User { content: Vec, }, - /// Echoed back as raw `Value`: re-serializing parsed blocks would reorder - /// fields and drop ones refac doesn't model (e.g. `thinking` signatures) that - /// the next `tool_use`/`tool_result` handshake depends on. + /// Raw `Value`, not typed blocks: the assistant turn is echoed back verbatim + /// for the `tool_use`/`tool_result` handshake, and re-serializing parsed + /// blocks would reorder fields and drop ones refac doesn't model (e.g. + /// `thinking` signatures) that the next turn depends on. Assistant { content: Value, }, @@ -97,9 +97,8 @@ impl AnthropicAgent { kind: TextType::Text, text: seed.system.to_string(), }]; - // Open with the user's instruction, then a pre-seeded `view` call whose - // result is `selected` — so `selected` reaches the model once, as a tool - // result, exactly as a real `view` later would (never as a user message). + // User instruction, then the synthetic `view` call (see `SEED_TOOL`) whose + // result carries `selected`. 
let messages = vec![ Message::User { content: vec![ContentBlock::Text { @@ -156,7 +155,6 @@ impl AnthropicAgent { impl Model for AnthropicAgent { fn turn(&mut self, results: Vec) -> anyhow::Result> { - // Answer the previous turn's tool calls before asking for the next one. if !results.is_empty() { let content = results .into_iter() @@ -181,8 +179,6 @@ impl Model for AnthropicAgent { .cloned() .ok_or_else(|| anyhow::anyhow!("Anthropic response missing content: {body}"))?; let calls = calls_from_content(&content); - // The echoed assistant turn carries the tool_use blocks the next turn's - // tool_results refer to. self.messages.push(Message::Assistant { content }); Ok(calls) } diff --git a/src/main.rs b/src/main.rs index 19d7b38..c82dbc4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -104,10 +104,9 @@ fn refactor( let provider = config.provider(sc); let model = config.model(provider); - // The buffer the model edits and the `selected` it's shown must be the same - // string, including the empty-input placeholder: that's what lets the model - // turn an empty selection into generated text — it `edit`s the placeholder - // away. (Anthropic also 400s on an empty text block.) + // The edit buffer and the shown `selected` must be the same string, including + // the empty-input placeholder — that's what lets the model generate from an + // empty selection: it `edit`s the placeholder away. let seed_selected = agent::placeholder_if_empty(&selected).to_owned(); let seed = agent::Seed { system: prompt::SYSTEM_PROMPT, diff --git a/src/openai.rs b/src/openai.rs index ea8bb8e..d0b16fd 100644 --- a/src/openai.rs +++ b/src/openai.rs @@ -8,9 +8,6 @@ use crate::agent::{Model, RawCall, Seed, Tool, ToolResult, SEED_CALL_ID, SEED_TO const API_URL: &str = "https://api.openai.com/v1/chat/completions"; -/// One chat-completions message. 
The `role` tag keeps a role from pairing with -/// the wrong content shape; each variant fixes its own role, so a message can't -/// be built with the wrong one. #[derive(Serialize)] #[serde(tag = "role", rename_all = "snake_case")] enum Message { @@ -24,19 +21,14 @@ enum Message { tool_call_id: String, content: String, }, - /// The assistant turn we echo back so the next turn's `tool` messages line up - /// with the `tool_calls` they answer. A `tag = "role"` newtype variant - /// flattens the inner struct, so the wire shape is exactly [`AssistantTurn`]'s - /// fields plus `role` — and the received turn flows straight back out with no - /// field-by-field copy to drift out of sync. + /// A newtype variant (not fields) so the received [`AssistantTurn`] flows + /// straight back out for the echo with no field-by-field copy to drift. Assistant(AssistantTurn), } -/// One requested tool call. `arguments` is the model's call payload as a JSON -/// *string* on the wire; kept verbatim as a `String` so the bytes we echo back -/// match the bytes we received (reparsing would reorder keys and renormalize -/// numbers/whitespace). [`RawCall`] parsing happens separately in -/// [`raw_calls`]. +/// `arguments` stays a verbatim `String`, not a parsed `Value`: it's a JSON +/// string on the wire, and reparsing would reorder keys and renormalize +/// numbers/whitespace, so the echo would no longer match the received bytes. #[derive(Serialize, Deserialize)] struct ToolCall { id: String, @@ -51,19 +43,11 @@ struct FunctionCall { arguments: String, } -/// The assistant turn, both as it arrives in a response and as we echo it back. -/// chat-completions (unlike Anthropic's Messages API, which can return opaque -/// `thinking` blocks that must round-trip verbatim) carries no echo-required -/// fields beyond these, so the turn can be fully typed rather than held as a raw -/// `Value`. 
`content` serializes even when `null` (a tool-call turn carries no -/// text) so the echo matches what the API sent; `tool_calls` is absent on a -/// plain text turn. Modeled fields not in the `tool_calls`/`tool_call_id` -/// handshake (e.g. `refusal`) are dropped on echo — harmless, the API ignores -/// them on input. -/// -/// Must stay a struct (or otherwise serialize to a JSON object): the -/// `Message`'s `tag = "role"` injects `role` into this value's map, which only -/// works for a map-shaped inner. +/// Fully typed rather than a raw `Value`: unlike Anthropic (whose opaque +/// `thinking` blocks must round-trip verbatim), chat-completions has no +/// echo-required fields beyond these, so unmodeled ones (e.g. `refusal`) can +/// drop on echo. Must serialize to a JSON object — `Message`'s `tag = "role"` +/// injects `role` into this value's map, which needs a map-shaped inner. #[derive(Serialize, Deserialize)] struct AssistantTurn { content: Option, @@ -71,7 +55,6 @@ struct AssistantTurn { tool_calls: Option>, } -/// chat-completions wraps each tool in a `{"type":"function", ...}` envelope. #[derive(Serialize)] struct ToolDef { #[serde(rename = "type")] @@ -110,9 +93,8 @@ struct Request<'a> { impl OpenaiAgent { pub fn new(key: String, model: String, seed: &Seed, tools: &[Tool]) -> Self { - // Open with the user's instruction, then a pre-seeded `view` call whose - // result is `selected` — so `selected` reaches the model once, as a tool - // result, exactly as a real `view` later would (never as a user message). + // User instruction, then the synthetic `view` call (see `SEED_TOOL`) whose + // result carries `selected`. let messages = vec![ Message::System { content: seed.system.to_string(), @@ -168,8 +150,8 @@ impl OpenaiAgent { impl Model for OpenaiAgent { fn turn(&mut self, results: Vec) -> anyhow::Result> { - // Answer the previous turn's tool calls first. chat-completions has no - // error flag on a tool message, so mark failures in the content. 
+ // chat-completions has no error flag on a tool message, so mark a failed + // result in the content itself. for r in results { let content = match r.result { Ok(c) => c, @@ -189,8 +171,6 @@ impl Model for OpenaiAgent { let turn: AssistantTurn = serde_json::from_value(message) .map_err(|e| anyhow::anyhow!("OpenAI assistant message did not parse: {e}"))?; let calls = raw_calls(turn.tool_calls.as_deref().unwrap_or(&[])); - // Echo the turn back verbatim: it carries the `tool_calls` the next - // turn's `tool` messages answer. self.messages.push(Message::Assistant(turn)); Ok(calls) } From a29b979429447df4becd2c51aeeab32da044d161 Mon Sep 17 00:00:00 2001 From: bddap-bot Date: Fri, 19 Jun 2026 17:26:59 -0700 Subject: [PATCH 24/25] Clear all comments from PR-touched source files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per review: comment privileges revoked for this PR — remove every //, ///, and //! from the files this PR touches (agent.rs, anthropic.rs, backend.rs, config_files.rs, edit.rs, main.rs, openai.rs, prompt.rs). The `edit` tool's three arg-field doc comments are model-facing (schemars turns them into the tool-schema `description`), so move them verbatim to `#[schemars(description = "…")]` attributes — code, not comments — keeping the sent schema byte-identical. Wire/schema tests pass unchanged. Also inline four pre-existing `uninlined_format_args` in main.rs so `cargo clippy --all-targets -- -D warnings` is green. 
Co-Authored-By: Claude Opus 4.8 --- src/agent.rs | 44 ++------------------------------ src/anthropic.rs | 26 ++----------------- src/backend.rs | 7 ----- src/config_files.rs | 11 +------- src/edit.rs | 62 +++------------------------------------------ src/main.rs | 16 +++--------- src/openai.rs | 37 --------------------------- src/prompt.rs | 2 -- 8 files changed, 12 insertions(+), 193 deletions(-) diff --git a/src/agent.rs b/src/agent.rs index 85799c4..8340b5e 100644 --- a/src/agent.rs +++ b/src/agent.rs @@ -1,9 +1,3 @@ -//! The edit loop: the model calls tools (`edit`, `view`, `reset`, `finish`), -//! refac applies each, feeds the result back, and repeats until the model -//! finishes or a guard trips. Provider-agnostic and IO-free — a [`Model`] is one -//! turn (send the conversation + tools, get back the calls); the providers -//! implement it over their wire formats. - use std::collections::HashMap; use anyhow::Result; @@ -13,17 +7,12 @@ use serde_json::Value; use crate::edit::{self, Edit}; -/// The one conversation shape refac ever sends, so the agents take it whole — a -/// malformed conversation can't be built. pub struct Seed<'a> { pub system: &'a str, pub selected: &'a str, pub transform: &'a str, } -/// The conversation opens with a synthetic `view` call whose result is -/// `selected` — the only place `selected` enters, so the model reads it exactly -/// as it reads every later `view`, never as a user message. pub const SEED_TOOL: &str = "view"; pub const SEED_CALL_ID: &str = "seed_view"; @@ -33,9 +22,6 @@ impl Seed<'_> { } } -/// Both providers reject an empty user field (Anthropic 400s); render it as a -/// visible placeholder. Shared so the two wire formats agree on what empty looks -/// like. 
pub fn placeholder_if_empty(field: &str) -> &str { if field.is_empty() { "(empty)" @@ -44,18 +30,12 @@ pub fn placeholder_if_empty(field: &str) -> &str { } } -/// Read-only state a tool may consult beyond the live buffer, so `reset` need not -/// close over the original. pub struct Ctx<'a> { original: &'a str, } -/// A tool's reply to the model: `Ok` shown as the result, `Err` as an error -/// result. (The handler's *outer* `Result` is a malformed call instead.) pub type Reply = std::result::Result; -/// What one tool call does to the loop. Each tool returns its own `Step` — -/// including the optional [`Attempt`] to log — so `run` needs no per-tool cases. enum Step { Continue { reply: Reply, @@ -75,8 +55,6 @@ impl Step { type Handler = Box Result>; -/// One tool offered to the model. [`Tool::new`] binds the schema and the handler -/// to a single args type, so what's advertised and what's parsed can't drift. pub struct Tool { pub name: &'static str, pub description: &'static str, @@ -170,18 +148,12 @@ pub struct ToolResult { pub result: Reply, } -/// Answering the previous calls and taking the next turn are one method so the -/// loop can't advance without a result for every outstanding call, which both -/// wire protocols require. Empty `results` on the first turn; an empty return -/// means the model stopped without a call (done). pub trait Model { fn turn(&mut self, results: Vec) -> Result>; } pub const DEFAULT_MAX_TURNS: usize = 25; -/// Give up after this many consecutive turns in which every edit failed — the -/// model is stuck and burning tokens. const MAX_CONSECUTIVE_FAILURES: usize = 3; #[derive(Debug)] @@ -196,7 +168,6 @@ pub struct Outcome { pub attempts: Vec, } -/// `max_turns` caps assistant turns so `view`/`reset` can't spin forever. 
pub fn run(model: &mut dyn Model, original: String, max_turns: usize) -> Result { let tools = tools(); let by_name: HashMap<&str, &Tool> = tools.iter().map(|t| (t.name, t)).collect(); @@ -236,8 +207,6 @@ pub fn run(model: &mut dyn Model, original: String, max_turns: usize) -> Result< }) } Ok(Step::Continue { reply, attempt }) => (reply, attempt), - // A malformed call (args that didn't deserialize) is reported to - // the model like any other tool error, not a fatal loop error. Err(err) => (Err(err.to_string()), None), }; @@ -252,8 +221,6 @@ pub fn run(model: &mut dyn Model, original: String, max_turns: usize) -> Result< results.push(ToolResult { id, result: reply }); } - // A turn "fails" only if it tried to edit and every edit missed; a turn - // of pure `view`/`reset` shouldn't count against the model. if edits_attempted > 0 && edits_failed == edits_attempted { consecutive_failures += 1; if consecutive_failures >= MAX_CONSECUTIVE_FAILURES { @@ -276,8 +243,6 @@ mod tests { use super::*; use serde_json::json; - /// A model driven by a canned script: each entry is the tool calls for one - /// turn. It records the results refac sends back so tests can assert on them. struct ScriptedModel { turns: std::vec::IntoIter>, seen: Vec>, @@ -294,8 +259,6 @@ mod tests { impl Model for ScriptedModel { fn turn(&mut self, results: Vec) -> Result> { - // `results` are the previous turn's tool results, so `seen[i]` holds - // the results the model received entering turn `i` (seen[0] is empty). self.seen.push(results); Ok(self.turns.next().unwrap_or_default()) } @@ -331,9 +294,6 @@ mod tests { #[test] fn empty_selection_placeholder_is_editable_into_generated_text() { - // refac advertises generation from an empty selection (README fizzbuzz). - // The buffer is seeded with the same placeholder the model is shown, so - // the model turns it into output by editing the placeholder away. 
let seeded = placeholder_if_empty(""); let mut m = ScriptedModel::new(vec![ vec![edit_call("1", "(empty)", "fn main() {}")], @@ -364,7 +324,7 @@ mod tests { #[test] fn failed_edit_is_reported_then_recovered() { let mut m = ScriptedModel::new(vec![ - vec![edit_call("1", "nope", "x")], // misses + vec![edit_call("1", "nope", "x")], vec![edit_call("2", "a", "b"), call("3", "finish")], ]); let out = run(&mut m, "a".into(), TURNS).unwrap().text; @@ -424,7 +384,7 @@ mod tests { fn pure_view_turns_do_not_count_as_failures() { let mut m = ScriptedModel::new(vec![ vec![edit_call("1", "nope", "x")], - vec![call("2", "view")], // resets the streak + vec![call("2", "view")], vec![edit_call("3", "nope", "x")], vec![edit_call("4", "a", "b"), call("5", "finish")], ]); diff --git a/src/anthropic.rs b/src/anthropic.rs index d4695fa..b228cba 100644 --- a/src/anthropic.rs +++ b/src/anthropic.rs @@ -1,5 +1,3 @@ -//! Anthropic (Claude) Messages API edit-mode agent. - use schemars::Schema; use serde::Serialize; use serde_json::{json, Value}; @@ -40,16 +38,8 @@ enum ContentBlock { #[derive(Serialize)] #[serde(tag = "role", rename_all = "snake_case")] enum Message { - User { - content: Vec, - }, - /// Raw `Value`, not typed blocks: the assistant turn is echoed back verbatim - /// for the `tool_use`/`tool_result` handshake, and re-serializing parsed - /// blocks would reorder fields and drop ones refac doesn't model (e.g. - /// `thinking` signatures) that the next turn depends on. - Assistant { - content: Value, - }, + User { content: Vec }, + Assistant { content: Value }, } #[derive(Serialize)] @@ -97,8 +87,6 @@ impl AnthropicAgent { kind: TextType::Text, text: seed.system.to_string(), }]; - // User instruction, then the synthetic `view` call (see `SEED_TOOL`) whose - // result carries `selected`. 
let messages = vec![ Message::User { content: vec![ContentBlock::Text { @@ -218,8 +206,6 @@ fn post(client: &reqwest::blocking::Client, key: &str, req: &Request) -> anyhow: mod tests { use super::*; - /// The wire JSON refac actually sends — the unit tests pin the typed structs - /// to this exact shape so a serialization change can't silently break it. fn request_json(agent: &AnthropicAgent) -> Value { serde_json::to_value(agent.request()).unwrap() } @@ -237,13 +223,10 @@ mod tests { assert_eq!(req["system"][0]["type"], "text"); assert_eq!(req["system"][0]["text"], "SYS"); - // The instruction is the only user message; `selected` is NOT among the - // user content, it arrives as the seeded `view` call's result below. assert_eq!(req["messages"][0]["role"], "user"); assert_eq!(req["messages"][0]["content"][0]["type"], "text"); assert_eq!(req["messages"][0]["content"][0]["text"], "transform"); assert_eq!(req["messages"][0]["content"][1], Value::Null); - // The pre-seeded `view` call and its result — the sole carrier of `selected`. assert_eq!(req["messages"][1]["role"], "assistant"); assert_eq!(req["messages"][1]["content"][0]["type"], "tool_use"); assert_eq!(req["messages"][1]["content"][0]["name"], "view"); @@ -278,8 +261,6 @@ mod tests { is_error: false, }], }); - // Index 3: the three-message seed (user instruction, seeded `view` call, - // its result) occupies 0..3. let req = request_json(&agent); let block = &req["messages"][3]["content"][0]; assert_eq!(req["messages"][3]["role"], "user"); @@ -298,8 +279,6 @@ mod tests { transform: "transform", }; let mut agent = AnthropicAgent::new("k".into(), "m".into(), &seed, &tools); - // An assistant turn carrying a block type refac doesn't model must - // round-trip unchanged. 
let raw = json!([ { "type": "thinking", "thinking": "hmm", "signature": "sig" }, { "type": "tool_use", "id": "tu_1", "name": "edit", "input": { "old": "a", "new": "b" } } @@ -307,7 +286,6 @@ mod tests { agent.messages.push(Message::Assistant { content: raw.clone(), }); - // Index 3: the three-message seed occupies 0..3. let req = request_json(&agent); assert_eq!(req["messages"][3]["role"], "assistant"); assert_eq!(req["messages"][3]["content"], raw); diff --git a/src/backend.rs b/src/backend.rs index 00e3ea9..69812a1 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -1,5 +1,3 @@ -//! Turning a `Provider` choice into a ready-to-run, key-bearing edit-mode model. - use std::time::Duration; use anyhow::{Context, Result}; @@ -10,7 +8,6 @@ use crate::anthropic::AnthropicAgent; use crate::config_files::{Provider, Secrets}; use crate::openai::OpenaiAgent; -/// The one spot that knows how each provider sources its API key. fn key_for(provider: Provider, secrets: &Secrets) -> Result { match provider { Provider::Anthropic => secrets.anthropic_api_key.clone().ok_or_else(|| { @@ -45,10 +42,6 @@ pub fn http_client() -> reqwest::blocking::Client { .expect("building HTTP client") } -/// Send a built (authed, JSON-bodied) request and return the parsed response. -/// Reads the body as text first so a non-JSON error page — what a gateway or -/// proxy returns on 429/5xx — survives into the error instead of being lost to a -/// JSON-parse failure. pub fn send_json(request: reqwest::blocking::RequestBuilder) -> Result { let response = request.send().context("sending request")?; let status = response.status(); diff --git a/src/config_files.rs b/src/config_files.rs index abdc337..9851cf8 100644 --- a/src/config_files.rs +++ b/src/config_files.rs @@ -16,8 +16,6 @@ pub struct Secrets { } impl Secrets { - /// Env vars (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`) take precedence over - /// `secrets.toml`, and a missing file is fine — env vars alone are enough. 
pub fn load() -> anyhow::Result { let mut secrets: Secrets = match base()?.find_config_file("secrets.toml") { Some(path) => toml::from_str(&fs::read_to_string(path)?)?, @@ -35,7 +33,6 @@ impl Secrets { pub fn save(&self) -> anyhow::Result<()> { let path = base()?.place_config_file("secrets.toml")?; let contents = toml::to_string(self)?; - // Holds the API key in cleartext — keep it owner-only. #[cfg(unix)] { use std::io::Write; @@ -47,8 +44,6 @@ impl Secrets { .mode(0o600) .open(&path)? .write_all(contents.as_bytes())?; - // `place_config_file` may have created the file 0644 already, so the - // mode above wouldn't apply; force it. use std::os::unix::fs::PermissionsExt; fs::set_permissions(&path, fs::Permissions::from_mode(0o600))?; } @@ -80,9 +75,7 @@ impl Config { None => Config::default(), }; if let Ok(from_env) = std::env::var("REFAC_PROVIDER") { - // Parse through the same ValueEnum that defines the variants, so the - // accepted spellings can't drift from `Provider` itself. - let provider = clap::ValueEnum::from_str(&from_env, /* ignore_case */ true) + let provider = clap::ValueEnum::from_str(&from_env, true) .map_err(|e| anyhow::anyhow!("invalid REFAC_PROVIDER: {e}"))?; ret.provider = Some(provider); } @@ -92,8 +85,6 @@ impl Config { Ok(ret) } - /// An explicit choice wins; otherwise infer from the configured keys, leaning - /// Anthropic when both or neither are present. pub fn provider(&self, secrets: &Secrets) -> Provider { if let Some(p) = self.provider { return p; diff --git a/src/edit.rs b/src/edit.rs index 1a624b3..6038927 100644 --- a/src/edit.rs +++ b/src/edit.rs @@ -1,26 +1,13 @@ -//! Structured edits and how they're applied. -//! -//! The model calls a single-edit `edit` tool, possibly several times in one turn -//! (both providers support parallel tool calls); refac applies each `{old, new}` -//! replacement to the selected text. The hard part is that the model's `old` -//! 
rarely matches byte-for-byte — indentation drifts, whitespace reflows, a -//! block gets reworded. So matching runs a chain of progressively looser -//! strategies, exact first, and the first candidate that lands a *unique* hit -//! wins. A match that's missing or ambiguous is an error fed back to the model, -//! never a silent mis-apply: a wrong edit is worse than a refused one. - use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -// `schemars` turns the field doc comments below into the model-facing JSON-schema -// descriptions, so they're verbatim model instructions, not narration for readers. #[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] pub struct Edit { - /// exact text to replace + #[schemars(description = "exact text to replace")] pub old: String, - /// replacement text + #[schemars(description = "replacement text")] pub new: String, - /// replace every occurrence + #[schemars(description = "replace every occurrence")] #[serde(default)] pub replace_all: bool, } @@ -57,9 +44,6 @@ impl std::fmt::Display for EditError { impl std::error::Error for EditError {} -/// Walks the replacer chain (exact first) and requires a unique hit unless -/// `replace_all`. Folded over a turn's edits, so a later edit sees what an -/// earlier one produced. pub fn apply(src: &str, edit: &Edit) -> Result { if edit.old.is_empty() { return Err(EditError::EmptyOld); @@ -70,17 +54,10 @@ pub fn apply(src: &str, edit: &Edit) -> Result { }); } - // Track the best diagnosis across the chain: an ambiguous candidate is a - // more useful complaint than "not found", so remember it if nothing unique - // turns up. let mut ambiguous: Option = None; for replacer in CHAIN { for cand in replacer(src, &edit.old) { - // A blank `old` line trims to "" and yields an empty span; matching - // "" hits between every char, so `replace_all` would splatter `new` - // across the whole buffer. Skip it — an empty candidate is never a - // real match. 
if cand.is_empty() { continue; } @@ -112,13 +89,8 @@ pub fn apply(src: &str, edit: &Edit) -> Result { }) } -/// A replacer yields candidate substrings of `src` to look for, fuzzy intent but -/// the yielded string is always exact text *from* `src` (or `old` itself, for -/// the exact replacer) so the caller can find it and check uniqueness uniformly. type Replacer = fn(src: &str, old: &str) -> Vec; -/// Exact first, then progressively looser. Order matters: a precise match must -/// win before a fuzzy one gets a chance. const CHAIN: &[Replacer] = &[ simple, line_trimmed, @@ -147,8 +119,6 @@ fn span(src: &str, lines: &[(usize, &str)], i: usize, k: usize) -> String { src[start..end].to_string() } -/// Match line-by-line ignoring each line's surrounding whitespace; yield the -/// original (untrimmed) source span so indentation is preserved on splice. fn line_trimmed(src: &str, old: &str) -> Vec { let src_lines = lines_with_offsets(src); let old_lines: Vec<&str> = lines_with_offsets(old).iter().map(|(_, l)| *l).collect(); @@ -165,9 +135,6 @@ fn line_trimmed(src: &str, old: &str) -> Vec { out } -/// For 3+ line blocks: anchor on the first and last (trimmed) lines, and accept -/// the window only if a majority of its non-empty middle lines also match. Lets -/// a reworded interior through while resisting wild matches. fn block_anchor(src: &str, old: &str) -> Vec { let src_lines = lines_with_offsets(src); let old_lines: Vec<&str> = lines_with_offsets(old).iter().map(|(_, l)| *l).collect(); @@ -194,8 +161,6 @@ fn block_anchor(src: &str, old: &str) -> Vec { matched += 1; } } - // Require some non-empty middle line to actually match — anchors alone - // (an all-blank middle) are too weak to trust. 
if considered > 0 && matched * 2 >= considered { out.push(span(src, &src_lines, i, i + n - 1)); } @@ -203,8 +168,6 @@ fn block_anchor(src: &str, old: &str) -> Vec { out } -/// Collapse `old` to whitespace-insensitive tokens and find a source region -/// holding those tokens in order, separated only by whitespace. fn whitespace_normalized(src: &str, old: &str) -> Vec { let tokens: Vec<&str> = old.split_whitespace().collect(); if tokens.is_empty() { @@ -215,8 +178,6 @@ fn whitespace_normalized(src: &str, old: &str) -> Vec { let mut from = 0; while let Some(rel) = src[from..].find(tokens[0]) { let start = from + rel; - // Advance past the first char of this match (not one byte) so the next - // search stays on a char boundary even for multi-byte text. from = start + src[start..].chars().next().map_or(1, char::len_utf8); let mut pos = start + tokens[0].len(); let mut ok = true; @@ -238,8 +199,6 @@ fn whitespace_normalized(src: &str, old: &str) -> Vec { out } -/// Strip common leading indentation from `old` and from each same-height source -/// window; where the dedented forms match, yield the original window. fn indentation_flexible(src: &str, old: &str) -> Vec { let src_lines = lines_with_offsets(src); let old_lines: Vec<&str> = lines_with_offsets(old).iter().map(|(_, l)| *l).collect(); @@ -265,8 +224,6 @@ fn dedent(lines: &[&str]) -> Vec { .map(|l| l.len() - l.trim_start().len()) .min() .unwrap_or(0); - // `indent` is the min byte-width across lines, so on a given line it can land - // mid-char (multi-byte leading whitespace) — `get` declines that, no panic. lines .iter() .map(|l| l.get(indent..).unwrap_or(l).to_string()) @@ -289,7 +246,6 @@ mod tests { apply(text, &edit(old, new)) } - /// Fold `apply` over a turn's worth of edits, as the driver does. 
fn apply_seq(text: &str, edits: &[Edit]) -> Result { let mut buf = text.to_string(); for e in edits { @@ -308,7 +264,6 @@ mod tests { #[test] fn batch_applies_in_order() { - // a later edit can target text an earlier edit produced. let edits = vec![edit("foo", "bar"), edit("bar", "baz")]; assert_eq!(apply_seq("foo", &edits).unwrap(), "baz"); let edits = vec![edit("one", "1"), edit("two", "2")]; @@ -385,9 +340,6 @@ mod tests { #[test] fn dedented_old_matches_indented_source() { - // The model wrote `old` without the source's indentation; we still find - // the block. `new` is spliced verbatim, so the model owns the - // indentation it wants in the result. let src = "if cond:\n a = 1\n b = 2\n"; let old = "a = 1\nb = 2"; let new = " a = 10\n b = 20"; @@ -403,7 +355,6 @@ mod tests { #[test] fn whitespace_normalized_multibyte_no_panic() { - // Regression: a non-ASCII first token must not slice mid-char. assert!(matches!( run("α β", "α x", "z"), Err(EditError::NotFound { .. }) @@ -414,9 +365,6 @@ mod tests { #[test] fn block_anchor_reworded_middle() { let src = "fn f() {\n let a = compute();\n let b = a + 1;\n return b;\n}"; - // The model's `old` got the last middle line wrong (return b -> return - // result). Exact and line-trimmed both miss; the first/last anchors plus - // a majority of matching middle lines pin the real region. let old = "fn f() {\n let a = compute();\n let b = a + 1;\n return result;\n}"; let got = run(src, old, "fn f() { 42 }").unwrap(); assert_eq!(got, "fn f() { 42 }"); @@ -424,9 +372,6 @@ mod tests { #[test] fn blank_old_does_not_splatter_under_replace_all() { - // A whitespace-only `old` trims to "" and the line matchers yield empty - // spans; without the empty-candidate guard, replace_all on "" would - // rewrite between every char. It must report NotFound instead. 
let e = Edit { old: " ".into(), new: "X".into(), @@ -440,7 +385,6 @@ mod tests { #[test] fn exact_beats_fuzzy_for_uniqueness() { - // two indentation-equal blocks, but an exact match is unique → applied. let src = " a = 1\n a = 1\n"; let got = run(src, " a = 1", " a = 2").unwrap(); assert_eq!(got, " a = 1\n a = 2\n"); diff --git a/src/main.rs b/src/main.rs index c82dbc4..71311ad 100644 --- a/src/main.rs +++ b/src/main.rs @@ -27,13 +27,10 @@ struct Opts { #[derive(Parser)] enum SubCommand { - /// Save your API key for future use. Pass `--provider`, or pick one interactively. Login { #[clap(long)] provider: Option, }, - /// Apply the instructions encoded in `transform` to the text in `selected`. - /// Get it? 'refac tor' Tor { selected: String, transform: String }, } @@ -42,7 +39,7 @@ fn main() { match run() { Ok(()) => {} Err(e) => { - eprintln!("{:?}", e); + eprintln!("{e:?}"); std::process::exit(1); } } @@ -88,7 +85,7 @@ fn run() -> anyhow::Result<()> { let secrets = Secrets::load()?; let config = Config::load()?; let completion = refactor(selected, transform, &secrets, &config)?; - print!("{}", completion); + print!("{completion}"); } }; @@ -104,9 +101,6 @@ fn refactor( let provider = config.provider(sc); let model = config.model(provider); - // The edit buffer and the shown `selected` must be the same string, including - // the empty-input placeholder — that's what lets the model generate from an - // empty selection: it `edit`s the placeholder away. let seed_selected = agent::placeholder_if_empty(&selected).to_owned(); let seed = agent::Seed { system: prompt::SYSTEM_PROMPT, @@ -122,8 +116,6 @@ fn refactor( agent::DEFAULT_MAX_TURNS, )?; - // Log each edit attempt so we can see how often the model's `old` misses — - // the failure-rate signal. for attempt in &outcome.attempts { let _ = log( EditLog { @@ -192,9 +184,9 @@ fn log(t: T, title: &str) -> anyhow::Result<()> { .open(log_location(title)?) 
.context("opening log file")?; let line = serde_json::to_string(&t)?; - writeln!(file, "{}", line)?; + writeln!(file, "{line}")?; Ok(()) } - inner(t, title).with_context(|| format!("failed to log {}", title)) + inner(t, title).with_context(|| format!("failed to log {title}")) } diff --git a/src/openai.rs b/src/openai.rs index d0b16fd..09ca20a 100644 --- a/src/openai.rs +++ b/src/openai.rs @@ -1,5 +1,3 @@ -//! OpenAI chat-completions API edit-mode agent. - use schemars::Schema; use serde::{Deserialize, Serialize}; use serde_json::Value; @@ -21,14 +19,9 @@ enum Message { tool_call_id: String, content: String, }, - /// A newtype variant (not fields) so the received [`AssistantTurn`] flows - /// straight back out for the echo with no field-by-field copy to drift. Assistant(AssistantTurn), } -/// `arguments` stays a verbatim `String`, not a parsed `Value`: it's a JSON -/// string on the wire, and reparsing would reorder keys and renormalize -/// numbers/whitespace, so the echo would no longer match the received bytes. #[derive(Serialize, Deserialize)] struct ToolCall { id: String, @@ -43,11 +36,6 @@ struct FunctionCall { arguments: String, } -/// Fully typed rather than a raw `Value`: unlike Anthropic (whose opaque -/// `thinking` blocks must round-trip verbatim), chat-completions has no -/// echo-required fields beyond these, so unmodeled ones (e.g. `refusal`) can -/// drop on echo. Must serialize to a JSON object — `Message`'s `tag = "role"` -/// injects `role` into this value's map, which needs a map-shaped inner. #[derive(Serialize, Deserialize)] struct AssistantTurn { content: Option, @@ -93,8 +81,6 @@ struct Request<'a> { impl OpenaiAgent { pub fn new(key: String, model: String, seed: &Seed, tools: &[Tool]) -> Self { - // User instruction, then the synthetic `view` call (see `SEED_TOOL`) whose - // result carries `selected`. 
let messages = vec![ Message::System { content: seed.system.to_string(), @@ -150,8 +136,6 @@ impl OpenaiAgent { impl Model for OpenaiAgent { fn turn(&mut self, results: Vec) -> anyhow::Result> { - // chat-completions has no error flag on a tool message, so mark a failed - // result in the content itself. for r in results { let content = match r.result { Ok(c) => c, @@ -176,9 +160,6 @@ impl Model for OpenaiAgent { } } -/// chat-completions delivers each call's `arguments` as a JSON string; parse it -/// into the [`RawCall::args`] object refac dispatches on. A call whose arguments -/// aren't valid JSON falls back to an empty object rather than dropping the call. fn raw_calls(tool_calls: &[ToolCall]) -> Vec { tool_calls .iter() @@ -200,8 +181,6 @@ mod tests { use super::*; use serde_json::json; - /// The wire JSON refac actually sends — the unit tests pin the typed structs - /// to this exact shape so a serialization change can't silently break it. fn request_json(agent: &OpenaiAgent) -> Value { serde_json::to_value(agent.request()).unwrap() } @@ -220,11 +199,8 @@ mod tests { assert_eq!(req["tool_choice"], "auto"); assert_eq!(req["messages"][0]["role"], "system"); assert_eq!(req["messages"][0]["content"], "SYS"); - // The instruction is the only user message; `selected` is NOT a user - // message, it arrives as the seeded `view` call's result below. assert_eq!(req["messages"][1]["role"], "user"); assert_eq!(req["messages"][1]["content"], "transform"); - // The pre-seeded `view` call and its result — the sole carrier of `selected`. assert_eq!(req["messages"][2]["role"], "assistant"); assert_eq!(req["messages"][2]["tool_calls"][0]["function"]["name"], "view"); let seed_id = req["messages"][2]["tool_calls"][0]["id"].clone(); @@ -254,8 +230,6 @@ mod tests { tool_call_id: "c1".into(), content: "ok".into(), }); - // Index 4: the four-message seed (system, user instruction, seeded `view` - // call, its result) occupies 0..4. 
let req = request_json(&agent); let msg = &req["messages"][4]; assert_eq!(msg["role"], "tool"); @@ -272,9 +246,6 @@ mod tests { transform: "transform", }; let mut agent = OpenaiAgent::new("k".into(), "m".into(), &seed, &tools); - // A tool-calling assistant turn round-trips to the canonical wire shape: - // `role` once, a `null` content, and the typed `tool_calls` with their - // `arguments` JSON string untouched. let raw = json!({ "role": "assistant", "content": null, @@ -285,21 +256,13 @@ mod tests { }); let turn: AssistantTurn = serde_json::from_value(raw.clone()).unwrap(); agent.messages.push(Message::Assistant(turn)); - // Index 4: the four-message seed occupies 0..4. assert_eq!(request_json(&agent)["messages"][4], raw); - // Each assistant message's role-tagged enum must emit `role` exactly once - // (the bug a second, body-carried `role` would reintroduce). Two assistant - // turns here — the seeded `view` call and the one just pushed — so exactly - // two; a duplicated `role` would push the count past two. let wire = serde_json::to_string(&agent.request()).unwrap(); assert_eq!(wire.matches("\"role\":\"assistant\"").count(), 2); } #[test] fn assistant_arguments_string_is_byte_identical() { - // `arguments` stays a verbatim `String`: a payload serde_json would - // reorder (`b` before `a`) and renormalize (spaces, number form) on a - // reparse must echo back byte-for-byte. let args = "{\"b\": 1, \"a\": 1.0, \"n\": 1e3}"; let raw = json!({ "role": "assistant", diff --git a/src/prompt.rs b/src/prompt.rs index ee2196b..2b49268 100644 --- a/src/prompt.rs +++ b/src/prompt.rs @@ -1,5 +1,3 @@ -// Tool *mechanics* live on each tool's own description, not here, so the prompt -// stays about role and task. pub const SYSTEM_PROMPT: &str = "You are a sassy AI refactoring tool for code and other text. You are called `refac`. The user selected some text and gave a transformation to apply to it. 
Apply the transformation by editing the selected text with the provided tools, then call `finish`. refac outputs the edited text. From 77fd5319047779c5aa89bd7c19314db9149f6eda Mon Sep 17 00:00:00 2001 From: bddap-bot Date: Fri, 19 Jun 2026 17:44:57 -0700 Subject: [PATCH 25/25] Type provider unions: ToolChoice enum, Assistant fields + flattened extra MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit anthropic: replace the hand-rolled `ToolChoiceAuto` struct with an internally-tagged `ToolChoice { Auto, Any, Tool { name } }` enum matching Anthropic's `tool_choice` wire shape; replace the assistant message's raw `content: Value` with a typed `Vec<AssistantBlock>` (internally tagged on `type`: Text / Thinking / RedactedThinking / ToolUse), each variant carrying a flattened `Map<String, Value>` so unmodeled keys (e.g. a thinking block's signature) survive the next-turn handshake. openai: give the assistant carry types the same treatment — `AssistantTurn`, `ToolCall`, `FunctionCall` each gain a flattened `Map<String, Value>` for unmodeled fields (refusal, reasoning, tool-call index). A deserialize-only `role` field absorbs the incoming role so it can't land in the flattened map and re-emit as a duplicate alongside the `#[serde(tag = "role")]` tag. `extra` is `Map<String, Value>`, not `Value`, so a non-object flatten target is unrepresentable. The tool-call `arguments` stays a verbatim `String`. New tests pin the ToolChoice wire shapes and the OpenAI flatten/no-duplicate-role round-trip; existing wire/verbatim tests are unchanged. 45 tests pass, clippy clean. 
Co-Authored-By: Claude Opus 4.8 --- src/anthropic.rs | 117 ++++++++++++++++++++++++++++++++--------------- src/openai.rs | 40 +++++++++++++++- 2 files changed, 118 insertions(+), 39 deletions(-) diff --git a/src/anthropic.rs b/src/anthropic.rs index b228cba..e8ac93d 100644 --- a/src/anthropic.rs +++ b/src/anthropic.rs @@ -1,6 +1,6 @@ use schemars::Schema; -use serde::Serialize; -use serde_json::{json, Value}; +use serde::{Deserialize, Serialize}; +use serde_json::{Map, Value}; use crate::agent::{Model, RawCall, Seed, Tool, ToolResult, SEED_CALL_ID, SEED_TOOL}; @@ -39,7 +39,33 @@ enum ContentBlock { #[serde(tag = "role", rename_all = "snake_case")] enum Message { User { content: Vec }, - Assistant { content: Value }, + Assistant { content: Vec }, +} + +#[derive(Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "snake_case")] +enum AssistantBlock { + Text { + text: String, + #[serde(flatten)] + extra: Map, + }, + Thinking { + thinking: String, + #[serde(flatten)] + extra: Map, + }, + RedactedThinking { + #[serde(flatten)] + extra: Map, + }, + ToolUse { + id: String, + name: String, + input: Value, + #[serde(flatten)] + extra: Map, + }, } #[derive(Serialize)] @@ -50,15 +76,12 @@ struct ToolDef { } #[derive(Serialize)] -struct ToolChoiceAuto { - #[serde(rename = "type")] - kind: AutoType, -} - -#[derive(Serialize)] -#[serde(rename_all = "snake_case")] -enum AutoType { +#[serde(tag = "type", rename_all = "snake_case")] +#[allow(dead_code)] +enum ToolChoice { Auto, + Any, + Tool { name: String }, } pub struct AnthropicAgent { @@ -76,7 +99,7 @@ struct Request<'a> { max_tokens: u32, messages: &'a [Message], tools: &'a [ToolDef], - tool_choice: ToolChoiceAuto, + tool_choice: ToolChoice, #[serde(skip_serializing_if = "<[_]>::is_empty")] system: &'a [SystemBlock], } @@ -94,12 +117,12 @@ impl AnthropicAgent { }], }, Message::Assistant { - content: json!([{ - "type": "tool_use", - "id": SEED_CALL_ID, - "name": SEED_TOOL, - "input": Seed::seed_call_args(), - }]), + 
content: vec![AssistantBlock::ToolUse { + id: SEED_CALL_ID.to_string(), + name: SEED_TOOL.to_string(), + input: Seed::seed_call_args(), + extra: Map::new(), + }], }, Message::User { content: vec![ContentBlock::ToolResult { @@ -133,9 +156,7 @@ impl AnthropicAgent { max_tokens: MAX_TOKENS, messages: &self.messages, tools: &self.tools, - tool_choice: ToolChoiceAuto { - kind: AutoType::Auto, - }, + tool_choice: ToolChoice::Auto, system: &self.system, } } @@ -166,24 +187,26 @@ impl Model for AnthropicAgent { .get("content") .cloned() .ok_or_else(|| anyhow::anyhow!("Anthropic response missing content: {body}"))?; + let content: Vec = serde_json::from_value(content) + .map_err(|e| anyhow::anyhow!("Anthropic content did not parse: {e}"))?; let calls = calls_from_content(&content); self.messages.push(Message::Assistant { content }); Ok(calls) } } -fn calls_from_content(content: &Value) -> Vec { +fn calls_from_content(content: &[AssistantBlock]) -> Vec { content - .as_array() - .into_iter() - .flatten() - .filter(|b| b.get("type").and_then(Value::as_str) == Some("tool_use")) - .filter_map(|b| { - Some(RawCall { - id: b.get("id")?.as_str()?.to_string(), - name: b.get("name")?.as_str()?.to_string(), - args: b.get("input").cloned().unwrap_or_else(|| json!({})), - }) + .iter() + .filter_map(|b| match b { + AssistantBlock::ToolUse { + id, name, input, .. 
+ } => Some(RawCall { + id: id.clone(), + name: name.clone(), + args: input.clone(), + }), + _ => None, }) .collect() } @@ -205,11 +228,28 @@ fn post(client: &reqwest::blocking::Client, key: &str, req: &Request) -> anyhow: #[cfg(test)] mod tests { use super::*; + use serde_json::json; fn request_json(agent: &AnthropicAgent) -> Value { serde_json::to_value(agent.request()).unwrap() } + #[test] + fn tool_choice_serializes_to_wire_shape() { + assert_eq!( + serde_json::to_value(ToolChoice::Auto).unwrap(), + json!({ "type": "auto" }) + ); + assert_eq!( + serde_json::to_value(ToolChoice::Any).unwrap(), + json!({ "type": "any" }) + ); + assert_eq!( + serde_json::to_value(ToolChoice::Tool { name: "edit".into() }).unwrap(), + json!({ "type": "tool", "name": "edit" }) + ); + } + #[test] fn agent_request_carries_tools_and_seed() { let tools = crate::agent::tools(); @@ -283,9 +323,8 @@ mod tests { { "type": "thinking", "thinking": "hmm", "signature": "sig" }, { "type": "tool_use", "id": "tu_1", "name": "edit", "input": { "old": "a", "new": "b" } } ]); - agent.messages.push(Message::Assistant { - content: raw.clone(), - }); + let content: Vec = serde_json::from_value(raw.clone()).unwrap(); + agent.messages.push(Message::Assistant { content }); let req = request_json(&agent); assert_eq!(req["messages"][3]["role"], "assistant"); assert_eq!(req["messages"][3]["content"], raw); @@ -293,12 +332,13 @@ mod tests { #[test] fn parses_tool_use_blocks() { - let content = json!([ + let content: Vec = serde_json::from_value(json!([ { "type": "text", "text": "let me fix that" }, { "type": "tool_use", "id": "tu_1", "name": "edit", "input": { "old": "a", "new": "b" } }, { "type": "tool_use", "id": "tu_2", "name": "finish", "input": {} } - ]); + ])) + .unwrap(); let calls = calls_from_content(&content); assert_eq!(calls.len(), 2); assert_eq!(calls[0].id, "tu_1"); @@ -309,7 +349,8 @@ mod tests { #[test] fn no_tool_use_is_no_calls() { - let content = json!([{ "type": "text", "text": "all done" 
}]); + let content: Vec = + serde_json::from_value(json!([{ "type": "text", "text": "all done" }])).unwrap(); assert!(calls_from_content(&content).is_empty()); } } diff --git a/src/openai.rs b/src/openai.rs index 09ca20a..4d10eb7 100644 --- a/src/openai.rs +++ b/src/openai.rs @@ -1,6 +1,6 @@ use schemars::Schema; use serde::{Deserialize, Serialize}; -use serde_json::Value; +use serde_json::{Map, Value}; use crate::agent::{Model, RawCall, Seed, Tool, ToolResult, SEED_CALL_ID, SEED_TOOL}; @@ -28,19 +28,28 @@ struct ToolCall { #[serde(rename = "type")] kind: FunctionType, function: FunctionCall, + #[serde(flatten)] + extra: Map, } #[derive(Serialize, Deserialize)] struct FunctionCall { name: String, arguments: String, + #[serde(flatten)] + extra: Map, } #[derive(Serialize, Deserialize)] struct AssistantTurn { + #[serde(default, skip_serializing)] + #[allow(dead_code)] + role: Option, content: Option, #[serde(default, skip_serializing_if = "Option::is_none")] tool_calls: Option>, + #[serde(flatten)] + extra: Map, } #[derive(Serialize)] @@ -89,6 +98,7 @@ impl OpenaiAgent { content: seed.transform.to_string(), }, Message::Assistant(AssistantTurn { + role: None, content: None, tool_calls: Some(vec![ToolCall { id: SEED_CALL_ID.to_string(), @@ -96,8 +106,11 @@ impl OpenaiAgent { function: FunctionCall { name: SEED_TOOL.to_string(), arguments: Seed::seed_call_args().to_string(), + extra: Map::new(), }, + extra: Map::new(), }]), + extra: Map::new(), }), Message::Tool { tool_call_id: SEED_CALL_ID.to_string(), @@ -261,6 +274,31 @@ mod tests { assert_eq!(wire.matches("\"role\":\"assistant\"").count(), 2); } + #[test] + fn echoed_assistant_turn_retains_unmodeled_fields_without_duplicate_role() { + let api_msg = json!({ + "role": "assistant", + "content": null, + "refusal": null, + "reasoning": "let me think", + "tool_calls": [ + { "id": "c1", "type": "function", "index": 0, + "function": { "name": "edit", "arguments": "{\"old\":\"a\",\"new\":\"b\"}" } } + ] + }); + let turn: 
AssistantTurn = serde_json::from_value(api_msg.clone()).unwrap(); + let wire = serde_json::to_string(&Message::Assistant(turn)).unwrap(); + assert_eq!(wire.matches("\"role\":\"assistant\"").count(), 1); + let back: Value = serde_json::from_str(&wire).unwrap(); + assert_eq!(back["refusal"], api_msg["refusal"]); + assert_eq!(back["reasoning"], api_msg["reasoning"]); + assert_eq!(back["tool_calls"][0]["index"], api_msg["tool_calls"][0]["index"]); + assert_eq!( + back["tool_calls"][0]["function"]["arguments"], + api_msg["tool_calls"][0]["function"]["arguments"] + ); + } + #[test] fn assistant_arguments_string_is_byte_identical() { let args = "{\"b\": 1, \"a\": 1.0, \"n\": 1e3}";