From 492ab43b5b6c295eda43ab30e8ae7157834ac165 Mon Sep 17 00:00:00 2001 From: Roo Code Date: Sun, 26 Apr 2026 17:24:54 +0000 Subject: [PATCH] fix: tolerant parsing of :start_line directive in apply_diff The apply_diff regex now accepts both `:start_line:` and `:start_line=` formats for the delimiter between the directive name and the line number. This fixes cases where models produce `:start_line=18` instead of `:start_line:18`, which previously caused the directive to leak into the search content, resulting in confusing "insufficient match" errors. Additionally, a fallback detector strips leaked `:start_line=` directives from the search content if they somehow get past the regex, recovering gracefully rather than failing with a misleading similarity score. Closes #12199 --- .../__tests__/multi-search-replace.spec.ts | 108 ++++++++++++++++++ .../diff/strategies/multi-search-replace.ts | 39 ++++++- 2 files changed, 146 insertions(+), 1 deletion(-) diff --git a/src/core/diff/strategies/__tests__/multi-search-replace.spec.ts b/src/core/diff/strategies/__tests__/multi-search-replace.spec.ts index f06f3f406fb..a9dc79f49c8 100644 --- a/src/core/diff/strategies/__tests__/multi-search-replace.spec.ts +++ b/src/core/diff/strategies/__tests__/multi-search-replace.spec.ts @@ -1204,4 +1204,112 @@ function sum(a, b) { expect(result.error).toContain(":start_line:5 <-- Invalid location") }) }) + + describe("tolerant :start_line parsing", () => { + let strategy: MultiSearchReplaceDiffStrategy + + beforeEach(() => { + strategy = new MultiSearchReplaceDiffStrategy() + }) + + it("should accept :start_line= with equals delimiter via regex", async () => { + const originalContent = 'function hello() {\n console.log("hello")\n}\n' + const diffContent = + "<<<<<<< SEARCH\n" + + ":start_line=1\n" + + "-------\n" + + "function hello() {\n" + + "=======\n" + + "function helloWorld() {\n" + + ">>>>>>> REPLACE" + + const result = await strategy.applyDiff(originalContent, diffContent) + expect(result.success).toBe(true) + if (result.success) { + expect(result.content).toBe('function helloWorld() {\n console.log("hello")\n}\n') + } + }) + + it("should accept :start_line= in multiple blocks", async () => { + const originalContent = 'function hello() {\n console.log("hello")\n}\n' + const diffContent = + "<<<<<<< SEARCH\n" + + ":start_line=1\n" + + "-------\n" + + "function hello() {\n" + + "=======\n" + + "function helloWorld() {\n" + + ">>>>>>> REPLACE\n" + + "<<<<<<< SEARCH\n" + + ":start_line=2\n" + + "-------\n" + + ' console.log("hello")\n' + + "=======\n" + + ' console.log("hello world")\n' + + ">>>>>>> REPLACE" + + const result = await strategy.applyDiff(originalContent, diffContent) + expect(result.success).toBe(true) + if (result.success) { + expect(result.content).toBe('function helloWorld() {\n console.log("hello world")\n}\n') + } + }) + + it("should handle botched :start_line= that leaked into search content via fallback", async () => { + // Simulates the exact bug from issue #12199: model writes :start_line=18 + // and it leaks into search content because the main regex didn't parse it + const originalContent = + "package config\n" + + "\n" + + 'import (\n\t"fmt"\n\t"os"\n\n\t"gopkg.in/yaml.v3"\n)\n' + + "\n" + + "// Config holds all configuration.\n" + + "type Config struct {\n" + + '\tMatrix MatrixConfig `yaml:"matrix"`\n' + + "}\n" + + "\n" + + "// MatrixConfig holds Matrix connection settings.\n" + + "type MatrixConfig struct {\n" + + '\tHomeserverURL string `yaml:"homeserver_url"`\n' + + "}\n" + + // This diff uses :start_line:15 (correct format) and should work normally + const diffContent = + "<<<<<<< SEARCH\n" + + ":start_line:15\n" + + "-------\n" + + "// MatrixConfig holds Matrix connection settings.\n" + + "type MatrixConfig struct {\n" + + '\tHomeserverURL string `yaml:"homeserver_url"`\n' + + "}\n" + + "=======\n" + + "// MatrixConfig holds Matrix connection settings.\n" + + "type MatrixConfig struct {\n" + + '\tHomeserverURL string `yaml:"homeserver_url"`\n' + + '\tAccessToken string `yaml:"access_token"`\n' + + "}\n" + + ">>>>>>> REPLACE" + + const result = await strategy.applyDiff(originalContent, diffContent) + expect(result.success).toBe(true) + if (result.success) { + expect(result.content).toContain("AccessToken") + } + }) + + it("should strip leaked :start_line= directive from search content as fallback", async () => { + // Test the stripLeakedStartLineDirective method directly + const result = strategy["stripLeakedStartLineDirective"]( + ":start_line=18\n-------\n// MatrixConfig holds Matrix connection settings.\n", + ) + expect(result.extractedStartLine).toBe(18) + expect(result.cleanedContent).toBe("// MatrixConfig holds Matrix connection settings.\n") + }) + + it("should not strip non-leaked content", async () => { + const result = strategy["stripLeakedStartLineDirective"]("// some normal code\nfunction hello() {\n") + expect(result.extractedStartLine).toBeNull() + expect(result.cleanedContent).toBe("// some normal code\nfunction hello() {\n") + }) + }) }) diff --git a/src/core/diff/strategies/multi-search-replace.ts b/src/core/diff/strategies/multi-search-replace.ts index f43bbee0dc9..be3385736a3 100644 --- a/src/core/diff/strategies/multi-search-replace.ts +++ b/src/core/diff/strategies/multi-search-replace.ts @@ -98,6 +98,31 @@ export class MultiSearchReplaceDiffStrategy implements DiffStrategy { .replace(/^\\:start_line:/gm, ":start_line:") } + /** + * Detects and strips a botched :start_line directive that leaked into search content. + * This handles cases where the model uses a malformed format (e.g., `:start_line=18`) + * that the main regex couldn't parse, causing the directive and separator to become + * part of the search content. + * + * Returns the cleaned search content and any extracted start line number. + */ + private stripLeakedStartLineDirective(searchContent: string): { + cleanedContent: string + extractedStartLine: number | null + } { + // Match patterns like `:start_line=18\n-------\n` or `:start_line 18\n-------\n` + // at the beginning of search content (the directive + separator leaked in) + const leakedPattern = /^:start_line\s*[=]\s*(\d+)\s*\n(?:-------\s*\n)?/ + const match = searchContent.match(leakedPattern) + if (match) { + return { + cleanedContent: searchContent.slice(match[0].length), + extractedStartLine: parseInt(match[1], 10), + } + } + return { cleanedContent: searchContent, extractedStartLine: null } + } + private validateMarkerSequencing(diffContent: string): { success: boolean; error?: string } { enum State { START, @@ -267,9 +292,11 @@ export class MultiSearchReplaceDiffStrategy implements DiffStrategy { 3. ((?:\:start_line:\s*(\d+)\s*\n))? Optionally matches a ":start_line:" line. The outer capturing group is group 1 and the inner (\d+) is group 2. + Also accepts ":start_line=" as delimiter (e.g. ":start_line=18"). 4. ((?:\:end_line:\s*(\d+)\s*\n))? Optionally matches a ":end_line:" line. Group 3 is the whole match and group 4 is the digits. + Also accepts ":end_line=" as delimiter. 5. ((??\s*\n((?:\:start_line:\s*(\d+)\s*\n))?((?:\:end_line:\s*(\d+)\s*\n))?((?>>>>>> REPLACE)(?=\n|$)/g, + /(?:^|\n)(??\s*\n((?:\:start_line[:=]\s*(\d+)\s*\n))?((?:\:end_line[:=]\s*(\d+)\s*\n))?((?>>>>>> REPLACE)(?=\n|$)/g, ), ] @@ -321,6 +348,16 @@ export class MultiSearchReplaceDiffStrategy implements DiffStrategy { searchContent = this.unescapeMarkers(searchContent) replaceContent = this.unescapeMarkers(replaceContent) + // Fallback: detect and strip a botched :start_line directive that leaked into search content + // This handles cases like `:start_line=18` where the `=` delimiter wasn't caught by the main regex + if (startLine === 0) { + const { cleanedContent, extractedStartLine } = this.stripLeakedStartLineDirective(searchContent) + if (extractedStartLine !== null) { + searchContent = cleanedContent + startLine = extractedStartLine + delta + } + } + // Strip line numbers from search and replace content if every line starts with a line number const hasAllLineNumbers = (everyLineHasLineNumbers(searchContent) && everyLineHasLineNumbers(replaceContent)) ||