fielding · sedge-bot · May 14, 2026
diff --git a/packages/vscode-extension/package.json b/packages/vscode-extension/package.json
@@ -113,6 +113,7 @@
   "scripts": {
     "vscode:prepublish": "npm run compile",
     "compile": "tsc -p ./",
+    "test": "npm run compile && node test/marker-scan-core.test.js",
     "watch": "tsc -watch -p ./"
   },
   "devDependencies": {

diff --git a/packages/vscode-extension/src/extension.ts b/packages/vscode-extension/src/extension.ts
@@ -1,7 +1,7 @@
 import * as vscode from 'vscode';
+import { MarkerMatch, MarkerType, scanMarkerLines } from './markerScanCore';
 
 // Marker types and their colors
-type MarkerType = 'intervention' | 'uncertainty' | 'directive';
 
 interface MarkerDef {
   pattern: string;
@@ -31,21 +31,6 @@ const MARKERS: Record<MarkerType, MarkerDef> = {
   },
 };
 
-// Keyword aliases for markers (case-insensitive matching)
-// Strength order: intervention > uncertainty > directive
-const MARKER_KEYWORDS: Record<MarkerType, string[]> = {
-  intervention: ['FIXME', 'BUG', 'XXX'],        // Maps to !! (highest priority)
-  uncertainty: ['TODO', 'HACK'],                // Maps to ??
-  directive: ['NOTE', 'NB'],                    // Maps to >> (lowest priority)
-};
-
-// Priority order for conflict resolution (lower = stronger)
-const MARKER_PRIORITY: Record<MarkerType, number> = {
-  intervention: 1,
-  uncertainty: 2,
-  directive: 3,
-};
-
 // Diagnostic colors (for inline error/warning badges)
 type DiagnosticLevel = 'error' | 'warning' | 'info' | 'hint';
 
@@ -73,27 +58,6 @@ const DIAGNOSTIC_COLORS: Record<DiagnosticLevel, DiagnosticStyle> = {
   },
 };
 
-// Common comment prefix patterns
-const COMMENT_PATTERNS: RegExp[] = [
-  /^(\s*)(\/\/\/)/,        // /// doc comments
-  /^(\s*)(\/\/)/,          // // C-style
-  /^(\s*)(#)/,             // # Python/Shell/Ruby
-  /^(\s*)(--)/,            // -- SQL/Lua/Haskell
-  /^(\s*)(;)/,             // ; Lisp/Assembly
-  /^(\s*)(\/\*+)/,         // /* block
-  /^(\s*)(\*)/,            // * block continuation
-  /^(\s*)(<!--)/,          // <!-- HTML/XML
-  /^(\s*)(%)/,             // % LaTeX/Prolog
-  /^(\s*)(rem\s)/i,        // REM Basic/Batch
-];
-
-interface MarkerMatch {
-  type: MarkerType;
-  lineNum: number;
-  startChar: number;      // Start of comment (including leading whitespace for padding)
-  endChar: number;        // End of line text
-}
-
 // ============================================================================
 // Marker Decoration Manager (left-aligned comment badges)
 // ============================================================================
@@ -145,103 +109,12 @@ class MarkerDecorationManager {
 class MarkerScanner {
   scan(document: vscode.TextDocument): MarkerMatch[] {
     const config = vscode.workspace.getConfiguration('human-plus-plus');
-    const matches: MarkerMatch[] = [];
-
-    const text = document.getText();
-    const lines = text.split('\n');
-
-    // Build list of enabled markers
-    const enabledMarkers: [MarkerType, MarkerDef][] = [];
-    for (const [type, def] of Object.entries(MARKERS) as [MarkerType, MarkerDef][]) {
-      if (config.get(def.configKey, true)) {
-        enabledMarkers.push([type, def]);
-      }
-    }
-
-    if (enabledMarkers.length === 0) {
-      return matches;
-    }
-
-    for (let lineNum = 0; lineNum < lines.length; lineNum++) {
-      const line = lines[lineNum];
-
-      // Try to match a comment pattern
-      for (const commentPattern of COMMENT_PATTERNS) {
-        const commentMatch = commentPattern.exec(line);
-        if (!commentMatch) {
-          continue;
-        }
-
-        const leadingWhitespace = commentMatch[1].length;
-        const prefixEnd = commentMatch[0].length;
-        const commentText = line.slice(prefixEnd);
 
-        // First check for explicit markers (!!, ??, >>) - these always win
-        let foundType: MarkerType | null = null;
-
-        for (const [type, def] of enabledMarkers) {
-          const markerRegex = new RegExp(`^\\s*(${def.pattern.replace(/\?/g, '\\?')})(?=\\s|$)`);
-          if (markerRegex.test(commentText)) {
-            foundType = type;
-            break;
-          }
-        }
-
-        // If no explicit marker, check for keyword aliases
-        if (!foundType) {
-          foundType = this.findKeywordMatch(commentText, enabledMarkers);
-        }
-
-        if (foundType) {
-          // Find end of actual text (trim trailing whitespace)
-          const trimmedEnd = line.trimEnd().length;
-          matches.push({
-            type: foundType,
-            lineNum,
-            startChar: leadingWhitespace,  // Start from the comment symbol
-            endChar: trimmedEnd,
-          });
-        }
-
-        break; // Only check first comment pattern per line
-      }
-    }
-
-    return matches;
-  }
-
-  /**
-   * Check for keyword aliases in comment text.
-   * Returns the strongest matching marker type, or null if no match.
-   * Keywords are matched case-insensitively with word boundaries.
-   */
-  private findKeywordMatch(
-    commentText: string,
-    enabledMarkers: [MarkerType, MarkerDef][]
-  ): MarkerType | null {
-    let bestMatch: MarkerType | null = null;
-    let bestPriority = Infinity;
-
-    for (const [type] of enabledMarkers) {
-      const keywords = MARKER_KEYWORDS[type];
-      if (!keywords) continue;
-
-      for (const keyword of keywords) {
-        // Match keyword at word boundary, case-insensitive
-        // Supports: // TODO: ..., // [TODO] ..., // TODO(...) ..., etc.
-        const keywordRegex = new RegExp(`\\b${keyword}\\b`, 'i');
-        if (keywordRegex.test(commentText)) {
-          const priority = MARKER_PRIORITY[type];
-          if (priority < bestPriority) {
-            bestMatch = type;
-            bestPriority = priority;
-          }
-          break; // Found this type, check next type for potentially stronger match
-        }
-      }
-    }
-
-    return bestMatch;
+    return scanMarkerLines(document.getText(), { // Text scanning is delegated to markerScanCore; only settings are read here
+      intervention: config.get(MARKERS.intervention.configKey, true), // `true` is the default value handed to config.get
+      uncertainty: config.get(MARKERS.uncertainty.configKey, true),
+      directive: config.get(MARKERS.directive.configKey, true),
+    });
   }
 }
 

diff --git a/packages/vscode-extension/src/markerScanCore.ts b/packages/vscode-extension/src/markerScanCore.ts
@@ -0,0 +1,214 @@
+export type MarkerType = 'intervention' | 'uncertainty' | 'directive'; // Marker categories, listed strongest first
+
+export interface MarkerMatch { // One marked comment line reported by scanMarkerLines
+  type: MarkerType;
+  lineNum: number;        // Zero-based index of the line within the text split on '\n'
+  startChar: number;      // Column where the comment prefix begins
+  endChar: number;        // Length of the line with trailing whitespace trimmed
+}
+
+export interface EnabledMarkers { // Per-type switches for scanMarkerLines; a false entry is never reported
+  intervention: boolean;
+  uncertainty: boolean;
+  directive: boolean;
+}
+
+const MARKER_PATTERNS: Record<MarkerType, string> = { // Explicit symbols; recognised only at the start of the comment text
+  intervention: '!!',
+  uncertainty: '??',
+  directive: '>>',
+};
+
+// Keyword aliases for markers: matched case-insensitively as whole words
+// anywhere in the comment text, and consulted only when no explicit marker matched.
+const MARKER_KEYWORDS: Record<MarkerType, string[]> = {
+  intervention: ['FIXME', 'BUG', 'XXX'],        // Same type as !! (strongest)
+  uncertainty: ['TODO', 'HACK'],                // Same type as ??
+  directive: ['NOTE', 'NB'],                    // Same type as >> (weakest)
+};
+
+// Priority order for conflict resolution (lower = stronger); decides the winner when keywords of several types share a line
+const MARKER_PRIORITY: Record<MarkerType, number> = {
+  intervention: 1,
+  uncertainty: 2,
+  directive: 3,
+};
+
+const MARKER_ORDER: MarkerType[] = ['intervention', 'uncertainty', 'directive']; // Order in which marker types are tested
+
+interface CommentPrefix { // Location of a comment symbol within a single line
+  index: number;   // Column where the matched comment symbol starts
+  length: number;  // Number of characters in the matched comment symbol
+}
+
+const LINE_START_COMMENT_PATTERNS: RegExp[] = [ // First match wins; group 1 = leading whitespace, group 2 = comment symbol
+  /^(\s*)(\/\/\/)/,        // /// doc comments (listed before // so the longer symbol is taken)
+  /^(\s*)(\/\/)/,          // // C-style
+  /^(\s*)(#)/,             // # Python/Shell/Ruby
+  /^(\s*)(--)/,            // -- SQL/Lua/Haskell
+  /^(\s*)(;)/,             // ; Lisp/Assembly
+  /^(\s*)(\/\*+)/,         // /* block opener, with one or more asterisks
+  /^(\s*)(\*)/,            // * block continuation
+  /^(\s*)(<!--)/,          // <!-- HTML/XML
+  /^(\s*)(%)/,             // % LaTeX/Prolog
+  /^(\s*)(rem\s)/i,        // REM Basic/Batch (case-insensitive; the symbol includes the whitespace after it)
+];
+
+const INLINE_COMMENT_TOKENS = ['///', '//', '#', '--', ';', '/*', '<!--', '%']; // Tried in this order at each column outside string literals
+
+/**
+ * Scans `text` line by line and reports every comment carrying a marker.
+ *
+ * A line matches when its comment text opens with an enabled explicit marker
+ * (`!!`, `??`, `>>`) or, failing that, contains an enabled keyword alias.
+ *
+ * @param text Full document text; lines are split on `\n`.
+ * @param enabled Marker types to report; disabled types are never returned.
+ * @returns One match per marked line, in document order. `startChar` is the
+ *   comment prefix column and `endChar` the line length without trailing
+ *   whitespace.
+ */
+export function scanMarkerLines(text: string, enabled: EnabledMarkers): MarkerMatch[] {
+  const anyEnabled = MARKER_ORDER.some((type) => enabled[type]);
+  if (!anyEnabled) {
+    return [];
+  }
+
+  const found: MarkerMatch[] = [];
+  text.split('\n').forEach((line, lineNum) => {
+    const prefix = findCommentPrefix(line);
+    if (prefix === null) {
+      return;
+    }
+
+    const body = line.slice(prefix.index + prefix.length);
+    const type = findExplicitMarker(body, enabled) ?? findKeywordMatch(body, enabled);
+    if (type !== null) {
+      found.push({ type, lineNum, startChar: prefix.index, endChar: line.trimEnd().length });
+    }
+  });
+
+  return found;
+}
+
+/** Resolves the comment prefix of `line`: line-start patterns are tried in order, then inline tokens. */
+function findCommentPrefix(line: string): CommentPrefix | null {
+  for (const lineStartPattern of LINE_START_COMMENT_PATTERNS) {
+    const hit = lineStartPattern.exec(line);
+    if (hit === null) continue;
+    // Group 1 is the leading whitespace, group 2 the comment symbol itself.
+    return { index: hit[1].length, length: hit[2].length };
+  }
+  return findInlineCommentPrefix(line);
+}
+
+/**
+ * Finds the first inline comment token in `line` that lies outside a quoted
+ * string, for comments that trail code on the same line.
+ *
+ * Single-quoted, double-quoted and backtick-quoted spans are skipped, and a
+ * backslash inside such a span protects the character after it, so tokens and
+ * quote characters inside string literals are ignored. Quote state is tracked
+ * for this one line only; an unterminated quote hides the rest of the line.
+ *
+ * @param line A single line of text.
+ * @returns Column and length of the first token from INLINE_COMMENT_TOKENS
+ *   accepted by isInlineCommentBoundary, or null when there is none.
+ */
+function findInlineCommentPrefix(line: string): CommentPrefix | null {
+  const quoteChars = ["'", '"', '`'];
+  let openQuote: string | null = null; // quote character of the enclosing string, if any
+  let skipNext = false;                // set by a backslash seen inside a string
+
+  for (let index = 0; index < line.length; index++) {
+    const char = line[index];
+
+    if (skipNext) {
+      // Character right after a backslash: consume it without inspecting it.
+      skipNext = false;
+    } else if (openQuote !== null) {
+      // Inside a string only a backslash or the matching quote changes state.
+      if (char === '\\') {
+        skipNext = true;
+      } else if (char === openQuote) {
+        openQuote = null;
+      }
+    } else if (quoteChars.includes(char)) {
+      // Entering a string literal; remember which quote closes it.
+      openQuote = char;
+    } else {
+      // Outside any string: try each token at this column, in list order.
+      const token = INLINE_COMMENT_TOKENS.find(
+        (candidate) => line.startsWith(candidate, index) && isInlineCommentBoundary(line, index, candidate),
+      );
+      if (token !== undefined) {
+        return { index, length: token.length };
+      }
+    }
+  }
+
+  // No token was accepted anywhere on the line.
+  return null;
+}
+
+/**
+ * Decides whether `token`, found at column `index` of `line`, is delimited
+ * like an inline comment opener: whitespace (or line start) before it and
+ * whitespace (or line end) after it. This keeps URLs (`http://x`) and
+ * operators written without spaces (`a//b`) from being read as comments.
+ */
+function isInlineCommentBoundary(line: string, index: number, token: string): boolean {
+  const isSpace = (char: string): boolean => /\s/.test(char);
+  const following = line[index + token.length] ?? '';
+
+  const blockedBefore = index > 0 && !isSpace(line[index - 1]);
+  // A `//` that is immediately followed by `/` is rejected here.
+  const splitsTripleSlash = token === '//' && following === '/';
+  const openAfter = following === '' || isSpace(following);
+  return !blockedBefore && !splitsTripleSlash && openAfter;
+}
+
+/** Explicit-marker regexes (`^\s*<symbol>(?=\s|$)`) in MARKER_ORDER; compiled once at load, not per scanned line. */
+const EXPLICIT_MARKER_REGEXES = MARKER_ORDER.map((type): [MarkerType, RegExp] => [
+  type,
+  new RegExp(`^\\s*(${escapeRegex(MARKER_PATTERNS[type])})(?=\\s|$)`),
+]);
+
+/**
+ * Returns the first enabled marker type, in MARKER_ORDER, whose symbol opens
+ * `commentText` (after optional whitespace) and is followed by whitespace or
+ * the end of the text; returns null when no enabled symbol matches.
+ */
+function findExplicitMarker(commentText: string, enabled: EnabledMarkers): MarkerType | null {
+  const hit = EXPLICIT_MARKER_REGEXES.find(([type, regex]) => enabled[type] && regex.test(commentText));
+  return hit ? hit[0] : null;
+}
+
+/** Per-type keyword regexes (`\bKEYWORD\b`, case-insensitive) in MARKER_ORDER; compiled once at load, not per line. */
+const KEYWORD_REGEXES = MARKER_ORDER.map((type): [MarkerType, RegExp[]] => [
+  type,
+  MARKER_KEYWORDS[type].map((keyword) => new RegExp(`\\b${escapeRegex(keyword)}\\b`, 'i')),
+]);
+
+/**
+ * Finds the strongest enabled marker type that has a keyword alias (FIXME,
+ * TODO, NOTE, ...) appearing as a whole word anywhere in `commentText`.
+ *
+ * @param commentText Text that follows the comment prefix.
+ * @param enabled Marker types that may be reported.
+ * @returns The matching type with the lowest MARKER_PRIORITY value, or null.
+ */
+function findKeywordMatch(commentText: string, enabled: EnabledMarkers): MarkerType | null {
+  let bestMatch: MarkerType | null = null;
+  for (const [type, regexes] of KEYWORD_REGEXES) {
+    const isStronger = bestMatch === null || MARKER_PRIORITY[type] < MARKER_PRIORITY[bestMatch];
+    if (enabled[type] && isStronger && regexes.some((regex) => regex.test(commentText))) {
+      bestMatch = type;
+    }
+  }
+  return bestMatch;
+}
+
+// Backslash-escapes every regex metacharacter in `value`, so the result
+// matches the original text literally when placed inside a RegExp source.
+function escapeRegex(value: string): string { return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); }