Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/vscode-extension/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@
"scripts": {
"vscode:prepublish": "npm run compile",
"compile": "tsc -p ./",
"test": "npm run compile && node test/marker-scan-core.test.js",
"watch": "tsc -watch -p ./"
},
"devDependencies": {
Expand Down
139 changes: 6 additions & 133 deletions packages/vscode-extension/src/extension.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import * as vscode from 'vscode';
import { MarkerMatch, MarkerType, scanMarkerLines } from './markerScanCore';

// Marker types and their colors
type MarkerType = 'intervention' | 'uncertainty' | 'directive';

interface MarkerDef {
pattern: string;
Expand Down Expand Up @@ -31,21 +31,6 @@ const MARKERS: Record<MarkerType, MarkerDef> = {
},
};

// Keyword aliases for markers (case-insensitive matching)
// Strength order: intervention > uncertainty > directive
const MARKER_KEYWORDS: Record<MarkerType, string[]> = {
intervention: ['FIXME', 'BUG', 'XXX'], // Maps to !! (highest priority)
uncertainty: ['TODO', 'HACK'], // Maps to ??
directive: ['NOTE', 'NB'], // Maps to >> (lowest priority)
};

// Priority order for conflict resolution (lower = stronger)
const MARKER_PRIORITY: Record<MarkerType, number> = {
intervention: 1,
uncertainty: 2,
directive: 3,
};

// Diagnostic colors (for inline error/warning badges)
type DiagnosticLevel = 'error' | 'warning' | 'info' | 'hint';

Expand Down Expand Up @@ -73,27 +58,6 @@ const DIAGNOSTIC_COLORS: Record<DiagnosticLevel, DiagnosticStyle> = {
},
};

// Common comment prefix patterns
const COMMENT_PATTERNS: RegExp[] = [
/^(\s*)(\/\/\/)/, // /// doc comments
/^(\s*)(\/\/)/, // // C-style
/^(\s*)(#)/, // # Python/Shell/Ruby
/^(\s*)(--)/, // -- SQL/Lua/Haskell
/^(\s*)(;)/, // ; Lisp/Assembly
/^(\s*)(\/\*+)/, // /* block
/^(\s*)(\*)/, // * block continuation
/^(\s*)(<!--)/, // <!-- HTML/XML
/^(\s*)(%)/, // % LaTeX/Prolog
/^(\s*)(rem\s)/i, // REM Basic/Batch
];

interface MarkerMatch {
type: MarkerType;
lineNum: number;
startChar: number; // Start of comment (including leading whitespace for padding)
endChar: number; // End of line text
}

// ============================================================================
// Marker Decoration Manager (left-aligned comment badges)
// ============================================================================
Expand Down Expand Up @@ -145,103 +109,12 @@ class MarkerDecorationManager {
class MarkerScanner {
scan(document: vscode.TextDocument): MarkerMatch[] {
const config = vscode.workspace.getConfiguration('human-plus-plus');
const matches: MarkerMatch[] = [];

const text = document.getText();
const lines = text.split('\n');

// Build list of enabled markers
const enabledMarkers: [MarkerType, MarkerDef][] = [];
for (const [type, def] of Object.entries(MARKERS) as [MarkerType, MarkerDef][]) {
if (config.get(def.configKey, true)) {
enabledMarkers.push([type, def]);
}
}

if (enabledMarkers.length === 0) {
return matches;
}

for (let lineNum = 0; lineNum < lines.length; lineNum++) {
const line = lines[lineNum];

// Try to match a comment pattern
for (const commentPattern of COMMENT_PATTERNS) {
const commentMatch = commentPattern.exec(line);
if (!commentMatch) {
continue;
}

const leadingWhitespace = commentMatch[1].length;
const prefixEnd = commentMatch[0].length;
const commentText = line.slice(prefixEnd);

// First check for explicit markers (!!, ??, >>) - these always win
let foundType: MarkerType | null = null;

for (const [type, def] of enabledMarkers) {
const markerRegex = new RegExp(`^\\s*(${def.pattern.replace(/\?/g, '\\?')})(?=\\s|$)`);
if (markerRegex.test(commentText)) {
foundType = type;
break;
}
}

// If no explicit marker, check for keyword aliases
if (!foundType) {
foundType = this.findKeywordMatch(commentText, enabledMarkers);
}

if (foundType) {
// Find end of actual text (trim trailing whitespace)
const trimmedEnd = line.trimEnd().length;
matches.push({
type: foundType,
lineNum,
startChar: leadingWhitespace, // Start from the comment symbol
endChar: trimmedEnd,
});
}

break; // Only check first comment pattern per line
}
}

return matches;
}

/**
* Check for keyword aliases in comment text.
* Returns the strongest matching marker type, or null if no match.
* Keywords are matched case-insensitively with word boundaries.
*/
private findKeywordMatch(
commentText: string,
enabledMarkers: [MarkerType, MarkerDef][]
): MarkerType | null {
let bestMatch: MarkerType | null = null;
let bestPriority = Infinity;

for (const [type] of enabledMarkers) {
const keywords = MARKER_KEYWORDS[type];
if (!keywords) continue;

for (const keyword of keywords) {
// Match keyword at word boundary, case-insensitive
// Supports: // TODO: ..., // [TODO] ..., // TODO(...) ..., etc.
const keywordRegex = new RegExp(`\\b${keyword}\\b`, 'i');
if (keywordRegex.test(commentText)) {
const priority = MARKER_PRIORITY[type];
if (priority < bestPriority) {
bestMatch = type;
bestPriority = priority;
}
break; // Found this type, check next type for potentially stronger match
}
}
}

return bestMatch;
return scanMarkerLines(document.getText(), {
intervention: config.get(MARKERS.intervention.configKey, true),
uncertainty: config.get(MARKERS.uncertainty.configKey, true),
directive: config.get(MARKERS.directive.configKey, true),
});
}
}

Expand Down
214 changes: 214 additions & 0 deletions packages/vscode-extension/src/markerScanCore.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
/**
 * The three categories of comment marker the scanner can report.
 * Each category has an explicit symbol (`!!`, `??`, `>>`) and a set of
 * keyword aliases defined further down in this module.
 */
export type MarkerType = 'intervention' | 'uncertainty' | 'directive';

/**
 * One marked comment found by `scanMarkerLines`.
 */
export interface MarkerMatch {
// Category of the marker that was detected on this line.
type: MarkerType;
// Zero-based index of the line within the text split on '\n'.
lineNum: number;
// Column at which the comment token itself starts (leading whitespace excluded).
startChar: number;
// Length of the line after trailing whitespace has been trimmed.
endChar: number;
}

/**
 * Per-category on/off switches passed to `scanMarkerLines`.
 * A category set to `false` is ignored by both the explicit-symbol check
 * and the keyword-alias check; if all three are `false` the scan returns
 * no matches.
 */
export interface EnabledMarkers {
intervention: boolean;
uncertainty: boolean;
directive: boolean;
}

// Explicit marker symbol for each category. These are literal strings, not
// regex source: callers escape them before building a RegExp (note that
// `??` would otherwise be read as regex quantifiers).
const MARKER_PATTERNS: Record<MarkerType, string> = {
intervention: '!!',
uncertainty: '??',
directive: '>>',
};

// Keyword aliases for markers. They are matched case-insensitively and on
// word boundaries anywhere in the comment text, and are only consulted when
// the comment does not start with an explicit marker symbol.
// Strength order: intervention > uncertainty > directive
const MARKER_KEYWORDS: Record<MarkerType, string[]> = {
intervention: ['FIXME', 'BUG', 'XXX'],
uncertainty: ['TODO', 'HACK'],
directive: ['NOTE', 'NB'],
};

// Priority order for conflict resolution (lower = stronger). Used when a
// single comment contains keyword aliases of more than one category: the
// category with the lowest number is the one reported.
const MARKER_PRIORITY: Record<MarkerType, number> = {
intervention: 1,
uncertainty: 2,
directive: 3,
};

// Fixed iteration order over the categories. The explicit-symbol check
// returns the first enabled category in this order whose symbol matches,
// so the order here is part of the scanner's behaviour.
const MARKER_ORDER: MarkerType[] = ['intervention', 'uncertainty', 'directive'];

// Location of the comment token found on a line.
interface CommentPrefix {
// Column at which the comment token starts.
index: number;
// Number of characters occupied by the token; the comment text begins at
// index + length. For the `rem` pattern this includes the whitespace
// character captured after `rem`.
length: number;
}

// Patterns for a comment token that is the first non-whitespace text on a
// line. Capture group 1 is the leading whitespace, group 2 the token itself.
// They are tried in order and the first match wins, so `///` must stay ahead
// of `//`.
// NOTE(review): the patterns are language-agnostic - any line that begins
// with `#`, `*`, `;`, `%` or `--` is treated as a comment, whatever the
// language of the scanned text actually is.
const LINE_START_COMMENT_PATTERNS: RegExp[] = [
/^(\s*)(\/\/\/)/, // /// doc comments
/^(\s*)(\/\/)/, // // C-style
/^(\s*)(#)/, // # Python/Shell/Ruby
/^(\s*)(--)/, // -- SQL/Lua/Haskell
/^(\s*)(;)/, // ; Lisp/Assembly
/^(\s*)(\/\*+)/, // /* block
/^(\s*)(\*)/, // * block continuation
/^(\s*)(<!--)/, // <!-- HTML/XML
/^(\s*)(%)/, // % LaTeX/Prolog
/^(\s*)(rem\s)/i, // REM Basic/Batch
];

// Comment tokens searched for after code on the same line (trailing
// comments). At each position they are tried in this order, so `///` is
// listed before `//`. The bare `*` continuation and `rem` forms are
// recognised only at the start of a line and are deliberately absent here.
const INLINE_COMMENT_TOKENS = ['///', '//', '#', '--', ';', '/*', '<!--', '%'];

/**
 * Scans `text` line by line and reports every comment that carries a marker.
 *
 * For each line the comment token is located first (at the start of the line
 * or trailing after code). The text after that token is then checked for an
 * explicit marker symbol and, only if none is present, for a keyword alias.
 *
 * @param text    Full document text; lines are separated on '\n'.
 * @param enabled Which marker categories may be reported.
 * @returns One entry per marked line, in document order. Empty when every
 *          category is disabled.
 */
export function scanMarkerLines(text: string, enabled: EnabledMarkers): MarkerMatch[] {
  const anyEnabled = MARKER_ORDER.some((type) => enabled[type]);
  if (!anyEnabled) {
    return [];
  }

  const found: MarkerMatch[] = [];

  text.split('\n').forEach((rawLine, lineNum) => {
    const prefix = findCommentPrefix(rawLine);
    if (prefix === null) {
      return; // no comment on this line
    }

    // Everything after the comment token is the candidate marker text.
    const body = rawLine.slice(prefix.index + prefix.length);

    // Explicit symbols take precedence; keyword aliases are the fallback.
    const type = findExplicitMarker(body, enabled) ?? findKeywordMatch(body, enabled);
    if (type === null) {
      return;
    }

    found.push({
      type,
      lineNum,
      startChar: prefix.index,
      // Trailing whitespace (including a '\r' left by CRLF input) is excluded.
      endChar: rawLine.trimEnd().length,
    });
  });

  return found;
}

/**
 * Locates the comment token on a line.
 *
 * The line-start patterns are tried first, in their declared order; when
 * none of them matches, the line is searched for a trailing inline comment.
 *
 * @param line A single line of text.
 * @returns The token's column and length, or `null` when no comment token
 *          is recognised.
 */
function findCommentPrefix(line: string): CommentPrefix | null {
  for (const pattern of LINE_START_COMMENT_PATTERNS) {
    const hit = pattern.exec(line);
    if (hit === null) {
      continue;
    }

    // Group 1 is the indentation, group 2 the comment token.
    const indentWidth = hit[1].length;
    const tokenWidth = hit[2].length;
    return { index: indentWidth, length: tokenWidth };
  }

  return findInlineCommentPrefix(line);
}

/**
 * Finds the first comment token that appears outside a string literal.
 *
 * The line is walked left to right while tracking whether the cursor is
 * inside a single-quoted, double-quoted or backtick-quoted span. Inside such
 * a span a backslash escapes the following character, so an escaped quote
 * does not close the span. Outside a span, each inline comment token is
 * tried at the current position and accepted only if it passes the boundary
 * check.
 *
 * @param line A single line of text.
 * @returns The token's column and length, or `null` when none is found.
 */
function findInlineCommentPrefix(line: string): CommentPrefix | null {
  // The quote character that opened the current string span; '' when outside.
  let openQuote = '';
  let pos = 0;

  while (pos < line.length) {
    const ch = line[pos];

    if (openQuote !== '') {
      if (ch === '\\') {
        // Skip the backslash and the character it escapes.
        pos += 2;
        continue;
      }
      if (ch === openQuote) {
        openQuote = '';
      }
      pos += 1;
      continue;
    }

    if (ch === "'" || ch === '"' || ch === '`') {
      openQuote = ch;
      pos += 1;
      continue;
    }

    for (const token of INLINE_COMMENT_TOKENS) {
      if (!line.startsWith(token, pos)) {
        continue;
      }
      if (isInlineCommentBoundary(line, pos, token)) {
        return { index: pos, length: token.length };
      }
    }

    pos += 1;
  }

  return null;
}

/**
 * Decides whether `token`, found at `index` in `line`, looks like the start
 * of an inline comment rather than part of a URL or an operator.
 *
 * A token qualifies only when both of the following hold:
 *  - it is at column 0 or the character before it is whitespace;
 *  - it is at the end of the line or the character after it is whitespace.
 *
 * The second rule also rejects `//` when it is immediately followed by a
 * third `/`, so that position is left for the `///` token to claim.
 *
 * @param line  The full line being scanned.
 * @param index Column at which `token` starts.
 * @param token The comment token that matched at `index`.
 * @returns `true` when the token is delimited as described above.
 */
function isInlineCommentBoundary(line: string, index: number, token: string): boolean {
  const isWhitespace = (ch: string): boolean => /\s/.test(ch);

  const precededBySpace = index <= 0 || isWhitespace(line.charAt(index - 1));
  if (!precededBySpace) {
    return false;
  }

  // charAt yields '' past the end of the line, which counts as a boundary.
  const next = line.charAt(index + token.length);
  return next === '' || isWhitespace(next);
}

/**
 * One anchored regex per marker category, in `MARKER_ORDER`, compiled once
 * when the module loads instead of once per scanned line.
 *
 * Each regex accepts optional leading whitespace, then the (escaped) marker
 * symbol, then whitespace or end of text. None of them uses the `g` or `y`
 * flag, so `test` carries no state between calls and sharing is safe.
 */
const EXPLICIT_MARKER_MATCHERS: ReadonlyArray<readonly [MarkerType, RegExp]> = MARKER_ORDER.map(
  (type): readonly [MarkerType, RegExp] => [
    type,
    new RegExp(`^\\s*(${escapeRegex(MARKER_PATTERNS[type])})(?=\\s|$)`),
  ],
);

/**
 * Checks whether the comment text opens with an explicit marker symbol.
 *
 * @param commentText Text that follows the comment token on the line.
 * @param enabled     Which marker categories may be reported.
 * @returns The first enabled category (in `MARKER_ORDER`) whose symbol starts
 *          the comment, or `null` when none does.
 */
function findExplicitMarker(commentText: string, enabled: EnabledMarkers): MarkerType | null {
  for (const [type, matcher] of EXPLICIT_MARKER_MATCHERS) {
    if (enabled[type] && matcher.test(commentText)) {
      return type;
    }
  }

  return null;
}

/**
 * One case-insensitive, word-bounded alternation regex per marker category,
 * compiled once when the module loads instead of once per keyword per line.
 *
 * Entries are ordered strongest first by `MARKER_PRIORITY`. The sort is
 * stable, so categories with equal priority keep their `MARKER_ORDER`
 * position. A category with no keywords is left out, because an empty
 * alternation would match at any word boundary.
 */
const KEYWORD_MATCHERS: ReadonlyArray<readonly [MarkerType, RegExp]> = [...MARKER_ORDER]
  .filter((type) => MARKER_KEYWORDS[type].length > 0)
  .sort((a, b) => MARKER_PRIORITY[a] - MARKER_PRIORITY[b])
  .map((type): readonly [MarkerType, RegExp] => {
    const alternatives = MARKER_KEYWORDS[type].map((keyword) => escapeRegex(keyword)).join('|');
    return [type, new RegExp(`\\b(?:${alternatives})\\b`, 'i')];
  });

/**
 * Looks for keyword aliases (e.g. `TODO`, `FIXME`) anywhere in the comment.
 *
 * Keywords are matched case-insensitively on word boundaries. When keywords
 * of several categories are present, the category with the strongest
 * (lowest) priority is reported.
 *
 * @param commentText Text that follows the comment token on the line.
 * @param enabled     Which marker categories may be reported.
 * @returns The strongest enabled category with a keyword in the text, or
 *          `null` when no keyword of an enabled category is present.
 */
function findKeywordMatch(commentText: string, enabled: EnabledMarkers): MarkerType | null {
  // Matchers are pre-sorted strongest first, so the first hit is the answer.
  for (const [type, matcher] of KEYWORD_MATCHERS) {
    if (enabled[type] && matcher.test(commentText)) {
      return type;
    }
  }

  return null;
}

/**
 * Escapes every regex metacharacter in `value` so the result can be embedded
 * in a `RegExp` source and match the original text literally.
 *
 * @param value Arbitrary literal text.
 * @returns `value` with a backslash placed before each of
 *          `. * + ? ^ $ { } ( ) | [ ] \`.
 */
function escapeRegex(value: string): string {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metaChars, (ch) => '\\' + ch);
}
Loading