From 5a0dd13b0f9c8ea4da713b7169395127641c4b3f Mon Sep 17 00:00:00 2001 From: mbeaulne Date: Mon, 29 Jun 2026 14:09:13 -0400 Subject: [PATCH] Query weight change --- src/services/componentSearchIndex.test.ts | 47 ++++++++++ src/services/componentSearchIndex.ts | 103 +++++++++++++++++++++- 2 files changed, 147 insertions(+), 3 deletions(-) diff --git a/src/services/componentSearchIndex.test.ts b/src/services/componentSearchIndex.test.ts index 091bc12a4..2f54e77ed 100644 --- a/src/services/componentSearchIndex.test.ts +++ b/src/services/componentSearchIndex.test.ts @@ -691,6 +691,53 @@ describe("lexicalSearch", () => { expect(results).not.toContain("plural"); }); + it("supports per-query field boosts in bracket directives", () => { + const index = buildSearchIndex([ + makeSourced({ + digest: "title-match", + spec: { + name: "upload_file", + inputs: [], + outputs: [], + implementation: { container: { image: "x" } }, + }, + }), + makeSourced({ + digest: "metadata-match", + spec: { + name: "generic_component", + inputs: [], + outputs: [], + implementation: { container: { image: "x" } }, + metadata: { annotations: { intent: "upload" } }, + }, + }), + makeSourced({ + digest: "directive-noise", + spec: { + name: "metadata_helper", + inputs: [], + outputs: [], + implementation: { container: { image: "x" } }, + }, + }), + ]); + + expect(lexicalSearch(index, "upload [metadata^20]")[0]?.digest).toBe( + "metadata-match", + ); + expect( + lexicalSearch(index, "upload [metadata+10, title-5]").map( + (result) => result.digest, + ), + ).toEqual(["metadata-match"]); + expect( + lexicalSearch(index, "upload [metadata^20]").map( + (result) => result.digest, + ), + ).not.toContain("directive-noise"); + }); + it("ignores natural-language filler words that would otherwise swamp intent", () => { const index = buildSearchIndex([ makeSourced({ diff --git a/src/services/componentSearchIndex.ts b/src/services/componentSearchIndex.ts index e7af11218..9560d9d0d 100644 --- a/src/services/componentSearchIndex.ts +++ b/src/services/componentSearchIndex.ts @@ -437,6 +437,8 @@ function tokenizeQuery(text: string): { interface ParsedSearchQuery { positiveText: string; negativeText: string; + fieldWeights: Record; + fieldPhraseBonuses: Record; } // Bind a negation to its term(s): capture consecutive words but stop at a @@ -446,10 +448,81 @@ interface ParsedSearchQuery { const NEGATIVE_CONSTRAINT_PATTERN = /\b(?:without|excluding|exclude|not|no)\b\s+(?:(?:to|use|using)\s+)?([a-z0-9][a-z0-9-]*(?:\s+(?!(?:and|or|but|then|also|plus|with)\b)[a-z0-9][a-z0-9-]*)*)/gi; const NEGATIVE_LEADING_FILLER_PATTERN = /^(?:(?:to|use|using)\s+)+/i; +const FIELD_BOOST_BLOCK_PATTERN = /\[([^\]]+)\]/g; +const FIELD_BOOST_DIRECTIVE_PATTERN = + /^([a-z]+)(\+{1,}|-{1,}|[+-]\d+(?:\.\d+)?|\^\d+(?:\.\d+)?|=\d+(?:\.\d+)?)$/; + +function cloneFieldWeights(): Record { + return { ...FIELD_WEIGHTS }; +} + +function buildFieldPhraseBonuses( + fieldWeights: Record, +): Record { + return Object.fromEntries( + SEARCH_FIELDS.map((field) => { + const baseWeight = FIELD_WEIGHTS[field]; + const ratio = baseWeight > 0 ? fieldWeights[field] / baseWeight : 0; + return [field, FIELD_PHRASE_BONUS[field] * ratio]; + }), + ) as Record; +} + +function applyFieldBoostDirective( + fieldWeights: Record, + directive: string, +): void { + const match = FIELD_BOOST_DIRECTIVE_PATTERN.exec(directive.trim()); + if (!match) return; + + const fieldName = match[1]; + const operator = match[2]; + if (!fieldName || !operator) return; + + const field = FIELD_ALIASES[fieldName]; + if (!field) return; + + const currentWeight = fieldWeights[field]; + let nextWeight = currentWeight; + if (operator.startsWith("^")) { + nextWeight = currentWeight * Number(operator.slice(1)); + } else if (operator.startsWith("=")) { + nextWeight = Number(operator.slice(1)); + } else if (/^\++$/.test(operator)) { + nextWeight = currentWeight + operator.length; + } else if (/^-+$/.test(operator)) { + nextWeight = currentWeight - operator.length; + } else { + nextWeight = currentWeight + Number(operator); + } + + fieldWeights[field] = Number.isFinite(nextWeight) + ? Math.max(0, nextWeight) + : currentWeight; +} + +function extractFieldBoosts(text: string): { + textWithoutBoosts: string; + fieldWeights: Record; +} { + const fieldWeights = cloneFieldWeights(); + const textWithoutBoosts = text.replace( + FIELD_BOOST_BLOCK_PATTERN, + (_match, block: string) => { + for (const directive of block.split(/[\s,]+/)) { + applyFieldBoostDirective(fieldWeights, directive); + } + return " "; + }, + ); + + return { textWithoutBoosts, fieldWeights }; +} function parseSearchQuery(text: string): ParsedSearchQuery { + const { textWithoutBoosts, fieldWeights } = extractFieldBoosts(text); const negativeParts: string[] = []; - const positiveText = text.replace( + const positiveText = textWithoutBoosts.replace( NEGATIVE_CONSTRAINT_PATTERN, (_match, negativePart: string) => { negativeParts.push( @@ -462,6 +535,8 @@ function parseSearchQuery(text: string): ParsedSearchQuery { return { positiveText, negativeText: negativeParts.join(" "), + fieldWeights, + fieldPhraseBonuses: buildFieldPhraseBonuses(fieldWeights), }; } @@ -499,6 +574,24 @@ const FIELD_PHRASE_BONUS: Record = { metadata: 1, }; +const FIELD_ALIASES: Record = { + command: "implementation", + commands: "implementation", + desc: "description", + description: "description", + implementation: "implementation", + impl: "implementation", + input: "io", + inputs: "io", + io: "io", + metadata: "metadata", + meta: "metadata", + name: "name", + output: "io", + outputs: "io", + title: "name", +}; + const PREFIX_MATCH_BONUS_MULTIPLIER = 0.5; const FUZZY_MATCH_BONUS_MULTIPLIER = 0.75; const ALL_QUERY_TOKENS_BONUS = 6; @@ -670,6 +763,8 @@ function scoreEntry( phraseTokenSequences: string[][], negativeTokens: string[], tokenWeights: Map, + fieldWeights: Record, + fieldPhraseBonuses: Record, ): { score: number; matchedFields: MatchField[] } { const matched = new Set(); let score = 0; @@ -690,7 +785,7 @@ function scoreEntry( for (const concept of concepts) { for (const field of SEARCH_FIELDS) { const fieldText = entry.searchable[field]; - const fieldWeight = FIELD_WEIGHTS[field]; + const fieldWeight = fieldWeights[field]; const matchingTokens = concept.filter((token) => fieldText.includes(token), ); @@ -743,7 +838,7 @@ function scoreEntry( ) { continue; } - score += FIELD_PHRASE_BONUS[field]; + score += fieldPhraseBonuses[field]; matched.add(field); } } @@ -793,6 +888,8 @@ export function lexicalSearch( phraseTokenSequences, negativeTokens, tokenWeights, + parsedQuery.fieldWeights, + parsedQuery.fieldPhraseBonuses, ); if (score === 0) continue; scored.push({