diff --git a/src/routes/v2/pages/Editor/components/componentSearchV2Logic.test.ts b/src/routes/v2/pages/Editor/components/componentSearchV2Logic.test.ts index 5ede4030d..7d1f6497f 100644 --- a/src/routes/v2/pages/Editor/components/componentSearchV2Logic.test.ts +++ b/src/routes/v2/pages/Editor/components/componentSearchV2Logic.test.ts @@ -22,6 +22,7 @@ import { buildResults, buildSourcedHydratedReferences, collectAllSourcedReferences, + LEXICAL_RESULT_LIMIT, PUBLISHED_SOURCE, registeredLibrariesFingerprint, rerankedMatches, @@ -287,6 +288,22 @@ describe("buildLexicalMatches / buildAiCandidateMatches", () => { expect(buildAiCandidateMatches(index, "")).toEqual([]); }); + it("caps displayed lexical results below the AI candidate pool", () => { + const broadIndex = buildSearchIndex( + Array.from({ length: 20 }, (_, i) => ({ + reference: ref(`train-${i}`, `train_${i}`), + source: source("standard"), + })), + ); + + expect(buildLexicalMatches(broadIndex, "train")).toHaveLength( + LEXICAL_RESULT_LIMIT, + ); + expect(buildAiCandidateMatches(broadIndex, "train").length).toBeGreaterThan( + LEXICAL_RESULT_LIMIT, + ); + }); + it("returns no AI candidates when literal search finds nothing", () => { expect(buildAiCandidateMatches(index, "qqzznomatch")).toEqual([]); }); diff --git a/src/routes/v2/pages/Editor/components/componentSearchV2Logic.ts b/src/routes/v2/pages/Editor/components/componentSearchV2Logic.ts index 964bd1fdb..10e0ef128 100644 --- a/src/routes/v2/pages/Editor/components/componentSearchV2Logic.ts +++ b/src/routes/v2/pages/Editor/components/componentSearchV2Logic.ts @@ -29,8 +29,8 @@ import type { HydratedComponentReference, } from "@/utils/componentSpec"; -/** How many lexical hits to display before the user asks for AI judgment. */ -export const LEXICAL_RESULT_LIMIT = 50; +/** How many hits to show by default before the user asks for AI judgment. */ +export const LEXICAL_RESULT_LIMIT = 10; const AI_CANDIDATE_LIMIT = 80; const AI_LEXICAL_CANDIDATE_LIMIT = 60; const AI_SOURCE_DIVERSITY_CANDIDATES_PER_SOURCE = 8; diff --git a/src/routes/v2/pages/Editor/hooks/useComponentSearchV2State.ts b/src/routes/v2/pages/Editor/hooks/useComponentSearchV2State.ts index 88a5a61b3..b61a63c0c 100644 --- a/src/routes/v2/pages/Editor/hooks/useComponentSearchV2State.ts +++ b/src/routes/v2/pages/Editor/hooks/useComponentSearchV2State.ts @@ -238,9 +238,11 @@ export function useComponentSearchV2State( !isEmbeddingSearchPending && (rerankData?.matches.length ?? 0) > 0; - const displayedMatches = isRerankActive - ? rerankedMatches(rerankData, rerankBaseMatches) - : lexicalMatches; + const displayedMatches = ( + isRerankActive + ? rerankedMatches(rerankData, rerankBaseMatches) + : lexicalMatches + ).slice(0, LEXICAL_RESULT_LIMIT); const buildEmbeddingMatches = async ({ sourceIndex, diff --git a/src/services/componentSearchIndex.test.ts b/src/services/componentSearchIndex.test.ts index 05edbbdcc..091bc12a4 100644 --- a/src/services/componentSearchIndex.test.ts +++ b/src/services/componentSearchIndex.test.ts @@ -1012,6 +1012,41 @@ describe("lexicalSearch", () => { expect(results[0]?.matchedFields).toContain("implementation"); }); + it("keeps implementation-only matches below intent-field matches", () => { + const index = buildSearchIndex([ + makeSourced({ + digest: "name-match", + spec: { + name: "word_counter", + inputs: [], + outputs: [], + implementation: { container: { image: "x" } }, + }, + }), + makeSourced({ + digest: "command-comment-match", + spec: { + name: "binarize_column", + inputs: [], + outputs: [], + implementation: { + container: { + image: "python:3.11", + args: ["# exec() takes no keyword arguments"], + }, + }, + }, + }), + ]); + + const results = lexicalSearch(index, "word"); + expect(results.map((result) => result.digest)).toEqual([ + "name-match", + "command-comment-match", + ]); + expect(results[1]?.matchedFields).toContain("implementation"); + }); + it("preserves the source on each returned match", () => { const index = buildSearchIndex(fixtures); const results = lexicalSearch(index, "my_custom_train"); diff --git a/src/services/componentSearchIndex.ts b/src/services/componentSearchIndex.ts index 4652910f7..1a5512465 100644 --- a/src/services/componentSearchIndex.ts +++ b/src/services/componentSearchIndex.ts @@ -477,16 +477,20 @@ const FIELD_WEIGHTS: Record = { name: 5, description: 2, io: 2, - implementation: 1, - metadata: 1, + // Keep implementation/metadata searchable for recall, but make matches there + // much weaker than user-facing component intent fields. A common word in a + // command comment should not look as relevant as the same word in a name, + // description, or input/output. + implementation: 0.25, + metadata: 0.5, }; const FIELD_PHRASE_BONUS: Record = { name: 10, description: 4, io: 4, - implementation: 2, - metadata: 2, + implementation: 0.5, + metadata: 1, }; const PREFIX_MATCH_BONUS_MULTIPLIER = 0.5;