From f3bb210e5b600ebf54d5f58b560de07ce7751f34 Mon Sep 17 00:00:00 2001 From: Philippe Mortelette Date: Tue, 28 Apr 2026 14:28:08 -0400 Subject: [PATCH 1/8] feat: enforce retrieval relevance thresholds Add explicit score semantics and pre-packaging relevance filtering for memory search, including deterministic regression coverage for noisy direct-fact retrieval with integration-heavy memory sets. --- openapi.json | 212 ++++++++++++++++++ openapi.yaml | 150 +++++++++++++ .../memory-route-config-override.test.ts | 99 ++++++-- src/app/runtime-container.ts | 1 + src/db/repository-types.ts | 3 + .../response-schema-coverage.test.ts | 9 +- src/routes/memories.ts | 15 +- src/routes/memory-response-formatters.ts | 22 ++ src/schemas/memories.ts | 37 ++- src/schemas/search-response-parts.ts | 19 ++ .../memory-search-runtime-config.test.ts | 5 + .../retrieval-relevance-regression.test.ts | 201 +++++++++++++++++ .../__tests__/retrieval-trace.test.ts | 57 +++++ .../__tests__/scoped-dispatch.test.ts | 2 +- src/services/__tests__/test-fixtures.ts | 29 +++ src/services/memory-search.ts | 172 +++++++++++--- src/services/memory-service-types.ts | 2 + src/services/memory-service.ts | 10 +- src/services/relevance-policy.ts | 147 ++++++++++++ src/services/retrieval-trace.ts | 37 ++- src/services/search-pipeline.ts | 25 ++- 21 files changed, 1176 insertions(+), 78 deletions(-) create mode 100644 src/services/__tests__/retrieval-relevance-regression.test.ts create mode 100644 src/services/relevance-policy.ts diff --git a/openapi.json b/openapi.json index 000eb02..411f949 100644 --- a/openapi.json +++ b/openapi.json @@ -3560,6 +3560,12 @@ "source_site": { "type": "string" }, + "threshold": { + "description": "Optional normalized relevance threshold. Results below this semantic relevance floor are excluded before injection packaging.", + "maximum": 1, + "minimum": 0, + "type": "number" + }, "token_budget": { "maximum": 50000, "minimum": 100, @@ -3696,12 +3702,30 @@ "description": "Memory metadata persisted on the row, including caller-supplied verbatim metadata (set via /v1/memories/ingest/quick with skip_extraction=true) and core-generated metadata (e.g. cmo_id, memberMemoryIds, headline). Mirrors the shape /v1/memories/list and /v1/memories/:id return.", "type": "object" }, + "ranking_score": { + "type": [ + "number", + "null" + ] + }, + "relevance": { + "type": [ + "number", + "null" + ] + }, "score": { "type": [ "number", "null" ] }, + "semantic_similarity": { + "type": [ + "number", + "null" + ] + }, "similarity": { "type": [ "number", @@ -3836,9 +3860,91 @@ }, "type": "array" }, + "filter_decisions": { + "items": { + "properties": { + "decision": { + "enum": [ + "kept", + "filtered" + ], + "type": "string" + }, + "id": { + "type": "string" + }, + "namespace": { + "type": [ + "string", + "null" + ] + }, + "ranking_score": { + "type": "number" + }, + "reason": { + "type": "string" + }, + "relevance": { + "type": "number" + }, + "semantic_similarity": { + "type": "number" + }, + "source_kind": { + "enum": [ + "integration", + "local" + ], + "type": "string" + }, + "source_site": { + "type": "string" + }, + "threshold": { + "type": [ + "number", + "null" + ] + } + }, + "required": [ + "id", + "source_site", + "source_kind", + "namespace", + "semantic_similarity", + "ranking_score", + "relevance", + "threshold", + "decision", + "reason" + ], + "type": "object" + }, + "type": "array" + }, + "filtered_candidate_ids": { + "items": { + "type": "string" + }, + "type": "array" + }, "query_text": { "type": "string" }, + "relevance_filter_reason": { + "type": "string" + }, + "relevance_filter_source": { + "type": "string" + }, + "relevance_threshold": { + "type": [ + "number", + "null" + ] + }, "skip_repair": { "type": "boolean" }, @@ -4091,6 +4197,12 @@ "source_site": { "type": "string" }, + "threshold": { + "description": "Optional normalized relevance threshold. Results below this semantic relevance floor are excluded before injection packaging.", + "maximum": 1, + "minimum": 0, + "type": "number" + }, "token_budget": { "maximum": 50000, "minimum": 100, @@ -4227,12 +4339,30 @@ "description": "Memory metadata persisted on the row, including caller-supplied verbatim metadata (set via /v1/memories/ingest/quick with skip_extraction=true) and core-generated metadata (e.g. cmo_id, memberMemoryIds, headline). Mirrors the shape /v1/memories/list and /v1/memories/:id return.", "type": "object" }, + "ranking_score": { + "type": [ + "number", + "null" + ] + }, + "relevance": { + "type": [ + "number", + "null" + ] + }, "score": { "type": [ "number", "null" ] }, + "semantic_similarity": { + "type": [ + "number", + "null" + ] + }, "similarity": { "type": [ "number", @@ -4367,9 +4497,91 @@ }, "type": "array" }, + "filter_decisions": { + "items": { + "properties": { + "decision": { + "enum": [ + "kept", + "filtered" + ], + "type": "string" + }, + "id": { + "type": "string" + }, + "namespace": { + "type": [ + "string", + "null" + ] + }, + "ranking_score": { + "type": "number" + }, + "reason": { + "type": "string" + }, + "relevance": { + "type": "number" + }, + "semantic_similarity": { + "type": "number" + }, + "source_kind": { + "enum": [ + "integration", + "local" + ], + "type": "string" + }, + "source_site": { + "type": "string" + }, + "threshold": { + "type": [ + "number", + "null" + ] + } + }, + "required": [ + "id", + "source_site", + "source_kind", + "namespace", + "semantic_similarity", + "ranking_score", + "relevance", + "threshold", + "decision", + "reason" + ], + "type": "object" + }, + "type": "array" + }, + "filtered_candidate_ids": { + "items": { + "type": "string" + }, + "type": "array" + }, "query_text": { "type": "string" }, + "relevance_filter_reason": { + "type": "string" + }, + "relevance_filter_source": { + "type": "string" + }, + "relevance_threshold": { + "type": [ + "number", + "null" + ] + }, "skip_repair": { "type": "boolean" }, diff --git a/openapi.yaml b/openapi.yaml index e6f2e97..af5c3a0 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -2388,6 +2388,11 @@ paths: type: boolean source_site: type: string + threshold: + description: Optional normalized relevance threshold. Results below this semantic relevance floor are excluded before injection packaging. + maximum: 1 + minimum: 0 + type: number token_budget: maximum: 50000 minimum: 100 @@ -2485,10 +2490,22 @@ paths: additionalProperties: {} description: Memory metadata persisted on the row, including caller-supplied verbatim metadata (set via /v1/memories/ingest/quick with skip_extraction=true) and core-generated metadata (e.g. cmo_id, memberMemoryIds, headline). Mirrors the shape /v1/memories/list and /v1/memories/:id return. type: object + ranking_score: + type: + - number + - "null" + relevance: + type: + - number + - "null" score: type: - number - "null" + semantic_similarity: + type: + - number + - "null" similarity: type: - number @@ -2584,8 +2601,66 @@ paths: items: type: string type: array + filter_decisions: + items: + properties: + decision: + enum: + - kept + - filtered + type: string + id: + type: string + namespace: + type: + - string + - "null" + ranking_score: + type: number + reason: + type: string + relevance: + type: number + semantic_similarity: + type: number + source_kind: + enum: + - integration + - local + type: string + source_site: + type: string + threshold: + type: + - number + - "null" + required: + - id + - source_site + - source_kind + - namespace + - semantic_similarity + - ranking_score + - relevance + - threshold + - decision + - reason + type: object + type: array + filtered_candidate_ids: + items: + type: string + type: array query_text: type: string + relevance_filter_reason: + type: string + relevance_filter_source: + type: string + relevance_threshold: + type: + - number + - "null" skip_repair: type: boolean stage_count: @@ -2748,6 +2823,11 @@ paths: type: boolean source_site: type: string + threshold: + description: Optional normalized relevance threshold. Results below this semantic relevance floor are excluded before injection packaging. + maximum: 1 + minimum: 0 + type: number token_budget: maximum: 50000 minimum: 100 @@ -2845,10 +2925,22 @@ paths: additionalProperties: {} description: Memory metadata persisted on the row, including caller-supplied verbatim metadata (set via /v1/memories/ingest/quick with skip_extraction=true) and core-generated metadata (e.g. cmo_id, memberMemoryIds, headline). Mirrors the shape /v1/memories/list and /v1/memories/:id return. type: object + ranking_score: + type: + - number + - "null" + relevance: + type: + - number + - "null" score: type: - number - "null" + semantic_similarity: + type: + - number + - "null" similarity: type: - number @@ -2944,8 +3036,66 @@ paths: items: type: string type: array + filter_decisions: + items: + properties: + decision: + enum: + - kept + - filtered + type: string + id: + type: string + namespace: + type: + - string + - "null" + ranking_score: + type: number + reason: + type: string + relevance: + type: number + semantic_similarity: + type: number + source_kind: + enum: + - integration + - local + type: string + source_site: + type: string + threshold: + type: + - number + - "null" + required: + - id + - source_site + - source_kind + - namespace + - semantic_similarity + - ranking_score + - relevance + - threshold + - decision + - reason + type: object + type: array + filtered_candidate_ids: + items: + type: string + type: array query_text: type: string + relevance_filter_reason: + type: string + relevance_filter_source: + type: string + relevance_threshold: + type: + - number + - "null" skip_repair: type: boolean stage_count: diff --git a/src/__tests__/memory-route-config-override.test.ts b/src/__tests__/memory-route-config-override.test.ts index 64f7f39..e256bd0 100644 --- a/src/__tests__/memory-route-config-override.test.ts +++ b/src/__tests__/memory-route-config-override.test.ts @@ -19,6 +19,13 @@ import express from 'express'; import { afterAll, beforeAll, beforeEach, describe, expect, it, vi } from 'vitest'; + +vi.hoisted(() => { + process.env.OPENAI_API_KEY ??= 'test-openai-key'; + process.env.DATABASE_URL ??= 'postgresql://atomicmem:atomicmem@localhost:5433/atomicmem_test'; + process.env.EMBEDDING_DIMENSIONS ??= '1536'; +}); + import { createMemoryRouter } from '../routes/memories.js'; import type { MemoryService } from '../services/memory-service.js'; import { type BootedApp, bindEphemeral } from '../app/bind-ephemeral.js'; @@ -76,18 +83,7 @@ describe('POST /memories/* — per-request config_override', () => { memoryIds: [], linksCreated: 0, compositesCreated: 0, }); - const service = { - scopedSearch, ingest, quickIngest, - storeVerbatim: vi.fn(), workspaceIngest: vi.fn(), - scopedExpand: vi.fn(), scopedList: vi.fn(), scopedGet: vi.fn(), scopedDelete: vi.fn(), - list: vi.fn(), get: vi.fn(), delete: vi.fn(), expand: vi.fn(), resetBySource: vi.fn(), - getStats: vi.fn(), consolidate: vi.fn(), executeConsolidation: vi.fn(), - reconcileDeferred: vi.fn(), reconcileDeferredAll: vi.fn(), getDeferredStatus: vi.fn(), - evaluateDecay: vi.fn(), archiveDecayed: vi.fn(), checkCap: vi.fn(), - getAuditTrail: vi.fn(), getMutationSummary: vi.fn(), getRecentMutations: vi.fn(), - getLessons: vi.fn(), getLessonStats: vi.fn(), reportLesson: vi.fn(), deactivateLesson: vi.fn(), - } as unknown as MemoryService; - + const service = createRouteService(scopedSearch, ingest, quickIngest); const adapter = { base: routeBaseConfig, current: () => ({ ...ROUTE_CONFIG }), @@ -149,6 +145,29 @@ describe('POST /memories/* — per-request config_override', () => { expect(options.effectiveConfig.hybridSearchEnabled).toBe(true); }); + it('POST /search with threshold → forwards relevanceThreshold', async () => { + const res = await postJson(`/memories/search`, { + user_id: 'u', + query: 'q', + threshold: 0.42, + }); + expect(res.status).toBe(200); + const call = scopedSearch.mock.calls[0]!; + const options = call[2] as { retrievalOptions: { relevanceThreshold: number } }; + expect(options.retrievalOptions.relevanceThreshold).toBe(0.42); + }); + + it('POST /search rejects invalid threshold', async () => { + const res = await postJson(`/memories/search`, { + user_id: 'u', + query: 'q', + threshold: 1.2, + }); + expect(res.status).toBe(400); + expect((await res.json()).error).toMatch(/threshold must be between 0 and 1/); + expect(scopedSearch).not.toHaveBeenCalled(); + }); + it('POST /search/fast with override → headers and fast:true both set', async () => { const res = await postJson(`/memories/search/fast`, { user_id: 'u', query: 'q', @@ -162,6 +181,22 @@ describe('POST /memories/* — per-request config_override', () => { expect(options.effectiveConfig.crossEncoderEnabled).toBe(true); }); + it('POST /search/fast with threshold → forwards relevanceThreshold', async () => { + const res = await postJson(`/memories/search/fast`, { + user_id: 'u', + query: 'q', + threshold: 0.7, + }); + expect(res.status).toBe(200); + const call = scopedSearch.mock.calls[0]!; + const options = call[2] as { + fast: boolean; + retrievalOptions: { relevanceThreshold: number }; + }; + expect(options.fast).toBe(true); + expect(options.retrievalOptions.relevanceThreshold).toBe(0.7); + }); + it('POST /ingest with override → headers + trailing effectiveConfig arg', async () => { ingest.mockResolvedValueOnce({ episodeId: 'ep', factsExtracted: 0, memoriesStored: 0, memoriesUpdated: 0, @@ -197,10 +232,7 @@ describe('POST /memories/* — per-request config_override', () => { it('unknown override key → 200, service invoked, warning header set', async () => { const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); - const res = await postJson(`/memories/search`, { - user_id: 'u', query: 'q', - config_override: { bogusFlag: true, alsoBogus: 'nope' }, - }); + const res = await postSearchWithConfigOverride(booted, { bogusFlag: true, alsoBogus: 'nope' }); expect(res.status).toBe(200); expect(res.headers.get('X-Atomicmem-Config-Override-Applied')).toBe('true'); expect(res.headers.get('X-Atomicmem-Unknown-Override-Keys')).toBe('alsoBogus,bogusFlag'); @@ -211,10 +243,7 @@ describe('POST /memories/* — per-request config_override', () => { it('mix of known and unknown keys → only unknown ones in warning header', async () => { const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); - const res = await postJson(`/memories/search`, { - user_id: 'u', query: 'q', - config_override: { hybridSearchEnabled: true, futureFieldX: 42 }, - }); + const res = await postSearchWithConfigOverride(booted, { hybridSearchEnabled: true, futureFieldX: 42 }); expect(res.status).toBe(200); expect(res.headers.get('X-Atomicmem-Config-Override-Keys')).toBe('futureFieldX,hybridSearchEnabled'); expect(res.headers.get('X-Atomicmem-Unknown-Override-Keys')).toBe('futureFieldX'); @@ -222,10 +251,7 @@ describe('POST /memories/* — per-request config_override', () => { }); it('all-known keys → no X-Atomicmem-Unknown-Override-Keys header', async () => { - const res = await postJson(`/memories/search`, { - user_id: 'u', query: 'q', - config_override: { hybridSearchEnabled: true }, - }); + const res = await postSearchWithConfigOverride(booted, { hybridSearchEnabled: true }); expect(res.status).toBe(200); expect(res.headers.get('X-Atomicmem-Config-Override-Applied')).toBe('true'); expect(res.headers.get('X-Atomicmem-Unknown-Override-Keys')).toBeNull(); @@ -253,3 +279,28 @@ describe('POST /memories/* — per-request config_override', () => { expect(options.effectiveConfig).toBeUndefined(); }); }); + +function createRouteService( + scopedSearch: ReturnType, + ingest: ReturnType, + quickIngest: ReturnType, +): MemoryService { + return { + scopedSearch, + ingest, + quickIngest, + storeVerbatim: vi.fn(), + workspaceIngest: vi.fn(), + } as unknown as MemoryService; +} + +function postSearchWithConfigOverride( + booted: BootedApp, + configOverride: Record, +): Promise { + return fetch(`${booted.baseUrl}/memories/search`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ user_id: 'u', query: 'q', config_override: configOverride }), + }); +} diff --git a/src/app/runtime-container.ts b/src/app/runtime-container.ts index 7f3dec7..8609c89 100644 --- a/src/app/runtime-container.ts +++ b/src/app/runtime-container.ts @@ -111,6 +111,7 @@ export interface CoreRuntimeConfig { rerankSkipMinGap: number; rerankSkipTopSimilarity: number; retrievalProfileSettings: RetrievalProfile; + similarityThreshold: number; temporalQueryConstraintBoost: number; temporalQueryConstraintEnabled: boolean; } diff --git a/src/db/repository-types.ts b/src/db/repository-types.ts index 0f5aa18..d6cb5f6 100644 --- a/src/db/repository-types.ts +++ b/src/db/repository-types.ts @@ -163,6 +163,9 @@ export interface EpisodeRow { export interface SearchResult extends MemoryRow { similarity: number; score: number; + semantic_similarity?: number; + ranking_score?: number; + relevance?: number; matched_facts?: string[]; matched_fact_ids?: string[]; retrieval_layer?: 'memory' | 'atomic_fact'; diff --git a/src/routes/__tests__/response-schema-coverage.test.ts b/src/routes/__tests__/response-schema-coverage.test.ts index af8e448..aa3299c 100644 --- a/src/routes/__tests__/response-schema-coverage.test.ts +++ b/src/routes/__tests__/response-schema-coverage.test.ts @@ -16,7 +16,14 @@ * first. */ -import { describe, it, expect } from 'vitest'; +import { describe, it, expect, vi } from 'vitest'; + +vi.hoisted(() => { + process.env.OPENAI_API_KEY ??= 'test-openai-key'; + process.env.DATABASE_URL ??= 'postgresql://atomicmem:atomicmem@localhost:5433/atomicmem_test'; + process.env.EMBEDDING_DIMENSIONS ??= '1536'; +}); + import type { Router } from 'express'; import { createMemoryRouter } from '../memories'; import { createAgentRouter } from '../agents'; diff --git a/src/routes/memories.ts b/src/routes/memories.ts index 0a182cf..ece2a62 100644 --- a/src/routes/memories.ts +++ b/src/routes/memories.ts @@ -287,9 +287,15 @@ function registerSearchRoute( router.post('/search', validateBody(SearchBodySchema), async (req: Request, res: Response) => { try { const { body, effectiveConfig, scope, requestLimit } = readSearchRequest(req, res, configRouteAdapter); - const retrievalOptions: { retrievalMode?: SearchBody['retrievalMode']; tokenBudget?: SearchBody['tokenBudget']; skipRepairLoop?: boolean } = { + const retrievalOptions: { + retrievalMode?: SearchBody['retrievalMode']; + tokenBudget?: SearchBody['tokenBudget']; + relevanceThreshold?: SearchBody['relevanceThreshold']; + skipRepairLoop?: boolean; + } = { retrievalMode: body.retrievalMode, tokenBudget: body.tokenBudget, + relevanceThreshold: body.relevanceThreshold, ...(body.skipRepair ? { skipRepairLoop: true } : {}), }; const result = await service.scopedSearch(scope, body.query, { @@ -319,11 +325,15 @@ function registerFastSearchRoute( router.post('/search/fast', validateBody(SearchBodySchema), async (req: Request, res: Response) => { try { const { body, effectiveConfig, scope, requestLimit } = readSearchRequest(req, res, configRouteAdapter); + const retrievalOptions = { + relevanceThreshold: body.relevanceThreshold, + }; const result = await service.scopedSearch(scope, body.query, { fast: true, sourceSite: body.sourceSite, limit: requestLimit, namespaceScope: body.namespaceScope, + retrievalOptions, effectiveConfig, }); res.json(formatSearchResponse(result, scope)); @@ -789,6 +799,9 @@ function formatSearchResponse(result: RetrievalResult, scope: MemoryScope) { content: memory.content, similarity: memory.similarity, score: memory.score, + semantic_similarity: memory.semantic_similarity ?? memory.similarity, + ranking_score: memory.ranking_score ?? memory.score, + relevance: memory.relevance ?? memory.similarity, importance: memory.importance, source_site: memory.source_site, created_at: memory.created_at, diff --git a/src/routes/memory-response-formatters.ts b/src/routes/memory-response-formatters.ts index 473d6f8..a4eacb9 100644 --- a/src/routes/memory-response-formatters.ts +++ b/src/routes/memory-response-formatters.ts @@ -178,12 +178,34 @@ function formatRetrievalTrace(summary: RetrievalTraceSummary) { candidate_count: summary.candidateCount, query_text: summary.queryText, skip_repair: summary.skipRepair, + ...(summary.relevanceThreshold !== undefined ? { relevance_threshold: summary.relevanceThreshold } : {}), + ...(summary.relevanceFilterSource ? { relevance_filter_source: summary.relevanceFilterSource } : {}), + ...(summary.relevanceFilterReason ? { relevance_filter_reason: summary.relevanceFilterReason } : {}), + ...(summary.filteredCandidateIds ? { filtered_candidate_ids: summary.filteredCandidateIds } : {}), + ...(summary.filterDecisions ? { + filter_decisions: summary.filterDecisions.map(formatFilterDecision), + } : {}), ...(summary.traceId ? { trace_id: summary.traceId } : {}), ...(summary.stageCount !== undefined ? { stage_count: summary.stageCount } : {}), ...(summary.stageNames ? { stage_names: summary.stageNames } : {}), }; } +function formatFilterDecision(decision: NonNullable[number]) { + return { + id: decision.id, + source_site: decision.sourceSite, + source_kind: decision.sourceKind, + namespace: decision.namespace, + semantic_similarity: decision.semanticSimilarity, + ranking_score: decision.rankingScore, + relevance: decision.relevance, + threshold: decision.threshold, + decision: decision.decision, + reason: decision.reason, + }; +} + function formatPackagingTrace(summary: PackagingTraceSummary) { return { package_type: summary.packageType, diff --git a/src/schemas/memories.ts b/src/schemas/memories.ts index b70e878..042b6cb 100644 --- a/src/schemas/memories.ts +++ b/src/schemas/memories.ts @@ -131,14 +131,7 @@ const SearchBodyLimit = z .openapi({ type: 'integer', minimum: 1, maximum: MAX_SEARCH_LIMIT }); /** token_budget: finite number in [100, 50000], floored. Throws on invalid. */ -const TokenBudgetSchema = z - .preprocess(v => (v === undefined || v === null ? undefined : v), z.unknown().optional()) - .refine( - v => - v === undefined || - (typeof v === 'number' && Number.isFinite(v)), - { message: 'token_budget must be a finite number' }, - ) +const TokenBudgetSchema = optionalFiniteNumber('token_budget') .refine( v => v === undefined || @@ -152,6 +145,32 @@ const TokenBudgetSchema = z .transform(v => (typeof v === 'number' ? Math.floor(v) : undefined)) .openapi({ type: 'integer', minimum: MIN_TOKEN_BUDGET, maximum: MAX_TOKEN_BUDGET }); +/** threshold: normalized relevance floor in [0, 1]. Throws on invalid. */ +const SearchThresholdSchema = optionalFiniteNumber('threshold') + .refine( + v => v === undefined || (v >= 0 && v <= 1), + { message: 'threshold must be between 0 and 1' }, + ) + .transform(v => (typeof v === 'number' ? v : undefined)) + .openapi({ + type: 'number', + minimum: 0, + maximum: 1, + description: + 'Optional normalized relevance threshold. Results below this semantic relevance floor are excluded before injection packaging.', + }); + +function optionalFiniteNumber(label: string) { + return z + .preprocess(v => (v === undefined || v === null ? undefined : v), z.unknown().optional()) + .refine( + (v): v is number | undefined => + v === undefined || + (typeof v === 'number' && Number.isFinite(v)), + { message: `${label} must be a finite number` }, + ); +} + /** * retrieval_mode: string enum or undefined. Absent/null → undefined; * wrong type → throw 'retrieval_mode must be a string'; wrong enum @@ -321,6 +340,7 @@ export const SearchBodySchema = z as_of: IsoTimestamp, retrieval_mode: RetrievalModeField, token_budget: TokenBudgetSchema, + threshold: SearchThresholdSchema, namespace_scope: OptionalBodyString, skip_repair: OptionalBooleanField(), workspace_id: WorkspaceIdField, @@ -337,6 +357,7 @@ export const SearchBodySchema = z asOf: b.as_of, retrievalMode: b.retrieval_mode, tokenBudget: b.token_budget, + relevanceThreshold: b.threshold, namespaceScope: b.namespace_scope, skipRepair: b.skip_repair === true, workspace: buildWorkspaceContext(b.workspace_id, b.agent_id, b.visibility), diff --git a/src/schemas/search-response-parts.ts b/src/schemas/search-response-parts.ts index fc63fa4..c10d333 100644 --- a/src/schemas/search-response-parts.ts +++ b/src/schemas/search-response-parts.ts @@ -13,6 +13,9 @@ export const SearchMemoryItemSchema = z.object({ content: z.string(), similarity: NumberOrNaN.optional(), score: NumberOrNaN.optional(), + semantic_similarity: NumberOrNaN.optional(), + ranking_score: NumberOrNaN.optional(), + relevance: NumberOrNaN.optional(), importance: NumberOrNaN.optional(), source_site: z.string().optional(), created_at: IsoDateString.optional(), @@ -51,6 +54,22 @@ const RetrievalTraceSchema = z.object({ candidate_count: z.number(), query_text: z.string(), skip_repair: z.boolean(), + relevance_threshold: z.number().nullable().optional(), + relevance_filter_source: z.string().optional(), + relevance_filter_reason: z.string().optional(), + filtered_candidate_ids: z.array(z.string()).optional(), + filter_decisions: z.array(z.object({ + id: z.string(), + source_site: z.string(), + source_kind: z.enum(['integration', 'local']), + namespace: z.string().nullable(), + semantic_similarity: z.number(), + ranking_score: z.number(), + relevance: z.number(), + threshold: z.number().nullable(), + decision: z.enum(['kept', 'filtered']), + reason: z.string(), + })).optional(), trace_id: z.string().optional(), stage_count: z.number().optional(), stage_names: z.array(z.string()).optional(), diff --git a/src/services/__tests__/memory-search-runtime-config.test.ts b/src/services/__tests__/memory-search-runtime-config.test.ts index 81f499f..92cd985 100644 --- a/src/services/__tests__/memory-search-runtime-config.test.ts +++ b/src/services/__tests__/memory-search-runtime-config.test.ts @@ -20,6 +20,11 @@ const { mockEmitAuditEvent: vi.fn(), mockRunSearchPipelineWithTrace: vi.fn(), })); +vi.hoisted(() => { + process.env.OPENAI_API_KEY ??= 'test-openai-key'; + process.env.DATABASE_URL ??= 'postgresql://atomicmem:atomicmem@localhost:5433/atomicmem_test'; + process.env.EMBEDDING_DIMENSIONS ??= '1536'; +}); vi.mock('../lesson-service.js', () => ({ checkLessons: mockCheckLessons, diff --git a/src/services/__tests__/retrieval-relevance-regression.test.ts b/src/services/__tests__/retrieval-relevance-regression.test.ts new file mode 100644 index 0000000..ca6de09 --- /dev/null +++ b/src/services/__tests__/retrieval-relevance-regression.test.ts @@ -0,0 +1,201 @@ +/** + * Regression coverage for GTM-1103 noisy context retrieval. + */ + +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { + createFavoriteColorNoisyRetrievalFixture, + createSearchResult, +} from './test-fixtures.js'; + +const { + mockRunSearchPipelineWithTrace, + mockResolveSearchLimitDetailed, + mockClassifyQueryDetailed, +} = vi.hoisted(() => ({ + mockRunSearchPipelineWithTrace: vi.fn(), + mockResolveSearchLimitDetailed: vi.fn(), + mockClassifyQueryDetailed: vi.fn(), +})); +vi.hoisted(() => { + process.env.OPENAI_API_KEY ??= 'test-openai-key'; + process.env.DATABASE_URL ??= 'postgresql://atomicmem:atomicmem@localhost:5433/atomicmem_test'; + process.env.EMBEDDING_DIMENSIONS ??= '1536'; +}); + +vi.mock('../search-pipeline.js', () => ({ runSearchPipelineWithTrace: mockRunSearchPipelineWithTrace })); +vi.mock('../retrieval-policy.js', () => ({ + resolveSearchLimitDetailed: mockResolveSearchLimitDetailed, + classifyQueryDetailed: mockClassifyQueryDetailed, +})); +vi.mock('../composite-staleness.js', () => ({ + excludeStaleComposites: vi.fn(passthroughCompositeFilter), +})); + +const { performSearch } = await import('../memory-search.js'); + +const TEST_USER = 'retrieval-relevance-regression-user'; + +describe('retrieval relevance regression', () => { + beforeEach(() => { + vi.clearAllMocks(); + mockClassifyQueryDetailed.mockImplementation(classifyFixtureQuery); + mockResolveSearchLimitDetailed.mockImplementation((query: string, limit?: number) => ({ + limit: limit ?? 5, + classification: classifyFixtureQuery(query), + })); + }); + + it('keeps answer-bearing direct fact memory and filters unrelated high-score noise', async () => { + const fixture = createFavoriteColorNoisyRetrievalFixture(); + const trace = createTrace(fixture.all.map((memory) => memory.id)); + mockRunSearchPipelineWithTrace.mockResolvedValue({ filtered: fixture.all, trace }); + + const result = await performSearch( + createDeps(0.5), + TEST_USER, + 'What is my favorite color?', + undefined, + 5, + undefined, + undefined, + undefined, + { skipRepairLoop: true, skipReranking: true }, + ); + + const ids = result.memories.map((memory) => memory.id); + expect(ids).toEqual([fixture.answer.id]); + expect(precisionAtK(ids, new Set([fixture.answer.id]))).toBe(1); + expect(result.injectionText).toContain('favorite color is teal'); + expect(result.injectionText).not.toContain('spicy ramen'); + expect(result.injectionText).not.toContain('Flight receipts'); + }); + + it('uses caller threshold before packaging even when config threshold is loose', async () => { + const fixture = createFavoriteColorNoisyRetrievalFixture(); + const borderline = createSearchResult({ + id: 'borderline-food-noise', + content: 'The user prefers crunchy snacks.', + similarity: 0.42, + score: 0.99, + importance: 1, + source_site: 'manual', + }); + const noisyResults = [fixture.answer, borderline, ...fixture.all.slice(1)]; + mockRunSearchPipelineWithTrace.mockResolvedValue({ + filtered: noisyResults, + trace: createTrace(noisyResults.map((memory) => memory.id)), + }); + + const result = await performSearch( + createDeps(0.1), + TEST_USER, + 'What is my favorite color?', + undefined, + 5, + undefined, + undefined, + undefined, + { relevanceThreshold: 0.5, skipRepairLoop: true, skipReranking: true }, + ); + + expect(result.memories.map((memory) => memory.id)).toEqual([fixture.answer.id]); + }); + + it('documents source and namespace filter decisions in the retrieval trace', async () => { + const fixture = createFavoriteColorNoisyRetrievalFixture(); + const trace = createTrace(fixture.all.map((memory) => memory.id)); + mockRunSearchPipelineWithTrace.mockResolvedValue({ filtered: fixture.all, trace }); + + await performSearch(createDeps(0.5), TEST_USER, 'What is my favorite color?'); + + expect(trace.stage).toHaveBeenCalledWith( + 'relevance-filter', + [expect.objectContaining({ id: fixture.answer.id, relevance: 0.91 })], + expect.objectContaining({ + threshold: 0.5, + removedIds: expect.arrayContaining([ + fixture.unrelatedFood.id, + fixture.gmail.id, + fixture.drive.id, + fixture.x.id, + ]), + decisions: expect.arrayContaining([ + expect.objectContaining({ + id: fixture.gmail.id, + sourceSite: 'integration-google', + sourceKind: 'integration', + namespace: 'site/integration-google', + decision: 'filtered', + reason: 'integration-below-threshold', + }), + ]), + }), + ); + }); + + it('preserves broad integration retrieval when no caller threshold is supplied', async () => { + const fixture = createFavoriteColorNoisyRetrievalFixture(); + mockRunSearchPipelineWithTrace.mockResolvedValue({ + filtered: fixture.all, + trace: createTrace(fixture.all.map((memory) => memory.id)), + }); + + const result = await performSearch( + createDeps(0.5), + TEST_USER, + 'List all synced integration memories', + ); + + expect(result.memories.map((memory) => memory.id)).toEqual(fixture.all.map((memory) => memory.id)); + }); +}); + +function classifyFixtureQuery(query: string) { + if (query.toLowerCase().includes('list all')) { + return { limit: 25, label: 'aggregation', matchedMarker: 'list all' }; + } + return { limit: 5, label: 'simple' }; +} + +async function passthroughCompositeFilter(_repo: unknown, _userId: string, memories: unknown[]) { + return { filtered: memories, removedCompositeIds: [] }; +} + +function createTrace(candidateIds: string[]) { + return { + event: vi.fn(), + stage: vi.fn(), + finalize: vi.fn(), + setPackagingSummary: vi.fn(), + setAssemblySummary: vi.fn(), + setRetrievalSummary: vi.fn(), + getRetrievalSummary: vi.fn(() => ({ + candidateIds, + candidateCount: candidateIds.length, + queryText: 'What is my favorite color?', + skipRepair: true, + })), + }; +} + +function createDeps(similarityThreshold: number) { + const memory = { touchMemory: vi.fn().mockResolvedValue(undefined) }; + return { + config: { + auditLoggingEnabled: false, + consensusMinMemories: 2, + consensusValidationEnabled: false, + lessonsEnabled: false, + similarityThreshold, + }, + stores: { memory, search: {}, link: {}, claim: {}, entity: null, lesson: null, pool: {} }, + observationService: null, + uriResolver: { resolve: vi.fn().mockResolvedValue(null), format: vi.fn() }, + } as any; +} + +function precisionAtK(resultIds: string[], relevantIds: Set): number { + if (resultIds.length === 0) return 0; + return resultIds.filter((id) => relevantIds.has(id)).length / resultIds.length; +} diff --git a/src/services/__tests__/retrieval-trace.test.ts b/src/services/__tests__/retrieval-trace.test.ts index d9b6d13..33d429b 100644 --- a/src/services/__tests__/retrieval-trace.test.ts +++ b/src/services/__tests__/retrieval-trace.test.ts @@ -4,6 +4,13 @@ */ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; + +vi.hoisted(() => { + process.env.OPENAI_API_KEY ??= 'test-openai-key'; + process.env.DATABASE_URL ??= 'postgresql://atomicmem:atomicmem@localhost:5433/atomicmem_test'; + process.env.EMBEDDING_DIMENSIONS ??= '1536'; +}); + import { TraceCollector } from '../retrieval-trace.js'; import { config } from '../../config.js'; import { createSearchResult } from './test-fixtures.js'; @@ -129,6 +136,56 @@ describe('TraceCollector', () => { expect(mem.score).toBe(0.9877); }); + it('captures safe score semantics and source fields', () => { + config.retrievalTraceEnabled = true; + const trace = new TraceCollector('score semantics', 'user-4'); + const results = [ + createSearchResult({ + id: 'integration-1', + content: 'Sensitive body is preview-limited.', + similarity: 0.23456, + semantic_similarity: 0.23456, + score: 0.98765, + ranking_score: 0.98765, + relevance: 0.23456, + importance: 0.8, + source_site: 'integration-google', + namespace: 'site/integration-google', + }), + ]; + + trace.stage('relevance-filter', results, { + decisions: [{ + id: 'integration-1', + sourceSite: 'integration-google', + sourceKind: 'integration', + namespace: 'site/integration-google', + semanticSimilarity: 0.23456, + rankingScore: 0.98765, + relevance: 0.23456, + threshold: 0.5, + decision: 'filtered', + reason: 'integration-below-threshold', + }], + }); + trace.finalize([]); + + const output = getWrittenTrace(); + const stages = output.stages as Array<{ memories: Array>; meta?: Record }>; + expect(stages[0].memories[0]).toMatchObject({ + id: 'integration-1', + semanticSimilarity: 0.2346, + rankingScore: 0.9877, + relevance: 0.2346, + importance: 0.8, + sourceSite: 'integration-google', + namespace: 'site/integration-google', + }); + expect(stages[0].meta?.decisions).toEqual([ + expect.objectContaining({ id: 'integration-1', reason: 'integration-below-threshold' }), + ]); + }); + it('includes metadata in stage output', () => { config.retrievalTraceEnabled = true; const trace = new TraceCollector('meta test', 'user-5'); diff --git a/src/services/__tests__/scoped-dispatch.test.ts b/src/services/__tests__/scoped-dispatch.test.ts index 2829b1e..abbfed4 100644 --- a/src/services/__tests__/scoped-dispatch.test.ts +++ b/src/services/__tests__/scoped-dispatch.test.ts @@ -84,7 +84,7 @@ describe('scopedSearch', () => { it('dispatches user scope with fast option to performFastSearch', async () => { await service.scopedSearch({ kind: 'user', userId: 'u1' }, 'query', { fast: true, sourceSite: 'test', limit: 10 }); - expect(mockPerformFastSearch).toHaveBeenCalledWith(expect.anything(), 'u1', 'query', 'test', 10, undefined); + expect(mockPerformFastSearch).toHaveBeenCalledWith(expect.anything(), 'u1', 'query', 'test', 10, undefined, undefined); expect(mockPerformSearch).not.toHaveBeenCalled(); }); diff --git a/src/services/__tests__/test-fixtures.ts b/src/services/__tests__/test-fixtures.ts index 5cf327a..f505002 100644 --- a/src/services/__tests__/test-fixtures.ts +++ b/src/services/__tests__/test-fixtures.ts @@ -46,6 +46,35 @@ export function createSearchResult(overrides: Partial = {}): Searc return { ...baseMemoryDefaults(), similarity: 0.8, score: 0.8, ...overrides }; } +/** Deterministic fixture for direct fact retrieval with noisy integrations. */ +export function createFavoriteColorNoisyRetrievalFixture() { + const answer = createSearchResult({ + id: 'favorite-color-answer', + content: 'The user favorite color is teal.', + similarity: 0.91, + score: 0.91, + source_site: 'manual', + namespace: 'user/preferences', + }); + const unrelatedFood = createNoiseMemory('unrelated-food-preference', 'The user likes spicy ramen.', 'manual'); + const gmail = createNoiseMemory('integration-gmail-noise', 'Flight receipts mention seat 14C.', 'integration-google'); + const drive = createNoiseMemory('integration-drive-noise', 'Quarterly planning doc mentions budget owners.', 'integration-drive'); + const x = createNoiseMemory('integration-x-noise', 'A saved X post mentions CSS color palettes.', 'integration-x'); + return { answer, unrelatedFood, gmail, drive, x, all: [answer, unrelatedFood, gmail, drive, x] }; +} + +function createNoiseMemory(id: string, content: string, sourceSite: string): SearchResult { + return createSearchResult({ + id, + content, + similarity: 0.18, + score: 0.98, + importance: 1, + source_site: sourceSite, + namespace: `site/${sourceSite}`, + }); +} + /** Build a fully typed MemoryRow with sane defaults. Override any field. */ export function createMemoryRow(overrides: Partial = {}): MemoryRow { return { ...baseMemoryDefaults(), ...overrides }; diff --git a/src/services/memory-search.ts b/src/services/memory-search.ts index c221f12..583cc7a 100644 --- a/src/services/memory-search.ts +++ b/src/services/memory-search.ts @@ -18,9 +18,40 @@ import { TraceCollector } from './retrieval-trace.js'; import { excludeStaleComposites } from './composite-staleness.js'; import { applyFlatPackagingPolicy } from './composite-dedup.js'; import { recordSearchSideEffects } from './retrieval-side-effects.js'; +import { + applyRelevanceFilter, + resolveRelevanceGate, + type RelevanceFilterDecision, +} from './relevance-policy.js'; import type { AgentScope, WorkspaceContext } from '../db/repository-types.js'; import type { MemoryServiceDeps, RetrievalOptions, RetrievalResult } from './memory-service-types.js'; +interface RelevanceFilterSummary { + threshold: number | null; + source: string; + reason: string; + queryLabel: string; + removedIds: string[]; + decisions: RelevanceFilterDecision[]; +} + +interface PostProcessedSearch { + memories: SearchResult[]; + consensusResult?: ConsensusResult; + relevanceFilter: RelevanceFilterSummary; +} + +interface PackagedSearchOutput { + mode: RetrievalOptions['retrievalMode']; + outputMemories: SearchResult[]; + injectionText: string; + tierAssignments: ReturnType['tierAssignments']; + expandIds: ReturnType['expandIds']; + estimatedContextTokens: ReturnType['estimatedContextTokens']; + packagingSummary: ReturnType['packagingSummary']; + assemblySummary: ReturnType['assemblySummary']; +} + /** Check lessons safety gate; returns undefined if lessons disabled. */ async function checkSearchLessons(deps: MemoryServiceDeps, userId: string, query: string): Promise { if (!deps.config.lessonsEnabled || !deps.stores.lesson) return undefined; @@ -89,7 +120,8 @@ async function postProcessResults( userId: string, query: string, asOf: string | undefined, -): Promise<{ memories: SearchResult[]; consensusResult?: ConsensusResult }> { + retrievalOptions: RetrievalOptions | undefined, +): Promise { let memories = rawMemories.filter((m) => !m.workspace_id); if (!asOf) { @@ -103,31 +135,63 @@ async function postProcessResults( } } - if (!deps.config.consensusValidationEnabled || memories.length < deps.config.consensusMinMemories) { - return { memories }; - } + let consensusResult: ConsensusResult | undefined; - const consensusResult = await validateConsensus(query, memories); - if (consensusResult.removedMemoryIds.length > 0) { - const removedSet = new Set(consensusResult.removedMemoryIds); - memories = memories.filter((m) => !removedSet.has(m.id)); - activeTrace.stage('consensus-filter', memories, { - removedCount: consensusResult.removedMemoryIds.length, - removedIds: consensusResult.removedMemoryIds, - }); - if (deps.config.lessonsEnabled && deps.stores.lesson) { - recordConsensusLessons(deps.stores.lesson, userId, consensusResult, memories).catch( - (err) => console.error('Consensus lesson recording failed:', err), - ); + if (deps.config.consensusValidationEnabled && memories.length >= deps.config.consensusMinMemories) { + consensusResult = await validateConsensus(query, memories); + if (consensusResult.removedMemoryIds.length > 0) { + const removedSet = new Set(consensusResult.removedMemoryIds); + memories = memories.filter((m) => !removedSet.has(m.id)); + activeTrace.stage('consensus-filter', memories, { + removedCount: consensusResult.removedMemoryIds.length, + removedIds: consensusResult.removedMemoryIds, + }); + if (deps.config.lessonsEnabled && deps.stores.lesson) { + recordConsensusLessons(deps.stores.lesson, userId, consensusResult, memories).catch( + (err) => console.error('Consensus lesson recording failed:', err), + ); + } } } - return { memories, consensusResult }; + + const relevanceFilter = applySearchRelevanceFilter( + memories, + activeTrace, + query, + retrievalOptions, + deps.config, + ); + return { memories: relevanceFilter.memories, consensusResult, relevanceFilter }; +} + +function applySearchRelevanceFilter( + memories: SearchResult[], + activeTrace: TraceCollector, + query: string, + retrievalOptions: RetrievalOptions | undefined, + runtimeConfig: MemoryServiceDeps['config'], +): RelevanceFilterSummary & { memories: SearchResult[] } { + const gate = resolveRelevanceGate(query, retrievalOptions?.relevanceThreshold, runtimeConfig); + const result = applyRelevanceFilter(memories, gate); + const summary = { + threshold: gate.threshold, + source: gate.source, + reason: gate.reason, + queryLabel: gate.queryLabel, + removedIds: result.removedIds, + decisions: result.decisions, + }; + activeTrace.stage('relevance-filter', result.memories, { + ...summary, + removedCount: result.removedIds.length, + }); + return { ...summary, memories: result.memories }; } /** Package memories, build injection text, and assemble the final response. */ function assembleResponse( deps: MemoryServiceDeps, - postProcessed: { memories: SearchResult[]; consensusResult?: ConsensusResult }, + postProcessed: PostProcessedSearch, query: string, userId: string, activeTrace: TraceCollector, @@ -136,28 +200,74 @@ function assembleResponse( sourceSite: string | undefined, lessonCheck: LessonCheckResult | undefined, ): RetrievalResult { + const packaged = packageSearchOutput(postProcessed, query, activeTrace, retrievalOptions); + recordSearchSideEffects(deps, packaged.outputMemories, userId, query, sourceSite, asOf); + updateRetrievalSummary(activeTrace, packaged.outputMemories, query, retrievalOptions, postProcessed.relevanceFilter); + activeTrace.finalize(packaged.outputMemories); + return buildRetrievalResult(postProcessed, packaged, activeTrace, lessonCheck); +} + +function packageSearchOutput( + postProcessed: PostProcessedSearch, + query: string, + activeTrace: TraceCollector, + retrievalOptions: RetrievalOptions | undefined, +): PackagedSearchOutput { const mode = retrievalOptions?.retrievalMode ?? 'flat'; const packaged = applyFlatPackagingPolicy(postProcessed.memories, query, mode, activeTrace); const outputMemories = isCurrentStateQuery(query) ? packaged.sort((a, b) => b.score - a.score) : packaged; - - recordSearchSideEffects(deps, outputMemories, userId, query, sourceSite, asOf); - const { injectionText, tierAssignments, expandIds, estimatedContextTokens } = buildInjection(outputMemories, query, mode, retrievalOptions?.tokenBudget); const { packagingSummary, assemblySummary } = finalizePackagingTrace(activeTrace, { outputMemories, mode, injectionText, estimatedContextTokens, tierAssignments, tokenBudget: retrievalOptions?.tokenBudget, }); - activeTrace.finalize(outputMemories); + return { + mode, outputMemories, injectionText, tierAssignments, expandIds, + estimatedContextTokens, packagingSummary, assemblySummary, + }; +} +function updateRetrievalSummary( + activeTrace: TraceCollector, + outputMemories: SearchResult[], + query: string, + retrievalOptions: RetrievalOptions | undefined, + relevanceFilter: RelevanceFilterSummary, +): void { + const priorSummary = activeTrace.getRetrievalSummary(); + activeTrace.setRetrievalSummary({ + candidateIds: outputMemories.map((memory) => memory.id), + candidateCount: outputMemories.length, + queryText: priorSummary?.queryText ?? query, + skipRepair: priorSummary?.skipRepair ?? retrievalOptions?.skipRepairLoop ?? false, + relevanceThreshold: relevanceFilter.threshold, + relevanceFilterSource: relevanceFilter.source, + relevanceFilterReason: relevanceFilter.reason, + filteredCandidateIds: relevanceFilter.removedIds, + filterDecisions: relevanceFilter.decisions, + }); +} + +function buildRetrievalResult( + postProcessed: PostProcessedSearch, + packaged: PackagedSearchOutput, + activeTrace: TraceCollector, + lessonCheck: LessonCheckResult | undefined, +): RetrievalResult { return { - memories: outputMemories, injectionText, - citations: buildRichCitations(outputMemories).map((c) => c.memory_id), - retrievalMode: mode, tierAssignments, expandIds, estimatedContextTokens, + memories: packaged.outputMemories, + injectionText: packaged.injectionText, + citations: buildRichCitations(packaged.outputMemories).map((c) => c.memory_id), + retrievalMode: packaged.mode ?? 'flat', + tierAssignments: packaged.tierAssignments, + expandIds: packaged.expandIds, + estimatedContextTokens: packaged.estimatedContextTokens, lessonCheck, consensusResult: postProcessed.consensusResult, - packagingSignal: computePackagingSignal(outputMemories), + packagingSignal: computePackagingSignal(packaged.outputMemories), retrievalSummary: activeTrace.getRetrievalSummary(), - packagingSummary, assemblySummary, + packagingSummary: packaged.packagingSummary, + assemblySummary: packaged.assemblySummary, }; } @@ -186,7 +296,7 @@ export async function performSearch( if (uriResult) return uriResult; const { memories: rawMemories, activeTrace } = await executeSearchStep(deps, userId, query, effectiveLimit, sourceSite, referenceTime, namespaceScope, retrievalOptions, asOf, trace); - const filteredMemories = await postProcessResults(deps, rawMemories, activeTrace, userId, query, asOf); + const filteredMemories = await postProcessResults(deps, rawMemories, activeTrace, userId, query, asOf, retrievalOptions); return assembleResponse(deps, filteredMemories, query, userId, activeTrace, retrievalOptions, asOf, sourceSite, lessonCheck); } @@ -202,10 +312,12 @@ export async function performFastSearch( sourceSite?: string, limit?: number, namespaceScope?: string, + retrievalOptions?: RetrievalOptions, ): Promise { const label = classifyQueryDetailed(query).label; const escalate = label === 'multi-hop' || label === 'aggregation' || label === 'complex'; return performSearch(deps, userId, query, sourceSite, limit, undefined, undefined, namespaceScope, { + ...retrievalOptions, skipRepairLoop: !escalate, skipReranking: !escalate, }); @@ -234,7 +346,9 @@ export async function performWorkspaceSearch( workspace.workspaceId, queryEmbedding, effectiveLimit, options.agentScope ?? 'all', workspace.agentId, options.referenceTime, ); - const { filtered: filteredMemories } = await excludeStaleComposites(deps.stores.memory, userId, memories); + const { filtered: staleFilteredMemories } = await excludeStaleComposites(deps.stores.memory, userId, memories); + const gate = resolveRelevanceGate(query, options.retrievalOptions?.relevanceThreshold, deps.config); + const { memories: filteredMemories } = applyRelevanceFilter(staleFilteredMemories, gate); for (const m of filteredMemories) deps.stores.memory.touchMemory(m.id).catch(() => {}); const mode = options.retrievalOptions?.retrievalMode ?? 'flat'; diff --git a/src/services/memory-service-types.ts b/src/services/memory-service-types.ts index 0187247..a070a63 100644 --- a/src/services/memory-service-types.ts +++ b/src/services/memory-service-types.ts @@ -197,6 +197,8 @@ export interface RetrievalOptions { retrievalMode?: RetrievalMode; tokenBudget?: number; searchStrategy?: SearchStrategy; + /** Minimum normalized relevance required before injection packaging. */ + relevanceThreshold?: number; /** Skip the LLM repair loop for latency-critical paths. */ skipRepairLoop?: boolean; /** Skip cross-encoder reranking for latency-critical paths. */ diff --git a/src/services/memory-service.ts b/src/services/memory-service.ts index e698652..7ae92ca 100644 --- a/src/services/memory-service.ts +++ b/src/services/memory-service.ts @@ -112,7 +112,15 @@ export class MemoryService { }); } if (options.fast) { - return performFastSearch(deps, scope.userId, query, options.sourceSite, options.limit, options.namespaceScope); + return performFastSearch( + deps, + scope.userId, + query, + options.sourceSite, + options.limit, + options.namespaceScope, + options.retrievalOptions, + ); } return performSearch(deps, scope.userId, query, options.sourceSite, options.limit, options.asOf, options.referenceTime, options.namespaceScope, options.retrievalOptions); } diff --git a/src/services/relevance-policy.ts b/src/services/relevance-policy.ts new file mode 100644 index 0000000..c61cc8e --- /dev/null +++ b/src/services/relevance-policy.ts @@ -0,0 +1,147 @@ +/** + * Score semantics and relevance threshold policy for retrieval packaging. + */ + +import type { SearchResult } from '../db/repository-types.js'; +import { classifyQueryDetailed, type QueryComplexityLabel } from './retrieval-policy.js'; + +export interface RelevanceGateConfig { + similarityThreshold: number; +} + +export interface RelevanceGate { + threshold: number | null; + source: 'request' | 'config' | 'disabled'; + reason: string; + queryLabel: QueryComplexityLabel; +} + +export interface RelevanceFilterDecision { + id: string; + sourceSite: string; + sourceKind: 'integration' | 'local'; + namespace: string | null; + semanticSimilarity: number; + rankingScore: number; + relevance: number; + threshold: number | null; + decision: 'kept' | 'filtered'; + reason: string; +} + +export interface RelevanceFilterResult { + memories: SearchResult[]; + decisions: RelevanceFilterDecision[]; + removedIds: string[]; +} + +const BROAD_QUERY_LABELS = new Set(['aggregation', 'multi-hop']); +const INTEGRATION_SOURCE_MARKERS = [ + 'integration', + 'gmail', + 'google-drive', + 'google_drive', + 'drive', + 'x.com', + 'twitter', +]; + +export function resolveRelevanceGate( + query: string, + requestedThreshold: number | undefined, + runtimeConfig: RelevanceGateConfig, +): RelevanceGate { + const queryLabel = classifyQueryDetailed(query).label; + if (requestedThreshold !== undefined) { + return buildGate(requestedThreshold, 'request', 'caller-threshold', queryLabel); + } + if (BROAD_QUERY_LABELS.has(queryLabel)) { + return { threshold: null, source: 'disabled', reason: `broad-${queryLabel}-query`, queryLabel }; + } + return buildGate(runtimeConfig.similarityThreshold, 'config', 'direct-query-default', queryLabel); +} + +export function applyRelevanceFilter( + memories: SearchResult[], + gate: RelevanceGate, +): RelevanceFilterResult { + const scored = memories.map(withScoreSemantics); + const decisions = scored.map((memory) => buildDecision(memory, gate)); + if (gate.threshold === null) return { memories: scored, decisions, removedIds: [] }; + + const keptIds = new Set( + decisions.filter((decision) => decision.decision === 'kept').map((decision) => decision.id), + ); + return { + memories: scored.filter((memory) => keptIds.has(memory.id)), + decisions, + removedIds: decisions.filter((decision) => decision.decision === 'filtered').map((decision) => decision.id), + }; +} + +function withScoreSemantics(memory: SearchResult): SearchResult { + const semanticSimilarity = finiteOrZero(memory.semantic_similarity ?? memory.similarity); + const rankingScore = finiteOrZero(memory.ranking_score ?? memory.score); + const relevance = clampUnit(memory.relevance ?? semanticSimilarity); + return { + ...memory, + semantic_similarity: semanticSimilarity, + ranking_score: rankingScore, + relevance, + }; +} + +function buildGate( + rawThreshold: number, + source: RelevanceGate['source'], + reason: string, + queryLabel: QueryComplexityLabel, +): RelevanceGate { + const threshold = clampUnit(rawThreshold); + if (threshold <= 0) return { threshold: null, source: 'disabled', reason: 'non-positive-threshold', queryLabel }; + return { threshold, source, reason, queryLabel }; +} + +function buildDecision(memory: SearchResult, gate: RelevanceGate): RelevanceFilterDecision { + const sourceKind = isIntegrationSource(memory.source_site) ? 'integration' : 'local'; + const threshold = gate.threshold; + const kept = threshold === null || (memory.relevance ?? 0) >= threshold; + return { + id: memory.id, + sourceSite: memory.source_site, + sourceKind, + namespace: memory.namespace ?? null, + semanticSimilarity: memory.semantic_similarity ?? 0, + rankingScore: memory.ranking_score ?? memory.score, + relevance: memory.relevance ?? 0, + threshold, + decision: kept ? 'kept' : 'filtered', + reason: buildReason(kept, gate, sourceKind), + }; +} + +function buildReason( + kept: boolean, + gate: RelevanceGate, + sourceKind: RelevanceFilterDecision['sourceKind'], +): string { + if (gate.threshold === null) return gate.reason; + if (sourceKind === 'integration') { + return kept ? 'integration-meets-threshold' : 'integration-below-threshold'; + } + return kept ? 'meets-threshold' : 'below-threshold'; +} + +function isIntegrationSource(sourceSite: string): boolean { + const normalized = sourceSite.toLowerCase(); + return INTEGRATION_SOURCE_MARKERS.some((marker) => normalized.includes(marker)); +} + +function finiteOrZero(value: number): number { + return Number.isFinite(value) ? value : 0; +} + +function clampUnit(value: number): number { + if (!Number.isFinite(value)) return 0; + return Math.max(0, Math.min(1, value)); +} diff --git a/src/services/retrieval-trace.ts b/src/services/retrieval-trace.ts index fb48694..83c0721 100644 --- a/src/services/retrieval-trace.ts +++ b/src/services/retrieval-trace.ts @@ -12,6 +12,7 @@ import { join, resolve } from 'node:path'; import { fileURLToPath } from 'node:url'; import { config } from '../config.js'; import type { SearchResult } from '../db/memory-repository.js'; +import type { RelevanceFilterDecision } from './relevance-policy.js'; const __dirname = resolve(fileURLToPath(import.meta.url), '..'); const DEFAULT_TRACE_DIR = resolve(__dirname, '../../.traces'); @@ -20,7 +21,13 @@ const DEFAULT_TRACE_DIR = resolve(__dirname, '../../.traces'); interface TracedMemory { id: string; similarity: number; + semanticSimilarity: number; score: number; + rankingScore: number; + relevance: number; + importance: number; + sourceSite: string; + namespace: string | null; contentPreview: string; tier?: string; } @@ -53,6 +60,11 @@ export interface RetrievalTraceSummary { candidateCount: number; queryText: string; skipRepair: boolean; + relevanceThreshold?: number | null; + relevanceFilterSource?: string; + relevanceFilterReason?: string; + filteredCandidateIds?: string[]; + filterDecisions?: RelevanceFilterDecision[]; traceId?: string; stageCount?: number; stageNames?: string[]; @@ -87,7 +99,13 @@ function snapshotMemories(results: SearchResult[]): TracedMemory[] { return results.map((r) => ({ id: r.id, similarity: round4(r.similarity), + semanticSimilarity: round4(r.semantic_similarity ?? r.similarity), score: round4(r.score), + rankingScore: round4(r.ranking_score ?? r.score), + relevance: round4(r.relevance ?? r.similarity), + importance: round4(r.importance), + sourceSite: r.source_site, + namespace: r.namespace ?? null, contentPreview: r.content.slice(0, CONTENT_PREVIEW_LENGTH), // @ts-expect-error -- tier might be present if added by tiered loading tier: r.tier, @@ -201,19 +219,18 @@ export class TraceCollector { private persistTrace(trace: RetrievalTrace): void { try { - const traceDir = process.env.RETRIEVAL_TRACE_DIR ?? DEFAULT_TRACE_DIR; - if (!existsSync(traceDir)) { - mkdirSync(traceDir, { recursive: true }); - } - - const filename = `${trace.traceId}.json`; - const filePath = join(traceDir, filename); - writeFileSync(filePath, JSON.stringify(trace, null, 2)); - - // Also log a summary line to stdout for visibility during runs + const filename = writeTraceArtifact(trace); console.log(`[trace] Saved retrieval trace: ${filename} (${trace.durationMs}ms, ${trace.stages.length} stages)`); } catch (err) { console.error('[trace] Failed to persist retrieval trace:', err); } } } + +function writeTraceArtifact(trace: RetrievalTrace): string { + const traceDir = process.env.RETRIEVAL_TRACE_DIR ?? DEFAULT_TRACE_DIR; + if (!existsSync(traceDir)) mkdirSync(traceDir, { recursive: true }); + const filename = `${trace.traceId}.json`; + writeFileSync(join(traceDir, filename), JSON.stringify(trace, null, 2)); + return filename; +} diff --git a/src/services/search-pipeline.ts b/src/services/search-pipeline.ts index 1c4d445..6e9bf55 100644 --- a/src/services/search-pipeline.ts +++ b/src/services/search-pipeline.ts @@ -277,13 +277,32 @@ export async function runSearchPipelineWithTrace( if (namespaceScope) { trace.event('namespace-filtering', { scope: namespaceScope }); } - const filtered = namespaceScope - ? selected.filter((r) => isInScope(r.namespace, namespaceScope)) - : selected; + const filtered = applyNamespaceScopeFilter(selected, namespaceScope, trace); return { filtered, trace }; } +function applyNamespaceScopeFilter( + selected: SearchResult[], + namespaceScope: string | null, + trace: TraceCollector, +): SearchResult[] { + if (!namespaceScope) return selected; + const decisions = selected.map((result) => ({ + id: result.id, + namespace: result.namespace ?? null, + sourceSite: result.source_site, + decision: isInScope(result.namespace, namespaceScope) ? 'kept' : 'filtered', + })); + const filtered = selected.filter((result) => isInScope(result.namespace, namespaceScope)); + trace.stage('namespace-filter', filtered, { + scope: namespaceScope, + removedIds: decisions.filter((decision) => decision.decision === 'filtered').map((decision) => decision.id), + decisions, + }); + return filtered; +} + async function runInitialRetrieval( stores: SearchPipelineStores, userId: string, From b60088698c735c95b4169a6907687aa8ff582294 Mon Sep 17 00:00:00 2001 From: Philippe Mortelette Date: Tue, 28 Apr 2026 15:20:59 -0400 Subject: [PATCH 2/8] fix: address retrieval relevance review --- openapi.json | 8 ++ openapi.yaml | 8 ++ package-lock.json | 1 + package.json | 1 + .../memory-route-config-override.test.ts | 10 +- src/__tests__/setup.ts | 15 +++ .../response-schema-coverage.test.ts | 6 -- src/schemas/search-response-parts.ts | 16 +++- .../current-state-composite-packaging.test.ts | 2 - ...current-state-retrieval-regression.test.ts | 3 - .../memory-search-runtime-config.test.ts | 5 - .../retrieval-relevance-regression.test.ts | 93 +++++++++++++++++-- .../__tests__/retrieval-trace.test.ts | 6 -- .../stale-composite-retrieval.test.ts | 4 - src/services/memory-search.ts | 36 ++++++- src/services/relevance-policy.ts | 51 +++++++--- vitest.config.ts | 1 + 17 files changed, 204 insertions(+), 62 deletions(-) create mode 100644 src/__tests__/setup.ts diff --git a/openapi.json b/openapi.json index 411f949..2696034 100644 --- a/openapi.json +++ b/openapi.json @@ -3703,12 +3703,14 @@ "type": "object" }, "ranking_score": { + "description": "Composite ranking/debug score. It is not normalized and may be outside the [0,1] relevance range.", "type": [ "number", "null" ] }, "relevance": { + "description": "Normalized relevance value used for threshold filtering; clamped to the [0,1] range.", "type": [ "number", "null" @@ -3880,12 +3882,14 @@ ] }, "ranking_score": { + "description": "Composite ranking/debug score. It is not normalized and may be outside the [0,1] relevance range.", "type": "number" }, "reason": { "type": "string" }, "relevance": { + "description": "Normalized relevance value used for threshold filtering; clamped to the [0,1] range.", "type": "number" }, "semantic_similarity": { @@ -4340,12 +4344,14 @@ "type": "object" }, "ranking_score": { + "description": "Composite ranking/debug score. It is not normalized and may be outside the [0,1] relevance range.", "type": [ "number", "null" ] }, "relevance": { + "description": "Normalized relevance value used for threshold filtering; clamped to the [0,1] range.", "type": [ "number", "null" @@ -4517,12 +4523,14 @@ ] }, "ranking_score": { + "description": "Composite ranking/debug score. It is not normalized and may be outside the [0,1] relevance range.", "type": "number" }, "reason": { "type": "string" }, "relevance": { + "description": "Normalized relevance value used for threshold filtering; clamped to the [0,1] range.", "type": "number" }, "semantic_similarity": { diff --git a/openapi.yaml b/openapi.yaml index af5c3a0..c26df8e 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -2491,10 +2491,12 @@ paths: description: Memory metadata persisted on the row, including caller-supplied verbatim metadata (set via /v1/memories/ingest/quick with skip_extraction=true) and core-generated metadata (e.g. cmo_id, memberMemoryIds, headline). Mirrors the shape /v1/memories/list and /v1/memories/:id return. type: object ranking_score: + description: Composite ranking/debug score. It is not normalized and may be outside the [0,1] relevance range. type: - number - "null" relevance: + description: Normalized relevance value used for threshold filtering; clamped to the [0,1] range. type: - number - "null" @@ -2616,10 +2618,12 @@ paths: - string - "null" ranking_score: + description: Composite ranking/debug score. It is not normalized and may be outside the [0,1] relevance range. type: number reason: type: string relevance: + description: Normalized relevance value used for threshold filtering; clamped to the [0,1] range. type: number semantic_similarity: type: number @@ -2926,10 +2930,12 @@ paths: description: Memory metadata persisted on the row, including caller-supplied verbatim metadata (set via /v1/memories/ingest/quick with skip_extraction=true) and core-generated metadata (e.g. cmo_id, memberMemoryIds, headline). Mirrors the shape /v1/memories/list and /v1/memories/:id return. type: object ranking_score: + description: Composite ranking/debug score. It is not normalized and may be outside the [0,1] relevance range. type: - number - "null" relevance: + description: Normalized relevance value used for threshold filtering; clamped to the [0,1] range. type: - number - "null" @@ -3051,10 +3057,12 @@ paths: - string - "null" ranking_score: + description: Composite ranking/debug score. It is not normalized and may be outside the [0,1] relevance range. type: number reason: type: string relevance: + description: Normalized relevance value used for threshold filtering; clamped to the [0,1] range. type: number semantic_similarity: type: number diff --git a/package-lock.json b/package-lock.json index 6fe822b..cbe229c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -23,6 +23,7 @@ "@types/express": "^5.0.0", "@types/node": "^22.0.0", "@types/pg": "^8.15.0", + "dotenv": "^16.6.1", "dotenv-cli": "^8.0.0", "husky": "^9.1.7", "tsx": "^4.19.0", diff --git a/package.json b/package.json index b02035c..ef3fd15 100644 --- a/package.json +++ b/package.json @@ -87,6 +87,7 @@ "@types/express": "^5.0.0", "@types/node": "^22.0.0", "@types/pg": "^8.15.0", + "dotenv": "^16.6.1", "dotenv-cli": "^8.0.0", "husky": "^9.1.7", "tsx": "^4.19.0", diff --git a/src/__tests__/memory-route-config-override.test.ts b/src/__tests__/memory-route-config-override.test.ts index e256bd0..aab2ca3 100644 --- a/src/__tests__/memory-route-config-override.test.ts +++ b/src/__tests__/memory-route-config-override.test.ts @@ -20,16 +20,10 @@ import express from 'express'; import { afterAll, beforeAll, beforeEach, describe, expect, it, vi } from 'vitest'; -vi.hoisted(() => { - process.env.OPENAI_API_KEY ??= 'test-openai-key'; - process.env.DATABASE_URL ??= 'postgresql://atomicmem:atomicmem@localhost:5433/atomicmem_test'; - process.env.EMBEDDING_DIMENSIONS ??= '1536'; -}); - -import { createMemoryRouter } from '../routes/memories.js'; -import type { MemoryService } from '../services/memory-service.js'; import { type BootedApp, bindEphemeral } from '../app/bind-ephemeral.js'; import { config, type RuntimeConfig } from '../config.js'; +import { createMemoryRouter } from '../routes/memories.js'; +import type { MemoryService } from '../services/memory-service.js'; const ROUTE_CONFIG = { retrievalProfile: 'override-test-profile', diff --git a/src/__tests__/setup.ts b/src/__tests__/setup.ts new file mode 100644 index 0000000..eaa8c59 --- /dev/null +++ b/src/__tests__/setup.ts @@ -0,0 +1,15 @@ +import { existsSync } from 'node:fs'; +import { resolve } from 'node:path'; +import { config as loadDotenv } from 'dotenv'; + +const envPath = ['.env.test', '.env'] + .map((file) => resolve(process.cwd(), file)) + .find((file) => existsSync(file)); + +if (envPath) { + loadDotenv({ path: envPath, override: false }); +} + +process.env.OPENAI_API_KEY ??= 'test-openai-key'; +process.env.DATABASE_URL ??= 'postgresql://atomicmem:atomicmem@localhost:5433/atomicmem_test'; +process.env.EMBEDDING_DIMENSIONS ??= '1536'; diff --git a/src/routes/__tests__/response-schema-coverage.test.ts b/src/routes/__tests__/response-schema-coverage.test.ts index aa3299c..0ef779f 100644 --- a/src/routes/__tests__/response-schema-coverage.test.ts +++ b/src/routes/__tests__/response-schema-coverage.test.ts @@ -18,12 +18,6 @@ import { describe, it, expect, vi } from 'vitest'; -vi.hoisted(() => { - process.env.OPENAI_API_KEY ??= 'test-openai-key'; - process.env.DATABASE_URL ??= 'postgresql://atomicmem:atomicmem@localhost:5433/atomicmem_test'; - process.env.EMBEDDING_DIMENSIONS ??= '1536'; -}); - import type { Router } from 'express'; import { createMemoryRouter } from '../memories'; import { createAgentRouter } from '../agents'; diff --git a/src/schemas/search-response-parts.ts b/src/schemas/search-response-parts.ts index c10d333..9356572 100644 --- a/src/schemas/search-response-parts.ts +++ b/src/schemas/search-response-parts.ts @@ -14,8 +14,12 @@ export const SearchMemoryItemSchema = z.object({ similarity: NumberOrNaN.optional(), score: NumberOrNaN.optional(), semantic_similarity: NumberOrNaN.optional(), - ranking_score: NumberOrNaN.optional(), - relevance: NumberOrNaN.optional(), + ranking_score: NumberOrNaN.optional().openapi({ + description: 'Composite ranking/debug score. It is not normalized and may be outside the [0,1] relevance range.', + }), + relevance: NumberOrNaN.optional().openapi({ + description: 'Normalized relevance value used for threshold filtering; clamped to the [0,1] range.', + }), importance: NumberOrNaN.optional(), source_site: z.string().optional(), created_at: IsoDateString.optional(), @@ -64,8 +68,12 @@ const RetrievalTraceSchema = z.object({ source_kind: z.enum(['integration', 'local']), namespace: z.string().nullable(), semantic_similarity: z.number(), - ranking_score: z.number(), - relevance: z.number(), + ranking_score: z.number().openapi({ + description: 'Composite ranking/debug score. It is not normalized and may be outside the [0,1] relevance range.', + }), + relevance: z.number().openapi({ + description: 'Normalized relevance value used for threshold filtering; clamped to the [0,1] range.', + }), threshold: z.number().nullable(), decision: z.enum(['kept', 'filtered']), reason: z.string(), diff --git a/src/services/__tests__/current-state-composite-packaging.test.ts b/src/services/__tests__/current-state-composite-packaging.test.ts index 8d27ad9..36b5202 100644 --- a/src/services/__tests__/current-state-composite-packaging.test.ts +++ b/src/services/__tests__/current-state-composite-packaging.test.ts @@ -7,8 +7,6 @@ import { beforeAll, beforeEach, describe, expect, it, vi } from 'vitest'; /* vi.hoisted must be per-file (vitest hoisting requirement). */ const searchMocks = vi.hoisted(() => { - process.env.OPENAI_API_KEY ??= 'test-openai-key'; - process.env.DATABASE_URL ??= 'postgresql://atomicmem:atomicmem@localhost:5433/atomicmem_test'; return { pipeline: vi.fn(), touch: vi.fn(), get: vi.fn(), stage: vi.fn(), event: vi.fn(), finalize: vi.fn() }; }); const { pipeline: mockRunSearchPipelineWithTrace, touch: mockTouchMemory, get: mockGetMemory, stage: mockTraceStage, event: mockTraceEvent, finalize: mockTraceFinalize } = searchMocks; diff --git a/src/services/__tests__/current-state-retrieval-regression.test.ts b/src/services/__tests__/current-state-retrieval-regression.test.ts index b0127e3..cf1eeba 100644 --- a/src/services/__tests__/current-state-retrieval-regression.test.ts +++ b/src/services/__tests__/current-state-retrieval-regression.test.ts @@ -7,8 +7,6 @@ import { afterAll, beforeAll, beforeEach, describe, expect, it, vi } from 'vitest'; const { mockChat, embeddingOverrides } = vi.hoisted(() => { - process.env.OPENAI_API_KEY ??= 'test-openai-key'; - process.env.DATABASE_URL ??= 'postgresql://atomicmem:atomicmem@localhost:5433/atomicmem_eval'; return { mockChat: vi.fn(), embeddingOverrides: new Map(), @@ -205,4 +203,3 @@ function hashVector(text: string): number[] { const seed = [...text].reduce((sum, char, index) => sum + char.charCodeAt(0) * (index + 1), 0) || 1; return unitVector(seed); } - diff --git a/src/services/__tests__/memory-search-runtime-config.test.ts b/src/services/__tests__/memory-search-runtime-config.test.ts index 92cd985..81f499f 100644 --- a/src/services/__tests__/memory-search-runtime-config.test.ts +++ b/src/services/__tests__/memory-search-runtime-config.test.ts @@ -20,11 +20,6 @@ const { mockEmitAuditEvent: vi.fn(), mockRunSearchPipelineWithTrace: vi.fn(), })); -vi.hoisted(() => { - process.env.OPENAI_API_KEY ??= 'test-openai-key'; - process.env.DATABASE_URL ??= 'postgresql://atomicmem:atomicmem@localhost:5433/atomicmem_test'; - process.env.EMBEDDING_DIMENSIONS ??= '1536'; -}); vi.mock('../lesson-service.js', () => ({ checkLessons: mockCheckLessons, diff --git a/src/services/__tests__/retrieval-relevance-regression.test.ts b/src/services/__tests__/retrieval-relevance-regression.test.ts index ca6de09..f112adc 100644 --- a/src/services/__tests__/retrieval-relevance-regression.test.ts +++ b/src/services/__tests__/retrieval-relevance-regression.test.ts @@ -12,27 +12,31 @@ const { mockRunSearchPipelineWithTrace, mockResolveSearchLimitDetailed, mockClassifyQueryDetailed, + mockEmbedText, } = vi.hoisted(() => ({ mockRunSearchPipelineWithTrace: vi.fn(), mockResolveSearchLimitDetailed: vi.fn(), mockClassifyQueryDetailed: vi.fn(), + mockEmbedText: vi.fn(), })); -vi.hoisted(() => { - process.env.OPENAI_API_KEY ??= 'test-openai-key'; - process.env.DATABASE_URL ??= 'postgresql://atomicmem:atomicmem@localhost:5433/atomicmem_test'; - process.env.EMBEDDING_DIMENSIONS ??= '1536'; -}); vi.mock('../search-pipeline.js', () => ({ runSearchPipelineWithTrace: mockRunSearchPipelineWithTrace })); vi.mock('../retrieval-policy.js', () => ({ resolveSearchLimitDetailed: mockResolveSearchLimitDetailed, classifyQueryDetailed: mockClassifyQueryDetailed, })); +vi.mock('../embedding.js', () => ({ embedText: mockEmbedText })); vi.mock('../composite-staleness.js', () => ({ excludeStaleComposites: vi.fn(passthroughCompositeFilter), })); +vi.mock('node:fs', () => ({ + existsSync: vi.fn(() => true), + mkdirSync: vi.fn(), + writeFileSync: vi.fn(), +})); -const { performSearch } = await import('../memory-search.js'); +const { performSearch, performWorkspaceSearch } = await import('../memory-search.js'); +const { config } = await import('../../config.js'); const TEST_USER = 'retrieval-relevance-regression-user'; @@ -40,6 +44,7 @@ describe('retrieval relevance regression', () => { beforeEach(() => { vi.clearAllMocks(); mockClassifyQueryDetailed.mockImplementation(classifyFixtureQuery); + mockEmbedText.mockResolvedValue([1, 0, 0]); mockResolveSearchLimitDetailed.mockImplementation((query: string, limit?: number) => ({ limit: limit ?? 5, classification: classifyFixtureQuery(query), @@ -134,6 +139,82 @@ describe('retrieval relevance regression', () => { ); }); + it('does not classify local sources by drive or twitter substrings', async () => { + const driverBlog = createSearchResult({ + id: 'driver-blog-local', + content: 'A local article mentions keyboard drivers.', + similarity: 0.2, + score: 0.99, + source_site: 'driver-blog.com', + }); + const twitterishLocal = createSearchResult({ + id: 'twitterish-local', + content: 'A local archive happens to include twitter in its host name.', + similarity: 0.2, + score: 0.99, + source_site: 'not-twitter.example', + }); + const trace = createTrace([driverBlog.id, twitterishLocal.id]); + mockRunSearchPipelineWithTrace.mockResolvedValue({ filtered: [driverBlog, twitterishLocal], trace }); + + await performSearch(createDeps(0.5), TEST_USER, 'What is my favorite color?'); + + expect(trace.stage).toHaveBeenCalledWith( + 'relevance-filter', + [], + expect.objectContaining({ + decisions: expect.arrayContaining([ + expect.objectContaining({ + id: driverBlog.id, + sourceKind: 'local', + reason: 'below-threshold', + }), + expect.objectContaining({ + id: twitterishLocal.id, + sourceKind: 'local', + reason: 'below-threshold', + }), + ]), + }), + ); + }); + + it('traces workspace relevance filtering decisions', async () => { + const fixture = createFavoriteColorNoisyRetrievalFixture(); + const workspaceResults = [fixture.answer, fixture.drive]; + const deps = createDeps(0.5); + deps.stores.search.searchSimilarInWorkspace = vi.fn().mockResolvedValue(workspaceResults); + const previousTraceEnabled = config.retrievalTraceEnabled; + config.retrievalTraceEnabled = true; + + try { + const result = await performWorkspaceSearch( + deps, + TEST_USER, + 'What is my favorite color?', + { workspaceId: 'workspace-1', agentId: 'agent-1' }, + { retrievalOptions: { relevanceThreshold: 0.5 } }, + ); + + expect(result.memories.map((memory) => memory.id)).toEqual([fixture.answer.id]); + expect(result.retrievalSummary).toMatchObject({ + relevanceThreshold: 0.5, + relevanceFilterSource: 'request', + filteredCandidateIds: [fixture.drive.id], + filterDecisions: expect.arrayContaining([ + expect.objectContaining({ + id: fixture.drive.id, + sourceKind: 'integration', + reason: 'integration-below-threshold', + }), + ]), + stageNames: expect.arrayContaining(['workspace-search', 'relevance-filter', 'final']), + }); + } finally { + config.retrievalTraceEnabled = previousTraceEnabled; + } + }); + it('preserves broad integration retrieval when no caller threshold is supplied', async () => { const fixture = createFavoriteColorNoisyRetrievalFixture(); mockRunSearchPipelineWithTrace.mockResolvedValue({ diff --git a/src/services/__tests__/retrieval-trace.test.ts b/src/services/__tests__/retrieval-trace.test.ts index 33d429b..ffc89f6 100644 --- a/src/services/__tests__/retrieval-trace.test.ts +++ b/src/services/__tests__/retrieval-trace.test.ts @@ -5,12 +5,6 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; -vi.hoisted(() => { - process.env.OPENAI_API_KEY ??= 'test-openai-key'; - process.env.DATABASE_URL ??= 'postgresql://atomicmem:atomicmem@localhost:5433/atomicmem_test'; - process.env.EMBEDDING_DIMENSIONS ??= '1536'; -}); - import { TraceCollector } from '../retrieval-trace.js'; import { config } from '../../config.js'; import { createSearchResult } from './test-fixtures.js'; diff --git a/src/services/__tests__/stale-composite-retrieval.test.ts b/src/services/__tests__/stale-composite-retrieval.test.ts index 0128b71..617184a 100644 --- a/src/services/__tests__/stale-composite-retrieval.test.ts +++ b/src/services/__tests__/stale-composite-retrieval.test.ts @@ -14,10 +14,6 @@ const mockGetMemory = vi.hoisted(() => vi.fn()); const mockTraceStage = vi.hoisted(() => vi.fn()); const mockTraceEvent = vi.hoisted(() => vi.fn()); const mockTraceFinalize = vi.hoisted(() => vi.fn()); -vi.hoisted(() => { - process.env.OPENAI_API_KEY ??= 'test-openai-key'; - process.env.DATABASE_URL ??= 'postgresql://atomicmem:atomicmem@localhost:5433/atomicmem_test'; -}); import { createSearchResult, setupSearchPipelineTest, createSearchPipelineMockFactory } from './test-fixtures.js'; vi.mock('../search-pipeline.js', () => createSearchPipelineMockFactory(mockRunSearchPipelineWithTrace)); diff --git a/src/services/memory-search.ts b/src/services/memory-search.ts index 583cc7a..99d2550 100644 --- a/src/services/memory-search.ts +++ b/src/services/memory-search.ts @@ -42,7 +42,7 @@ interface PostProcessedSearch { } interface PackagedSearchOutput { - mode: RetrievalOptions['retrievalMode']; + mode: RetrievalResult['retrievalMode']; outputMemories: SearchResult[]; injectionText: string; tierAssignments: ReturnType['tierAssignments']; @@ -259,7 +259,7 @@ function buildRetrievalResult( memories: packaged.outputMemories, injectionText: packaged.injectionText, citations: buildRichCitations(packaged.outputMemories).map((c) => c.memory_id), - retrievalMode: packaged.mode ?? 'flat', + retrievalMode: packaged.mode, tierAssignments: packaged.tierAssignments, expandIds: packaged.expandIds, estimatedContextTokens: packaged.estimatedContextTokens, @@ -316,6 +316,8 @@ export async function performFastSearch( ): Promise { const label = classifyQueryDetailed(query).label; const escalate = label === 'multi-hop' || label === 'aggregation' || label === 'complex'; + // Fast search owns these latency toggles based on query class; caller options + // still flow through for packaging, threshold, and strategy controls. return performSearch(deps, userId, query, sourceSite, limit, undefined, undefined, namespaceScope, { ...retrievalOptions, skipRepairLoop: !escalate, @@ -346,17 +348,41 @@ export async function performWorkspaceSearch( workspace.workspaceId, queryEmbedding, effectiveLimit, options.agentScope ?? 'all', workspace.agentId, options.referenceTime, ); - const { filtered: staleFilteredMemories } = await excludeStaleComposites(deps.stores.memory, userId, memories); - const gate = resolveRelevanceGate(query, options.retrievalOptions?.relevanceThreshold, deps.config); - const { memories: filteredMemories } = applyRelevanceFilter(staleFilteredMemories, gate); + const trace = new TraceCollector(query, userId); + trace.stage('workspace-search', memories, { + workspaceId: workspace.workspaceId, + agentId: workspace.agentId, + agentScope: options.agentScope ?? 'all', + }); + + const { filtered: staleFilteredMemories, removedCompositeIds } = + await excludeStaleComposites(deps.stores.memory, userId, memories); + if (removedCompositeIds.length > 0) { + trace.stage('stale-composite-filter', staleFilteredMemories, { + removedCount: removedCompositeIds.length, + removedIds: removedCompositeIds, + }); + } + + const relevanceFilter = applySearchRelevanceFilter( + staleFilteredMemories, + trace, + query, + options.retrievalOptions, + deps.config, + ); + const filteredMemories = relevanceFilter.memories; for (const m of filteredMemories) deps.stores.memory.touchMemory(m.id).catch(() => {}); const mode = options.retrievalOptions?.retrievalMode ?? 'flat'; const injection = buildInjection(filteredMemories, query, mode, options.retrievalOptions?.tokenBudget); + updateRetrievalSummary(trace, filteredMemories, query, options.retrievalOptions, relevanceFilter); + trace.finalize(filteredMemories); return { memories: filteredMemories, citations: filteredMemories.map((m) => m.id), retrievalMode: mode, + retrievalSummary: trace.getRetrievalSummary(), ...injection, }; } diff --git a/src/services/relevance-policy.ts b/src/services/relevance-policy.ts index c61cc8e..fca9bb9 100644 --- a/src/services/relevance-policy.ts +++ b/src/services/relevance-policy.ts @@ -35,16 +35,27 @@ export interface RelevanceFilterResult { removedIds: string[]; } +type ScoredSearchResult = SearchResult & { + semantic_similarity: number; + ranking_score: number; + relevance: number; +}; + const BROAD_QUERY_LABELS = new Set(['aggregation', 'multi-hop']); -const INTEGRATION_SOURCE_MARKERS = [ +const INTEGRATION_SOURCE_PREFIXES = ['integration-', 'integration_', 'integration:', 'integration/']; +const KNOWN_INTEGRATION_SOURCE_SITES = new Set([ 'integration', 'gmail', + 'gmail.com', 'google-drive', 'google_drive', - 'drive', + 'drive.google.com', + 'docs.google.com', + 'mail.google.com', 'x.com', 'twitter', -]; + 'twitter.com', +]); export function resolveRelevanceGate( query: string, @@ -79,7 +90,7 @@ export function applyRelevanceFilter( }; } -function withScoreSemantics(memory: SearchResult): SearchResult { +function withScoreSemantics(memory: SearchResult): ScoredSearchResult { const semanticSimilarity = finiteOrZero(memory.semantic_similarity ?? memory.similarity); const rankingScore = finiteOrZero(memory.ranking_score ?? memory.score); const relevance = clampUnit(memory.relevance ?? semanticSimilarity); @@ -102,18 +113,18 @@ function buildGate( return { threshold, source, reason, queryLabel }; } -function buildDecision(memory: SearchResult, gate: RelevanceGate): RelevanceFilterDecision { - const sourceKind = isIntegrationSource(memory.source_site) ? 'integration' : 'local'; +function buildDecision(memory: ScoredSearchResult, gate: RelevanceGate): RelevanceFilterDecision { + const sourceKind = classifySourceKind(memory.source_site); const threshold = gate.threshold; - const kept = threshold === null || (memory.relevance ?? 0) >= threshold; + const kept = threshold === null || memory.relevance >= threshold; return { id: memory.id, sourceSite: memory.source_site, sourceKind, namespace: memory.namespace ?? null, - semanticSimilarity: memory.semantic_similarity ?? 0, - rankingScore: memory.ranking_score ?? memory.score, - relevance: memory.relevance ?? 0, + semanticSimilarity: memory.semantic_similarity, + rankingScore: memory.ranking_score, + relevance: memory.relevance, threshold, decision: kept ? 'kept' : 'filtered', reason: buildReason(kept, gate, sourceKind), @@ -132,9 +143,23 @@ function buildReason( return kept ? 'meets-threshold' : 'below-threshold'; } -function isIntegrationSource(sourceSite: string): boolean { - const normalized = sourceSite.toLowerCase(); - return INTEGRATION_SOURCE_MARKERS.some((marker) => normalized.includes(marker)); +function classifySourceKind(sourceSite: string): RelevanceFilterDecision['sourceKind'] { + const normalized = normalizeSourceSite(sourceSite); + if (INTEGRATION_SOURCE_PREFIXES.some((prefix) => normalized.startsWith(prefix))) { + return 'integration'; + } + return KNOWN_INTEGRATION_SOURCE_SITES.has(normalized) ? 'integration' : 'local'; +} + +function normalizeSourceSite(sourceSite: string): string { + const trimmed = sourceSite.trim().toLowerCase(); + if (!trimmed) return trimmed; + try { + const parsed = new URL(trimmed.includes('://') ? trimmed : `https://${trimmed}`); + return parsed.hostname.replace(/^www\./, ''); + } catch { + return trimmed; + } } function finiteOrZero(value: number): number { diff --git a/vitest.config.ts b/vitest.config.ts index 9d263bc..d85125c 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -10,5 +10,6 @@ export default defineConfig({ test: { include: ['src/**/*.test.ts'], fileParallelism: false, + setupFiles: ['./src/__tests__/setup.ts'], }, }); From 4f05fb83766fb7658b93d3937a329e4158c43da6 Mon Sep 17 00:00:00 2001 From: Philippe Mortelette Date: Tue, 28 Apr 2026 15:24:48 -0400 Subject: [PATCH 3/8] fix: clarify relevance gate scope --- .../retrieval-relevance-regression.test.ts | 34 ++++++++++++------- src/services/relevance-policy.ts | 6 ++-- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/src/services/__tests__/retrieval-relevance-regression.test.ts b/src/services/__tests__/retrieval-relevance-regression.test.ts index f112adc..3a4f1ed 100644 --- a/src/services/__tests__/retrieval-relevance-regression.test.ts +++ b/src/services/__tests__/retrieval-relevance-regression.test.ts @@ -39,6 +39,7 @@ const { performSearch, performWorkspaceSearch } = await import('../memory-search const { config } = await import('../../config.js'); const TEST_USER = 'retrieval-relevance-regression-user'; +const DIRECT_FACT_PRECISION_FLOOR = 1; describe('retrieval relevance regression', () => { beforeEach(() => { @@ -70,7 +71,7 @@ describe('retrieval relevance regression', () => { const ids = result.memories.map((memory) => memory.id); expect(ids).toEqual([fixture.answer.id]); - expect(precisionAtK(ids, new Set([fixture.answer.id]))).toBe(1); + expect(precisionAtK(ids, new Set([fixture.answer.id]))).toBeGreaterThanOrEqual(DIRECT_FACT_PRECISION_FLOOR); expect(result.injectionText).toContain('favorite color is teal'); expect(result.injectionText).not.toContain('spicy ramen'); expect(result.injectionText).not.toContain('Flight receipts'); @@ -216,26 +217,33 @@ describe('retrieval relevance regression', () => { }); it('preserves broad integration retrieval when no caller threshold is supplied', async () => { - const fixture = createFavoriteColorNoisyRetrievalFixture(); - mockRunSearchPipelineWithTrace.mockResolvedValue({ - filtered: fixture.all, - trace: createTrace(fixture.all.map((memory) => memory.id)), - }); - - const result = await performSearch( - createDeps(0.5), - TEST_USER, - 'List all synced integration memories', - ); + await expectRecallPreservedForQuery('List all synced integration memories'); + }); - expect(result.memories.map((memory) => memory.id)).toEqual(fixture.all.map((memory) => memory.id)); + it('preserves complex-query recall when no caller threshold is supplied', async () => { + await expectRecallPreservedForQuery('Why did my synced context mention color palettes?'); }); }); +async function expectRecallPreservedForQuery(query: string) { + const fixture = createFavoriteColorNoisyRetrievalFixture(); + mockRunSearchPipelineWithTrace.mockResolvedValue({ + filtered: fixture.all, + trace: createTrace(fixture.all.map((memory) => memory.id)), + }); + + const result = await performSearch(createDeps(0.5), TEST_USER, query); + + expect(result.memories.map((memory) => memory.id)).toEqual(fixture.all.map((memory) => memory.id)); +} + function classifyFixtureQuery(query: string) { if (query.toLowerCase().includes('list all')) { return { limit: 25, label: 'aggregation', matchedMarker: 'list all' }; } + if (query.toLowerCase().startsWith('why')) { + return { limit: 8, label: 'complex', matchedMarker: 'why' }; + } return { limit: 5, label: 'simple' }; } diff --git a/src/services/relevance-policy.ts b/src/services/relevance-policy.ts index fca9bb9..b7d3af8 100644 --- a/src/services/relevance-policy.ts +++ b/src/services/relevance-policy.ts @@ -41,7 +41,7 @@ type ScoredSearchResult = SearchResult & { relevance: number; }; -const BROAD_QUERY_LABELS = new Set(['aggregation', 'multi-hop']); +const RECALL_ORIENTED_QUERY_LABELS = new Set(['complex', 'multi-hop', 'aggregation']); const INTEGRATION_SOURCE_PREFIXES = ['integration-', 'integration_', 'integration:', 'integration/']; const KNOWN_INTEGRATION_SOURCE_SITES = new Set([ 'integration', @@ -66,8 +66,8 @@ export function resolveRelevanceGate( if (requestedThreshold !== undefined) { return buildGate(requestedThreshold, 'request', 'caller-threshold', queryLabel); } - if (BROAD_QUERY_LABELS.has(queryLabel)) { - return { threshold: null, source: 'disabled', reason: `broad-${queryLabel}-query`, queryLabel }; + if (RECALL_ORIENTED_QUERY_LABELS.has(queryLabel)) { + return { threshold: null, source: 'disabled', reason: `recall-oriented-${queryLabel}-query`, queryLabel }; } return buildGate(runtimeConfig.similarityThreshold, 'config', 'direct-query-default', queryLabel); } From 664cd5fa7787ae5abc6f66455b711a368cc43ed7 Mon Sep 17 00:00:00 2001 From: Philippe Mortelette Date: Tue, 28 Apr 2026 15:28:03 -0400 Subject: [PATCH 4/8] chore: ignore local mcp config --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 6f78c64..037402e 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ node_modules/ .env.local .env.*.local .env.test +.mcp.json # IDE files .vscode/ From a5f8842265f6a1e7ea43727ab14f0c19f1d1622f Mon Sep 17 00:00:00 2001 From: Philippe Mortelette Date: Tue, 28 Apr 2026 15:35:08 -0400 Subject: [PATCH 5/8] fix: preserve scoped retrieval recall --- src/__tests__/setup.ts | 1 + .../current-state-composite-packaging.test.ts | 5 ++++- src/services/memory-search.ts | 9 +++++++-- src/services/relevance-policy.ts | 16 ++++++++++++++++ 4 files changed, 28 insertions(+), 3 deletions(-) diff --git a/src/__tests__/setup.ts b/src/__tests__/setup.ts index eaa8c59..4409594 100644 --- a/src/__tests__/setup.ts +++ b/src/__tests__/setup.ts @@ -13,3 +13,4 @@ if (envPath) { process.env.OPENAI_API_KEY ??= 'test-openai-key'; process.env.DATABASE_URL ??= 'postgresql://atomicmem:atomicmem@localhost:5433/atomicmem_test'; process.env.EMBEDDING_DIMENSIONS ??= '1536'; +process.env.CORE_RUNTIME_CONFIG_MUTATION_ENABLED ??= 'true'; diff --git a/src/services/__tests__/current-state-composite-packaging.test.ts b/src/services/__tests__/current-state-composite-packaging.test.ts index 36b5202..c80a1ab 100644 --- a/src/services/__tests__/current-state-composite-packaging.test.ts +++ b/src/services/__tests__/current-state-composite-packaging.test.ts @@ -70,7 +70,10 @@ describe('current-state composite packaging', () => { expect(result.injectionText).not.toContain('considered MongoDB earlier'); expect(trace.stage).toHaveBeenCalledWith( 'flat-packaging-dedup', - [current, old], + [ + expect.objectContaining({ id: current.id, content: current.content }), + expect.objectContaining({ id: old.id, content: old.content }), + ], expect.objectContaining({ removedIds: ['composite-timeline'] }), ); }); diff --git a/src/services/memory-search.ts b/src/services/memory-search.ts index 99d2550..6015777 100644 --- a/src/services/memory-search.ts +++ b/src/services/memory-search.ts @@ -120,6 +120,7 @@ async function postProcessResults( userId: string, query: string, asOf: string | undefined, + sourceSite: string | undefined, retrievalOptions: RetrievalOptions | undefined, ): Promise { let memories = rawMemories.filter((m) => !m.workspace_id); @@ -160,6 +161,7 @@ async function postProcessResults( query, retrievalOptions, deps.config, + { asOf, sourceSite }, ); return { memories: relevanceFilter.memories, consensusResult, relevanceFilter }; } @@ -170,8 +172,9 @@ function applySearchRelevanceFilter( query: string, retrievalOptions: RetrievalOptions | undefined, runtimeConfig: MemoryServiceDeps['config'], + gateContext: { asOf?: string; sourceSite?: string } = {}, ): RelevanceFilterSummary & { memories: SearchResult[] } { - const gate = resolveRelevanceGate(query, retrievalOptions?.relevanceThreshold, runtimeConfig); + const gate = resolveRelevanceGate(query, retrievalOptions?.relevanceThreshold, runtimeConfig, gateContext); const result = applyRelevanceFilter(memories, gate); const summary = { threshold: gate.threshold, @@ -296,7 +299,9 @@ export async function performSearch( if (uriResult) return uriResult; const { memories: rawMemories, activeTrace } = await executeSearchStep(deps, userId, query, effectiveLimit, sourceSite, referenceTime, namespaceScope, retrievalOptions, asOf, trace); - const filteredMemories = await postProcessResults(deps, rawMemories, activeTrace, userId, query, asOf, retrievalOptions); + const filteredMemories = await postProcessResults( + deps, rawMemories, activeTrace, userId, query, asOf, sourceSite, retrievalOptions, + ); return assembleResponse(deps, filteredMemories, query, userId, activeTrace, retrievalOptions, asOf, sourceSite, lessonCheck); } diff --git a/src/services/relevance-policy.ts b/src/services/relevance-policy.ts index b7d3af8..c993414 100644 --- a/src/services/relevance-policy.ts +++ b/src/services/relevance-policy.ts @@ -3,12 +3,18 @@ */ import type { SearchResult } from '../db/repository-types.js'; +import { isCurrentStateQuery, isHistoricalQuery } from './current-state-ranking.js'; import { classifyQueryDetailed, type QueryComplexityLabel } from './retrieval-policy.js'; export interface RelevanceGateConfig { similarityThreshold: number; } +export interface RelevanceGateContext { + asOf?: string; + sourceSite?: string; +} + export interface RelevanceGate { threshold: number | null; source: 'request' | 'config' | 'disabled'; @@ -61,11 +67,21 @@ export function resolveRelevanceGate( query: string, requestedThreshold: number | undefined, runtimeConfig: RelevanceGateConfig, + context: RelevanceGateContext = {}, ): RelevanceGate { const queryLabel = classifyQueryDetailed(query).label; if (requestedThreshold !== undefined) { return buildGate(requestedThreshold, 'request', 'caller-threshold', queryLabel); } + if (context.asOf) { + return { threshold: null, source: 'disabled', reason: 'as-of-query', queryLabel }; + } + if (context.sourceSite) { + return { threshold: null, source: 'disabled', reason: 'source-site-filter', queryLabel }; + } + if (isCurrentStateQuery(query) || isHistoricalQuery(query)) { + return { threshold: null, source: 'disabled', reason: 'temporal-state-query', queryLabel }; + } if (RECALL_ORIENTED_QUERY_LABELS.has(queryLabel)) { return { threshold: null, source: 'disabled', reason: `recall-oriented-${queryLabel}-query`, queryLabel }; } From 547078910af345060515d1ee9f20943c46943b53 Mon Sep 17 00:00:00 2001 From: Philippe Mortelette Date: Tue, 28 Apr 2026 17:05:39 -0400 Subject: [PATCH 6/8] fix: clarify relevance review follow-up --- src/__tests__/setup.ts | 2 ++ src/services/__tests__/retrieval-relevance-regression.test.ts | 2 +- src/services/relevance-policy.ts | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/__tests__/setup.ts b/src/__tests__/setup.ts index 4409594..cebe3a8 100644 --- a/src/__tests__/setup.ts +++ b/src/__tests__/setup.ts @@ -13,4 +13,6 @@ if (envPath) { process.env.OPENAI_API_KEY ??= 'test-openai-key'; process.env.DATABASE_URL ??= 'postgresql://atomicmem:atomicmem@localhost:5433/atomicmem_test'; process.env.EMBEDDING_DIMENSIONS ??= '1536'; +// Mirror .env.test.example for route seam tests when no local env file exists; +// production config still defaults this flag to false in src/config.ts. process.env.CORE_RUNTIME_CONFIG_MUTATION_ENABLED ??= 'true'; diff --git a/src/services/__tests__/retrieval-relevance-regression.test.ts b/src/services/__tests__/retrieval-relevance-regression.test.ts index 3a4f1ed..db96d6b 100644 --- a/src/services/__tests__/retrieval-relevance-regression.test.ts +++ b/src/services/__tests__/retrieval-relevance-regression.test.ts @@ -39,7 +39,7 @@ const { performSearch, performWorkspaceSearch } = await import('../memory-search const { config } = await import('../../config.js'); const TEST_USER = 'retrieval-relevance-regression-user'; -const DIRECT_FACT_PRECISION_FLOOR = 1; +const DIRECT_FACT_PRECISION_FLOOR = 0.8; describe('retrieval relevance regression', () => { beforeEach(() => { diff --git a/src/services/relevance-policy.ts b/src/services/relevance-policy.ts index c993414..6b8d975 100644 --- a/src/services/relevance-policy.ts +++ b/src/services/relevance-policy.ts @@ -70,6 +70,8 @@ export function resolveRelevanceGate( context: RelevanceGateContext = {}, ): RelevanceGate { const queryLabel = classifyQueryDetailed(query).label; + // Explicit caller policy is authoritative; recall-preserving bypasses below + // only relax the config default when the request has not supplied a floor. if (requestedThreshold !== undefined) { return buildGate(requestedThreshold, 'request', 'caller-threshold', queryLabel); } From c55fe0c8e3e69e4a82c5cdfec3835dfc93aa8007 Mon Sep 17 00:00:00 2001 From: Philippe Mortelette Date: Tue, 28 Apr 2026 17:29:20 -0400 Subject: [PATCH 7/8] fix: close retrieval ranking relevance gaps --- src/db/query-helpers.ts | 21 ++-- src/db/repository-representations.ts | 6 +- src/db/repository-vector-search.ts | 30 +++-- .../__tests__/retrieval-policy.test.ts | 56 +++++++++ .../search-pipeline-runtime-config.test.ts | 10 ++ src/services/retrieval-policy.ts | 115 ++++++++++++++++++ src/services/retrieval-profiles.ts | 10 ++ src/services/search-pipeline.ts | 38 +++++- 8 files changed, 262 insertions(+), 24 deletions(-) diff --git a/src/db/query-helpers.ts b/src/db/query-helpers.ts index 85c4e1d..8cabea8 100644 --- a/src/db/query-helpers.ts +++ b/src/db/query-helpers.ts @@ -22,6 +22,7 @@ export interface HybridQueryParams { wSim: number; wImp: number; wRec: number; + rankingMinSimilarity: number; } /** @@ -29,7 +30,7 @@ export interface HybridQueryParams { * * Returns an array of positional params, the site filter clause, and * scoring weights. The caller must set the first params in a known order: - * $1=embedding, $2=userId, $3=queryText, $4=limit, $5=wSim, $6=wImp, $7=wRec, $8=refTime, [$9=sourceSite] + * $1=embedding, $2=userId, $3=queryText, $4=limit, $5=wSim, $6=wImp, $7=wRec, $8=refTime, $9=rankingMinSimilarity, [$10=sourceSite] */ export function buildHybridSearchParams( queryEmbedding: number[], @@ -43,23 +44,24 @@ export function buildHybridSearchParams( const wSim = config.scoringWeightSimilarity; const wImp = config.scoringWeightImportance; const wRec = config.scoringWeightRecency; + const rankingMinSimilarity = config.retrievalProfileSettings.rankingMinSimilarity; const refTime = (referenceTime ?? new Date()).toISOString(); - const siteFilter = sourceSite ? `AND ${siteFilterColumn} = $9` : ''; + const siteFilter = sourceSite ? `AND ${siteFilterColumn} = $10` : ''; const params: unknown[] = [ pgvector.toSql(queryEmbedding), userId, queryText, Math.max(1, limit), - wSim, wImp, wRec, refTime, + wSim, wImp, wRec, refTime, rankingMinSimilarity, ]; if (sourceSite) params.push(sourceSite); - return { params, siteFilter, refTime, wSim, wImp, wRec }; + return { params, siteFilter, refTime, wSim, wImp, wRec, rankingMinSimilarity }; } /** * Build shared query parameters for a vector-only scored search. * - * Returns params in order: $1=embedding, $2=userId, $3=limit, $4=wSim, $5=wImp, $6=wRec, $7=refTime, [$8=sourceSite] + * Returns params in order: $1=embedding, $2=userId, $3=limit, $4=wSim, $5=wImp, $6=wRec, $7=refTime, $8=rankingMinSimilarity, [$9=sourceSite] */ export function buildVectorSearchParams( queryEmbedding: number[], @@ -67,16 +69,17 @@ export function buildVectorSearchParams( limit: number, sourceSite?: string, referenceTime?: Date, -): { params: unknown[]; siteClause: string; wSim: number; wImp: number; wRec: number; refTime: string } { +): { params: unknown[]; siteClause: string; wSim: number; wImp: number; wRec: number; rankingMinSimilarity: number; refTime: string } { const wSim = config.scoringWeightSimilarity; const wImp = config.scoringWeightImportance; const wRec = config.scoringWeightRecency; + const rankingMinSimilarity = config.retrievalProfileSettings.rankingMinSimilarity; const refTime = (referenceTime ?? new Date()).toISOString(); - const siteClause = sourceSite ? 'AND source_site = $8' : ''; + const siteClause = sourceSite ? 'AND source_site = $9' : ''; const params: unknown[] = [ pgvector.toSql(queryEmbedding), userId, Math.max(1, Math.min(100, limit)), - wSim, wImp, wRec, refTime, + wSim, wImp, wRec, refTime, rankingMinSimilarity, ]; if (sourceSite) params.push(sourceSite); - return { params, siteClause, wSim, wImp, wRec, refTime }; + return { params, siteClause, wSim, wImp, wRec, rankingMinSimilarity, refTime }; } diff --git a/src/db/repository-representations.ts b/src/db/repository-representations.ts index 3a5941b..fe42c64 100644 --- a/src/db/repository-representations.ts +++ b/src/db/repository-representations.ts @@ -244,8 +244,10 @@ export async function searchAtomicFactsHybrid( p.similarity, ( $5 * p.similarity - + $6 * m.importance - + $7 * EXP(-EXTRACT(EPOCH FROM ($8::timestamptz - m.last_accessed_at)) / 2592000.0) + + CASE WHEN p.similarity >= $9 THEN ( + $6 * m.importance + + $7 * EXP(-EXTRACT(EPOCH FROM ($8::timestamptz - m.last_accessed_at)) / 2592000.0) + ) ELSE 0 END + ${config.retrievalProfileSettings.lexicalWeight} * p.best_rrf_score ) * COALESCE(m.trust_score, 1.0) AS score, p.matched_facts, diff --git a/src/db/repository-vector-search.ts b/src/db/repository-vector-search.ts index 6c32b58..e163a84 100644 --- a/src/db/repository-vector-search.ts +++ b/src/db/repository-vector-search.ts @@ -108,13 +108,14 @@ export async function searchVectorsInWorkspace( const wSim = config.scoringWeightSimilarity; const wImp = config.scoringWeightImportance; const wRec = config.scoringWeightRecency; + const rankingMinSimilarity = config.retrievalProfileSettings.rankingMinSimilarity; const refTime = (referenceTime ?? new Date()).toISOString(); const params: unknown[] = [ pgvector.toSql(queryEmbedding), workspaceId, normalizeLimit(limit), - wSim, wImp, wRec, refTime, + wSim, wImp, wRec, refTime, rankingMinSimilarity, ]; - let nextParam = 8; + let nextParam = 9; const agentClause = buildAgentScopeClause(agentScope, callerAgentId, params, nextParam); nextParam += agentClause.paramsAdded; @@ -126,8 +127,10 @@ export async function searchVectorsInWorkspace( 1 - (embedding <=> $1) AS similarity, ( $4 * (1 - (embedding <=> $1)) - + $5 * importance - + $6 * EXP(-EXTRACT(EPOCH FROM ($7::timestamptz - last_accessed_at)) / 2592000.0) + + CASE WHEN (1 - (embedding <=> $1)) >= $8 THEN ( + $5 * importance + + $6 * EXP(-EXTRACT(EPOCH FROM ($7::timestamptz - last_accessed_at)) / 2592000.0) + ) ELSE 0 END ) * COALESCE(trust_score, 1.0) AS score FROM memories WHERE workspace_id = $2 @@ -250,8 +253,10 @@ async function searchVectorsPg( 1 - (embedding <=> $1) AS similarity, ( $4 * (1 - (embedding <=> $1)) - + $5 * importance - + $6 * EXP(-EXTRACT(EPOCH FROM ($7::timestamptz - last_accessed_at)) / 2592000.0) + + CASE WHEN (1 - (embedding <=> $1)) >= $8 THEN ( + $5 * importance + + $6 * EXP(-EXTRACT(EPOCH FROM ($7::timestamptz - last_accessed_at)) / 2592000.0) + ) ELSE 0 END ) * COALESCE(trust_score, 1.0) AS score FROM memories WHERE user_id = $2 @@ -359,8 +364,10 @@ async function searchHybridPg( 1 - (m.embedding <=> $1) AS similarity, ( $5 * (1 - (m.embedding <=> $1)) - + $6 * m.importance - + $7 * EXP(-EXTRACT(EPOCH FROM ($8::timestamptz - m.last_accessed_at)) / 2592000.0) + + CASE WHEN (1 - (m.embedding <=> $1)) >= $9 THEN ( + $6 * m.importance + + $7 * EXP(-EXTRACT(EPOCH FROM ($8::timestamptz - m.last_accessed_at)) / 2592000.0) + ) ELSE 0 END + ${config.retrievalProfileSettings.lexicalWeight} * f.rrf_score ) * COALESCE(m.trust_score, 1.0) AS score FROM fused f @@ -471,9 +478,10 @@ function computeScore(similarity: number, importance: number, lastAccessedAt: Da const refMs = referenceTime ? referenceTime.getTime() : Date.now(); const secondsSinceAccess = (refMs - lastAccessedAt.getTime()) / 1000; const recency = Math.exp(-secondsSinceAccess / 2592000.0); - return (config.scoringWeightSimilarity * similarity) - + (config.scoringWeightImportance * importance) - + (config.scoringWeightRecency * recency); + const nonSemanticScore = similarity >= config.retrievalProfileSettings.rankingMinSimilarity + ? (config.scoringWeightImportance * importance) + (config.scoringWeightRecency * recency) + : 0; + return (config.scoringWeightSimilarity * similarity) + nonSemanticScore; } function approximateCosineSimilarity(left: number[], right: number[]): number { diff --git a/src/services/__tests__/retrieval-policy.test.ts b/src/services/__tests__/retrieval-policy.test.ts index e0f2ebd..cacb2ae 100644 --- a/src/services/__tests__/retrieval-policy.test.ts +++ b/src/services/__tests__/retrieval-policy.test.ts @@ -8,6 +8,7 @@ import { describe, expect, it, vi } from 'vitest'; import type { RetrievalProfile } from '../retrieval-profiles.js'; import type { SearchResult } from '../../db/memory-repository.js'; import { createSearchResult } from './test-fixtures.js'; +import { getRetrievalProfile } from '../retrieval-profiles.js'; const retrievalProfileSettings: RetrievalProfile = { name: 'balanced', @@ -29,6 +30,7 @@ const retrievalProfileSettings: RetrievalProfile = { scoringWeightSimilarity: 2.0, scoringWeightImportance: 1.0, scoringWeightRecency: 1.0, + rankingMinSimilarity: 0.3, linkExpansionBeforeMMR: false, repairDeltaThreshold: 0, repairConfidenceFloor: 0, @@ -62,6 +64,7 @@ const { shouldAcceptRepair, mergeSearchResults, resolveRerankDepth, + applyRankingEligibility, isAggregationQuery, AGGREGATION_QUERY_LIMIT, } = await import('../retrieval-policy.js'); @@ -363,6 +366,59 @@ describe('resolveRerankDepth', () => { }); }); +describe('applyRankingEligibility', () => { + it.each([ + ['safe', 0.35], + ['balanced', 0.3], + ['quality', 0.25], + ] as const)('uses the %s profile semantic floor before composite ranking', (profileName, threshold) => { + const profile = getRetrievalProfile(profileName); + expect(profile.rankingMinSimilarity).toBe(threshold); + + const relevant = makeResult({ id: 'answer', similarity: threshold + 0.05, score: 0.3 }); + const noisy = makeResult({ id: 'recent-important-noise', similarity: threshold - 0.01, score: 10 }); + const result = applyRankingEligibility( + 'What is my favorite color?', + [noisy, relevant], + { retrievalProfileSettings: profile }, + ); + + expect(result.triggered).toBe(true); + expect(result.results.map((memory) => memory.id)).toEqual(['answer']); + expect(result.removedIds).toEqual(['recent-important-noise']); + }); + + it('bypasses recall-oriented and temporal queries', () => { + const noisy = makeResult({ id: 'low-sim-history', similarity: 0.01, score: 10 }); + const profile = getRetrievalProfile('balanced'); + + expect(applyRankingEligibility('Why did this project change?', [noisy], { retrievalProfileSettings: profile }).triggered) + .toBe(false); + expect(applyRankingEligibility('What database do I currently use?', [noisy], { retrievalProfileSettings: profile }).triggered) + .toBe(false); + expect(applyRankingEligibility('What did I use before switching?', [noisy], { retrievalProfileSettings: profile }).triggered) + .toBe(false); + }); + + it('bypasses source-scoped and as-of reads', () => { + const noisy = makeResult({ id: 'low-sim-scoped', similarity: 0.01, score: 10 }); + const profile = getRetrievalProfile('balanced'); + + expect(applyRankingEligibility( + 'What is my favorite color?', + [noisy], + { retrievalProfileSettings: profile }, + { sourceSite: 'gmail' }, + ).triggered).toBe(false); + expect(applyRankingEligibility( + 'What is my favorite color?', + [noisy], + { retrievalProfileSettings: profile }, + { referenceTime: new Date('2026-01-01T00:00:00.000Z') }, + ).triggered).toBe(false); + }); +}); + describe('isAggregationQuery', () => { it('detects "how many" patterns', () => { expect(isAggregationQuery('how many projects am I working on')).toBe(true); diff --git a/src/services/__tests__/search-pipeline-runtime-config.test.ts b/src/services/__tests__/search-pipeline-runtime-config.test.ts index 3917d2a..63c0c18 100644 --- a/src/services/__tests__/search-pipeline-runtime-config.test.ts +++ b/src/services/__tests__/search-pipeline-runtime-config.test.ts @@ -28,6 +28,7 @@ const mockConfig = { retrievalProfileSettings: { repairPrimaryWeight: 1, repairRewriteWeight: 1, + rankingMinSimilarity: 0.3, }, }; @@ -45,6 +46,15 @@ vi.mock('../retrieval-policy.js', () => ({ resolveRerankDepth: vi.fn((limit: number) => limit), shouldRunRepairLoop: vi.fn(() => false), shouldAcceptRepair: vi.fn(), + applyRankingEligibility: vi.fn((_query: string, candidates: unknown[]) => ({ + results: candidates, + decisions: [], + removedIds: [], + threshold: null, + reason: 'mocked', + queryLabel: 'simple', + triggered: false, + })), })); vi.mock('../query-expansion.js', () => ({ expandQueryViaEntities: vi.fn(), diff --git a/src/services/retrieval-policy.ts b/src/services/retrieval-policy.ts index a34d796..1d7a776 100644 --- a/src/services/retrieval-policy.ts +++ b/src/services/retrieval-policy.ts @@ -4,6 +4,7 @@ import type { CoreRuntimeConfig } from '../app/runtime-container.js'; import type { SearchResult } from '../db/memory-repository.js'; +import { isCurrentStateQuery, isHistoricalQuery } from './current-state-ranking.js'; import { isTemporalOrderingQuery } from './temporal-query-expansion.js'; const SIMPLE_QUERY_LIMIT = 5; @@ -23,6 +24,31 @@ type AdaptiveLimitConfig = Pick< | 'adaptiveAggregationLimit' >; +type RankingEligibilityConfig = Pick; + +export interface RankingEligibilityContext { + sourceSite?: string; + referenceTime?: Date; +} + +export interface RankingEligibilityDecision { + id: string; + similarity: number; + threshold: number; + decision: 'eligible' | 'filtered'; + reason: string; +} + +export interface RankingEligibilityResult { + results: SearchResult[]; + decisions: RankingEligibilityDecision[]; + removedIds: string[]; + threshold: number | null; + reason: string; + queryLabel: QueryComplexityLabel; + triggered: boolean; +} + /** Hard ceiling for aggregation queries (prevents runaway candidate pools). */ const AGGREGATION_HARD_CAP = 50; @@ -188,6 +214,57 @@ export function resolveRerankDepth( return Math.max(clampLimitWide(limit), runtimeConfig.retrievalProfileSettings.rerankDepth); } +export function applyRankingEligibility( + query: string, + candidates: SearchResult[], + runtimeConfig: RankingEligibilityConfig, + context: RankingEligibilityContext = {}, +): RankingEligibilityResult { + const queryLabel = classifyQueryDetailed(query).label; + const bypassReason = resolveRankingEligibilityBypass(query, queryLabel, context); + if (bypassReason) { + return { + results: candidates, + decisions: [], + removedIds: [], + threshold: null, + reason: bypassReason, + queryLabel, + triggered: false, + }; + } + + const threshold = clampUnit(runtimeConfig.retrievalProfileSettings.rankingMinSimilarity); + if (threshold <= 0) { + return { + results: candidates, + decisions: [], + removedIds: [], + threshold: null, + reason: 'non-positive-ranking-threshold', + queryLabel, + triggered: false, + }; + } + + const decisions = candidates.map((candidate) => buildRankingEligibilityDecision(candidate, threshold)); + const removedIds = decisions + .filter((decision) => decision.decision === 'filtered') + .map((decision) => decision.id); + const keptIds = new Set( + decisions.filter((decision) => decision.decision === 'eligible').map((decision) => decision.id), + ); + return { + results: candidates.filter((candidate) => keptIds.has(candidate.id)), + decisions, + removedIds, + threshold, + reason: 'direct-query-ranking-floor', + queryLabel, + triggered: true, + }; +} + export type QueryComplexityLabel = 'simple' | 'medium' | 'complex' | 'multi-hop' | 'aggregation'; export interface QueryClassification { @@ -251,6 +328,44 @@ function clampLimitWide(limit: number): number { return Math.max(1, Math.min(AGGREGATION_HARD_CAP, Math.floor(limit))); } +function resolveRankingEligibilityBypass( + query: string, + queryLabel: QueryComplexityLabel, + context: RankingEligibilityContext, +): string | null { + if (context.referenceTime) return 'as-of-query'; + if (context.sourceSite) return 'source-site-filter'; + if (isCurrentStateQuery(query) || isHistoricalQuery(query)) return 'temporal-state-query'; + if (queryLabel === 'complex' || queryLabel === 'multi-hop' || queryLabel === 'aggregation') { + return `recall-oriented-${queryLabel}-query`; + } + return null; +} + +function buildRankingEligibilityDecision( + candidate: SearchResult, + threshold: number, +): RankingEligibilityDecision { + const similarity = finiteOrZero(candidate.similarity); + const eligible = similarity >= threshold; + return { + id: candidate.id, + similarity, + threshold, + decision: eligible ? 'eligible' : 'filtered', + reason: eligible ? 'meets-ranking-floor' : 'below-ranking-floor', + }; +} + +function finiteOrZero(value: number): number { + return Number.isFinite(value) ? value : 0; +} + +function clampUnit(value: number): number { + if (!Number.isFinite(value)) return 0; + return Math.max(0, Math.min(1, value)); +} + function mergeWeightedResults( merged: Map, results: SearchResult[], diff --git a/src/services/retrieval-profiles.ts b/src/services/retrieval-profiles.ts index 2a441f9..b5fa12d 100644 --- a/src/services/retrieval-profiles.ts +++ b/src/services/retrieval-profiles.ts @@ -24,6 +24,13 @@ export interface RetrievalProfile { scoringWeightSimilarity: number; scoringWeightImportance: number; scoringWeightRecency: number; + /** + * Minimum semantic similarity required before importance/recency ranking + * boosts apply, and before direct simple/medium queries consider a candidate + * eligible for injection. Safe is stricter; quality keeps a little more + * recall for borderline matches. + */ + rankingMinSimilarity: number; linkExpansionBeforeMMR: boolean; repairDeltaThreshold: number; repairConfidenceFloor: number; @@ -50,6 +57,7 @@ const PROFILES: Record = { scoringWeightSimilarity: 2.0, scoringWeightImportance: 1.0, scoringWeightRecency: 1.0, + rankingMinSimilarity: 0.35, linkExpansionBeforeMMR: false, repairDeltaThreshold: 0, repairConfidenceFloor: 0, @@ -74,6 +82,7 @@ const PROFILES: Record = { scoringWeightSimilarity: 2.0, scoringWeightImportance: 1.0, scoringWeightRecency: 1.0, + rankingMinSimilarity: 0.3, linkExpansionBeforeMMR: false, repairDeltaThreshold: 0, repairConfidenceFloor: 0, @@ -98,6 +107,7 @@ const PROFILES: Record = { scoringWeightSimilarity: 2.0, scoringWeightImportance: 1.0, scoringWeightRecency: 1.0, + rankingMinSimilarity: 0.25, linkExpansionBeforeMMR: false, repairDeltaThreshold: 0, repairConfidenceFloor: 0, diff --git a/src/services/search-pipeline.ts b/src/services/search-pipeline.ts index 6e9bf55..148613e 100644 --- a/src/services/search-pipeline.ts +++ b/src/services/search-pipeline.ts @@ -13,6 +13,7 @@ import type { SearchStore, SemanticLinkStore, MemoryStore, EntityStore } from '. import { embedText } from './embedding.js'; import { rewriteQuery } from './extraction.js'; import { + applyRankingEligibility, resolveRerankDepth, shouldRunRepairLoop, shouldAcceptRepair, @@ -260,6 +261,7 @@ export async function runSearchPipelineWithTrace( results, queryEmbedding, limit, + sourceSite, referenceTime, temporalExpansion.temporalAnchorFingerprints, trace, @@ -701,6 +703,7 @@ async function applyExpansionAndReranking( results: SearchResult[], queryEmbedding: number[], limit: number, + sourceSite: string | undefined, referenceTime: Date | undefined, temporalAnchorFingerprints: string[], trace: TraceCollector, @@ -715,15 +718,23 @@ async function applyExpansionAndReranking( trace, policyConfig, ); + const eligible = applyRankingEligibilityStage( + query, + ranked, + sourceSite, + referenceTime, + trace, + policyConfig, + ); return selectAndExpandCandidates( stores, userId, - ranked.candidates, + eligible.candidates, queryEmbedding, limit, referenceTime, - ranked.protectedFingerprints, + eligible.protectedFingerprints, trace, policyConfig, ); @@ -843,6 +854,29 @@ function applyTemporalConstraintStage( }; } +function applyRankingEligibilityStage( + query: string, + state: RankedCandidateState, + sourceSite: string | undefined, + referenceTime: Date | undefined, + trace: TraceCollector, + policyConfig: SearchPipelineRuntimeConfig, +): RankedCandidateState { + const eligibility = applyRankingEligibility(query, state.candidates, policyConfig, { + sourceSite, + referenceTime, + }); + if (!eligibility.triggered) return state; + trace.stage('ranking-eligibility', eligibility.results, { + threshold: eligibility.threshold, + reason: eligibility.reason, + queryLabel: eligibility.queryLabel, + removedIds: eligibility.removedIds, + decisions: eligibility.decisions, + }); + return { ...state, candidates: eligibility.results }; +} + async function selectAndExpandCandidates( stores: SearchPipelineStores, userId: string, From b2e4ceae129cd4d7a928ecb5873a4b6f1389f582 Mon Sep 17 00:00:00 2001 From: Philippe Mortelette Date: Tue, 28 Apr 2026 18:13:22 -0400 Subject: [PATCH 8/8] fix: address ranking relevance review cleanup --- src/db/__tests__/pgvector-smoke.test.ts | 45 +++++++++++++++++++ src/db/query-helpers.ts | 9 +++- src/db/repository-representations.ts | 1 + src/db/repository-vector-search.ts | 7 +-- .../memory-search-runtime-config.test.ts | 1 + .../retrieval-relevance-regression.test.ts | 8 ++++ src/services/relevance-policy.ts | 18 ++------ src/services/retrieval-policy.ts | 42 ++++++++++------- 8 files changed, 96 insertions(+), 35 deletions(-) diff --git a/src/db/__tests__/pgvector-smoke.test.ts b/src/db/__tests__/pgvector-smoke.test.ts index ab9d3c9..9113935 100644 --- a/src/db/__tests__/pgvector-smoke.test.ts +++ b/src/db/__tests__/pgvector-smoke.test.ts @@ -7,6 +7,7 @@ import { describe, it, expect, beforeAll, afterAll, beforeEach } from 'vitest'; import { createMemoryTestContext } from './test-fixtures.js'; import { pool } from '../pool.js'; import { unitVector } from './test-fixtures.js'; +import { config } from '../../config.js'; const TEST_USER = 'test-user-1'; @@ -17,6 +18,15 @@ function similarTo(base: number[], noise: number): number[] { return vec.map((v) => v / norm); } +function vectorWithCosine(base: number[], cosine: number): number[] { + const axisIndex = Math.abs(base[0]) < 0.9 ? 0 : 1; + const orthogonal = base.map((value, index) => (index === axisIndex ? 1 : 0) - base[axisIndex] * value); + const orthogonalNorm = Math.sqrt(orthogonal.reduce((sum, value) => sum + value * value, 0)); + const orthogonalUnit = orthogonal.map((value) => value / orthogonalNorm); + const sine = Math.sqrt(Math.max(0, 1 - cosine * cosine)); + return base.map((value, index) => cosine * value + sine * orthogonalUnit[index]); +} + describe('pgvector smoke test', () => { const { repo } = createMemoryTestContext(pool, { beforeAll, beforeEach, afterAll }); @@ -51,6 +61,41 @@ describe('pgvector smoke test', () => { expect(results[0].score).toBeGreaterThan(results[1].score); }); + it('does not let high importance rescue a sub-floor vector match', async () => { + const originalRankingMin = config.retrievalProfileSettings.rankingMinSimilarity; + config.retrievalProfileSettings.rankingMinSimilarity = 0.3; + try { + const queryVec = unitVector(123); + await repo.storeMemory({ + userId: TEST_USER, + content: 'semantic match with low importance', + embedding: vectorWithCosine(queryVec, 0.35), + importance: 0, + sourceSite: 'test', + }); + await repo.storeMemory({ + userId: TEST_USER, + content: 'important unrelated note', + embedding: vectorWithCosine(queryVec, 0.2), + importance: 1, + sourceSite: 'test', + }); + + const results = await repo.searchSimilar(TEST_USER, queryVec, 5); + const semanticMatch = results.find((result) => result.content === 'semantic match with low importance'); + const noisyMatch = results.find((result) => result.content === 'important unrelated note'); + + expect(semanticMatch).toBeDefined(); + expect(noisyMatch).toBeDefined(); + expect(results[0].content).toBe('semantic match with low importance'); + expect(noisyMatch!.similarity).toBeLessThan(0.3); + expect(noisyMatch!.score).toBeCloseTo(config.scoringWeightSimilarity * noisyMatch!.similarity, 5); + expect(semanticMatch!.score).toBeGreaterThan(noisyMatch!.score); + } finally { + config.retrievalProfileSettings.rankingMinSimilarity = originalRankingMin; + } + }); + it('isolates memories by user_id', async () => { const vec = unitVector(10); await repo.storeMemory({ userId: 'user-a', content: 'from user A', embedding: vec, importance: 0.5, sourceSite: 'test' }); diff --git a/src/db/query-helpers.ts b/src/db/query-helpers.ts index 8cabea8..868b24a 100644 --- a/src/db/query-helpers.ts +++ b/src/db/query-helpers.ts @@ -44,7 +44,7 @@ export function buildHybridSearchParams( const wSim = config.scoringWeightSimilarity; const wImp = config.scoringWeightImportance; const wRec = config.scoringWeightRecency; - const rankingMinSimilarity = config.retrievalProfileSettings.rankingMinSimilarity; + const rankingMinSimilarity = clampUnit(config.retrievalProfileSettings.rankingMinSimilarity); const refTime = (referenceTime ?? new Date()).toISOString(); const siteFilter = sourceSite ? `AND ${siteFilterColumn} = $10` : ''; const params: unknown[] = [ @@ -73,7 +73,7 @@ export function buildVectorSearchParams( const wSim = config.scoringWeightSimilarity; const wImp = config.scoringWeightImportance; const wRec = config.scoringWeightRecency; - const rankingMinSimilarity = config.retrievalProfileSettings.rankingMinSimilarity; + const rankingMinSimilarity = clampUnit(config.retrievalProfileSettings.rankingMinSimilarity); const refTime = (referenceTime ?? new Date()).toISOString(); const siteClause = sourceSite ? 'AND source_site = $9' : ''; const params: unknown[] = [ @@ -83,3 +83,8 @@ export function buildVectorSearchParams( if (sourceSite) params.push(sourceSite); return { params, siteClause, wSim, wImp, wRec, rankingMinSimilarity, refTime }; } + +export function clampUnit(value: number): number { + if (!Number.isFinite(value)) return 0; + return Math.max(0, Math.min(1, value)); +} diff --git a/src/db/repository-representations.ts b/src/db/repository-representations.ts index fe42c64..47718e8 100644 --- a/src/db/repository-representations.ts +++ b/src/db/repository-representations.ts @@ -248,6 +248,7 @@ export async function searchAtomicFactsHybrid( $6 * m.importance + $7 * EXP(-EXTRACT(EPOCH FROM ($8::timestamptz - m.last_accessed_at)) / 2592000.0) ) ELSE 0 END + -- Lexical RRF stays outside the semantic boost gate because exact text match is itself a relevance signal. + ${config.retrievalProfileSettings.lexicalWeight} * p.best_rrf_score ) * COALESCE(m.trust_score, 1.0) AS score, p.matched_facts, diff --git a/src/db/repository-vector-search.ts b/src/db/repository-vector-search.ts index e163a84..e7860b6 100644 --- a/src/db/repository-vector-search.ts +++ b/src/db/repository-vector-search.ts @@ -13,7 +13,7 @@ import { type SearchResult, type WorkspaceContext, } from './repository-types.js'; -import { RRF_K, buildHybridSearchParams, buildVectorSearchParams } from './query-helpers.js'; +import { RRF_K, buildHybridSearchParams, buildVectorSearchParams, clampUnit } from './query-helpers.js'; import { cosineSimilarity } from '../vector-math.js'; export interface CandidateRow { @@ -108,7 +108,7 @@ export async function searchVectorsInWorkspace( const wSim = config.scoringWeightSimilarity; const wImp = config.scoringWeightImportance; const wRec = config.scoringWeightRecency; - const rankingMinSimilarity = config.retrievalProfileSettings.rankingMinSimilarity; + const rankingMinSimilarity = clampUnit(config.retrievalProfileSettings.rankingMinSimilarity); const refTime = (referenceTime ?? new Date()).toISOString(); const params: unknown[] = [ @@ -368,6 +368,7 @@ async function searchHybridPg( $6 * m.importance + $7 * EXP(-EXTRACT(EPOCH FROM ($8::timestamptz - m.last_accessed_at)) / 2592000.0) ) ELSE 0 END + -- Lexical RRF stays outside the semantic boost gate because exact text match is itself a relevance signal. + ${config.retrievalProfileSettings.lexicalWeight} * f.rrf_score ) * COALESCE(m.trust_score, 1.0) AS score FROM fused f @@ -478,7 +479,7 @@ function computeScore(similarity: number, importance: number, lastAccessedAt: Da const refMs = referenceTime ? referenceTime.getTime() : Date.now(); const secondsSinceAccess = (refMs - lastAccessedAt.getTime()) / 1000; const recency = Math.exp(-secondsSinceAccess / 2592000.0); - const nonSemanticScore = similarity >= config.retrievalProfileSettings.rankingMinSimilarity + const nonSemanticScore = similarity >= clampUnit(config.retrievalProfileSettings.rankingMinSimilarity) ? (config.scoringWeightImportance * importance) + (config.scoringWeightRecency * recency) : 0; return (config.scoringWeightSimilarity * similarity) + nonSemanticScore; diff --git a/src/services/__tests__/memory-search-runtime-config.test.ts b/src/services/__tests__/memory-search-runtime-config.test.ts index 81f499f..915da8f 100644 --- a/src/services/__tests__/memory-search-runtime-config.test.ts +++ b/src/services/__tests__/memory-search-runtime-config.test.ts @@ -33,6 +33,7 @@ vi.mock('../retrieval-policy.js', () => ({ classification: { label: 'simple', matchedMarker: null }, })), classifyQueryDetailed: vi.fn(() => ({ label: 'simple' })), + resolveRecallBypass: vi.fn(() => null), })); vi.mock('../search-pipeline.js', () => ({ runSearchPipelineWithTrace: mockRunSearchPipelineWithTrace, diff --git a/src/services/__tests__/retrieval-relevance-regression.test.ts b/src/services/__tests__/retrieval-relevance-regression.test.ts index db96d6b..a273cd7 100644 --- a/src/services/__tests__/retrieval-relevance-regression.test.ts +++ b/src/services/__tests__/retrieval-relevance-regression.test.ts @@ -12,11 +12,13 @@ const { mockRunSearchPipelineWithTrace, mockResolveSearchLimitDetailed, mockClassifyQueryDetailed, + mockResolveRecallBypass, mockEmbedText, } = vi.hoisted(() => ({ mockRunSearchPipelineWithTrace: vi.fn(), mockResolveSearchLimitDetailed: vi.fn(), mockClassifyQueryDetailed: vi.fn(), + mockResolveRecallBypass: vi.fn(), mockEmbedText: vi.fn(), })); @@ -24,6 +26,7 @@ vi.mock('../search-pipeline.js', () => ({ runSearchPipelineWithTrace: mockRunSea vi.mock('../retrieval-policy.js', () => ({ resolveSearchLimitDetailed: mockResolveSearchLimitDetailed, classifyQueryDetailed: mockClassifyQueryDetailed, + resolveRecallBypass: mockResolveRecallBypass, })); vi.mock('../embedding.js', () => ({ embedText: mockEmbedText })); vi.mock('../composite-staleness.js', () => ({ @@ -45,6 +48,11 @@ describe('retrieval relevance regression', () => { beforeEach(() => { vi.clearAllMocks(); mockClassifyQueryDetailed.mockImplementation(classifyFixtureQuery); + mockResolveRecallBypass.mockImplementation((_query: string, label: string, context: { asOf?: string; referenceTime?: Date; sourceSite?: string }) => { + if (context.asOf || context.referenceTime) return 'as-of-query'; + if (context.sourceSite) return 'source-site-filter'; + return ['complex', 'multi-hop', 'aggregation'].includes(label) ? `recall-oriented-${label}-query` : null; + }); mockEmbedText.mockResolvedValue([1, 0, 0]); mockResolveSearchLimitDetailed.mockImplementation((query: string, limit?: number) => ({ limit: limit ?? 5, diff --git a/src/services/relevance-policy.ts b/src/services/relevance-policy.ts index 6b8d975..696e8f1 100644 --- a/src/services/relevance-policy.ts +++ b/src/services/relevance-policy.ts @@ -3,8 +3,7 @@ */ import type { SearchResult } from '../db/repository-types.js'; -import { isCurrentStateQuery, isHistoricalQuery } from './current-state-ranking.js'; -import { classifyQueryDetailed, type QueryComplexityLabel } from './retrieval-policy.js'; +import { classifyQueryDetailed, resolveRecallBypass, type QueryComplexityLabel } from './retrieval-policy.js'; export interface RelevanceGateConfig { similarityThreshold: number; @@ -47,7 +46,6 @@ type ScoredSearchResult = SearchResult & { relevance: number; }; -const RECALL_ORIENTED_QUERY_LABELS = new Set(['complex', 'multi-hop', 'aggregation']); const INTEGRATION_SOURCE_PREFIXES = ['integration-', 'integration_', 'integration:', 'integration/']; const KNOWN_INTEGRATION_SOURCE_SITES = new Set([ 'integration', @@ -75,18 +73,8 @@ export function resolveRelevanceGate( if (requestedThreshold !== undefined) { return buildGate(requestedThreshold, 'request', 'caller-threshold', queryLabel); } - if (context.asOf) { - return { threshold: null, source: 'disabled', reason: 'as-of-query', queryLabel }; - } - if (context.sourceSite) { - return { threshold: null, source: 'disabled', reason: 'source-site-filter', queryLabel }; - } - if (isCurrentStateQuery(query) || isHistoricalQuery(query)) { - return { threshold: null, source: 'disabled', reason: 'temporal-state-query', queryLabel }; - } - if (RECALL_ORIENTED_QUERY_LABELS.has(queryLabel)) { - return { threshold: null, source: 'disabled', reason: `recall-oriented-${queryLabel}-query`, queryLabel }; - } + const bypassReason = resolveRecallBypass(query, queryLabel, context); + if (bypassReason) return { threshold: null, source: 'disabled', reason: bypassReason, queryLabel }; return buildGate(runtimeConfig.similarityThreshold, 'config', 'direct-query-default', queryLabel); } diff --git a/src/services/retrieval-policy.ts b/src/services/retrieval-policy.ts index 1d7a776..65b3106 100644 --- a/src/services/retrieval-policy.ts +++ b/src/services/retrieval-policy.ts @@ -31,6 +31,12 @@ export interface RankingEligibilityContext { referenceTime?: Date; } +export interface RecallBypassContext { + asOf?: string; + referenceTime?: Date; + sourceSite?: string; +} + export interface RankingEligibilityDecision { id: string; similarity: number; @@ -51,6 +57,14 @@ export interface RankingEligibilityResult { /** Hard ceiling for aggregation queries (prevents runaway candidate pools). */ const AGGREGATION_HARD_CAP = 50; +const RECALL_ORIENTED_QUERY_LABELS = new Set(['complex', 'multi-hop', 'aggregation']); + +const RECALL_BYPASS_REASONS = { + AS_OF_QUERY: 'as-of-query', + SOURCE_SITE_FILTER: 'source-site-filter', + TEMPORAL_STATE_QUERY: 'temporal-state-query', + recallOriented: (queryLabel: QueryComplexityLabel) => `recall-oriented-${queryLabel}-query`, +} as const; /** * Markers indicating temporal/relational complexity (multi-hop or comparison). @@ -221,7 +235,7 @@ export function applyRankingEligibility( context: RankingEligibilityContext = {}, ): RankingEligibilityResult { const queryLabel = classifyQueryDetailed(query).label; - const bypassReason = resolveRankingEligibilityBypass(query, queryLabel, context); + const bypassReason = resolveRecallBypass(query, queryLabel, context); if (bypassReason) { return { results: candidates, @@ -307,6 +321,18 @@ export function classifyQueryDetailed(query: string): QueryClassification { return { limit: MEDIUM_QUERY_LIMIT, label: 'medium' }; } +export function resolveRecallBypass( + query: string, + queryLabel: QueryComplexityLabel, + context: RecallBypassContext, +): string | null { + if (context.asOf || context.referenceTime) return RECALL_BYPASS_REASONS.AS_OF_QUERY; + if (context.sourceSite) return RECALL_BYPASS_REASONS.SOURCE_SITE_FILTER; + if (isCurrentStateQuery(query) || isHistoricalQuery(query)) return RECALL_BYPASS_REASONS.TEMPORAL_STATE_QUERY; + if (RECALL_ORIENTED_QUERY_LABELS.has(queryLabel)) return RECALL_BYPASS_REASONS.recallOriented(queryLabel); + return null; +} + function isMultiHopQuery(lowerQuery: string): boolean { return MULTI_HOP_MARKERS.some((marker) => new RegExp(marker).test(lowerQuery)); } @@ -328,20 +354,6 @@ function clampLimitWide(limit: number): number { return Math.max(1, Math.min(AGGREGATION_HARD_CAP, Math.floor(limit))); } -function resolveRankingEligibilityBypass( - query: string, - queryLabel: QueryComplexityLabel, - context: RankingEligibilityContext, -): string | null { - if (context.referenceTime) return 'as-of-query'; - if (context.sourceSite) return 'source-site-filter'; - if (isCurrentStateQuery(query) || isHistoricalQuery(query)) return 'temporal-state-query'; - if (queryLabel === 'complex' || queryLabel === 'multi-hop' || queryLabel === 'aggregation') { - return `recall-oriented-${queryLabel}-query`; - } - return null; -} - function buildRankingEligibilityDecision( candidate: SearchResult, threshold: number,