diff --git a/src/app/runtime-container.ts b/src/app/runtime-container.ts index 7f3dec7..feb237e 100644 --- a/src/app/runtime-container.ts +++ b/src/app/runtime-container.ts @@ -104,6 +104,8 @@ export interface CoreRuntimeConfig { queryAugmentationMinSimilarity: number; queryExpansionEnabled: boolean; queryExpansionMinSimilarity: number; + recencyBinBoostEnabled: boolean; + recencyBinBoostWeight: number; repairConfidenceFloor: number; repairDeltaThreshold: number; repairLoopEnabled: boolean; diff --git a/src/config.ts b/src/config.ts index 10025d0..3d7db53 100644 --- a/src/config.ts +++ b/src/config.ts @@ -127,6 +127,8 @@ export interface RuntimeConfig { literalListProtectionMaxProtected: number; temporalQueryConstraintEnabled: boolean; temporalQueryConstraintBoost: number; + recencyBinBoostEnabled: boolean; + recencyBinBoostWeight: number; deferredAudnEnabled: boolean; deferredAudnBatchSize: number; compositeGroupingEnabled: boolean; @@ -374,6 +376,8 @@ export const config: RuntimeConfig = { literalListProtectionMaxProtected: parsePositiveIntEnv('LITERAL_LIST_PROTECTION_MAX_PROTECTED', 3), temporalQueryConstraintEnabled: (optionalEnv('TEMPORAL_QUERY_CONSTRAINT_ENABLED') ?? 'false') === 'true', temporalQueryConstraintBoost: parseFloat(optionalEnv('TEMPORAL_QUERY_CONSTRAINT_BOOST') ?? '2'), + recencyBinBoostEnabled: (optionalEnv('RECENCY_BIN_BOOST_ENABLED') ?? 'false') === 'true', + recencyBinBoostWeight: parseFloat(optionalEnv('RECENCY_BIN_BOOST_WEIGHT') ?? '0.10'), deferredAudnEnabled: (optionalEnv('DEFERRED_AUDN_ENABLED') ?? 'false') === 'true', deferredAudnBatchSize: parseInt(optionalEnv('DEFERRED_AUDN_BATCH_SIZE') ?? '20', 10), compositeGroupingEnabled: (optionalEnv('COMPOSITE_GROUPING_ENABLED') ?? 
'true') === 'true', @@ -514,6 +518,8 @@ export const INTERNAL_POLICY_CONFIG_FIELDS = [ 'literalListProtectionEnabled', 'literalListProtectionMaxProtected', // Temporal query selection 'temporalQueryConstraintEnabled', 'temporalQueryConstraintBoost', + // Recency-bin boost (EXP-12) + 'recencyBinBoostEnabled', 'recencyBinBoostWeight', // Fast AUDN 'fastAudnEnabled', 'fastAudnDuplicateThreshold', // Observation / deferred diff --git a/src/db/repository-types.ts b/src/db/repository-types.ts index 0f5aa18..b55245c 100644 --- a/src/db/repository-types.ts +++ b/src/db/repository-types.ts @@ -49,6 +49,8 @@ export const RESERVED_METADATA_KEYS = new Set([ 'clarification_note', 'target_memory_id', 'contradiction_confidence', + // Recency bin breadcrumb — `src/services/memory-storage.ts` (EXP-12) + 'recency_bin', ]); /** diff --git a/src/services/__tests__/recency-bin-ranking.test.ts b/src/services/__tests__/recency-bin-ranking.test.ts new file mode 100644 index 0000000..4e35721 --- /dev/null +++ b/src/services/__tests__/recency-bin-ranking.test.ts @@ -0,0 +1,149 @@ +/** + * Unit tests for the recency-bin boost stage (EXP-12). 
const NOW = new Date('2026-04-29T12:00:00.000Z');

const MINUTE_MS = 60_000;
const HOUR_MS = 3_600_000;
const DAY_MS = 86_400_000;

type Candidates = Parameters<typeof applyRecencyBinBoost>[0]['candidates'];

/** Builds a fixture whose `created_at` / `observed_at` lie `ageMs` before `NOW`. */
function aged(id: string, ageMs: number, score: number) {
  const stamp = (): Date => new Date(NOW.getTime() - ageMs);
  return createSearchResult({
    id,
    score,
    similarity: score,
    created_at: stamp(),
    observed_at: stamp(),
  });
}

/** Runs the stage against `NOW`; current-state ranking is off unless a test says otherwise. */
function runBoost(
  query: string,
  candidates: Candidates,
  weight: number,
  currentStateTriggered = false,
) {
  return applyRecencyBinBoost({
    query,
    candidates,
    weight,
    referenceTime: NOW,
    currentStateTriggered,
  });
}

/** Projects a result list onto its ids so order can be asserted in one line. */
function idsOf(results: Candidates): string[] {
  return results.map((entry) => entry.id);
}

describe('applyRecencyBinBoost', () => {
  it('boosts the matching bin and re-sorts ties', () => {
    const outcome = runBoost(
      'what did I do recently',
      [
        aged('old', 100 * DAY_MS, 1.0),
        aged('hours', 2 * HOUR_MS, 0.95),
        aged('minutes', 5 * MINUTE_MS, 0.9),
      ],
      0.5,
    );

    expect(outcome.applied).toBe(true);
    expect(outcome.queryBin).toBe('1d');
    expect(outcome.results[0]?.id).toBe('hours');
  });

  it('is a no-op when the flag is off (caller skips by not calling)', () => {
    // The flag itself is checked by the pipeline before this function is
    // reached. What is pinned here is the fallback: a weight of 0 must
    // leave the incoming order untouched.
    const outcome = runBoost(
      'recently',
      [aged('a', 5 * MINUTE_MS, 0.9), aged('b', 100 * DAY_MS, 0.8)],
      0,
    );

    expect(outcome.applied).toBe(false);
    expect(idsOf(outcome.results)).toEqual(['a', 'b']);
  });

  it('returns no-op when the query bin is not inferable', () => {
    const outcome = runBoost(
      'what database does the project use',
      [aged('a', 5 * MINUTE_MS, 0.9), aged('b', 5 * DAY_MS, 0.8)],
      0.5,
    );

    expect(outcome.applied).toBe(false);
    expect(outcome.queryBin).toBe(null);
    expect(idsOf(outcome.results)).toEqual(['a', 'b']);
  });

  it('short-circuits when current-state-ranking already triggered', () => {
    const outcome = runBoost(
      'recently',
      [aged('a', 5 * MINUTE_MS, 0.9), aged('b', 5 * DAY_MS, 0.8)],
      0.5,
      true,
    );

    expect(outcome.applied).toBe(false);
    expect(idsOf(outcome.results)).toEqual(['a', 'b']);
  });

  it('applies configured weight on exact match and adjacent bins', () => {
    const outcome = runBoost(
      'yesterday',
      [
        aged('exact', 12 * HOUR_MS, 1.0), // 12h is past the 10h bound, so it lands in '1d'
        aged('adjacent', 5 * HOUR_MS, 1.0), // 5h lands in '10h', the neighbour of '1d'
        aged('far', 1 * MINUTE_MS, 1.0), // 1 minute lands in '1m', not a neighbour
      ],
      0.4,
    );

    expect(outcome.applied).toBe(true);
    expect(outcome.queryBin).toBe('1d');

    const scoreOf = (id: string): number | undefined =>
      outcome.results.find((entry) => entry.id === id)?.score;
    // Full weight for the exact bin, half for the neighbour, nothing otherwise.
    expect(scoreOf('exact')).toBeCloseTo(1.4, 5);
    expect(scoreOf('adjacent')).toBeCloseTo(1.2, 5);
    expect(scoreOf('far')).toBeCloseTo(1.0, 5);
  });

  it('recomputes bins from created_at, ignoring stale persisted hints', () => {
    // The persisted hint claims '1m' although the fact is five days old.
    const stale = createSearchResult({
      id: 'stale',
      score: 1.0,
      created_at: new Date(NOW.getTime() - 5 * DAY_MS),
      metadata: { recency_bin: '1m' },
    });

    const outcome = runBoost('last week', [stale], 1.0);

    expect(outcome.applied).toBe(true);
    expect(outcome.queryBin).toBe('10d');
    // Five days falls in '10d', an exact match, so the full weight (+1.0) is added.
    expect(outcome.results[0]?.score).toBeCloseTo(2.0, 5);
  });

  it('handles empty candidates without throwing', () => {
    const outcome = runBoost('recently', [], 0.5);

    expect(outcome.applied).toBe(false);
    expect(outcome.results).toEqual([]);
  });
});
const NOW = new Date('2026-04-29T12:00:00.000Z');

const SECOND_MS = 1_000;
const MINUTE_MS = 60_000;
const HOUR_MS = 3_600_000;
const DAY_MS = 86_400_000;

/** Returns the instant `ms` milliseconds before `NOW`. */
function ageMs(ms: number): Date {
  return new Date(NOW.getTime() - ms);
}

describe('assignRecencyBin — bin boundary table', () => {
  // Each row: [test title, age in ms, expected bin]. An age exactly on a
  // bound stays in that bin; one millisecond more moves to the next rung.
  const boundaryTable: ReadonlyArray<readonly [string, number, RecencyBin]> = [
    ['now (0ms)', 0, '1m'],
    ['1 minute exactly', MINUTE_MS, '1m'],
    ['just past 1m (61s)', 61 * SECOND_MS, '10m'],
    ['9 minutes', 9 * MINUTE_MS, '10m'],
    ['10 minutes exactly', 10 * MINUTE_MS, '10m'],
    ['11 minutes', 11 * MINUTE_MS, '1h'],
    ['59 minutes', 59 * MINUTE_MS, '1h'],
    ['1 hour exactly', HOUR_MS, '1h'],
    ['just past 1h', HOUR_MS + 1, '10h'],
    ['10 hours exactly', 10 * HOUR_MS, '10h'],
    ['23 hours', 23 * HOUR_MS, '1d'],
    ['1 day exactly', DAY_MS, '1d'],
    ['just past 1 day', DAY_MS + 1, '10d'],
    ['9 days', 9 * DAY_MS, '10d'],
    ['10 days exactly', 10 * DAY_MS, '10d'],
    ['11 days', 11 * DAY_MS, '100d'],
    ['99 days', 99 * DAY_MS, '100d'],
    ['100 days exactly', 100 * DAY_MS, '100d'],
    ['101 days', 101 * DAY_MS, 'older'],
    ['1 year', 365 * DAY_MS, 'older'],
  ];

  boundaryTable.forEach(([title, age, expectedBin]) => {
    it(title, () => {
      expect(assignRecencyBin(ageMs(age), NOW)).toBe(expectedBin);
    });
  });

  it('clamps future-dated facts to the youngest bin', () => {
    const oneMinuteAhead = new Date(NOW.getTime() + 60_000);
    expect(assignRecencyBin(oneMinuteAhead, NOW)).toBe('1m');
  });
});

describe('computeBinAffinity', () => {
  it('exact match returns 1', () => {
    expect(computeBinAffinity('1h', '1h')).toBe(1);
  });

  it('adjacent bins return 0.5', () => {
    const neighbours: ReadonlyArray<readonly [RecencyBin, RecencyBin]> = [
      ['1h', '10h'],
      ['10h', '1h'],
      ['100d', 'older'],
      ['1m', '10m'],
    ];
    for (const [queryBin, factBin] of neighbours) {
      expect(computeBinAffinity(queryBin, factBin)).toBe(0.5);
    }
  });

  it('non-adjacent bins return 0', () => {
    const strangers: ReadonlyArray<readonly [RecencyBin, RecencyBin]> = [
      ['1m', '1h'],
      ['1h', '1d'],
      ['1m', 'older'],
    ];
    for (const [queryBin, factBin] of strangers) {
      expect(computeBinAffinity(queryBin, factBin)).toBe(0);
    }
  });

  it('exposes the canonical bin order for callers', () => {
    expect(RECENCY_BIN_LABELS).toEqual(['1m', '10m', '1h', '10h', '1d', '10d', '100d', 'older']);
  });
});
/** Arguments accepted by `applyRecencyBinBoost`. */
export interface RecencyBinBoostInput {
  /** Raw query text; the query bin is inferred from it via `inferQueryBin`. */
  query: string;
  /** Candidates to rescore. This array and its elements are never mutated. */
  candidates: SearchResult[];
  /** Score added on an exact bin match; scaled by the bin affinity otherwise. */
  weight: number;
  /** The "now" against which every candidate's `created_at` is binned. */
  referenceTime: Date;
  /**
   * When `applyCurrentStateRanking.triggered` is true the current-state
   * stage has already added a recency-flavored signal; layering the bin
   * boost on top double-counts. The pipeline passes that flag through so
   * this stage can no-op cleanly.
   */
  currentStateTriggered: boolean;
}

/** Outcome of `applyRecencyBinBoost`. */
export interface RecencyBinBoostResult {
  /** True only when at least one candidate actually received a boost. */
  applied: boolean;
  /** Bin inferred from the query, or `null` when none was inferred or an early guard returned first. */
  queryBin: RecencyBin | null;
  /** Rescored and re-sorted candidates when `applied`; the untouched input array otherwise. */
  results: SearchResult[];
}

/** Builds the "nothing happened" result: the input array is handed back as-is. */
function noOpResult(candidates: SearchResult[], queryBin: RecencyBin | null): RecencyBinBoostResult {
  return { applied: false, queryBin, results: candidates };
}

/**
 * Adds `weight * computeBinAffinity(queryBin, factBin)` to each candidate's
 * `score`, where `factBin` is recomputed from `created_at` against
 * `referenceTime`, then re-sorts by descending score.
 *
 * Returns a no-op result (input order preserved, `applied: false`) when:
 * - the current-state stage already triggered,
 * - there are no candidates,
 * - `weight` is zero or not finite,
 * - no bin can be inferred from the query, or
 * - no candidate falls in the query bin or one of its neighbours.
 *
 * @param input - See `RecencyBinBoostInput`.
 * @returns The rescored candidates plus whether the boost took effect.
 */
export function applyRecencyBinBoost(input: RecencyBinBoostInput): RecencyBinBoostResult {
  const { query, candidates, weight, referenceTime, currentStateTriggered } = input;
  if (currentStateTriggered) return noOpResult(candidates, null);
  if (candidates.length === 0) return noOpResult(candidates, null);
  if (!Number.isFinite(weight) || weight === 0) return noOpResult(candidates, null);

  const queryBin = inferQueryBin(query, referenceTime);
  if (queryBin === null) return noOpResult(candidates, null);

  let boostedCount = 0;
  const rescored = candidates.map((result) => {
    const factBin = assignRecencyBin(result.created_at, referenceTime);
    const affinity = computeBinAffinity(queryBin, factBin);
    if (affinity === 0) return result;
    boostedCount += 1;
    return { ...result, score: result.score + weight * affinity };
  });

  // Every score is unchanged, so sorting could only reorder candidates that
  // this stage did not touch. Report a no-op and keep the incoming order.
  if (boostedCount === 0) return noOpResult(candidates, queryBin);

  // `rescored` is a fresh array from `map`, so the in-place sort is safe.
  rescored.sort((left, right) => right.score - left.score);
  return { applied: true, queryBin, results: rescored };
}
/**
 * Pipeline wrapper around `applyRecencyBinBoost`.
 *
 * Returns `state` unchanged when `recencyBinBoostEnabled` is off or when the
 * boost reports that it did not apply. Otherwise records a
 * `'recency-bin-boost'` trace stage and returns a copy of `state` carrying
 * the boosted candidates. A missing `referenceTime` falls back to the
 * current wall-clock time.
 */
function applyRecencyBinStage(
  query: string,
  state: RankedCandidateState,
  trace: TraceCollector,
  policyConfig: SearchPipelineRuntimeConfig,
  referenceTime: Date | undefined,
  currentStateTriggered: boolean,
): RankedCandidateState {
  if (policyConfig.recencyBinBoostEnabled) {
    const { applied, queryBin, results } = applyRecencyBinBoost({
      query,
      candidates: state.candidates,
      weight: policyConfig.recencyBinBoostWeight,
      referenceTime: referenceTime ?? new Date(),
      currentStateTriggered,
    });

    if (applied) {
      const traceDetails = { queryBin, weight: policyConfig.recencyBinBoostWeight };
      trace.stage('recency-bin-boost', results, traceDetails);
      return { ...state, candidates: results };
    }
  }

  return state;
}
/**
 * Closed set of recency bin labels, listed youngest → oldest.
 * `'older'` is the catch-all for ages beyond the last ladder rung (100 days).
 */
export type RecencyBin = '1m' | '10m' | '1h' | '10h' | '1d' | '10d' | '100d' | 'older';

/** One ladder rung: a bin label and the inclusive upper bound on age, in milliseconds. */
interface BinRung {
  readonly label: Exclude<RecencyBin, 'older'>;
  readonly ms: number;
}

/** Youngest-first, so the first rung whose bound covers the age wins. */
const BIN_LADDER: readonly BinRung[] = [
  { label: '1m', ms: 60_000 },
  { label: '10m', ms: 600_000 },
  { label: '1h', ms: 3_600_000 },
  { label: '10h', ms: 36_000_000 },
  { label: '1d', ms: 86_400_000 },
  { label: '10d', ms: 864_000_000 },
  { label: '100d', ms: 8_640_000_000 },
];

/** Youngest → oldest order, derived from the ladder so the two lists cannot drift apart. */
const BIN_ORDER: readonly RecencyBin[] = [...BIN_LADDER.map((rung) => rung.label), 'older'];

/**
 * Map a fact's age (`now - createdAt`) onto the log-spaced bin ladder.
 *
 * Bounds are inclusive: an age exactly on a rung's bound stays in that
 * rung. Future-dated facts (negative age) are clamped to zero and land in
 * `'1m'`. If either date is invalid the age is NaN, which can never satisfy
 * a bound, so the fact is classified as `'older'`.
 *
 * @param createdAt - When the fact was created.
 * @param now - Reference time the age is measured against.
 * @returns The label of the first rung whose bound covers the age, else `'older'`.
 */
export function assignRecencyBin(createdAt: Date, now: Date): RecencyBin {
  const rawAgeMs = now.getTime() - createdAt.getTime();
  // Made explicit rather than relying on NaN failing every `<=` below.
  if (Number.isNaN(rawAgeMs)) return 'older';

  const ageMs = Math.max(0, rawAgeMs);
  for (const rung of BIN_LADDER) {
    if (ageMs <= rung.ms) return rung.label;
  }
  return 'older';
}

/**
 * Affinity between a query bin and a fact bin.
 * - 1.0: exact bin match
 * - 0.5: adjacent bin in `BIN_ORDER` (e.g. `1h` ↔ `10h`, `100d` ↔ `older`)
 * - 0.0: otherwise, including a label that is not in `BIN_ORDER`
 *
 * @param queryBin - Bin inferred from the query.
 * @param factBin - Bin computed for the candidate fact.
 * @returns 1, 0.5 or 0.
 */
export function computeBinAffinity(queryBin: RecencyBin, factBin: RecencyBin): number {
  if (queryBin === factBin) return 1;
  const qi = BIN_ORDER.indexOf(queryBin);
  const fi = BIN_ORDER.indexOf(factBin);
  if (qi < 0 || fi < 0) return 0;
  return Math.abs(qi - fi) === 1 ? 0.5 : 0;
}

/** Canonical bin order (youngest → oldest) for callers that need to iterate the labels. */
export const RECENCY_BIN_LABELS: readonly RecencyBin[] = BIN_ORDER;
/**
 * Ordered table mapping natural-language recency markers to a log-spaced
 * bin. Entries are grouped youngest → oldest and scanned top to bottom;
 * the first pattern that matches decides the bin. Order matters where
 * patterns overlap: "a day ago" must precede the generic "days ago", and
 * the negated "not long ago" must precede the plain "long ago".
 */
const QUERY_BIN_PATTERNS: ReadonlyArray<{ readonly pattern: RegExp; readonly bin: RecencyBin }> = [
  // '1m'
  { pattern: /\bjust\s+now\b/, bin: '1m' },
  { pattern: /\bright\s+now\b/, bin: '1m' },
  { pattern: /\bmoments?\s+ago\b/, bin: '1m' },
  // '10m'
  { pattern: /\b(?:in\s+the\s+)?last\s+(?:few\s+)?minutes?\b/, bin: '10m' },
  { pattern: /\bminutes?\s+ago\b/, bin: '10m' },
  // '1h'
  { pattern: /\b(?:an?\s+)?hour\s+ago\b/, bin: '1h' },
  // '10h'
  { pattern: /\bhours?\s+ago\b/, bin: '10h' },
  { pattern: /\b(?:earlier\s+)?today\b/, bin: '10h' },
  { pattern: /\bthis\s+morning\b/, bin: '10h' },
  { pattern: /\bthis\s+afternoon\b/, bin: '10h' },
  // '1d'
  { pattern: /\byesterday\b/, bin: '1d' },
  { pattern: /\b(?:a|one|1)\s+day\s+ago\b/, bin: '1d' },
  { pattern: /\bthis\s+week\b/, bin: '1d' },
  { pattern: /\brecent(?:ly)?\b/, bin: '1d' },
  // "not long ago", "not too long ago", "wasn't a long time ago" all mean
  // "recently". Without this entry they would fall through to the plain
  // "long ago" patterns below and be classified as 'older'.
  { pattern: /(?:\bnot|n't)\s+(?:(?:a|so|that|too|very)\s+)*long(?:\s+time)?\s+ago\b/, bin: '1d' },
  // '10d'
  { pattern: /\blast\s+week\b/, bin: '10d' },
  { pattern: /\b(?:a\s+)?few\s+days\s+ago\b/, bin: '10d' },
  { pattern: /\bdays?\s+ago\b/, bin: '10d' },
  { pattern: /\bweeks?\s+ago\b/, bin: '10d' },
  // '100d'
  { pattern: /\blast\s+month\b/, bin: '100d' },
  { pattern: /\bmonths?\s+ago\b/, bin: '100d' },
  // 'older'
  { pattern: /\blong\s+ago\b/, bin: 'older' },
  { pattern: /\byears?\s+ago\b/, bin: 'older' },
  { pattern: /\ba\s+long\s+time\s+ago\b/, bin: 'older' },
];

/**
 * Map natural-language recency markers in a query to a log-spaced bin.
 * Matching is case-insensitive (the query is lower-cased first).
 *
 * @param query - Raw query text.
 * @param _now - Currently unused; the mapping depends only on the wording.
 * @returns The bin of the first matching marker, or `null` when no marker
 *   matches. Callers MUST treat `null` as "no preference" and skip any
 *   bin-based boost rather than guessing a default bin.
 */
export function inferQueryBin(query: string, _now: Date): RecencyBin | null {
  const lowered = query.toLowerCase();
  const hit = QUERY_BIN_PATTERNS.find(({ pattern }) => pattern.test(lowered));
  return hit === undefined ? null : hit.bin;
}