Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ export interface RuntimeConfig {
consensusExtractionRuns: number;
observationDateExtractionEnabled: boolean;
quotedEntityExtractionEnabled: boolean;
genericEventAnchorEnabled: boolean;
entropyGateEnabled: boolean;
entropyGateThreshold: number;
entropyGateAlpha: number;
Expand Down Expand Up @@ -330,6 +331,7 @@ export const config: RuntimeConfig = {
consensusExtractionRuns: parseInt(optionalEnv('CONSENSUS_EXTRACTION_RUNS') ?? '3', 10),
observationDateExtractionEnabled: (optionalEnv('OBSERVATION_DATE_EXTRACTION_ENABLED') ?? 'false') === 'true',
quotedEntityExtractionEnabled: (optionalEnv('QUOTED_ENTITY_EXTRACTION_ENABLED') ?? 'false') === 'true',
genericEventAnchorEnabled: (optionalEnv('GENERIC_EVENT_ANCHOR_ENABLED') ?? 'false') === 'true',
entropyGateEnabled: (optionalEnv('ENTROPY_GATE_ENABLED') ?? 'false') === 'true',
entropyGateThreshold: parseFloat(optionalEnv('ENTROPY_GATE_THRESHOLD') ?? '0.35'),
entropyGateAlpha: parseFloat(optionalEnv('ENTROPY_GATE_ALPHA') ?? '0.5'),
Expand Down Expand Up @@ -489,6 +491,7 @@ export const INTERNAL_POLICY_CONFIG_FIELDS = [
'chunkSizeTurns', 'chunkOverlapTurns',
'consensusExtractionEnabled', 'consensusExtractionRuns',
'observationDateExtractionEnabled', 'quotedEntityExtractionEnabled',
'genericEventAnchorEnabled',
'entropyGateEnabled', 'entropyGateThreshold', 'entropyGateAlpha',
// Affinity clustering
'affinityClusteringThreshold', 'affinityClusteringMinSize',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,12 @@ describe('consensusExtractFacts runtime config', () => {
extractionCacheEnabled: false,
observationDateExtractionEnabled: true,
quotedEntityExtractionEnabled: false,
genericEventAnchorEnabled: false,
});

expect(mockChunkedExtractFacts).toHaveBeenCalledWith(
'User: I commute 45 minutes.',
{ observationDateExtractionEnabled: true },
{ observationDateExtractionEnabled: true, genericEventAnchorEnabled: false },
{ chunkSizeTurns: 8, chunkOverlapTurns: 2, extractionCacheEnabled: false },
);
expect(mockCachedExtractFacts).not.toHaveBeenCalled();
Expand All @@ -82,14 +83,16 @@ describe('consensusExtractFacts runtime config', () => {
extractionCacheEnabled: true,
observationDateExtractionEnabled: false,
quotedEntityExtractionEnabled: false,
genericEventAnchorEnabled: false,
});

expect(mockCachedExtractFacts).toHaveBeenCalledWith(longConversation, {
observationDateExtractionEnabled: false,
genericEventAnchorEnabled: false,
});
expect(mockChunkedExtractFacts).toHaveBeenCalledWith(
longConversation,
{ observationDateExtractionEnabled: false },
{ observationDateExtractionEnabled: false, genericEventAnchorEnabled: false },
{ chunkSizeTurns: 2, chunkOverlapTurns: 1, extractionCacheEnabled: true },
);
});
Expand All @@ -108,6 +111,7 @@ describe('consensusExtractFacts runtime config', () => {
extractionCacheEnabled: true,
observationDateExtractionEnabled: false,
quotedEntityExtractionEnabled: false,
genericEventAnchorEnabled: false,
});

expect(mockChunkedExtractFacts).not.toHaveBeenCalled();
Expand All @@ -127,10 +131,12 @@ describe('consensusExtractFacts runtime config', () => {
extractionCacheEnabled: false,
observationDateExtractionEnabled: false,
quotedEntityExtractionEnabled: false,
genericEventAnchorEnabled: false,
});

expect(mockExtractFacts).toHaveBeenCalledWith('User: I prefer Rust', {
observationDateExtractionEnabled: false,
genericEventAnchorEnabled: false,
});
expect(mockCachedExtractFacts).not.toHaveBeenCalled();
});
Expand Down
76 changes: 76 additions & 0 deletions src/services/__tests__/event-anchor-facts.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,22 @@

import { describe, expect, it } from 'vitest';
import { quickExtractFacts } from '../quick-extraction.js';
import { inferEventAnchorFacts } from '../event-anchor-facts.js';
import type { ExtractedFact } from '../extraction.js';

/**
 * Test fixture builder: wraps `text` in an `ExtractedFact`, filling every
 * other field from `overrides` or a fixed default.
 *
 * The `fact` field always comes from `text`; the headline defaults to the
 * first 40 characters of `text`. `network` is copied through as-is, so it
 * stays `undefined` unless an override supplies it.
 */
function makeFact(text: string, overrides: Partial<ExtractedFact> = {}): ExtractedFact {
  const {
    headline,
    importance,
    type,
    keywords,
    entities,
    relations,
    network,
    opinionConfidence,
  } = overrides;

  return {
    fact: text,
    headline: headline ?? text.slice(0, 40),
    importance: importance ?? 0.6,
    type: type ?? 'knowledge',
    keywords: keywords ?? [],
    entities: entities ?? [],
    relations: relations ?? [],
    network,
    opinionConfidence: opinionConfidence ?? null,
  };
}

describe('event anchor facts', () => {
it('emits mentorship.received anchors from relative-time facts', () => {
Expand Down Expand Up @@ -60,3 +76,63 @@ describe('event anchor facts', () => {
expect(facts.some((fact) => fact.fact.includes('event anchor trip.took_short_trip_rome'))).toBe(true);
});
});

describe('event anchor facts — generic event.occurred fall-through (EXP-06)', () => {
  // The fall-through is opt-in, so the option objects are shared across cases.
  const flagOn = { genericEventAnchorEnabled: true };
  const flagOff = { genericEventAnchorEnabled: false };
  // A fact with a month-and-year prefix; the flag-on and flag-off cases both use it.
  const monthOnlyText = 'As of January 2026, user is using PostgreSQL.';

  it('emits a generic event.occurred anchor when flag is on and no rule matches', () => {
    const anchors = inferEventAnchorFacts(makeFact(monthOnlyText), flagOn);
    expect(anchors).toHaveLength(1);
    const anchorText = anchors[0].fact;
    expect(anchorText).toContain('event anchor event.occurred');
    expect(anchorText).toContain('for User');
    expect(anchorText).toContain('occurred on January 1, 2026');
  });

  it('emits a generic event.occurred anchor for full-date prefix when flag is on', () => {
    const anchors = inferEventAnchorFacts(
      makeFact('As of March 15 2025, user completed the API migration.'),
      flagOn,
    );
    expect(anchors).toHaveLength(1);
    const anchorText = anchors[0].fact;
    expect(anchorText).toContain('event anchor event.occurred');
    expect(anchorText).toContain('for User');
    expect(anchorText).toContain('occurred on March 15, 2025');
  });

  it('emits no anchor when the flag is off, even if the prefix matches', () => {
    const source = makeFact(monthOnlyText);
    // Both the omitted-options call and the explicit `false` must stay silent.
    expect(inferEventAnchorFacts(source)).toHaveLength(0);
    expect(inferEventAnchorFacts(source, flagOff)).toHaveLength(0);
  });

  it('emits no anchor for facts without an "As of <date>" prefix', () => {
    const source = makeFact('User prefers Rust over Go.');
    expect(inferEventAnchorFacts(source, flagOn)).toHaveLength(0);
  });

  it('does not emit a generic anchor when a DESCRIPTOR_RULE already matches (regression)', () => {
    const conversation = [
      '[Session date: 2023-06-16]',
      'Jon: Gina, you won\'t believe it - I got mentored by this amazing business dude yesterday!',
    ].join('\n');
    const extracted = quickExtractFacts(conversation);

    // Pick the dated source fact (not an already-emitted anchor) and run it
    // through anchor inference again with the flag enabled. The rule-based
    // path should yield mentorship.received, and the generic fall-through
    // must stay out of the way for that same fact.
    const sourceFact = extracted.find(
      (candidate) => !candidate.fact.includes('event anchor') && /As of /i.test(candidate.fact),
    );
    expect(sourceFact).toBeDefined();
    const anchors = inferEventAnchorFacts(sourceFact as ExtractedFact, flagOn);
    const headlines = anchors.map(({ headline }) => headline);
    expect(headlines).toContain('Event mentorship.received');
    expect(headlines).not.toContain('Event event.occurred');
  });

  it('returns no anchors when subject cannot be inferred (graceful fallback)', () => {
    const source = makeFact('As of January 2026, the situation continues.');
    expect(inferEventAnchorFacts(source, flagOn)).toHaveLength(0);
  });

  it('returns no anchors on weird non-prefixed input rather than crashing', () => {
    const source = makeFact('Random unstructured text without temporal prefix.');
    const infer = () => inferEventAnchorFacts(source, flagOn);
    expect(infer).not.toThrow();
    expect(infer()).toHaveLength(0);
  });
});
10 changes: 8 additions & 2 deletions src/services/consensus-extraction.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ export interface ConsensusExtractionConfig {
extractionCacheEnabled: boolean;
observationDateExtractionEnabled: boolean;
quotedEntityExtractionEnabled: boolean;
genericEventAnchorEnabled: boolean;
}

interface FactWithEmbedding {
Expand Down Expand Up @@ -86,7 +87,11 @@ function applyOptionalQuotedEntityExtraction(
/** Run extractFacts() N times to get independent LLM samples. */
async function runMultipleExtractions(
conversationText: string,
config: Pick<ConsensusExtractionConfig, 'consensusExtractionRuns' | 'observationDateExtractionEnabled'>,
config: Pick<ConsensusExtractionConfig,
| 'consensusExtractionRuns'
| 'observationDateExtractionEnabled'
| 'genericEventAnchorEnabled'
>,
): Promise<ExtractedFact[][]> {
const allRunFacts: ExtractedFact[][] = [];
const options = buildExtractionOptions(config);
Expand All @@ -97,10 +102,11 @@ async function runMultipleExtractions(
}

function buildExtractionOptions(
config: Pick<ConsensusExtractionConfig, 'observationDateExtractionEnabled'>,
config: Pick<ConsensusExtractionConfig, 'observationDateExtractionEnabled' | 'genericEventAnchorEnabled'>,
) {
return {
observationDateExtractionEnabled: config.observationDateExtractionEnabled,
genericEventAnchorEnabled: config.genericEventAnchorEnabled,
};
}

Expand Down
76 changes: 62 additions & 14 deletions src/services/event-anchor-facts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,19 @@ interface EventAnchorDescriptor {
eventDateIso: string;
}

/**
 * Options controlling event-anchor extraction behavior.
 *
 * Accepted as the optional second argument of `inferEventAnchorFacts`; an
 * omitted argument is treated as an empty options object.
 */
export interface EventAnchorOptions {
  /**
   * EXP-06: generic fall-through anchor.
   *
   * When no DESCRIPTOR_RULE produces a descriptor for a fact, but the fact
   * starts with a parseable `As of <date>,` prefix and a subject can be
   * inferred, emit a single generic `event.occurred` anchor for it. The
   * prefix may be a full date (`March 15 2025`) or month and year only
   * (`January 2026`, which resolves to the first day of that month).
   *
   * Defaults to off: leaving this unset or `false` emits no generic anchor.
   */
  genericEventAnchorEnabled?: boolean;
}

/** Label of the generic anchor emitted when no descriptor rule matched a fact. */
const GENERIC_ANCHOR_LABEL = 'event.occurred';
/** Strict recorded-date prefix: `As of <Month> <day> <year>,` separated by single spaces. */
const RECORDED_DATE_PATTERN = /^As of ([A-Za-z]+ \d{1,2} \d{4}),\s*/i;
/**
 * Flexible recorded-date prefix: the day is optional (`As of January 2026,`)
 * and any run of whitespace may separate the parts. Group 1 is the date text.
 *
 * Like the strict pattern, this only anchors the start of the text. It is
 * deliberately not anchored to the end of input: an end anchor combined with
 * `.` (which does not cross line breaks) would reject multi-line fact text
 * that the strict pattern accepts.
 */
const RECORDED_DATE_FLEXIBLE_PATTERN = /^As of ([A-Za-z]+(?:\s+\d{1,2})?\s+\d{4}),\s*/i;
/** Matches text that already contains an `event anchor <label>` marker; such facts are not re-anchored. */
const EXPLICIT_EVENT_ANCHOR_PATTERN = /\bevent anchor\s+[a-z.]+/i;
/** Captures the `<Month> <day> <year>` date following the words `occurred on`. */
const EVENT_DATE_PATTERN = /\boccurred on ([A-Za-z]+ \d{1,2} \d{4})\b/i;
// NOTE(review): presumably capitalised tokens that must not be treated as a
// subject name; the consumer is outside this view, so confirm before editing.
const NON_SUBJECT_TOKENS = new Set(['Hey', 'Long', 'Yesterday', 'Thats', 'Awesome', 'Oh', 'Paris', 'Rome', 'Barcelona']);
Expand All @@ -34,18 +46,25 @@ const MONTH_INDEX: Record<string, number> = {
december: 11,
};

export function inferEventAnchorFacts(fact: ExtractedFact): ExtractedFact[] {
export function inferEventAnchorFacts(
fact: ExtractedFact,
options: EventAnchorOptions = {},
): ExtractedFact[] {
if (EXPLICIT_EVENT_ANCHOR_PATTERN.test(fact.fact)) {
return [];
}
const recordedDate = parseRecordedDate(fact.fact);
if (!recordedDate) {
return [];
}
return inferDescriptors(fact, recordedDate).map((descriptor) => buildAnchorFact(fact, descriptor));
return inferDescriptors(fact, recordedDate, options).map((descriptor) => buildAnchorFact(fact, descriptor));
}

function inferDescriptors(fact: ExtractedFact, recordedDate: Date): EventAnchorDescriptor[] {
function inferDescriptors(
fact: ExtractedFact,
recordedDate: Date,
options: EventAnchorOptions,
): EventAnchorDescriptor[] {
const lower = fact.fact.toLowerCase();
const subject = inferSubject(fact);
if (!subject) {
Expand All @@ -61,6 +80,10 @@ function inferDescriptors(fact: ExtractedFact, recordedDate: Date): EventAnchorD
}
}

if (descriptors.length === 0 && options.genericEventAnchorEnabled) {
descriptors.push({ label: GENERIC_ANCHOR_LABEL, subject, eventDateIso });
}

return dedupeDescriptors(descriptors);
}

Expand Down Expand Up @@ -112,7 +135,7 @@ function inferRomeLabels(lower: string): string[] {
}

function buildAnchorFact(sourceFact: ExtractedFact, descriptor: EventAnchorDescriptor): ExtractedFact {
const recordedPrefix = sourceFact.fact.match(RECORDED_DATE_PATTERN)?.[1];
const recordedPrefix = extractRecordedPrefix(sourceFact.fact);
const eventDateHuman = formatHumanDate(descriptor.eventDateIso);
const anchorFact = `As of ${recordedPrefix}, event anchor ${descriptor.label} for ${descriptor.subject} occurred on ${eventDateHuman}.`;
return {
Expand Down Expand Up @@ -193,24 +216,49 @@ function dedupeDescriptors(descriptors: EventAnchorDescriptor[]): EventAnchorDes
return [...unique.values()];
}

/**
 * Pulls the date text out of a leading `As of <date>,` prefix.
 *
 * The strict pattern is tried first and the flexible pattern second; the
 * first capture group of whichever matches is returned.
 *
 * @param text - Fact text to inspect.
 * @returns The captured date text, or an empty string when neither pattern matches.
 */
function extractRecordedPrefix(text: string): string {
  const prefixMatch = text.match(RECORDED_DATE_PATTERN) ?? text.match(RECORDED_DATE_FLEXIBLE_PATTERN);
  return prefixMatch === null ? '' : prefixMatch[1];
}

function parseRecordedDate(text: string): Date | null {
const match = text.match(RECORDED_DATE_PATTERN);
if (!match) {
const strict = text.match(RECORDED_DATE_PATTERN);
if (strict) {
return parseHumanDate(strict[1]);
}
const flexible = text.match(RECORDED_DATE_FLEXIBLE_PATTERN);
if (!flexible) {
return null;
}
return parseHumanDate(match[1]);
return parseHumanDate(flexible[1]);
}

function parseHumanDate(input: string): Date | null {
const match = input.match(/^([A-Za-z]+) (\d{1,2}) (\d{4})$/);
if (!match) {
return null;
const trimmed = input.trim();
const fullMatch = trimmed.match(/^([A-Za-z]+)\s+(\d{1,2})\s+(\d{4})$/);
if (fullMatch) {
const month = MONTH_INDEX[fullMatch[1].toLowerCase()];
if (month === undefined) {
return null;
}
return new Date(Date.UTC(Number(fullMatch[3]), month, Number(fullMatch[2]), 0, 0, 0, 0));
}
const month = MONTH_INDEX[match[1].toLowerCase()];
if (month === undefined) {
return null;
const monthYearMatch = trimmed.match(/^([A-Za-z]+)\s+(\d{4})$/);
if (monthYearMatch) {
const month = MONTH_INDEX[monthYearMatch[1].toLowerCase()];
if (month === undefined) {
return null;
}
return new Date(Date.UTC(Number(monthYearMatch[2]), month, 1, 0, 0, 0, 0));
}
return new Date(Date.UTC(Number(match[3]), month, Number(match[2]), 0, 0, 0, 0));
return null;
}

function formatHumanDate(isoDate: string): string {
Expand Down
11 changes: 8 additions & 3 deletions src/services/extraction-enrichment.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@

import type { ExtractedEntity, ExtractedFact, ExtractedRelation } from './extraction.js';
import { dedupeEntities } from './entity-dedup.js';
import { inferEventAnchorFacts } from './event-anchor-facts.js';
import { inferEventAnchorFacts, type EventAnchorOptions } from './event-anchor-facts.js';

export type EnrichmentOptions = EventAnchorOptions;

const SELF_ENTITY: ExtractedEntity = { name: 'User', type: 'person' };
const SELF_MARKERS = ['user ', 'user\'s', 'i ', 'i\'m', 'i’ve', 'i have', 'my '];
Expand Down Expand Up @@ -49,10 +51,13 @@ const CANONICAL_ENTITY_NAMES: Record<string, string> = {
msr: 'Microsoft Research',
};

export function enrichExtractedFacts(facts: ExtractedFact[]): ExtractedFact[] {
export function enrichExtractedFacts(
facts: ExtractedFact[],
options: EnrichmentOptions = {},
): ExtractedFact[] {
const enriched = facts.flatMap((fact) => {
const baseFact = enrichExtractedFact(fact);
return [baseFact, ...inferEventAnchorFacts(baseFact)];
return [baseFact, ...inferEventAnchorFacts(baseFact, options)];
});
return dedupeFacts(enriched);
}
Expand Down
4 changes: 3 additions & 1 deletion src/services/extraction.ts
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,9 @@ export async function extractFacts(
return timedSync('ingest.extract.post-process', () => {
const normalized: ExtractedFact[] = rawFacts.map((m) => normalizeRawFact(m));
const anchoredFacts = applyObservationDateAnchors(normalized, conversationText, options);
const baseFacts = enrichExtractedFacts(normalizeExtractedFacts(anchoredFacts));
const baseFacts = enrichExtractedFacts(normalizeExtractedFacts(anchoredFacts), {
genericEventAnchorEnabled: options.genericEventAnchorEnabled,
});
return mergeSupplementalFacts(baseFacts, conversationText);
});
}
Expand Down
4 changes: 3 additions & 1 deletion src/services/memory-ingest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,9 @@ export async function performQuickIngest(
const ingestStart = performance.now();
const logicalSessionTimestamp = resolveSessionDate(sessionTimestamp, conversationText);
const episodeId = await deps.stores.episode.storeEpisode({ userId, content: conversationText, sourceSite, sourceUrl });
const facts = timed('quick-ingest.extract', () => Promise.resolve(quickExtractFacts(conversationText)));
const facts = timed('quick-ingest.extract', () => Promise.resolve(quickExtractFacts(conversationText, {
genericEventAnchorEnabled: deps.config.genericEventAnchorEnabled,
})));
const extractedFacts = await facts;
const traceCollector = new IngestTraceCollector(deps.config.ingestTraceEnabled);
const acc = createIngestAccumulator();
Expand Down
1 change: 1 addition & 0 deletions src/services/memory-service-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ export interface IngestRuntimeConfig {
extractionCacheEnabled: boolean;
observationDateExtractionEnabled: boolean;
quotedEntityExtractionEnabled: boolean;
genericEventAnchorEnabled: boolean;
entityGraphEnabled: boolean;
entropyGateAlpha: number;
entropyGateEnabled: boolean;
Expand Down
7 changes: 7 additions & 0 deletions src/services/observation-date-extraction.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@ import { extractSessionTimestamp, parseSessionDate } from './session-date.js';

/**
 * Optional feature switches for fact extraction. Every field may be omitted.
 */
export interface ExtractionOptions {
  // NOTE(review): presumably gates observation-date anchoring of extracted
  // facts; the consuming helper is not visible here, so confirm against it.
  observationDateExtractionEnabled?: boolean;
  /**
   * EXP-06: when no DESCRIPTOR_RULE matches but the fact has an `As of <date>,`
   * prefix and a recoverable subject, emit a generic `event.occurred` anchor.
   * Threaded through to `enrichExtractedFacts` and `inferEventAnchorFacts`.
   * Defaults to off.
   */
  genericEventAnchorEnabled?: boolean;
}

export function buildExtractionUserMessage(
Expand Down
Loading
Loading