Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 26 additions & 10 deletions src/lib/run-context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,27 @@ function safeRead(path: string): string {
}
}

/**
* Staleness caveat derived from a file's mtime (#721, extended in the
* 2026-06 context audit). Returns a markdown note when the file was last
* modified more than `staleDays` ago, otherwise ''.
*
* Applied to memory layers an agent is told to ACT ON (state.md, feedback.md):
* without it, a correction from months ago is injected as if it were current
* (the audit found March feedback under "act on this first" in June runs).
*/
function stalenessNote(filePath: string, staleDays = 0): string {
try {
const MS_PER_DAY = 24 * 60 * 60 * 1000;
const mtime = statSync(filePath).mtimeMs;
const daysAgo = Math.floor((Date.now() - mtime) / MS_PER_DAY);

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

To prevent potential issues with clock skew or future-dated files (which can result in a negative daysAgo value), it is safer to clamp daysAgo to a minimum of 0 using Math.max(0, ...).

Suggested change
const daysAgo = Math.floor((Date.now() - mtime) / MS_PER_DAY);
const daysAgo = Math.max(0, Math.floor((Date.now() - mtime) / MS_PER_DAY));

if (daysAgo > staleDays) {
return `*(Last updated ${daysAgo} day${daysAgo > 1 ? 's' : ''} ago — verify before relying on this)*\n\n`;
}
} catch { /* mtime unavailable — skip the caveat */ }
return '';
}

function stripYamlFrontmatter(markdown: string): string {
const lines = markdown.split('\n');
let dashCount = 0;
Expand Down Expand Up @@ -543,7 +564,9 @@ export function gatherSquadContext(
const feedbackFile = join(memoryDir, squadName, 'feedback.md');
const content = safeRead(feedbackFile);
if (content) {
addLayer(6, 'Feedback (act on this first)', content);
// Caveat stale feedback (audit F1): without it, a correction from months
// ago is injected under "act on this first" as if it were current.
addLayer(6, 'Feedback (act on this first)', stalenessNote(feedbackFile) + content);
}
}

Expand All @@ -563,15 +586,8 @@ export function gatherSquadContext(
if (content) {
const body = stripYamlFrontmatter(content);
const stateCap = (role === 'scanner' || role === 'verifier') ? 2000 : undefined;
// Add staleness caveat (#721) so agents know if their memory is outdated
let staleNote = '';
try {
const MS_PER_DAY = 24 * 60 * 60 * 1000;
const mtime = statSync(stateFile).mtimeMs;
const daysAgo = Math.floor((Date.now() - mtime) / MS_PER_DAY);
if (daysAgo > 0) staleNote = `*(Last updated ${daysAgo} day${daysAgo > 1 ? 's' : ''} ago — verify before relying on this)*\n\n`;
} catch { /* */ }
addLayer(5, 'Previous State', staleNote + body, stateCap);
// Staleness caveat (#721) so agents know if their memory is outdated.
addLayer(5, 'Previous State', stalenessNote(stateFile) + body, stateCap);
}
}

Expand Down
142 changes: 141 additions & 1 deletion test/run-context.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import { mkdirSync, writeFileSync, rmSync, existsSync } from 'fs';
import { mkdirSync, writeFileSync, rmSync, existsSync, utimesSync } from 'fs';
import { join } from 'path';
import { tmpdir } from 'os';
import { parseAgentFrontmatter, gatherSquadContext } from '../src/lib/run-context.js';
Expand Down Expand Up @@ -163,3 +163,143 @@ describe('gatherSquadContext — maxTokens option', () => {
expect(typeof ctxOverride).toBe('string');
});
});

// ── gatherSquadContext — injection CONTRACT (audit hardening, 2026-06) ───────
//
// The prior tests only asserted `typeof ctx === 'string'` (always true) or were
// guarded by `if (length > 0)` (silently skipped when empty), so the actual
// behavior the context system depends on — layer ORDER, role GATING,
// strategy.md as L1, budget DROP, and stale-memory caveats — was unverified.
// These build a real .agents fixture and assert that behavior directly.
//
// Hermetic: findSquadsDir/findMemoryDir walk up from cwd and step 1 (ancestor
// walk) finds the fixture before any git-aware fallback, so these never read
// the real repo.

describe('gatherSquadContext — injection contract', () => {
let testDir: string;
let originalCwd: string;
let agentPath: string;
const SQUAD = 'eng';
const AGENT = 'builder';

// Unique markers per layer so we can assert presence + ordering by index.
const M = {
founder: 'FOUNDER_CTX_MARKER',
strategy: 'STRATEGY_MARKER',
alignment: 'ALIGNMENT_MARKER',
feedback: 'FEEDBACK_MARKER',
goals: 'GOALS_MARKER',
state: 'STATE_MARKER',
agent: 'AGENT_BODY_MARKER',
briefing: 'BRIEFING_MARKER',
};

function writeFixture(opts: { feedbackMtimeDaysAgo?: number; agentRole?: string } = {}) {
const root = join(testDir, '.agents');
const mem = join(root, 'memory');
const sq = join(root, 'squads', SQUAD);
mkdirSync(sq, { recursive: true });
mkdirSync(join(mem, 'company'), { recursive: true });
mkdirSync(join(mem, SQUAD, AGENT), { recursive: true });

writeFileSync(join(sq, 'SQUAD.md'), '# Eng Squad\n');
agentPath = join(sq, `${AGENT}.md`);
writeFileSync(
agentPath,
`---\nrole: ${opts.agentRole ?? 'worker'}\n---\n\n# Builder\n\n${M.agent} — write code.\n`,
);
// L9 founder-context (universal) and L1 company/strategy.md
writeFileSync(join(mem, 'company', 'founder-context.md'), `${M.founder}\n${'F'.repeat(200)}`);
writeFileSync(join(mem, 'company', 'strategy.md'), `# Strategy — Test\n${M.strategy}\n${'S'.repeat(200)}`);
// L10 per-squad alignment, L6 feedback, L3 goals, L5 state, L7 briefing
writeFileSync(join(mem, SQUAD, 'founder-alignment.md'), `${M.alignment}\n${'L'.repeat(100)}`);
const feedbackFile = join(mem, SQUAD, 'feedback.md');
writeFileSync(feedbackFile, `${M.feedback}\n${'B'.repeat(100)}`);
if (opts.feedbackMtimeDaysAgo !== undefined) {
const tSec = (Date.now() - opts.feedbackMtimeDaysAgo * 86_400_000) / 1000;
utimesSync(feedbackFile, tSec, tSec);
Comment on lines +220 to +221

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Using a Date object with utimesSync is more idiomatic and robust than manually calculating epoch seconds as a float, which can occasionally lead to platform-specific truncation or type issues.

Suggested change
const tSec = (Date.now() - opts.feedbackMtimeDaysAgo * 86_400_000) / 1000;
utimesSync(feedbackFile, tSec, tSec);
const date = new Date(Date.now() - opts.feedbackMtimeDaysAgo * 86_400_000);
utimesSync(feedbackFile, date, date);

}
writeFileSync(join(mem, SQUAD, 'goals.md'), `## Active\n${M.goals}\n${'G'.repeat(100)}`);
writeFileSync(join(mem, SQUAD, AGENT, 'state.md'), `${M.state}\n${'T'.repeat(100)}`);
writeFileSync(join(mem, 'daily-briefing.md'), `${M.briefing}\n${'D'.repeat(100)}`);
}

beforeEach(() => {
testDir = join(tmpdir(), 'squads-ctx-contract-' + Date.now() + '-' + Math.random().toString(36).slice(2));
mkdirSync(testDir, { recursive: true });
originalCwd = process.cwd();
process.chdir(testDir);
});

afterEach(() => {
process.chdir(originalCwd);
if (existsSync(testDir)) rmSync(testDir, { recursive: true, force: true });
});

it('injects layers in action-first order: founder→alignment→feedback→goals→state→agent→strategy', () => {
writeFixture();
const ctx = gatherSquadContext(SQUAD, AGENT, { agentPath, role: 'worker' });
const order = [M.founder, M.alignment, M.feedback, M.goals, M.state, M.agent, M.strategy];
const positions = order.map((m) => ctx.indexOf(m));
// every marker present
expect(positions.every((p) => p >= 0)).toBe(true);
// strictly increasing → documented order holds
for (let i = 1; i < positions.length; i++) {
expect(positions[i]).toBeGreaterThan(positions[i - 1]);
}
});

it('loads company/strategy.md as the L1 "Company" layer (#878)', () => {
writeFixture();
const ctx = gatherSquadContext(SQUAD, AGENT, { agentPath, role: 'worker' });
expect(ctx).toContain(M.strategy);
// The Company header precedes the strategy content (it IS the company layer).
expect(ctx.indexOf('## Company')).toBeGreaterThanOrEqual(0);
expect(ctx.indexOf('## Company')).toBeLessThan(ctx.indexOf(M.strategy));
});

it('gates layers by role: scanner omits feedback+briefing, worker adds feedback, lead adds briefing', () => {
writeFixture();
const scanner = gatherSquadContext(SQUAD, AGENT, { agentPath, role: 'scanner' });
const worker = gatherSquadContext(SQUAD, AGENT, { agentPath, role: 'worker' });
const lead = gatherSquadContext(SQUAD, AGENT, { agentPath, role: 'lead' });

// Universal layers present for every role
for (const ctx of [scanner, worker, lead]) {
expect(ctx).toContain(M.founder);
expect(ctx).toContain(M.strategy);
expect(ctx).toContain(M.goals);
}
// scanner: no feedback (L6), no daily-briefing (L7)
expect(scanner).not.toContain(M.feedback);
expect(scanner).not.toContain(M.briefing);
// worker: feedback yes, briefing no
expect(worker).toContain(M.feedback);
expect(worker).not.toContain(M.briefing);
// lead: briefing yes
expect(lead).toContain(M.briefing);
});

it('drops late layers (strategy) before early ones (founder-context) when budget is tight', () => {
writeFixture();
// ~50 tokens = 200 chars: only the first-injected layer survives.
const ctx = gatherSquadContext(SQUAD, AGENT, { agentPath, role: 'worker', maxTokens: 50 });
expect(ctx).toContain(M.founder); // injected first → survives
expect(ctx).not.toContain(M.strategy); // injected late → dropped
});

it('caveats stale feedback so months-old corrections are not read as current (audit F1)', () => {
writeFixture({ feedbackMtimeDaysAgo: 90 });
const ctx = gatherSquadContext(SQUAD, AGENT, { agentPath, role: 'worker' });
expect(ctx).toContain(M.feedback);
expect(ctx).toContain('Last updated 90 days ago');
});

it('does not caveat fresh feedback', () => {
writeFixture(); // feedback written now → mtime today
const ctx = gatherSquadContext(SQUAD, AGENT, { agentPath, role: 'worker' });
expect(ctx).toContain(M.feedback);
expect(ctx).not.toContain('Last updated');
});
});
Loading