From 9ffdc8baee5df4df231ed76ca3dd10d42e8ab3ff Mon Sep 17 00:00:00 2001 From: MK Date: Mon, 15 Jun 2026 01:06:44 -0400 Subject: [PATCH] feat(audit): stamp entity_id + entity_type on every event (closes #164) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds top-level `entity_id` and `entity_type` fields to every Forge audit event, sourced from FORGE_AGENT_ID / forge.yaml `agent_id` with `entity_type` hardcoded to "agent". Field names + values are taken straight from the guardrails library's BasePayload vocabulary (EntityID, EntityType — "agent" / "workflow" / "assistant" constants) so consumers reading both the Forge NDJSON stream and the library's MongoDB GuardrailAuditEvent collection can join on `(entity_id, entity_type)` 1:1 without a translation table. Two-layer precedence (no per-request override layer — entity identity is fixed at process startup): 1. Explicit EntityID/EntityType on the event 2. AuditLogger.WithEntity static stamp (from env / forge.yaml) Both fields use omitempty. Deployments not setting agent_id keep emitting the pre-#164 JSON shape verbatim. No schema bump. Wiring (forge-cli/runtime/runner.go) mirrors BuildGuardrailChecker's existing AgentID resolution (guardrails_loader.go:46-50): env wins over forge.yaml. Called right after the existing WithTenancy stamp so all four tenancy/entity fields land together on every event, including startup banners (agent_card_published, policy_loaded, audit_export_status). Tests pin: static stamp on plain Emit, no stamp omits both keys, EmitFromContext per-invocation events carry the stamp alongside correlation_id, explicit event value beats the static stamp, partial WithEntity ("", id) installs only EntityID. Docs: - docs/security/audit-logging.md gains an Entity stamping section with the 1:1 library-join note. - docs/security/tenancy.md splits the precedence table into Tenancy fields + Entity fields subsections; documents the no-header-layer choice. 
Future-proofs for non-agent entities: when Forge adds workflow or assistant runtimes, the field name doesn't change — only the stamped value. Additive value change, not a schema change. --- docs/security/audit-logging.md | 31 ++++++++ docs/security/tenancy.md | 13 ++++ forge-cli/runtime/runner.go | 13 ++++ forge-core/runtime/audit.go | 83 +++++++++++++++++++++ forge-core/runtime/entity_test.go | 116 ++++++++++++++++++++++++++++++ 5 files changed, 256 insertions(+) create mode 100644 forge-core/runtime/entity_test.go diff --git a/docs/security/audit-logging.md b/docs/security/audit-logging.md index 3694edc..1f189b5 100644 --- a/docs/security/audit-logging.md +++ b/docs/security/audit-logging.md @@ -78,6 +78,37 @@ Both fields use `omitempty`. Deployments that set neither env nor header keep em The top-level `org_id` is distinct from `auth_verify.fields.org_id`, which carries whatever the inbound auth token claimed (provider-derived). The top-level value is the operator's declared tenancy, trusted because the deployment / orchestrator set it. Both can be present on the same `auth_verify` event when they're different identifiers (e.g., the token came from a federated identity but the agent is deployed into a specific workspace). +### Entity stamping (`entity_id` / `entity_type`) + +Every audit event also carries the entity identifier the event came from: + +| Layer | Source | +|-------|--------| +| Per-event explicit | `AuditEvent.EntityID` / `AuditEvent.EntityType` | +| Deployment-time stamp | `FORGE_AGENT_ID` env → forge.yaml `agent_id` → `entity_id`; `entity_type` hardcoded to `"agent"` | + +```yaml +env: + - name: FORGE_AGENT_ID + value: "aibuilderdemo" # or just set forge.yaml agent_id +``` + +Emits land as: + +```json +{ + "ts": "...", + "event": "session_start", + "entity_id": "aibuilderdemo", + "entity_type": "agent", + ... 
+} +``` + +**1:1 join with the guardrails library's MongoDB audit.** When `FORGE_GUARDRAILS_DB` is set, the library writes its own audit records into a `GuardrailAuditEvent` collection in MongoDB carrying the same `entity_id` + `entity_type` columns. The values are sourced from the same env vars / forge.yaml so consumers reading both streams can join `forge.entity_id == library.entity_id AND forge.entity_type == library.entity_type` without translation. Forge only runs `entity_type: "agent"` today; the library supports `agent` / `workflow` / `assistant` as future-compatible values. + +Entity identity has no per-request override — agent identity is fixed at process startup. The tenancy layer above (`org_id` / `workspace_id`) covers the multi-tenant routing case. + See [Tenancy stamping reference](tenancy.md) for the precedence rules and the agent-to-agent propagation helper. ### Token usage and execution duration diff --git a/docs/security/tenancy.md b/docs/security/tenancy.md index 8537f33..4390de4 100644 --- a/docs/security/tenancy.md +++ b/docs/security/tenancy.md @@ -14,6 +14,8 @@ The same agent process supports both the static-deployment case (one agent serves one workspace) and the multi-tenant routing case (one agent serves many workspaces, the orchestrator picks per request). +### Tenancy fields (`org_id` / `workspace_id`) + | Layer | Source | Wins when | |-------|--------|-----------| | 1 — Explicit on event | `AuditEvent.OrgID` / `AuditEvent.WorkspaceID` set before emit | Always — caller-owned event takes precedence over every fallback | @@ -22,6 +24,17 @@ agent serves many workspaces, the orchestrator picks per request). Each field is resolved independently. A request that overrides only `X-Forge-Org-ID` still lets the env stamp fill in `workspace_id`. 
+### Entity fields (`entity_id` / `entity_type`) — #164 + +| Layer | Source | Wins when | +|-------|--------|-----------| +| 1 — Explicit on event | `AuditEvent.EntityID` / `AuditEvent.EntityType` set before emit | Always — caller-owned event takes precedence | +| 2 — Deployment-time stamp | `FORGE_AGENT_ID` env / forge.yaml `agent_id` → `entity_id`; `entity_type` hardcoded to `"agent"` | Whenever the higher layer is empty | + +Entity identity has **no per-request header layer** — entity is fixed at process startup. If a deployment needs per-request entity routing, the tenancy layer above already covers that (an agent serving multiple workspaces). Agent identity is the process, by definition. + +`entity_type` and `entity_id` match the field names + values the guardrails library writes to its MongoDB `GuardrailAuditEvent` collection (see `EntityType` constants: `agent` / `workflow` / `assistant`). When `FORGE_GUARDRAILS_DB` is set, both streams carry the same `(entity_id, entity_type)` pair and consumers join them 1:1 without translation. Forge only runs agents today, so the value is always `"agent"`; future entity types are an additive value change, not a schema change. + ## Static tenancy (one agent per workspace) The simplest case: deploy one Forge agent into one workspace, declare the tenancy via env, never set headers. Every emitted event — including startup banners — carries the stamp. diff --git a/forge-cli/runtime/runner.go b/forge-cli/runtime/runner.go index a9d9bb6..4c6f9f0 100644 --- a/forge-cli/runtime/runner.go +++ b/forge-cli/runtime/runner.go @@ -316,6 +316,19 @@ func (r *Runner) Run(ctx context.Context) error { // (picked up in the A2A handlers) override the static stamp. // Empty env → empty stamp → fields omitted (backward compatible). auditLogger.WithTenancy(os.Getenv("FORGE_ORG_ID"), os.Getenv("FORGE_WORKSPACE_ID")) + // Deployment-time entity stamp (#164). 
Resolution mirrors + // BuildGuardrailChecker's existing agent-ID resolution + // (guardrails_loader.go) so the Forge NDJSON stream's entity_id + // matches the library's MongoDB GuardrailAuditEvent.entity_id + // column 1:1 — SIEM consumers can join on the same value. + // EntityType is hardcoded to "agent" because that's the only + // entity Forge runs today; future entity types would change + // the value, not the schema. + agentID := os.Getenv("FORGE_AGENT_ID") + if agentID == "" && r.cfg.Config != nil { + agentID = r.cfg.Config.AgentID + } + auditLogger.WithEntity("agent", agentID) // 4a. Build guardrail checker (DB mode → file mode → defaults) and // wire the audit logger so every mask/block/warn decision lands on diff --git a/forge-core/runtime/audit.go b/forge-core/runtime/audit.go index dfb1d06..947b757 100644 --- a/forge-core/runtime/audit.go +++ b/forge-core/runtime/audit.go @@ -208,6 +208,33 @@ type AuditEvent struct { OrgID string `json:"org_id,omitempty"` WorkspaceID string `json:"workspace_id,omitempty"` + // EntityID + EntityType identify which entity emitted this event. + // Sourced from two layers (highest precedence first): + // + // 1. Explicit value set on the event before emit. + // 2. Deployment-time stamp installed on the AuditLogger via + // WithEntity(entityType, entityID) — typically populated from + // FORGE_AGENT_ID / cfg.AgentID at agent startup, with + // EntityType hardcoded to "agent" for now. + // + // No per-request ctx layer: entity identity is fixed at process + // startup. If an agent serves multiple tenancies per request, the + // OrgID / WorkspaceID layer above already covers that. + // + // Field names + values match the guardrails library's BasePayload + // vocabulary (EntityID, EntityType — "agent" / "workflow" / + // "assistant"), so the Forge NDJSON stream and the library's + // MongoDB GuardrailAuditEvent collection share columns 1:1 and + // can be joined without a translation table. 
EntityType is + // hardcoded to "agent" today since Forge only runs agents; + // future entity types are an additive value change, not a schema + // change. + // + // Both keys use omitempty so deployments that don't set agent_id + // keep emitting the pre-#164 JSON shape verbatim. + EntityID string `json:"entity_id,omitempty"` + EntityType string `json:"entity_type,omitempty"` + // LLM call attribution (llm_call, llm_call_cancelled, invocation_complete). Model string `json:"model,omitempty"` Provider string `json:"provider,omitempty"` @@ -283,6 +310,16 @@ type AuditLogger struct { // issue #157. tenantOrgID string tenantWorkspaceID string + + // Static entity stamp, installed once at agent startup via + // WithEntity(). Populated from FORGE_AGENT_ID / cfg.AgentID + // in the CLI runner with entityType hardcoded to "agent". + // Every emit stamps these so SIEM consumers can join the Forge + // NDJSON stream against the guardrails library's MongoDB + // GuardrailAuditEvent collection on (entity_id, entity_type) + // without translation. See issue #164. + tenantEntityID string + tenantEntityType string } // WithTenancy installs the deployment-time tenancy stamp on the @@ -317,6 +354,41 @@ func (a *AuditLogger) tenancyStamp() (orgID, workspaceID string) { return a.tenantOrgID, a.tenantWorkspaceID } +// WithEntity installs the deployment-time entity stamp on the +// AuditLogger. entityType matches the guardrails library's +// EntityType constants ("agent" / "workflow" / "assistant"); today +// Forge only runs agents, so the runner hardcodes "agent". Empty +// arguments disable the stamp for that field. Called once at runner +// startup after resolving FORGE_AGENT_ID / cfg.AgentID. Returns the +// receiver for fluent construction. +// +// Precedence at emit time (highest first): +// +// 1. Explicit EntityID/EntityType set on the AuditEvent. +// 2. The static stamp installed here. +// +// No per-request context layer: entity identity is fixed at process +// startup. 
If a deployment needs per-request entity routing, that's +// the tenancy layer's job (OrgID/WorkspaceID) — agent identity is +// the process, by definition. +// +// See issue #164. +func (a *AuditLogger) WithEntity(entityType, entityID string) *AuditLogger { + a.mu.Lock() + a.tenantEntityID = entityID + a.tenantEntityType = entityType + a.mu.Unlock() + return a +} + +// entityStamp returns the static entity under lock. Internal — emit +// paths use this. +func (a *AuditLogger) entityStamp() (entityID, entityType string) { + a.mu.Lock() + defer a.mu.Unlock() + return a.tenantEntityID, a.tenantEntityType +} + // NewAuditLogger creates a single-sink AuditLogger wrapping the given // writer. Backward-compatible with pre-FWS-7 callers; tests and the // CLI's per-command audit loggers (channel.go / run.go) continue to @@ -424,6 +496,17 @@ func (a *AuditLogger) Emit(event AuditEvent) { event.WorkspaceID = staticWS } } + // Deployment-time entity stamp (#164). Mirrors the tenancy stamp + // but with no ctx layer since entity identity is process-fixed. + if event.EntityID == "" || event.EntityType == "" { + staticEntityID, staticEntityType := a.entityStamp() + if event.EntityID == "" { + event.EntityID = staticEntityID + } + if event.EntityType == "" { + event.EntityType = staticEntityType + } + } data, err := json.Marshal(event) if err != nil { return diff --git a/forge-core/runtime/entity_test.go b/forge-core/runtime/entity_test.go new file mode 100644 index 0000000..d23cabe --- /dev/null +++ b/forge-core/runtime/entity_test.go @@ -0,0 +1,116 @@ +package runtime + +import ( + "bytes" + "context" + "encoding/json" + "strings" + "testing" +) + +// TestAuditLogger_StaticEntityStampsPlainEmit pins the deployment- +// time stamp behavior: WithEntity installed once at startup, plain +// Emit (no ctx) lands entity_id + entity_type on the event. This is +// the startup-banner case (agent_card_published, policy_loaded). 
+func TestAuditLogger_StaticEntityStampsPlainEmit(t *testing.T) {
+	sink := new(bytes.Buffer)
+	NewAuditLogger(sink).WithEntity("agent", "my-agent").Emit(AuditEvent{Event: "test_banner"})
+
+	var decoded AuditEvent
+	err := json.Unmarshal(sink.Bytes(), &decoded)
+	if err != nil {
+		t.Fatalf("unmarshal: %v", err)
+	}
+	if id := decoded.EntityID; id != "my-agent" {
+		t.Errorf("EntityID = %q, want my-agent", id)
+	}
+	if kind := decoded.EntityType; kind != "agent" {
+		t.Errorf("EntityType = %q, want agent", kind)
+	}
+}
+
+// TestAuditLogger_NoEntityStamp_OmitsFields guards backward
+// compatibility: a logger that never had WithEntity called must emit
+// JSON with neither an entity_id nor an entity_type key.
+func TestAuditLogger_NoEntityStamp_OmitsFields(t *testing.T) {
+	sink := new(bytes.Buffer)
+	NewAuditLogger(sink).Emit(AuditEvent{Event: "test_banner"})
+	// Check the raw text: decoding into AuditEvent would hide an
+	// emitted-but-empty key behind the zero value.
+	raw := sink.String()
+	if strings.Contains(raw, `"entity_id"`) {
+		t.Errorf("expected no entity_id key, got: %s", raw)
+	}
+	if strings.Contains(raw, `"entity_type"`) {
+		t.Errorf("expected no entity_type key, got: %s", raw)
+	}
+}
+
+// TestEmitFromContext_StaticEntityStampsPerInvocationEvents checks
+// that EmitFromContext applies the static entity stamp as well, so
+// per-invocation events carry entity_id / entity_type next to the
+// request-scoped correlation fields, not only startup banners.
+func TestEmitFromContext_StaticEntityStampsPerInvocationEvents(t *testing.T) {
+	sink := new(bytes.Buffer)
+	logger := NewAuditLogger(sink).WithEntity("agent", "my-agent")
+
+	// Request-scoped identifiers that must survive next to the stamp.
+	ctx := WithTaskID(WithCorrelationID(context.Background(), "corr-1"), "task-1")
+	logger.EmitFromContext(ctx, AuditEvent{Event: "test_invocation"})
+
+	var decoded AuditEvent
+	if err := json.Unmarshal(sink.Bytes(), &decoded); err != nil {
+		t.Fatalf("unmarshal: %v", err)
+	}
+	if decoded.EntityID != "my-agent" || decoded.EntityType != "agent" {
+		t.Errorf("got (entity_id=%q, entity_type=%q), want (my-agent, agent)",
+			decoded.EntityID, decoded.EntityType)
+	}
+	if decoded.CorrelationID != "corr-1" || decoded.TaskID != "task-1" {
+		t.Errorf("correlation/task not preserved: %+v", decoded)
+	}
+}
+
+// TestEmitFromContext_ExplicitEntityValueWins pins the precedence
+// rule that values already set on the event beat the logger's static
+// stamp: with ("agent", "my-agent") installed, an event carrying its
+// own EntityID / EntityType must come out with those values intact.
+func TestEmitFromContext_ExplicitEntityValueWins(t *testing.T) {
+	explicit := AuditEvent{
+		Event:      "test_explicit",
+		EntityID:   "explicit-entity",
+		EntityType: "workflow",
+	}
+	sink := new(bytes.Buffer)
+	NewAuditLogger(sink).WithEntity("agent", "my-agent").EmitFromContext(context.Background(), explicit)
+
+	var decoded AuditEvent
+	if err := json.Unmarshal(sink.Bytes(), &decoded); err != nil {
+		t.Fatalf("unmarshal: %v", err)
+	}
+	if decoded.EntityID != "explicit-entity" {
+		t.Errorf("EntityID = %q, want explicit-entity", decoded.EntityID)
+	}
+	if decoded.EntityType != "workflow" {
+		t.Errorf("EntityType = %q, want workflow", decoded.EntityType)
+	}
+}
+
+// TestAuditLogger_WithEntity_PartialStamp pins per-field independence
+// in both directions: WithEntity stamps only its non-empty argument,
+// and omitempty drops the other key from the emitted JSON. The
+// ("agent", "") row is what a runner with no agent ID would install.
+func TestAuditLogger_WithEntity_PartialStamp(t *testing.T) {
+	for _, tc := range []struct{ typ, id, want, absent string }{
+		{"", "my-agent", `"entity_id":"my-agent"`, `"entity_type"`},
+		{"agent", "", `"entity_type":"agent"`, `"entity_id"`},
+	} {
+		var buf bytes.Buffer
+		NewAuditLogger(&buf).WithEntity(tc.typ, tc.id).Emit(AuditEvent{Event: "test_banner"})
+
+		out := buf.String()
+		if !strings.Contains(out, tc.want) || strings.Contains(out, tc.absent) {
+			t.Errorf("WithEntity(%q, %q): want %s and no %s key, got: %s", tc.typ, tc.id, tc.want, tc.absent, out)
+		}
+	}
+}