From 9ffdc8baee5df4df231ed76ca3dd10d42e8ab3ff Mon Sep 17 00:00:00 2001
From: MK <mk@initializ.io>
Date: Mon, 15 Jun 2026 01:06:44 -0400
Subject: [PATCH] feat(audit): stamp entity_id + entity_type on every event
 (closes #164)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds top-level `entity_id` and `entity_type` fields to every
Forge audit event, sourced from FORGE_AGENT_ID / forge.yaml
`agent_id` with `entity_type` hardcoded to "agent".

Field names + values are taken straight from the guardrails
library's BasePayload vocabulary (EntityID, EntityType — "agent"
/ "workflow" / "assistant" constants) so consumers reading both
the Forge NDJSON stream and the library's MongoDB
GuardrailAuditEvent collection can join on
`(entity_id, entity_type)` 1:1 without a translation table.

Two-layer precedence (no per-request override layer — entity
identity is fixed at process startup):

  1. Explicit EntityID/EntityType on the event
  2. AuditLogger.WithEntity static stamp (from env / forge.yaml)

Both fields use omitempty. Deployments not setting agent_id keep
emitting the pre-#164 JSON shape verbatim. No schema bump.

Wiring (forge-cli/runtime/runner.go) mirrors BuildGuardrailChecker's
existing AgentID resolution (guardrails_loader.go:46-50): env wins
over forge.yaml. Called right after the existing WithTenancy stamp
so all four tenancy/entity fields land together on every event,
including startup banners (agent_card_published, policy_loaded,
audit_export_status).

Tests pin: static stamp on plain Emit, no stamp omits both keys,
EmitFromContext per-invocation events carry the stamp alongside
correlation_id, explicit event value beats the static stamp,
partial WithEntity ("", id) installs only EntityID.

Docs:
- docs/security/audit-logging.md gains an Entity stamping section
  with the 1:1 library-join note.
- docs/security/tenancy.md splits the precedence table into
  Tenancy fields + Entity fields subsections; documents the
  no-header-layer choice.

Future-proofs for non-agent entities: when Forge adds workflow
or assistant runtimes, the field name doesn't change — only the
stamped value. Additive value change, not a schema change.
---
 docs/security/audit-logging.md    |  31 ++++++++
 docs/security/tenancy.md          |  13 ++++
 forge-cli/runtime/runner.go       |  13 ++++
 forge-core/runtime/audit.go       |  83 +++++++++++++++++++++
 forge-core/runtime/entity_test.go | 116 ++++++++++++++++++++++++++++++
 5 files changed, 256 insertions(+)
 create mode 100644 forge-core/runtime/entity_test.go

diff --git a/docs/security/audit-logging.md b/docs/security/audit-logging.md
index 3694edc..1f189b5 100644
--- a/docs/security/audit-logging.md
+++ b/docs/security/audit-logging.md
@@ -78,6 +78,37 @@ Both fields use `omitempty`. Deployments that set neither env nor header keep em
 
 The top-level `org_id` is distinct from `auth_verify.fields.org_id`, which carries whatever the inbound auth token claimed (provider-derived). The top-level value is the operator's declared tenancy, trusted because the deployment / orchestrator set it. Both can be present on the same `auth_verify` event when they're different identifiers (e.g., the token came from a federated identity but the agent is deployed into a specific workspace).
 
+### Entity stamping (`entity_id` / `entity_type`)
+
+Every audit event also carries the identifier and type of the entity that emitted it:
+
+| Layer | Source |
+|-------|--------|
+| Per-event explicit | `AuditEvent.EntityID` / `AuditEvent.EntityType` |
+| Deployment-time stamp | `entity_id` from `FORGE_AGENT_ID` env, falling back to forge.yaml `agent_id`; `entity_type` hardcoded to `"agent"` |
+
+```yaml
+env:
+  - name: FORGE_AGENT_ID
+    value: "aibuilderdemo"        # or just set forge.yaml agent_id
+```
+
+Emitted events then look like:
+
+```json
+{
+  "ts": "...",
+  "event": "session_start",
+  "entity_id": "aibuilderdemo",
+  "entity_type": "agent",
+  ...
+}
+```
+
+**1:1 join with the guardrails library's MongoDB audit.** When `FORGE_GUARDRAILS_DB` is set, the library writes its own audit records into a `GuardrailAuditEvent` collection in MongoDB carrying the same `entity_id` + `entity_type` columns. The values are sourced from the same env vars / forge.yaml so consumers reading both streams can join `forge.entity_id == library.entity_id AND forge.entity_type == library.entity_type` without translation. Forge only runs `entity_type: "agent"` today; the library supports `agent` / `workflow` / `assistant` as future-compatible values.
+
+Entity identity has no per-request override — agent identity is fixed at process startup. The tenancy layer above (`org_id` / `workspace_id`) covers the multi-tenant routing case.
+
 See [Tenancy stamping reference](tenancy.md) for the precedence rules and the agent-to-agent propagation helper.
 
 ### Token usage and execution duration
diff --git a/docs/security/tenancy.md b/docs/security/tenancy.md
index 8537f33..4390de4 100644
--- a/docs/security/tenancy.md
+++ b/docs/security/tenancy.md
@@ -14,6 +14,8 @@ The same agent process supports both the static-deployment case (one
 agent serves one workspace) and the multi-tenant routing case (one
 agent serves many workspaces, the orchestrator picks per request).
 
+### Tenancy fields (`org_id` / `workspace_id`)
+
 | Layer | Source | Wins when |
 |-------|--------|-----------|
 | 1 — Explicit on event | `AuditEvent.OrgID` / `AuditEvent.WorkspaceID` set before emit | Always — caller-owned event takes precedence over every fallback |
@@ -22,6 +24,17 @@ agent serves many workspaces, the orchestrator picks per request).
 
 Each field is resolved independently. A request that overrides only `X-Forge-Org-ID` still lets the env stamp fill in `workspace_id`.
 
+### Entity fields (`entity_id` / `entity_type`) — #164
+
+| Layer | Source | Wins when |
+|-------|--------|-----------|
+| 1 — Explicit on event | `AuditEvent.EntityID` / `AuditEvent.EntityType` set before emit | Always — caller-owned event takes precedence |
+| 2 — Deployment-time stamp | `entity_id` from `FORGE_AGENT_ID` env, falling back to forge.yaml `agent_id`; `entity_type` hardcoded to `"agent"` | Whenever the higher layer is empty |
+
+Entity identity has **no per-request header layer** — it is fixed at process startup. A deployment that needs per-request routing should use the tenancy layer above, which already covers one agent serving multiple workspaces. Agent identity is tied to the process, by definition.
+
+`entity_type` and `entity_id` match the field names + values the guardrails library writes to its MongoDB `GuardrailAuditEvent` collection (see `EntityType` constants: `agent` / `workflow` / `assistant`). When `FORGE_GUARDRAILS_DB` is set, both streams carry the same `(entity_id, entity_type)` pair and consumers join them 1:1 without translation. Forge only runs agents today, so the value is always `"agent"`; future entity types are an additive value change, not a schema change.
+
 ## Static tenancy (one agent per workspace)
 
 The simplest case: deploy one Forge agent into one workspace, declare the tenancy via env, never set headers. Every emitted event — including startup banners — carries the stamp.
diff --git a/forge-cli/runtime/runner.go b/forge-cli/runtime/runner.go
index a9d9bb6..4c6f9f0 100644
--- a/forge-cli/runtime/runner.go
+++ b/forge-cli/runtime/runner.go
@@ -316,6 +316,19 @@ func (r *Runner) Run(ctx context.Context) error {
 	// (picked up in the A2A handlers) override the static stamp.
 	// Empty env → empty stamp → fields omitted (backward compatible).
 	auditLogger.WithTenancy(os.Getenv("FORGE_ORG_ID"), os.Getenv("FORGE_WORKSPACE_ID"))
+	// Deployment-time entity stamp (#164). Resolution mirrors
+	// BuildGuardrailChecker (guardrails_loader.go): FORGE_AGENT_ID
+	// wins over forge.yaml agent_id, so entity_id lines up with the
+	// library's MongoDB GuardrailAuditEvent.entity_id column. Stamp
+	// only when an ID resolved; an unconditional "agent" type would
+	// emit a lone entity_type key and break the pre-#164 JSON shape.
+	agentID := os.Getenv("FORGE_AGENT_ID")
+	if agentID == "" && r.cfg.Config != nil {
+		agentID = r.cfg.Config.AgentID
+	}
+	if agentID != "" {
+		auditLogger.WithEntity("agent", agentID)
+	}
 
 	// 4a. Build guardrail checker (DB mode → file mode → defaults) and
 	// wire the audit logger so every mask/block/warn decision lands on
diff --git a/forge-core/runtime/audit.go b/forge-core/runtime/audit.go
index dfb1d06..947b757 100644
--- a/forge-core/runtime/audit.go
+++ b/forge-core/runtime/audit.go
@@ -208,6 +208,33 @@ type AuditEvent struct {
 	OrgID       string `json:"org_id,omitempty"`
 	WorkspaceID string `json:"workspace_id,omitempty"`
 
+	// EntityID + EntityType identify which entity emitted this event.
+	// Sourced from two layers (highest precedence first):
+	//
+	//   1. Explicit value set on the event before emit.
+	//   2. Deployment-time stamp installed on the AuditLogger via
+	//      WithEntity(entityType, entityID) — typically populated from
+	//      FORGE_AGENT_ID / cfg.AgentID at agent startup, with
+	//      EntityType hardcoded to "agent" for now.
+	//
+	// No per-request ctx layer: entity identity is fixed at process
+	// startup. If an agent serves multiple tenancies per request, the
+	// OrgID / WorkspaceID layer above already covers that.
+	//
+	// Field names + values match the guardrails library's BasePayload
+	// vocabulary (EntityID, EntityType — "agent" / "workflow" /
+	// "assistant"), so the Forge NDJSON stream and the library's
+	// MongoDB GuardrailAuditEvent collection share columns 1:1 and
+	// can be joined without a translation table. EntityType is
+	// hardcoded to "agent" today since Forge only runs agents;
+	// future entity types are an additive value change, not a schema
+	// change.
+	//
+	// Both keys use omitempty so deployments that don't set agent_id
+	// keep emitting the pre-#164 JSON shape verbatim.
+	EntityID   string `json:"entity_id,omitempty"`
+	EntityType string `json:"entity_type,omitempty"`
+
 	// LLM call attribution (llm_call, llm_call_cancelled, invocation_complete).
 	Model    string `json:"model,omitempty"`
 	Provider string `json:"provider,omitempty"`
@@ -283,6 +310,16 @@ type AuditLogger struct {
 	// issue #157.
 	tenantOrgID       string
 	tenantWorkspaceID string
+
+	// Static entity stamp, installed once at agent startup via
+	// WithEntity(). Populated from FORGE_AGENT_ID / cfg.AgentID
+	// in the CLI runner with entityType hardcoded to "agent".
+	// Every emit stamps these so SIEM consumers can join the Forge
+	// NDJSON stream against the guardrails library's MongoDB
+	// GuardrailAuditEvent collection on (entity_id, entity_type)
+	// without translation. See issue #164.
+	tenantEntityID   string
+	tenantEntityType string
 }
 
 // WithTenancy installs the deployment-time tenancy stamp on the
@@ -317,6 +354,41 @@ func (a *AuditLogger) tenancyStamp() (orgID, workspaceID string) {
 	return a.tenantOrgID, a.tenantWorkspaceID
 }
 
+// WithEntity records the deployment-time entity stamp on the
+// AuditLogger and returns the receiver so calls can be chained.
+// Note the argument order: entityType first, then entityID.
+// entityType is expected to be one of the guardrails library's
+// EntityType values ("agent" / "workflow" / "assistant"); the
+// runner passes "agent". An empty argument disables the stamp
+// for that field. Intended to be called once at runner startup,
+// after FORGE_AGENT_ID / cfg.AgentID has been resolved.
+//
+// Precedence at emit time (highest first):
+//
+//  1. Explicit EntityID/EntityType set on the AuditEvent.
+//  2. The static stamp installed here.
+//
+// There is no per-request context layer: entity identity is
+// fixed at process startup. Per-request routing belongs to the
+// tenancy layer (OrgID/WorkspaceID).
+//
+// See issue #164.
+func (a *AuditLogger) WithEntity(entityType, entityID string) *AuditLogger {
+	a.mu.Lock()
+	defer a.mu.Unlock()
+	a.tenantEntityType = entityType
+	a.tenantEntityID = entityID
+	return a
+}
+
+// entityStamp snapshots the static stamp under a.mu as (ID, type); emit-path helper.
+func (a *AuditLogger) entityStamp() (entityID, entityType string) {
+	a.mu.Lock()
+	entityID, entityType = a.tenantEntityID, a.tenantEntityType
+	a.mu.Unlock()
+	return entityID, entityType
+}
+
 // NewAuditLogger creates a single-sink AuditLogger wrapping the given
 // writer. Backward-compatible with pre-FWS-7 callers; tests and the
 // CLI's per-command audit loggers (channel.go / run.go) continue to
@@ -424,6 +496,17 @@ func (a *AuditLogger) Emit(event AuditEvent) {
 			event.WorkspaceID = staticWS
 		}
 	}
+	// Deployment-time entity stamp (#164). Mirrors the tenancy stamp
+	// but with no ctx layer since entity identity is process-fixed.
+	if event.EntityID == "" || event.EntityType == "" {
+		staticEntityID, staticEntityType := a.entityStamp()
+		if event.EntityID == "" {
+			event.EntityID = staticEntityID
+		}
+		if event.EntityType == "" {
+			event.EntityType = staticEntityType
+		}
+	}
 	data, err := json.Marshal(event)
 	if err != nil {
 		return
diff --git a/forge-core/runtime/entity_test.go b/forge-core/runtime/entity_test.go
new file mode 100644
index 0000000..d23cabe
--- /dev/null
+++ b/forge-core/runtime/entity_test.go
@@ -0,0 +1,116 @@
+package runtime
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"strings"
+	"testing"
+)
+
+// TestAuditLogger_StaticEntityStampsPlainEmit checks that a stamp
+// installed with WithEntity shows up on a plain Emit (no ctx): the
+// decoded event must carry both entity_id and entity_type. This
+// models startup banners such as agent_card_published.
+func TestAuditLogger_StaticEntityStampsPlainEmit(t *testing.T) {
+	sink := &bytes.Buffer{}
+	logger := NewAuditLogger(sink).WithEntity("agent", "my-agent")
+	logger.Emit(AuditEvent{Event: "test_banner"})
+
+	var decoded AuditEvent
+	if err := json.Unmarshal(sink.Bytes(), &decoded); err != nil {
+		t.Fatalf("unmarshal: %v", err)
+	}
+	if id := decoded.EntityID; id != "my-agent" {
+		t.Errorf("EntityID = %q, want my-agent", id)
+	}
+	if typ := decoded.EntityType; typ != "agent" {
+		t.Errorf("EntityType = %q, want agent", typ)
+	}
+}
+
+// TestAuditLogger_NoEntityStamp_OmitsFields guards back-compat: a
+// logger that never had WithEntity called must emit JSON with
+// neither an entity_id nor an entity_type key.
+func TestAuditLogger_NoEntityStamp_OmitsFields(t *testing.T) {
+	sink := &bytes.Buffer{}
+	NewAuditLogger(sink).Emit(AuditEvent{Event: "test_banner"})
+	emitted := sink.String()
+	hasID := strings.Contains(emitted, `"entity_id"`)
+	hasType := strings.Contains(emitted, `"entity_type"`)
+	if hasID {
+		t.Errorf("expected no entity_id key, got: %s", emitted)
+	}
+	if hasType {
+		t.Errorf("expected no entity_type key, got: %s", emitted)
+	}
+}
+
+// TestEmitFromContext_StaticEntityStampsPerInvocationEvents checks
+// that EmitFromContext events also receive the static stamp, and
+// that the request-scoped correlation_id / task_id taken from ctx
+// are still present next to it.
+func TestEmitFromContext_StaticEntityStampsPerInvocationEvents(t *testing.T) {
+	sink := &bytes.Buffer{}
+	logger := NewAuditLogger(sink).WithEntity("agent", "my-agent")
+	ctx := WithTaskID(WithCorrelationID(context.Background(), "corr-1"), "task-1")
+	logger.EmitFromContext(ctx, AuditEvent{Event: "test_invocation"})
+
+	var decoded AuditEvent
+	if err := json.Unmarshal(sink.Bytes(), &decoded); err != nil {
+		t.Fatalf("unmarshal: %v", err)
+	}
+	entityOK := decoded.EntityID == "my-agent" && decoded.EntityType == "agent"
+	if !entityOK {
+		t.Errorf("got (entity_id=%q, entity_type=%q), want (my-agent, agent)",
+			decoded.EntityID, decoded.EntityType)
+	}
+	ctxOK := decoded.CorrelationID == "corr-1" && decoded.TaskID == "task-1"
+	if !ctxOK {
+		t.Errorf("correlation/task not preserved: %+v", decoded)
+	}
+}
+
+// TestEmitFromContext_ExplicitEntityValueWins pins the precedence
+// rule: EntityID / EntityType set directly on the event must
+// survive emit unchanged even when the logger carries a different
+// static stamp.
+func TestEmitFromContext_ExplicitEntityValueWins(t *testing.T) {
+	sink := &bytes.Buffer{}
+	logger := NewAuditLogger(sink).WithEntity("agent", "my-agent")
+	explicit := AuditEvent{
+		Event:      "test_explicit",
+		EntityID:   "explicit-entity",
+		EntityType: "workflow",
+	}
+	logger.EmitFromContext(context.Background(), explicit)
+	var decoded AuditEvent
+	if err := json.Unmarshal(sink.Bytes(), &decoded); err != nil {
+		t.Fatalf("unmarshal: %v", err)
+	}
+	if id := decoded.EntityID; id != "explicit-entity" {
+		t.Errorf("EntityID = %q, want explicit-entity", id)
+	}
+	if typ := decoded.EntityType; typ != "workflow" {
+		t.Errorf("EntityType = %q, want workflow", typ)
+	}
+}
+
+// TestAuditLogger_WithEntity_PartialStamp pins per-field
+// independence for the ID-only case: WithEntity("", id) stamps
+// entity_id, and the empty entity_type is dropped from the JSON
+// by its omitempty tag.
+func TestAuditLogger_WithEntity_PartialStamp(t *testing.T) {
+	sink := &bytes.Buffer{}
+	// Stamp an ID but leave the type empty.
+	NewAuditLogger(sink).WithEntity("", "my-agent").Emit(AuditEvent{Event: "test_banner"})
+	emitted := sink.String()
+
+	hasID := strings.Contains(emitted, `"entity_id":"my-agent"`)
+	if !hasID {
+		t.Errorf("expected entity_id=my-agent, got: %s", emitted)
+	}
+	if strings.Contains(emitted, `"entity_type"`) {
+		t.Errorf("expected no entity_type key (empty), got: %s", emitted)
+	}
+}