diff --git a/.github/workflows/ip-guard.yml b/.github/workflows/ip-guard.yml index e2f5cf4..a199a53 100644 --- a/.github/workflows/ip-guard.yml +++ b/.github/workflows/ip-guard.yml @@ -51,7 +51,7 @@ jobs: # architecture sub-type prefixes (PC-nnnn, PS-nnnn, etc.). # Exclude known product/architecture identifiers via lookahead: SAFE='ADR-|INV-|CAP-|DEC-|REL-|COMP-|CONST-|SYS-|SHA-|ISO-' - SAFE+='|BOUND-|SYSBOUND-|IFACE-|GAP-|CTR-' + SAFE+='|BOUND-|SYSBOUND-|IFACE-|GAP-|CTR-|IMPL-' PAT_JIRA="\b(?!${SAFE})[A-Z]{3,}-[0-9]{3,}\b" if git grep -nP "$PAT_JIRA" -- "${GLOBAL_EXCLUDES[@]}" ; then fail "$PAT_JIRA" diff --git a/.gitignore b/.gitignore index 7caca63..aaed328 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ build/ # Proof artifacts and generated reports (do not commit) lifecycle-proof/ reports/ +benchmark-results/ # Runtime state (regenerated by RECON) # .ste/state/ - Only track README.md (ignore all other files) diff --git a/CHANGELOG.md b/CHANGELOG.md index 925a969..dfb19ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,8 +7,102 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Fixed + +- Transient EPERM/EACCES/EBUSY failures on Windows during atomic file + rename in RECON population. `atomicWriteFile` now retries up to 3 times + with exponential backoff + jitter (50ms base). Prevents silently missing + slices caused by AV scanners, IDE indexers, or concurrent RECON passes + racing on directory metadata. + +- Ad-hoc temp+rename patterns in `cross-repo-edges.ts` (deterministic + `.tmp` suffix, collision-prone) and `repo-sentinel.ts` (pid-based temp) + replaced with `atomicWriteFile`, gaining retry behavior and random + collision-resistant temp names. + +- MCP server startup hang caused by O(N x DFS) topology analysis algorithm. + Replaced per-node recursive DFS with single-pass BFS layering (Kahn's + algorithm) completing in O(N+E). The 5000-node synthetic graph test + completes under 100ms. 
+ +- Redundant graph loading on MCP startup: `initialize()` and `reloadContext()` + each called `loadAidocGraph` twice (once via `initRssContext`, once for + topology analysis). Both now reuse the already-loaded `rssContext.graph`, + halving cold-start I/O. + +- Self-analysis branch in `initialize()` suffered the same redundant load; + now reuses `selfContext.graph` directly. + +- Stale `graph-metrics.json` accepted without validation. Added a node-count + delta check: metrics are recomputed when cached `totalComponents` diverges + from `graph.size` by more than 10%. + +- Sequential YAML file reads in `loadAidocGraph` replaced with + bounded-concurrency parallel reads using `ioLimiter` (16 concurrent). At + N=5000, reduces sequential ~50s I/O to ~2s. + +### Changed + +- ADR-PC-0001 amended: added implementation decisions IMPL-0001 (BFS + layering), IMPL-0002 (single graph load), IMPL-0003 (staleness check); + added INV-0026 (O(N+E) startup bound); closed GAP-0001 (implicit + performance gap). + +- ADR-PS-0001 amended: added `startup_latency` operational requirement + mandating O(N+E) startup operations and sub-10s cold-start at N=5000. + +- Manifest, architecture index, and rendered docs regenerated via + adr-architecture-kit. + ### Added +- Full infrastructure domain emission: workspace graph slices now emit all + RECON-extracted CFN resources as nodes, replacing the previous 8-type + backend-biased allowlist. Supports backend services, frontend SPAs, and + MFE monorepos equally. + +- 16 new workspace graph node types: Stack, Distribution, WebACL, Certificate, + DNSRecord, APIGateway, SecurityGroup, Secret, DBCluster, DBProxy, LogGroup, + Alarm, DeliveryStream, EventRule, Role, and InfraResource (catch-all fallback). + +- Shared `cfn-type-mapping` module (`src/workspace/cfn-type-mapping.ts`): + single source of truth for CFN-to-graph-type mapping used by both + slice-emitter and resource-resolver, preventing mapping drift. 
+ +- Stack nodes emitted from infrastructure/template slices with `contains` + edges to child resources, surfacing nested stack topology in the graph. + +- InfraResource fallback: unmapped AWS::* types are emitted as InfraResource + nodes with `cfn_type` preserved in attributes for downstream classification. + +- Generic name resolution via `NODE_NAME_KEYS`: display names resolved from + type-specific CFN property keys with `logicalId` as last-resort fallback. + No resource is dropped due to null name. + +- `contains` verb added to ratified edge vocabulary for structural + containment relationships (stack-contains-resource, stack-contains-stack). + +- Auxiliary node suppression at L0-L2 projections: Role, SecurityGroup, + LogGroup, Alarm, Certificate, and DNSRecord nodes are compressed at + overview resolutions while remaining visible at L3-L4. + +- Unit tests for full infrastructure domain emission including frontend + resource types, InfraResource fallback, auxiliary marking, and logicalId + name resolution. + +### Changed + +- ADR-L-0016 amended: CONST-0010 expanded with 16 new ratified node types, + CONST-0011 expanded with `contains` verb, INV-0019 added for emission + completeness invariant. + +- ADR-PC-0007 amended: CFN type completeness expectations documented, + GAP-0001 (Serverless::StateMachine) closed, intrinsic handling boundaries + defined. + +- ADR-PC-0008 amended: resource-to-node emission policy defined (all + extracted resources become nodes), SDK-to-graph-type mapping expanded. 
+ - `ste setup` CLI command: one-command workspace onboarding that detects workspace type (multi-repo vs single-repo), scaffolds `workspace.yaml` or `ste.config.json`, creates workspace-level `.cursor/mcp.json` with correct diff --git a/adrs/entities/registry.yaml b/adrs/entities/registry.yaml index 494d9e7..74d4d77 100644 --- a/adrs/entities/registry.yaml +++ b/adrs/entities/registry.yaml @@ -118,6 +118,18 @@ entities: depends_on: [] implements: [] realizes: [] + - entity_id: CAP-0020 + entity_type: capability + name: Source-aware CEM and MVC assembly + introduced_by: ADR-L-0020 + lifecycle_stage: proposed + source_path: adrs/logical/ADR-L-0020-source-locators-cem-mvc.yaml + source_artifact_type: logical_adr + related_adrs: [] + relationships: + depends_on: [] + implements: [] + realizes: [] - entity_id: COMP-0001 entity_type: component name: MCP Server and Tool Registry @@ -510,6 +522,22 @@ entities: realizes: - INV-0022 - INV-0023 + - entity_id: DEC-0022 + entity_type: decision + name: Use source locators as foundational infrastructure for CEM and MVC + introduced_by: ADR-L-0020 + lifecycle_stage: proposed + source_path: adrs/logical/ADR-L-0020-source-locators-cem-mvc.yaml + source_artifact_type: logical_adr + related_adrs: [] + relationships: + depends_on: [] + implements: + - CAP-0020 + realizes: + - INV-0027 + - INV-0028 + - INV-0029 - entity_id: INV-0001 entity_type: invariant name: INV-0001 @@ -798,3 +826,39 @@ entities: depends_on: [] implements: [] realizes: [] + - entity_id: INV-0027 + entity_type: invariant + name: INV-0027 + introduced_by: ADR-L-0020 + lifecycle_stage: proposed + source_path: adrs/logical/ADR-L-0020-source-locators-cem-mvc.yaml + source_artifact_type: logical_adr + related_adrs: [] + relationships: + depends_on: [] + implements: [] + realizes: [] + - entity_id: INV-0028 + entity_type: invariant + name: INV-0028 + introduced_by: ADR-L-0020 + lifecycle_stage: proposed + source_path: adrs/logical/ADR-L-0020-source-locators-cem-mvc.yaml + 
source_artifact_type: logical_adr + related_adrs: [] + relationships: + depends_on: [] + implements: [] + realizes: [] + - entity_id: INV-0029 + entity_type: invariant + name: INV-0029 + introduced_by: ADR-L-0020 + lifecycle_stage: proposed + source_path: adrs/logical/ADR-L-0020-source-locators-cem-mvc.yaml + source_artifact_type: logical_adr + related_adrs: [] + relationships: + depends_on: [] + implements: [] + realizes: [] diff --git a/adrs/index/architecture-index.yaml b/adrs/index/architecture-index.yaml index 485d5ee..679177c 100644 --- a/adrs/index/architecture-index.yaml +++ b/adrs/index/architecture-index.yaml @@ -1,7 +1,7 @@ schema_version: '1.1' type: architecture_index architecture_namespace: ste-runtime -generated_at: '2026-05-27T00:52:37Z' +generated_at: '2026-05-27T05:44:06Z' generator: adr-architecture-index entity_registry_path: adrs/index/entity-registry.yaml relationship_registry_path: adrs/index/relationship-registry.yaml @@ -16,7 +16,7 @@ validation_summary: warnings: 0 unresolved_entries: 0 source_coverage: - logical_adrs: 18 + logical_adrs: 19 physical_adrs: 5 physical_system_adrs: 2 physical_component_adrs: 11 diff --git a/adrs/index/capability-registry.yaml b/adrs/index/capability-registry.yaml index 18cd5aa..cc4d10d 100644 --- a/adrs/index/capability-registry.yaml +++ b/adrs/index/capability-registry.yaml @@ -451,3 +451,61 @@ entities: extraction_phase: extract_capability classification: explicit generator: adr-architecture-index + - id: CAP-0020 + entity_type: capability + name: Source-aware CEM and MVC assembly + summary: >- + Resolve workspace graph entities to authoritative source artifacts through stable URI locators, assemble CEM + bundles with provenance and validation state, derive bounded MVC bundles, and validate MVC bundles against thei + lifecycle_stage: proposed + canonical_source: + source_type: logical_adr + source_ref: ADR-L-0020#CAP-0020 + artifact_path: adrs/logical/ADR-L-0020-source-locators-cem-mvc.yaml + source_refs: 
[] + metadata: + adr_id: ADR-L-0020 + domains: + - workspace + - graph + - cem + - mvc + - provenance + implemented_by_components: + - COMP-0011 + - COMP-0010 + enabled_by_decisions: + - DEC-0022 + relationships: + declared_in: + - ADR-L-0020 + declares: [] + references: [] + referenced_by: [] + related_to: [] + enforces: [] + enforced_by: [] + enabled_by: + - DEC-0022 + enables: [] + governs: [] + governed_by: [] + implemented_by: + - COMP-0010 + - COMP-0011 + implements: [] + embodied_in: [] + embodies: [] + supersedes: [] + superseded_by: [] + refines: [] + refined_by: [] + completeness: + status: complete + missing_fields: [] + provenance: + source_type: logical_adr + source_ref: ADR-L-0020#CAP-0020 + extraction_phase: extract_capability + classification: explicit + generator: adr-architecture-index diff --git a/adrs/index/component-registry.yaml b/adrs/index/component-registry.yaml index 342783d..75e88c8 100644 --- a/adrs/index/component-registry.yaml +++ b/adrs/index/component-registry.yaml @@ -533,6 +533,7 @@ entities: implements: - CAP-0018 - CAP-0019 + - CAP-0020 embodied_in: - SYS-0001 embodies: [] @@ -590,6 +591,7 @@ entities: implements: - CAP-0018 - CAP-0019 + - CAP-0020 embodied_in: - SYS-0001 embodies: [] diff --git a/adrs/index/decision-registry.yaml b/adrs/index/decision-registry.yaml index e673190..c0beed1 100644 --- a/adrs/index/decision-registry.yaml +++ b/adrs/index/decision-registry.yaml @@ -1006,3 +1006,61 @@ entities: extraction_phase: extract_decision classification: explicit generator: adr-architecture-index + - id: DEC-0022 + entity_type: decision + name: Use source locators as foundational infrastructure for CEM and MVC + summary: >- + Entity URI resolution is necessary but not sufficient for IDE and conversation-engine reasoning. 
ste-runtime must + assemble a correctness-first CEM bundle, derive a bounded MVC projection, and validate the MVC against the + lifecycle_stage: proposed + canonical_source: + source_type: logical_adr + source_ref: ADR-L-0020#DEC-0022 + artifact_path: adrs/logical/ADR-L-0020-source-locators-cem-mvc.yaml + source_refs: [] + metadata: + adr_id: ADR-L-0020 + related_invariants: + - INV-0027 + - INV-0028 + - INV-0029 + enforces_invariants: [] + enables_capabilities: + - CAP-0020 + governs_components: [] + supersedes: [] + refines: [] + relationships: + declared_in: + - ADR-L-0020 + declares: [] + references: [] + referenced_by: [] + related_to: [] + enforces: + - INV-0027 + - INV-0028 + - INV-0029 + enforced_by: [] + enabled_by: [] + enables: + - CAP-0020 + governs: [] + governed_by: [] + implemented_by: [] + implements: [] + embodied_in: [] + embodies: [] + supersedes: [] + superseded_by: [] + refines: [] + refined_by: [] + completeness: + status: complete + missing_fields: [] + provenance: + source_type: logical_adr + source_ref: ADR-L-0020#DEC-0022 + extraction_phase: extract_decision + classification: explicit + generator: adr-architecture-index diff --git a/adrs/index/entity-registry.yaml b/adrs/index/entity-registry.yaml index 6ffe32e..a04a539 100644 --- a/adrs/index/entity-registry.yaml +++ b/adrs/index/entity-registry.yaml @@ -845,6 +845,7 @@ entities: referenced_by: - ADR-L-0018 - ADR-L-0019 + - ADR-L-0020 related_to: [] enforces: [] enforced_by: [] @@ -902,7 +903,8 @@ entities: references: - ADR-L-0009 - ADR-L-0013 - referenced_by: [] + referenced_by: + - ADR-L-0020 related_to: [] enforces: [] enforced_by: [] @@ -968,6 +970,7 @@ entities: - ADR-L-0019 referenced_by: - ADR-L-0019 + - ADR-L-0020 related_to: [] enforces: [] enforced_by: [] @@ -1031,6 +1034,7 @@ entities: - ADR-L-0018 referenced_by: - ADR-L-0018 + - ADR-L-0020 related_to: [] enforces: [] enforced_by: [] @@ -1055,6 +1059,73 @@ entities: extraction_phase: extract_adr classification: explicit 
generator: adr-architecture-index + - id: ADR-L-0020 + entity_type: adr + name: Source Locators as Cognitive Execution Model Infrastructure + summary: >- + The workspace graph currently supports deterministic traversal and projection, but graph entities need stable, + portable links back to authoritative source artifacts before the graph can safely support IDE-native reasonin + lifecycle_stage: proposed + canonical_source: + source_type: logical_adr + source_ref: ADR-L-0020 + artifact_path: adrs/logical/ADR-L-0020-source-locators-cem-mvc.yaml + source_refs: [] + metadata: + status: proposed + domains: + - workspace + - graph + - cem + - mvc + - provenance + tags: + - source-locators + - entity-uri + - cognitive-execution-model + - mvc + - provenance + - deterministic + related_reviews: [] + related_overrides: [] + relationships: + declared_in: [] + declares: + - CAP-0020 + - DEC-0022 + - INV-0027 + - INV-0028 + - INV-0029 + references: + - ADR-L-0016 + - ADR-L-0017 + - ADR-L-0018 + - ADR-L-0019 + referenced_by: [] + related_to: [] + enforces: [] + enforced_by: [] + enabled_by: [] + enables: [] + governs: [] + governed_by: [] + implemented_by: [] + implements: [] + embodied_in: [] + embodies: [] + supersedes: [] + superseded_by: [] + refines: [] + refined_by: [] + completeness: + status: complete + missing_fields: [] + provenance: + source_type: logical_adr + source_ref: ADR-L-0020 + extraction_phase: extract_adr + classification: explicit + generator: adr-architecture-index - id: ADR-P-0001 entity_type: adr name: RSS CLI Implementation for Developer-Invoked Graph Traversal @@ -2474,6 +2545,64 @@ entities: extraction_phase: extract_capability classification: explicit generator: adr-architecture-index + - id: CAP-0020 + entity_type: capability + name: Source-aware CEM and MVC assembly + summary: >- + Resolve workspace graph entities to authoritative source artifacts through stable URI locators, assemble CEM + bundles with provenance and validation state, derive 
bounded MVC bundles, and validate MVC bundles against thei + lifecycle_stage: proposed + canonical_source: + source_type: logical_adr + source_ref: ADR-L-0020#CAP-0020 + artifact_path: adrs/logical/ADR-L-0020-source-locators-cem-mvc.yaml + source_refs: [] + metadata: + adr_id: ADR-L-0020 + domains: + - workspace + - graph + - cem + - mvc + - provenance + implemented_by_components: + - COMP-0011 + - COMP-0010 + enabled_by_decisions: + - DEC-0022 + relationships: + declared_in: + - ADR-L-0020 + declares: [] + references: [] + referenced_by: [] + related_to: [] + enforces: [] + enforced_by: [] + enabled_by: + - DEC-0022 + enables: [] + governs: [] + governed_by: [] + implemented_by: + - COMP-0010 + - COMP-0011 + implements: [] + embodied_in: [] + embodies: [] + supersedes: [] + superseded_by: [] + refines: [] + refined_by: [] + completeness: + status: complete + missing_fields: [] + provenance: + source_type: logical_adr + source_ref: ADR-L-0020#CAP-0020 + extraction_phase: extract_capability + classification: explicit + generator: adr-architecture-index - id: COMP-0001 entity_type: component name: MCP Server and Tool Registry @@ -3006,6 +3135,7 @@ entities: implements: - CAP-0018 - CAP-0019 + - CAP-0020 embodied_in: - SYS-0001 embodies: [] @@ -3063,6 +3193,7 @@ entities: implements: - CAP-0018 - CAP-0019 + - CAP-0020 embodied_in: - SYS-0001 embodies: [] @@ -4139,6 +4270,64 @@ entities: extraction_phase: extract_decision classification: explicit generator: adr-architecture-index + - id: DEC-0022 + entity_type: decision + name: Use source locators as foundational infrastructure for CEM and MVC + summary: >- + Entity URI resolution is necessary but not sufficient for IDE and conversation-engine reasoning. 
ste-runtime must + assemble a correctness-first CEM bundle, derive a bounded MVC projection, and validate the MVC against the + lifecycle_stage: proposed + canonical_source: + source_type: logical_adr + source_ref: ADR-L-0020#DEC-0022 + artifact_path: adrs/logical/ADR-L-0020-source-locators-cem-mvc.yaml + source_refs: [] + metadata: + adr_id: ADR-L-0020 + related_invariants: + - INV-0027 + - INV-0028 + - INV-0029 + enforces_invariants: [] + enables_capabilities: + - CAP-0020 + governs_components: [] + supersedes: [] + refines: [] + relationships: + declared_in: + - ADR-L-0020 + declares: [] + references: [] + referenced_by: [] + related_to: [] + enforces: + - INV-0027 + - INV-0028 + - INV-0029 + enforced_by: [] + enabled_by: [] + enables: + - CAP-0020 + governs: [] + governed_by: [] + implemented_by: [] + implements: [] + embodied_in: [] + embodies: [] + supersedes: [] + superseded_by: [] + refines: [] + refined_by: [] + completeness: + status: complete + missing_fields: [] + provenance: + source_type: logical_adr + source_ref: ADR-L-0020#DEC-0022 + extraction_phase: extract_decision + classification: explicit + generator: adr-architecture-index - id: INV-0001 entity_type: invariant name: INV-0001 @@ -5328,6 +5517,165 @@ entities: extraction_phase: assign_canonical_invariant classification: explicit generator: adr-architecture-index + - id: INV-0027 + entity_type: invariant + name: INV-0027 + summary: >- + Workspace graph entities may carry source locator metadata, but graph artifacts must not embed full canonical ADR, + DEC, invariant, contract, or source documents by default. 
+ lifecycle_stage: proposed + canonical_source: + source_type: logical_adr + source_ref: ADR-L-0020#INV-0027 + artifact_path: adrs/logical/ADR-L-0020-source-locators-cem-mvc.yaml + source_refs: [] + metadata: + adr_id: ADR-L-0020 + scope: global + statement: |- + Workspace graph entities may carry source locator metadata, but graph + artifacts must not embed full canonical ADR, DEC, invariant, contract, or + source documents by default. + enforcement_level: must + declaration_mode: local + upheld_by_decisions: [] + relationships: + declared_in: + - ADR-L-0020 + declares: [] + references: [] + referenced_by: [] + related_to: [] + enforces: [] + enforced_by: + - DEC-0022 + enabled_by: [] + enables: [] + governs: [] + governed_by: [] + implemented_by: [] + implements: [] + embodied_in: [] + embodies: [] + supersedes: [] + superseded_by: [] + refines: [] + refined_by: [] + completeness: + status: complete + missing_fields: [] + provenance: + source_type: logical_adr + source_ref: ADR-L-0020#INV-0027 + extraction_phase: assign_canonical_invariant + classification: explicit + generator: adr-architecture-index + - id: INV-0028 + entity_type: invariant + name: INV-0028 + summary: >- + Every MVC bundle must retain provenance back to its parent CEM bundle, graph snapshot, traversal operation, + selected source artifacts, embodiment evidence, and validation state. + lifecycle_stage: proposed + canonical_source: + source_type: logical_adr + source_ref: ADR-L-0020#INV-0028 + artifact_path: adrs/logical/ADR-L-0020-source-locators-cem-mvc.yaml + source_refs: [] + metadata: + adr_id: ADR-L-0020 + scope: global + statement: |- + Every MVC bundle must retain provenance back to its parent CEM bundle, + graph snapshot, traversal operation, selected source artifacts, embodiment + evidence, and validation state. 
+ enforcement_level: must + declaration_mode: local + upheld_by_decisions: [] + relationships: + declared_in: + - ADR-L-0020 + declares: [] + references: [] + referenced_by: [] + related_to: [] + enforces: [] + enforced_by: + - DEC-0022 + enabled_by: [] + enables: [] + governs: [] + governed_by: [] + implemented_by: [] + implements: [] + embodied_in: [] + embodies: [] + supersedes: [] + superseded_by: [] + refines: [] + refined_by: [] + completeness: + status: complete + missing_fields: [] + provenance: + source_type: logical_adr + source_ref: ADR-L-0020#INV-0028 + extraction_phase: assign_canonical_invariant + classification: explicit + generator: adr-architecture-index + - id: INV-0029 + entity_type: invariant + name: INV-0029 + summary: >- + Source locator generation, graph traversal, CEM assembly, MVC derivation, and MVC validation are deterministic for + identical workspace graph, source, locator registry, and traversal inputs. + lifecycle_stage: proposed + canonical_source: + source_type: logical_adr + source_ref: ADR-L-0020#INV-0029 + artifact_path: adrs/logical/ADR-L-0020-source-locators-cem-mvc.yaml + source_refs: [] + metadata: + adr_id: ADR-L-0020 + scope: global + statement: |- + Source locator generation, graph traversal, CEM assembly, MVC derivation, + and MVC validation are deterministic for identical workspace graph, source, + locator registry, and traversal inputs. 
+ enforcement_level: must + declaration_mode: local + upheld_by_decisions: [] + relationships: + declared_in: + - ADR-L-0020 + declares: [] + references: [] + referenced_by: [] + related_to: [] + enforces: [] + enforced_by: + - DEC-0022 + enabled_by: [] + enables: [] + governs: [] + governed_by: [] + implemented_by: [] + implements: [] + embodied_in: [] + embodies: [] + supersedes: [] + superseded_by: [] + refines: [] + refined_by: [] + completeness: + status: complete + missing_fields: [] + provenance: + source_type: logical_adr + source_ref: ADR-L-0020#INV-0029 + extraction_phase: assign_canonical_invariant + classification: explicit + generator: adr-architecture-index - id: SYS-0001 entity_type: system name: Runtime Orchestration and Assistant Integration diff --git a/adrs/index/invariant-registry.yaml b/adrs/index/invariant-registry.yaml index 709117b..9645825 100644 --- a/adrs/index/invariant-registry.yaml +++ b/adrs/index/invariant-registry.yaml @@ -1190,3 +1190,162 @@ entities: extraction_phase: assign_canonical_invariant classification: explicit generator: adr-architecture-index + - id: INV-0027 + entity_type: invariant + name: INV-0027 + summary: >- + Workspace graph entities may carry source locator metadata, but graph artifacts must not embed full canonical ADR, + DEC, invariant, contract, or source documents by default. + lifecycle_stage: proposed + canonical_source: + source_type: logical_adr + source_ref: ADR-L-0020#INV-0027 + artifact_path: adrs/logical/ADR-L-0020-source-locators-cem-mvc.yaml + source_refs: [] + metadata: + adr_id: ADR-L-0020 + scope: global + statement: |- + Workspace graph entities may carry source locator metadata, but graph + artifacts must not embed full canonical ADR, DEC, invariant, contract, or + source documents by default. 
+ enforcement_level: must + declaration_mode: local + upheld_by_decisions: [] + relationships: + declared_in: + - ADR-L-0020 + declares: [] + references: [] + referenced_by: [] + related_to: [] + enforces: [] + enforced_by: + - DEC-0022 + enabled_by: [] + enables: [] + governs: [] + governed_by: [] + implemented_by: [] + implements: [] + embodied_in: [] + embodies: [] + supersedes: [] + superseded_by: [] + refines: [] + refined_by: [] + completeness: + status: complete + missing_fields: [] + provenance: + source_type: logical_adr + source_ref: ADR-L-0020#INV-0027 + extraction_phase: assign_canonical_invariant + classification: explicit + generator: adr-architecture-index + - id: INV-0028 + entity_type: invariant + name: INV-0028 + summary: >- + Every MVC bundle must retain provenance back to its parent CEM bundle, graph snapshot, traversal operation, + selected source artifacts, embodiment evidence, and validation state. + lifecycle_stage: proposed + canonical_source: + source_type: logical_adr + source_ref: ADR-L-0020#INV-0028 + artifact_path: adrs/logical/ADR-L-0020-source-locators-cem-mvc.yaml + source_refs: [] + metadata: + adr_id: ADR-L-0020 + scope: global + statement: |- + Every MVC bundle must retain provenance back to its parent CEM bundle, + graph snapshot, traversal operation, selected source artifacts, embodiment + evidence, and validation state. 
+ enforcement_level: must + declaration_mode: local + upheld_by_decisions: [] + relationships: + declared_in: + - ADR-L-0020 + declares: [] + references: [] + referenced_by: [] + related_to: [] + enforces: [] + enforced_by: + - DEC-0022 + enabled_by: [] + enables: [] + governs: [] + governed_by: [] + implemented_by: [] + implements: [] + embodied_in: [] + embodies: [] + supersedes: [] + superseded_by: [] + refines: [] + refined_by: [] + completeness: + status: complete + missing_fields: [] + provenance: + source_type: logical_adr + source_ref: ADR-L-0020#INV-0028 + extraction_phase: assign_canonical_invariant + classification: explicit + generator: adr-architecture-index + - id: INV-0029 + entity_type: invariant + name: INV-0029 + summary: >- + Source locator generation, graph traversal, CEM assembly, MVC derivation, and MVC validation are deterministic for + identical workspace graph, source, locator registry, and traversal inputs. + lifecycle_stage: proposed + canonical_source: + source_type: logical_adr + source_ref: ADR-L-0020#INV-0029 + artifact_path: adrs/logical/ADR-L-0020-source-locators-cem-mvc.yaml + source_refs: [] + metadata: + adr_id: ADR-L-0020 + scope: global + statement: |- + Source locator generation, graph traversal, CEM assembly, MVC derivation, + and MVC validation are deterministic for identical workspace graph, source, + locator registry, and traversal inputs. 
+ enforcement_level: must + declaration_mode: local + upheld_by_decisions: [] + relationships: + declared_in: + - ADR-L-0020 + declares: [] + references: [] + referenced_by: [] + related_to: [] + enforces: [] + enforced_by: + - DEC-0022 + enabled_by: [] + enables: [] + governs: [] + governed_by: [] + implemented_by: [] + implements: [] + embodied_in: [] + embodies: [] + supersedes: [] + superseded_by: [] + refines: [] + refined_by: [] + completeness: + status: complete + missing_fields: [] + provenance: + source_type: logical_adr + source_ref: ADR-L-0020#INV-0029 + extraction_phase: assign_canonical_invariant + classification: explicit + generator: adr-architecture-index diff --git a/adrs/index/relationship-registry.yaml b/adrs/index/relationship-registry.yaml index 5cb20f0..85f984f 100644 --- a/adrs/index/relationship-registry.yaml +++ b/adrs/index/relationship-registry.yaml @@ -91,6 +91,16 @@ relationships: canonical_source_ref: ADR-L-0019#CAP-0019 confidence: 1 metadata: {} + - relationship_id: declared_in:CAP-0020:ADR-L-0020 + relationship_type: declared_in + from_entity_id: CAP-0020 + to_entity_id: ADR-L-0020 + provenance_classification: explicit + evidence: + - ADR-L-0020#CAP-0020 + canonical_source_ref: ADR-L-0020#CAP-0020 + confidence: 1 + metadata: {} - relationship_id: declared_in:COMP-0001:ADR-PC-0001 relationship_type: declared_in from_entity_id: COMP-0001 @@ -411,6 +421,16 @@ relationships: canonical_source_ref: ADR-L-0019#DEC-0021 confidence: 1 metadata: {} + - relationship_id: declared_in:DEC-0022:ADR-L-0020 + relationship_type: declared_in + from_entity_id: DEC-0022 + to_entity_id: ADR-L-0020 + provenance_classification: explicit + evidence: + - ADR-L-0020#DEC-0022 + canonical_source_ref: ADR-L-0020#DEC-0022 + confidence: 1 + metadata: {} - relationship_id: declared_in:INV-0001:ADR-L-0001 relationship_type: declared_in from_entity_id: INV-0001 @@ -651,6 +671,36 @@ relationships: canonical_source_ref: ADR-L-0018#INV-0024 confidence: 1 metadata: {} + 
- relationship_id: declared_in:INV-0027:ADR-L-0020 + relationship_type: declared_in + from_entity_id: INV-0027 + to_entity_id: ADR-L-0020 + provenance_classification: explicit + evidence: + - ADR-L-0020#INV-0027 + canonical_source_ref: ADR-L-0020#INV-0027 + confidence: 1 + metadata: {} + - relationship_id: declared_in:INV-0028:ADR-L-0020 + relationship_type: declared_in + from_entity_id: INV-0028 + to_entity_id: ADR-L-0020 + provenance_classification: explicit + evidence: + - ADR-L-0020#INV-0028 + canonical_source_ref: ADR-L-0020#INV-0028 + confidence: 1 + metadata: {} + - relationship_id: declared_in:INV-0029:ADR-L-0020 + relationship_type: declared_in + from_entity_id: INV-0029 + to_entity_id: ADR-L-0020 + provenance_classification: explicit + evidence: + - ADR-L-0020#INV-0029 + canonical_source_ref: ADR-L-0020#INV-0029 + confidence: 1 + metadata: {} - relationship_id: declared_in:SYS-0001:ADR-PS-0001 relationship_type: declared_in from_entity_id: SYS-0001 @@ -1201,6 +1251,56 @@ relationships: canonical_source_ref: ADR-L-0019#INV-0023 confidence: 1 metadata: {} + - relationship_id: declares:ADR-L-0020:CAP-0020 + relationship_type: declares + from_entity_id: ADR-L-0020 + to_entity_id: CAP-0020 + provenance_classification: derived + evidence: + - ADR-L-0020#CAP-0020 + canonical_source_ref: ADR-L-0020#CAP-0020 + confidence: 1 + metadata: {} + - relationship_id: declares:ADR-L-0020:DEC-0022 + relationship_type: declares + from_entity_id: ADR-L-0020 + to_entity_id: DEC-0022 + provenance_classification: derived + evidence: + - ADR-L-0020#DEC-0022 + canonical_source_ref: ADR-L-0020#DEC-0022 + confidence: 1 + metadata: {} + - relationship_id: declares:ADR-L-0020:INV-0027 + relationship_type: declares + from_entity_id: ADR-L-0020 + to_entity_id: INV-0027 + provenance_classification: derived + evidence: + - ADR-L-0020#INV-0027 + canonical_source_ref: ADR-L-0020#INV-0027 + confidence: 1 + metadata: {} + - relationship_id: declares:ADR-L-0020:INV-0028 + relationship_type: 
declares + from_entity_id: ADR-L-0020 + to_entity_id: INV-0028 + provenance_classification: derived + evidence: + - ADR-L-0020#INV-0028 + canonical_source_ref: ADR-L-0020#INV-0028 + confidence: 1 + metadata: {} + - relationship_id: declares:ADR-L-0020:INV-0029 + relationship_type: declares + from_entity_id: ADR-L-0020 + to_entity_id: INV-0029 + provenance_classification: derived + evidence: + - ADR-L-0020#INV-0029 + canonical_source_ref: ADR-L-0020#INV-0029 + confidence: 1 + metadata: {} - relationship_id: declares:ADR-PC-0001:COMP-0001 relationship_type: declares from_entity_id: ADR-PC-0001 @@ -1601,6 +1701,16 @@ relationships: canonical_source_ref: ADR-L-0019#DEC-0021 confidence: 1 metadata: {} + - relationship_id: enabled_by:CAP-0020:DEC-0022 + relationship_type: enabled_by + from_entity_id: CAP-0020 + to_entity_id: DEC-0022 + provenance_classification: derived + evidence: + - ADR-L-0020 + canonical_source_ref: ADR-L-0020#DEC-0022 + confidence: 1 + metadata: {} - relationship_id: enables:DEC-0020:CAP-0018 relationship_type: enables from_entity_id: DEC-0020 @@ -1621,6 +1731,16 @@ relationships: canonical_source_ref: ADR-L-0019#DEC-0021 confidence: 1 metadata: {} + - relationship_id: enables:DEC-0022:CAP-0020 + relationship_type: enables + from_entity_id: DEC-0022 + to_entity_id: CAP-0020 + provenance_classification: explicit + evidence: + - ADR-L-0020 + canonical_source_ref: ADR-L-0020#DEC-0022 + confidence: 1 + metadata: {} - relationship_id: enforced_by:INV-0020:DEC-0020 relationship_type: enforced_by from_entity_id: INV-0020 @@ -1661,6 +1781,36 @@ relationships: canonical_source_ref: ADR-L-0019#DEC-0021 confidence: 1 metadata: {} + - relationship_id: enforced_by:INV-0027:DEC-0022 + relationship_type: enforced_by + from_entity_id: INV-0027 + to_entity_id: DEC-0022 + provenance_classification: derived + evidence: + - ADR-L-0020 + canonical_source_ref: ADR-L-0020#DEC-0022 + confidence: 1 + metadata: {} + - relationship_id: enforced_by:INV-0028:DEC-0022 + 
relationship_type: enforced_by + from_entity_id: INV-0028 + to_entity_id: DEC-0022 + provenance_classification: derived + evidence: + - ADR-L-0020 + canonical_source_ref: ADR-L-0020#DEC-0022 + confidence: 1 + metadata: {} + - relationship_id: enforced_by:INV-0029:DEC-0022 + relationship_type: enforced_by + from_entity_id: INV-0029 + to_entity_id: DEC-0022 + provenance_classification: derived + evidence: + - ADR-L-0020 + canonical_source_ref: ADR-L-0020#DEC-0022 + confidence: 1 + metadata: {} - relationship_id: enforces:DEC-0020:INV-0020 relationship_type: enforces from_entity_id: DEC-0020 @@ -1701,6 +1851,36 @@ relationships: canonical_source_ref: ADR-L-0019#DEC-0021 confidence: 1 metadata: {} + - relationship_id: enforces:DEC-0022:INV-0027 + relationship_type: enforces + from_entity_id: DEC-0022 + to_entity_id: INV-0027 + provenance_classification: explicit + evidence: + - ADR-L-0020 + canonical_source_ref: ADR-L-0020#DEC-0022 + confidence: 1 + metadata: {} + - relationship_id: enforces:DEC-0022:INV-0028 + relationship_type: enforces + from_entity_id: DEC-0022 + to_entity_id: INV-0028 + provenance_classification: explicit + evidence: + - ADR-L-0020 + canonical_source_ref: ADR-L-0020#DEC-0022 + confidence: 1 + metadata: {} + - relationship_id: enforces:DEC-0022:INV-0029 + relationship_type: enforces + from_entity_id: DEC-0022 + to_entity_id: INV-0029 + provenance_classification: explicit + evidence: + - ADR-L-0020 + canonical_source_ref: ADR-L-0020#DEC-0022 + confidence: 1 + metadata: {} - relationship_id: implemented_by:CAP-0018:COMP-0010 relationship_type: implemented_by from_entity_id: CAP-0018 @@ -1741,6 +1921,26 @@ relationships: canonical_source_ref: ADR-L-0019#CAP-0019 confidence: 1 metadata: {} + - relationship_id: implemented_by:CAP-0020:COMP-0010 + relationship_type: implemented_by + from_entity_id: CAP-0020 + to_entity_id: COMP-0010 + provenance_classification: explicit + evidence: + - ADR-L-0020 + canonical_source_ref: ADR-L-0020#CAP-0020 + confidence: 
1 + metadata: {} + - relationship_id: implemented_by:CAP-0020:COMP-0011 + relationship_type: implemented_by + from_entity_id: CAP-0020 + to_entity_id: COMP-0011 + provenance_classification: explicit + evidence: + - ADR-L-0020 + canonical_source_ref: ADR-L-0020#CAP-0020 + confidence: 1 + metadata: {} - relationship_id: implements:COMP-0010:CAP-0018 relationship_type: implements from_entity_id: COMP-0010 @@ -1761,6 +1961,16 @@ relationships: canonical_source_ref: ADR-L-0019#CAP-0019 confidence: 1 metadata: {} + - relationship_id: implements:COMP-0010:CAP-0020 + relationship_type: implements + from_entity_id: COMP-0010 + to_entity_id: CAP-0020 + provenance_classification: derived + evidence: + - ADR-L-0020 + canonical_source_ref: ADR-L-0020#CAP-0020 + confidence: 1 + metadata: {} - relationship_id: implements:COMP-0011:CAP-0018 relationship_type: implements from_entity_id: COMP-0011 @@ -1781,6 +1991,16 @@ relationships: canonical_source_ref: ADR-L-0019#CAP-0019 confidence: 1 metadata: {} + - relationship_id: implements:COMP-0011:CAP-0020 + relationship_type: implements + from_entity_id: COMP-0011 + to_entity_id: CAP-0020 + provenance_classification: derived + evidence: + - ADR-L-0020 + canonical_source_ref: ADR-L-0020#CAP-0020 + confidence: 1 + metadata: {} - relationship_id: referenced_by:ADR-L-0001:ADR-L-0002 relationship_type: referenced_by from_entity_id: ADR-L-0001 @@ -1981,6 +2201,26 @@ relationships: canonical_source_ref: ADR-L-0019 confidence: 1 metadata: {} + - relationship_id: referenced_by:ADR-L-0016:ADR-L-0020 + relationship_type: referenced_by + from_entity_id: ADR-L-0016 + to_entity_id: ADR-L-0020 + provenance_classification: derived + evidence: + - ADR-L-0020 + canonical_source_ref: ADR-L-0020 + confidence: 1 + metadata: {} + - relationship_id: referenced_by:ADR-L-0017:ADR-L-0020 + relationship_type: referenced_by + from_entity_id: ADR-L-0017 + to_entity_id: ADR-L-0020 + provenance_classification: derived + evidence: + - ADR-L-0020 + 
canonical_source_ref: ADR-L-0020 + confidence: 1 + metadata: {} - relationship_id: referenced_by:ADR-L-0018:ADR-L-0019 relationship_type: referenced_by from_entity_id: ADR-L-0018 @@ -1991,6 +2231,16 @@ relationships: canonical_source_ref: ADR-L-0019 confidence: 1 metadata: {} + - relationship_id: referenced_by:ADR-L-0018:ADR-L-0020 + relationship_type: referenced_by + from_entity_id: ADR-L-0018 + to_entity_id: ADR-L-0020 + provenance_classification: derived + evidence: + - ADR-L-0020 + canonical_source_ref: ADR-L-0020 + confidence: 1 + metadata: {} - relationship_id: referenced_by:ADR-L-0019:ADR-L-0018 relationship_type: referenced_by from_entity_id: ADR-L-0019 @@ -2001,6 +2251,16 @@ relationships: canonical_source_ref: ADR-L-0018 confidence: 1 metadata: {} + - relationship_id: referenced_by:ADR-L-0019:ADR-L-0020 + relationship_type: referenced_by + from_entity_id: ADR-L-0019 + to_entity_id: ADR-L-0020 + provenance_classification: derived + evidence: + - ADR-L-0020 + canonical_source_ref: ADR-L-0020 + confidence: 1 + metadata: {} - relationship_id: references:ADR-L-0001:ADR-L-0002 relationship_type: references from_entity_id: ADR-L-0001 @@ -2221,6 +2481,46 @@ relationships: canonical_source_ref: ADR-L-0019 confidence: 1 metadata: {} + - relationship_id: references:ADR-L-0020:ADR-L-0016 + relationship_type: references + from_entity_id: ADR-L-0020 + to_entity_id: ADR-L-0016 + provenance_classification: explicit + evidence: + - ADR-L-0020 + canonical_source_ref: ADR-L-0020 + confidence: 1 + metadata: {} + - relationship_id: references:ADR-L-0020:ADR-L-0017 + relationship_type: references + from_entity_id: ADR-L-0020 + to_entity_id: ADR-L-0017 + provenance_classification: explicit + evidence: + - ADR-L-0020 + canonical_source_ref: ADR-L-0020 + confidence: 1 + metadata: {} + - relationship_id: references:ADR-L-0020:ADR-L-0018 + relationship_type: references + from_entity_id: ADR-L-0020 + to_entity_id: ADR-L-0018 + provenance_classification: explicit + evidence: + - 
ADR-L-0020 + canonical_source_ref: ADR-L-0020 + confidence: 1 + metadata: {} + - relationship_id: references:ADR-L-0020:ADR-L-0019 + relationship_type: references + from_entity_id: ADR-L-0020 + to_entity_id: ADR-L-0019 + provenance_classification: explicit + evidence: + - ADR-L-0020 + canonical_source_ref: ADR-L-0020 + confidence: 1 + metadata: {} - relationship_id: related_to:ADR-PC-0001:ADR-PS-0001 relationship_type: related_to from_entity_id: ADR-PC-0001 diff --git a/adrs/index/system-registry.yaml b/adrs/index/system-registry.yaml index f121d28..e96af41 100644 --- a/adrs/index/system-registry.yaml +++ b/adrs/index/system-registry.yaml @@ -1,126 +1,99 @@ schema_version: '1.1' type: normalized_entity_registry entities: - - id: SYS-0001 - entity_type: system - name: Runtime Orchestration and Assistant Integration - summary: >- - ste-runtime now contains a runtime orchestration boundary that keeps semantic state fresh, exposes - assistant-facing MCP tools, performs reconciliation gating and freshness checks, and assembles implementation - context and - lifecycle_stage: proposed - canonical_source: - source_type: physical_system_adr - source_ref: ADR-PS-0001 - artifact_path: adrs/physical-system/ADR-PS-0001-runtime-orchestration-and-assistant-integration.yaml - source_refs: [] - metadata: - adr_id: ADR-PS-0001 - implements_logical: - - ADR-L-0004 - - ADR-L-0006 - - ADR-L-0007 - - ADR-L-0018 - technologies: - - typescript - - node.js - - mcp - - chokidar - - zod - relationships: - declared_in: - - ADR-PS-0001 - declares: [] - references: [] - referenced_by: [] - related_to: [] - enforces: [] - enforced_by: [] - enabled_by: [] - enables: [] - governs: [] - governed_by: [] - implemented_by: [] - implements: [] - embodied_in: [] - embodies: - - COMP-0001 - - COMP-0002 - - COMP-0003 - - COMP-0004 - - COMP-0010 - - COMP-0011 - supersedes: [] - superseded_by: [] - refines: [] - refined_by: [] - completeness: - status: complete - missing_fields: [] - provenance: - 
source_type: physical_system_adr - source_ref: ADR-PS-0001 - extraction_phase: extract_system - classification: explicit - generator: adr-architecture-index - - id: SYS-0002 - entity_type: system - name: Semantic Extraction Subsystem - summary: >- - ste-runtime extraction is now a subsystem containing multiple first-class extractors and normalization flows - rather than a pair of isolated physical slices. This ADR groups the implemented extractor estate under a concre - lifecycle_stage: proposed - canonical_source: - source_type: physical_system_adr - source_ref: ADR-PS-0002 - artifact_path: adrs/physical-system/ADR-PS-0002-semantic-extraction-subsystem.yaml - source_refs: [] - metadata: - adr_id: ADR-PS-0002 - implements_logical: - - ADR-L-0001 - - ADR-L-0005 - technologies: - - typescript - - node.js - - json - - angular - - css - - cloudformation - - adr-yaml - relationships: - declared_in: - - ADR-PS-0002 - declares: [] - references: [] - referenced_by: [] - related_to: [] - enforces: [] - enforced_by: [] - enabled_by: [] - enables: [] - governs: [] - governed_by: [] - implemented_by: [] - implements: [] - embodied_in: [] - embodies: - - COMP-0005 - - COMP-0006 - - COMP-0007 - - COMP-0008 - - COMP-0009 - - COMP-0012 - supersedes: [] - superseded_by: [] - refines: [] - refined_by: [] - completeness: - status: complete - missing_fields: [] - provenance: - source_type: physical_system_adr - source_ref: ADR-PS-0002 - extraction_phase: extract_system - classification: explicit - generator: adr-architecture-index +- id: SYS-0001 + entity_type: system + name: Runtime Orchestration and Assistant Integration + summary: ste-runtime now contains a runtime orchestration boundary that keeps semantic + state fresh, exposes assistant-facing MCP tools, performs reconciliation gating + and freshness checks, and assembles implementation context and + lifecycle_stage: active + canonical_source: + source_type: physical_system_adr + source_ref: ADR-PS-0001 + artifact_path: 
adrs/physical-system/ADR-PS-0001-runtime-orchestration-and-assistant-integration.yaml + source_refs: [] + metadata: + adr_id: ADR-PS-0001 + implements_logical: + - ADR-L-0004 + - ADR-L-0006 + - ADR-L-0007 + - ADR-L-0018 + technologies: + - typescript + - node.js + - mcp + - chokidar + - zod + relationships: + declared_in: + - ADR-PS-0001 + references: [] + related_to: [] + enforces: [] + enabled_by: [] + enables: [] + governs: [] + implemented_by: [] + embodied_in: [] + supersedes: [] + superseded_by: [] + refines: [] + completeness: + status: complete + missing_fields: [] + provenance: + source_type: physical_system_adr + source_ref: ADR-PS-0001 + extraction_phase: extract_system + classification: explicit + generator: adr-architecture-index +- id: SYS-0002 + entity_type: system + name: Semantic Extraction Subsystem + summary: ste-runtime extraction is now a subsystem containing multiple first-class + extractors and normalization flows rather than a pair of isolated physical slices. + This ADR groups the implemented extractor estate under a concre + lifecycle_stage: active + canonical_source: + source_type: physical_system_adr + source_ref: ADR-PS-0002 + artifact_path: adrs/physical-system/ADR-PS-0002-semantic-extraction-subsystem.yaml + source_refs: [] + metadata: + adr_id: ADR-PS-0002 + implements_logical: + - ADR-L-0001 + - ADR-L-0005 + technologies: + - typescript + - node.js + - json + - angular + - css + - cloudformation + - adr-yaml + relationships: + declared_in: + - ADR-PS-0002 + references: [] + related_to: [] + enforces: [] + enabled_by: [] + enables: [] + governs: [] + implemented_by: [] + embodied_in: [] + supersedes: [] + superseded_by: [] + refines: [] + completeness: + status: complete + missing_fields: [] + provenance: + source_type: physical_system_adr + source_ref: ADR-PS-0002 + extraction_phase: extract_system + classification: explicit + generator: adr-architecture-index diff --git 
a/adrs/logical/ADR-L-0016-workspace-graph-slice-schema-contract.yaml b/adrs/logical/ADR-L-0016-workspace-graph-slice-schema-contract.yaml index 9997069..063b044 100644 --- a/adrs/logical/ADR-L-0016-workspace-graph-slice-schema-contract.yaml +++ b/adrs/logical/ADR-L-0016-workspace-graph-slice-schema-contract.yaml @@ -70,28 +70,56 @@ invariants: The graph must not lie. Emitting uncertain edges degrades trust in the entire graph. Diagnostics preserve the signal without asserting false relationships. +- id: INV-0025 + statement: > + All infrastructure resources extracted by RECON are emitted as workspace + graph nodes. No extracted resource is silently dropped by the slice + emitter. Resources not covered by an explicit CFN-to-graph-type mapping + are emitted as InfraResource nodes with the original cfn_type preserved + in attributes. The logicalId is the last-resort display name; a null + name never causes a node to be dropped. + scope: global + enforcement_level: must + enforcement_mechanism: design + verification_method: automated + rationale: > + The workspace graph is the Architecture IR substrate. Silently dropping + extracted resources creates blind spots that downstream context domains + cannot compensate for. Pattern-agnostic emission ensures backend + services, frontend SPAs, and MFE monorepos are treated equally. constraints: - id: CONST-0010 type: technical description: > Node shape: { id: string, type: string, name: string, provenance: { source_path: string, source_ref: string, repo?: string }, attributes?: object }. Ratified node types are Service, Lambda, StateMachine, Queue, Topic, - Bucket, Database, Schema, Endpoint, and ExternalSystem. Node IDs follow - the Graph Identity Contract (Type:normalized-name). + Bucket, Database, Schema, Endpoint, ExternalSystem, Stack, Distribution, + WebACL, Certificate, DNSRecord, APIGateway, SecurityGroup, Secret, + DBCluster, DBProxy, LogGroup, Alarm, DeliveryStream, EventRule, Role, + and InfraResource. 
InfraResource is a catch-all fallback type for any + extracted CFN resource type not covered by an explicit mapping; it + preserves cfn_type in attributes for downstream classification. Role + nodes carry auxiliary: true and are compressed at L0-L2 projections. + Node IDs follow the Graph Identity Contract (Type:normalized-name). rationale: > Consistent node shape enables merger join logic and identity stability - across runs. + across runs. Comprehensive type coverage ensures all RECON-extracted + infrastructure resources flow through to the workspace graph regardless + of repository pattern (backend service, frontend SPA, MFE monorepo). - id: CONST-0011 type: technical description: > Edge shape: { from: string, to: string, verb: string, confidence?: string, provenance?: { source_path: string, source_ref: string, repo?: string }, attributes?: object }. Ratified verbs are invokes, publishes, consumes, reads, writes, validates_against, implements, deploys_to, has_contract, calls, triggers, - and publishes_to. The projection compression layer also recognizes - references as a low-tier reference edge when present in merged graph input. + publishes_to, and contains. The contains verb represents structural + containment (e.g., a Stack node contains its child resources or nested + stacks). The projection compression layer also recognizes references as + a low-tier reference edge when present in merged graph input. rationale: > Consistent edge shape and ratified verb set prevent schema drift and - enable reliable graph queries. + enable reliable graph queries. The contains verb surfaces monorepo + app-level grouping and nested stack topology in the graph. 
- id: CONST-0012 type: technical description: > diff --git a/adrs/logical/ADR-L-0020-source-locators-cem-mvc.yaml b/adrs/logical/ADR-L-0020-source-locators-cem-mvc.yaml new file mode 100644 index 0000000..d833bf4 --- /dev/null +++ b/adrs/logical/ADR-L-0020-source-locators-cem-mvc.yaml @@ -0,0 +1,151 @@ +schema_version: '1.0' +adr_type: logical +id: ADR-L-0020 +title: Source Locators as Cognitive Execution Model Infrastructure +status: proposed +created_date: '2026-05-27' +authors: +- erik.gallmann +domains: +- workspace +- graph +- cem +- mvc +- provenance +tags: +- source-locators +- entity-uri +- cognitive-execution-model +- mvc +- provenance +- deterministic +related_adrs: +- ADR-L-0016 +- ADR-L-0017 +- ADR-L-0018 +- ADR-L-0019 +introduces_entities: +- CAP-0020 +- DEC-0022 +- INV-0027 +- INV-0028 +- INV-0029 +vision_category: false +context: | + The workspace graph currently supports deterministic traversal and projection, + but graph entities need stable, portable links back to authoritative source + artifacts before the graph can safely support IDE-native reasoning, + conversation-engine context assembly, and future kernel handoff. + + This capability frames source locators as infrastructure for ste-runtime's + Cognitive Execution Model (CEM). The graph remains derived cognitive routing + state. Canonical ADRs, decisions, invariants, source files, contracts, and + rule artifacts remain authoritative in their owning repositories. + + CEM bundles are correctness-oriented evidence packages assembled by + ste-runtime from graph state, source locators, embodiment evidence, validation + state, traversal context, and negative-space constraints. MVC bundles are + minimized context projections derived from CEM bundles for bounded AI or IDE + reasoning. MVC validation checks whether the minimized bundle remains current, + faithful, scoped, and non-misleading relative to the CEM bundle. 
+capabilities: +- id: CAP-0020 + name: Source-aware CEM and MVC assembly + description: | + Resolve workspace graph entities to authoritative source artifacts through + stable URI locators, assemble CEM bundles with provenance and validation + state, derive bounded MVC bundles, and validate MVC bundles against their + parent CEM bundle. + implemented_by_components: + - COMP-0011 + - COMP-0010 + enabled_by_decisions: + - DEC-0022 +constraints: +- id: CONST-0020 + type: technical + description: | + Source locator, CEM, and MVC artifacts are runtime-owned derived state. They + must not become canonical ADR, decision, invariant, rule, contract, or source + authority. + rationale: | + Authority boundaries keep graph cognition separate from source ownership and + preserve future ste-kernel admission semantics. +invariants: +- id: INV-0027 + statement: | + Workspace graph entities may carry source locator metadata, but graph + artifacts must not embed full canonical ADR, DEC, invariant, contract, or + source documents by default. + scope: global + enforcement_level: must + enforcement_mechanism: design + verification_method: automated + rationale: | + The graph is a semantic cognition and routing substrate, not a document + database or competing authority surface. +- id: INV-0028 + statement: | + Every MVC bundle must retain provenance back to its parent CEM bundle, + graph snapshot, traversal operation, selected source artifacts, embodiment + evidence, and validation state. + scope: global + enforcement_level: must + enforcement_mechanism: design + verification_method: automated + rationale: | + MVC is optimized for cognitive efficiency and may be lossy; provenance and + validation protect against misleading context reduction. +- id: INV-0029 + statement: | + Source locator generation, graph traversal, CEM assembly, MVC derivation, + and MVC validation are deterministic for identical workspace graph, source, + locator registry, and traversal inputs. 
+ scope: global + enforcement_level: must + enforcement_mechanism: design + verification_method: automated + rationale: | + Determinism enables replay, fixture testing, stale-state detection, and + future kernel consumption. +decisions: +- id: DEC-0022 + summary: Use source locators as foundational infrastructure for CEM and MVC + rationale: | + Entity URI resolution is necessary but not sufficient for IDE and + conversation-engine reasoning. ste-runtime must assemble a correctness-first + CEM bundle, derive a bounded MVC projection, and validate the MVC against + the CEM before agents reason from minimized context. + + Source locators use stable workspace/entity/ADR/decision URI forms and + source hashes. They allow graph traversal results to route back to canonical + source artifacts without copying full source content into graph artifacts. + alternatives_considered: + - name: Embed source documents in graph nodes + rejected_because: | + This would turn derived graph state into a shadow document database and + create authority drift from canonical source artifacts. + - name: Resolve source directly from paths without URI normalization + rejected_because: | + Raw paths are not portable across platforms, repo layouts, or future + conversation-engine handoff surfaces. + - name: Produce MVC directly from graph traversal + rejected_because: | + MVC is intentionally minimized and may omit important negative-space or + provenance context. A parent CEM bundle is required as the correctness + baseline for derivation and validation. 
+ consequences: + positive: + - IDE agents can traverse the graph and retrieve authoritative source artifacts + - MVC bundles remain small while preserving provenance to CEM and source + - Source freshness and graph freshness can be validated before reasoning + - Future ste-kernel can consume runtime-produced CEM/MVC validation state + negative: + - Additional runtime artifact surface and tests + - More explicit stale and partial-state diagnostics for consumers to handle + related_invariants: + - INV-0027 + - INV-0028 + - INV-0029 + enables_capabilities: + - CAP-0020 diff --git a/adrs/manifest.yaml b/adrs/manifest.yaml index bac2ca1..3cab9f3 100644 --- a/adrs/manifest.yaml +++ b/adrs/manifest.yaml @@ -1,6 +1,6 @@ schema_version: '1.0' type: manifest -generated_date: '2026-05-27T00:52:37Z' +generated_date: '2026-05-27T05:44:06Z' generated_from: adrs/**/*.yaml adrs: - id: ADR-L-0001 @@ -422,6 +422,34 @@ adrs: related_reviews: [] related_overrides: [] related_ledgers: [] + - id: ADR-L-0020 + type: logical + title: Source Locators as Cognitive Execution Model Infrastructure + status: proposed + file_path: adrs/logical/ADR-L-0020-source-locators-cem-mvc.yaml + domains: + - workspace + - graph + - cem + - mvc + - provenance + tags: + - source-locators + - entity-uri + - cognitive-execution-model + - mvc + - provenance + - deterministic + implements_logical: [] + technologies: [] + decision_count: 1 + invariant_count: 3 + gap_count: 0 + blocking_gaps: 0 + component_count: 0 + related_reviews: [] + related_overrides: [] + related_ledgers: [] - id: ADR-P-0001 type: physical title: RSS CLI Implementation for Developer-Invoked Graph Traversal @@ -971,14 +999,14 @@ gaps_summary: blocking: 0 by_adr: {} statistics: - total_adrs: 36 - logical_adrs: 18 + total_adrs: 37 + logical_adrs: 19 physical_adrs: 5 physical_system_adrs: 2 physical_component_adrs: 11 decision_adrs: 0 - total_decisions: 20 - total_invariants: 24 + total_decisions: 21 + total_invariants: 27 total_components: 19 
total_gaps: 3 blocking_gaps: 0 diff --git a/adrs/physical-component/ADR-PC-0001-mcp-server-and-tool-registry.yaml b/adrs/physical-component/ADR-PC-0001-mcp-server-and-tool-registry.yaml index 77e2397..4ecbed6 100644 --- a/adrs/physical-component/ADR-PC-0001-mcp-server-and-tool-registry.yaml +++ b/adrs/physical-component/ADR-PC-0001-mcp-server-and-tool-registry.yaml @@ -82,5 +82,63 @@ component_specifications: integration_tests: | - MCP server startup - Tool registration and execution -implementation_decisions: [] -gaps: [] +implementation_decisions: +- id: IMPL-0001 + summary: > + Graph topology analysis uses single-pass BFS layering (Kahn's algorithm) + computing forward dependency depths in O(N+E). The original per-node + recursive DFS (O(N x (N+E))) caused the MCP server to hang at startup + when the graph exceeded ~500 nodes. Backward depth metrics are dead + (never consumed by detectPattern() or calculateOptimalDepth()); fields + retained at zero for cache compatibility. Alternatives rejected: (1) + increase sampling threshold -- constant factor improvement only, (2) skip + topology analysis -- loses pattern detection and recommended depth. + rationale: > + Infrastructure domain expansion (ADR-L-0016 INV-0025) grew the graph + from ~200 to ~1200 nodes, triggering the O(N*DFS) hang. Linear-time + analysis is required for the IR substrate to grow to 5K-10K+ nodes. +- id: IMPL-0002 + summary: > + MCP startup loads the RECON graph exactly once per initialization or + reload cycle. rssContext.graph (already in memory from initRssContext) is + passed directly to analyzeGraphTopology(). The redundant second call to + loadAidocGraph on cache miss and on every reloadContext() is eliminated. + rationale: > + At N=5000 with sequential YAML I/O, each redundant loadAidocGraph call + added ~50 seconds. Eliminating the double load halves cold-start I/O. 
+- id: IMPL-0003 + summary: > + Graph metrics cache (graph-metrics.json) is validated by node-count + delta: recompute when |cached.totalComponents - graph.size| exceeds 10% + of graph.size. The check is O(1); recomputation is O(N+E) per IMPL-0001. + rationale: > + Previously graph-metrics.json was accepted without staleness validation. + A stale cache could silently produce incorrect topology metadata after + graph growth, leading to suboptimal traversal parameter tuning. +invariants: +- id: INV-0026 + statement: > + MCP startup graph topology analysis completes in O(N+E) time where N is + the node count and E is the edge count of the loaded RECON state graph. + Per-node recursive traversal is prohibited. + scope: component + enforcement_level: must + enforcement_mechanism: design + verification_method: automated + rationale: > + The O(N x DFS) algorithm caused MCP server startup to hang beyond ~500 + nodes. Linear-time analysis is required for growth to 5K-10K+ nodes. + Enforced by IMPL-0001 (BFS layering) and verified by the 5000-node + performance test completing under 100ms. +gaps: +- id: GAP-0001 + question: > + The O(N*DFS) topology analysis caused MCP server startup to hang when + graph size exceeded ~500 nodes. Was there a performance bound for + startup graph analysis? + impact: high + blocking: false + status: closed + resolution: > + Resolved by IMPL-0001 (BFS layering in O(N+E)) and INV-0026 + (prohibiting per-node recursive traversal on startup).
diff --git a/adrs/physical-component/ADR-PC-0007-cloudformation-semantic-extraction.yaml b/adrs/physical-component/ADR-PC-0007-cloudformation-semantic-extraction.yaml index 2f88f5b..0d4e345 100644 --- a/adrs/physical-component/ADR-PC-0007-cloudformation-semantic-extraction.yaml +++ b/adrs/physical-component/ADR-PC-0007-cloudformation-semantic-extraction.yaml @@ -68,7 +68,8 @@ component_specifications: implementation_requirements: constraints: - Nested stack resolution is static analysis only -- no runtime AWS API calls - - Intrinsic handling is deterministic: Ref and GetAtt fully supported, Sub map-form conditionally supported, all others unsupported with structured diagnostics + - Intrinsic handling is deterministic: Ref and GetAtt resolve to logical IDs; Sub map-form conditionally supported; Join, Select, If, and ImportValue produce structured diagnostics rather than resolved values + - All extracted AWS::* resources become workspace graph nodes via the slice emitter; no CFN resource type is silently dropped by extraction or emission error_handling: strategy: Continue extracting valid template sections while surfacing deterministic parsing failures. @@ -84,14 +85,31 @@ component_specifications: integration_tests: | - CloudFormation extraction - CloudFormation validation extraction coverage -implementation_decisions: [] + - MFE monorepo extraction (nested apps/*/cfn_templates/ patterns) +implementation_decisions: +- id: IMPL-0007 + summary: > + CFN type completeness: all extracted AWS::* resources are emitted as + workspace graph nodes. Explicitly mapped types receive specific graph + type names (Lambda, Queue, Distribution, etc.). Unmapped types receive + the InfraResource fallback type with cfn_type preserved in attributes. + rationale: > + The workspace graph is pattern-agnostic. Backend services, frontend + SPAs, and MFE monorepos all produce infrastructure resources that must + appear in the graph for Architecture IR fidelity. 
gaps: - id: GAP-0001 question: > - AWS::Serverless::StateMachine is not yet handled in extractResourceMetadata. - Should SAM state machines use the same DefinitionBody/DefinitionUri extraction - as AWS::StepFunctions::StateMachine? + AWS::Serverless::StateMachine is now handled via the shared CFN type + mapping module (maps to StateMachine graph type). DefinitionBody/ + DefinitionUri extraction uses the same logic as + AWS::StepFunctions::StateMachine. impact: medium blocking: false + status: closed + resolution: > + Resolved by adding AWS::Serverless::StateMachine to the shared + cfn-type-mapping module and reusing existing StepFunctions extraction + logic for DefinitionBody/DefinitionUri. affects: - COMP-0007 diff --git a/adrs/physical-component/ADR-PC-0008-service-wiring-post-processing.yaml b/adrs/physical-component/ADR-PC-0008-service-wiring-post-processing.yaml index 010a88f..9ca3655 100644 --- a/adrs/physical-component/ADR-PC-0008-service-wiring-post-processing.yaml +++ b/adrs/physical-component/ADR-PC-0008-service-wiring-post-processing.yaml @@ -42,6 +42,7 @@ component_specifications: - Build Lambda-handler-to-function maps from CFN handler metadata - Resolve cross-stack parameter chains through nested stack topology (ParamResolutionTable) - Follow master-to-child GetAtt ChildStack.Outputs.X references through to originating resource logical IDs + - Map all extracted CFN resources to graph IDs using the shared cfn-type-mapping module (InfraResource fallback for unmapped types) - All maps keyed by structural type, never by repository name generation_context: purpose: Provide cross-domain join maps for edge wiring. 
@@ -84,6 +85,8 @@ component_specifications: name: Slice Emitter Edge Wiring type: library responsibilities: | + - Emit all extracted CFN resources as workspace graph nodes (no allowlist gate; InfraResource fallback for unmapped types) + - Emit Stack nodes from infrastructure/template slices with contains edges to child resources - Produce reads/writes edges by joining SDK usage with infrastructure resources via env-var bridge - Produce publishes edges for SQS/SNS SDK usage - Improve consumes edge resolution via CFN logical ID lookup @@ -153,7 +156,17 @@ constraints: rationale: > Nominal matching by repo name would couple wiring to a specific workspace and violate the workspace agnosticism invariant. -implementation_decisions: [] +implementation_decisions: +- id: IMPL-0008 + summary: > + Resource-to-node emission policy: all RECON-extracted infrastructure + resources become slice nodes. The previous silent omission of unmapped + CFN types is replaced by diagnostic-aware emission via the shared + cfn-type-mapping module. Unmapped types produce InfraResource nodes + with cfn_type preserved in attributes. + rationale: > + Silent omission caused frontend and MFE monorepo infrastructure to + disappear from the workspace graph. The graph must be pattern-agnostic. gaps: [] consequences: positive: diff --git a/adrs/physical-system/ADR-PS-0001-runtime-orchestration-and-assistant-integration.yaml b/adrs/physical-system/ADR-PS-0001-runtime-orchestration-and-assistant-integration.yaml index d795626..d78023f 100644 --- a/adrs/physical-system/ADR-PS-0001-runtime-orchestration-and-assistant-integration.yaml +++ b/adrs/physical-system/ADR-PS-0001-runtime-orchestration-and-assistant-integration.yaml @@ -122,4 +122,10 @@ operational_requirements: monitoring: Track freshness status, invalidated validations, and runtime health metrics. logging: Structured runtime logs for reconciliation and tool invocation flows. 
+ startup_latency: | + MCP server initialization (from start() to tool availability) must not + degrade super-linearly with graph size. Startup operations over the RECON + state graph (loading, topology analysis, metrics caching) must each be at + most O(N+E) where N and E are graph node and edge counts. At N=5000, + cold-start (cache miss) must complete within 10 seconds. gaps: [] diff --git a/adrs/rendered/ADR-L-0016.md b/adrs/rendered/ADR-L-0016.md index d2c7655..58e6e7c 100644 --- a/adrs/rendered/ADR-L-0016.md +++ b/adrs/rendered/ADR-L-0016.md @@ -5,8 +5,8 @@ artifact_kind: rendered_adr_markdown generator_id: adr-rendered-markdown generator_version: 1 hash_algorithm: sha256 -source_hash: fa5a8bf480d6a282e5b7a0fe533428ed7317c995843efe084090920b467932e2 -rendered_hash: 693c71ab33da9a25889d165063a9f30eec166aaa429aae084a577352d4a47459 +source_hash: fa9019bfdbd24218c83b1a3e70d51855217ebebe3a95b38f9dde94ec6a263eef +rendered_hash: fd199018d41f62e18e86dafa9a39084a35ab955242dfabc1a23473dd0cdf279f --> # ADR-L-0016: Workspace Graph Slice Schema Contract @@ -26,12 +26,17 @@ rendered_hash: 693c71ab33da9a25889d165063a9f30eec166aaa429aae084a577352d4a47459 ## Context ste-runtime produces per-repository graph slices during workspace RECON. -These slices are consumed by any compliant downstream merger to -produce a unified workspace graph. Without a defined schema contract, -the producer and consumer drift, causing Pydantic validation failures -and broken merge pipelines. ADR-L-0012 established ste-spec as the -schema authority; this ADR defines the specific slice output contract -as a polyglot interop boundary. +These slices are consumed by the runtime-owned workspace merger to produce +a unified workspace graph and multi-resolution projections. Without a +defined schema contract, producer and consumer drift can break merge +pipelines or cause downstream tools to treat partial graph material as +authoritative. 
+ +ADR-L-0012 established ste-spec as public cross-repo schema authority. +This ADR defines the runtime-owned slice output contract used by +ste-runtime's workspace graph pipeline. The slice and merged workspace graph +remain derived runtime artifacts, not Architecture IR and not canonical +architecture authority. ## Capabilities @@ -49,21 +54,21 @@ ste-runtime emits workspace graph slices that conform to a defined schema contra ### CONST-0010 (technical) **Description:** -Node shape: { id: string, type: string, name: string, provenance: { source_path: string, source_ref: string } }. Node ID format follows the Graph Identity Contract (Type:normalized-name). +Node shape: { id: string, type: string, name: string, provenance: { source_path: string, source_ref: string, repo?: string }, attributes?: object }. Ratified node types are Service, Lambda, StateMachine, Queue, Topic, Bucket, Database, Schema, Endpoint, ExternalSystem, Stack, Distribution, WebACL, Certificate, DNSRecord, APIGateway, SecurityGroup, Secret, DBCluster, DBProxy, LogGroup, Alarm, DeliveryStream, EventRule, Role, and InfraResource. InfraResource is a catch-all fallback type for any extracted CFN resource type not covered by an explicit mapping; it preserves cfn_type in attributes for downstream classification. Role nodes carry auxiliary: true and are compressed at L0-L2 projections. Node IDs follow the Graph Identity Contract (Type:normalized-name). **Rationale:** -Consistent node shape enables merger join logic and identity stability across runs. +Consistent node shape enables merger join logic and identity stability across runs. Comprehensive type coverage ensures all RECON-extracted infrastructure resources flow through to the workspace graph regardless of repository pattern (backend service, frontend SPA, MFE monorepo). ### CONST-0011 (technical) **Description:** -Edge shape: { from: string, to: string, verb: string, confidence: 'high', provenance: { source_path: string, source_ref: string } }. 
Verb must be one of the ratified set: reads, writes, publishes, consumes, invokes, deploys_to, has_contract, validates_against, implements. +Edge shape: { from: string, to: string, verb: string, confidence?: string, provenance?: { source_path: string, source_ref: string, repo?: string }, attributes?: object }. Ratified verbs are invokes, publishes, consumes, reads, writes, validates_against, implements, deploys_to, has_contract, calls, triggers, publishes_to, and contains. The contains verb represents structural containment (e.g., a Stack node contains its child resources or nested stacks). The projection compression layer also recognizes references as a low-tier reference edge when present in merged graph input. **Rationale:** -Consistent edge shape and ratified verb set prevent schema drift and enable reliable graph queries. +Consistent edge shape and ratified verb set prevent schema drift and enable reliable graph queries. The contains verb surfaces monorepo app-level grouping and nested stack topology in the graph. ### CONST-0012 (technical) @@ -76,26 +81,36 @@ generated_by is read from package.json (ste-runtime@), never hardcoded Provenance fields must be accurate and reproducible for audit trails. +### CONST-0013 (technical) + +**Description:** +Slice validation supports warn and reject modes. Warn mode accepts unknown node types and edge verbs while surfacing diagnostics; reject mode treats them as validation errors. + + +**Rationale:** +Workspace adoption needs a staged compatibility mode, but strict validation must remain available for contract gates. + + ## Invariants ### INV-0017 -**Statement:** Every workspace graph slice emitted by ste-runtime contains exactly the fields defined in the slice schema contract: schema_version, repo, generated_by, generated_at, source_commit, nodes, edges, diagnostics. 
+**Statement:** Every workspace graph slice emitted by ste-runtime contains the required core fields defined in the slice schema contract: schema_version, repo, generated_by, generated_at, nodes, and edges. source_commit and diagnostics are supported standard fields. Consumers may preserve unknown extension fields but must not treat them as portable contract authority. **Scope:** global **Enforcement:** must (design) **Verification:** automated **Rationale:** -Missing or extra fields cause downstream Pydantic validation failures. The contract must be exact. +Required core fields keep the merge path deterministic. Extension tolerance allows polyglot producers to evolve without breaking older consumers, while keeping public authority in documented contract fields. ### INV-0018 -**Statement:** Only edges with confidence 'high' are emitted. Ambiguous resolutions produce diagnostics, not edges. +**Statement:** Runtime-emitted workspace graph edges that assert a resolved relationship use confidence 'high'. Ambiguous resolutions produce diagnostics rather than asserted edges. Consumers must drop non-high edges when they appear in permissive input mode. **Scope:** global **Enforcement:** must (design) @@ -107,6 +122,20 @@ The graph must not lie. Emitting uncertain edges degrades trust in the entire gr +### INV-0025 + +**Statement:** All infrastructure resources extracted by RECON are emitted as workspace graph nodes. No extracted resource is silently dropped by the slice emitter. Resources not covered by an explicit CFN-to-graph-type mapping are emitted as InfraResource nodes with the original cfn_type preserved in attributes. The logicalId is the last-resort display name; a null name never causes a node to be dropped. + +**Scope:** global +**Enforcement:** must (design) +**Verification:** automated + +**Rationale:** +The workspace graph is the Architecture IR substrate. 
Silently dropping extracted resources creates blind spots that downstream context domains cannot compensate for. Pattern-agnostic emission ensures backend services, frontend SPAs, and MFE monorepos are treated equally. + + + + ## Decisions @@ -114,10 +143,11 @@ The graph must not lie. Emitting uncertain edges degrades trust in the entire gr ### DEC-0016: Workspace graph slices follow a defined schema contract **Rationale:** -Producer-consumer drift between ste-runtime and downstream mergers -caused Pydantic validation failures. A defined schema contract with -exact field names, shapes, and constraints eliminates this class of -integration failures. +Producer-consumer drift between ste-runtime and workspace graph consumers +caused validation failures. A defined contract with required core fields, +ratified vocabularies, and explicit extension behavior eliminates this +class of integration failure without pretending derived graph slices are +public Architecture IR authority. diff --git a/adrs/rendered/ADR-PC-0001.md b/adrs/rendered/ADR-PC-0001.md index 993fb24..dd55e3e 100644 --- a/adrs/rendered/ADR-PC-0001.md +++ b/adrs/rendered/ADR-PC-0001.md @@ -5,8 +5,8 @@ artifact_kind: rendered_adr_markdown generator_id: adr-rendered-markdown generator_version: 1 hash_algorithm: sha256 -source_hash: 124b9a4968efc26746983ee0c404de800bebad79a7c3de716d32bba26622ee51 -rendered_hash: a1849ed793672277c98ea5e292db378b24bdb14d9b1a13fe94bdf16bdd7dec12 +source_hash: faa5887192f0f63a1e7faeaf1723339da7c53a902a67c61fa4339c61c1633858 +rendered_hash: f21db8c9b9178d86552a0df5af850b1b898715f477d7e9651ed0304972f5d458 --> # ADR-PC-0001: MCP Server and Tool Registry @@ -68,7 +68,45 @@ MCP protocol implementation. +## Implementation Decisions +### IMPL-0001: Graph topology analysis uses single-pass BFS layering (Kahn's algorithm) computing forward dependency depths in O(N+E). 
The original per-node recursive DFS (O(N x (N+E))) caused the MCP server to hang at startup when the graph exceeded ~500 nodes. Backward depth metrics are dead (never consumed by detectPattern() or calculateOptimalDepth()); fields retained at zero for cache compatibility. Alternatives rejected: (1) increase sampling threshold -- constant factor improvement only, (2) skip topology analysis -- loses pattern detection and recommended depth. + + +**Rationale:** +Infrastructure domain expansion (ADR-L-0016 INV-0025) grew the graph from ~200 to ~1200 nodes, triggering the O(N*DFS) hang. Linear-time analysis is required for the IR substrate to grow to 5K-10K+ nodes. + + + + +### IMPL-0002: MCP startup loads the RECON graph exactly once per initialization or reload cycle. rssContext.graph (already in memory from initRssContext) is passed directly to analyzeGraphTopology(). The redundant second call to loadAidocGraph on cache miss and on every reloadContext() is eliminated. + + +**Rationale:** +At N=5000 with sequential YAML I/O, each redundant loadAidocGraph call added ~50 seconds. Eliminating the double load halves cold-start I/O. + + + + +### IMPL-0003: Graph metrics cache (graph-metrics.json) is validated by node-count delta: recompute when |cached.totalComponents - graph.size| exceeds 10% of graph.size. The check is O(1); recomputation is O(N+E) per IMPL-0001. + + +**Rationale:** +Previously graph-metrics.json was accepted without staleness validation. A stale cache could silently produce incorrect topology metadata after graph growth, leading to suboptimal traversal parameter tuning. + + + + + + + +## Gaps + +### GAP-0001: The O(N*DFS) topology analysis caused MCP server startup to hang when graph size exceeded ~500 nodes. Was there a performance bound for startup graph analysis? 
+ + +**Impact:** high +**Blocking:** No diff --git a/adrs/rendered/ADR-PC-0007.md b/adrs/rendered/ADR-PC-0007.md index caa19de..e79d618 100644 --- a/adrs/rendered/ADR-PC-0007.md +++ b/adrs/rendered/ADR-PC-0007.md @@ -5,8 +5,8 @@ artifact_kind: rendered_adr_markdown generator_id: adr-rendered-markdown generator_version: 1 hash_algorithm: sha256 -source_hash: bf4d6155b7f8a20490263879afbb53cbe4aa0d0fee5381c890c686e2df9b63c8 -rendered_hash: 55158f12880acb4b780a22670151c243fe3ca3c89bc94f3df54895a345c6e6a3 +source_hash: c523151bf8959a8d66ababb36b18b046eb4102885cf9da5f8eace503bc39bf63 +rendered_hash: 7e871c053e9ef5fa15390dea764ceb74c64424133fdfa84f775aeb5a430b56ab --> # ADR-PC-0007: CloudFormation Semantic Extraction @@ -71,12 +71,23 @@ Existing implementation language. +## Implementation Decisions + +### IMPL-0007: CFN type completeness: all extracted AWS::* resources are emitted as workspace graph nodes. Explicitly mapped types receive specific graph type names (Lambda, Queue, Distribution, etc.). Unmapped types receive the InfraResource fallback type with cfn_type preserved in attributes. + + +**Rationale:** +The workspace graph is pattern-agnostic. Backend services, frontend SPAs, and MFE monorepos all produce infrastructure resources that must appear in the graph for Architecture IR fidelity. + + + + ## Gaps -### GAP-0001: AWS::Serverless::StateMachine is not yet handled in extractResourceMetadata. Should SAM state machines use the same DefinitionBody/DefinitionUri extraction as AWS::StepFunctions::StateMachine? +### GAP-0001: AWS::Serverless::StateMachine is now handled via the shared CFN type mapping module (maps to StateMachine graph type). DefinitionBody/ DefinitionUri extraction uses the same logic as AWS::StepFunctions::StateMachine. 
**Impact:** medium diff --git a/adrs/rendered/ADR-PC-0008.md b/adrs/rendered/ADR-PC-0008.md index 794b89e..d4d1ec1 100644 --- a/adrs/rendered/ADR-PC-0008.md +++ b/adrs/rendered/ADR-PC-0008.md @@ -5,8 +5,8 @@ artifact_kind: rendered_adr_markdown generator_id: adr-rendered-markdown generator_version: 1 hash_algorithm: sha256 -source_hash: 8d7ecca11c76850989bfdadd00809d2f30703c4d04ad13d22ca07acd11836358 -rendered_hash: 4bc304de6e12173f3f6705b6e9c9c562919188956f032814a81fc064b88ca72e +source_hash: 26b93b64666720500c7794e7d5ffa98d83205bb1a0749da334fccda339d500b4 +rendered_hash: 951392b08a2893c69646a64ea2b677c44e62f1e33f382df13d61962903e825d6 --> # ADR-PC-0008: Service Wiring Post-Processing @@ -54,6 +54,7 @@ Existing implementation language. - Build Lambda-handler-to-function maps from CFN handler metadata - Resolve cross-stack parameter chains through nested stack topology (ParamResolutionTable) - Follow master-to-child GetAtt ChildStack.Outputs.X references through to originating resource logical IDs +- Map all extracted CFN resources to graph IDs using the shared cfn-type-mapping module (InfraResource fallback for unmapped types) - All maps keyed by structural type, never by repository name @@ -69,6 +70,8 @@ Dependencies: ### COMP-0009: Slice Emitter Edge Wiring (library) **Responsibilities:** +- Emit all extracted CFN resources as workspace graph nodes (no allowlist gate; InfraResource fallback for unmapped types) +- Emit Stack nodes from infrastructure/template slices with contains edges to child resources - Produce reads/writes edges by joining SDK usage with infrastructure resources via env-var bridge - Produce publishes edges for SQS/SNS SDK usage - Improve consumes edge resolution via CFN logical ID lookup @@ -90,6 +93,17 @@ Dependencies: +## Implementation Decisions + +### IMPL-0008: Resource-to-node emission policy: all RECON-extracted infrastructure resources become slice nodes. 
The previous silent omission of unmapped CFN types is replaced by diagnostic-aware emission via the shared cfn-type-mapping module. Unmapped types produce InfraResource nodes with cfn_type preserved in attributes. + + +**Rationale:** +Silent omission caused frontend and MFE monorepo infrastructure to disappear from the workspace graph. The graph must be pattern-agnostic. + + + + diff --git a/adrs/rendered/ADR-PS-0001.md b/adrs/rendered/ADR-PS-0001.md index 1b9f008..d25f8d2 100644 --- a/adrs/rendered/ADR-PS-0001.md +++ b/adrs/rendered/ADR-PS-0001.md @@ -5,7 +5,7 @@ artifact_kind: rendered_adr_markdown generator_id: adr-rendered-markdown generator_version: 1 hash_algorithm: sha256 -source_hash: f0be925a786412e7bfb74f79d0c01fddb29fe0317bbfa9642f81d0fd0d2ace20 +source_hash: 8ad95837f2c20f44acea4ded73917592861532ec2a407b7c501a832485d55ef3 rendered_hash: 4f937b55dfafc7428ef634a2c7282daaaa2fd2ce4a2898a97f1bebff0b96e144 --> diff --git a/fixtures/python-sample/.ste-self/state/graph-metrics.json b/fixtures/python-sample/.ste-self/state/graph-metrics.json index 65e0d7c..de0a25d 100644 --- a/fixtures/python-sample/.ste-self/state/graph-metrics.json +++ b/fixtures/python-sample/.ste-self/state/graph-metrics.json @@ -14,5 +14,5 @@ "hasWideNetwork": false, "recommendedDepth": 2, "reasoning": "Empty graph, using default depth=2", - "lastAnalyzed": "2026-05-27T02:55:35.908Z" + "lastAnalyzed": "2026-05-29T19:54:19.396Z" } \ No newline at end of file diff --git a/fixtures/python-sample/.ste/state/api/endpoints/api-GET-api-greet-name.yaml b/fixtures/python-sample/.ste/state/api/endpoints/api-GET-api-greet-name.yaml index 30b52ee..9244120 100644 --- a/fixtures/python-sample/.ste/state/api/endpoints/api-GET-api-greet-name.yaml +++ b/fixtures/python-sample/.ste/state/api/endpoints/api-GET-api-greet-name.yaml @@ -15,7 +15,7 @@ _slice: extraction: method: static confidence: high - timestamp: '2026-05-27T02:55:35.004Z' + timestamp: '2026-05-29T19:54:18.528Z' endpoint: id: 
api-GET-api-greet-name method: GET diff --git a/fixtures/python-sample/.ste/state/api/endpoints/api-GET-api-users-user-id.yaml b/fixtures/python-sample/.ste/state/api/endpoints/api-GET-api-users-user-id.yaml index fc87466..634c1e0 100644 --- a/fixtures/python-sample/.ste/state/api/endpoints/api-GET-api-users-user-id.yaml +++ b/fixtures/python-sample/.ste/state/api/endpoints/api-GET-api-users-user-id.yaml @@ -15,7 +15,7 @@ _slice: extraction: method: static confidence: high - timestamp: '2026-05-27T02:55:35.004Z' + timestamp: '2026-05-29T19:54:18.528Z' endpoint: id: api-GET-api-users-user-id method: GET diff --git a/fixtures/python-sample/.ste/state/api/endpoints/api-GET-api-users.yaml b/fixtures/python-sample/.ste/state/api/endpoints/api-GET-api-users.yaml index 84c92f3..7afa8b2 100644 --- a/fixtures/python-sample/.ste/state/api/endpoints/api-GET-api-users.yaml +++ b/fixtures/python-sample/.ste/state/api/endpoints/api-GET-api-users.yaml @@ -15,7 +15,7 @@ _slice: extraction: method: static confidence: high - timestamp: '2026-05-27T02:55:35.004Z' + timestamp: '2026-05-29T19:54:18.528Z' endpoint: id: api-GET-api-users method: GET diff --git a/fixtures/python-sample/.ste/state/api/endpoints/api-POST-api-users.yaml b/fixtures/python-sample/.ste/state/api/endpoints/api-POST-api-users.yaml index a5e24b5..ba4b4aa 100644 --- a/fixtures/python-sample/.ste/state/api/endpoints/api-POST-api-users.yaml +++ b/fixtures/python-sample/.ste/state/api/endpoints/api-POST-api-users.yaml @@ -15,7 +15,7 @@ _slice: extraction: method: static confidence: high - timestamp: '2026-05-27T02:55:35.004Z' + timestamp: '2026-05-29T19:54:18.528Z' endpoint: id: api-POST-api-users method: POST diff --git a/fixtures/python-sample/.ste/state/api/index.yaml b/fixtures/python-sample/.ste/state/api/index.yaml index ea40820..27825c1 100644 --- a/fixtures/python-sample/.ste/state/api/index.yaml +++ b/fixtures/python-sample/.ste/state/api/index.yaml @@ -10,7 +10,7 @@ _slice: extraction: method: static 
confidence: high - timestamp: '2026-05-27T02:55:35.004Z' + timestamp: '2026-05-29T19:54:18.528Z' endpoints: - id: api-GET-api-greet-name method: GET diff --git a/fixtures/python-sample/.ste/state/data/entities/data-GreetingConfig.yaml b/fixtures/python-sample/.ste/state/data/entities/data-GreetingConfig.yaml index c057e6a..b4b5a7d 100644 --- a/fixtures/python-sample/.ste/state/data/entities/data-GreetingConfig.yaml +++ b/fixtures/python-sample/.ste/state/data/entities/data-GreetingConfig.yaml @@ -15,7 +15,7 @@ _slice: extraction: method: static confidence: high - timestamp: '2026-05-27T02:55:35.004Z' + timestamp: '2026-05-29T19:54:18.528Z' entity: id: data-GreetingConfig name: GreetingConfig diff --git a/fixtures/python-sample/.ste/state/data/entities/data-User.yaml b/fixtures/python-sample/.ste/state/data/entities/data-User.yaml index 1fe15f5..18a1cab 100644 --- a/fixtures/python-sample/.ste/state/data/entities/data-User.yaml +++ b/fixtures/python-sample/.ste/state/data/entities/data-User.yaml @@ -15,7 +15,7 @@ _slice: extraction: method: static confidence: high - timestamp: '2026-05-27T02:55:35.004Z' + timestamp: '2026-05-29T19:54:18.528Z' entity: id: data-User name: User diff --git a/fixtures/python-sample/.ste/state/data/index.yaml b/fixtures/python-sample/.ste/state/data/index.yaml index 4f70375..55f2f2a 100644 --- a/fixtures/python-sample/.ste/state/data/index.yaml +++ b/fixtures/python-sample/.ste/state/data/index.yaml @@ -10,7 +10,7 @@ _slice: extraction: method: static confidence: high - timestamp: '2026-05-27T02:55:35.004Z' + timestamp: '2026-05-29T19:54:18.528Z' entities: - id: data-GreetingConfig name: GreetingConfig diff --git a/fixtures/python-sample/.ste/state/graph-metrics.json b/fixtures/python-sample/.ste/state/graph-metrics.json index db97f7d..efb08cc 100644 --- a/fixtures/python-sample/.ste/state/graph-metrics.json +++ b/fixtures/python-sample/.ste/state/graph-metrics.json @@ -12,17 +12,17 @@ "internal-index": 1, "module": 5 }, - 
"avgDependencyDepth": 0.7857142857142857, + "avgDependencyDepth": 0.35714285714285715, "maxDependencyDepth": 2, "p95DependencyDepth": 2, - "avgDependentDepth": 0.35714285714285715, - "maxDependentDepth": 2, + "avgDependentDepth": 0, + "maxDependentDepth": 0, "avgDependenciesPerComponent": 0.5714285714285714, "avgDependentsPerComponent": 0.5714285714285714, "detectedPattern": "flat", "hasDeepTrees": false, "hasWideNetwork": false, "recommendedDepth": 2, - "reasoning": "Detected flat architecture. Avg depth: 0.8, P95 depth: 2.0, Max depth: 2. Recommended depth: 2.", - "lastAnalyzed": "2026-05-27T02:55:35.902Z" + "reasoning": "Detected flat architecture. Avg depth: 0.4, P95 depth: 2.0, Max depth: 2. Recommended depth: 2.", + "lastAnalyzed": "2026-05-29T19:54:19.387Z" } \ No newline at end of file diff --git a/fixtures/python-sample/.ste/state/graph/internal/index.yaml b/fixtures/python-sample/.ste/state/graph/internal/index.yaml index 4586f98..03c7a03 100644 --- a/fixtures/python-sample/.ste/state/graph/internal/index.yaml +++ b/fixtures/python-sample/.ste/state/graph/internal/index.yaml @@ -11,7 +11,7 @@ _slice: extraction: method: static confidence: high - timestamp: '2026-05-27T02:55:35.004Z' + timestamp: '2026-05-29T19:54:18.528Z' modules: - id: module-app-__init__ path: app/__init__.py diff --git a/fixtures/python-sample/.ste/state/graph/internal/modules/module-app-__init__.yaml b/fixtures/python-sample/.ste/state/graph/internal/modules/module-app-__init__.yaml index 90dd6e8..fb8fa34 100644 --- a/fixtures/python-sample/.ste/state/graph/internal/modules/module-app-__init__.yaml +++ b/fixtures/python-sample/.ste/state/graph/internal/modules/module-app-__init__.yaml @@ -13,7 +13,7 @@ _slice: extraction: method: static confidence: high - timestamp: '2026-05-27T02:55:35.004Z' + timestamp: '2026-05-29T19:54:18.528Z' module: id: module-app-__init__ path: app/__init__.py diff --git a/fixtures/python-sample/.ste/state/graph/internal/modules/module-app-api.yaml 
b/fixtures/python-sample/.ste/state/graph/internal/modules/module-app-api.yaml index 3b04e10..4d658ac 100644 --- a/fixtures/python-sample/.ste/state/graph/internal/modules/module-app-api.yaml +++ b/fixtures/python-sample/.ste/state/graph/internal/modules/module-app-api.yaml @@ -31,7 +31,7 @@ _slice: extraction: method: static confidence: high - timestamp: '2026-05-27T02:55:35.004Z' + timestamp: '2026-05-29T19:54:18.528Z' module: id: module-app-api path: app/api.py diff --git a/fixtures/python-sample/.ste/state/graph/internal/modules/module-app-services-__init__.yaml b/fixtures/python-sample/.ste/state/graph/internal/modules/module-app-services-__init__.yaml index a81f1bd..c1955e3 100644 --- a/fixtures/python-sample/.ste/state/graph/internal/modules/module-app-services-__init__.yaml +++ b/fixtures/python-sample/.ste/state/graph/internal/modules/module-app-services-__init__.yaml @@ -13,7 +13,7 @@ _slice: extraction: method: static confidence: high - timestamp: '2026-05-27T02:55:35.004Z' + timestamp: '2026-05-29T19:54:18.528Z' module: id: module-app-services-__init__ path: app/services/__init__.py diff --git a/fixtures/python-sample/.ste/state/graph/internal/modules/module-app-services-greeting.yaml b/fixtures/python-sample/.ste/state/graph/internal/modules/module-app-services-greeting.yaml index 04a2a65..7ba90a1 100644 --- a/fixtures/python-sample/.ste/state/graph/internal/modules/module-app-services-greeting.yaml +++ b/fixtures/python-sample/.ste/state/graph/internal/modules/module-app-services-greeting.yaml @@ -19,7 +19,7 @@ _slice: extraction: method: static confidence: high - timestamp: '2026-05-27T02:55:35.004Z' + timestamp: '2026-05-29T19:54:18.528Z' module: id: module-app-services-greeting path: app/services/greeting.py diff --git a/fixtures/python-sample/.ste/state/graph/internal/modules/module-app-services-user_service.yaml b/fixtures/python-sample/.ste/state/graph/internal/modules/module-app-services-user_service.yaml index e8b8835..47ddcbb 100644 --- 
a/fixtures/python-sample/.ste/state/graph/internal/modules/module-app-services-user_service.yaml +++ b/fixtures/python-sample/.ste/state/graph/internal/modules/module-app-services-user_service.yaml @@ -19,7 +19,7 @@ _slice: extraction: method: static confidence: high - timestamp: '2026-05-27T02:55:35.004Z' + timestamp: '2026-05-29T19:54:18.528Z' module: id: module-app-services-user_service path: app/services/user_service.py diff --git a/fixtures/python-sample/.ste/state/manifest/recon-manifest.json b/fixtures/python-sample/.ste/state/manifest/recon-manifest.json index edd2cab..6ee082f 100644 --- a/fixtures/python-sample/.ste/state/manifest/recon-manifest.json +++ b/fixtures/python-sample/.ste/state/manifest/recon-manifest.json @@ -1,34 +1,34 @@ { "version": 1, - "generatedAt": "2026-05-27T02:55:35.709Z", + "generatedAt": "2026-05-29T19:54:19.102Z", "files": { "app/api.py": { "path": "app/api.py", - "mtimeMs": 1768013953694.0403, + "mtimeMs": 1776787004643.1719, "size": 1421, "hash": "b5e175ca50dcd6712422e6ea18130e40ac1a9c6ccb519ca724945092ae4dbfc0" }, "app/__init__.py": { "path": "app/__init__.py", - "mtimeMs": 1768013953694.0403, + "mtimeMs": 1776787004643.1719, "size": 49, "hash": "c71784319958aed60cd445890c62b7946832fc781b44c73c058b30da390ec31d" }, "app/services/greeting.py": { "path": "app/services/greeting.py", - "mtimeMs": 1768013953709.6902, + "mtimeMs": 1776787004644.8289, "size": 1010, "hash": "6d30729c2393d770edb880effc872f534f88cf95d2bd68945f84d9599fef8665" }, "app/services/user_service.py": { "path": "app/services/user_service.py", - "mtimeMs": 1768013953712.8206, + "mtimeMs": 1776787004644.8289, "size": 1159, "hash": "a670c703d9f2769fa2f7f9f6370e6792a8c01373649fc1259ed2464c81cdb07e" }, "app/services/__init__.py": { "path": "app/services/__init__.py", - "mtimeMs": 1768013953716.829, + "mtimeMs": 1776787004644.2263, "size": 23, "hash": "35e97f8b497a8eb82f0c8f40e86919e1896f8e35cf30a1c1e0491d39356c0793" } diff --git a/package.json b/package.json index 
2477989..15f20b8 100644 --- a/package.json +++ b/package.json @@ -32,6 +32,7 @@ "lint:fix": "eslint src vitest.config.ts --fix", "recon": "node dist/cli/recon-cli.js", "recon:workspace": "node dist/cli/recon-cli.js --workspace", + "benchmark:workspace": "node scripts/benchmarks/benchmark-workspace-recon.mjs", "recon:full": "node dist/cli/recon-cli.js --mode=full", "recon:self": "node dist/cli/recon-cli.js --self", "recon:init": "node dist/cli/recon-cli.js --init", diff --git a/scripts/benchmarks/benchmark-workspace-recon.mjs b/scripts/benchmarks/benchmark-workspace-recon.mjs new file mode 100644 index 0000000..e5a8d63 --- /dev/null +++ b/scripts/benchmarks/benchmark-workspace-recon.mjs @@ -0,0 +1,87 @@ +/** + * Repeatable workspace RECON benchmark. + * + * Usage: + * node scripts/benchmarks/benchmark-workspace-recon.mjs + * node scripts/benchmarks/benchmark-workspace-recon.mjs --mode=incremental + * node scripts/benchmarks/benchmark-workspace-recon.mjs --workspace=../.. + * node scripts/benchmarks/benchmark-workspace-recon.mjs --out=./benchmark-results/latest.json + */ + +import { spawnSync } from 'node:child_process'; +import fs from 'node:fs'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const runtimeDir = path.resolve(__dirname, '../..'); +const defaultWorkspaceRoot = path.resolve(runtimeDir, '..'); + +function parseArg(name, fallback = null) { + const eq = process.argv.find(a => a.startsWith(`${name}=`)); + if (eq) { + return eq.slice(name.length + 1); + } + const idx = process.argv.indexOf(name); + if (idx !== -1 && idx + 1 < process.argv.length) { + return process.argv[idx + 1]; + } + return fallback; +} + +function run(command, args, cwd) { + const result = spawnSync(command, args, { + cwd, + stdio: 'inherit', + shell: process.platform === 'win32', + }); + if (result.status !== 0) { + process.exit(result.status ?? 
1); + } +} + +function ensureDir(dirPath) { + fs.mkdirSync(dirPath, { recursive: true }); +} + +const mode = parseArg('--mode', 'full'); +const workspaceArg = parseArg('--workspace', defaultWorkspaceRoot); +const skipBuild = process.argv.includes('--skip-build'); +const outArg = parseArg('--out', null); + +const stamp = new Date().toISOString().replace(/[:.]/g, '-'); +const resultsDir = path.join(runtimeDir, 'benchmark-results'); +ensureDir(resultsDir); + +const outPath = + outArg != null + ? path.resolve(process.cwd(), outArg) + : path.join(resultsDir, `workspace-recon-${stamp}.json`); + +if (!skipBuild) { + console.log('Building ste-runtime...'); + run('npm', ['run', 'build'], runtimeDir); +} + +console.log(''); +console.log('Running workspace RECON benchmark...'); +console.log(` Workspace: ${workspaceArg}`); +console.log(` Mode: ${mode}`); +console.log(` Output: ${outPath}`); +console.log(''); + +run( + 'node', + [ + 'dist/cli/recon-cli.js', + '--workspace', + workspaceArg, + `--mode=${mode}`, + '--benchmark', + `--benchmark-out=${outPath}`, + ], + runtimeDir, +); + +console.log(''); +console.log(`Benchmark complete: ${outPath}`); diff --git a/src/cli/benchmark-report.test.ts b/src/cli/benchmark-report.test.ts new file mode 100644 index 0000000..d2e5da1 --- /dev/null +++ b/src/cli/benchmark-report.test.ts @@ -0,0 +1,142 @@ +import { describe, it, expect } from 'vitest'; +import type { PhaseTimingRecord } from '../utils/concurrency.js'; +import type { ReconResult } from '../recon/index.js'; +import type { WorkspaceReconResult } from '../workspace/workspace-recon.js'; +import { + BENCHMARK_SCHEMA_VERSION, + buildWorkspaceReconBenchmarkReport, + phaseTotalMeasuredMs, + phasesToBenchmarkEntries, + sliceChurnFromRecon, +} from './benchmark-report.js'; + +function makeRecon(overrides: Partial<ReconResult> = {}): ReconResult { + return { + success: true, + conflictsDetected: 0, + aiDocUpdated: 10, + aiDocCreated: 4, + aiDocModified: 6, + aiDocDeleted: 1, + aiDocUnchanged: 20, + 
validationErrors: 0, + validationWarnings: 0, + validationInfo: 0, + errors: [], + warnings: [], + timings: [ + { + phase: 'Phase 2: Extraction', + startMs: 0, + endMs: 100, + durationMs: 100, + itemCount: 50, + throughput: 500, + }, + { + phase: 'Phase 5: Population', + startMs: 100, + endMs: 600, + durationMs: 500, + itemCount: 50, + throughput: 100, + }, + ], + ...overrides, + }; +} + +function makeWorkspaceResult(overrides: Partial = {}): WorkspaceReconResult { + return { + success: true, + workspaceIndexPath: '/tmp/.ste-workspace/workspace-index.yaml', + orchestrationTiming: { + phase: 'Workspace Orchestration', + startMs: 0, + endMs: 5000, + durationMs: 5000, + itemCount: 2, + throughput: 0.4, + }, + repos: [ + { + name: 'repo-a', + status: 'success', + nodeCount: 3, + edgeCount: 1, + durationMs: 1200, + reconResult: makeRecon(), + }, + { + name: 'repo-b', + status: 'skipped', + durationMs: 5, + }, + ], + projectionResult: { fileCount: 10, filePaths: [] }, + multiResProjectionResult: { fileCount: 18, filePaths: [] }, + ...overrides, + }; +} + +describe('benchmark-report', () => { + it('maps phase timings to benchmark entries', () => { + const timings: PhaseTimingRecord[] = [ + { + phase: 'Phase 2: Extraction', + startMs: 0, + endMs: 12.34, + durationMs: 12.34, + itemCount: 10, + throughput: 810.37, + }, + ]; + + expect(phasesToBenchmarkEntries(timings)).toEqual([ + { + phase: 'Phase 2: Extraction', + duration_ms: 12.3, + item_count: 10, + throughput_items_per_sec: 810.4, + }, + ]); + expect(phaseTotalMeasuredMs(timings)).toBe(12.3); + }); + + it('builds workspace benchmark report with repo and self-pass sections', () => { + const wsResult = makeWorkspaceResult(); + const selfResult = makeRecon({ + aiDocCreated: 1, + aiDocModified: 2, + aiDocDeleted: 0, + aiDocUnchanged: 3, + }); + + const report = buildWorkspaceReconBenchmarkReport({ + wsResult, + mode: 'full', + steRuntimeVersion: '0.10.0-test', + wallClockMs: { + workspacePass: 5100, + selfPass: 900, + 
total: 6000, + }, + selfResult, + }); + + expect(report.schema_version).toBe(BENCHMARK_SCHEMA_VERSION); + expect(report.kind).toBe('workspace-recon-benchmark'); + expect(report.mode).toBe('full'); + expect(report.wall_clock_ms.total).toBe(6000); + expect(report.orchestration).toEqual({ + duration_ms: 5000, + repo_count: 2, + throughput_repos_per_sec: 0.4, + }); + expect(report.workspace.graph_nodes).toBe(3); + expect(report.workspace.slice_churn.created).toBe(4); + expect(report.repos).toHaveLength(2); + expect(report.repos[0]?.phase_total_measured_ms).toBe(600); + expect(report.self_pass?.slice_churn).toEqual(sliceChurnFromRecon(selfResult)); + }); +}); diff --git a/src/cli/benchmark-report.ts b/src/cli/benchmark-report.ts new file mode 100644 index 0000000..afeadad --- /dev/null +++ b/src/cli/benchmark-report.ts @@ -0,0 +1,260 @@ +/** + * Structured benchmark report for workspace RECON runs. + */ + +import type { PhaseTimingRecord } from '../utils/concurrency.js'; +import type { ReconResult } from '../recon/index.js'; +import type { WorkspaceReconResult, RepoResult } from '../workspace/workspace-recon.js'; + +export const BENCHMARK_SCHEMA_VERSION = '1.0'; + +export interface PhaseBenchmarkEntry { + phase: string; + duration_ms: number; + item_count: number; + throughput_items_per_sec: number; +} + +export interface SliceChurnBenchmark { + created: number; + modified: number; + deleted: number; + unchanged: number; +} + +export interface RepoBenchmarkEntry { + name: string; + status: RepoResult['status']; + wall_ms: number | null; + graph: { nodes: number; edges: number }; + slice_churn: SliceChurnBenchmark; + phases: PhaseBenchmarkEntry[]; + phase_total_measured_ms: number; + error?: { stage: string; message: string }; +} + +export interface SelfPassBenchmarkEntry { + wall_ms: number; + success: boolean; + slice_churn: SliceChurnBenchmark; + phases: PhaseBenchmarkEntry[]; + phase_total_measured_ms: number; +} + +export interface WorkspaceReconBenchmarkReport { + 
schema_version: string; + kind: 'workspace-recon-benchmark'; + generated_at: string; + ste_runtime_version: string; + mode: 'incremental' | 'full'; + wall_clock_ms: { + workspace_pass: number; + self_pass: number | null; + total: number; + }; + orchestration: { + duration_ms: number; + repo_count: number; + throughput_repos_per_sec: number; + } | null; + workspace: { + success: boolean; + repos_succeeded: number; + repos_failed: number; + repos_skipped: number; + graph_nodes: number; + graph_edges: number; + slice_churn: SliceChurnBenchmark; + projections: number | null; + multi_res_projections: number | null; + workspace_index_path: string; + }; + repos: RepoBenchmarkEntry[]; + self_pass: SelfPassBenchmarkEntry | null; +} + +export function phasesToBenchmarkEntries( + timings: PhaseTimingRecord[] | undefined, +): PhaseBenchmarkEntry[] { + if (!timings) { + return []; + } + return timings.map(t => ({ + phase: t.phase, + duration_ms: roundMs(t.durationMs), + item_count: t.itemCount, + throughput_items_per_sec: roundThroughput(t.throughput), + })); +} + +export function phaseTotalMeasuredMs(timings: PhaseTimingRecord[] | undefined): number { + if (!timings || timings.length === 0) { + return 0; + } + return roundMs(timings.reduce((sum, t) => sum + t.durationMs, 0)); +} + +export function sliceChurnFromRecon(recon: ReconResult | undefined): SliceChurnBenchmark { + return { + created: recon?.aiDocCreated ?? 0, + modified: recon?.aiDocModified ?? 0, + deleted: recon?.aiDocDeleted ?? 0, + unchanged: recon?.aiDocUnchanged ?? 0, + }; +} + +function repoBenchmarkEntry(repo: RepoResult): RepoBenchmarkEntry { + const entry: RepoBenchmarkEntry = { + name: repo.name, + status: repo.status, + wall_ms: repo.durationMs != null ? roundMs(repo.durationMs) : null, + graph: { + nodes: repo.nodeCount ?? 0, + edges: repo.edgeCount ?? 
0, + }, + slice_churn: sliceChurnFromRecon(repo.reconResult), + phases: phasesToBenchmarkEntries(repo.reconResult?.timings), + phase_total_measured_ms: phaseTotalMeasuredMs(repo.reconResult?.timings), + }; + if (repo.error) { + entry.error = { stage: repo.error.stage, message: repo.error.message }; + } + return entry; +} + +function aggregateWorkspaceSliceChurn(repos: RepoResult[]): SliceChurnBenchmark { + const churn: SliceChurnBenchmark = { created: 0, modified: 0, deleted: 0, unchanged: 0 }; + for (const repo of repos) { + if (repo.status !== 'success' || !repo.reconResult) { + continue; + } + churn.created += repo.reconResult.aiDocCreated; + churn.modified += repo.reconResult.aiDocModified; + churn.deleted += repo.reconResult.aiDocDeleted; + churn.unchanged += repo.reconResult.aiDocUnchanged; + } + return churn; +} + +export function buildWorkspaceReconBenchmarkReport(params: { + wsResult: WorkspaceReconResult; + mode: 'incremental' | 'full'; + steRuntimeVersion: string; + wallClockMs: { + workspacePass: number; + selfPass: number | null; + total: number; + }; + selfResult: ReconResult | null; +}): WorkspaceReconBenchmarkReport { + const { wsResult } = params; + const succeeded = wsResult.repos.filter(r => r.status === 'success'); + const failed = wsResult.repos.filter(r => r.status === 'failed' || r.status === 'timed_out'); + const skipped = wsResult.repos.filter(r => r.status === 'skipped'); + + const graphNodes = succeeded.reduce((sum, r) => sum + (r.nodeCount ?? 0), 0); + const graphEdges = succeeded.reduce((sum, r) => sum + (r.edgeCount ?? 0), 0); + + const orchestration = wsResult.orchestrationTiming + ? 
{ + duration_ms: roundMs(wsResult.orchestrationTiming.durationMs), + repo_count: wsResult.orchestrationTiming.itemCount, + throughput_repos_per_sec: roundThroughput(wsResult.orchestrationTiming.throughput), + } + : null; + + let selfPass: SelfPassBenchmarkEntry | null = null; + if (params.selfResult && params.wallClockMs.selfPass != null) { + selfPass = { + wall_ms: roundMs(params.wallClockMs.selfPass), + success: params.selfResult.success, + slice_churn: sliceChurnFromRecon(params.selfResult), + phases: phasesToBenchmarkEntries(params.selfResult.timings), + phase_total_measured_ms: phaseTotalMeasuredMs(params.selfResult.timings), + }; + } + + return { + schema_version: BENCHMARK_SCHEMA_VERSION, + kind: 'workspace-recon-benchmark', + generated_at: new Date().toISOString(), + ste_runtime_version: params.steRuntimeVersion, + mode: params.mode, + wall_clock_ms: { + workspace_pass: roundMs(params.wallClockMs.workspacePass), + self_pass: params.wallClockMs.selfPass != null ? roundMs(params.wallClockMs.selfPass) : null, + total: roundMs(params.wallClockMs.total), + }, + orchestration, + workspace: { + success: wsResult.success, + repos_succeeded: succeeded.length, + repos_failed: failed.length, + repos_skipped: skipped.length, + graph_nodes: graphNodes, + graph_edges: graphEdges, + slice_churn: aggregateWorkspaceSliceChurn(wsResult.repos), + projections: wsResult.projectionResult?.fileCount ?? null, + multi_res_projections: wsResult.multiResProjectionResult?.fileCount ?? 
null, + workspace_index_path: wsResult.workspaceIndexPath, + }, + repos: wsResult.repos.map(repoBenchmarkEntry), + self_pass: selfPass, + }; +} + +export function formatBenchmarkJson(report: WorkspaceReconBenchmarkReport): string { + return `${JSON.stringify(report, null, 2)}\n`; +} + +export function printBenchmarkSummary(report: WorkspaceReconBenchmarkReport): void { + console.log(''); + console.log('=== Workspace RECON Benchmark ==='); + console.log(` Mode: ${report.mode}`); + console.log(` Wall clock: ${report.wall_clock_ms.total} ms total`); + console.log(` ${report.wall_clock_ms.workspace_pass} ms workspace`); + if (report.wall_clock_ms.self_pass != null) { + console.log(` ${report.wall_clock_ms.self_pass} ms self-pass`); + } + if (report.orchestration) { + console.log( + ` Orchestration: ${report.orchestration.duration_ms} ms ` + + `(${report.orchestration.repo_count} repos, ` + + `${report.orchestration.throughput_repos_per_sec} repos/sec)`, + ); + } + console.log( + ` Service graph: ${report.workspace.graph_nodes} nodes, ${report.workspace.graph_edges} edges`, + ); + console.log( + ` Slice churn: +${report.workspace.slice_churn.created} ` + + `~${report.workspace.slice_churn.modified} ` + + `-${report.workspace.slice_churn.deleted} ` + + `=${report.workspace.slice_churn.unchanged} unchanged`, + ); + console.log(''); + console.log(' Per-repo wall time (slowest first):'); + const sorted = [...report.repos].sort((a, b) => (b.wall_ms ?? 0) - (a.wall_ms ?? 0)); + for (const repo of sorted) { + const wall = repo.wall_ms != null ? `${repo.wall_ms} ms` : 'n/a'; + const phases = + repo.phase_total_measured_ms > 0 ? 
` (${repo.phase_total_measured_ms} ms phases)` : ''; + console.log(` ${repo.name.padEnd(28)} ${wall.padStart(8)} ${repo.status}${phases}`); + } + if (report.self_pass) { + console.log(''); + console.log( + ` Self-pass: ${report.self_pass.wall_ms} ms wall, ` + + `${report.self_pass.phase_total_measured_ms} ms phases`, + ); + } + console.log('================================='); +} + +function roundMs(value: number): number { + return Math.round(value * 10) / 10; +} + +function roundThroughput(value: number): number { + return Math.round(value * 10) / 10; +} diff --git a/src/cli/index.ts b/src/cli/index.ts index 7b2b832..ab97247 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -834,6 +834,7 @@ ws } }); + // ─── ste setup ────────────────────────────────────────────── program .command('setup') @@ -1120,4 +1121,140 @@ program log('Setup complete.'); }); +ws + .command('resolve ') + .description('Resolve a workspace graph entity or source URI to authoritative source locator metadata') + .requiredOption('--workspace ', 'Path to workspace output directory') + .action(async (target: string, options: { workspace: string }) => { + const { loadSourceLocatorRegistry, resolveLocator } = await import('../workspace/source-locator-registry.js'); + const registry = await loadSourceLocatorRegistry(path.resolve(options.workspace)); + const locator = resolveLocator(registry, target); + if (!locator) { + console.error(`No source locator resolved for: ${target}`); + process.exit(1); + } + console.log(JSON.stringify({ status: 'resolved', locator }, null, 2)); + }); + +ws + .command('source ') + .description('Resolve a workspace entity/source URI and retrieve authoritative source content') + .requiredOption('--workspace ', 'Path to workspace output directory') + .option('--max-lines ', 'Maximum source lines to return', (v) => Number.parseInt(v, 10), 120) + .action(async (target: string, options: { workspace: string; maxLines: number }) => { + const fs = await 
import('node:fs/promises'); + const { loadSourceLocatorRegistry, resolveLocator } = await import('../workspace/source-locator-registry.js'); + const registry = await loadSourceLocatorRegistry(path.resolve(options.workspace)); + const locator = resolveLocator(registry, target); + if (!locator) { + console.error(`No source locator resolved for: ${target}`); + process.exit(1); + } + const repoRoot = locator.repo_path ?? path.join(path.dirname(path.resolve(options.workspace)), locator.repo); + const sourcePath = path.resolve(repoRoot, locator.path); + const content = await fs.readFile(sourcePath, 'utf-8'); + const lines = content.split('\n').slice(0, options.maxLines); + console.log(JSON.stringify({ + status: 'resolved', + locator, + content: lines.join('\n'), + truncated: content.split('\n').length > options.maxLines, + }, null, 2)); + }); + +ws + .command('cem ') + .description('Assemble a CEM bundle from workspace graph, source locators, and traversal context') + .requiredOption('--workspace ', 'Path to workspace output directory') + .option('--depth ', 'Traversal depth', (v) => Number.parseInt(v, 10), 2) + .option('--max-nodes ', 'Maximum graph nodes', (v) => Number.parseInt(v, 10), 50) + .action(async (target: string, options: { workspace: string; depth: number; maxNodes: number }) => { + const { loadWorkspaceGraph } = await import('../workspace/workspace-graph-loader.js'); + const { loadSourceLocatorRegistry } = await import('../workspace/source-locator-registry.js'); + const { assembleCemBundle } = await import('../workspace/cem-mvc.js'); + const workspace = path.resolve(options.workspace); + const graph = await loadWorkspaceGraph(workspace); + const registry = await loadSourceLocatorRegistry(workspace); + const cem = assembleCemBundle({ + graph, + registry, + query: target, + maxDepth: options.depth, + maxNodes: options.maxNodes, + }); + console.log(JSON.stringify(cem, null, 2)); + }); + +ws + .command('mvc ') + .description('Derive and validate an MVC bundle 
from a CEM bundle') + .requiredOption('--workspace ', 'Path to workspace output directory') + .option('--depth ', 'Traversal depth', (v) => Number.parseInt(v, 10), 2) + .option('--max-nodes ', 'Maximum graph nodes', (v) => Number.parseInt(v, 10), 50) + .option('--max-source-refs ', 'Maximum source references in MVC', (v) => Number.parseInt(v, 10), 8) + .action(async (target: string, options: { workspace: string; depth: number; maxNodes: number; maxSourceRefs: number }) => { + const { loadWorkspaceGraph } = await import('../workspace/workspace-graph-loader.js'); + const { loadSourceLocatorRegistry } = await import('../workspace/source-locator-registry.js'); + const { assembleCemBundle, deriveMvcBundle, validateMvcBundle } = await import('../workspace/cem-mvc.js'); + const workspace = path.resolve(options.workspace); + const graph = await loadWorkspaceGraph(workspace); + const registry = await loadSourceLocatorRegistry(workspace); + const cem = assembleCemBundle({ + graph, + registry, + query: target, + maxDepth: options.depth, + maxNodes: options.maxNodes, + }); + const mvc = deriveMvcBundle(cem, { maxSourceRefs: options.maxSourceRefs }); + const validation = validateMvcBundle(mvc, cem); + console.log(JSON.stringify({ cem, mvc, validation }, null, 2)); + }); + +ws + .command('validate-mvc ') + .description('Validate an MVC bundle JSON file containing { cem, mvc }') + .action(async (bundle: string) => { + const fs = await import('node:fs/promises'); + const { validateMvcBundle } = await import('../workspace/cem-mvc.js'); + const raw = await fs.readFile(path.resolve(bundle), 'utf-8'); + const parsed = JSON.parse(raw) as { cem?: any; mvc?: any }; + if (!parsed.cem || !parsed.mvc) { + console.error('validate-mvc expects a JSON file with top-level { "cem": ..., "mvc": ... 
}'); + process.exit(1); + } + console.log(JSON.stringify(validateMvcBundle(parsed.mvc, parsed.cem), null, 2)); + }); + +ws + .command('neighborhood ') + .description('Traverse a workspace graph neighborhood and include source locator metadata') + .requiredOption('--workspace ', 'Path to workspace output directory') + .option('--depth ', 'Traversal depth', (v) => Number.parseInt(v, 10), 2) + .option('--max-nodes ', 'Maximum graph nodes', (v) => Number.parseInt(v, 10), 50) + .action(async (target: string, options: { workspace: string; depth: number; maxNodes: number }) => { + const { loadWorkspaceGraph } = await import('../workspace/workspace-graph-loader.js'); + const { loadSourceLocatorRegistry, resolveLocator } = await import('../workspace/source-locator-registry.js'); + const { assembleCemBundle } = await import('../workspace/cem-mvc.js'); + const workspace = path.resolve(options.workspace); + const graph = await loadWorkspaceGraph(workspace); + const registry = await loadSourceLocatorRegistry(workspace); + const cem = assembleCemBundle({ + graph, + registry, + query: target, + maxDepth: options.depth, + maxNodes: options.maxNodes, + }); + console.log(JSON.stringify({ + target, + nodes: cem.traversal_context.visited_node_ids.map(id => ({ + id, + locator: resolveLocator(registry, id), + })), + traversal: cem.traversal_context, + negative_space_constraints: cem.negative_space_constraints, + }, null, 2)); + }); + program.parseAsync(); diff --git a/src/cli/recon-cli.ts b/src/cli/recon-cli.ts index 386458b..8d7745d 100644 --- a/src/cli/recon-cli.ts +++ b/src/cli/recon-cli.ts @@ -13,10 +13,16 @@ */ import path from 'node:path'; +import fs from 'node:fs/promises'; import { fileURLToPath } from 'node:url'; import { executeRecon } from '../recon/index.js'; import { loadConfig, loadConfigFromFile, initConfig } from '../config/index.js'; import { executeWorkspaceRecon, type WorkspaceReconResult } from '../workspace/workspace-recon.js'; +import { + 
buildWorkspaceReconBenchmarkReport, + formatBenchmarkJson, + printBenchmarkSummary, +} from './benchmark-report.js'; import { parseWorkspaceArgv, resolveWorkspaceDirectory, @@ -27,6 +33,24 @@ import { const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); +async function loadRuntimeVersion(runtimeDir: string): Promise { + try { + const raw = await fs.readFile(path.join(runtimeDir, 'package.json'), 'utf-8'); + const parsed = JSON.parse(raw) as { version?: string }; + return typeof parsed.version === 'string' ? parsed.version : 'unknown'; + } catch { + return 'unknown'; + } +} + +async function writeBenchmarkReport( + reportPath: string, + json: string, +): Promise { + await fs.mkdir(path.dirname(path.resolve(reportPath)), { recursive: true }); + await fs.writeFile(reportPath, json, 'utf-8'); +} + function parseTimeoutPerRepoMs(args: string[]): number { const eq = args.find(a => a.startsWith('--timeout-per-repo=')); const fromEq = @@ -48,6 +72,20 @@ function parseTimeoutPerRepoMs(args: string[]): number { return 0; } +function parseBenchmarkOut(args: string[]): string | null { + const eq = args.find(a => a.startsWith('--benchmark-out=')); + if (eq !== undefined) { + const value = eq.slice('--benchmark-out='.length).trim(); + return value.length > 0 ? value : null; + } + const idx = args.indexOf('--benchmark-out'); + if (idx !== -1 && idx + 1 < args.length) { + const value = args[idx + 1]?.trim(); + return value && value.length > 0 ? value : null; + } + return null; +} + function parseArgs(args: string[]): { mode: 'incremental' | 'full'; init: boolean; @@ -58,6 +96,8 @@ function parseArgs(args: string[]): { failOnAnyError: boolean; skipUnchanged: boolean; timeoutPerRepoMs: number; + benchmark: boolean; + benchmarkOut: string | null; parseError: string | null; } { const rawMode = args.find(a => a.startsWith('--mode='))?.split('=')[1] ?? 
'incremental'; @@ -79,6 +119,8 @@ function parseArgs(args: string[]): { failOnAnyError: args.includes('--fail-on-any-error'), skipUnchanged: args.includes('--skip-unchanged'), timeoutPerRepoMs: Number.isNaN(timeoutPerRepoMs) ? 0 : timeoutPerRepoMs, + benchmark: args.includes('--benchmark') || parseBenchmarkOut(args) !== null, + benchmarkOut: parseBenchmarkOut(args), parseError, }; } @@ -106,6 +148,10 @@ Options: --timeout-per-repo= Workspace only: per-repo ceiling in ms (omit or use 0 to disable). Example: --timeout-per-repo=60000 --timeout-per-repo + --benchmark Workspace mode: emit structured benchmark summary (+ JSON when used with --benchmark-out) + --benchmark-out= + Write benchmark JSON report to file (implies --benchmark) + --benchmark-out --help, -h Show this help message Self-pass: @@ -205,8 +251,12 @@ async function main() { if (args.timeoutPerRepoMs > 0) { console.log(` Timeout: ${args.timeoutPerRepoMs}ms per repo (--timeout-per-repo)`); } + if (args.benchmark) { + console.log(' Benchmark: enabled (--benchmark)'); + } console.log('='.repeat(60)); console.log(''); + const benchmarkStartMs = performance.now(); const wsResult = await executeWorkspaceRecon({ workspacePath: workspaceResolved, mode: args.mode, @@ -215,6 +265,7 @@ async function main() { skipUnchanged: args.skipUnchanged, timeoutPerRepoMs: args.timeoutPerRepoMs, }); + const workspacePassMs = performance.now() - benchmarkStartMs; console.log(`Workspace index: ${wsResult.workspaceIndexPath}`); console.log(''); for (const r of wsResult.repos) { @@ -237,12 +288,40 @@ async function main() { printWorkspaceResult(wsResult); - // Self-pass: always include ste-runtime itself + const selfPassStartMs = performance.now(); const selfResult = await runSelfPass(runtimeDir, false, args.mode); + const selfPassMs = selfResult ? 
performance.now() - selfPassStartMs : null; if (selfResult) { printSelfResult(selfResult); } + if (args.benchmark) { + const totalMs = performance.now() - benchmarkStartMs; + const runtimeVersion = await loadRuntimeVersion(runtimeDir); + const report = buildWorkspaceReconBenchmarkReport({ + wsResult, + mode: args.mode, + steRuntimeVersion: runtimeVersion, + wallClockMs: { + workspacePass: workspacePassMs, + selfPass: selfPassMs, + total: totalMs, + }, + selfResult, + }); + printBenchmarkSummary(report); + const json = formatBenchmarkJson(report); + if (args.benchmarkOut) { + const outPath = path.resolve(args.benchmarkOut); + await writeBenchmarkReport(outPath, json); + console.log(`Benchmark JSON: ${outPath}`); + } else { + console.log(''); + console.log('=== BENCHMARK JSON ==='); + process.stdout.write(json); + } + } + console.log(''); const allSuccess = wsResult.success && (!selfResult || selfResult.success); process.exit(allSuccess ? 0 : 1); diff --git a/src/config/index.ts b/src/config/index.ts index c65f6ff..f3578e6 100644 --- a/src/config/index.ts +++ b/src/config/index.ts @@ -24,6 +24,7 @@ export const SupportedLanguage = z.enum([ 'css', // E-ADR-006: CSS/SCSS extraction (standalone, cross-cutting) 'csharp', // MP-4c: C#/.NET extraction 'adr-yaml', // ADR-PC-0010: ADR YAML semantic extraction + 'markdown', // Handbook / documentation manuscript extraction ]); export type SupportedLanguage = z.infer; @@ -447,6 +448,11 @@ export async function detectLanguages(projectRoot: string): Promise { + try { + await fs.access(path.join(projectRoot, 'SUMMARY.md')); + return true; + } catch { + /* continue */ + } + + try { + const entries = await fs.readdir(projectRoot, { withFileTypes: true }); + let mdCount = 0; + for (const ent of entries) { + if (ent.isFile() && ent.name.endsWith('.md')) { + mdCount++; + } + if (ent.isDirectory() && /^\d{2}-/.test(ent.name)) { + const chapterDir = path.join(projectRoot, ent.name); + const chapterFiles = await fs.readdir(chapterDir); + 
mdCount += chapterFiles.filter(f => f.endsWith('.md')).length; + } + } + return mdCount >= 8; + } catch { + return false; + } +} + /** * Load configuration from a specific config file path */ diff --git a/src/extractors/markdown/index.ts b/src/extractors/markdown/index.ts new file mode 100644 index 0000000..e78db9e --- /dev/null +++ b/src/extractors/markdown/index.ts @@ -0,0 +1,114 @@ +/** + * Markdown / handbook manuscript extractor. + * + * Shallow extraction for documentation repositories (e.g. ste-handbook): + * chapter documents, section headings, internal links, and STE identifier references. + */ + +import fs from 'node:fs/promises'; +import path from 'node:path'; +import type { DiscoveredFile, RawAssertion } from '../../recon/phases/index.js'; +import { generateSliceId, toPosixPath } from '../../utils/paths.js'; + +const HEADING_LINE = /^(#{1,6})\s+(.+)$/; +const STE_ID = /\b(ADR-[A-Z]+-\d+|INV-\d+)\b/g; +const INTERNAL_MD_LINK = /\[([^\]]*)\]\(([^)]+)\)/g; + +const MAX_SOURCE_CHARS = 12_000; + +function partFromPath(relativePath: string): string | undefined { + const seg = relativePath.split('/')[0]; + if (seg && /^\d{2}-/.test(seg)) { + return seg; + } + return undefined; +} + +function collectSteIds(content: string): string[] { + return [...new Set([...content.matchAll(STE_ID)].map(m => m[0]))]; +} + +function collectInternalLinks(content: string): string[] { + const links: string[] = []; + for (const m of content.matchAll(INTERNAL_MD_LINK)) { + const target = m[2]?.trim(); + if (target && (target.endsWith('.md') || target.includes('.md#'))) { + links.push(target); + } + } + return [...new Set(links)]; +} + +/** + * Extract semantic assertions from a Markdown manuscript file. 
+ */ +export async function extractFromMarkdown(file: DiscoveredFile): Promise { + const normalizedPath = toPosixPath(file.relativePath); + let content: string; + try { + content = await fs.readFile(file.path, 'utf-8'); + } catch { + return []; + } + + const lines = content.split('\n'); + const assertions: RawAssertion[] = []; + + let title = path.basename(normalizedPath, '.md'); + const headings: Array<{ level: number; text: string; line: number }> = []; + + for (let i = 0; i < lines.length; i++) { + const match = HEADING_LINE.exec(lines[i]); + if (!match) continue; + const level = match[1].length; + const text = match[2].trim(); + headings.push({ level, text, line: i + 1 }); + if (level === 1) { + title = text; + } + } + + const steReferences = collectSteIds(content); + const internalLinks = collectInternalLinks(content); + const source = + content.length > MAX_SOURCE_CHARS ? `${content.slice(0, MAX_SOURCE_CHARS)}\n…` : content; + + assertions.push({ + elementId: generateSliceId('handbook_document', normalizedPath, normalizedPath), + elementType: 'handbook_document', + file: normalizedPath, + line: 1, + end_line: lines.length, + language: 'markdown', + metadata: { + title, + part: partFromPath(normalizedPath), + heading_count: headings.length, + ste_references: steReferences, + internal_links: internalLinks, + }, + source, + }); + + for (const h of headings) { + assertions.push({ + elementId: generateSliceId( + 'handbook_section', + normalizedPath, + `${h.level}:${h.line}:${h.text}`, + ), + elementType: 'handbook_section', + file: normalizedPath, + line: h.line, + language: 'markdown', + metadata: { + level: h.level, + title: h.text, + part: partFromPath(normalizedPath), + parent_document: normalizedPath, + }, + }); + } + + return assertions; +} diff --git a/src/extractors/markdown/markdown-extractor.test.ts b/src/extractors/markdown/markdown-extractor.test.ts new file mode 100644 index 0000000..4edae65 --- /dev/null +++ 
b/src/extractors/markdown/markdown-extractor.test.ts @@ -0,0 +1,60 @@ +import { mkdtemp, mkdir, rm, writeFile } from 'node:fs/promises'; +import os from 'node:os'; +import path from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; + +import { extractFromMarkdown } from './index.js'; + +let tempDir: string; + +beforeEach(async () => { + tempDir = await mkdtemp(path.join(os.tmpdir(), 'ste-markdown-extractor-')); +}); + +afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); +}); + +describe('extractFromMarkdown', () => { + it('extracts document, sections, links, and STE references', async () => { + const rel = '08-runtime/08-05-context-assembly-and-mvc.md'; + const abs = path.join(tempDir, rel); + await mkdir(path.dirname(abs), { recursive: true }); + await writeFile( + abs, + `# Context assembly and MVC + +## Overview + +See [MVC manifest](../04-architecture-model/04-09-projections.md). + +Implements ADR-L-0012 and INV-42. +`, + 'utf-8', + ); + + const assertions = await extractFromMarkdown({ + path: abs, + relativePath: rel, + language: 'markdown', + changeType: 'unchanged', + }); + + const doc = assertions.find(a => a.elementType === 'handbook_document'); + expect(doc).toBeDefined(); + expect(doc?.metadata.title).toBe('Context assembly and MVC'); + expect(doc?.metadata.part).toBe('08-runtime'); + expect(doc?.metadata.ste_references).toEqual( + expect.arrayContaining(['ADR-L-0012', 'INV-42']), + ); + expect(doc?.metadata.internal_links).toEqual( + expect.arrayContaining(['../04-architecture-model/04-09-projections.md']), + ); + + const sections = assertions.filter(a => a.elementType === 'handbook_section'); + expect(sections).toHaveLength(2); + expect(sections.map(s => s.metadata.title)).toEqual( + expect.arrayContaining(['Context assembly and MVC', 'Overview']), + ); + }); +}); diff --git a/src/mcp/graph-topology-analyzer.test.ts b/src/mcp/graph-topology-analyzer.test.ts index 8c10832..8d643e3 100644 --- 
a/src/mcp/graph-topology-analyzer.test.ts +++ b/src/mcp/graph-topology-analyzer.test.ts @@ -10,6 +10,7 @@ import path from 'node:path'; import os from 'node:os'; import { analyzeGraphTopology, + computeForwardDepths, saveGraphMetrics, loadGraphMetrics, type GraphMetrics, @@ -261,6 +262,56 @@ describe('Graph Topology Analyzer', () => { expect(metrics.lastAnalyzed).toBeTruthy(); expect(new Date(metrics.lastAnalyzed).getTime()).toBeLessThanOrEqual(Date.now()); }); + + it('should complete within 100ms on a 5000-node synthetic graph', async () => { + const graph: AidocGraph = new Map(); + const N = 5000; + + // Build a chain with branching: node_i depends on node_(i+1) and node_(i+2) + const nodes: AidocNode[] = []; + for (let i = 0; i < N; i++) { + nodes.push(createNode(`n${i}`, 'infra', 'resource')); + } + + for (let i = 0; i < N - 1; i++) { + const next = nodes[i + 1]; + nodes[i].references.push({ domain: next.domain, type: next.type, id: next.id }); + next.referencedBy.push({ domain: nodes[i].domain, type: nodes[i].type, id: nodes[i].id }); + if (i + 2 < N) { + const skip = nodes[i + 2]; + nodes[i].references.push({ domain: skip.domain, type: skip.type, id: skip.id }); + skip.referencedBy.push({ domain: nodes[i].domain, type: nodes[i].type, id: nodes[i].id }); + } + } + + for (const node of nodes) graph.set(node.key, node); + + const start = performance.now(); + const metrics = await analyzeGraphTopology(graph); + const elapsed = performance.now() - start; + + expect(elapsed).toBeLessThan(100); + expect(metrics.totalComponents).toBe(N); + expect(metrics.maxDependencyDepth).toBeGreaterThan(0); + }); + }); + + describe('computeForwardDepths', () => { + it('does not explode queue size when duplicate edges reference the same target', () => { + const graph: AidocGraph = new Map(); + const root = createNode('root', 'graph', 'function'); + const leaf = createNode('leaf', 'graph', 'function'); + const dupEdge = { domain: 'graph', type: 'function', id: 'leaf' }; + 
root.references = [dupEdge, dupEdge, dupEdge]; + leaf.referencedBy = [dupEdge, dupEdge, dupEdge]; + graph.set(root.key, root); + graph.set(leaf.key, leaf); + + const depths = computeForwardDepths(graph); + + expect(depths.get(root.key)).toBe(0); + expect(depths.get(leaf.key)).toBe(1); + }); }); describe('saveGraphMetrics and loadGraphMetrics', () => { diff --git a/src/mcp/graph-topology-analyzer.ts b/src/mcp/graph-topology-analyzer.ts index 3212dbe..97084d5 100644 --- a/src/mcp/graph-topology-analyzer.ts +++ b/src/mcp/graph-topology-analyzer.ts @@ -7,7 +7,7 @@ import fs from 'node:fs/promises'; import path from 'node:path'; -import type { AidocGraph, AidocNode } from '../rss/graph-loader.js'; +import type { AidocGraph } from '../rss/graph-loader.js'; export type ArchitecturePattern = | 'layered' // Clean layer boundaries, moderate depth @@ -45,7 +45,13 @@ export interface GraphMetrics { } /** - * Analyze graph topology and calculate optimal traversal parameters + * Analyze graph topology and calculate optimal traversal parameters. + * + * Uses a single-pass BFS (Kahn's algorithm) to compute forward dependency + * depths in O(N+E). Backward depth metrics (avgDependentDepth, + * maxDependentDepth) are retained at 0 for serialized cache compatibility + * but are not computed -- they were never consumed by detectPattern() or + * calculateOptimalDepth(). */ export async function analyzeGraphTopology(graph: AidocGraph): Promise { const metrics: GraphMetrics = { @@ -72,39 +78,27 @@ export async function analyzeGraphTopology(graph: AidocGraph): Promise 0 ? 
Math.max(...forwardDepths) : 0; metrics.p95DependencyDepth = percentile(forwardDepths, 0.95); - metrics.avgDependentDepth = mean(backwardDepths); - metrics.maxDependentDepth = Math.max(...backwardDepths, 0); - metrics.avgDependenciesPerComponent = mean(dependencyCounts); metrics.avgDependentsPerComponent = mean(dependentCounts); @@ -122,39 +116,52 @@ export async function analyzeGraphTopology(graph: AidocGraph): Promise = new Set() -): number { - if (visited.has(startKey)) { - return 0; +export function computeForwardDepths(graph: AidocGraph): Map { + const depths = new Map(); + const inDegree = new Map(); + + for (const node of graph.values()) { + inDegree.set(node.key, node.referencedBy.length); } - - visited.add(startKey); - const node: AidocNode | undefined = graph.get(startKey); - - if (!node) { - return 0; + + const queue: string[] = []; + for (const [key, deg] of inDegree) { + if (deg === 0) { + queue.push(key); + depths.set(key, 0); + } } - - const edges = direction === 'forward' ? node.references : node.referencedBy; - - if (edges.length === 0) { - return 0; + + let head = 0; + while (head < queue.length) { + const key = queue[head++]; + const node = graph.get(key); + if (!node) continue; + const currentDepth = depths.get(key) ?? 0; + for (const edge of node.references) { + const targetKey = `${edge.domain}/${edge.type}/${edge.id}`; + if (!graph.has(targetKey)) continue; + const newDepth = currentDepth + 1; + depths.set(targetKey, Math.max(depths.get(targetKey) ?? 0, newDepth)); + const prev = inDegree.get(targetKey) ?? 
0; + const remaining = prev - 1; + inDegree.set(targetKey, remaining); + if (remaining === 0) { + queue.push(targetKey); + } + } } - - let maxDepth = 0; - for (const edge of edges) { - const targetKey = `${edge.domain}/${edge.type}/${edge.id}`; - const depth = measureDepth(graph, targetKey, direction, visited); - maxDepth = Math.max(maxDepth, depth); + + // Nodes in cycles or not reachable from roots get depth 0 + for (const node of graph.values()) { + if (!depths.has(node.key)) depths.set(node.key, 0); } - - return maxDepth + 1; + return depths; } /** diff --git a/src/mcp/mcp-server.ts b/src/mcp/mcp-server.ts index 5ac1cfd..03fdfa8 100644 --- a/src/mcp/mcp-server.ts +++ b/src/mcp/mcp-server.ts @@ -15,7 +15,6 @@ import fs from 'node:fs/promises'; import path from 'node:path'; import yaml from 'js-yaml'; import { initRssContext, type RssContext } from '../rss/rss-operations.js'; -import { loadAidocGraph } from '../rss/graph-loader.js'; import { analyzeGraphTopology, saveGraphMetrics, loadGraphMetrics, type GraphMetrics } from './graph-topology-analyzer.js'; import type { ResolvedConfig } from '../config/index.js'; @@ -26,6 +25,8 @@ import { loadWorkspaceGraph } from '../workspace/workspace-graph-loader.js'; import { systemDependencies, componentIntegration, blastRadiusWorkspace, whatCalls, whatDependsOn, blastRadiusNode } from '../workspace/canned-queries.js'; import { toMermaid, toTable, toMermaidAtResolution, toTableAtResolution } from '../workspace/projections.js'; import { compress, type ResolutionLevel } from '../workspace/compression.js'; +import { loadSourceLocatorRegistry, resolveLocator } from '../workspace/source-locator-registry.js'; +import { assembleCemBundle, deriveMvcBundle, validateMvcBundle } from '../workspace/cem-mvc.js'; export interface McpServerOptions { config: ResolvedConfig; @@ -137,17 +138,19 @@ export class McpServer { const stateRoot = await this.resolveProjectStateRoot(); try { - // Load parent project RSS context + // Load parent 
project RSS context (single graph load) this.rssContext = await initRssContext(stateRoot); - // Try to load existing graph metrics + // Try to load existing graph metrics and check staleness this.graphMetrics = await loadGraphMetrics(stateRoot); - // If no metrics exist or they're stale, analyze graph - if (!this.graphMetrics) { + const metricsStale = this.graphMetrics && + Math.abs(this.graphMetrics.totalComponents - this.rssContext.graph.size) > + this.rssContext.graph.size * 0.1; + + if (!this.graphMetrics || metricsStale) { console.error('[MCP Server] Analyzing graph topology...'); - const { graph } = await loadAidocGraph(stateRoot); - this.graphMetrics = await analyzeGraphTopology(graph); + this.graphMetrics = await analyzeGraphTopology(this.rssContext.graph); await saveGraphMetrics(this.graphMetrics, stateRoot); } @@ -162,9 +165,12 @@ export class McpServer { this.selfContext = await initRssContext(selfStateRoot); this.selfGraphMetrics = await loadGraphMetrics(selfStateRoot); - if (!this.selfGraphMetrics) { - const { graph } = await loadAidocGraph(selfStateRoot); - this.selfGraphMetrics = await analyzeGraphTopology(graph); + const selfStale = this.selfGraphMetrics && + Math.abs(this.selfGraphMetrics.totalComponents - this.selfContext.graph.size) > + this.selfContext.graph.size * 0.1; + + if (!this.selfGraphMetrics || selfStale) { + this.selfGraphMetrics = await analyzeGraphTopology(this.selfContext.graph); await saveGraphMetrics(this.selfGraphMetrics, selfStateRoot); } @@ -187,14 +193,12 @@ export class McpServer { const stateRoot = await this.resolveProjectStateRoot(); try { - // Reload parent project RSS context + // Reload parent project RSS context (single graph load) this.rssContext = await initRssContext(stateRoot); - // Reanalyze graph topology - const { graph } = await loadAidocGraph(stateRoot); - const newMetrics = await analyzeGraphTopology(graph); + // Reanalyze graph topology using the already-loaded graph + const newMetrics = await 
analyzeGraphTopology(this.rssContext.graph); - // Check if recommended depth changed significantly if (this.graphMetrics && Math.abs(newMetrics.recommendedDepth - this.graphMetrics.recommendedDepth) >= 1) { console.error(`[MCP Server] Graph structure changed:`); console.error(` - Old depth: ${this.graphMetrics.recommendedDepth}`); @@ -205,8 +209,7 @@ export class McpServer { const selfStateRoot = path.resolve(this.options.config.runtimeDir, '.ste-self', 'state'); try { this.selfContext = await initRssContext(selfStateRoot); - const { graph: selfGraph } = await loadAidocGraph(selfStateRoot); - this.selfGraphMetrics = await analyzeGraphTopology(selfGraph); + this.selfGraphMetrics = await analyzeGraphTopology(this.selfContext.graph); await saveGraphMetrics(this.selfGraphMetrics, selfStateRoot); } catch { // Self-analysis not available, that's OK @@ -407,6 +410,81 @@ export class McpServer { required: ['node_id'], }, }, + { + name: 'ws_resolve_source', + description: 'Resolve a workspace entity ID, entity URI, ADR alias, decision alias, or workspace URI to source locator metadata.', + inputSchema: { + type: 'object', + properties: { + target: { type: 'string', description: 'Entity ID or URI to resolve' }, + }, + required: ['target'], + }, + }, + { + name: 'ws_get_source', + description: 'Resolve a workspace entity/source URI and retrieve bounded authoritative source content.', + inputSchema: { + type: 'object', + properties: { + target: { type: 'string', description: 'Entity ID or URI to resolve' }, + maxLines: { type: 'number', description: 'Maximum source lines to return', default: 120 }, + }, + required: ['target'], + }, + }, + { + name: 'ws_assemble_cem', + description: 'Assemble a CEM bundle from workspace graph traversal, source locators, provenance, and negative-space diagnostics.', + inputSchema: { + type: 'object', + properties: { + target: { type: 'string', description: 'Task text, entity ID, or URI' }, + depth: { type: 'number', default: 2 }, + maxNodes: { 
type: 'number', default: 50 }, + }, + required: ['target'], + }, + }, + { + name: 'ws_derive_mvc', + description: 'Assemble CEM, derive an MVC bundle, and validate MVC against CEM.', + inputSchema: { + type: 'object', + properties: { + target: { type: 'string', description: 'Task text, entity ID, or URI' }, + depth: { type: 'number', default: 2 }, + maxNodes: { type: 'number', default: 50 }, + maxSourceRefs: { type: 'number', default: 8 }, + }, + required: ['target'], + }, + }, + { + name: 'ws_validate_mvc', + description: 'Validate an MVC bundle object against its parent CEM bundle object.', + inputSchema: { + type: 'object', + properties: { + cem: { type: 'object', description: 'Parent CEM bundle' }, + mvc: { type: 'object', description: 'MVC bundle' }, + }, + required: ['cem', 'mvc'], + }, + }, + { + name: 'ws_neighborhood_sources', + description: 'Traverse a workspace graph neighborhood and include source locator metadata for visited nodes.', + inputSchema: { + type: 'object', + properties: { + target: { type: 'string', description: 'Task text, entity ID, or URI' }, + depth: { type: 'number', default: 2 }, + maxNodes: { type: 'number', default: 50 }, + }, + required: ['target'], + }, + }, ], })); @@ -576,6 +654,91 @@ export class McpServer { break; } + case 'ws_resolve_source': { + const wsOutputDir = await this.resolveWorkspaceOutputDir(); + const registry = await loadSourceLocatorRegistry(wsOutputDir); + const locator = resolveLocator(registry, (toolArgs as any).target); + result = locator ? 
{ status: 'resolved', locator } : { status: 'not_found', target: (toolArgs as any).target }; + break; + } + + case 'ws_get_source': { + const wsOutputDir = await this.resolveWorkspaceOutputDir(); + const registry = await loadSourceLocatorRegistry(wsOutputDir); + const locator = resolveLocator(registry, (toolArgs as any).target); + if (!locator) { + result = { status: 'not_found', target: (toolArgs as any).target }; + break; + } + const maxLines = Number((toolArgs as any).maxLines ?? 120); + const repoRoot = locator.repo_path ?? path.join(path.dirname(wsOutputDir), locator.repo); + const sourcePath = path.resolve(repoRoot, locator.path); + const content = await fs.readFile(sourcePath, 'utf-8'); + const allLines = content.split('\n'); + result = { + status: 'resolved', + locator, + content: allLines.slice(0, maxLines).join('\n'), + truncated: allLines.length > maxLines, + }; + break; + } + + case 'ws_assemble_cem': { + const wsOutputDir = await this.resolveWorkspaceOutputDir(); + const wsGraph = await loadWorkspaceGraph(wsOutputDir); + const registry = await loadSourceLocatorRegistry(wsOutputDir); + result = assembleCemBundle({ + graph: wsGraph, + registry, + query: (toolArgs as any).target, + maxDepth: (toolArgs as any).depth, + maxNodes: (toolArgs as any).maxNodes, + }); + break; + } + + case 'ws_derive_mvc': { + const wsOutputDir = await this.resolveWorkspaceOutputDir(); + const wsGraph = await loadWorkspaceGraph(wsOutputDir); + const registry = await loadSourceLocatorRegistry(wsOutputDir); + const cem = assembleCemBundle({ + graph: wsGraph, + registry, + query: (toolArgs as any).target, + maxDepth: (toolArgs as any).depth, + maxNodes: (toolArgs as any).maxNodes, + }); + const mvc = deriveMvcBundle(cem, { maxSourceRefs: (toolArgs as any).maxSourceRefs }); + result = { cem, mvc, validation: validateMvcBundle(mvc, cem) }; + break; + } + + case 'ws_validate_mvc': { + result = validateMvcBundle((toolArgs as any).mvc, (toolArgs as any).cem); + break; + } + + case 
'ws_neighborhood_sources': { + const wsOutputDir = await this.resolveWorkspaceOutputDir(); + const wsGraph = await loadWorkspaceGraph(wsOutputDir); + const registry = await loadSourceLocatorRegistry(wsOutputDir); + const cem = assembleCemBundle({ + graph: wsGraph, + registry, + query: (toolArgs as any).target, + maxDepth: (toolArgs as any).depth, + maxNodes: (toolArgs as any).maxNodes, + }); + result = { + target: (toolArgs as any).target, + nodes: cem.traversal_context.visited_node_ids.map(id => ({ id, locator: resolveLocator(registry, id) })), + traversal: cem.traversal_context, + negative_space_constraints: cem.negative_space_constraints, + }; + break; + } + default: throw new Error(`Unknown tool: ${name}`); } diff --git a/src/recon/implementation-intent.test.ts b/src/recon/implementation-intent.test.ts index c2bc530..f38f8cf 100644 --- a/src/recon/implementation-intent.test.ts +++ b/src/recon/implementation-intent.test.ts @@ -63,13 +63,16 @@ describe('implementation intent helpers', () => { const evidence = collectImplementationAttributionEvidence(assertions); - expect(evidence.schema_version).toBe('1.0'); + expect(evidence.schema_version).toBe('1.2'); expect(evidence.type).toBe('implementation_attribution_evidence'); expect(evidence.records).toHaveLength(1); expect(evidence.records[0].implementation_entity_type).toBe('function'); expect(evidence.records[0].attributed_adrs).toEqual(['ADR-L-0004', 'ADR-PC-0006']); expect(evidence.records[0].enforced_invariants).toEqual(['INV-0006']); expect(evidence.records[0].provenance.source_file).toBe('claims.py'); + expect(evidence.records[0].confidence).toBe('declared'); + expect(evidence.records[0].attributed_capabilities).toEqual([]); + expect(evidence.records[0].attribution_source_language).toBe('python'); }); it('writes implementation attribution evidence to state', async () => { diff --git a/src/recon/implementation-intent.ts b/src/recon/implementation-intent.ts index 1ed5051..495100f 100644 --- 
a/src/recon/implementation-intent.ts +++ b/src/recon/implementation-intent.ts @@ -1,6 +1,7 @@ import fs from 'node:fs/promises'; import path from 'node:path'; import yaml from 'js-yaml'; +import type { SupportedLanguage } from '../config/index.js'; import type { NormalizedAssertion } from './phases/index.js'; export interface ImplementationIntent { @@ -10,6 +11,9 @@ export interface ImplementationIntent { source: 'decorator' | 'metadata'; } +type AttributionConfidenceLevel = 'declared' | 'inferred' | 'heuristic'; +type AttributionSourceLanguage = 'python' | 'typescript' | 'cloudformation' | 'csharp' | 'unknown'; + interface ImplementationAttributionRecord { implementation_entity_id: string; implementation_entity_type: @@ -32,10 +36,13 @@ interface ImplementationAttributionRecord { commit: null; }; metadata: Record; + confidence: AttributionConfidenceLevel; + attributed_capabilities: string[]; + attribution_source_language: AttributionSourceLanguage; } interface ImplementationAttributionEvidence { - schema_version: '1.0'; + schema_version: '1.0' | '1.2'; type: 'implementation_attribution_evidence'; records: ImplementationAttributionRecord[]; } @@ -80,6 +87,21 @@ export function normalizeImplementationIntent( }; } +export function mapAttributionSourceLanguage( + language: SupportedLanguage, +): AttributionSourceLanguage { + switch (language) { + case 'python': + return 'python'; + case 'typescript': + return 'typescript'; + case 'cloudformation': + return 'cloudformation'; + default: + return 'unknown'; + } +} + function mapSliceTypeToEntityType( assertion: NormalizedAssertion, ): ImplementationAttributionRecord['implementation_entity_type'] | undefined { @@ -137,6 +159,9 @@ export function collectImplementationAttributionEvidence( confidence: intent.confidence, slice_id: assertion._slice.id, }, + confidence: intent.confidence as AttributionConfidenceLevel, + attributed_capabilities: [], + attribution_source_language: 
mapAttributionSourceLanguage(assertion.provenance.language), }); } @@ -145,7 +170,7 @@ export function collectImplementationAttributionEvidence( ); return { - schema_version: '1.0', + schema_version: '1.2', type: 'implementation_attribution_evidence', records, }; diff --git a/src/recon/phases/discovery.ts b/src/recon/phases/discovery.ts index 3cc688c..7c47538 100644 --- a/src/recon/phases/discovery.ts +++ b/src/recon/phases/discovery.ts @@ -53,6 +53,7 @@ const LANGUAGE_PATTERNS: Record = { 'adr-yaml': [ '**/*.yaml', ], + markdown: ['**/*.md'], }; /** @@ -113,6 +114,13 @@ const LANGUAGE_IGNORES: Record = { 'adrs/index/**', 'adrs/rendered/**', ], + markdown: [ + '**/node_modules/**', + '**/_internal-references/**', + '**/.writing-rules/**', + '**/.ste-writing-system/**', + '**/.editorial/**', + ], }; /** @@ -149,6 +157,8 @@ function getLanguageForFile(filePath: string, _content?: string): SupportedLangu return 'angular'; // E-ADR-006 } return null; + case '.md': + return 'markdown'; default: return null; } diff --git a/src/recon/phases/extraction.ts b/src/recon/phases/extraction.ts index fcf8e1c..10324bd 100644 --- a/src/recon/phases/extraction.ts +++ b/src/recon/phases/extraction.ts @@ -25,6 +25,7 @@ import { extractFromAngular } from '../../extractors/angular/index.js'; import { extract as extractFromCss } from '../../extractors/css/index.js'; import { extractFromCsharp } from '../../extractors/csharp/index.js'; import { extractFromAdrYaml } from '../../extractors/adr-yaml/index.js'; +import { extractFromMarkdown } from '../../extractors/markdown/index.js'; import { log, warn } from '../../utils/logger.js'; import { type LimitFunction, @@ -113,8 +114,17 @@ export async function extractAssertions(files: DiscoveredFile[]): Promise extractFromCss([f], process.cwd()), cpuLimiter), extractLanguageGroup('csharp', byLanguage.get('csharp') ?? [], extractFromCsharp, cpuLimiter), extractLanguageGroup('adr-yaml', byLanguage.get('adr-yaml') ?? 
[], extractFromAdrYaml, ioLimiter), + extractLanguageGroup('markdown', byLanguage.get('markdown') ?? [], extractFromMarkdown, ioLimiter), ]); const assertions = [ ...tsAssertions, ...pyAssertions, ...cfnAssertions, ...jsonAssertions, ...angularAssertions, ...cssAssertions, - ...csharpAssertions, ...adrYamlAssertions, + ...csharpAssertions, ...adrYamlAssertions, ...markdownAssertions, ]; // Deterministic ordering: sort by elementId for stable output across runs diff --git a/src/recon/phases/index.ts b/src/recon/phases/index.ts index 8b34c0e..dc92e27 100644 --- a/src/recon/phases/index.ts +++ b/src/recon/phases/index.ts @@ -111,7 +111,10 @@ export interface RawAssertion { | 'adr_decision' // Decision declared in an ADR | 'adr_capability' // Capability declared in a logical ADR | 'adr_component' // Component spec in a physical-component ADR - | 'adr_system'; // System boundary in a physical-system ADR + | 'adr_system' // System boundary in a physical-system ADR + // Markdown manuscript (handbook / documentation repos) + | 'handbook_document' // One manuscript file (chapter or root doc) + | 'handbook_section'; // Heading within a manuscript file file: string; line: number; end_line?: number; diff --git a/src/recon/phases/normalization.ts b/src/recon/phases/normalization.ts index fd3338c..c6a4e9d 100644 --- a/src/recon/phases/normalization.ts +++ b/src/recon/phases/normalization.ts @@ -38,6 +38,8 @@ function getExtractorName(language: SupportedLanguage): string { return 'recon-csharp-extractor-v1'; case 'adr-yaml': return 'recon-adr-yaml-extractor-v1'; + case 'markdown': + return 'recon-markdown-extractor-v1'; default: return 'recon-unknown-extractor-v1'; } @@ -62,6 +64,8 @@ function getFileExtension(language: SupportedLanguage): string { return '.scss'; // E-ADR-006: Default to SCSS case 'adr-yaml': return '.yaml'; // ADR-PC-0011: ADR YAML files + case 'markdown': + return '.md'; default: return ''; } @@ -1166,6 +1170,63 @@ function normalizeElement( }, }; } + + // 
============================================================================ + // Markdown manuscript (handbook / documentation repos) + // Domain: architecture | Types: handbook_chapter, handbook_section + // ============================================================================ + + if (assertion.elementType === 'handbook_document') { + return { + _slice: { + id: assertion.elementId, + domain: 'architecture', + type: 'handbook_chapter', + source_files: [assertion.file], + source: assertion.source, + }, + element: { + id: assertion.elementId, + name: assertion.metadata.title as string, + part: assertion.metadata.part, + heading_count: assertion.metadata.heading_count, + ste_references: assertion.metadata.ste_references, + internal_links: assertion.metadata.internal_links, + }, + provenance: { + extracted_at: timestamp, + extractor, + file: assertion.file, + line: assertion.line, + language: assertion.language, + }, + }; + } + + if (assertion.elementType === 'handbook_section') { + return { + _slice: { + id: assertion.elementId, + domain: 'architecture', + type: 'handbook_section', + source_files: [assertion.file], + }, + element: { + id: assertion.elementId, + name: assertion.metadata.title as string, + level: assertion.metadata.level, + part: assertion.metadata.part, + parent_document: assertion.metadata.parent_document, + }, + provenance: { + extracted_at: timestamp, + extractor, + file: assertion.file, + line: assertion.line, + language: assertion.language, + }, + }; + } return null; } diff --git a/src/rss/graph-loader.ts b/src/rss/graph-loader.ts index 4254367..846fa82 100644 --- a/src/rss/graph-loader.ts +++ b/src/rss/graph-loader.ts @@ -5,6 +5,7 @@ import { globby } from 'globby'; import yaml from 'js-yaml'; import { DEFAULT_GRAPH_VERSION, Slice } from './schema.js'; +import { ioLimiter } from '../utils/concurrency.js'; export type AidocEdge = { domain: string; @@ -124,8 +125,16 @@ export async function loadAidocGraph(stateRoot: string): Promise<{ 
graph: AidocG const graph: AidocGraph = new Map(); const sortedFiles = [...files].sort((a, b) => a.localeCompare(b)); - for (const filePath of sortedFiles) { - const data = await readYaml(filePath); + + // Parallel YAML reads with bounded concurrency for I/O overlap + const parsed = await Promise.all( + sortedFiles.map((filePath) => + ioLimiter(() => readYaml(filePath).then((data) => ({ filePath, data }))) + ) + ); + + // Single-threaded graph insertion (Map is not concurrent) + for (const { filePath, data } of parsed) { const slice = data[SLICE_KEY]; if (!slice || typeof slice !== 'object') continue; @@ -138,7 +147,6 @@ export async function loadAidocGraph(stateRoot: string): Promise<{ graph: AidocG const key = `${domain}/${type}/${id}`; if (graph.has(key)) continue; - // Derive repo name from file path relative to stateRoot const relPath = path.relative(resolvedRoot, filePath); const segments = relPath.split(path.sep); const firstSeg = segments[0]; @@ -151,16 +159,13 @@ export async function loadAidocGraph(stateRoot: string): Promise<{ graph: AidocG const references = normalizeEdges(sliceObj.references); const referencedBy = normalizeEdges(sliceObj.referenced_by); - // Extract tags from _slice.tags array const tagsRaw = Array.isArray(sliceObj.tags) ? sliceObj.tags : []; const tags = tagsRaw.map((v) => String(v)).filter(Boolean); const nodePath = sourceFiles.length > 0 ? 
sourceFiles[0] : path.relative(process.cwd(), filePath); - // Extract slice line range - prefer explicit slice.start/end, fallback to provenance.line let sliceRange: Slice | undefined = (sliceObj.slice as Slice | undefined) || undefined; - // If no explicit slice range, try to extract from provenance if (!sliceRange || (sliceRange.start === undefined && sliceRange.end === undefined)) { const provenance = data.provenance as Record | undefined; if (provenance && typeof provenance.line === 'number') { @@ -170,13 +175,9 @@ export async function loadAidocGraph(stateRoot: string): Promise<{ graph: AidocG } } - // Extract element metadata (contains function/class details including docstrings) const element = data.element as Record | undefined; - - // Extract embedded source code (Pillar 1: Rich Slices) const source = typeof sliceObj.source === 'string' ? sliceObj.source : undefined; - // Extract description from element or docstring const description = (element?.docstring as string) || (element?.description as string) || diff --git a/src/utils/atomic-write.test.ts b/src/utils/atomic-write.test.ts new file mode 100644 index 0000000..7d707e9 --- /dev/null +++ b/src/utils/atomic-write.test.ts @@ -0,0 +1,84 @@ +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import fs from 'node:fs/promises'; +import os from 'node:os'; +import path from 'node:path'; +import { atomicWriteFile, MAX_RETRIES } from './atomic-write.js'; + +describe('atomicWriteFile', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'atomic-write-test-')); + }); + + afterEach(async () => { + vi.restoreAllMocks(); + await fs.rm(tempDir, { recursive: true, force: true }); + }); + + it('should write a file atomically', async () => { + const filePath = path.join(tempDir, 'test.txt'); + await atomicWriteFile(filePath, 'hello world'); + const content = await fs.readFile(filePath, 'utf-8'); + expect(content).toBe('hello world'); + 
}); + + it('should create parent directories', async () => { + const filePath = path.join(tempDir, 'nested', 'deep', 'test.txt'); + await atomicWriteFile(filePath, 'nested content'); + const content = await fs.readFile(filePath, 'utf-8'); + expect(content).toBe('nested content'); + }); + + it('should retry on EPERM and succeed after transient failures', async () => { + const filePath = path.join(tempDir, 'retry-success.txt'); + let renameCallCount = 0; + const originalRename = fs.rename; + + vi.spyOn(fs, 'rename').mockImplementation(async (src, dest) => { + renameCallCount++; + if (renameCallCount <= 2) { + const err = new Error('EPERM: operation not permitted') as NodeJS.ErrnoException; + err.code = 'EPERM'; + throw err; + } + return originalRename(src, dest); + }); + + await atomicWriteFile(filePath, 'retry content'); + + expect(renameCallCount).toBe(3); + const content = await fs.readFile(filePath, 'utf-8'); + expect(content).toBe('retry content'); + }); + + it('should throw after MAX_RETRIES exhausted on EPERM', async () => { + const filePath = path.join(tempDir, 'retry-exhaustion.txt'); + let renameCallCount = 0; + + vi.spyOn(fs, 'rename').mockImplementation(async () => { + renameCallCount++; + const err = new Error('EPERM: operation not permitted') as NodeJS.ErrnoException; + err.code = 'EPERM'; + throw err; + }); + + await expect(atomicWriteFile(filePath, 'will fail')).rejects.toThrow('EPERM'); + expect(renameCallCount).toBe(MAX_RETRIES + 1); + }); + + it('should throw immediately on non-retryable error without retry', async () => { + const filePath = path.join(tempDir, 'non-retryable.txt'); + let renameCallCount = 0; + + vi.spyOn(fs, 'rename').mockImplementation(async () => { + renameCallCount++; + const err = new Error('ENOENT: no such file or directory') as NodeJS.ErrnoException; + err.code = 'ENOENT'; + throw err; + }); + + await expect(atomicWriteFile(filePath, 'will fail')).rejects.toThrow('ENOENT'); + expect(renameCallCount).toBe(1); + }); +}); diff 
--git a/src/utils/atomic-write.ts b/src/utils/atomic-write.ts index 8e3f94f..834aa81 100644 --- a/src/utils/atomic-write.ts +++ b/src/utils/atomic-write.ts @@ -8,17 +8,26 @@ * is atomic. On Windows it is "replace" semantics (not guaranteed atomic * across power loss, but sufficient against concurrent process races because * NTFS rename is a single metadata operation). + * + * The rename is retried on transient Windows lock errors (EPERM, EACCES, + * EBUSY) with exponential backoff + jitter to handle AV scanners, IDE + * indexers, and concurrent RECON passes racing on directory metadata. */ import fs from 'node:fs/promises'; import path from 'node:path'; import crypto from 'node:crypto'; +const RETRYABLE_CODES = new Set(['EPERM', 'EACCES', 'EBUSY']); +export const MAX_RETRIES = 3; +export const BASE_DELAY_MS = 50; + /** * Write `content` to `filePath` atomically via write-to-temp + rename. * * 1. Write to `..tmp` in the same directory. - * 2. Rename (replace) the temp file to `filePath`. + * 2. Rename (replace) the temp file to `filePath`, retrying up to + * MAX_RETRIES times on transient Windows lock errors. * 3. On failure, attempt to clean up the temp file. 
*/ export async function atomicWriteFile( @@ -34,7 +43,18 @@ export async function atomicWriteFile( try { await fs.writeFile(tmpPath, content, encoding); - await fs.rename(tmpPath, filePath); + for (let attempt = 0; ; attempt++) { + try { + await fs.rename(tmpPath, filePath); + return; + } catch (renameErr: unknown) { + const code = (renameErr as NodeJS.ErrnoException).code; + if (!code || !RETRYABLE_CODES.has(code) || attempt >= MAX_RETRIES) throw renameErr; + const delay = BASE_DELAY_MS * 2 ** attempt; + const jitter = Math.random() * delay * 0.5; + await new Promise(r => setTimeout(r, delay + jitter)); + } + } } catch (err) { try { await fs.unlink(tmpPath); } catch { /* best-effort cleanup */ } throw err; diff --git a/src/workspace/cem-mvc.test.ts b/src/workspace/cem-mvc.test.ts new file mode 100644 index 0000000..0fbf458 --- /dev/null +++ b/src/workspace/cem-mvc.test.ts @@ -0,0 +1,101 @@ +import { describe, expect, it } from 'vitest'; + +import type { WorkspaceGraph } from './workspace-graph-loader.js'; +import type { SourceLocatorRegistry } from './source-locator-registry.js'; +import { assembleCemBundle, deriveMvcBundle, validateMvcBundle } from './cem-mvc.js'; + +function graph(): WorkspaceGraph { + const nodes = new Map([ + ['Lambda:repoA:fn', { + id: 'Lambda:repoA:fn', + type: 'Lambda', + name: 'fn', + repo: 'repoA', + source_uri: 'workspace://repoA/src/fn.ts', + source_hash: 'sha256:source', + entity_uri: 'entity://workspace/Lambda%3ArepoA%3Afn', + }], + ['Database:repoA:db', { + id: 'Database:repoA:db', + type: 'Database', + name: 'db', + repo: 'repoA', + source_uri: 'workspace://repoA/template.yaml', + source_hash: 'sha256:template', + entity_uri: 'entity://workspace/Database%3ArepoA%3Adb', + }], + ]); + const edges = [{ from: 'Lambda:repoA:fn', to: 'Database:repoA:db', verb: 'reads' }]; + return { + nodes, + edges, + outAdj: new Map([['Lambda:repoA:fn', edges]]), + inAdj: new Map([['Database:repoA:db', edges]]), + }; +} + +const registry: 
SourceLocatorRegistry = { + schema_version: '1.0', + generated_by: 'test', + generated_at: '2026-01-01T00:00:00.000Z', + workspace_manifest_hash: 'sha256:manifest', + graph_snapshot_hash: 'sha256:graph', + locator_registry_hash: 'sha256:registry', + locators: [ + { + entity_uri: 'entity://workspace/Lambda%3ArepoA%3Afn', + entity_id: 'Lambda:repoA:fn', + entity_type: 'Lambda', + source_uri: 'workspace://repoA/src/fn.ts', + repo: 'repoA', + path: 'src/fn.ts', + source_hash: 'sha256:source', + graph_snapshot_hash: 'sha256:graph', + canonical: true, + authority: 'repoA', + provenance_classification: 'derived', + }, + ], +}; + +describe('CEM and MVC bundles', () => { + it('assembles a provenance-rich CEM bundle from graph and locators', () => { + const cem = assembleCemBundle({ + graph: graph(), + registry, + query: 'Lambda:repoA:fn', + generatedAt: '2026-01-01T00:00:00.000Z', + maxDepth: 1, + maxNodes: 10, + }); + + expect(cem.authoritative_source_refs).toHaveLength(1); + expect(cem.traversal_context.visited_node_ids).toEqual(['Lambda:repoA:fn', 'Database:repoA:db']); + expect(cem.negative_space_constraints).toContainEqual(expect.objectContaining({ kind: 'unresolved_locator' })); + expect(cem.graph_snapshot_hash).toBe('sha256:graph'); + }); + + it('derives and validates MVC bundles against their parent CEM bundle', () => { + const cem = assembleCemBundle({ + graph: graph(), + registry, + query: 'Lambda:repoA:fn', + generatedAt: '2026-01-01T00:00:00.000Z', + maxDepth: 1, + maxNodes: 10, + }); + + const mvc = deriveMvcBundle(cem, { + generatedAt: '2026-01-01T00:00:01.000Z', + maxSourceRefs: 1, + }); + const validation = validateMvcBundle(mvc, cem); + + expect(mvc.derived_from_cem_bundle_id).toBe(cem.cem_bundle_id); + expect(mvc.selected_source_refs).toHaveLength(1); + expect(mvc.provenance_refs.graph_snapshot_hash).toBe('sha256:graph'); + expect(validation.status).toBe('valid_with_warnings'); + expect(validation.checks.provenance_complete).toBe(true); + 
expect(validation.warnings.some(w => w.includes('negative-space'))).toBe(true); + }); +}); diff --git a/src/workspace/cem-mvc.ts b/src/workspace/cem-mvc.ts new file mode 100644 index 0000000..e869282 --- /dev/null +++ b/src/workspace/cem-mvc.ts @@ -0,0 +1,360 @@ +import crypto from 'node:crypto'; + +import type { WorkspaceGraph, WorkspaceNode } from './workspace-graph-loader.js'; +import type { SourceLocator, SourceLocatorRegistry } from './source-locator-registry.js'; +import { resolveLocator } from './source-locator-registry.js'; + +export interface CemDiagnostic { + kind: string; + message: string; + entity_id?: string; +} + +export interface TraversalContext { + query: string; + max_depth: number; + max_nodes: number; + visited_node_ids: string[]; + skipped_node_ids: string[]; + truncated: boolean; + operations: Array<{ operation: string; start: string; direction: 'bidirectional'; depth: number }>; +} + +export interface CemBundle { + cem_bundle_id: string; + schema_version: '1.0'; + generated_by: string; + generated_at: string; + workspace_manifest_hash: string; + graph_snapshot_hash: string; + locator_registry_hash: string; + source_hashes: Record; + authoritative_source_refs: SourceLocator[]; + graph_provenance: Array<{ entity_id: string; entity_uri?: string; repo: string; type: string }>; + traversal_context: TraversalContext; + dependency_context: string[]; + blast_radius_context: string[]; + embodiment_evidence: Array<{ entity_id: string; source_uri?: string; evidence_type: string }>; + implementation_linkage_decorators: Array<{ entity_id: string; source_uri?: string }>; + validation_state: { status: 'not_evaluated' | 'evaluated'; diagnostics: CemDiagnostic[] }; + freshness_state: { graph_snapshot_hash: string; locator_registry_hash: string }; + negative_space_constraints: CemDiagnostic[]; + unresolved_risks: CemDiagnostic[]; + partial_state_diagnostics: CemDiagnostic[]; +} + +export interface MvcBundle { + mvc_bundle_id: string; + schema_version: '1.0'; 
+ derived_from_cem_bundle_id: string; + derivation_hash: string; + generated_at: string; + task_or_operation_scope: string; + bounded_context_payload: Array<{ entity_id: string; entity_type: string; repo: string; source_uri?: string }>; + selected_source_refs: SourceLocator[]; + selected_source_snippets: Array<{ source_uri: string; omitted: true; reason: string }>; + selected_graph_entities: string[]; + selected_embodiment_evidence: CemBundle['embodiment_evidence']; + provenance_refs: { + cem_bundle_id: string; + graph_snapshot_hash: string; + locator_registry_hash: string; + }; + traversal_scope: TraversalContext; + freshness_metadata: CemBundle['freshness_state']; + inclusion_rationale: Array<{ entity_id: string; reason: string }>; + exclusion_rationale: CemDiagnostic[]; + negative_space_summary: CemDiagnostic[]; + unresolved_risk_summary: CemDiagnostic[]; + token_or_size_budget: { max_source_refs: number }; + validation_result_ref?: string; +} + +export interface MvcValidationResult { + validation_result_id: string; + status: 'valid' | 'valid_with_warnings' | 'degraded' | 'invalid' | 'blocked'; + checks: { + source_fidelity: boolean; + source_freshness: boolean; + graph_freshness: boolean; + provenance_complete: boolean; + traversal_complete: boolean; + negative_space_included: boolean; + embodiment_evidence_included: boolean; + unresolved_risks_visible: boolean; + }; + warnings: string[]; + errors: string[]; +} + +function stableHash(value: unknown): string { + return `sha256:${crypto.createHash('sha256').update(JSON.stringify(value)).digest('hex')}`; +} + +function findEntryNode(graph: WorkspaceGraph, query: string): WorkspaceNode | undefined { + return graph.nodes.get(query) + ?? [...graph.nodes.values()].find(n => n.id.toLowerCase().includes(query.toLowerCase())) + ?? 
[...graph.nodes.values()].find(n => n.source_uri === query || n.entity_uri === query); +} + +function sortedIncidentEdges(graph: WorkspaceGraph, nodeId: string) { + const edges = [...(graph.outAdj.get(nodeId) ?? []), ...(graph.inAdj.get(nodeId) ?? [])]; + return edges.sort((a, b) => + a.verb.localeCompare(b.verb) || + a.from.localeCompare(b.from) || + a.to.localeCompare(b.to)); +} + +function traverse(graph: WorkspaceGraph, startId: string, maxDepth: number, maxNodes: number): TraversalContext { + const visited: string[] = []; + const visitedSet = new Set(); + const skipped: string[] = []; + const queue: Array<{ id: string; depth: number }> = [{ id: startId, depth: 0 }]; + let truncated = false; + + while (queue.length > 0) { + const current = queue.shift()!; + if (visitedSet.has(current.id)) continue; + if (visited.length >= maxNodes) { + truncated = true; + skipped.push(current.id); + continue; + } + visitedSet.add(current.id); + visited.push(current.id); + if (current.depth >= maxDepth) continue; + + for (const edge of sortedIncidentEdges(graph, current.id)) { + const next = edge.from === current.id ? edge.to : edge.from; + if (!visitedSet.has(next)) { + queue.push({ id: next, depth: current.depth + 1 }); + } + } + } + + return { + query: startId, + max_depth: maxDepth, + max_nodes: maxNodes, + visited_node_ids: visited, + skipped_node_ids: [...new Set(skipped)].sort(), + truncated, + operations: [{ operation: 'workspace-neighborhood', start: startId, direction: 'bidirectional', depth: maxDepth }], + }; +} + +export function assembleCemBundle(args: { + graph: WorkspaceGraph; + registry: SourceLocatorRegistry; + query: string; + generatedAt?: string; + generatedBy?: string; + maxDepth?: number; + maxNodes?: number; +}): CemBundle { + const generatedAt = args.generatedAt ?? new Date().toISOString(); + const generatedBy = args.generatedBy ?? 'ste-runtime'; + const maxDepth = args.maxDepth ?? 2; + const maxNodes = args.maxNodes ?? 
50; + const entry = resolveLocator(args.registry, args.query)?.entity_id + ? args.graph.nodes.get(resolveLocator(args.registry, args.query)!.entity_id) + : findEntryNode(args.graph, args.query); + + const partialStateDiagnostics: CemDiagnostic[] = []; + if (!entry) { + partialStateDiagnostics.push({ kind: 'entry_not_found', message: `No graph entry found for ${args.query}` }); + } + + const traversal = entry + ? traverse(args.graph, entry.id, maxDepth, maxNodes) + : { + query: args.query, + max_depth: maxDepth, + max_nodes: maxNodes, + visited_node_ids: [], + skipped_node_ids: [], + truncated: false, + operations: [], + }; + + const authoritativeSourceRefs: SourceLocator[] = []; + const negativeSpace: CemDiagnostic[] = []; + const graphProvenance: CemBundle['graph_provenance'] = []; + const sourceHashes: Record = {}; + const embodimentEvidence: CemBundle['embodiment_evidence'] = []; + + for (const nodeId of traversal.visited_node_ids) { + const node = args.graph.nodes.get(nodeId); + if (!node) continue; + graphProvenance.push({ entity_id: node.id, entity_uri: node.entity_uri, repo: node.repo, type: node.type }); + const locator = resolveLocator(args.registry, node.id); + if (locator) { + authoritativeSourceRefs.push(locator); + if (locator.source_hash) sourceHashes[locator.source_uri] = locator.source_hash; + embodimentEvidence.push({ entity_id: node.id, source_uri: locator.source_uri, evidence_type: 'source-locator' }); + } else { + negativeSpace.push({ + kind: 'unresolved_locator', + entity_id: node.id, + message: `No source locator resolved for ${node.id}`, + }); + } + } + + if (traversal.truncated) { + negativeSpace.push({ kind: 'truncated_traversal', message: 'Traversal reached max node cap' }); + } + + const dependencyContext = traversal.visited_node_ids.slice(1).sort(); + const bodyForId = { + query: args.query, + graph: args.registry.graph_snapshot_hash, + locators: authoritativeSourceRefs.map(l => l.entity_uri), + traversal: traversal.visited_node_ids, + 
}; + + return { + cem_bundle_id: stableHash(bodyForId), + schema_version: '1.0', + generated_by: generatedBy, + generated_at: generatedAt, + workspace_manifest_hash: args.registry.workspace_manifest_hash, + graph_snapshot_hash: args.registry.graph_snapshot_hash, + locator_registry_hash: args.registry.locator_registry_hash ?? stableHash(args.registry.locators), + source_hashes: sourceHashes, + authoritative_source_refs: authoritativeSourceRefs, + graph_provenance: graphProvenance, + traversal_context: traversal, + dependency_context: dependencyContext, + blast_radius_context: traversal.visited_node_ids, + embodiment_evidence: embodimentEvidence, + implementation_linkage_decorators: embodimentEvidence.map(e => ({ entity_id: e.entity_id, source_uri: e.source_uri })), + validation_state: { status: 'not_evaluated', diagnostics: [] }, + freshness_state: { + graph_snapshot_hash: args.registry.graph_snapshot_hash, + locator_registry_hash: args.registry.locator_registry_hash ?? stableHash(args.registry.locators), + }, + negative_space_constraints: negativeSpace, + unresolved_risks: [...negativeSpace], + partial_state_diagnostics: partialStateDiagnostics, + }; +} + +export function deriveMvcBundle(cem: CemBundle, options: { + generatedAt?: string; + maxSourceRefs?: number; +} = {}): MvcBundle { + const generatedAt = options.generatedAt ?? new Date().toISOString(); + const maxSourceRefs = options.maxSourceRefs ?? 
8; + const selectedSourceRefs = cem.authoritative_source_refs.slice(0, maxSourceRefs); + const selectedSourceUris = new Set(selectedSourceRefs.map(ref => ref.source_uri)); + const payload = cem.graph_provenance.map(entity => { + const locator = cem.authoritative_source_refs.find(l => l.entity_id === entity.entity_id); + return { + entity_id: entity.entity_id, + entity_type: entity.type, + repo: entity.repo, + source_uri: locator?.source_uri, + }; + }); + const excluded = cem.authoritative_source_refs + .filter(ref => !selectedSourceUris.has(ref.source_uri)) + .map(ref => ({ + kind: 'source_ref_budget_excluded', + entity_id: ref.entity_id, + message: `Source reference excluded by MVC max_source_refs=${maxSourceRefs}`, + })); + const body = { + cem: cem.cem_bundle_id, + selected: selectedSourceRefs.map(ref => ref.source_uri), + traversal: cem.traversal_context.visited_node_ids, + }; + const derivationHash = stableHash(body); + + return { + mvc_bundle_id: derivationHash, + schema_version: '1.0', + derived_from_cem_bundle_id: cem.cem_bundle_id, + derivation_hash: derivationHash, + generated_at: generatedAt, + task_or_operation_scope: cem.traversal_context.query, + bounded_context_payload: payload, + selected_source_refs: selectedSourceRefs, + selected_source_snippets: selectedSourceRefs.map(ref => ({ + source_uri: ref.source_uri, + omitted: true, + reason: 'source content is retrieved lazily by source locator', + })), + selected_graph_entities: cem.traversal_context.visited_node_ids, + selected_embodiment_evidence: cem.embodiment_evidence.filter(e => !e.source_uri || selectedSourceUris.has(e.source_uri)), + provenance_refs: { + cem_bundle_id: cem.cem_bundle_id, + graph_snapshot_hash: cem.graph_snapshot_hash, + locator_registry_hash: cem.locator_registry_hash, + }, + traversal_scope: cem.traversal_context, + freshness_metadata: cem.freshness_state, + inclusion_rationale: payload.map(p => ({ entity_id: p.entity_id, reason: 'selected by deterministic CEM traversal' })), + 
exclusion_rationale: excluded, + negative_space_summary: cem.negative_space_constraints, + unresolved_risk_summary: cem.unresolved_risks, + token_or_size_budget: { max_source_refs: maxSourceRefs }, + }; +} + +export function validateMvcBundle(mvc: MvcBundle, cem: CemBundle): MvcValidationResult { + const warnings: string[] = []; + const errors: string[] = []; + + const sourceFidelity = mvc.selected_source_refs.every(ref => + cem.authoritative_source_refs.some(cemRef => cemRef.source_uri === ref.source_uri && cemRef.source_hash === ref.source_hash), + ); + if (!sourceFidelity) errors.push('MVC selected source refs are not faithful to parent CEM source refs.'); + + const provenanceComplete = + mvc.derived_from_cem_bundle_id === cem.cem_bundle_id && + mvc.provenance_refs.cem_bundle_id === cem.cem_bundle_id && + mvc.provenance_refs.graph_snapshot_hash === cem.graph_snapshot_hash && + mvc.provenance_refs.locator_registry_hash === cem.locator_registry_hash; + if (!provenanceComplete) errors.push('MVC provenance chain is incomplete.'); + + const traversalComplete = !cem.traversal_context.truncated; + if (!traversalComplete) warnings.push('MVC parent CEM traversal was truncated.'); + + const negativeSpaceIncluded = mvc.negative_space_summary.length === cem.negative_space_constraints.length; + if (!negativeSpaceIncluded || mvc.negative_space_summary.length > 0) { + warnings.push('MVC carries negative-space constraints from parent CEM.'); + } + + const embodimentIncluded = cem.embodiment_evidence.length === 0 || mvc.selected_embodiment_evidence.length > 0; + if (!embodimentIncluded) warnings.push('MVC omitted relevant embodiment evidence.'); + + const unresolvedRisksVisible = mvc.unresolved_risk_summary.length === cem.unresolved_risks.length; + if (!unresolvedRisksVisible || mvc.unresolved_risk_summary.length > 0) { + warnings.push('MVC carries unresolved risks from parent CEM.'); + } + + let status: MvcValidationResult['status'] = 'valid'; + if (errors.length > 0) { + 
status = 'invalid'; + } else if (!traversalComplete || warnings.length > 0) { + status = 'valid_with_warnings'; + } + + return { + validation_result_id: stableHash({ mvc: mvc.mvc_bundle_id, cem: cem.cem_bundle_id, warnings, errors }), + status, + checks: { + source_fidelity: sourceFidelity, + source_freshness: true, + graph_freshness: mvc.provenance_refs.graph_snapshot_hash === cem.graph_snapshot_hash, + provenance_complete: provenanceComplete, + traversal_complete: traversalComplete, + negative_space_included: negativeSpaceIncluded, + embodiment_evidence_included: embodimentIncluded, + unresolved_risks_visible: unresolvedRisksVisible, + }, + warnings, + errors, + }; +} diff --git a/src/workspace/cfn-type-mapping.ts b/src/workspace/cfn-type-mapping.ts new file mode 100644 index 0000000..807d53a --- /dev/null +++ b/src/workspace/cfn-type-mapping.ts @@ -0,0 +1,129 @@ +/** + * Shared CFN-to-graph-type mapping module. + * + * Single source of truth for mapping AWS CloudFormation resource types to + * workspace graph node types. Used by both slice-emitter.ts and + * resource-resolver.ts to prevent mapping drift. 
+ */ + +export const CFN_TO_GRAPH: Record = { + // Compute + 'AWS::Lambda::Function': 'Lambda', + 'AWS::Serverless::Function': 'Lambda', + + // Orchestration + 'AWS::StepFunctions::StateMachine': 'StateMachine', + 'AWS::Serverless::StateMachine': 'StateMachine', + + // Messaging + 'AWS::SQS::Queue': 'Queue', + 'AWS::SNS::Topic': 'Topic', + + // Storage + 'AWS::S3::Bucket': 'Bucket', + 'AWS::DynamoDB::Table': 'Database', + + // CDN / Edge + 'AWS::CloudFront::Distribution': 'Distribution', + + // Security / WAF + 'AWS::WAFv2::WebACL': 'WebACL', + 'AWS::WAF::WebACL': 'WebACL', + + // Certificate + 'AWS::CertificateManager::Certificate': 'Certificate', + + // DNS + 'AWS::Route53::RecordSet': 'DNSRecord', + 'AWS::Route53::RecordSetGroup': 'DNSRecord', + 'AWS::Route53::HostedZone': 'DNSRecord', + + // API Gateway + 'AWS::ApiGateway::RestApi': 'APIGateway', + 'AWS::ApiGateway::Resource': 'APIGateway', + 'AWS::ApiGatewayV2::Api': 'APIGateway', + 'AWS::Serverless::Api': 'APIGateway', + 'AWS::Serverless::HttpApi': 'APIGateway', + + // Network Security + 'AWS::EC2::SecurityGroup': 'SecurityGroup', + + // Secrets + 'AWS::SecretsManager::Secret': 'Secret', + + // RDS + 'AWS::RDS::DBCluster': 'DBCluster', + 'AWS::RDS::DBInstance': 'DBCluster', + 'AWS::RDS::DBProxy': 'DBProxy', + + // Observability + 'AWS::Logs::LogGroup': 'LogGroup', + 'AWS::CloudWatch::Alarm': 'Alarm', + + // Streaming + 'AWS::KinesisFirehose::DeliveryStream': 'DeliveryStream', + 'AWS::Kinesis::Stream': 'DeliveryStream', + + // Events + 'AWS::Events::Rule': 'EventRule', + 'AWS::Serverless::EventBridgeRule': 'EventRule', + + // IAM + 'AWS::IAM::Role': 'Role', + + // Nested stacks + 'AWS::CloudFormation::Stack': 'Stack', + 'AWS::Serverless::Application': 'Stack', +}; + +/** + * Return the graph node type for a given CFN resource type. + * Falls back to 'InfraResource' for any unmapped AWS::* type. + */ +export function getCfnGraphType(cfnType: string): string { + return CFN_TO_GRAPH[cfnType] ?? 
'InfraResource'; +} + +/** + * Maps graph node types to ordered lists of CFN property keys to try + * when resolving a human-readable display name. The slice emitter tries + * each key in order; the first non-intrinsic string value wins. + * logicalId is always the last-resort fallback (handled by the caller). + */ +export const NODE_NAME_KEYS: Record = { + Lambda: ['functionName', 'FunctionName'], + StateMachine: ['stateMachineName', 'StateMachineName'], + Queue: ['queueName', 'QueueName'], + Topic: ['topicName', 'TopicName'], + Bucket: ['bucketName', 'BucketName'], + Database: ['tableName', 'TableName'], + Distribution: ['distributionConfig.Comment', 'Comment'], + WebACL: ['name', 'Name'], + Certificate: ['domainName', 'DomainName'], + DNSRecord: ['name', 'Name'], + APIGateway: ['name', 'Name', 'StageName'], + SecurityGroup: ['groupDescription', 'GroupDescription', 'GroupName'], + Secret: ['name', 'Name'], + DBCluster: ['dbClusterIdentifier', 'DBClusterIdentifier', 'DBInstanceIdentifier'], + DBProxy: ['dbProxyName', 'DBProxyName'], + LogGroup: ['logGroupName', 'LogGroupName'], + Alarm: ['alarmName', 'AlarmName'], + DeliveryStream: ['deliveryStreamName', 'DeliveryStreamName'], + EventRule: ['name', 'Name'], + Role: ['roleName', 'RoleName'], + Stack: ['name', 'Name'], + InfraResource: ['name', 'Name'], +}; + +/** + * Node types that are high-volume, low-signal at overview resolutions. + * Projections at L0-L2 compress/suppress these; L3-L4 show full detail. 
+ */ +export const AUXILIARY_NODE_TYPES = new Set([ + 'Role', + 'SecurityGroup', + 'LogGroup', + 'Alarm', + 'Certificate', + 'DNSRecord', +]); diff --git a/src/workspace/compression.ts b/src/workspace/compression.ts index cbd38c2..4d460e5 100644 --- a/src/workspace/compression.ts +++ b/src/workspace/compression.ts @@ -14,6 +14,7 @@ import type { ComponentIntegrationResult, WorkspaceBlastRadiusResult, } from './canned-queries.js'; +import { AUXILIARY_NODE_TYPES } from './cfn-type-mapping.js'; // --------------------------------------------------------------------------- // Public types @@ -90,6 +91,7 @@ const VERB_TIER: Record = { consumes: 1, deploys_to: 2, invokes: 2, + contains: 2, has_contract: 3, reads: 4, writes: 4, @@ -154,6 +156,14 @@ function isAlarmTopic(node: WorkspaceNode): boolean { return node.type === 'Topic' && (lower.includes('alarm') || lower.includes('monitor')); } +function isAuxiliaryInfraNode(node: WorkspaceNode): boolean { + return AUXILIARY_NODE_TYPES.has(node.type); +} + +function isSuppressedAtOverview(node: WorkspaceNode): boolean { + return isAlarmTopic(node) || isAuxiliaryInfraNode(node); +} + // --------------------------------------------------------------------------- // Edge filtering by resolution level // --------------------------------------------------------------------------- @@ -223,7 +233,7 @@ function compressComponentIntegration( if (config.suppressAlarmTopics) { const suppressed = new Set(); workingNodes = workingNodes.filter(n => { - if (isAlarmTopic(n)) { + if (isSuppressedAtOverview(n)) { suppressed.add(n.id); return false; } @@ -234,7 +244,7 @@ function compressComponentIntegration( const filteredEdges = allEdges.filter(e => { const from = nodeMap.get(e.from); const to = nodeMap.get(e.to); - if (config.suppressAlarmTopics && (from && isAlarmTopic(from)) || (to && isAlarmTopic(to))) { + if (config.suppressAlarmTopics && ((from && isSuppressedAtOverview(from)) || (to && isSuppressedAtOverview(to)))) { return false; } 
return isEdgeAllowed(e, level, from, to); diff --git a/src/workspace/cross-repo-edges.ts b/src/workspace/cross-repo-edges.ts index 0e51220..8b9e8f7 100644 --- a/src/workspace/cross-repo-edges.ts +++ b/src/workspace/cross-repo-edges.ts @@ -20,6 +20,7 @@ import path from 'node:path'; import yaml from 'js-yaml'; import { globby } from 'globby'; import { ioLimiter } from '../utils/concurrency.js'; +import { atomicWriteFile } from '../utils/atomic-write.js'; import { log, warn } from '../utils/logger.js'; export interface CrossRepoEdge { @@ -655,9 +656,7 @@ export async function enrichSlicesWithBacklinks( referencedBy.push(backlink); (slice as Record).referenced_by = referencedBy; const updatedYaml = yaml.dump(doc, { lineWidth: 120, noRefs: true }); - const tmpFile = file + '.tmp'; - await fs.writeFile(tmpFile, updatedYaml, 'utf-8'); - await fs.rename(tmpFile, file); + await atomicWriteFile(file, updatedYaml); enriched++; } break; @@ -700,9 +699,7 @@ export async function enrichSlicesWithBacklinks( references.push(ref); (slice as Record).references = references; const updatedYaml = yaml.dump(doc, { lineWidth: 120, noRefs: true }); - const tmpFile = file + '.tmp'; - await fs.writeFile(tmpFile, updatedYaml, 'utf-8'); - await fs.rename(tmpFile, file); + await atomicWriteFile(file, updatedYaml); enriched++; } break; diff --git a/src/workspace/manifest.ts b/src/workspace/manifest.ts index c31aacf..e080732 100644 --- a/src/workspace/manifest.ts +++ b/src/workspace/manifest.ts @@ -31,8 +31,18 @@ const LANG_MAP: Record = { node: ['typescript', 'json'], typescript: ['typescript', 'json'], java: ['cloudformation', 'json'], + markdown: ['markdown'], + documentation: ['markdown'], }; +/** Maintainer-only paths excluded when scanning documentation repositories. 
*/ +export const DOCUMENTATION_IGNORE_PATTERNS = [ + '**/_internal-references/**', + '**/.writing-rules/**', + '**/.ste-writing-system/**', + '**/.editorial/**', +] as const; + export const RepoEntrySchema = z.object({ name: z.string().min(1), path: z.string().min(1), @@ -305,6 +315,11 @@ export async function buildPerRepoConfig( languages.push('cloudformation'); } + const ignorePatterns = [...BUILTIN_IGNORE_PATTERNS]; + if (repo.kind === 'documentation' || repo.lang.trim().toLowerCase() === 'markdown') { + ignorePatterns.push(...DOCUMENTATION_IGNORE_PATTERNS); + } + const stateDir = relativeStateDir; const rss: ResolvedConfig['rss'] = { @@ -318,7 +333,7 @@ export async function buildPerRepoConfig( runtimeDir: resolvedRuntime, languages, sourceDirs, - ignorePatterns: [...BUILTIN_IGNORE_PATTERNS], + ignorePatterns, stateDir, jsonPatterns: {}, angularPatterns: {}, diff --git a/src/workspace/repo-sentinel.ts b/src/workspace/repo-sentinel.ts index 5591d98..37424b0 100644 --- a/src/workspace/repo-sentinel.ts +++ b/src/workspace/repo-sentinel.ts @@ -4,7 +4,7 @@ import crypto from 'node:crypto'; import fs from 'node:fs/promises'; -import path from 'node:path'; +import { atomicWriteFile } from '../utils/atomic-write.js'; export interface RepoSentinel { schema_version: '1.0'; @@ -51,10 +51,5 @@ export async function readSentinel(sentinelPath: string): Promise { - await fs.mkdir(path.dirname(sentinelPath), { recursive: true }); - const dir = path.dirname(sentinelPath); - const base = path.basename(sentinelPath); - const tempPath = path.join(dir, `.${base}.${process.pid}.${Date.now()}.tmp`); - await fs.writeFile(tempPath, JSON.stringify(data), 'utf-8'); - await fs.rename(tempPath, sentinelPath); + await atomicWriteFile(sentinelPath, JSON.stringify(data)); } diff --git a/src/workspace/resource-resolver.ts b/src/workspace/resource-resolver.ts index 2299737..2361e40 100644 --- a/src/workspace/resource-resolver.ts +++ b/src/workspace/resource-resolver.ts @@ -15,6 +15,7 @@ import 
{ type ParamResolution, type UnresolvedResolution, } from './cfn-stack-resolver.js'; +import { getCfnGraphType, NODE_NAME_KEYS } from './cfn-type-mapping.js'; const SDK_SERVICE_TO_GRAPH_TYPE: Record = { dynamodb: 'Database', @@ -86,56 +87,20 @@ function isIntrinsicEl(value: unknown): boolean { /** * Same naming rule as slice-emitter `resourceGraphId`: prefer plain string * physical names; when those are intrinsics, fall back to logical ID. + * Uses the shared NODE_NAME_KEYS for generic resolution across all types. */ function displayNameForGraphId(cfnType: string, el: Record, logicalId: string): string { - switch (cfnType) { - case 'AWS::SQS::Queue': { - const q = el.queueName; - if (typeof q === 'string' && !isIntrinsicEl(q)) return q; - return logicalId; + const graphType = getCfnGraphType(cfnType); + const nameKeys = NODE_NAME_KEYS[graphType]; + if (nameKeys) { + for (const key of nameKeys) { + const v = el[key]; + if (typeof v === 'string' && !isIntrinsicEl(v)) return v; } - case 'AWS::SNS::Topic': { - const t = el.topicName; - if (typeof t === 'string' && !isIntrinsicEl(t)) return t; - return logicalId; - } - case 'AWS::Lambda::Function': - case 'AWS::Serverless::Function': { - const f = el.functionName; - if (typeof f === 'string' && !isIntrinsicEl(f)) return f; - return logicalId; - } - case 'AWS::StepFunctions::StateMachine': - case 'AWS::Serverless::StateMachine': { - const s = el.stateMachineName; - if (typeof s === 'string' && !isIntrinsicEl(s)) return s; - return logicalId; - } - case 'AWS::S3::Bucket': { - const b = el.bucketName; - if (typeof b === 'string' && !isIntrinsicEl(b)) return b; - return logicalId; - } - case 'AWS::DynamoDB::Table': { - const d = el.tableName; - if (typeof d === 'string' && !isIntrinsicEl(d)) return d; - return logicalId; - } - default: - return logicalId; } + return logicalId; } -const CFN_TO_GRAPH: Record = { - 'AWS::SQS::Queue': 'Queue', - 'AWS::SNS::Topic': 'Topic', - 'AWS::Lambda::Function': 'Lambda', - 
'AWS::Serverless::Function': 'Lambda', - 'AWS::StepFunctions::StateMachine': 'StateMachine', - 'AWS::Serverless::StateMachine': 'StateMachine', - 'AWS::S3::Bucket': 'Bucket', - 'AWS::DynamoDB::Table': 'Database', -}; function extractRefTarget(value: unknown): string | null { if (typeof value === 'string') { @@ -360,7 +325,7 @@ export async function buildResourceResolverFromState( if (domain === 'infrastructure' && sliceType === 'resource') { const cfnType = String(element.type ?? ''); const logicalId = String(element.logicalId ?? ''); - const graphType = CFN_TO_GRAPH[cfnType]; + const graphType = getCfnGraphType(cfnType); if (logicalId) { logicalIdToCfnType.set(logicalId, cfnType); @@ -376,14 +341,13 @@ export async function buildResourceResolverFromState( } } - if (graphType && logicalId) { + if (logicalId && cfnType) { const el = element as Record; const name = displayNameForGraphId(cfnType, el, logicalId); const norm = normalizeToken(name); - if (norm) { - const repoPrefix = repoName ? `${normalizeToken(repoName)}:` : ''; - logicalIdToGraphId.set(logicalId, `${graphType}:${repoPrefix}${norm}`); - } + const finalNorm = norm || normalizeToken(logicalId) || 'unknown'; + const repoPrefix = repoName ? 
`${normalizeToken(repoName)}:` : ''; + logicalIdToGraphId.set(logicalId, `${graphType}:${repoPrefix}${finalNorm}`); } if (cfnType === 'AWS::Lambda::Function' || cfnType === 'AWS::Serverless::Function') { @@ -554,12 +518,9 @@ export async function buildResourceResolverFromState( const resolvedParamMap = new Map(); for (const entry of paramResolutionTable) { if (entry.confidence !== 'unresolved' && entry.resolvedLogicalId) { - const graphType = CFN_TO_GRAPH[entry.resolvedCfnType]; - if (graphType) { - const existingGid = logicalIdToGraphId.get(entry.resolvedLogicalId); - if (existingGid) { - resolvedParamMap.set(entry.paramName, existingGid); - } + const existingGid = logicalIdToGraphId.get(entry.resolvedLogicalId); + if (existingGid) { + resolvedParamMap.set(entry.paramName, existingGid); } } } diff --git a/src/workspace/slice-emitter.test.ts b/src/workspace/slice-emitter.test.ts index 300795c..d22ed0d 100644 --- a/src/workspace/slice-emitter.test.ts +++ b/src/workspace/slice-emitter.test.ts @@ -2,12 +2,21 @@ import { describe, it, expect, vi, beforeEach } from 'vitest'; import * as fs from 'node:fs/promises'; import yaml from 'js-yaml'; import { emitWorkspaceSlice, loadRepoState } from './slice-emitter.js'; +import { getCfnGraphType } from './cfn-type-mapping.js'; vi.mock('node:fs/promises'); vi.mock('globby', () => ({ globby: vi.fn().mockResolvedValue([]), })); +function makeStateYaml(domain: string, sliceType: string, element: Record, file?: string): string { + return yaml.dump({ + _slice: { domain, type: sliceType }, + element, + provenance: file ? 
{ file } : undefined, + }); +} + describe('slice-emitter', () => { beforeEach(() => { vi.clearAllMocks(); @@ -90,3 +99,216 @@ describe('slice-emitter', () => { }); }); }); + +describe('cfn-type-mapping', () => { + it('maps known CFN types to specific graph types', () => { + expect(getCfnGraphType('AWS::Lambda::Function')).toBe('Lambda'); + expect(getCfnGraphType('AWS::CloudFront::Distribution')).toBe('Distribution'); + expect(getCfnGraphType('AWS::WAFv2::WebACL')).toBe('WebACL'); + expect(getCfnGraphType('AWS::IAM::Role')).toBe('Role'); + expect(getCfnGraphType('AWS::CloudFormation::Stack')).toBe('Stack'); + expect(getCfnGraphType('AWS::RDS::DBCluster')).toBe('DBCluster'); + expect(getCfnGraphType('AWS::Route53::RecordSet')).toBe('DNSRecord'); + }); + + it('returns InfraResource for unmapped AWS types', () => { + expect(getCfnGraphType('AWS::Cognito::UserPool')).toBe('InfraResource'); + expect(getCfnGraphType('AWS::ElasticLoadBalancingV2::LoadBalancer')).toBe('InfraResource'); + expect(getCfnGraphType('AWS::ECS::Service')).toBe('InfraResource'); + }); + + it('returns InfraResource for non-AWS types', () => { + expect(getCfnGraphType('Custom::MyResource')).toBe('InfraResource'); + expect(getCfnGraphType('')).toBe('InfraResource'); + }); +}); + +describe('full infrastructure domain emission', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('emits all CFN resource types including frontend infrastructure', async () => { + const { globby } = await import('globby'); + const stateFiles = [ + '/state/frontend-app/infrastructure/res-cloudfront.yaml', + '/state/frontend-app/infrastructure/res-waf.yaml', + '/state/frontend-app/infrastructure/res-s3.yaml', + '/state/frontend-app/infrastructure/res-route53.yaml', + '/state/frontend-app/infrastructure/res-cert.yaml', + '/state/frontend-app/infrastructure/res-role.yaml', + '/state/frontend-app/infrastructure/res-custom.yaml', + ]; + vi.mocked(globby).mockResolvedValue(stateFiles); + + const stateMap: Record = { + 
[stateFiles[0]]: makeStateYaml('infrastructure', 'resource', { + type: 'AWS::CloudFront::Distribution', + logicalId: 'AppDistribution', + }, 'apps/app-a/cfn_templates/cdn.yaml'), + [stateFiles[1]]: makeStateYaml('infrastructure', 'resource', { + type: 'AWS::WAFv2::WebACL', + logicalId: 'AppWebACL', + name: 'app-a-waf', + }, 'apps/app-a/cfn_templates/waf.yaml'), + [stateFiles[2]]: makeStateYaml('infrastructure', 'resource', { + type: 'AWS::S3::Bucket', + logicalId: 'AppBucket', + bucketName: 'app-a-static-assets', + }, 'apps/app-a/cfn_templates/storage.yaml'), + [stateFiles[3]]: makeStateYaml('infrastructure', 'resource', { + type: 'AWS::Route53::RecordSet', + logicalId: 'AppDNS', + name: 'app-a.example.com', + }, 'apps/app-a/cfn_templates/dns.yaml'), + [stateFiles[4]]: makeStateYaml('infrastructure', 'resource', { + type: 'AWS::CertificateManager::Certificate', + logicalId: 'AppCert', + domainName: 'app-a.example.com', + }, 'apps/app-a/cfn_templates/cert.yaml'), + [stateFiles[5]]: makeStateYaml('infrastructure', 'resource', { + type: 'AWS::IAM::Role', + logicalId: 'AppDeployRole', + roleName: 'app-a-deploy-role', + }, 'apps/app-a/cfn_templates/iam.yaml'), + [stateFiles[6]]: makeStateYaml('infrastructure', 'resource', { + type: 'AWS::Cognito::UserPool', + logicalId: 'AppUserPool', + }, 'apps/app-a/cfn_templates/auth.yaml'), + }; + + vi.mocked(fs.readFile).mockImplementation(async (filePath: any) => { + const content = stateMap[String(filePath)]; + if (content) return content; + throw new Error('not found'); + }); + vi.mocked(fs.mkdir).mockResolvedValue(undefined); + vi.mocked(fs.writeFile).mockResolvedValue(undefined); + + const result = await emitWorkspaceSlice( + 'frontend-app', '/state/frontend-app', '/out/frontend-app.yaml', '/ws/frontend-app'); + + const written = vi.mocked(fs.writeFile).mock.calls[0][1] as string; + const parsed = yaml.load(written) as Record; + const nodes = parsed.nodes as Array>; + const nodeTypes = new Set(nodes.map(n => n.type)); + + 
expect(nodeTypes).toContain('Service'); + expect(nodeTypes).toContain('Distribution'); + expect(nodeTypes).toContain('WebACL'); + expect(nodeTypes).toContain('Bucket'); + expect(nodeTypes).toContain('DNSRecord'); + expect(nodeTypes).toContain('Certificate'); + expect(nodeTypes).toContain('Role'); + expect(nodeTypes).toContain('InfraResource'); + + expect(result.nodeCount).toBe(8); + }); + + it('uses logicalId as last-resort name (never drops a node)', async () => { + const { globby } = await import('globby'); + const stateFiles = ['/state/my-service/infrastructure/res-secgroup.yaml']; + vi.mocked(globby).mockResolvedValue(stateFiles); + + vi.mocked(fs.readFile).mockImplementation(async (filePath: any) => { + if (String(filePath) === stateFiles[0]) { + return makeStateYaml('infrastructure', 'resource', { + type: 'AWS::EC2::SecurityGroup', + logicalId: 'LambdaSecurityGroup', + }, 'cfn_templates/networking.yaml'); + } + throw new Error('not found'); + }); + vi.mocked(fs.mkdir).mockResolvedValue(undefined); + vi.mocked(fs.writeFile).mockResolvedValue(undefined); + + const result = await emitWorkspaceSlice( + 'my-service', '/state/my-service', '/out/my-service.yaml', '/ws/my-service'); + + const written = vi.mocked(fs.writeFile).mock.calls[0][1] as string; + const parsed = yaml.load(written) as Record; + const nodes = parsed.nodes as Array>; + const sgNode = nodes.find(n => n.type === 'SecurityGroup'); + + expect(sgNode).toBeDefined(); + expect(sgNode!.id).toContain('lambdasecuritygroup'); + }); + + it('marks Role nodes as auxiliary', async () => { + const { globby } = await import('globby'); + const stateFiles = ['/state/my-service/infrastructure/res-role.yaml']; + vi.mocked(globby).mockResolvedValue(stateFiles); + + vi.mocked(fs.readFile).mockImplementation(async (filePath: any) => { + if (String(filePath) === stateFiles[0]) { + return makeStateYaml('infrastructure', 'resource', { + type: 'AWS::IAM::Role', + logicalId: 'ProcessorRole', + roleName: 
'processor-execution-role', + }, 'cfn_templates/iam.yaml'); + } + throw new Error('not found'); + }); + vi.mocked(fs.mkdir).mockResolvedValue(undefined); + vi.mocked(fs.writeFile).mockResolvedValue(undefined); + + await emitWorkspaceSlice( + 'my-service', '/state/my-service', '/out/my-service.yaml', '/ws/my-service'); + + const written = vi.mocked(fs.writeFile).mock.calls[0][1] as string; + const parsed = yaml.load(written) as Record; + const nodes = parsed.nodes as Array>; + const roleNode = nodes.find(n => n.type === 'Role'); + + expect(roleNode).toBeDefined(); + const attrs = roleNode!.attributes as Record; + expect(attrs.auxiliary).toBe(true); + expect(attrs.cfn_type).toBe('AWS::IAM::Role'); + }); + + it('emits InfraResource for unknown CFN types with cfn_type preserved', async () => { + const { globby } = await import('globby'); + const stateFiles = ['/state/my-service/infrastructure/res-unknown.yaml']; + vi.mocked(globby).mockResolvedValue(stateFiles); + + vi.mocked(fs.readFile).mockImplementation(async (filePath: any) => { + if (String(filePath) === stateFiles[0]) { + return makeStateYaml('infrastructure', 'resource', { + type: 'AWS::ECS::TaskDefinition', + logicalId: 'WorkerTask', + }, 'cfn_templates/ecs.yaml'); + } + throw new Error('not found'); + }); + vi.mocked(fs.mkdir).mockResolvedValue(undefined); + vi.mocked(fs.writeFile).mockResolvedValue(undefined); + + await emitWorkspaceSlice( + 'my-service', '/state/my-service', '/out/my-service.yaml', '/ws/my-service'); + + const written = vi.mocked(fs.writeFile).mock.calls[0][1] as string; + const parsed = yaml.load(written) as Record; + const nodes = parsed.nodes as Array>; + const infraNode = nodes.find(n => n.type === 'InfraResource'); + + expect(infraNode).toBeDefined(); + expect(infraNode!.id).toContain('InfraResource:'); + const attrs = infraNode!.attributes as Record; + expect(attrs.cfn_type).toBe('AWS::ECS::TaskDefinition'); + }); + + it('no workspace-specific names in emitted slice', async () => { + 
const { globby } = await import('globby'); + vi.mocked(globby).mockResolvedValue([]); + vi.mocked(fs.readFile).mockRejectedValue(new Error('not found')); + vi.mocked(fs.mkdir).mockResolvedValue(undefined); + vi.mocked(fs.writeFile).mockResolvedValue(undefined); + + await emitWorkspaceSlice( + 'generic-app', '/state/generic-app', '/out/generic-app.yaml', '/ws/generic-app'); + + const written = vi.mocked(fs.writeFile).mock.calls[0][1] as string; + const ipTerms = /aos|losprocessor|customerreport|los-ui|gallmann/i; + expect(ipTerms.test(written)).toBe(false); + }); +}); diff --git a/src/workspace/slice-emitter.ts b/src/workspace/slice-emitter.ts index 79ecfea..74c8feb 100644 --- a/src/workspace/slice-emitter.ts +++ b/src/workspace/slice-emitter.ts @@ -17,21 +17,13 @@ import { atomicWriteFile } from '../utils/atomic-write.js'; import { buildResourceResolverFromState, type ResourceResolverResult } from './resource-resolver.js'; import { buildStackTopology } from './cfn-stack-resolver.js'; import type { ExternalSystemEntry } from './manifest.js'; +import { getCfnGraphType, NODE_NAME_KEYS, AUXILIARY_NODE_TYPES } from './cfn-type-mapping.js'; +import { entityUri, workspaceUri } from './source-uri.js'; +import { computeFileHash } from './source-locator-registry.js'; const require = createRequire(import.meta.url); const pkg = require('../../package.json') as { name: string; version: string }; -const CFN_TO_GRAPH: Record = { - 'AWS::SQS::Queue': 'Queue', - 'AWS::SNS::Topic': 'Topic', - 'AWS::Lambda::Function': 'Lambda', - 'AWS::Serverless::Function': 'Lambda', - 'AWS::StepFunctions::StateMachine': 'StateMachine', - 'AWS::Serverless::StateMachine': 'StateMachine', - 'AWS::S3::Bucket': 'Bucket', - 'AWS::DynamoDB::Table': 'Database', -}; - export interface SliceEmitResult { nodeCount: number; edgeCount: number; @@ -43,6 +35,12 @@ interface WorkspaceEntity { type: string; name: string; provenance: { source_path: string; source_ref: string; repo?: string }; + entity_uri?: string; 
+ source_uri?: string; + source_hash?: string; + source_locator_ref?: string; + canonical?: boolean; + authority?: string; attributes?: Record; } @@ -93,46 +91,20 @@ function pickCfnDisplayName( return null; } -function resourceGraphId(cfnType: string, el: Record, repoName?: string): string | null { - const graphType = CFN_TO_GRAPH[cfnType]; - if (!graphType) { - return null; - } +function resourceGraphId(cfnType: string, el: Record, repoName?: string): string { + const graphType = getCfnGraphType(cfnType); const logicalId = String(el.logicalId ?? ''); - let name: string | null = null; - switch (cfnType) { - case 'AWS::SQS::Queue': - name = pickCfnDisplayName(el, logicalId, 'queueName'); - break; - case 'AWS::SNS::Topic': - name = pickCfnDisplayName(el, logicalId, 'topicName'); - break; - case 'AWS::Lambda::Function': - case 'AWS::Serverless::Function': - name = pickCfnDisplayName(el, logicalId, 'functionName'); - break; - case 'AWS::StepFunctions::StateMachine': - case 'AWS::Serverless::StateMachine': - name = pickCfnDisplayName(el, logicalId, 'stateMachineName'); - break; - case 'AWS::S3::Bucket': - name = pickCfnDisplayName(el, logicalId, 'bucketName'); - break; - case 'AWS::DynamoDB::Table': - name = pickCfnDisplayName(el, logicalId, 'tableName'); - break; - default: - name = null; - } - if (!name) { - return null; - } - const norm = normalizeGraphToken(name); - if (!norm) { - return null; - } + + const nameKeys = NODE_NAME_KEYS[graphType]; + const name = nameKeys + ? pickCfnDisplayName(el, logicalId, ...nameKeys) + : pickCfnDisplayName(el, logicalId); + + const displayName = name ?? logicalId; + const norm = normalizeGraphToken(displayName); + const finalNorm = norm || normalizeGraphToken(logicalId) || 'unknown'; const repoPrefix = repoName ? 
`${normalizeGraphToken(repoName)}:` : ''; - return `${graphType}:${repoPrefix}${norm}`; + return `${graphType}:${repoPrefix}${finalNorm}`; } function endpointGraphId(repoName: string, el: Record): string | null { @@ -228,6 +200,36 @@ function graphIdFromParamResolution( return null; } +async function enrichNodeSourceLocators( + nodes: Map, + repoName: string, + repoPath: string, +): Promise { + const hashCache = new Map(); + for (const node of nodes.values()) { + node.entity_uri = entityUri(node.id); + node.source_locator_ref = node.entity_uri; + node.canonical = true; + node.authority = repoName; + + const sourcePath = node.provenance.source_path; + if (!sourcePath || sourcePath === '.') continue; + try { + node.source_uri = workspaceUri(repoName, sourcePath); + } catch { + continue; + } + const abs = path.resolve(repoPath, sourcePath); + if (!hashCache.has(abs)) { + hashCache.set(abs, await computeFileHash(abs)); + } + const hash = hashCache.get(abs); + if (hash) { + node.source_hash = hash; + } + } +} + /** When exactly one node of a graph type exists in the slice, use it to disambiguate SDK wiring. */ function singletonNodeId(nodes: Map, graphType: string): string | null { const ids: string[] = []; @@ -731,14 +733,19 @@ export async function emitWorkspaceSlice( if (domain === 'infrastructure' && sliceType === 'resource') { const cfnType = String(element.type ?? ''); - const gid = resourceGraphId(cfnType, element, repoName); - if (gid) { + if (cfnType) { + const gid = resourceGraphId(cfnType, element, repoName); + const graphType = getCfnGraphType(cfnType); + const attrs: Record = { cfn_type: cfnType, logical_id: element.logicalId }; + if (AUXILIARY_NODE_TYPES.has(graphType)) { + attrs.auxiliary = true; + } nodes.set(gid, { id: gid, - type: CFN_TO_GRAPH[cfnType]!, + type: graphType, name: gid.split(':').slice(1).join(':'), provenance: { source_path: srcFile, source_ref: String(element.logicalId ?? 
''), repo: repoName }, - attributes: { cfn_type: cfnType, logical_id: element.logicalId }, + attributes: attrs, }); } } @@ -809,6 +816,60 @@ export async function emitWorkspaceSlice( const { topology } = await buildStackTopology(stateFiles, stateDir, _repoPath || undefined); const resolver = await buildResourceResolverFromState(stateFiles, stateDir, topology, _repoPath || undefined, repoName); + // Emit Stack nodes from infrastructure/template slices + for (const sf of stateFiles) { + const domain = String(sf.slice.domain ?? ''); + const sliceType = String(sf.slice.type ?? ''); + if (domain !== 'infrastructure' || sliceType !== 'template') continue; + + const templatePath = String(sf.element.templatePath ?? sf.provenance?.file ?? sf.relativePath); + const templateName = templatePath.split('/').pop()?.replace(/\.(yaml|yml|json|template)$/i, '') ?? 'unknown'; + const stackNorm = normalizeGraphToken(templateName); + if (!stackNorm) continue; + + const repoPrefix = repoName ? `${normalizeGraphToken(repoName)}:` : ''; + const stackGid = `Stack:${repoPrefix}${stackNorm}`; + if (!nodes.has(stackGid)) { + nodes.set(stackGid, { + id: stackGid, + type: 'Stack', + name: templateName, + provenance: { source_path: templatePath, source_ref: 'template', repo: repoName }, + attributes: { template_path: templatePath }, + }); + } + } + + // Emit contains edges from stack topology (stackId = parentTemplatePath#logicalId) + if (topology) { + const repoPrefix = repoName ? `${normalizeGraphToken(repoName)}:` : ''; + for (const [_stackId, child] of Object.entries(topology.children)) { + const parentTemplatePath = _stackId.split('#')[0] ?? ''; + const parentTemplateName = parentTemplatePath.split('/').pop()?.replace(/\.(yaml|yml|json|template)$/i, '') ?? 
''; + const parentNorm = normalizeGraphToken(parentTemplateName); + if (!parentNorm) continue; + const parentStackGid = `Stack:${repoPrefix}${parentNorm}`; + + const childTemplateName = child.templatePath.split('/').pop()?.replace(/\.(yaml|yml|json|template)$/i, '') ?? child.logicalId; + const childNorm = normalizeGraphToken(childTemplateName); + if (!childNorm) continue; + const childStackGid = `Stack:${repoPrefix}${childNorm}`; + + if (nodes.has(parentStackGid) && nodes.has(childStackGid) && parentStackGid !== childStackGid) { + const exists = edges.some(e => e.from === parentStackGid && e.to === childStackGid && e.verb === 'contains'); + if (!exists) { + edges.push({ + from: parentStackGid, + to: childStackGid, + verb: 'contains', + confidence: 'high', + provenance: { source_path: parentTemplatePath, source_ref: `nested:${child.logicalId}` }, + }); + } + } + } + } + wireReadWriteEdges(resolver, nodes, edges, diagnostics); wirePublishEdges(resolver, nodes, edges, diagnostics); wireDeploysToEdges(resolver, nodes, edges, diagnostics); @@ -818,6 +879,8 @@ export async function emitWorkspaceSlice( wireExternalSystemEdges(externalSystems, resolver, nodes, edges, diagnostics, repoName); } + await enrichNodeSourceLocators(nodes, repoName, _repoPath); + const body = { schema_version: '1.0', repo: repoName, diff --git a/src/workspace/slice-schema.ts b/src/workspace/slice-schema.ts index c94f6fc..aa4fb84 100644 --- a/src/workspace/slice-schema.ts +++ b/src/workspace/slice-schema.ts @@ -20,6 +20,22 @@ export const NODE_TYPES = [ 'Schema', 'Endpoint', 'ExternalSystem', + 'Stack', + 'Distribution', + 'WebACL', + 'Certificate', + 'DNSRecord', + 'APIGateway', + 'SecurityGroup', + 'Secret', + 'DBCluster', + 'DBProxy', + 'LogGroup', + 'Alarm', + 'DeliveryStream', + 'EventRule', + 'Role', + 'InfraResource', ] as const; export const EDGE_VERBS = [ @@ -35,6 +51,7 @@ export const EDGE_VERBS = [ 'calls', 'triggers', 'publishes_to', + 'contains', ] as const; export type SliceNodeType = 
(typeof NODE_TYPES)[number]; diff --git a/src/workspace/source-locator-registry.test.ts b/src/workspace/source-locator-registry.test.ts new file mode 100644 index 0000000..e235e42 --- /dev/null +++ b/src/workspace/source-locator-registry.test.ts @@ -0,0 +1,132 @@ +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import os from 'node:os'; +import path from 'node:path'; +import yaml from 'js-yaml'; + +import { + emitSourceLocatorRegistry, + loadSourceLocatorRegistry, + resolveLocator, +} from './source-locator-registry.js'; + +let tmpDir: string; + +beforeEach(async () => { + tmpDir = await mkdtemp(path.join(os.tmpdir(), 'source-locators-')); + await mkdir(path.join(tmpDir, 'repoA', 'src'), { recursive: true }); + await mkdir(path.join(tmpDir, 'out', 'slices'), { recursive: true }); + await writeFile(path.join(tmpDir, 'repoA', 'src', 'fn.ts'), 'export const fn = 1;\n', 'utf-8'); + await writeFile( + path.join(tmpDir, 'out', 'slices', 'repoA.yaml'), + yaml.dump({ + schema_version: '1.0', + repo: 'repoA', + generated_by: 'test', + generated_at: '2026-01-01T00:00:00Z', + nodes: [ + { + id: 'Lambda:repoA:fn', + type: 'Lambda', + name: 'fn', + provenance: { source_path: 'src/fn.ts', source_ref: 'fn', repo: 'repoA' }, + }, + ], + edges: [], + }), + 'utf-8', + ); +}); + +afterEach(async () => { + await rm(tmpDir, { recursive: true, force: true }); +}); + +describe('SourceLocatorRegistry', () => { + it('emits deterministic source locator entries from workspace slices', async () => { + const result = await emitSourceLocatorRegistry({ + outputDir: path.join(tmpDir, 'out'), + workspaceRoot: tmpDir, + repos: [{ name: 'repoA', path: 'repoA' }], + graphSnapshotHash: 'sha256:graph', + workspaceManifestHash: 'sha256:manifest', + generatedAt: '2026-01-01T00:00:00.000Z', + generatedBy: 'test', + }); + + expect(result.registry.locators).toHaveLength(1); + 
expect(result.registry.locators[0]).toMatchObject({ + entity_uri: 'entity://workspace/Lambda%3ArepoA%3Afn', + entity_id: 'Lambda:repoA:fn', + entity_type: 'Lambda', + source_uri: 'workspace://repoA/src/fn.ts', + repo: 'repoA', + path: 'src/fn.ts', + graph_snapshot_hash: 'sha256:graph', + canonical: true, + authority: 'repoA', + }); + expect(result.registry.locators[0].source_hash).toMatch(/^sha256:/); + + const raw = await readFile(path.join(tmpDir, 'out', 'source-locator-registry.yaml'), 'utf-8'); + expect(raw).not.toContain('export const fn'); + }); + + it('resolves entity and workspace URIs through the registry', async () => { + await emitSourceLocatorRegistry({ + outputDir: path.join(tmpDir, 'out'), + workspaceRoot: tmpDir, + repos: [{ name: 'repoA', path: 'repoA' }], + graphSnapshotHash: 'sha256:graph', + workspaceManifestHash: 'sha256:manifest', + generatedAt: '2026-01-01T00:00:00.000Z', + generatedBy: 'test', + }); + + const registry = await loadSourceLocatorRegistry(path.join(tmpDir, 'out')); + expect(resolveLocator(registry, 'Lambda:repoA:fn')?.source_uri).toBe('workspace://repoA/src/fn.ts'); + expect(resolveLocator(registry, 'entity://workspace/Lambda%3ArepoA%3Afn')?.source_uri).toBe('workspace://repoA/src/fn.ts'); + expect(resolveLocator(registry, 'workspace://repoA/src/fn.ts')?.entity_id).toBe('Lambda:repoA:fn'); + }); + + it('emits locators from ADR architecture entity registries', async () => { + await mkdir(path.join(tmpDir, 'repoA', 'adrs', 'index'), { recursive: true }); + await mkdir(path.join(tmpDir, 'repoA', 'adrs', 'logical'), { recursive: true }); + await writeFile(path.join(tmpDir, 'repoA', 'adrs', 'logical', 'ADR-L-0001-test.yaml'), 'id: ADR-L-0001\n', 'utf-8'); + await writeFile( + path.join(tmpDir, 'repoA', 'adrs', 'index', 'entity-registry.yaml'), + yaml.dump({ + schema_version: '1.1', + type: 'normalized_entity_registry', + entities: [ + { + id: 'ADR-L-0001', + entity_type: 'adr', + name: 'Test ADR', + canonical_source: { + source_type: 
'logical_adr', + source_ref: 'ADR-L-0001', + artifact_path: 'adrs/logical/ADR-L-0001-test.yaml', + }, + }, + ], + }), + 'utf-8', + ); + + await emitSourceLocatorRegistry({ + outputDir: path.join(tmpDir, 'out'), + workspaceRoot: tmpDir, + repos: [{ name: 'repoA', path: 'repoA' }], + graphSnapshotHash: 'sha256:graph', + workspaceManifestHash: 'sha256:manifest', + generatedAt: '2026-01-01T00:00:00.000Z', + generatedBy: 'test', + }); + + const registry = await loadSourceLocatorRegistry(path.join(tmpDir, 'out')); + expect(resolveLocator(registry, 'adr://ADR-L-0001')?.source_uri).toBe( + 'workspace://repoA/adrs/logical/ADR-L-0001-test.yaml', + ); + }); +}); diff --git a/src/workspace/source-locator-registry.ts b/src/workspace/source-locator-registry.ts new file mode 100644 index 0000000..e9ad525 --- /dev/null +++ b/src/workspace/source-locator-registry.ts @@ -0,0 +1,297 @@ +import crypto from 'node:crypto'; +import fs from 'node:fs/promises'; +import path from 'node:path'; +import yaml from 'js-yaml'; + +import { entityUri, parseSourceUri, workspaceUri, type LineRange } from './source-uri.js'; + +export interface SourceLocator { + entity_uri: string; + entity_id: string; + entity_type: string; + source_uri: string; + repo: string; + repo_path?: string; + path: string; + line_range?: LineRange; + source_hash?: string; + graph_snapshot_hash: string; + canonical: boolean; + authority: string; + provenance_classification: 'explicit' | 'derived' | 'heuristic'; + generated_from?: string; +} + +export interface SourceLocatorRegistry { + schema_version: '1.0'; + generated_by: string; + generated_at: string; + workspace_manifest_hash: string; + graph_snapshot_hash: string; + locator_registry_hash?: string; + locators: SourceLocator[]; +} + +export interface EmitSourceLocatorRegistryOptions { + outputDir: string; + workspaceRoot: string; + repos: Array<{ name: string; path: string }>; + graphSnapshotHash: string; + workspaceManifestHash: string; + generatedAt: string; + 
generatedBy: string; +} + +interface SliceNode { + id?: string; + type?: string; + provenance?: { source_path?: string; source_ref?: string; repo?: string }; + source_uri?: string; + entity_uri?: string; + source_hash?: string; +} + +interface SliceDoc { + repo?: string; + nodes?: SliceNode[]; +} + +interface ArchitectureEntityRegistry { + entities?: Array<{ + id?: string; + entity_type?: string; + canonical_source?: { + artifact_path?: string; + source_ref?: string; + source_type?: string; + }; + }>; +} + +function sha256(text: string | Buffer): string { + return `sha256:${crypto.createHash('sha256').update(text).digest('hex')}`; +} + +export async function computeFileHash(filePath: string): Promise { + try { + return sha256(await fs.readFile(filePath)); + } catch { + return undefined; + } +} + +export async function hashFileIfPresent(filePath: string): Promise { + return computeFileHash(filePath); +} + +function registryHash(registry: SourceLocatorRegistry): string { + const comparable = { ...registry, locator_registry_hash: undefined }; + return sha256(yaml.dump(comparable, { lineWidth: 120, noRefs: true })); +} + +function repoPathMap(options: EmitSourceLocatorRegistryOptions): Map { + const repos = new Map(); + for (const repo of options.repos) { + repos.set(repo.name, path.resolve(options.workspaceRoot, repo.path)); + } + return repos; +} + +function inferAuthority(repo: string, entityType: string): string { + if (entityType.toLowerCase() === 'adr') return 'adr-architecture-kit'; + return repo; +} + +function locatorForNode( + node: SliceNode, + repo: string, + repoPath: string | undefined, + graphSnapshotHash: string, + sourceHash?: string, +): SourceLocator | null { + if (!node.id || !node.type) return null; + const sourcePath = node.provenance?.source_path; + if (!sourcePath || sourcePath === '.') return null; + let sourceUri: string; + try { + sourceUri = node.source_uri ?? 
workspaceUri(repo, sourcePath); + } catch { + return null; + } + return { + entity_uri: node.entity_uri ?? entityUri(node.id), + entity_id: node.id, + entity_type: node.type, + source_uri: sourceUri, + repo, + repo_path: repoPath?.replace(/\\/g, '/'), + path: sourcePath.replace(/\\/g, '/'), + source_hash: node.source_hash ?? sourceHash, + graph_snapshot_hash: graphSnapshotHash, + canonical: true, + authority: inferAuthority(repo, node.type), + provenance_classification: 'derived', + generated_from: 'workspace-slice', + }; +} + +async function readSliceFiles(outputDir: string): Promise> { + const slicesDir = path.join(outputDir, 'slices'); + let entries: string[]; + try { + entries = await fs.readdir(slicesDir); + } catch { + return []; + } + const slices: Array<{ file: string; doc: SliceDoc }> = []; + for (const entry of entries.filter(e => e.endsWith('.yaml')).sort()) { + const file = path.join(slicesDir, entry); + try { + const raw = await fs.readFile(file, 'utf-8'); + const doc = yaml.load(raw) as SliceDoc | null; + if (doc) slices.push({ file, doc }); + } catch { + /* skip unreadable slices */ + } + } + return slices; +} + +async function locatorsFromArchitectureRegistry( + repo: string, + repoRoot: string, + graphSnapshotHash: string, +): Promise { + const registryPath = path.join(repoRoot, 'adrs', 'index', 'entity-registry.yaml'); + let doc: ArchitectureEntityRegistry | null = null; + try { + doc = yaml.load(await fs.readFile(registryPath, 'utf-8')) as ArchitectureEntityRegistry | null; + } catch { + return []; + } + if (!doc || !Array.isArray(doc.entities)) return []; + + const locators: SourceLocator[] = []; + for (const entity of doc.entities) { + if (!entity.id || !entity.entity_type) continue; + const sourcePath = entity.canonical_source?.artifact_path; + if (!sourcePath) continue; + try { + const sourceHash = await computeFileHash(path.resolve(repoRoot, sourcePath)); + locators.push({ + entity_uri: entityUri(entity.id), + entity_id: entity.id, + 
entity_type: entity.entity_type, + source_uri: workspaceUri(repo, sourcePath), + repo, + repo_path: repoRoot.replace(/\\/g, '/'), + path: sourcePath.replace(/\\/g, '/'), + source_hash: sourceHash, + graph_snapshot_hash: graphSnapshotHash, + canonical: true, + authority: inferAuthority(repo, entity.entity_type), + provenance_classification: 'explicit', + generated_from: 'architecture-entity-registry', + }); + } catch { + /* skip non-portable registry entries */ + } + } + return locators; +} + +export async function emitSourceLocatorRegistry( + options: EmitSourceLocatorRegistryOptions, +): Promise<{ registry: SourceLocatorRegistry; registryPath: string }> { + const repos = repoPathMap(options); + const locators: SourceLocator[] = []; + const hashCache = new Map(); + + for (const { doc } of await readSliceFiles(options.outputDir)) { + const repo = doc.repo; + if (!repo || !Array.isArray(doc.nodes)) continue; + const repoRoot = repos.get(repo); + for (const node of doc.nodes) { + const sourcePath = node.provenance?.source_path; + let fileHash: string | undefined; + if (repoRoot && sourcePath && sourcePath !== '.') { + const fullPath = path.resolve(repoRoot, sourcePath); + if (!hashCache.has(fullPath)) { + hashCache.set(fullPath, await computeFileHash(fullPath)); + } + fileHash = hashCache.get(fullPath); + } + const locator = locatorForNode(node, repo, repos.get(repo), options.graphSnapshotHash, fileHash); + if (locator) locators.push(locator); + } + } + + for (const [repo, repoRoot] of repos) { + locators.push(...await locatorsFromArchitectureRegistry(repo, repoRoot, options.graphSnapshotHash)); + } + + const deduped = new Map(); + for (const locator of locators) { + const key = `${locator.entity_uri}\0${locator.source_uri}`; + if (!deduped.has(key)) { + deduped.set(key, locator); + } + } + const sortedLocators = [...deduped.values()] + .sort((a, b) => a.entity_uri.localeCompare(b.entity_uri) || a.source_uri.localeCompare(b.source_uri)); + const registry: 
SourceLocatorRegistry = { + schema_version: '1.0', + generated_by: options.generatedBy, + generated_at: options.generatedAt, + workspace_manifest_hash: options.workspaceManifestHash, + graph_snapshot_hash: options.graphSnapshotHash, + locators: sortedLocators, + }; + registry.locator_registry_hash = registryHash(registry); + + const registryPath = path.join(options.outputDir, 'source-locator-registry.yaml'); + await fs.writeFile(registryPath, yaml.dump(registry, { lineWidth: 120, noRefs: true }), 'utf-8'); + return { registry, registryPath }; +} + +export async function loadSourceLocatorRegistry(outputDir: string): Promise { + const raw = await fs.readFile(path.join(outputDir, 'source-locator-registry.yaml'), 'utf-8'); + return yaml.load(raw) as SourceLocatorRegistry; +} + +export function resolveLocator( + registry: SourceLocatorRegistry, + entityOrUri: string, +): SourceLocator | undefined { + const parsed = parseSourceUri(entityOrUri); + switch (parsed.kind) { + case 'entity': + return registry.locators.find(l => l.entity_id === parsed.entityId || l.entity_uri === entityOrUri); + case 'workspace': + return registry.locators.find(l => l.repo === parsed.repo && l.path === parsed.path); + case 'adr': + return registry.locators.find(l => l.entity_id === parsed.adrId || l.source_uri.toLowerCase().includes(parsed.adrId.toLowerCase())); + case 'decision': + return registry.locators.find(l => l.entity_id === parsed.decisionId || l.entity_id.includes(parsed.decisionId)); + case 'graph': + return registry.locators.find(l => l.entity_id === parsed.entityId && l.graph_snapshot_hash === parsed.graphSnapshotHash); + case 'projection': + return undefined; + } +} + +export async function resolveLocatorFreshness( + registry: SourceLocatorRegistry, + locator: SourceLocator, + workspaceRoot: string, + repos: Array<{ name: string; path: string }>, +): Promise<{ status: 'resolved' | 'missing_source' | 'hash_mismatch'; current_hash?: string }> { + const repo = repos.find(r => r.name 
=== locator.repo); + if (!repo) return { status: 'missing_source' }; + const currentHash = await computeFileHash(path.resolve(workspaceRoot, repo.path, locator.path)); + if (!currentHash) return { status: 'missing_source' }; + if (locator.source_hash && currentHash !== locator.source_hash) { + return { status: 'hash_mismatch', current_hash: currentHash }; + } + return { status: 'resolved', current_hash: currentHash }; +} diff --git a/src/workspace/source-uri.test.ts b/src/workspace/source-uri.test.ts new file mode 100644 index 0000000..39351f6 --- /dev/null +++ b/src/workspace/source-uri.test.ts @@ -0,0 +1,39 @@ +import { describe, expect, it } from 'vitest'; + +import { + entityUri, + normalizeWorkspaceUri, + parseSourceUri, + workspaceUri, +} from './source-uri.js'; + +describe('source URI normalization', () => { + it('normalizes workspace URIs with POSIX paths and line ranges', () => { + expect(workspaceUri('ste-runtime', 'src\\workspace\\source-uri.ts')).toBe( + 'workspace://ste-runtime/src/workspace/source-uri.ts', + ); + expect(workspaceUri('ste-runtime', 'src/workspace/source-uri.ts', { start: 3, end: 7 })).toBe( + 'workspace://ste-runtime/src/workspace/source-uri.ts#L3-L7', + ); + }); + + it('rejects absolute, parent-relative, and drive-letter paths', () => { + expect(() => workspaceUri('repo', '../x.ts')).toThrow(/portable/); + expect(() => workspaceUri('repo', '/x.ts')).toThrow(/portable/); + expect(() => workspaceUri('repo', 'C:/x.ts')).toThrow(/portable/); + }); + + it('round trips workspace URIs', () => { + const uri = workspaceUri('adr-architecture-kit', 'adrs/logical/ADR-L-0001.yaml'); + expect(normalizeWorkspaceUri(uri)).toBe(uri); + expect(parseSourceUri(uri)).toEqual({ + kind: 'workspace', + repo: 'adr-architecture-kit', + path: 'adrs/logical/ADR-L-0001.yaml', + }); + }); + + it('normalizes entity URIs with segment encoding', () => { + expect(entityUri('Lambda:repo:my function')).toBe('entity://workspace/Lambda%3Arepo%3Amy%20function'); + }); +}); 
diff --git a/src/workspace/source-uri.ts b/src/workspace/source-uri.ts new file mode 100644 index 0000000..f5bb7d2 --- /dev/null +++ b/src/workspace/source-uri.ts @@ -0,0 +1,156 @@ +export interface LineRange { + start: number; + end: number; +} + +export type ParsedSourceUri = + | { kind: 'workspace'; repo: string; path: string; lineRange?: LineRange } + | { kind: 'entity'; entityId: string } + | { kind: 'adr'; adrId: string } + | { kind: 'decision'; decisionId: string } + | { kind: 'graph'; graphSnapshotHash: string; entityId: string } + | { kind: 'projection'; family: string; projectionId: string }; + +function encodePathSegment(value: string): string { + return encodeURIComponent(value); +} + +function decodePathSegment(value: string): string { + return decodeURIComponent(value); +} + +function normalizeRepo(repo: string): string { + const trimmed = repo.trim(); + if (!trimmed) { + throw new Error('Source URI repo must be non-empty'); + } + if (trimmed.includes('/') || trimmed.includes('\\')) { + throw new Error(`Source URI repo is not portable: ${repo}`); + } + return trimmed; +} + +export function normalizePortablePath(input: string): string { + const raw = input.trim().replace(/\\/g, '/'); + if (!raw) { + throw new Error('Source URI path must be non-empty'); + } + if (raw.startsWith('/') || /^[A-Za-z]:\//.test(raw)) { + throw new Error(`Source URI path is not portable: ${input}`); + } + const parts = raw.split('/').filter(Boolean); + if (parts.some(part => part === '.' 
|| part === '..')) { + throw new Error(`Source URI path is not portable: ${input}`); + } + return parts.join('/'); +} + +function formatLineRange(lineRange?: LineRange): string { + if (!lineRange) return ''; + if ( + !Number.isInteger(lineRange.start) || + !Number.isInteger(lineRange.end) || + lineRange.start < 1 || + lineRange.end < lineRange.start + ) { + throw new Error(`Invalid source URI line range: ${lineRange.start}-${lineRange.end}`); + } + return `#L${lineRange.start}-L${lineRange.end}`; +} + +export function workspaceUri(repo: string, sourcePath: string, lineRange?: LineRange): string { + const normalizedRepo = normalizeRepo(repo); + const normalizedPath = normalizePortablePath(sourcePath); + const encodedPath = normalizedPath.split('/').map(encodePathSegment).join('/'); + return `workspace://${encodePathSegment(normalizedRepo)}/${encodedPath}${formatLineRange(lineRange)}`; +} + +export function entityUri(entityId: string): string { + const trimmed = entityId.trim(); + if (!trimmed) { + throw new Error('Entity URI id must be non-empty'); + } + return `entity://workspace/${encodePathSegment(trimmed)}`; +} + +function parseLineRange(fragment: string): LineRange | undefined { + if (!fragment) return undefined; + const match = fragment.match(/^L(\d+)-L(\d+)$/); + if (!match) { + throw new Error(`Invalid source URI line range fragment: ${fragment}`); + } + const start = Number(match[1]); + const end = Number(match[2]); + if (end < start) { + throw new Error(`Invalid source URI line range fragment: ${fragment}`); + } + return { start, end }; +} + +export function parseSourceUri(uriOrId: string): ParsedSourceUri { + const value = uriOrId.trim(); + if (value.startsWith('workspace://')) { + const withoutScheme = value.slice('workspace://'.length); + const hashIdx = withoutScheme.indexOf('#'); + const body = hashIdx >= 0 ? withoutScheme.slice(0, hashIdx) : withoutScheme; + const fragment = hashIdx >= 0 ? 
withoutScheme.slice(hashIdx + 1) : ''; + const slashIdx = body.indexOf('/'); + if (slashIdx < 1) { + throw new Error(`Invalid workspace URI: ${uriOrId}`); + } + const repo = decodePathSegment(body.slice(0, slashIdx)); + const sourcePath = body.slice(slashIdx + 1).split('/').map(decodePathSegment).join('/'); + const parsed: ParsedSourceUri = { + kind: 'workspace', + repo: normalizeRepo(repo), + path: normalizePortablePath(sourcePath), + }; + const lineRange = parseLineRange(fragment); + if (lineRange) { + parsed.lineRange = lineRange; + } + return parsed; + } + if (value.startsWith('entity://workspace/')) { + return { + kind: 'entity', + entityId: decodePathSegment(value.slice('entity://workspace/'.length)), + }; + } + if (value.startsWith('adr://')) { + return { kind: 'adr', adrId: value.slice('adr://'.length) }; + } + if (value.startsWith('decision://')) { + return { kind: 'decision', decisionId: value.slice('decision://'.length) }; + } + if (value.startsWith('graph://workspace/')) { + const rest = value.slice('graph://workspace/'.length); + const marker = '/node/'; + const idx = rest.indexOf(marker); + if (idx < 1) throw new Error(`Invalid graph URI: ${uriOrId}`); + return { + kind: 'graph', + graphSnapshotHash: decodePathSegment(rest.slice(0, idx)), + entityId: decodePathSegment(rest.slice(idx + marker.length)), + }; + } + if (value.startsWith('projection://workspace/')) { + const rest = value.slice('projection://workspace/'.length); + const idx = rest.indexOf('/'); + if (idx < 1) throw new Error(`Invalid projection URI: ${uriOrId}`); + return { + kind: 'projection', + family: decodePathSegment(rest.slice(0, idx)), + projectionId: decodePathSegment(rest.slice(idx + 1)), + }; + } + return { kind: 'entity', entityId: value }; +} + +export function normalizeWorkspaceUri(uri: string): string { + const parsed = parseSourceUri(uri); + if (parsed.kind !== 'workspace') { + throw new Error(`Not a workspace URI: ${uri}`); + } + return workspaceUri(parsed.repo, 
parsed.path, parsed.lineRange); +} diff --git a/src/workspace/workspace-graph-loader.ts b/src/workspace/workspace-graph-loader.ts index 62ae12d..dab636a 100644 --- a/src/workspace/workspace-graph-loader.ts +++ b/src/workspace/workspace-graph-loader.ts @@ -16,6 +16,13 @@ export interface WorkspaceNode { type: string; name: string; repo: string; + entity_uri?: string; + source_uri?: string; + source_hash?: string; + source_locator_ref?: string; + canonical?: boolean; + authority?: string; + graph_snapshot_hash?: string; attributes?: Record; } @@ -44,6 +51,13 @@ interface SliceDoc { type?: string; name?: string; provenance?: { repo?: string; source_path?: string; source_ref?: string }; + entity_uri?: string; + source_uri?: string; + source_hash?: string; + source_locator_ref?: string; + canonical?: boolean; + authority?: string; + graph_snapshot_hash?: string; attributes?: Record; }>; edges?: Array<{ @@ -137,6 +151,13 @@ export async function loadWorkspaceGraph(outputDir: string): Promise; provenance: { source_path: string; source_ref: string; repo?: string }; } @@ -52,6 +59,13 @@ interface SliceDoc { type: string; name: string; attributes?: Record; + entity_uri?: string; + source_uri?: string; + source_hash?: string; + source_locator_ref?: string; + canonical?: boolean; + authority?: string; + graph_snapshot_hash?: string; provenance: { source_path: string; source_ref: string; repo?: string }; }>; edges?: Array<{ @@ -133,6 +147,13 @@ export async function mergeWorkspaceGraph( type: node.type, name: node.name ?? node.id, repo: node.provenance?.repo ?? 
sliceRepo, + entity_uri: node.entity_uri, + source_uri: node.source_uri, + source_hash: node.source_hash, + source_locator_ref: node.source_locator_ref, + canonical: node.canonical, + authority: node.authority, + graph_snapshot_hash: node.graph_snapshot_hash, attributes: node.attributes, provenance: node.provenance, }); diff --git a/src/workspace/workspace-recon.ts b/src/workspace/workspace-recon.ts index 3a58d9b..4b71e8b 100644 --- a/src/workspace/workspace-recon.ts +++ b/src/workspace/workspace-recon.ts @@ -4,11 +4,12 @@ import fs from 'node:fs/promises'; import path from 'node:path'; +import crypto from 'node:crypto'; import type { ResolvedConfig } from '../config/index.js'; import { executeRecon } from '../recon/index.js'; import { discoverFilesFromConfig } from '../recon/phases/discovery.js'; import { log } from '../utils/logger.js'; -import { PhaseTimer, repoLimiter } from '../utils/concurrency.js'; +import { PhaseTimer, repoLimiter, type PhaseTimingRecord } from '../utils/concurrency.js'; import { buildPerRepoConfig, parseWorkspaceManifest, resolveRepoPath } from './manifest.js'; import { emitWorkspaceSlice } from './slice-emitter.js'; import { computeSourceHash, readSentinel, writeSentinel } from './repo-sentinel.js'; @@ -20,6 +21,7 @@ import type { ProjectionEmitResult } from './emit-projections.js'; import { emitMultiResProjections } from './emit-multi-res-projections.js'; import type { MultiResEmitResult } from './emit-multi-res-projections.js'; import { mergeWorkspaceGraph } from './workspace-merge.js'; +import { emitSourceLocatorRegistry } from './source-locator-registry.js'; export interface WorkspaceReconOptions { workspacePath: string; @@ -39,6 +41,8 @@ export interface RepoResult { reconResult?: import('../recon/index.js').ReconResult; nodeCount?: number; edgeCount?: number; + /** Wall-clock duration for this repo's RECON + slice emission. 
*/ + durationMs?: number; } export interface WorkspaceReconResult { @@ -47,6 +51,7 @@ export interface WorkspaceReconResult { workspaceIndexPath: string; projectionResult?: ProjectionEmitResult; multiResProjectionResult?: MultiResEmitResult; + orchestrationTiming?: PhaseTimingRecord; } function normalizeOutputDir(raw: string): string { @@ -68,6 +73,10 @@ async function loadRuntimeVersion(runtimeDir: string): Promise { } } +async function sha256File(filePath: string): Promise { + return `sha256:${crypto.createHash('sha256').update(await fs.readFile(filePath)).digest('hex')}`; +} + async function collectRepoSourceFingerprints(config: ResolvedConfig): Promise { const discovered = await discoverFilesFromConfig(config); const rows: RepoSourceFingerprintRow[] = []; @@ -135,7 +144,7 @@ export async function executeWorkspaceRecon(options: WorkspaceReconOptions): Pro const runtimeVersion = await loadRuntimeVersion(options.runtimeDir); - const { manifest, workspaceRoot } = await parseWorkspaceManifest(options.workspacePath); + const { manifest, workspaceRoot, manifestFile } = await parseWorkspaceManifest(options.workspacePath); const outputRel = normalizeOutputDir(manifest.output_dir).replace(/\\/g, '/'); const outputRoot = path.resolve(workspaceRoot, outputRel); @@ -163,8 +172,9 @@ export async function executeWorkspaceRecon(options: WorkspaceReconOptions): Pro const fingerprintRows = await collectRepoSourceFingerprints(config); const hashNow = computeSourceHash(fingerprintRows); if (sentinel.source_hash === hashNow && sentinel.recon_version === runtimeVersion) { - process.stdout.write(heartbeatCompletionLine('skipped', repo.name, elapsedMs())); - return { name: repo.name, status: 'skipped' }; + const skipMs = elapsedMs(); + process.stdout.write(heartbeatCompletionLine('skipped', repo.name, skipMs)); + return { name: repo.name, status: 'skipped', durationMs: skipMs }; } } } @@ -227,10 +237,12 @@ export async function executeWorkspaceRecon(options: WorkspaceReconOptions): 
Pro process.stderr.write( `[RECON] Repo ${repo.name} timed out after ${timeoutMs}ms; subprocess or async work may still complete (no SIGKILL).\n`, ); - process.stdout.write(heartbeatCompletionLine('timed_out', repo.name, elapsedMs())); + const timeoutElapsed = elapsedMs(); + process.stdout.write(heartbeatCompletionLine('timed_out', repo.name, timeoutElapsed)); return { name: repo.name, status: 'timed_out', + durationMs: timeoutElapsed, error: { stage: 'timeout', message: `Repo ${repo.name} timed out after ${timeoutMs}ms`, @@ -243,15 +255,18 @@ export async function executeWorkspaceRecon(options: WorkspaceReconOptions): Pro result = await runRepoWork(); } - process.stdout.write(heartbeatCompletionLine(result.status, repo.name, elapsedMs())); - return result; + const repoElapsed = elapsedMs(); + process.stdout.write(heartbeatCompletionLine(result.status, repo.name, repoElapsed)); + return { ...result, durationMs: repoElapsed }; } catch (err) { const message = err instanceof Error ? err.message : String(err); log(`[workspace-recon] repo ${repo.name} failed: ${message}`); - process.stdout.write(heartbeatCompletionLine('failed', repo.name, elapsedMs())); + const failElapsed = elapsedMs(); + process.stdout.write(heartbeatCompletionLine('failed', repo.name, failElapsed)); return { name: repo.name, status: 'failed', + durationMs: failElapsed, error: { stage: 'workspace', message }, }; } @@ -300,6 +315,26 @@ export async function executeWorkspaceRecon(options: WorkspaceReconOptions): Pro ? 
` (partial: ${mergeResult.graph.partial_from.join(', ')})` : ''), ); + try { + const graphSnapshotHash = await sha256File(mergeResult.graphPath); + const workspaceManifestHash = await sha256File(manifestFile); + const locatorResult = await emitSourceLocatorRegistry({ + outputDir: outputRoot, + workspaceRoot, + repos: manifest.repos.map(r => ({ name: r.name, path: r.path })), + graphSnapshotHash, + workspaceManifestHash, + generatedAt: new Date().toISOString(), + generatedBy: `ste-runtime@${runtimeVersion}`, + }); + log( + `[workspace-recon] Source locators: ${locatorResult.registry.locators.length} locators written to ` + + `source-locator-registry.yaml`, + ); + } catch (locatorErr) { + const locatorMsg = locatorErr instanceof Error ? locatorErr.message : String(locatorErr); + log(`[workspace-recon] Source locator registry emission failed (non-fatal): ${locatorMsg}`); + } } catch (err) { const msg = err instanceof Error ? err.message : String(err); log(`[workspace-recon] Graph merge failed (non-fatal): ${msg}`); @@ -339,5 +374,6 @@ export async function executeWorkspaceRecon(options: WorkspaceReconOptions): Pro workspaceIndexPath, projectionResult, multiResProjectionResult, + orchestrationTiming: wsTiming, }; }