From 4d2fa47ac24b58a4843f98ae8c59eda6e16f9ce5 Mon Sep 17 00:00:00 2001 From: "John C. Burnham" Date: Sun, 17 May 2026 00:47:18 -0400 Subject: [PATCH 01/12] Refactor Claim ADT + add merkle infrastructure for ZK aggregation Adds the format hooks needed before recursive verification lands in Aiur. Five-variant Claim ADT with explicit assumption commitments, a canonical Blake3 merkle module, a serializable AssumptionTree for recovering leaf sets, and a Contains claim for the discharge step. Claim ADT (5 variants): - Eval { input, output, assumptions: Option
<Address> } - Check { const_addr, assumptions: Option<Address>
} - CheckEnv { root, assumptions: Option<Address>
} - Reveal { comm, info } -- unchanged, no assumptions - Contains { tree, const_addr } -- new, for inclusion proofs Tag4 reorganized to keep everything in single-byte tags: - 0xE for env, comm, AssumptionTree, and claims (slots 0-7) - 0xF for proofs (slots 0-4; 5-7 reserved) - Comm moved from variant 5 -> 1 Matches the "Variant (0-7)" constraint documented in docs/Ixon.md. Env serialization: - Every .ixe file now carries a canonical merkle root over its consts.keys() in the on-disk header (non-optional, 32 bytes; empty const sets use the zero-address sentinel). - Two envs with the same const set produce byte-identical roots regardless of insertion order. Verified on deserialize. New modules: - src/ix/ixon/merkle.rs + Ix/Merkle.lean: canonical sorted builder, free-form merkle_join composition, membership proofs, domain separation per RFC 6962. - src/ix/ixon/assumption_tree.rs + Ix/AssumptionTree.lean: serializable merkle tree with Leaf/Padding/Node variants. canonical() builds the same shape merkle_root_canonical hashes; join() is O(1) free-form composition. - src/ix/kernel/claim.rs: builders that compute transitive-dep assumptions from an env (build_check_claim, build_eval_claim, build_check_env_claim, env_merkle_root). Other: - Extracted shared BFS walker on Env::bfs_refs + Env::transitive_deps_excl; the inlined test-feature copy in lean_env.rs now calls into it. - Lean FFI export rs_env_merkle_root for cross-impl verification of the env root. - Proof bytes for all variants are uniform opaque ZK bytes; witness data (e.g., Contains merkle paths) is prover-side scratch consumed by the ZK circuit and not transmitted on the wire. - docs/Ixon.md Tag4 tables and env section updated; .ixe extension documented. Recursive verification (the ZK proof generation for Contains and the aggregation discharge transitions) is intentionally deferred to a follow-up. Tests: 993 Rust unit tests pass (was 953 pre-refactor), 813 Lean tests pass with no failures; cargo clippy clean. 
--- Ix.lean | 2 + Ix/AssumptionTree.lean | 193 ++++++++ Ix/Claim.lean | 118 ++++- Ix/Commit.lean | 13 +- Ix/Ixon.lean | 60 ++- Ix/Merkle.lean | 180 ++++++++ Tests/Gen/Claim.lean | 21 +- Tests/Ix/AssumptionTree.lean | 152 ++++++ Tests/Ix/Claim.lean | 97 ++-- Tests/Ix/Commit.lean | 14 +- Tests/Ix/Ixon.lean | 41 ++ Tests/Ix/Merkle.lean | 118 +++++ Tests/Main.lean | 4 + docs/Ixon.md | 157 ++++--- src/ffi/ixon/env.rs | 24 + src/ffi/lean_env.rs | 74 ++- src/ix/ixon.rs | 55 ++- src/ix/ixon/assumption_tree.rs | 510 +++++++++++++++++++++ src/ix/ixon/comm.rs | 16 +- src/ix/ixon/env.rs | 147 ++++++ src/ix/ixon/merkle.rs | 441 ++++++++++++++++++ src/ix/ixon/proof.rs | 812 ++++++++++++++++++++++++--------- src/ix/ixon/serialize.rs | 153 ++++++- src/ix/kernel.rs | 1 + src/ix/kernel/claim.rs | 282 ++++++++++++ 25 files changed, 3250 insertions(+), 435 deletions(-) create mode 100644 Ix/AssumptionTree.lean create mode 100644 Ix/Merkle.lean create mode 100644 Tests/Ix/AssumptionTree.lean create mode 100644 Tests/Ix/Merkle.lean create mode 100644 src/ix/ixon/assumption_tree.rs create mode 100644 src/ix/ixon/merkle.rs create mode 100644 src/ix/kernel/claim.rs diff --git a/Ix.lean b/Ix.lean index b1809a25..de1327d6 100644 --- a/Ix.lean +++ b/Ix.lean @@ -12,6 +12,8 @@ public import Ix.CompileM public import Ix.DecompileM public import Ix.KernelCheck public import Ix.Claim +public import Ix.Merkle +public import Ix.AssumptionTree public import Ix.Commit public import Ix.Benchmark.Bench public import Ix.Aiur diff --git a/Ix/AssumptionTree.lean b/Ix/AssumptionTree.lean new file mode 100644 index 00000000..da4b1dc0 --- /dev/null +++ b/Ix/AssumptionTree.lean @@ -0,0 +1,193 @@ +/- + # AssumptionTree: serializable merkle tree over `Address` leaves + + Used to recover the leaf set committed to by a conditional claim's + `assumptions` root. The root alone tells the verifier *which* set + was assumed; the AssumptionTree carries the actual leaves so the + verifier can inspect them. 
+ + Two construction modes — both produce the same `node`-shaped trees, + differ only in how leaves are arranged: + + - `canonical leaves` builds the same shape that `Ix.Merkle.merkleRootCanonical` + hashes, with `padding` nodes wherever odd-leaf padding occurs. + - `join l r` is free-form O(1) composition; result root matches + `Ix.Merkle.merkleJoin`. + + ## Serialization + + Tag4 size 2 under flag 0xE: + + ```text + [Tag4(0xE, 2) = 0xE2] [body] + + body recursive: + leaf(addr): [0x00] [addr:32] + padding: [0x01] + node(l, r): [0x02] [body l] [body r] + ``` + + `padding` represents the zero-sentinel slot used by the canonical + builder to even out odd levels; its root is exactly `zeroAddress`, + matching the bare 32-byte zero that `Ix.Merkle` mixes into odd-level + hashing. Splitting it from `leaf` keeps `leaves` clean (returns only + real leaves, not synthetic padding addresses). +-/ + +module +public import Ix.Address +public import Ix.Merkle +public import Ix.Ixon + +public section + +namespace Ix + +open Ixon +open Ix.Merkle (leafHash nodeHash zeroAddress merkleJoin) + +/-- A merkle tree over `Address` leaves with explicit shape. -/ +inductive AssumptionTree where + | leaf (addr : Address) + | padding + | node (left right : AssumptionTree) + deriving BEq, Repr, Inhabited + +namespace AssumptionTree + +/-- Recursively compute the root hash. -/ +partial def root : AssumptionTree → Address + | .leaf addr => leafHash addr + | .padding => zeroAddress + | .node l r => nodeHash l.root r.root + +/-- In-order traversal of real leaves (skips `padding`). Iterative + stack-based walk to avoid stack overflow on deep trees. -/ +partial def leaves (t : AssumptionTree) : Array Address := Id.run do + let mut acc : Array Address := #[] + let mut stack : Array AssumptionTree := #[t] + while !stack.isEmpty do + let top := stack.back! 
+ stack := stack.pop + match top with + | .leaf a => acc := acc.push a + | .padding => continue + | .node l r => + -- Push right first so left is processed first (in-order via LIFO). + stack := stack.push r + stack := stack.push l + return acc + +/-- True iff `target` appears as a `leaf` somewhere in the tree. -/ +partial def contains (t : AssumptionTree) (target : Address) : Bool := + match t with + | .leaf a => a == target + | .padding => false + | .node l r => l.contains target || r.contains target + +/-- Build the canonical sorted+padded merkle tree over a leaf set. + Returns `none` for an empty (post-dedup) leaf set. Matches the + shape committed to by `merkleRootCanonical`. -/ +partial def canonical (leaves : Array Address) : Option AssumptionTree := + let sorted := dedupSorted (leaves.qsort fun a b => compare a b == .lt) + if sorted.isEmpty then + none + else if sorted.size == 1 then + some (.leaf sorted[0]!) + else + some (reduce (sorted.map .leaf)) + where + dedupSorted (xs : Array Address) : Array Address := Id.run do + if xs.isEmpty then return #[] + let mut acc : Array Address := #[xs[0]!] + for i in [1:xs.size] do + if !(xs[i]! == xs[i-1]!) then acc := acc.push xs[i]! + return acc + reduce (level : Array AssumptionTree) : AssumptionTree := + if level.size == 1 then level[0]! + else reduce (pairLevel level) + pairLevel (level : Array AssumptionTree) : Array AssumptionTree := Id.run do + let mut next : Array AssumptionTree := #[] + let mut i := 0 + while i < level.size do + let l := level[i]! + let r := if i + 1 < level.size then level[i+1]! else .padding + next := next.push (.node l r) + i := i + 2 + return next + +/-- Combine two existing subtrees into a new free-form node in O(1). -/ +@[inline] def join (l r : AssumptionTree) : AssumptionTree := .node l r + +/-- Recursive helper for `merkleProof`. Returns the leaf-to-root path + if `target` is present, else `none`. 
-/ +partial def searchPath (t : AssumptionTree) (target : Address) + : Option (Array (Address × Bool)) := + match t with + | .leaf a => if a == target then some #[] else none + | .padding => none + | .node l r => + match l.searchPath target with + | some p => some (p.push (r.root, false)) + | none => + match r.searchPath target with + | some p => some (p.push (l.root, true)) + | none => none + +/-- Produce a merkle membership path for `target`. Path is in + leaf-to-root order (matches `verifyMerkleProof`). -/ +def merkleProof (t : AssumptionTree) (target : Address) + : Option Ix.Merkle.MerklePath := searchPath t target + +/-! ## Serialization -/ + +def FLAG : UInt8 := 0xE +def VARIANT : UInt64 := 2 + +def BODY_LEAF : UInt8 := 0x00 +def BODY_PADDING : UInt8 := 0x01 +def BODY_NODE : UInt8 := 0x02 + +partial def putBody : AssumptionTree → PutM Unit + | .leaf addr => do + putU8 BODY_LEAF + Serialize.put addr + | .padding => do + putU8 BODY_PADDING + | .node l r => do + putU8 BODY_NODE + putBody l + putBody r + +def put (t : AssumptionTree) : PutM Unit := do + putTag4 ⟨FLAG, VARIANT⟩ + putBody t + +partial def getBody : GetM AssumptionTree := do + let tag : UInt8 ← getU8 + if tag == BODY_LEAF then + return .leaf (← Serialize.get) + else if tag == BODY_PADDING then + return .padding + else if tag == BODY_NODE then + let l ← getBody + let r ← getBody + return .node l r + else + throw s!"AssumptionTree.getBody: invalid body tag {tag.toNat}" + +def get : GetM AssumptionTree := do + let tag ← getTag4 + if tag.flag != FLAG || tag.size != VARIANT then + throw s!"AssumptionTree.get: expected Tag4 0xE/2, got {tag.flag.toNat}/{tag.size}" + getBody + +def ser (t : AssumptionTree) : ByteArray := runPut (put t) +def de (bytes : ByteArray) : Except String AssumptionTree := + runGet get bytes + +end AssumptionTree + +end Ix + +end diff --git a/Ix/Claim.lean b/Ix/Claim.lean index 4d180bc5..1600afae 100644 --- a/Ix/Claim.lean +++ b/Ix/Claim.lean @@ -121,11 +121,30 @@ inductive 
RevealConstantInfo where | muts (components : Array (UInt64 × RevealMutConstInfo)) deriving BEq, Repr, Inhabited -/-- A claim that can be proven. -/ +/-- +A claim that can be proven. + +Five variants in three families: + +- **Typechecking claims** (`eval`, `check`, `checkEnv`): assert that a + constant evaluates, a constant is well-typed, or every constant in an + env is well-typed. Each carries `assumptions : Option Address`: + - `none` → unconditional. + - `some root` → conditional on every leaf in the merkle tree rooted + at `root` being well-typed. +- **`reveal`**: selective field revelation of a committed constant. + Carries no assumptions (orthogonal to typechecking). +- **`contains`**: structural membership claim — `const` is a leaf in + the merkle tree rooted at `tree`. Used by aggregation to discharge + leaves from a conditional claim's assumption set. Carries no + assumptions. +-/ inductive Claim where - | eval (input : Address) (output : Address) - | check (value : Address) - | reveal (comm : Address) (info : RevealConstantInfo) + | eval (input output : Address) (assumptions : Option Address) + | check (const : Address) (assumptions : Option Address) + | checkEnv (root : Address) (assumptions : Option Address) + | reveal (comm : Address) (info : RevealConstantInfo) + | contains (tree : Address) (const : Address) deriving BEq, Repr, Inhabited -- ============================================================================ @@ -439,36 +458,97 @@ end RevealConstantInfo namespace Claim +-- Tag4 size dispatch (mirrors src/ix/ixon/proof.rs). +-- Flag 0xE holds Env, Comm, AssumptionTree, and claims (single-byte tags). +-- Flag 0xF holds proofs (single-byte tags). 
+ +def FLAG_CLAIM : UInt8 := 0xE +def FLAG_PROOF : UInt8 := 0xF + +def VARIANT_ENV : UInt64 := 0 +-- VARIANT 1 = Comm (handled in Ix.Ixon) +def VARIANT_ASSUMPTION_TREE : UInt64 := 2 +def VARIANT_EVAL_CLAIM : UInt64 := 3 +def VARIANT_CHECK_CLAIM : UInt64 := 4 +def VARIANT_CHECK_ENV_CLAIM : UInt64 := 5 +def VARIANT_REVEAL_CLAIM : UInt64 := 6 +def VARIANT_CONTAINS_CLAIM : UInt64 := 7 + +def VARIANT_EVAL_PROOF : UInt64 := 0 +def VARIANT_CHECK_PROOF : UInt64 := 1 +def VARIANT_CHECK_ENV_PROOF : UInt64 := 2 +def VARIANT_REVEAL_PROOF : UInt64 := 3 +def VARIANT_CONTAINS_PROOF : UInt64 := 4 + +/-- Encode an `Option Address` as `[0x00]` (none) or `[0x01][addr:32]` + (some). Mirrors `put_opt_addr` in src/ix/ixon/proof.rs. -/ +def putOptAddr : Option Address → PutM Unit + | none => putU8 0x00 + | some a => do putU8 0x01; Serialize.put a + +def getOptAddr : GetM (Option Address) := do + let b ← getU8 + if b == 0x00 then return none + else if b == 0x01 then return some (← Serialize.get) + else throw s!"getOptAddr: invalid tag {b}" + def put : Claim → PutM Unit - | .eval input output => do - putTag4 ⟨0xE, 4⟩ + | .eval input output assumptions => do + putTag4 ⟨FLAG_CLAIM, VARIANT_EVAL_CLAIM⟩ Serialize.put input Serialize.put output - | .check value => do - putTag4 ⟨0xE, 3⟩ - Serialize.put value + putOptAddr assumptions + | .check const assumptions => do + putTag4 ⟨FLAG_CLAIM, VARIANT_CHECK_CLAIM⟩ + Serialize.put const + putOptAddr assumptions + | .checkEnv root assumptions => do + putTag4 ⟨FLAG_CLAIM, VARIANT_CHECK_ENV_CLAIM⟩ + Serialize.put root + putOptAddr assumptions | .reveal comm info => do - putTag4 ⟨0xE, 6⟩ + putTag4 ⟨FLAG_CLAIM, VARIANT_REVEAL_CLAIM⟩ Serialize.put comm RevealConstantInfo.put info + | .contains tree const => do + putTag4 ⟨FLAG_CLAIM, VARIANT_CONTAINS_CLAIM⟩ + Serialize.put tree + Serialize.put const def get : GetM Claim := do let tag ← getTag4 - if tag.flag != 0xE then throw s!"Claim.get: expected flag 0xE, got {tag.flag}" - match tag.size with - | 4 => 
return .eval (← Serialize.get) (← Serialize.get) - | 3 => return .check (← Serialize.get) - | 6 => return .reveal (← Serialize.get) (← RevealConstantInfo.get) - | n => throw s!"Claim.get: invalid variant {n}" + if tag.flag != FLAG_CLAIM then + throw s!"Claim.get: expected flag 0xE, got {tag.flag}" + if tag.size == VARIANT_EVAL_CLAIM then + let input ← Serialize.get + let output ← Serialize.get + let asm ← getOptAddr + return .eval input output asm + else if tag.size == VARIANT_CHECK_CLAIM then + let const ← Serialize.get + let asm ← getOptAddr + return .check const asm + else if tag.size == VARIANT_CHECK_ENV_CLAIM then + let root ← Serialize.get + let asm ← getOptAddr + return .checkEnv root asm + else if tag.size == VARIANT_REVEAL_CLAIM then + return .reveal (← Serialize.get) (← RevealConstantInfo.get) + else if tag.size == VARIANT_CONTAINS_CLAIM then + return .contains (← Serialize.get) (← Serialize.get) + else + throw s!"Claim.get: invalid claim variant {tag.size}" def ser (c : Claim) : ByteArray := runPut (put c) def commit (c : Claim) : Address := Address.blake3 (ser c) instance : ToString Claim where toString c := match c with - | .eval i o => s!"EvalClaim({i}, {o})" - | .check v => s!"CheckClaim({v})" - | .reveal comm info => s!"RevealClaim({comm}, {repr info})" + | .eval i o asm => s!"Eval({i}, {o}, {asm})" + | .check v asm => s!"Check({v}, {asm})" + | .checkEnv r asm => s!"CheckEnv({r}, {asm})" + | .reveal comm info => s!"Reveal({comm}, {repr info})" + | .contains t c => s!"Contains({t}, {c})" end Claim diff --git a/Ix/Commit.lean b/Ix/Commit.lean index 088190af..feff82cb 100644 --- a/Ix/Commit.lean +++ b/Ix/Commit.lean @@ -152,21 +152,24 @@ def commitDef (compileEnv : CompileM.CompileEnv) (leanEnv : Lean.Environment) -- Build claims -- ============================================================================ -/-- Build an evaluation claim from input and output expressions. - Compiles both expressions to get their content addresses. 
-/ +/-- Build an unconditional evaluation claim from input and output + expressions. Compiles both to get their content addresses. The + `assumptions` field is `none`; higher-level builders can compute + transitive-dep assumptions when available. -/ def evalClaim (compileEnv : CompileM.CompileEnv) (lvls : List Lean.Name) (input output type : Lean.Expr) : Except String Claim := do let (inputAddr, compileEnv') ← compileDef compileEnv lvls type input let (outputAddr, _) ← compileDef compileEnv' lvls type output - return .eval inputAddr outputAddr + return .eval inputAddr outputAddr none -/-- Build a check claim: asserts that the compiled definition is well-typed. -/ +/-- Build an unconditional check claim: asserts that the compiled + definition is well-typed. -/ def checkClaim (compileEnv : CompileM.CompileEnv) (lvls : List Lean.Name) (type value : Lean.Expr) : Except String Claim := do let (addr, _) ← compileDef compileEnv lvls type value - return .check addr + return .check addr none /-- Build a reveal claim from a commitment address and revealed field info. -/ def revealClaim (comm : Address) (info : RevealConstantInfo) : Claim := diff --git a/Ix/Ixon.lean b/Ix/Ixon.lean index cbebedf7..f89fa041 100644 --- a/Ix/Ixon.lean +++ b/Ix/Ixon.lean @@ -11,6 +11,7 @@ module public import Ix.Address public import Ix.Common public import Ix.Environment +public import Ix.Merkle public section @@ -1419,12 +1420,12 @@ instance : Serialize Comm where def serComm (c : Comm) : ByteArray := runPut (putComm c) def deComm (bytes : ByteArray) : Except String Comm := runGet getComm bytes -/-- Serialize Comm with Tag4{0xE, 5} header. -/ +/-- Serialize Comm with Tag4{0xE, 1} header. -/ def putCommTagged (c : Comm) : PutM Unit := do - putTag4 ⟨0xE, 5⟩ + putTag4 ⟨0xE, 1⟩ putComm c -/-- Serialize Comm with Tag4{0xE, 5} header to bytes. -/ +/-- Serialize Comm with Tag4{0xE, 1} header to bytes. 
-/ def serCommTagged (c : Comm) : ByteArray := runPut (putCommTagged c) /-- Compute commitment address: blake3(Tag4{0xE,5} + secret + payload). -/ @@ -1712,6 +1713,15 @@ def putEnv (env : Env) : PutM Unit := do -- Header: Tag4 with flag=0xE, size=0 (Env variant) putTag4 ⟨FLAG, 0⟩ + -- Canonical merkle root over consts addresses (matches Rust Env::put). + -- Always 32 bytes: for empty const sets, the sentinel + -- `Ix.Merkle.zeroAddress` is used (cannot collide with any non-empty + -- canonical root, which is always a Blake3 hash). + let constAddrs : Array Address := + (env.consts.toList.toArray.map (·.1)) + let root := (Ix.Merkle.merkleRootCanonical constAddrs).getD Ix.Merkle.zeroAddress + Serialize.put root + -- Section 1: Blobs (Address -> bytes) let blobs := env.blobs.toList.toArray.qsort fun a b => (compare a.1 b.1).isLT putTag0 ⟨blobs.size.toUInt64⟩ @@ -1770,6 +1780,11 @@ def getEnv : GetM Env := do if tag.size != 0 then throw s!"Env.get: expected Env variant 0, got {tag.size}" + -- Canonical merkle root (fixed 32 bytes). For empty const sets the + -- stored value is `Ix.Merkle.zeroAddress`. Verified at end against + -- the recomputed root. + let storedRoot : Address ← Serialize.get + let mut env : Env := {} -- Section 1: Blobs @@ -1831,6 +1846,15 @@ def getEnv : GetM Env := do let comm ← getComm env := { env with comms := env.comms.insert addr comm } + -- Verify the stored merkle root against the recomputed value. Empty + -- const set → expected = zeroAddress. + let constAddrs : Array Address := + (env.consts.toList.toArray.map (·.1)) + let computedRoot := + (Ix.Merkle.merkleRootCanonical constAddrs).getD Ix.Merkle.zeroAddress + if computedRoot != storedRoot then + throw "Env.get: merkle root mismatch" + pure env end Env @@ -1912,6 +1936,36 @@ opaque rsDeEnvFFI : @& ByteArray → Except String RawEnv def rsDeEnv (bytes : ByteArray) : Except String Env := return (← rsDeEnvFFI bytes).toEnv +/-! 
## Canonical merkle root over consts -/ + +@[extern "rs_env_merkle_root"] +opaque rsEnvMerkleRootFFI : @& RawEnv → ByteArray + +/-- +Compute the canonical merkle root over an Ixon env's `consts.keys()` via +the Rust implementation. Returns `none` for an empty const set, otherwise +the 32-byte root wrapped in `some`. + +The same value is stored in the env's on-disk Tag4 header (see +`Env::put`/`Env::get` in `src/ix/ixon/serialize.rs`). +-/ +def rsEnvMerkleRoot (env : Env) : Option Address := + let bytes := rsEnvMerkleRootFFI env.toRawEnv + if bytes.size == 0 then none + else if bytes.size == 32 then some ⟨bytes⟩ + else none + +/-- +Pure-Lean canonical merkle root over a `RawEnv`'s consts addresses. +Used as a cross-check against the Rust FFI: both should agree. +-/ +def RawEnv.merkleRoot (env : RawEnv) : Option Address := + Ix.Merkle.merkleRootCanonical (env.consts.map (·.addr)) + +/-- Pure-Lean canonical merkle root for an `Env`. -/ +def Env.merkleRoot (env : Env) : Option Address := + env.toRawEnv.merkleRoot + end Ixon end diff --git a/Ix/Merkle.lean b/Ix/Merkle.lean new file mode 100644 index 00000000..411ba140 --- /dev/null +++ b/Ix/Merkle.lean @@ -0,0 +1,180 @@ +/- + # Merkle: canonical and free-form Merkle trees over `Address` leaves + + Two construction modes share the same hash primitives: + + - **Canonical** (`merkleRootCanonical`): lex-sorted, deduped leaves; + odd levels padded with a zero sentinel. Used for env merkle roots + (deterministic env identity) and as the default builder for + assumption-tree roots. + - **Free-form** (`merkleJoin`): O(1) composition of two existing + subtree roots into a new root. Used to aggregate the assumption + sets of two claims without re-sorting all leaves. + + ## Domain separation + + Leaves are hashed as `blake3(0x00 || addr)` and internal nodes as + `blake3(0x01 || left || right)`. Follows RFC 6962 (Certificate + Transparency) convention. 
Strictly speaking the prefix bytes are + redundant for our scheme because leaf inputs (32 B) and node inputs + (64 B) have distinct lengths — so cross-length Blake3 collision is + the only attack vector and that's infeasible. But the prefix bytes + make the security argument structural rather than parametric, robust + under future refactors (variable-length leaves, raw-address mixing) + and hash swaps (Poseidon2 sponge has fixed arity and doesn't give + the length argument for free). + + ## Odd-leaf padding + + The canonical builder pads odd levels with a fixed `[0u8; 32]` + sentinel rather than duplicating the trailing leaf. Duplication + introduces CVE-2012-2459-style malleability where two distinct leaf + lists can produce the same root. +-/ + +module +public import Ix.Address +public import Ix.ByteArray + +public section + +namespace Ix.Merkle + +/-- Domain-separation prefix for leaf hashes. -/ +def LEAF_DOMAIN : UInt8 := 0x00 + +/-- Domain-separation prefix for internal-node hashes. -/ +def NODE_DOMAIN : UInt8 := 0x01 + +/-- 32-byte zero sentinel used as canonical-tree padding. -/ +def ZERO_SENTINEL : ByteArray := ⟨Array.replicate 32 0⟩ + +/-- The fixed zero-sentinel address. -/ +def zeroAddress : Address := ⟨ZERO_SENTINEL⟩ + +/-- Hash a leaf value into its canonical leaf-level digest. -/ +def leafHash (addr : Address) : Address := + Address.blake3 (⟨#[LEAF_DOMAIN]⟩ ++ addr.hash) + +/-- Hash a pair of child digests into their parent internal-node digest. -/ +def nodeHash (left right : Address) : Address := + Address.blake3 (⟨#[NODE_DOMAIN]⟩ ++ left.hash ++ right.hash) + +/-- +Combine two existing subtree roots into a new free-form root in O(1). + +The result is a non-canonical tree even if both inputs were canonical; +verifiers accept both forms and recover the leaf set by walking the +tree from witness data. +-/ +@[inline] def merkleJoin (left right : Address) : Address := + nodeHash left right + +/-! 
## Helpers -/ + +/-- Sort an Array of Addresses lex-ascending by hash bytes. -/ +private def sortAddrs (xs : Array Address) : Array Address := + xs.qsort fun a b => compare a b == .lt + +/-- Deduplicate a sorted Array of Addresses. -/ +private def dedupSortedAddrs (xs : Array Address) : Array Address := Id.run do + if xs.isEmpty then return #[] + let mut result : Array Address := #[xs[0]!] + for i in [1:xs.size] do + if !(xs[i]! == xs[i-1]!) then + result := result.push xs[i]! + return result + +/-- One level of the canonical tree: pair adjacent siblings, pad with +zero sentinel if odd count. -/ +private def reduceLevel (level : Array Address) : Array Address := Id.run do + let z := zeroAddress + let mut next : Array Address := #[] + let mut i := 0 + while i < level.size do + let l := level[i]! + let r := if i + 1 < level.size then level[i+1]! else z + next := next.push (nodeHash l r) + i := i + 2 + return next + +/-- Reduce a list of nodes to a single root via repeated `reduceLevel`. +Assumes input is non-empty. -/ +private partial def buildTree (level : Array Address) : Address := + if level.size == 1 then level[0]! + else buildTree (reduceLevel level) + +/-- Build the canonical merkle root over a leaf set. Leaves are +lex-sorted and deduplicated before hashing. Returns: + +- `none` if `leaves` is empty (post-dedup). +- `some (leafHash x)` for a single leaf. +- otherwise an internal-node root with odd levels padded by + `zeroAddress`. +-/ +def merkleRootCanonical (leaves : Array Address) : Option Address := + let sorted := dedupSortedAddrs (sortAddrs leaves) + if sorted.isEmpty then + none + else if sorted.size == 1 then + some (leafHash sorted[0]!) + else + some (buildTree (sorted.map leafHash)) + +/-! ## Membership proofs -/ + +/-- A merkle-path step: `(sibling, isLeft)`. `isLeft = true` means the +sibling sits on the left side at this level, so verification combines +it as `nodeHash sibling current`; otherwise `nodeHash current sibling`. 
-/ +abbrev MerklePath := Array (Address × Bool) + +/-- One step of the canonical proof generator: record the sibling at +position `pos` in `level`, then return the next level and updated +position. -/ +private def proofStep (level : Array Address) (pos : Nat) + : (Array Address × Address × Bool × Nat) := + let z := zeroAddress + let siblingIdx := pos ^^^ 1 + let sibling := if h : siblingIdx < level.size then level[siblingIdx] else z + let isLeft := pos % 2 == 1 + let next := reduceLevel level + (next, sibling, isLeft, pos / 2) + +/-- Produce a sibling-path for `target` in the canonical tree over +`leaves`. Returns `none` if `target` is not in the (post-dedup) leaf +set. Returns the empty path for a single-leaf tree. -/ +partial def merkleProofCanonical (leaves : Array Address) (target : Address) + : Option MerklePath := + let sorted := dedupSortedAddrs (sortAddrs leaves) + if sorted.isEmpty then + none + else + -- Linear find — assumption sets are small in v1. + let idxOpt := sorted.findIdx? (fun a => a == target) + match idxOpt with + | none => none + | some idx => + if sorted.size == 1 then + some #[] + else + some (go (sorted.map leafHash) idx #[]) + where + go (level : Array Address) (pos : Nat) (acc : MerklePath) : MerklePath := + if level.size ≤ 1 then acc + else + let (next, sibling, isLeft, pos') := proofStep level pos + go next pos' (acc.push (sibling, isLeft)) + +/-- Verify a merkle membership proof against any root (canonical or +free-form). Shape-agnostic — verification just hashes upward. 
-/ +def verifyMerkleProof (root : Address) (leaf : Address) + (path : MerklePath) : Bool := + let final := path.foldl + (fun current (sibling, isLeft) => + if isLeft then nodeHash sibling current else nodeHash current sibling) + (leafHash leaf) + final == root + +end Ix.Merkle + +end diff --git a/Tests/Gen/Claim.lean b/Tests/Gen/Claim.lean index f9230fc8..da876b74 100644 --- a/Tests/Gen/Claim.lean +++ b/Tests/Gen/Claim.lean @@ -85,11 +85,15 @@ def genRevealConstantInfo : Gen RevealConstantInfo := <$> genSmallArray (Prod.mk <$> genUInt64Small <*> genRevealMutConstInfo)), ] +def genOptAddress : Gen (Option Address) := genOptional genAddress + def genClaim : Gen Claim := frequency [ - (10, Claim.eval <$> genAddress <*> genAddress), - (10, Claim.check <$> genAddress), - (10, Claim.reveal <$> genAddress <*> genRevealConstantInfo), + (10, Claim.eval <$> genAddress <*> genAddress <*> genOptAddress), + (10, Claim.check <$> genAddress <*> genOptAddress), + (10, Claim.checkEnv <$> genAddress <*> genOptAddress), + (10, Claim.reveal <$> genAddress <*> genRevealConstantInfo), + (10, Claim.contains <$> genAddress <*> genAddress), ] /-! ## Shrinkable instances -/ @@ -118,9 +122,14 @@ instance : Shrinkable RevealConstantInfo where instance : Shrinkable Claim where shrink - | .check _ => [] - | .eval input _ => [.check input] - | .reveal comm info => (.reveal comm <$> Shrinkable.shrink info) ++ [.check comm] + | .check _ none => [] + | .checkEnv _ none => [] + | .check c (some _) => [.check c none] + | .checkEnv r (some _) => [.checkEnv r none] + | .eval input _ _ => [.check input none] + | .reveal comm info => + (.reveal comm <$> Shrinkable.shrink info) ++ [.check comm none] + | .contains t _ => [.checkEnv t none] /-! ## SampleableExt instances -/ diff --git a/Tests/Ix/AssumptionTree.lean b/Tests/Ix/AssumptionTree.lean new file mode 100644 index 00000000..1a403f53 --- /dev/null +++ b/Tests/Ix/AssumptionTree.lean @@ -0,0 +1,152 @@ +/- + Tests for `Ix.AssumptionTree`. 
+-/ + +module +public import Ix.AssumptionTree +public import LSpec + +open LSpec +open Ix.AssumptionTree +open Ix.Merkle (merkleRootCanonical merkleJoin verifyMerkleProof) +open Ixon (runGet) + +private def addr (s : String) : Address := Address.blake3 s.toUTF8 + +/-! ## Canonical construction -/ + +private partial def hasPad : Ix.AssumptionTree → Bool + | .padding => true + | .leaf _ => false + | .node l r => hasPad l || hasPad r + +private def canonicalUnits : TestSeq := + let a := addr "a" + let b := addr "b" + let c := addr "c" + test "canonical of empty is none" ((Ix.AssumptionTree.canonical #[]) == none) + ++ test "canonical of single leaf" + ((Ix.AssumptionTree.canonical #[a]) == some (.leaf a)) + ++ test "canonical of three leaves contains padding" + (let t := (Ix.AssumptionTree.canonical #[a, b, c]).get! + hasPad t) + +/-! ## Root agreement with `merkleRootCanonical` -/ + +private def rootAgreementUnits : TestSeq := + let mkLeaves (n : Nat) : Array Address := + Array.range n |>.map fun i => addr s!"leaf-{i}" + let check (n : Nat) : Bool := + let leaves := mkLeaves n + let t := (Ix.AssumptionTree.canonical leaves).get! + some t.root == merkleRootCanonical leaves + test "root matches merkleRootCanonical for n=1" (check 1) + ++ test "root matches merkleRootCanonical for n=2" (check 2) + ++ test "root matches merkleRootCanonical for n=3" (check 3) + ++ test "root matches merkleRootCanonical for n=4" (check 4) + ++ test "root matches merkleRootCanonical for n=5" (check 5) + ++ test "root matches merkleRootCanonical for n=7" (check 7) + ++ test "root matches merkleRootCanonical for n=8" (check 8) + +/-! ## Join agreement -/ + +private def joinUnits : TestSeq := + let a := addr "a" + let b := addr "b" + let l := (Ix.AssumptionTree.canonical #[a]).get! + let r := (Ix.AssumptionTree.canonical #[b]).get! + let joined := Ix.AssumptionTree.join l r + test "join root matches merkleJoin" + (joined.root == merkleJoin l.root r.root) + +/-! 
## Leaves + contains -/ + +private def leavesUnits : TestSeq := + let a := addr "a" + let b := addr "b" + let c := addr "c" + let absent := addr "absent" + let t := (Ix.AssumptionTree.canonical #[a, b, c]).get! + -- Sorted leaves. + let sorted := (#[a, b, c]).qsort fun a b => compare a b == .lt + test "leaves skip padding and yield sorted set" + (t.leaves == sorted) + ++ test "contains true for members" + (t.contains a && t.contains b && t.contains c) + ++ test "contains false for nonmember" + (!t.contains absent) + +/-! ## Merkle proofs -/ + +private def merkleProofUnits : TestSeq := + let mkLeaves (n : Nat) : Array Address := + Array.range n |>.map fun i => addr s!"leaf-{i}" + let checkAll (n : Nat) : Bool := + let leaves := mkLeaves n + let t := (Ix.AssumptionTree.canonical leaves).get! + let root := t.root + t.leaves.all fun leaf => + match t.merkleProof leaf with + | none => false + | some path => verifyMerkleProof root leaf path + test "single-leaf empty path verifies" + (let a := addr "x" + let t : Ix.AssumptionTree := .leaf a + let ok : Bool := match t.merkleProof a with + | some path => path.isEmpty && verifyMerkleProof t.root a path + | none => false + ok) + ++ test "all leaves prove for n=2" (checkAll 2) + ++ test "all leaves prove for n=3" (checkAll 3) + ++ test "all leaves prove for n=5" (checkAll 5) + ++ test "all leaves prove for n=8" (checkAll 8) + ++ test "nonmember returns none" + (let leaves := mkLeaves 3 + let t := (Ix.AssumptionTree.canonical leaves).get! + t.merkleProof (addr "absent") == none) + +/-! 
## Serialization -/ + +private def serdeRoundtrip (t : Ix.AssumptionTree) : Bool := + let bytes := Ix.AssumptionTree.ser t + match runGet Ix.AssumptionTree.get bytes with + | .ok t' => t == t' + | .error _ => false + +private def serdeUnits : TestSeq := + let a := addr "a" + let b := addr "b" + let leaf := Ix.AssumptionTree.leaf a + let pad := Ix.AssumptionTree.padding + let nodeLeaves := + Ix.AssumptionTree.node (Ix.AssumptionTree.leaf a) (Ix.AssumptionTree.leaf b) + let canon := (Ix.AssumptionTree.canonical #[a, b, addr "c"]).get! + -- Outer tag byte + let leafBytes := Ix.AssumptionTree.ser leaf + let padBytes := Ix.AssumptionTree.ser pad + let nodeBytes := Ix.AssumptionTree.ser nodeLeaves + test "outer tag byte is 0xE2 (Leaf)" (leafBytes.data[0]! == 0xE2) + ++ test "outer tag byte is 0xE2 (Padding)" (padBytes.data[0]! == 0xE2) + ++ test "outer tag byte is 0xE2 (Node)" (nodeBytes.data[0]! == 0xE2) + ++ test "Leaf body tag is 0x00" (leafBytes.data[1]! == 0x00) + ++ test "Padding body tag is 0x01" (padBytes.data[1]! == 0x01) + ++ test "Node body tag is 0x02" (nodeBytes.data[1]! == 0x02) + ++ test "Leaf total bytes" (leafBytes.size == 1 + 1 + 32) + ++ test "Padding total bytes" (padBytes.size == 2) + ++ test "Node total bytes" (nodeBytes.size == 1 + 1 + (1 + 32) + (1 + 32)) + -- Roundtrips + ++ test "roundtrip Leaf" (serdeRoundtrip leaf) + ++ test "roundtrip Padding" (serdeRoundtrip pad) + ++ test "roundtrip Node simple" (serdeRoundtrip nodeLeaves) + ++ test "roundtrip canonical n=3" (serdeRoundtrip canon) + +/-! 
## Suite -/ + +public def Tests.AssumptionTree.suite : List TestSeq := [ + canonicalUnits, + rootAgreementUnits, + joinUnits, + leavesUnits, + merkleProofUnits, + serdeUnits, +] diff --git a/Tests/Ix/Claim.lean b/Tests/Ix/Claim.lean index d64afb80..41ba9859 100644 --- a/Tests/Ix/Claim.lean +++ b/Tests/Ix/Claim.lean @@ -25,62 +25,81 @@ private def addr2 : Address := Address.blake3 "world".toUTF8 private def addr3 : Address := Address.blake3 "test".toUTF8 def claimUnits : TestSeq := - -- EvalClaim - test "EvalClaim roundtrip" (claimSerde (.eval addr1 addr2)) - -- CheckClaim - ++ test "CheckClaim roundtrip" (claimSerde (.check addr1)) - -- RevealClaim with defn revealing only safety - ++ test "RevealClaim defn safety-only" (claimSerde (.reveal addr1 + -- Typechecking claims: both no-asm and with-asm forms. + test "Eval no-asm roundtrip" (claimSerde (.eval addr1 addr2 none)) + ++ test "Eval with-asm roundtrip" + (claimSerde (.eval addr1 addr2 (some addr3))) + ++ test "Check no-asm roundtrip" (claimSerde (.check addr1 none)) + ++ test "Check with-asm roundtrip" + (claimSerde (.check addr1 (some addr2))) + ++ test "CheckEnv no-asm roundtrip" (claimSerde (.checkEnv addr1 none)) + ++ test "CheckEnv with-asm roundtrip" + (claimSerde (.checkEnv addr1 (some addr2))) + -- Contains + ++ test "Contains roundtrip" (claimSerde (.contains addr1 addr2)) + -- Reveal claim variants (carried over from previous suite) + ++ test "Reveal defn safety-only" (claimSerde (.reveal addr1 (.defn none (some .safe) none none none))) - -- RevealClaim with defn revealing all fields - ++ test "RevealClaim defn all fields" (claimSerde (.reveal addr1 + ++ test "Reveal defn all fields" (claimSerde (.reveal addr1 (.defn (some .defn) (some .safe) (some 3) (some addr2) (some addr3)))) - -- RevealClaim with axio revealing type - ++ test "RevealClaim axio with type" (claimSerde (.reveal addr1 + ++ test "Reveal axio with type" (claimSerde (.reveal addr1 (.axio none none (some addr2)))) - -- RevealClaim with 
recr with rules - ++ test "RevealClaim recr with rules" (claimSerde (.reveal addr1 + ++ test "Reveal recr with rules" (claimSerde (.reveal addr1 (.recr (some true) none (some 2) none none none none none (some #[⟨0, 3, addr2⟩])))) - -- RevealClaim with muts with component - ++ test "RevealClaim muts with component" (claimSerde (.reveal addr1 + ++ test "Reveal muts with component" (claimSerde (.reveal addr1 (.muts #[(0, .defn (some .defn) (some .safe) none none none)]))) - -- Projection variants - ++ test "RevealClaim cPrj" (claimSerde (.reveal addr1 + ++ test "Reveal cPrj" (claimSerde (.reveal addr1 (.cPrj (some 0) (some 1) (some addr2)))) - ++ test "RevealClaim rPrj" (claimSerde (.reveal addr1 + ++ test "Reveal rPrj" (claimSerde (.reveal addr1 (.rPrj (some 2) (some addr2)))) - ++ test "RevealClaim iPrj" (claimSerde (.reveal addr1 + ++ test "Reveal iPrj" (claimSerde (.reveal addr1 (.iPrj (some 3) (some addr2)))) - ++ test "RevealClaim dPrj" (claimSerde (.reveal addr1 + ++ test "Reveal dPrj" (claimSerde (.reveal addr1 (.dPrj (some 0) (some addr2)))) - -- Empty fields - ++ test "RevealClaim defn all none" (claimSerde (.reveal addr1 + ++ test "Reveal defn all none" (claimSerde (.reveal addr1 (.defn none none none none none))) - -- Quot variant - ++ test "RevealClaim quot" (claimSerde (.reveal addr1 + ++ test "Reveal quot" (claimSerde (.reveal addr1 (.quot (some .type) (some 1) (some addr2)))) -/-! ## Byte-level encoding tests -/ +/-! ## Byte-level encoding tests + + All claim variants are at Tag4 sizes 3..=7 (single-byte tags + `0xE3`..`0xE7`). Eval, Check, and CheckEnv end with an opt byte (`0x00` for `none`, + `0x01`+32-byte address for `some`); Reveal and Contains have no opt byte. 
+-/ def claimEncodingTests : TestSeq := - let evalBytes := Claim.ser (.eval addr1 addr2) - let checkBytes := Claim.ser (.check addr1) - let revealSafetyOnly := Claim.ser (.reveal addr1 (.defn none (some .safe) none none none)) + let evalBytes := Claim.ser (.eval addr1 addr2 none) + let evalWithAsm := Claim.ser (.eval addr1 addr2 (some addr3)) + let checkBytes := Claim.ser (.check addr1 none) + let checkWithAsm := Claim.ser (.check addr1 (some addr2)) + let checkEnvBytes := Claim.ser (.checkEnv addr1 none) + let containsBytes := Claim.ser (.contains addr1 addr2) + let revealSafetyOnly := Claim.ser + (.reveal addr1 (.defn none (some .safe) none none none)) let revealAllFields := Claim.ser (.reveal addr1 (.defn (some .defn) (some .safe) (some 3) (some addr2) (some addr3))) - -- EvalClaim: starts with 0xE4, total 65 bytes (1 tag + 32 + 32) - test "EvalClaim tag byte is 0xE4" (evalBytes.data[0]! == 0xE4) - ++ test "EvalClaim size is 65" (evalBytes.size == 65) - -- CheckClaim: starts with 0xE3, total 33 bytes (1 tag + 32) - ++ test "CheckClaim tag byte is 0xE3" (checkBytes.data[0]! == 0xE3) - ++ test "CheckClaim size is 33" (checkBytes.size == 33) - -- RevealClaim: starts with 0xE6 - ++ test "RevealClaim tag byte is 0xE6" (revealSafetyOnly.data[0]! == 0xE6) - -- RevealClaim safety-only defn: 36 bytes (1 tag + 32 comm + 1 variant + 1 mask + 1 safety) - ++ test "RevealClaim safety-only defn size is 36" (revealSafetyOnly.size == 36) - -- RevealClaim with all defn fields should be larger - ++ test "RevealClaim all-fields defn is larger" (revealAllFields.size > revealSafetyOnly.size) + -- Eval claim: 0xE3 + 64 + 1 (opt) = 66 bytes; with asm = 98 + test "Eval tag byte is 0xE3" (evalBytes.data[0]! == 0xE3) + ++ test "Eval no-asm size is 66" (evalBytes.size == 66) + ++ test "Eval with-asm size is 98" (evalWithAsm.size == 98) + -- Check claim: 0xE4 + 32 + 1 = 34 bytes; with asm = 66 + ++ test "Check tag byte is 0xE4" (checkBytes.data[0]! 
== 0xE4) + ++ test "Check no-asm size is 34" (checkBytes.size == 34) + ++ test "Check with-asm size is 66" (checkWithAsm.size == 66) + -- CheckEnv claim: 0xE5 + 32 + 1 = 34 bytes + ++ test "CheckEnv tag byte is 0xE5" (checkEnvBytes.data[0]! == 0xE5) + ++ test "CheckEnv no-asm size is 34" (checkEnvBytes.size == 34) + -- Reveal claim: 0xE6 + ++ test "Reveal tag byte is 0xE6" (revealSafetyOnly.data[0]! == 0xE6) + -- Reveal safety-only defn: 1 (tag) + 32 (comm) + 1 (variant) + 1 (mask) + 1 (safety) = 36 + ++ test "Reveal safety-only defn size is 36" (revealSafetyOnly.size == 36) + ++ test "Reveal all-fields defn is larger" + (revealAllFields.size > revealSafetyOnly.size) + -- Contains claim: 0xE7 + 64 = 65 + ++ test "Contains tag byte is 0xE7" (containsBytes.data[0]! == 0xE7) + ++ test "Contains size is 65" (containsBytes.size == 65) /-! ## Suite -/ diff --git a/Tests/Ix/Commit.lean b/Tests/Ix/Commit.lean index e83d3166..7979927e 100644 --- a/Tests/Ix/Commit.lean +++ b/Tests/Ix/Commit.lean @@ -30,17 +30,17 @@ def commDeterminismTests : TestSeq := -- Different payloads → different addresses ++ test "Different payloads produce different commit addresses" (Comm.commit (Comm.mk secret1 payload1) != Comm.commit (Comm.mk secret1 payload2)) - -- Verify commitment format: tagged serialization starts with 0xE5 and is 65 bytes - ++ test "serCommTagged starts with 0xE5" - ((serCommTagged comm1).data[0]! == 0xE5) + -- Verify commitment format: tagged serialization starts with 0xE1 and is 65 bytes + ++ test "serCommTagged starts with 0xE1" + ((serCommTagged comm1).data[0]! == 0xE1) ++ test "serCommTagged is 65 bytes" ((serCommTagged comm1).size == 65) /-! 
## Claim.commit tests -/ def claimCommitTests : TestSeq := - let evalClaim := Claim.eval payload1 payload2 - let checkClaim := Claim.check payload1 + let evalClaim := Claim.eval payload1 payload2 none + let checkClaim := Claim.check payload1 none let revealSafety := Claim.reveal payload1 (.defn none (some .safe) none none none) let revealKind := Claim.reveal payload1 (.defn (some .defn) none none none none) let revealBoth := Claim.reveal payload1 (.defn (some .defn) (some .safe) none none none) @@ -167,13 +167,13 @@ def compileDefTests : TestSeq := private def checkClaimSucceeds : Bool := match Ix.Commit.checkClaim emptyCompileEnv [] simpleType simpleValue with - | .ok (.check _) => true + | .ok (.check _ _) => true | _ => false private def checkClaimMatchesCompileDef : Bool := match Ix.Commit.compileDef emptyCompileEnv [] simpleType simpleValue, Ix.Commit.checkClaim emptyCompileEnv [] simpleType simpleValue with - | .ok (addr, _), .ok (.check claimAddr) => addr == claimAddr + | .ok (addr, _), .ok (.check claimAddr _) => addr == claimAddr | _, _ => false private def openConstantInfoDefn : Bool := diff --git a/Tests/Ix/Ixon.lean b/Tests/Ix/Ixon.lean index 8a30d693..72d22ec4 100644 --- a/Tests/Ix/Ixon.lean +++ b/Tests/Ix/Ixon.lean @@ -235,6 +235,45 @@ def envSerializationUnitTests : TestSeq := test "Comm env Lean==Rust" (envSerializationMatches commRaw) ++ test "Blob+Comm env Lean==Rust" (envSerializationMatches blobCommRaw) +/-! ## Canonical env merkle root: Lean vs. Rust agreement -/ + +def envMerkleRootMatches (raw : RawEnv) : Bool := + let env := raw.toEnv + raw.merkleRoot == rsEnvMerkleRoot env + +/-- Both pure-Lean and Rust FFI agree on the merkle root over the env's + consts addresses. Distinct tests for various const-set shapes. -/ +def envMerkleRootUnitTests : TestSeq := + -- Empty env: no root. + let emptyRaw : RawEnv := + { consts := #[], named := #[], blobs := #[], comms := #[] } + -- Single-const env. 
+ let constAddr := Address.blake3 (ByteArray.mk #[42]) + let oneConst : Constant := + { info := .axio { isUnsafe := false, lvls := 0, typ := .sort 0 }, + sharing := #[], refs := #[], univs := #[] } + let singleRaw : RawEnv := + { consts := #[{ addr := constAddr, const := oneConst }], + named := #[], blobs := #[], comms := #[] } + -- Two-const env, inserted in different orders to check sort invariance. + let addrA := Address.blake3 "a".toUTF8 + let addrB := Address.blake3 "b".toUTF8 + let raw_ab : RawEnv := + { consts := #[{ addr := addrA, const := oneConst }, + { addr := addrB, const := oneConst }], + named := #[], blobs := #[], comms := #[] } + let raw_ba : RawEnv := + { consts := #[{ addr := addrB, const := oneConst }, + { addr := addrA, const := oneConst }], + named := #[], blobs := #[], comms := #[] } + test "empty env merkle root is none" (emptyRaw.merkleRoot == none) ++ + test "empty env: Lean==Rust" (envMerkleRootMatches emptyRaw) ++ + test "single const: Lean==Rust" (envMerkleRootMatches singleRaw) ++ + test "two consts (a,b): Lean==Rust" (envMerkleRootMatches raw_ab) ++ + test "two consts (b,a): Lean==Rust" (envMerkleRootMatches raw_ba) ++ + test "(a,b) and (b,a) same root" + (raw_ab.merkleRoot == raw_ba.merkleRoot) + /-! ## Test Suite (property-based) -/ public def Tests.Ixon.suite : List TestSeq := [ @@ -242,6 +281,8 @@ public def Tests.Ixon.suite : List TestSeq := [ envUnitTests, -- Env serialization comparison unit tests envSerializationUnitTests, + -- Env merkle root agreement (Lean vs. Rust FFI) + envMerkleRootUnitTests, -- Pure Lean serde roundtrips checkIO "Univ serde roundtrips" (∀ u : Univ, univSerde u), checkIO "Expr serde roundtrips" (∀ e : Expr, exprSerde e), diff --git a/Tests/Ix/Merkle.lean b/Tests/Ix/Merkle.lean new file mode 100644 index 00000000..969df078 --- /dev/null +++ b/Tests/Ix/Merkle.lean @@ -0,0 +1,118 @@ +/- + Canonical-builder, free-form composition, and membership-proof tests + for `Ix.Merkle`. 
+-/ + +module +public import Ix.Merkle +public import LSpec + +open LSpec +open Ix.Merkle + +private def addr (seed : String) : Address := + Address.blake3 seed.toUTF8 + +/-! ## Canonical builder -/ + +private def canonicalUnits : TestSeq := + let a := addr "a" + let b := addr "b" + let c := addr "c" + test "canonical_empty → none" (merkleRootCanonical #[] == none) + ++ test "canonical_single = leafHash" + (merkleRootCanonical #[a] == some (leafHash a)) + ++ test "canonical_sort_invariant [a,b] = [b,a]" + (merkleRootCanonical #[a, b] == merkleRootCanonical #[b, a]) + ++ test "canonical_dedup [a,a,b] = [a,b]" + (merkleRootCanonical #[a, a, b] == merkleRootCanonical #[a, b]) + ++ test "canonical_distinguishes [a] ≠ [a,b]" + (!(merkleRootCanonical #[a] == merkleRootCanonical #[a, b])) + ++ test "canonical_three_leaves nonempty" + ((merkleRootCanonical #[a, b, c]).isSome) + ++ test "canonical_no_malleability" + -- [a, a] post-dedup is [a], yielding leafHash a. + -- A two-leaf tree node_hash(leaf_hash(a), leaf_hash(a)) is different. + (merkleRootCanonical #[a, a] + == some (leafHash a) + && !(merkleRootCanonical #[a, a] + == some (nodeHash (leafHash a) (leafHash a)))) + +/-! ## Domain separation -/ + +private def domainSepUnits : TestSeq := + let a := addr "a" + test "leaf_vs_node_disjoint" + (!(leafHash a == nodeHash a zeroAddress)) + +/-! ## Free-form (merkleJoin) -/ + +private def joinUnits : TestSeq := + let a := addr "a" + let b := addr "b" + let c := addr "c" + test "join_is_node_hash" (merkleJoin a b == nodeHash a b) + ++ test "join_non_commutative" (!(merkleJoin a b == merkleJoin b a)) + ++ test "join_canonical_inequal" + -- Free-form tree of {a,b}{c} ≠ canonical tree of {a,b,c}. + (let left := (merkleRootCanonical #[a, b]).get! + let right := (merkleRootCanonical #[c]).get! + let joined := merkleJoin left right + let canon := (merkleRootCanonical #[a, b, c]).get! + !(joined == canon)) + +/-! 
## Membership proofs -/ + +private def membershipUnits : TestSeq := + let a := addr "a" + let b := addr "b" + let c := addr "c" + let leaves := #[a, b, c] + let root := (merkleRootCanonical leaves).get! + test "proof_single_leaf empty path" + ((merkleProofCanonical #[a] a).get!.isEmpty + && verifyMerkleProof (leafHash a) a #[]) + ++ test "proof_two_leaves roundtrip" + (let leaves := #[a, b] + let root := (merkleRootCanonical leaves).get! + let path_a := (merkleProofCanonical leaves a).get! + let path_b := (merkleProofCanonical leaves b).get! + verifyMerkleProof root a path_a && verifyMerkleProof root b path_b) + ++ test "proof_three_leaves all members" + (let path_a := (merkleProofCanonical leaves a).get! + let path_b := (merkleProofCanonical leaves b).get! + let path_c := (merkleProofCanonical leaves c).get! + verifyMerkleProof root a path_a + && verifyMerkleProof root b path_b + && verifyMerkleProof root c path_c) + ++ test "proof_rejects_nonmember_direct" + ((merkleProofCanonical #[a, b] (addr "x")) == none) + +/-! ## Join composes membership -/ + +private def joinMembershipUnits : TestSeq := + let a := addr "a" + let b := addr "b" + let c := addr "c" + let leftLeaves := #[a, b] + let rightLeaves := #[c] + let leftRoot := (merkleRootCanonical leftLeaves).get! + let rightRoot := (merkleRootCanonical rightLeaves).get! + let joined := merkleJoin leftRoot rightRoot + test "join_composes_left" + (let path := (merkleProofCanonical leftLeaves a).get! + verifyMerkleProof joined a (path.push (rightRoot, false))) + ++ test "join_composes_right" + (let path := (merkleProofCanonical rightLeaves c).get! + verifyMerkleProof joined c (path.push (leftRoot, true))) + +/-! 
## Suite -/ + +public def Tests.Merkle.suite : List TestSeq := [ + canonicalUnits, + domainSepUnits, + joinUnits, + membershipUnits, + joinMembershipUnits, +] + diff --git a/Tests/Main.lean b/Tests/Main.lean index 73eb720d..06f38ae1 100644 --- a/Tests/Main.lean +++ b/Tests/Main.lean @@ -3,6 +3,8 @@ import Tests.ByteArray import Tests.Ix.Ixon import Tests.Ix.IxVM import Tests.Ix.Claim +import Tests.Ix.Merkle +import Tests.Ix.AssumptionTree import Tests.Ix.Commit import Tests.Ix.Compile import Tests.Ix.Compile.ValidateAux @@ -37,6 +39,8 @@ def primarySuites : Std.HashMap String (List LSpec.TestSeq) := .ofList [ ("byte-array", Tests.ByteArray.suite), ("ixon", Tests.Ixon.suite), ("claim", Tests.Claim.suite), + ("merkle", Tests.Merkle.suite), + ("assumption-tree", Tests.AssumptionTree.suite), ("commit", Tests.Commit.suite), ("canon", [Tests.CanonM.suite]), ("keccak", Tests.Keccak.suite), diff --git a/docs/Ixon.md b/docs/Ixon.md index 655f06d8..c383d648 100644 --- a/docs/Ixon.md +++ b/docs/Ixon.md @@ -91,8 +91,8 @@ Used for expressions, constants, and environment/proof structures. Header byte f | 0xB | Expr | Share | Share vector index | | 0xC | Constant | Muts | Entry count | | 0xD | Constant | Non-Muts | Variant (0-7) | -| 0xE | Env/Proof | Env/Claim/Proof/Comm | Variant (0-7) | -| 0xF | - | Reserved | - | +| 0xE | Env/Claim | Env/Comm/AssumptionTree/Claim | Variant (0-7) | +| 0xF | Proof | ZK proofs | Variant (0-4); 5-7 reserved | ```rust pub struct Tag4 { @@ -795,13 +795,29 @@ Names are topologically sorted in the environment so parents are serialized befo ### Environment Serialization -The environment serializes in 5 sections with a version header: +Serialized environments are stored on disk with the **`.ixe`** file +extension (e.g., `compilemathlib.ixe`). The `ix compile` CLI produces +these files; the default output name is the lowercased input file stem +plus `.ixe`. See `src/ix/ixon/serialize.rs::Env::put` for the +byte-level layout. 
+ +The .ixe layout is a Tag4(0xE, 0) header byte followed by a 32-byte +canonical merkle root and then 5 sections: ``` -Header: Tag4 { flag: 0xE, size: VERSION } +Header: Tag4 { flag: 0xE, size: 0 } -- one byte (0xE0) +Root: 32 bytes -- canonical merkle root over + consts.keys(); for empty + const sets this is the + fixed `zero_address` + sentinel. ``` -Current version is 2 (supports zstd compression after header). +The root is mandatory (non-optional): every env has a unique canonical +identity recoverable from its file. Two envs with the same const set +produce byte-identical roots regardless of construction order. +Deserialization recomputes the root from `consts` and rejects any +mismatch as tampered. **Section 1: Blobs** (Address → raw bytes) ``` @@ -837,53 +853,79 @@ count (Tag0) ## Proofs and Claims -Claims, proofs, commitments, and environments share Tag4 flag 0xE. +Envs, commitments, the AssumptionTree data type, and all claims share +Tag4 flag 0xE. Proofs (opaque ZK bytes) share Tag4 flag 0xF. Everything +fits in single-byte tags (sizes 0..=7 per flag). + +### Tag4 0xE Variant Layout (Env + Comm + AssumptionTree + Claims) + +| Size | Byte | Type | Payload | +|------|------|------|---------| +| 0 | `0xE0` | Environment | 32-byte canonical merkle root + sections | +| 1 | `0xE1` | Commitment | 2 addr: secret, payload | +| 2 | `0xE2` | AssumptionTree | recursive merkle-tree body (see below) | +| 3 | `0xE3` | Eval claim | 2 addr (input, output) + opt assumptions | +| 4 | `0xE4` | Check claim | 1 addr (const) + opt assumptions | +| 5 | `0xE5` | CheckEnv claim | 1 addr (env root) + opt assumptions | +| 6 | `0xE6` | Reveal claim | 1 addr (comm) + RevealConstantInfo | +| 7 | `0xE7` | Contains claim | 2 addr (tree, const) | + +`opt assumptions` encoding: 1 byte `0x00` for `None`, or `0x01` followed +by 32 bytes for `Some(merkle_root)`. 
-### Tag4 0xE Variant Layout +### Tag4 0xF Variant Layout (Proofs) | Size | Byte | Type | Payload | |------|------|------|---------| -| 0 | `0xE0` | Environment | sections | -| 1 | `0xE1` | CheckProof | 1 addr + proof bytes | -| 2 | `0xE2` | EvalProof | 2 addr + proof bytes | -| 3 | `0xE3` | CheckClaim | 1 addr | -| 4 | `0xE4` | EvalClaim | 2 addr: input, output | -| 5 | `0xE5` | Commitment | 2 addr: secret, payload | -| 6 | `0xE6` | RevealClaim | 1 addr + RevealConstantInfo | -| 7 | `0xE7` | RevealProof | 1 addr + RevealConstantInfo + proof bytes | +| 0 | `0xF0` | Eval proof | claim payload + Tag0 length + opaque ZK bytes | +| 1 | `0xF1` | Check proof | claim payload + Tag0 length + opaque ZK bytes | +| 2 | `0xF2` | CheckEnv proof | claim payload + Tag0 length + opaque ZK bytes | +| 3 | `0xF3` | Reveal proof | claim payload + Tag0 length + opaque ZK bytes | +| 4 | `0xF4` | Contains proof | claim payload + Tag0 length + opaque ZK bytes | + +Proof bytes are uniform opaque ZK proofs — witness data (e.g., merkle +paths for Contains) is prover-side scratch consumed by the ZK circuit +and NOT transmitted on the wire. ### Claim Types ```rust -/// Evaluation claim: the constant at `input` evaluates to the constant at `output`. -pub struct EvalClaim { - pub input: Address, // Input constant address - pub output: Address, // Output constant address +pub enum Claim { + /// `input` evaluates to `output`, optionally modulo `assumptions`. + Eval { input: Address, output: Address, assumptions: Option<Address>
}, + /// The constant at `const_addr` is well-typed, optionally modulo + /// `assumptions`. + Check { const_addr: Address, assumptions: Option<Address>
}, + /// Every constant in the env merkle-rooted at `root` is well-typed, + /// optionally modulo `assumptions` (typically the env's axiom leaves). + CheckEnv { root: Address, assumptions: Option<Address>
}, + /// Selective field revelation of a committed constant. + Reveal { comm: Address, info: RevealConstantInfo }, + /// `const_addr` is a leaf in the merkle tree rooted at `tree`. + Contains { tree: Address, const_addr: Address }, } +``` -/// Type-checking claim: the constant at `value` is well-typed. -pub struct CheckClaim { - pub value: Address, // Value constant address -} +### AssumptionTree -/// Selective revelation of fields of a committed constant. -pub struct RevealClaim { - pub comm: Address, // Commitment address - pub info: RevealConstantInfo, // Revealed field information -} +A serializable merkle tree over `Address` leaves, used to recover the +leaf set committed to by a conditional claim's `assumptions` root. -pub enum Claim { - Evals(EvalClaim), - Checks(CheckClaim), - Reveals(RevealClaim), -} +``` +[Tag4(0xE, 2) = 0xE2] [body] + +body recursive (Padding is the single byte [0x01]): + Leaf(addr): [0x00] [addr:32] + Node(l, r): [0x02] [body l] [body r] ``` +Size: `34N + 2P - 1` bytes for N leaves and P padding slots + 1 byte top-level tag. + ### Commitment Hashing -Commitments are serialized with Tag4(0xE, 5) and hashed with blake3: +Commitments are serialized with Tag4(0xE, 1) and hashed with blake3: ``` -commitment_address = blake3(0xE5 + secret_address + payload_address) +commitment_address = blake3(0xE1 + secret_address + payload_address) ``` The payload address is always the transparent hash of the constant, regardless of the secret. @@ -891,48 +933,53 @@ Two commitments to the same constant share the same payload address. ### RevealConstantInfo Format -RevealClaim allows selective revelation of constant metadata fields (kind, safety, idx, etc.) -without opening the full commitment. Serialization: `variant (1 byte) + field_mask (Tag0) + field values...` +The Reveal claim allows selective revelation of constant metadata fields +(kind, safety, idx, etc.) without opening the full commitment. 
+Serialization: `variant (1 byte) + field_mask (Tag0) + field values...` -The field_mask uses Tag0 encoding (1 byte for masks < 128). Fields are serialized in mask bit order. -Expression fields are revealed as `Address = blake3(serialized Expr bytes)`. +The field_mask uses Tag0 encoding (1 byte for masks < 128). Fields are +serialized in mask bit order. Expression fields are revealed as `Address += blake3(serialized Expr bytes)`. ### Proof Structure ```rust pub struct Proof { pub claim: Claim, // The claim being proven - pub proof: Vec<u8>, // Opaque proof data (e.g., ZK proof bytes) + pub proof: Vec<u8>, // Opaque ZK proof bytes (uniform across variants) } ``` ### Serialization Examples -**EvalClaim** (0xE4, 2 addresses): +**Eval claim** (0xE3, 2 addresses + opt assumptions byte): ``` -E4 -- Tag4 { flag: 0xE, size: 4 } (EvalClaim) +E3 -- Tag4 { flag: 0xE, size: 3 } (Eval) [32 bytes] -- input address [32 bytes] -- output address +00 -- assumptions = None (or 01 + [32 bytes] for Some) ``` -**EvalProof** (0xE2, 2 addresses + proof): +**Eval proof** (0xF0, claim payload + opaque ZK bytes): ``` -E2 -- Tag4 { flag: 0xE, size: 2 } (EvalProof) +F0 -- Tag4 { flag: 0xF, size: 0 } (Eval proof) [32 bytes] -- input address [32 bytes] -- output address +00 -- assumptions = None 04 -- proof.len = 4 (Tag0) -01 02 03 04 -- proof bytes +01 02 03 04 -- opaque ZK proof bytes ``` -**CheckClaim** (0xE3, 1 address): +**Check claim** (0xE4, 1 address + opt assumptions byte): ``` -E3 -- Tag4 { flag: 0xE, size: 3 } (CheckClaim) -[32 bytes] -- value address +E4 -- Tag4 { flag: 0xE, size: 4 } (Check) +[32 bytes] -- const address +00 -- assumptions = None ``` -**RevealClaim** — reveal that a committed Definition has `safety = Safe`: +**Reveal claim** — reveal that a committed Definition has `safety = Safe`: ``` -E6 -- Tag4 { flag: 0xE, size: 6 } (RevealClaim) +E6 -- Tag4 { flag: 0xE, size: 6 } (Reveal) [32 bytes] -- comm_addr 00 -- variant: Definition 02 -- mask: bit 1 (safety) [Tag0] @@ -940,6 
+987,14 @@ E6 -- Tag4 { flag: 0xE, size: 6 } (RevealClaim) ``` Total: 36 bytes. +**Contains claim** (0xE7, 2 addresses): +``` +E7 -- Tag4 { flag: 0xE, size: 7 } (Contains) +[32 bytes] -- tree (merkle root) +[32 bytes] -- const address (asserted leaf) +``` +Total: 65 bytes. + --- ## Compilation (Lean → Ixon) @@ -1288,7 +1343,7 @@ same constant share the same payload address (canonicity). The secret provides b Commitments enable: - **Whole-constant hiding** via `Comm` (hides everything including metadata) -- **Selective revelation** via `RevealClaim` (proves specific field values about a committed constant) +- **Selective revelation** via `Claim::Reveal` (proves specific field values about a committed constant) - **Expression-level blinding** via `Expr.ref ` within expression trees - **Verifiable computation** on committed data (the ZK circuit opens commitments privately) diff --git a/src/ffi/ixon/env.rs b/src/ffi/ixon/env.rs index bcafec12..2c4a3748 100644 --- a/src/ffi/ixon/env.rs +++ b/src/ffi/ixon/env.rs @@ -8,6 +8,7 @@ use crate::ix::env::Name; use crate::ix::ixon::comm::Comm; use crate::ix::ixon::constant::Constant as IxonConstant; use crate::ix::ixon::env::{Env as IxonEnv, Named as IxonNamed}; +use crate::ix::ixon::merkle::merkle_root_canonical; use crate::ix::ixon::metadata::ConstantMeta; use crate::lean::{ LeanIxName, LeanIxonComm, LeanIxonConstant, LeanIxonConstantMeta, @@ -389,6 +390,29 @@ pub extern "C" fn rs_ser_env( LeanByteArray::from_bytes(&buf) } +// ============================================================================= +// rs_env_merkle_root: Compute the canonical merkle root over an env's +// `consts` addresses. Used by the Lean side to verify env identity +// without re-parsing serialized bytes. +// +// Returns 32 bytes for non-empty const sets, empty bytes for empty. 
+// ============================================================================= + +#[unsafe(no_mangle)] +pub extern "C" fn rs_env_merkle_root( + obj: LeanIxonRawEnv>, +) -> LeanByteArray { + let decoded = obj.decode(); + let env = decoded_to_ixon_env(&decoded); + let mut addrs: Vec
= + env.consts.iter().map(|e| e.key().clone()).collect(); + addrs.sort_unstable(); + match merkle_root_canonical(&addrs) { + Some(root) => LeanByteArray::from_bytes(root.as_bytes()), + None => LeanByteArray::from_bytes(&[]), + } +} + // ============================================================================= // rs_de_env: Deserialize bytes to an Ixon.RawEnv // ============================================================================= diff --git a/src/ffi/lean_env.rs b/src/ffi/lean_env.rs index 21795ffe..9ebfba3a 100644 --- a/src/ffi/lean_env.rs +++ b/src/ffi/lean_env.rs @@ -3881,9 +3881,6 @@ impl ConstSizeBreakdown { #[cfg(feature = "test-ffi")] /// Analyze the serialized size of a constant and its transitive dependencies. fn analyze_const_size(stt: &crate::ix::compile::CompileState, name_str: &str) { - use crate::ix::address::Address; - use std::collections::{HashSet, VecDeque}; - // Build a global name index for metadata serialization let name_index = build_name_index(stt); @@ -3914,53 +3911,34 @@ fn analyze_const_size(stt: &crate::ix::compile::CompileState, name_str: &str) { let direct_breakdown = compute_const_size_breakdown(&constant, &name, stt, &name_index); - // BFS to collect all transitive dependencies - let mut visited: HashSet
= HashSet::new(); - let mut queue: VecDeque
= VecDeque::new(); + // Reuse the shared transitive-closure walker on Env. + let dep_addrs = stt.env.transitive_deps_excl(&addr); let mut dep_breakdowns: Vec<(String, ConstSizeBreakdown)> = Vec::new(); - - // Start with the constant's refs - visited.insert(addr.clone()); - for dep_addr in &constant.refs { - if !visited.contains(dep_addr) { - queue.push_back(dep_addr.clone()); - visited.insert(dep_addr.clone()); - } - } - - // BFS through all transitive dependencies - while let Some(dep_addr) = queue.pop_front() { - if let Some(dep_const) = stt.env.consts.get(&dep_addr) { - // Get the name for this dependency (scan named entries) - let dep_name_opt: Option = stt - .env - .named - .iter() - .find(|e| e.value().addr == dep_addr) - .map(|e| e.key().clone()); - let dep_name_str = dep_name_opt - .as_ref() - .map_or_else(|| format!("{:.12}", dep_addr.hex()), |n| n.pretty()); - - let breakdown = if let Some(ref dep_name) = dep_name_opt { - compute_const_size_breakdown(&dep_const, dep_name, stt, &name_index) - } else { - ConstSizeBreakdown { - alpha_size: serialized_const_size(&dep_const), - meta_size: 0, - } - }; - - dep_breakdowns.push((dep_name_str, breakdown)); - - // Add this constant's refs to the queue - for ref_addr in &dep_const.refs { - if !visited.contains(ref_addr) { - queue.push_back(ref_addr.clone()); - visited.insert(ref_addr.clone()); - } + for dep_addr in dep_addrs { + let Some(dep_const) = stt.env.consts.get(&dep_addr) else { + continue; + }; + // Get the name for this dependency (scan named entries) + let dep_name_opt: Option = stt + .env + .named + .iter() + .find(|e| e.value().addr == dep_addr) + .map(|e| e.key().clone()); + let dep_name_str = dep_name_opt + .as_ref() + .map_or_else(|| format!("{:.12}", dep_addr.hex()), |n| n.pretty()); + + let breakdown = if let Some(ref dep_name) = dep_name_opt { + compute_const_size_breakdown(&dep_const, dep_name, stt, &name_index) + } else { + ConstSizeBreakdown { + alpha_size: serialized_const_size(&dep_const), + 
meta_size: 0, } - } + }; + + dep_breakdowns.push((dep_name_str, breakdown)); } // Sort by total size descending diff --git a/src/ix/ixon.rs b/src/ix/ixon.rs index cda202a8..3a0319f0 100644 --- a/src/ix/ixon.rs +++ b/src/ix/ixon.rs @@ -6,11 +6,13 @@ //! - Content-addressed storage with sharing support //! - Cryptographic commitments for ZK proofs +pub mod assumption_tree; pub mod comm; pub mod constant; pub mod env; pub mod error; pub mod expr; +pub mod merkle; pub mod metadata; pub mod proof; pub mod serialize; @@ -33,8 +35,8 @@ pub use metadata::{ NameReverseIndex, }; pub use proof::{ - CheckClaim, Claim, EvalClaim, Proof, RevealClaim, RevealConstantInfo, - RevealConstructorInfo, RevealMutConstInfo, RevealRecursorRule, + Claim, Proof, RevealConstantInfo, RevealConstructorInfo, RevealMutConstInfo, + RevealRecursorRule, }; pub use tag::{Tag0, Tag2, Tag4}; pub use univ::Univ; @@ -305,56 +307,65 @@ mod doc_examples { #[test] fn eval_claim_tag() { - // EvalClaim -> Tag4 { flag: 0xE, size: 4 } -> 0xE4 - let claim = Claim::Evals(EvalClaim { + // Eval claim -> Tag4 { flag: 0xE, size: 3 } -> 0xE3 (single byte) + let claim = Claim::Eval { input: Address::hash(b"input"), output: Address::hash(b"output"), - }); + assumptions: None, + }; let mut buf = Vec::new(); claim.put(&mut buf); - assert_eq!(buf[0], 0xE4, "EvalClaim should start with 0xE4"); - assert_eq!(buf.len(), 1 + 64, "EvalClaim should be 1 + 2*32 = 65 bytes"); + assert_eq!(buf[0], 0xE3, "Eval claim should start with 0xE3"); + // 1 (tag) + 64 (addresses) + 1 (opt=None) = 66 + assert_eq!(buf.len(), 1 + 64 + 1, "Eval claim no-asm = 66 bytes"); } #[test] fn eval_proof_tag() { - // EvalProof -> Tag4 { flag: 0xE, size: 2 } -> 0xE2 + // Eval proof -> Tag4 { flag: 0xF, size: 0 } -> 0xF0 (single byte) let proof = Proof::new( - Claim::Evals(EvalClaim { + Claim::Eval { input: Address::hash(b"input"), output: Address::hash(b"output"), - }), + assumptions: None, + }, vec![1, 2, 3, 4], ); let mut buf = Vec::new(); proof.put(&mut 
buf); - assert_eq!(buf[0], 0xE2, "EvalProof should start with 0xE2"); - // 1 (tag) + 64 (addresses) + 1 (len=4) + 4 (proof bytes) = 70 - assert_eq!(buf.len(), 70, "EvalProof with 4 bytes should be 70 bytes"); - assert_eq!(buf[65], 0x04, "proof.len should be 0x04"); - assert_eq!(&buf[66..70], &[1, 2, 3, 4], "proof bytes should be [1,2,3,4]"); + assert_eq!(buf[0], 0xF0, "Eval proof should start with 0xF0"); + // 1 (tag) + 64 (addresses) + 1 (opt) + 1 (len=4) + 4 (proof) = 71 + assert_eq!(buf.len(), 71, "Eval proof no-asm + 4 proof bytes = 71 bytes"); + assert_eq!(buf[66], 0x04, "proof.len byte should be 0x04"); + assert_eq!(&buf[67..71], &[1, 2, 3, 4], "proof bytes should be [1,2,3,4]"); } #[test] fn check_claim_tag() { - // CheckClaim -> Tag4 { flag: 0xE, size: 3 } -> 0xE3 - let claim = Claim::Checks(CheckClaim { value: Address::hash(b"value") }); + // Check claim -> Tag4 { flag: 0xE, size: 4 } -> 0xE4 + let claim = Claim::Check { + const_addr: Address::hash(b"value"), + assumptions: None, + }; let mut buf = Vec::new(); claim.put(&mut buf); - assert_eq!(buf[0], 0xE3, "CheckClaim should start with 0xE3"); - assert_eq!(buf.len(), 1 + 32, "CheckClaim should be 1 + 1*32 = 33 bytes"); + assert_eq!(buf[0], 0xE4, "Check claim should start with 0xE4"); + assert_eq!(buf.len(), 1 + 32 + 1, "Check claim no-asm = 34 bytes"); } #[test] fn check_proof_tag() { - // CheckProof -> Tag4 { flag: 0xE, size: 1 } -> 0xE1 + // Check proof -> Tag4 { flag: 0xF, size: 1 } -> 0xF1 let proof = Proof::new( - Claim::Checks(CheckClaim { value: Address::hash(b"value") }), + Claim::Check { + const_addr: Address::hash(b"value"), + assumptions: None, + }, vec![5, 6, 7], ); let mut buf = Vec::new(); proof.put(&mut buf); - assert_eq!(buf[0], 0xE1, "CheckProof should start with 0xE1"); + assert_eq!(buf[0], 0xF1, "Check proof should start with 0xF1"); } // ========================================================================= diff --git a/src/ix/ixon/assumption_tree.rs b/src/ix/ixon/assumption_tree.rs 
new file mode 100644 index 00000000..7b890133 --- /dev/null +++ b/src/ix/ixon/assumption_tree.rs @@ -0,0 +1,510 @@ +//! Serializable merkle tree over `Address` leaves. +//! +//! Used to recover the leaf set committed to by a conditional claim's +//! `assumptions` root. The root alone tells the verifier *which* set +//! was assumed; the AssumptionTree carries the actual leaves so the +//! verifier can inspect them (e.g., "do I trust each of these axioms?"). +//! +//! Two construction modes — both produce the same `Node`-shaped trees, +//! differ only in how leaves are arranged: +//! +//! - `canonical(leaves)` builds the same shape that +//! [`merkle_root_canonical`] hashes, so +//! `Self::canonical(L).map(|t| t.root()) == merkle_root_canonical(L)`. +//! Includes `Padding` nodes at every level where odd-leaf padding +//! happened in the canonical builder. +//! - `join(l, r)` is free-form O(1) composition; result root matches +//! [`merkle_join`]. +//! +//! ## Serialization +//! +//! Tag4 size 2 under flag 0xE: +//! +//! ```text +//! [Tag4(0xE, 2) = 0xE2] [body] +//! +//! body recursive: +//! Leaf(addr): [0x00] [addr:32] +//! Padding: [0x01] +//! Node(l, r): [0x02] [body l] [body r] +//! ``` +//! +//! A `Padding` node represents the zero-sentinel slot used by the +//! canonical builder to even out odd levels. Its root is exactly +//! `zero_address()`, matching the bare 32-byte zero that merkle.rs +//! mixes into odd-level hashing. Splitting it from `Leaf` keeps +//! `leaves()` clean (it returns only real leaves, not the synthetic +//! padding addresses). + +use crate::ix::address::Address; + +use super::merkle::{ + MerklePath, leaf_hash, node_hash, zero_address, +}; +use super::proof::{FLAG_CLAIM, VARIANT_ASSUMPTION_TREE}; +use super::tag::Tag4; + +// Body-tag bytes (within the AssumptionTree-flagged payload). +const BODY_LEAF: u8 = 0x00; +const BODY_PADDING: u8 = 0x01; +const BODY_NODE: u8 = 0x02; + +/// A merkle tree over `Address` leaves with explicit shape. 
+#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub enum AssumptionTree { + /// A real assumption leaf. + Leaf(Address), + /// Canonical-builder padding. Root is `zero_address()` — matches the + /// raw-zero sentinel mixed in by `merkle_root_canonical` at odd-count + /// levels. + Padding, + /// Internal node combining two subtrees. + Node(Box<AssumptionTree>, Box<AssumptionTree>), +} + +impl AssumptionTree { + // ---- Construction ---- + + /// Build the canonical sorted+padded merkle tree over a leaf set. + /// + /// Returns `None` if `leaves` is empty (post-dedup). For a single + /// leaf returns `Leaf(only)`. Otherwise builds the same shape that + /// `merkle_root_canonical` produces, with `Padding` nodes wherever + /// odd-level zero-padding occurs. + pub fn canonical(leaves: &[Address]) -> Option<Self> { + let mut sorted: Vec<Address>
= leaves.to_vec(); + sorted.sort_unstable(); + sorted.dedup(); + if sorted.is_empty() { + return None; + } + if sorted.len() == 1 { + return Some(AssumptionTree::Leaf(sorted.into_iter().next().unwrap())); + } + let mut level: Vec<AssumptionTree> = + sorted.into_iter().map(AssumptionTree::Leaf).collect(); + while level.len() > 1 { + let mut next = Vec::with_capacity(level.len().div_ceil(2)); + let mut iter = level.into_iter().peekable(); + while iter.peek().is_some() { + let l = iter.next().unwrap(); + let r = iter.next().unwrap_or(AssumptionTree::Padding); + next.push(AssumptionTree::Node(Box::new(l), Box::new(r))); + } + level = next; + } + Some(level.into_iter().next().unwrap()) + } + + /// Combine two existing subtrees into a new free-form node in O(1). + #[inline] + pub fn join(left: Self, right: Self) -> Self { + AssumptionTree::Node(Box::new(left), Box::new(right)) + } + + // ---- Queries ---- + + /// Recursively compute the root hash. + pub fn root(&self) -> Address { + match self { + AssumptionTree::Leaf(addr) => leaf_hash(addr), + AssumptionTree::Padding => zero_address(), + AssumptionTree::Node(l, r) => node_hash(&l.root(), &r.root()), + } + } + + /// In-order traversal of real leaves (skips `Padding`). + pub fn leaves(&self) -> Vec<Address>
{ + let mut out = Vec::new(); + self.collect_leaves(&mut out); + out + } + + fn collect_leaves(&self, out: &mut Vec<Address>
) { + match self { + AssumptionTree::Leaf(addr) => out.push(addr.clone()), + AssumptionTree::Padding => {}, + AssumptionTree::Node(l, r) => { + l.collect_leaves(out); + r.collect_leaves(out); + }, + } + } + + /// True iff `target` appears as a `Leaf` somewhere in the tree. + pub fn contains(&self, target: &Address) -> bool { + match self { + AssumptionTree::Leaf(addr) => addr == target, + AssumptionTree::Padding => false, + AssumptionTree::Node(l, r) => l.contains(target) || r.contains(target), + } + } + + /// Produce a merkle membership path for `target`. Returns `None` if + /// `target` is not a `Leaf` in the tree. Empty path for a + /// single-leaf tree. + /// + /// Path is in leaf-to-root order: `path[0]` is the immediate sibling + /// of the leaf, `path[N-1]` is the root-level sibling. Matches the + /// order expected by `verify_merkle_proof`. + pub fn merkle_proof(&self, target: &Address) -> Option<MerklePath> { + let mut path: MerklePath = Vec::new(); + if self.search_path(target, &mut path) { + Some(path) + } else { + None + } + } + + /// Recursive helper: if `target` is in this subtree, push the sibling + /// at this level (after the recursive call returns) and return true. + /// Since pushes happen on the way back up, `path[0]` ends up as the + /// deepest (closest-to-leaf) sibling — leaf-to-root order. + fn search_path(&self, target: &Address, path: &mut MerklePath) -> bool { + match self { + AssumptionTree::Leaf(addr) => addr == target, + AssumptionTree::Padding => false, + AssumptionTree::Node(l, r) => { + if l.search_path(target, path) { + // Target was in left subtree; sibling = r.root(), sibling + // is on the right (is_left = false). + path.push((r.root(), false)); + true + } else if r.search_path(target, path) { + // Target was in right subtree; sibling = l.root(), sibling + // is on the left (is_left = true). 
+ path.push((l.root(), true)); + true + } else { + false + } + }, + } + } + + // ---- Serialization ---- + + /// Serialize with Tag4(0xE, 2) outer header + recursive body. + pub fn put(&self, buf: &mut Vec<u8>) { + Tag4::new(FLAG_CLAIM, VARIANT_ASSUMPTION_TREE).put(buf); + self.put_body(buf); + } + + fn put_body(&self, buf: &mut Vec<u8>) { + match self { + AssumptionTree::Leaf(addr) => { + buf.push(BODY_LEAF); + buf.extend_from_slice(addr.as_bytes()); + }, + AssumptionTree::Padding => { + buf.push(BODY_PADDING); + }, + AssumptionTree::Node(l, r) => { + buf.push(BODY_NODE); + l.put_body(buf); + r.put_body(buf); + }, + } + } + + /// Deserialize: expects Tag4(0xE, 2) outer header. + pub fn get(buf: &mut &[u8]) -> Result<Self, String> { + let tag = Tag4::get(buf)?; + if tag.flag != FLAG_CLAIM || tag.size != VARIANT_ASSUMPTION_TREE { + return Err(format!( + "AssumptionTree::get: expected Tag4{{0xE, 2}}, got Tag4{{{}, {}}}", + tag.flag, tag.size, + )); + } + Self::get_body(buf) + } + + fn get_body(buf: &mut &[u8]) -> Result<Self, String> { + let (tag, rest) = buf.split_first().ok_or("AssumptionTree: EOF reading body tag")?; + *buf = rest; + match *tag { + BODY_LEAF => { + if buf.len() < 32 { + return Err(format!( + "AssumptionTree: Leaf needs 32 bytes, have {}", + buf.len() + )); + } + let (head, rest) = buf.split_at(32); + *buf = rest; + let addr = Address::from_slice(head) + .map_err(|_e| "AssumptionTree: invalid leaf address".to_string())?; + Ok(AssumptionTree::Leaf(addr)) + }, + BODY_PADDING => Ok(AssumptionTree::Padding), + BODY_NODE => { + let l = Self::get_body(buf)?; + let r = Self::get_body(buf)?; + Ok(AssumptionTree::Node(Box::new(l), Box::new(r))) + }, + b => Err(format!("AssumptionTree: invalid body tag 0x{:02X}", b)), + } + } + + /// Serialize to a fresh Vec. 
+ pub fn ser(&self) -> Vec<u8> { + let mut buf = Vec::new(); + self.put(&mut buf); + buf + } +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use super::super::merkle::{ + merkle_join, merkle_root_canonical, verify_merkle_proof, + }; + + fn addr(seed: &[u8]) -> Address { + Address::hash(seed) + } + + // ---------- Construction ---------- + + #[test] + fn canonical_empty_is_none() { + assert!(AssumptionTree::canonical(&[]).is_none()); + } + + #[test] + fn canonical_single_leaf() { + let a = addr(b"only"); + let t = AssumptionTree::canonical(&[a.clone()]).unwrap(); + assert_eq!(t, AssumptionTree::Leaf(a)); + } + + #[test] + fn canonical_two_leaves_no_padding() { + let a = addr(b"a"); + let b = addr(b"b"); + let t = AssumptionTree::canonical(&[a.clone(), b.clone()]).unwrap(); + // sorted -> [min, max]; tree is Node(Leaf(min), Leaf(max)) + let (lo, hi) = if a < b { (a, b) } else { (b, a) }; + assert_eq!( + t, + AssumptionTree::Node( + Box::new(AssumptionTree::Leaf(lo)), + Box::new(AssumptionTree::Leaf(hi)) + ) + ); + } + + #[test] + fn canonical_three_leaves_has_padding() { + let a = addr(b"a"); + let b = addr(b"b"); + let c = addr(b"c"); + let t = AssumptionTree::canonical(&[a, b, c]).unwrap(); + // We don't pin down exact shape (depends on sort order) but the + // tree must include at least one Padding node. 
+ fn has_padding(t: &AssumptionTree) -> bool { + match t { + AssumptionTree::Padding => true, + AssumptionTree::Leaf(_) => false, + AssumptionTree::Node(l, r) => has_padding(l) || has_padding(r), + } + } + assert!(has_padding(&t)); + } + + // ---------- Root agreement ---------- + + #[test] + fn canonical_root_matches_merkle_root_canonical_single() { + let a = addr(b"only"); + let t = AssumptionTree::canonical(&[a.clone()]).unwrap(); + assert_eq!(Some(t.root()), merkle_root_canonical(&[a])); + } + + #[test] + fn canonical_root_matches_merkle_root_canonical_pairs() { + for n in 2..=10 { + let leaves: Vec<Address>
= (0..n) + .map(|i| addr(format!("leaf-{i}").as_bytes())) + .collect(); + let t = AssumptionTree::canonical(&leaves).unwrap(); + assert_eq!( + Some(t.root()), + merkle_root_canonical(&leaves), + "mismatch at n={n}" + ); + } + } + + #[test] + fn canonical_root_dedups_like_primitive() { + let a = addr(b"a"); + let b = addr(b"b"); + let t = AssumptionTree::canonical(&[a.clone(), a.clone(), b.clone()]).unwrap(); + assert_eq!(Some(t.root()), merkle_root_canonical(&[a, b])); + } + + #[test] + fn join_root_matches_merkle_join() { + let a = addr(b"a"); + let b = addr(b"b"); + let l = AssumptionTree::canonical(&[a.clone()]).unwrap(); + let r = AssumptionTree::canonical(&[b.clone()]).unwrap(); + let joined = AssumptionTree::join(l.clone(), r.clone()); + assert_eq!(joined.root(), merkle_join(&l.root(), &r.root())); + } + + // ---------- Leaves + contains ---------- + + #[test] + fn leaves_skip_padding() { + let a = addr(b"a"); + let b = addr(b"b"); + let c = addr(b"c"); + let mut leaves = vec![a.clone(), b.clone(), c.clone()]; + leaves.sort_unstable(); + let t = AssumptionTree::canonical(&[a, b, c]).unwrap(); + assert_eq!(t.leaves(), leaves); + } + + #[test] + fn contains_matches_leaves() { + let a = addr(b"a"); + let b = addr(b"b"); + let c = addr(b"c"); + let absent = addr(b"absent"); + let t = AssumptionTree::canonical(&[a.clone(), b.clone(), c.clone()]).unwrap(); + assert!(t.contains(&a)); + assert!(t.contains(&b)); + assert!(t.contains(&c)); + assert!(!t.contains(&absent)); + } + + // ---------- Merkle proofs ---------- + + #[test] + fn merkle_proof_single_leaf_empty_path() { + let a = addr(b"only"); + let t = AssumptionTree::canonical(&[a.clone()]).unwrap(); + let path = t.merkle_proof(&a).unwrap(); + assert!(path.is_empty()); + assert!(verify_merkle_proof(&t.root(), &a, &path)); + } + + #[test] + fn merkle_proof_roundtrip_all_leaves() { + for n in 1..=8 { + let leaves: Vec<Address>
= (0..n) + .map(|i| addr(format!("leaf-{i}").as_bytes())) + .collect(); + let t = AssumptionTree::canonical(&leaves).unwrap(); + let root = t.root(); + for leaf in t.leaves() { + let path = t.merkle_proof(&leaf).expect("leaf present"); + assert!( + verify_merkle_proof(&root, &leaf, &path), + "verify failed for n={n}, leaf={}", + leaf.hex() + ); + } + } + } + + #[test] + fn merkle_proof_nonmember_is_none() { + let a = addr(b"a"); + let b = addr(b"b"); + let absent = addr(b"absent"); + let t = AssumptionTree::canonical(&[a, b]).unwrap(); + assert!(t.merkle_proof(&absent).is_none()); + } + + #[test] + fn merkle_proof_through_join() { + let a = addr(b"a"); + let b = addr(b"b"); + let c = addr(b"c"); + let left = AssumptionTree::canonical(&[a.clone(), b.clone()]).unwrap(); + let right = AssumptionTree::canonical(&[c.clone()]).unwrap(); + let joined = AssumptionTree::join(left, right); + for leaf in [a, b, c] { + let path = joined.merkle_proof(&leaf).expect("leaf present in join"); + assert!(verify_merkle_proof(&joined.root(), &leaf, &path)); + } + } + + // ---------- Serialization ---------- + + #[test] + fn serde_roundtrip_leaf() { + let t = AssumptionTree::Leaf(addr(b"x")); + let bytes = t.ser(); + assert_eq!(bytes[0], 0xE2, "outer tag is 0xE2"); + assert_eq!(bytes[1], BODY_LEAF, "body tag is 0x00 (Leaf)"); + assert_eq!(bytes.len(), 1 + 1 + 32); + let parsed = AssumptionTree::get(&mut &bytes[..]).unwrap(); + assert_eq!(parsed, t); + } + + #[test] + fn serde_roundtrip_padding() { + let t = AssumptionTree::Padding; + let bytes = t.ser(); + assert_eq!(bytes[0], 0xE2); + assert_eq!(bytes[1], BODY_PADDING); + assert_eq!(bytes.len(), 2); + let parsed = AssumptionTree::get(&mut &bytes[..]).unwrap(); + assert_eq!(parsed, t); + } + + #[test] + fn serde_roundtrip_node_simple() { + let a = addr(b"a"); + let b = addr(b"b"); + let t = AssumptionTree::Node( + Box::new(AssumptionTree::Leaf(a)), + Box::new(AssumptionTree::Leaf(b)), + ); + let bytes = t.ser(); + assert_eq!(bytes[0], 
0xE2); + assert_eq!(bytes[1], BODY_NODE); + assert_eq!(bytes[2], BODY_LEAF); + // Tag + Node tag + Leaf tag + 32 + Leaf tag + 32 = 1 + 1 + 1 + 32 + 1 + 32 + assert_eq!(bytes.len(), 1 + 1 + (1 + 32) + (1 + 32)); + let parsed = AssumptionTree::get(&mut &bytes[..]).unwrap(); + assert_eq!(parsed, t); + } + + #[test] + fn serde_roundtrip_canonical_trees() { + for n in 1..=10 { + let leaves: Vec<Address>
= (0..n) + .map(|i| addr(format!("leaf-{i}").as_bytes())) + .collect(); + let t = AssumptionTree::canonical(&leaves).unwrap(); + let bytes = t.ser(); + let parsed = AssumptionTree::get(&mut &bytes[..]).unwrap(); + assert_eq!(parsed, t); + assert_eq!(parsed.root(), t.root()); + } + } + + #[test] + fn serde_rejects_wrong_tag() { + // Tag4(0xE, 3) = Eval claim, not AssumptionTree. + let bytes = vec![0xE3, 0x00, 0x00]; + assert!(AssumptionTree::get(&mut &bytes[..]).is_err()); + } + + #[test] + fn serde_rejects_invalid_body_tag() { + // 0xE2 outer + 0x99 invalid body tag + let bytes = vec![0xE2, 0x99]; + assert!(AssumptionTree::get(&mut &bytes[..]).is_err()); + } +} diff --git a/src/ix/ixon/comm.rs b/src/ix/ixon/comm.rs index 408a9c74..a27afcb2 100644 --- a/src/ix/ixon/comm.rs +++ b/src/ix/ixon/comm.rs @@ -7,12 +7,12 @@ use crate::ix::address::Address; use super::tag::Tag4; -/// Tag4 variant for Commitment (flag=0xE, size=5). -pub const VARIANT: u64 = 5; +/// Tag4 variant for Commitment (flag=0xE, size=1). +pub const VARIANT: u64 = 1; /// A cryptographic commitment. /// -/// The commitment is computed as `blake3(Tag4{0xE,5} || secret || payload)` where: +/// The commitment is computed as `blake3(Tag4{0xE,1} || secret || payload)` where: /// - `secret` is the address of a random blinding factor (stored in blobs) /// - `payload` is the address of the committed constant #[derive(Clone, Debug, PartialEq, Eq, Hash)] @@ -51,25 +51,25 @@ impl Comm { Ok(Comm { secret, payload }) } - /// Serialize with Tag4{0xE, 5} header. + /// Serialize with Tag4{0xE, 1} header. pub fn put_tagged(&self, buf: &mut Vec<u8>) { Tag4::new(0xE, VARIANT).put(buf); self.put(buf); } - /// Deserialize with Tag4{0xE, 5} header. + /// Deserialize with Tag4{0xE, 1} header. 
pub fn get_tagged(buf: &mut &[u8]) -> Result<Self, String> { let tag = Tag4::get(buf)?; if tag.flag != 0xE || tag.size != VARIANT { return Err(format!( - "Comm::get_tagged: expected Tag4{{0xE, 5}}, got Tag4{{{}, {}}}", + "Comm::get_tagged: expected Tag4{{0xE, 1}}, got Tag4{{{}, {}}}", tag.flag, tag.size )); } Self::get(buf) } - /// Serialize with tag and compute content address: `blake3(0xE5 + secret + payload)`. + /// Serialize with tag and compute content address: `blake3(0xE1 + secret + payload)`. pub fn commit(&self) -> Address { let mut buf = Vec::new(); self.put_tagged(&mut buf); @@ -133,7 +133,7 @@ mod tests { let comm = Comm::new(Address::hash(b"a"), Address::hash(b"b")); let mut buf = Vec::new(); comm.put_tagged(&mut buf); - assert_eq!(buf[0], 0xE5, "Comm tagged should start with 0xE5"); + assert_eq!(buf[0], 0xE1, "Comm tagged should start with 0xE1"); } #[test] diff --git a/src/ix/ixon/env.rs b/src/ix/ixon/env.rs index 7c43f2e8..54e5098b 100644 --- a/src/ix/ixon/env.rs +++ b/src/ix/ixon/env.rs @@ -1,6 +1,8 @@ //! Environment for storing Ixon data. use dashmap::DashMap; +use rustc_hash::FxHashSet; +use std::collections::VecDeque; use crate::ix::address::Address; use crate::ix::env::Name; @@ -167,6 +169,39 @@ impl Env { pub fn comm_count(&self) -> usize { self.comms.len() } + + /// BFS-collect all addresses transitively reachable from `root` via + /// the `Constant.refs` field. The returned set includes `root` itself. + /// + /// Addresses that are referenced but not present in `self.consts` are + /// still added to the set (so verifiers see external assumptions) + /// but we cannot recurse into them. + pub fn bfs_refs(&self, root: &Address) -> FxHashSet<Address>
{ + let mut visited: FxHashSet<Address>
= FxHashSet::default(); + let mut queue: VecDeque<Address>
= VecDeque::new(); + visited.insert(root.clone()); + queue.push_back(root.clone()); + while let Some(addr) = queue.pop_front() { + if let Some(entry) = self.consts.get(&addr) { + for r in &entry.value().refs { + if visited.insert(r.clone()) { + queue.push_back(r.clone()); + } + } + } + } + visited + } + + /// Transitive dep addresses of `root`, excluding `root` itself. Sorted + /// lex-ascending for canonical use (e.g., feeding `merkle_root_canonical`). + pub fn transitive_deps_excl(&self, root: &Address) -> Vec<Address>
{ + let mut set = self.bfs_refs(root); + set.remove(root); + let mut v: Vec<Address>
= set.into_iter().collect(); + v.sort_unstable(); + v + } } impl Clone for Env { @@ -324,4 +359,116 @@ mod tests { let addr3 = env.store_blob(vec![1, 2, 3]); assert_eq!(addr1, addr3); } + + /// Build a constant with the given refs (for BFS tests). + fn const_with_refs(refs: Vec<Address>
) -> Constant { + Constant::with_tables( + ConstantInfo::Axio(Axiom { + is_unsafe: false, + lvls: 0, + typ: Arc::new(Expr::Sort(0)), + }), + Vec::new(), + refs, + Vec::new(), + ) + } + + #[test] + fn bfs_refs_singleton_no_deps() { + let env = Env::new(); + let a = Address::hash(b"a"); + env.store_const(a.clone(), const_with_refs(vec![])); + let visited = env.bfs_refs(&a); + assert_eq!(visited.len(), 1); + assert!(visited.contains(&a)); + } + + #[test] + fn bfs_refs_transitive() { + // a -> b -> c, a -> d + let env = Env::new(); + let a = Address::hash(b"a"); + let b = Address::hash(b"b"); + let c = Address::hash(b"c"); + let d = Address::hash(b"d"); + env.store_const(a.clone(), const_with_refs(vec![b.clone(), d.clone()])); + env.store_const(b.clone(), const_with_refs(vec![c.clone()])); + env.store_const(c.clone(), const_with_refs(vec![])); + env.store_const(d.clone(), const_with_refs(vec![])); + let visited = env.bfs_refs(&a); + assert_eq!(visited.len(), 4); + assert!(visited.contains(&a)); + assert!(visited.contains(&b)); + assert!(visited.contains(&c)); + assert!(visited.contains(&d)); + } + + #[test] + fn bfs_refs_cycle_terminates() { + // a -> b -> a (cyclic, should not infinite-loop) + let env = Env::new(); + let a = Address::hash(b"a"); + let b = Address::hash(b"b"); + env.store_const(a.clone(), const_with_refs(vec![b.clone()])); + env.store_const(b.clone(), const_with_refs(vec![a.clone()])); + let visited = env.bfs_refs(&a); + assert_eq!(visited.len(), 2); + } + + #[test] + fn bfs_refs_includes_external_addresses() { + // a -> b, where b is referenced but not stored in env. We still + // surface b in the visited set so callers see the external dep. 
+ let env = Env::new(); + let a = Address::hash(b"a"); + let b = Address::hash(b"b-external"); + env.store_const(a.clone(), const_with_refs(vec![b.clone()])); + let visited = env.bfs_refs(&a); + assert!(visited.contains(&a)); + assert!(visited.contains(&b)); + } + + #[test] + fn transitive_deps_excl_excludes_root() { + // a -> b -> c + let env = Env::new(); + let a = Address::hash(b"a"); + let b = Address::hash(b"b"); + let c = Address::hash(b"c"); + env.store_const(a.clone(), const_with_refs(vec![b.clone()])); + env.store_const(b.clone(), const_with_refs(vec![c.clone()])); + env.store_const(c.clone(), const_with_refs(vec![])); + let deps = env.transitive_deps_excl(&a); + assert!(!deps.contains(&a)); + assert!(deps.contains(&b)); + assert!(deps.contains(&c)); + assert_eq!(deps.len(), 2); + } + + #[test] + fn transitive_deps_excl_is_sorted() { + let env = Env::new(); + let a = Address::hash(b"a"); + // Use multiple deps; the returned Vec should be in sorted order + // regardless of how the BFS visited them. + let mut refs: Vec<Address>
= (0..16) + .map(|i| Address::hash(format!("dep-{i}").as_bytes())) + .collect(); + env.store_const(a.clone(), const_with_refs(refs.clone())); + for r in &refs { + env.store_const(r.clone(), const_with_refs(vec![])); + } + refs.sort_unstable(); + let deps = env.transitive_deps_excl(&a); + assert_eq!(deps, refs); + } + + #[test] + fn transitive_deps_excl_empty_for_root_with_no_refs() { + let env = Env::new(); + let a = Address::hash(b"a"); + env.store_const(a.clone(), const_with_refs(vec![])); + assert!(env.transitive_deps_excl(&a).is_empty()); + } } diff --git a/src/ix/ixon/merkle.rs b/src/ix/ixon/merkle.rs new file mode 100644 index 00000000..b846385a --- /dev/null +++ b/src/ix/ixon/merkle.rs @@ -0,0 +1,441 @@ +//! Canonical and free-form Merkle trees over `Address` leaves. +//! +//! Two construction modes share the same hash primitives: +//! +//! - **Canonical** (`merkle_root_canonical`): lex-sorted, deduped leaves; +//! odd levels padded with the zero sentinel. Used for env merkle roots +//! (deterministic env identity) and as the default builder for +//! assumption-tree roots. +//! - **Free-form** (`merkle_join`): O(1) composition of two existing +//! subtree roots into a new root. Used to aggregate the assumption +//! sets of two claims without re-sorting all leaves. +//! +//! ## Domain separation +//! +//! Leaves are hashed as `blake3(0x00 || addr)` and internal nodes as +//! `blake3(0x01 || left || right)`. Follows RFC 6962 (Certificate +//! Transparency) convention. Strictly speaking the prefix bytes are +//! redundant for our scheme because leaf inputs (32 B) and node inputs +//! (64 B) have distinct lengths — so cross-length Blake3 collision is +//! the only attack vector and that's infeasible. But the prefix bytes +//! make the security argument structural rather than parametric, robust +//! under future refactors (variable-length leaves, raw-address mixing +//! into trees) and hash swaps (Poseidon2 sponge has fixed arity and +//! 
doesn't give the length argument for free). +//! +//! ## Odd-leaf padding +//! +//! The canonical builder pads odd levels with a fixed `[0u8; 32]` +//! sentinel rather than duplicating the trailing leaf. Duplication +//! introduces CVE-2012-2459-style malleability where two distinct leaf +//! lists can produce the same root. + +use crate::ix::address::Address; + +/// Domain-separation prefix for leaf hashes. +pub const LEAF_DOMAIN: u8 = 0x00; + +/// Domain-separation prefix for internal-node hashes. +pub const NODE_DOMAIN: u8 = 0x01; + +/// Zero sentinel used to pad odd levels of canonical trees. +pub const ZERO_SENTINEL: [u8; 32] = [0u8; 32]; + +// --------------------------------------------------------------------------- +// Primitives +// --------------------------------------------------------------------------- + +/// Hash a leaf value into its canonical leaf-level digest. +#[inline] +pub fn leaf_hash(addr: &Address) -> Address { + let mut h = blake3::Hasher::new(); + h.update(&[LEAF_DOMAIN]); + h.update(addr.as_bytes()); + Address::from_blake3_hash(h.finalize()) +} + +/// Hash a pair of child digests into their parent internal-node digest. +#[inline] +pub fn node_hash(left: &Address, right: &Address) -> Address { + let mut h = blake3::Hasher::new(); + h.update(&[NODE_DOMAIN]); + h.update(left.as_bytes()); + h.update(right.as_bytes()); + Address::from_blake3_hash(h.finalize()) +} + +/// The fixed zero-sentinel address used as canonical-tree padding. +#[inline] +pub fn zero_address() -> Address { + Address::from_slice(&ZERO_SENTINEL).expect("zero sentinel is 32 bytes") +} + +// --------------------------------------------------------------------------- +// Canonical builder +// --------------------------------------------------------------------------- + +/// Build the canonical merkle root over a leaf set. Leaves are lex-sorted +/// and deduplicated before hashing. Returns: +/// +/// - `None` if `leaves` is empty (post-dedup). 
+/// - `Some(leaf_hash(x))` for a single leaf (no internal node). +/// - Otherwise an internal-node root with odd levels padded by +/// `zero_address()`. +pub fn merkle_root_canonical(leaves: &[Address]) -> Option<Address>
{ + let mut sorted: Vec<Address>
= leaves.to_vec(); + sorted.sort_unstable(); + sorted.dedup(); + if sorted.is_empty() { + return None; + } + if sorted.len() == 1 { + return Some(leaf_hash(&sorted[0])); + } + let mut level: Vec<Address>
= sorted.iter().map(leaf_hash).collect(); + let zero = zero_address(); + while level.len() > 1 { + let mut next = Vec::with_capacity(level.len().div_ceil(2)); + let mut i = 0; + while i < level.len() { + let l = &level[i]; + let r = level.get(i + 1).unwrap_or(&zero); + next.push(node_hash(l, r)); + i += 2; + } + level = next; + } + Some(level.into_iter().next().unwrap()) +} + +// --------------------------------------------------------------------------- +// Free-form composition +// --------------------------------------------------------------------------- + +/// Combine two existing subtree roots into a new free-form root in O(1). +/// +/// The result is a non-canonical tree even if both inputs were canonical; +/// the verifier accepts both forms and the leaf set is recovered by +/// walking the tree from witness data, not by assuming any specific +/// shape. +#[inline] +pub fn merkle_join(left: &Address, right: &Address) -> Address { + node_hash(left, right) +} + +// --------------------------------------------------------------------------- +// Membership proofs +// --------------------------------------------------------------------------- + +/// A merkle-path step: `(sibling, is_left)`. `is_left = true` means the +/// sibling sits on the left side at this level, so verification combines +/// it as `node_hash(sibling, current)`; otherwise `node_hash(current, +/// sibling)`. +pub type MerklePath = Vec<(Address, bool)>; + +/// Produce a sibling-path for `target` in the canonical tree over +/// `leaves`. Returns `None` if `target` is not in the (post-dedup) leaf +/// set. Returns an empty path for a single-leaf tree. +pub fn merkle_proof_canonical( + leaves: &[Address], + target: &Address, +) -> Option<MerklePath> { + let mut sorted: Vec<Address>
= leaves.to_vec(); + sorted.sort_unstable(); + sorted.dedup(); + let mut pos = sorted.binary_search(target).ok()?; + if sorted.len() == 1 { + return Some(Vec::new()); + } + let mut level: Vec<Address>
= sorted.iter().map(leaf_hash).collect(); + let zero = zero_address(); + let mut path: MerklePath = Vec::new(); + while level.len() > 1 { + let sibling_idx = pos ^ 1; + let sibling = level.get(sibling_idx).cloned().unwrap_or_else(|| zero.clone()); + let is_left = pos & 1 == 1; + path.push((sibling, is_left)); + // Build next level. + let mut next = Vec::with_capacity(level.len().div_ceil(2)); + let mut i = 0; + while i < level.len() { + let l = &level[i]; + let r = level.get(i + 1).unwrap_or(&zero); + next.push(node_hash(l, r)); + i += 2; + } + level = next; + pos /= 2; + } + Some(path) +} + +/// Verify a merkle membership proof against any root (canonical or +/// free-form). The path is shape-agnostic — verification just hashes +/// upward using each sibling at its recorded side. +pub fn verify_merkle_proof( + root: &Address, + leaf: &Address, + path: &[(Address, bool)], +) -> bool { + let mut current = leaf_hash(leaf); + for (sibling, is_left) in path { + current = if *is_left { + node_hash(sibling, &current) + } else { + node_hash(&current, sibling) + }; + } + current == *root +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use quickcheck::{Arbitrary, Gen}; + + fn addr(seed: &[u8]) -> Address { + Address::hash(seed) + } + + // ---------- Canonical builder ---------- + + #[test] + fn canonical_empty() { + assert!(merkle_root_canonical(&[]).is_none()); + } + + #[test] + fn canonical_single() { + let a = addr(b"only"); + let root = merkle_root_canonical(&[a.clone()]).unwrap(); + assert_eq!(root, leaf_hash(&a)); + } + + #[test] + fn canonical_sort_invariant() { + let a = addr(b"a"); + let b = addr(b"b"); + let r1 = merkle_root_canonical(&[a.clone(), b.clone()]).unwrap(); + let r2 = merkle_root_canonical(&[b, a]).unwrap(); + assert_eq!(r1, r2); + } + + #[test] + fn canonical_dedup() { + let a = addr(b"a"); + 
let b = addr(b"b"); + let r1 = + merkle_root_canonical(&[a.clone(), a.clone(), b.clone()]).unwrap(); + let r2 = merkle_root_canonical(&[a, b]).unwrap(); + assert_eq!(r1, r2); + } + + #[test] + fn canonical_distinguishes() { + let a = addr(b"a"); + let b = addr(b"b"); + let r1 = merkle_root_canonical(&[a.clone()]).unwrap(); + let r2 = merkle_root_canonical(&[a, b]).unwrap(); + assert_ne!(r1, r2); + } + + #[test] + fn canonical_no_malleability() { + // [a, a] deduplicates to [a], producing leaf_hash(a). A two-leaf + // tree built from [a, a] without dedup would produce node_hash( + // leaf_hash(a), leaf_hash(a)), which must differ. + let a = addr(b"a"); + let deduped = merkle_root_canonical(&[a.clone(), a.clone()]).unwrap(); + let two_leaf_no_dedup = node_hash(&leaf_hash(&a), &leaf_hash(&a)); + assert_ne!(deduped, two_leaf_no_dedup); + assert_eq!(deduped, leaf_hash(&a)); + } + + // ---------- Domain separation ---------- + + #[test] + fn leaf_vs_node_disjoint() { + let a = addr(b"a"); + let z = zero_address(); + // leaf_hash(a) = blake3(0x00 || a) + // node_hash(a, ZERO) = blake3(0x01 || a || ZERO) + // Different domain prefixes AND different input lengths. + assert_ne!(leaf_hash(&a), node_hash(&a, &z)); + } + + // ---------- Free-form (merkle_join) ---------- + + #[test] + fn join_is_node_hash() { + let a = addr(b"a"); + let b = addr(b"b"); + assert_eq!(merkle_join(&a, &b), node_hash(&a, &b)); + } + + #[test] + fn join_non_commutative() { + let a = addr(b"a"); + let b = addr(b"b"); + assert_ne!(merkle_join(&a, &b), merkle_join(&b, &a)); + } + + #[test] + fn join_canonical_inequal() { + // Building a free-form tree by joining two canonical subtrees over + // {a, b} and {c} produces a different root than the canonical tree + // over {a, b, c} — and that's fine. Same leaf set, different + // protocol-level claims. 
+ let a = addr(b"a"); + let b = addr(b"b"); + let c = addr(b"c"); + let left = merkle_root_canonical(&[a.clone(), b.clone()]).unwrap(); + let right = merkle_root_canonical(&[c.clone()]).unwrap(); + let joined = merkle_join(&left, &right); + let canonical = merkle_root_canonical(&[a, b, c]).unwrap(); + assert_ne!(joined, canonical); + } + + #[test] + fn join_composes_membership() { + // After joining two canonical subtrees, leaves on each side are + // still provable by appending the join-step sibling to their + // sub-proofs. + let a = addr(b"a"); + let b = addr(b"b"); + let c = addr(b"c"); + let left_leaves = vec![a.clone(), b.clone()]; + let right_leaves = vec![c.clone()]; + let left_root = merkle_root_canonical(&left_leaves).unwrap(); + let right_root = merkle_root_canonical(&right_leaves).unwrap(); + let joined = merkle_join(&left_root, &right_root); + + // Prove `a` is in the joined tree: sub-proof through left subtree, + // then sibling = right_root (on the right), so is_left = false. + let mut path = merkle_proof_canonical(&left_leaves, &a).unwrap(); + path.push((right_root.clone(), false)); + assert!(verify_merkle_proof(&joined, &a, &path)); + + // Prove `c` is in the joined tree: empty sub-proof (single-leaf + // right subtree), then sibling = left_root on the left. 
+ let mut path = merkle_proof_canonical(&right_leaves, &c).unwrap(); + path.push((left_root, true)); + assert!(verify_merkle_proof(&joined, &c, &path)); + } + + // ---------- Membership ---------- + + #[test] + fn proof_single_leaf() { + let a = addr(b"only"); + let root = merkle_root_canonical(&[a.clone()]).unwrap(); + let path = merkle_proof_canonical(&[a.clone()], &a).unwrap(); + assert!(path.is_empty()); + assert!(verify_merkle_proof(&root, &a, &path)); + } + + #[test] + fn proof_two_leaves() { + let a = addr(b"a"); + let b = addr(b"b"); + let leaves = vec![a.clone(), b.clone()]; + let root = merkle_root_canonical(&leaves).unwrap(); + let path_a = merkle_proof_canonical(&leaves, &a).unwrap(); + let path_b = merkle_proof_canonical(&leaves, &b).unwrap(); + assert!(verify_merkle_proof(&root, &a, &path_a)); + assert!(verify_merkle_proof(&root, &b, &path_b)); + } + + #[test] + fn proof_three_leaves_odd_padding() { + let a = addr(b"a"); + let b = addr(b"b"); + let c = addr(b"c"); + let leaves = vec![a.clone(), b.clone(), c.clone()]; + let root = merkle_root_canonical(&leaves).unwrap(); + for leaf in &leaves { + let path = merkle_proof_canonical(&leaves, leaf).unwrap(); + assert!(verify_merkle_proof(&root, leaf, &path)); + } + } + + #[test] + fn proof_rejects_nonmember_direct() { + let a = addr(b"a"); + let b = addr(b"b"); + let leaves = vec![a.clone(), b]; + let x = addr(b"x"); + assert!(merkle_proof_canonical(&leaves, &x).is_none()); + } + + // ---------- Quickcheck properties ---------- + + // Small helper: distinct random addresses. + fn gen_distinct_addrs(g: &mut Gen, n: usize) -> Vec
{ + let mut seen: std::collections::HashSet
= + std::collections::HashSet::new(); + while seen.len() < n { + seen.insert(Address::arbitrary(g)); + } + seen.into_iter().collect() + } + + #[quickcheck] + fn prop_proof_roundtrip_canonical(seed: u8) -> bool { + let mut g = Gen::new(16); + // 1..=12 leaves + let n = ((seed as usize) % 12) + 1; + let leaves = gen_distinct_addrs(&mut g, n); + let root = merkle_root_canonical(&leaves).unwrap(); + leaves.iter().all(|leaf| { + let path = merkle_proof_canonical(&leaves, leaf).unwrap(); + verify_merkle_proof(&root, leaf, &path) + }) + } + + #[quickcheck] + fn prop_proof_rejects_nonmember(seed: u8) -> bool { + let mut g = Gen::new(16); + let n = ((seed as usize) % 10) + 1; + let leaves = gen_distinct_addrs(&mut g, n); + let root = merkle_root_canonical(&leaves).unwrap(); + // Fresh address: definitely not in `leaves`. + let mut nonmember = Address::arbitrary(&mut g); + while leaves.contains(&nonmember) { + nonmember = Address::arbitrary(&mut g); + } + // Any path the prover *could* try for a nonmember fails. Quick + // check: take a real member's path and try to verify it against + // the nonmember leaf. 
+ let real_leaf = &leaves[0]; + let real_path = merkle_proof_canonical(&leaves, real_leaf).unwrap(); + !verify_merkle_proof(&root, &nonmember, &real_path) + } + + #[quickcheck] + fn prop_proof_roundtrip_joined(seed: u8) -> bool { + let mut g = Gen::new(16); + let n_left = ((seed as usize) % 5) + 1; + let n_right = (((seed >> 4) as usize) % 5) + 1; + let left_leaves = gen_distinct_addrs(&mut g, n_left); + let right_leaves = gen_distinct_addrs(&mut g, n_right); + let left_root = merkle_root_canonical(&left_leaves).unwrap(); + let right_root = merkle_root_canonical(&right_leaves).unwrap(); + let joined = merkle_join(&left_root, &right_root); + + let left_ok = left_leaves.iter().all(|leaf| { + let mut path = merkle_proof_canonical(&left_leaves, leaf).unwrap(); + path.push((right_root.clone(), false)); + verify_merkle_proof(&joined, leaf, &path) + }); + let right_ok = right_leaves.iter().all(|leaf| { + let mut path = merkle_proof_canonical(&right_leaves, leaf).unwrap(); + path.push((left_root.clone(), true)); + verify_merkle_proof(&joined, leaf, &path) + }); + left_ok && right_ok + } +} diff --git a/src/ix/ixon/proof.rs b/src/ix/ixon/proof.rs index 00b4d0c8..8eef684f 100644 --- a/src/ix/ixon/proof.rs +++ b/src/ix/ixon/proof.rs @@ -15,28 +15,7 @@ use super::constant::DefKind; use super::tag::{Tag0, Tag4}; // ============================================================================ -// Core claim/proof types -// ============================================================================ - -/// An evaluation claim: asserts that the constant at `input` evaluates to the -/// constant at `output`. -#[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub struct EvalClaim { - /// Address of the input constant - pub input: Address, - /// Address of the output constant - pub output: Address, -} - -/// A type-checking claim: asserts that the constant at `value` is well-typed. 
-#[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub struct CheckClaim { - /// Address of the value constant - pub value: Address, -} - -// ============================================================================ -// RevealClaim types +// Reveal info types (per-variant selective-field structures) // ============================================================================ /// Revealed fields of a Constructor within an Inductive. @@ -147,28 +126,50 @@ pub enum RevealConstantInfo { }, } -/// A reveal claim: selective revelation of fields of a committed constant. -#[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub struct RevealClaim { - /// Address of the commitment - pub comm: Address, - /// Revealed field information - pub info: RevealConstantInfo, -} - // ============================================================================ -// Claim and Proof enums +// Claim and Proof types // ============================================================================ /// A claim that can be proven. +/// +/// Three families: +/// +/// - **Typechecking claims** (`Eval`, `Check`, `CheckEnv`): assert that a +/// constant evaluates, a constant is well-typed, or every constant in +/// an env is well-typed. Each carries `assumptions: Option
`: +/// - `None` → unconditional (constructive proof, no axioms). +/// - `Some(root)` → conditional on every leaf in the merkle tree +/// rooted at `root` being a well-typed constant. +/// - **Reveal**: selective field revelation of a committed constant. +/// Orthogonal to typechecking; carries no assumptions. +/// - **Contains**: structural membership claim — `const_addr` is a leaf +/// in the merkle tree rooted at `tree`. Used by the aggregation +/// circuit to discharge a leaf from a conditional claim's assumption +/// set. Carries no assumptions itself. +/// +/// The `assumptions` root may be any merkle tree (canonical sorted+ +/// padded via `merkle_root_canonical`, or free-form via `merkle_join`) +/// with `Address` leaves. Verifiers recover the leaf set via the +/// `AssumptionTree` serialization when free-form. #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub enum Claim { - /// Evaluation claim - Evals(EvalClaim), - /// Type-checking claim - Checks(CheckClaim), - /// Reveal claim (selective field revelation) - Reveals(RevealClaim), + /// `input` evaluates to `output`, optionally modulo `assumptions`. + Eval { + input: Address, + output: Address, + assumptions: Option
, + }, + /// The constant at `const_addr` is well-typed, optionally modulo + /// `assumptions`. + Check { const_addr: Address, assumptions: Option
}, + /// Every constant in the env merkle-rooted at `root` is well-typed, + /// optionally modulo `assumptions` (typically the env's axiom + /// leaves). + CheckEnv { root: Address, assumptions: Option
}, + /// Selective field revelation of a committed constant. + Reveal { comm: Address, info: RevealConstantInfo }, + /// `const_addr` is a leaf in the merkle tree rooted at `tree`. + Contains { tree: Address, const_addr: Address }, } /// A proof of a claim. @@ -181,28 +182,55 @@ pub struct Proof { } // ============================================================================ -// Tag4 variant layout for flag 0xE +// Tag4 variant layout for flags 0xE (data + claims) and 0xF (proofs) // ============================================================================ -/// Tag4 flag for claims, proofs, commitments, and environment (0xE). -/// Size field encodes variant: -/// - 0: Environment (Env) -/// - 1: CheckProof (proof of CheckClaim) -/// - 2: EvalProof (proof of EvalClaim) -/// - 3: CheckClaim (no proof) -/// - 4: EvalClaim (no proof) -/// - 5: Commitment -/// - 6: RevealClaim -/// - 7: RevealProof -pub const FLAG: u8 = 0xE; - -const VARIANT_CHECK_PROOF: u64 = 1; -const VARIANT_EVAL_PROOF: u64 = 2; -const VARIANT_CHECK_CLAIM: u64 = 3; -const VARIANT_EVAL_CLAIM: u64 = 4; -// VARIANT 5 = Comm (handled in comm.rs) -const VARIANT_REVEAL_CLAIM: u64 = 6; -const VARIANT_REVEAL_PROOF: u64 = 7; +/// Tag4 flag for envs, commitments, AssumptionTree, and claims (0xE). +/// +/// All variants under 0xE fit in single-byte tags (`0xE0`–`0xE7`). +/// Matches the `Variant (0-7)` constraint documented in `docs/Ixon.md`. 
+/// +/// - 0: Env (on-disk env serialization) +/// - 1: Comm (commitment, handled in `comm.rs`) +/// - 2: AssumptionTree (recursive merkle-tree data, see `assumption_tree.rs`) +/// - 3: Eval claim +/// - 4: Check claim +/// - 5: CheckEnv claim +/// - 6: Reveal claim +/// - 7: Contains claim +pub const FLAG_CLAIM: u8 = 0xE; + +pub const VARIANT_ENV: u64 = 0; +// VARIANT 1 = Comm (handled in comm.rs) +pub const VARIANT_ASSUMPTION_TREE: u64 = 2; +pub const VARIANT_EVAL_CLAIM: u64 = 3; +pub const VARIANT_CHECK_CLAIM: u64 = 4; +pub const VARIANT_CHECK_ENV_CLAIM: u64 = 5; +pub const VARIANT_REVEAL_CLAIM: u64 = 6; +pub const VARIANT_CONTAINS_CLAIM: u64 = 7; + +/// Tag4 flag for ZK proofs (0xF). All variants in single-byte tags +/// (`0xF0`–`0xF4`). Slots 5-7 reserved for future proof variants. +/// +/// Proof bytes are uniform opaque ZK proofs — witness data (e.g., +/// merkle paths for Contains) is prover-side scratch consumed by the +/// ZK circuit and NOT transmitted on the wire. +/// +/// - 0: Eval proof +/// - 1: Check proof +/// - 2: CheckEnv proof +/// - 3: Reveal proof +/// - 4: Contains proof +pub const FLAG_PROOF: u8 = 0xF; + +pub const VARIANT_EVAL_PROOF: u64 = 0; +pub const VARIANT_CHECK_PROOF: u64 = 1; +pub const VARIANT_CHECK_ENV_PROOF: u64 = 2; +pub const VARIANT_REVEAL_PROOF: u64 = 3; +pub const VARIANT_CONTAINS_PROOF: u64 = 4; + +// Backwards-compatibility re-export: many call sites refer to FLAG. +pub const FLAG: u8 = FLAG_CLAIM; // ============================================================================ // Serialization helpers @@ -921,54 +949,97 @@ impl RevealConstantInfo { // Claim serialization // ============================================================================ +/// Helper: write an `Option
` as `[0x00]` (None) or +/// `[0x01][addr:32]` (Some). Single byte for absence avoids a 33-byte +/// gap when assumptions are absent. +fn put_opt_addr(opt: &Option
, buf: &mut Vec) { + match opt { + None => buf.push(0x00), + Some(addr) => { + buf.push(0x01); + buf.extend_from_slice(addr.as_bytes()); + }, + } +} + +fn get_opt_addr(buf: &mut &[u8]) -> Result, String> { + match get_u8(buf)? { + 0x00 => Ok(None), + 0x01 => Ok(Some(get_address(buf)?)), + b => Err(format!("get_opt_addr: invalid tag 0x{:02X}", b)), + } +} + impl Claim { pub fn put(&self, buf: &mut Vec) { match self { - Claim::Evals(eval) => { - Tag4::new(FLAG, VARIANT_EVAL_CLAIM).put(buf); - buf.extend_from_slice(eval.input.as_bytes()); - buf.extend_from_slice(eval.output.as_bytes()); + Claim::Eval { input, output, assumptions } => { + Tag4::new(FLAG_CLAIM, VARIANT_EVAL_CLAIM).put(buf); + buf.extend_from_slice(input.as_bytes()); + buf.extend_from_slice(output.as_bytes()); + put_opt_addr(assumptions, buf); }, - Claim::Checks(check) => { - Tag4::new(FLAG, VARIANT_CHECK_CLAIM).put(buf); - buf.extend_from_slice(check.value.as_bytes()); + Claim::Check { const_addr, assumptions } => { + Tag4::new(FLAG_CLAIM, VARIANT_CHECK_CLAIM).put(buf); + buf.extend_from_slice(const_addr.as_bytes()); + put_opt_addr(assumptions, buf); }, - Claim::Reveals(reveal) => { - Tag4::new(FLAG, VARIANT_REVEAL_CLAIM).put(buf); - buf.extend_from_slice(reveal.comm.as_bytes()); - reveal.info.put(buf); + Claim::CheckEnv { root, assumptions } => { + Tag4::new(FLAG_CLAIM, VARIANT_CHECK_ENV_CLAIM).put(buf); + buf.extend_from_slice(root.as_bytes()); + put_opt_addr(assumptions, buf); + }, + Claim::Reveal { comm, info } => { + Tag4::new(FLAG_CLAIM, VARIANT_REVEAL_CLAIM).put(buf); + buf.extend_from_slice(comm.as_bytes()); + info.put(buf); + }, + Claim::Contains { tree, const_addr } => { + Tag4::new(FLAG_CLAIM, VARIANT_CONTAINS_CLAIM).put(buf); + buf.extend_from_slice(tree.as_bytes()); + buf.extend_from_slice(const_addr.as_bytes()); }, } } pub fn get(buf: &mut &[u8]) -> Result { let tag = Tag4::get(buf)?; - if tag.flag != FLAG { + if tag.flag != FLAG_CLAIM { return Err(format!( "Claim::get: expected flag 
0x{:X}, got 0x{:X}", - FLAG, tag.flag + FLAG_CLAIM, tag.flag )); } - match tag.size { VARIANT_EVAL_CLAIM => { let input = get_address(buf)?; let output = get_address(buf)?; - Ok(Claim::Evals(EvalClaim { input, output })) + let assumptions = get_opt_addr(buf)?; + Ok(Claim::Eval { input, output, assumptions }) }, VARIANT_CHECK_CLAIM => { - let value = get_address(buf)?; - Ok(Claim::Checks(CheckClaim { value })) + let const_addr = get_address(buf)?; + let assumptions = get_opt_addr(buf)?; + Ok(Claim::Check { const_addr, assumptions }) + }, + VARIANT_CHECK_ENV_CLAIM => { + let root = get_address(buf)?; + let assumptions = get_opt_addr(buf)?; + Ok(Claim::CheckEnv { root, assumptions }) }, VARIANT_REVEAL_CLAIM => { let comm = get_address(buf)?; let info = RevealConstantInfo::get(buf)?; - Ok(Claim::Reveals(RevealClaim { comm, info })) + Ok(Claim::Reveal { comm, info }) }, - VARIANT_EVAL_PROOF | VARIANT_CHECK_PROOF | VARIANT_REVEAL_PROOF => Err( - format!("Claim::get: got Proof variant {}, use Proof::get", tag.size), - ), - x => Err(format!("Claim::get: invalid variant {x}")), + VARIANT_CONTAINS_CLAIM => { + let tree = get_address(buf)?; + let const_addr = get_address(buf)?; + Ok(Claim::Contains { tree, const_addr }) + }, + x => Err(format!( + "Claim::get: invalid claim variant {x} under flag 0xE", + )), } } @@ -979,6 +1050,17 @@ impl Claim { let addr = Address::hash(&buf); (addr, buf) } + + /// Map a claim to its corresponding proof variant size (under flag 0xF). + pub fn proof_variant_size(&self) -> u64 { + match self { + Claim::Eval { .. } => VARIANT_EVAL_PROOF, + Claim::Check { .. } => VARIANT_CHECK_PROOF, + Claim::CheckEnv { .. } => VARIANT_CHECK_ENV_PROOF, + Claim::Reveal { .. } => VARIANT_REVEAL_PROOF, + Claim::Contains { .. 
} => VARIANT_CONTAINS_PROOF, + } + } } // ============================================================================ @@ -991,61 +1073,79 @@ impl Proof { } pub fn put(&self, buf: &mut Vec) { + let proof_size = self.claim.proof_variant_size(); + // Proofs live under flag 0xF; claim payload is the same body as the + // matching Claim variant. + Tag4::new(FLAG_PROOF, proof_size).put(buf); match &self.claim { - Claim::Evals(eval) => { - Tag4::new(FLAG, VARIANT_EVAL_PROOF).put(buf); - buf.extend_from_slice(eval.input.as_bytes()); - buf.extend_from_slice(eval.output.as_bytes()); + Claim::Eval { input, output, assumptions } => { + buf.extend_from_slice(input.as_bytes()); + buf.extend_from_slice(output.as_bytes()); + put_opt_addr(assumptions, buf); + }, + Claim::Check { const_addr, assumptions } => { + buf.extend_from_slice(const_addr.as_bytes()); + put_opt_addr(assumptions, buf); + }, + Claim::CheckEnv { root, assumptions } => { + buf.extend_from_slice(root.as_bytes()); + put_opt_addr(assumptions, buf); }, - Claim::Checks(check) => { - Tag4::new(FLAG, VARIANT_CHECK_PROOF).put(buf); - buf.extend_from_slice(check.value.as_bytes()); + Claim::Reveal { comm, info } => { + buf.extend_from_slice(comm.as_bytes()); + info.put(buf); }, - Claim::Reveals(reveal) => { - Tag4::new(FLAG, VARIANT_REVEAL_PROOF).put(buf); - buf.extend_from_slice(reveal.comm.as_bytes()); - reveal.info.put(buf); + Claim::Contains { tree, const_addr } => { + buf.extend_from_slice(tree.as_bytes()); + buf.extend_from_slice(const_addr.as_bytes()); }, } - // Proof bytes: length prefix + data + // Opaque ZK proof bytes: length prefix + data Tag0::new(self.proof.len() as u64).put(buf); buf.extend_from_slice(&self.proof); } pub fn get(buf: &mut &[u8]) -> Result { let tag = Tag4::get(buf)?; - if tag.flag != FLAG { + if tag.flag != FLAG_PROOF { return Err(format!( "Proof::get: expected flag 0x{:X}, got 0x{:X}", - FLAG, tag.flag + FLAG_PROOF, tag.flag )); } - let claim = match tag.size { VARIANT_EVAL_PROOF => { let 
input = get_address(buf)?; let output = get_address(buf)?; - Claim::Evals(EvalClaim { input, output }) + let assumptions = get_opt_addr(buf)?; + Claim::Eval { input, output, assumptions } }, VARIANT_CHECK_PROOF => { - let value = get_address(buf)?; - Claim::Checks(CheckClaim { value }) + let const_addr = get_address(buf)?; + let assumptions = get_opt_addr(buf)?; + Claim::Check { const_addr, assumptions } + }, + VARIANT_CHECK_ENV_PROOF => { + let root = get_address(buf)?; + let assumptions = get_opt_addr(buf)?; + Claim::CheckEnv { root, assumptions } }, VARIANT_REVEAL_PROOF => { let comm = get_address(buf)?; let info = RevealConstantInfo::get(buf)?; - Claim::Reveals(RevealClaim { comm, info }) + Claim::Reveal { comm, info } }, - VARIANT_EVAL_CLAIM | VARIANT_CHECK_CLAIM | VARIANT_REVEAL_CLAIM => { - return Err(format!( - "Proof::get: got Claim variant {}, use Claim::get", - tag.size - )); + VARIANT_CONTAINS_PROOF => { + let tree = get_address(buf)?; + let const_addr = get_address(buf)?; + Claim::Contains { tree, const_addr } }, - x => return Err(format!("Proof::get: invalid variant {x}")), + x => return Err(format!( + "Proof::get: invalid proof variant {x} under flag 0xF" + )), }; - // Proof bytes + // Opaque ZK proof bytes let len = usize::try_from(Tag0::get(buf)?.size) .map_err(|_e| "Proof::get: Tag0 size overflows usize".to_string())?; if buf.len() < len { @@ -1081,18 +1181,6 @@ mod tests { // ========== Arbitrary impls ========== - impl Arbitrary for EvalClaim { - fn arbitrary(g: &mut Gen) -> Self { - EvalClaim { input: Address::arbitrary(g), output: Address::arbitrary(g) } - } - } - - impl Arbitrary for CheckClaim { - fn arbitrary(g: &mut Gen) -> Self { - CheckClaim { value: Address::arbitrary(g) } - } - } - impl Arbitrary for RevealConstructorInfo { fn arbitrary(g: &mut Gen) -> Self { RevealConstructorInfo { @@ -1285,21 +1373,30 @@ mod tests { } } - impl Arbitrary for RevealClaim { - fn arbitrary(g: &mut Gen) -> Self { - RevealClaim { - comm: 
Address::arbitrary(g), - info: RevealConstantInfo::arbitrary(g), - } - } - } - impl Arbitrary for Claim { fn arbitrary(g: &mut Gen) -> Self { - match u8::arbitrary(g) % 3 { - 0 => Claim::Evals(EvalClaim::arbitrary(g)), - 1 => Claim::Checks(CheckClaim::arbitrary(g)), - _ => Claim::Reveals(RevealClaim::arbitrary(g)), + match u8::arbitrary(g) % 5 { + 0 => Claim::Eval { + input: Address::arbitrary(g), + output: Address::arbitrary(g), + assumptions: gen_opt_addr(g), + }, + 1 => Claim::Check { + const_addr: Address::arbitrary(g), + assumptions: gen_opt_addr(g), + }, + 2 => Claim::CheckEnv { + root: Address::arbitrary(g), + assumptions: gen_opt_addr(g), + }, + 3 => Claim::Reveal { + comm: Address::arbitrary(g), + info: RevealConstantInfo::arbitrary(g), + }, + _ => Claim::Contains { + tree: Address::arbitrary(g), + const_addr: Address::arbitrary(g), + }, } } } @@ -1372,28 +1469,81 @@ mod tests { // ========== Manual roundtrip tests ========== + // ---------- Per-variant claim roundtrips ---------- + + #[test] + fn test_eval_claim_no_asm_roundtrip() { + let claim = Claim::Eval { + input: Address::hash(b"input"), + output: Address::hash(b"output"), + assumptions: None, + }; + assert!(claim_roundtrip(&claim)); + } + #[test] - fn test_eval_claim_roundtrip() { - let claim = Claim::Evals(EvalClaim { + fn test_eval_claim_with_asm_roundtrip() { + let claim = Claim::Eval { input: Address::hash(b"input"), output: Address::hash(b"output"), - }); + assumptions: Some(Address::hash(b"asm")), + }; + assert!(claim_roundtrip(&claim)); + } + + #[test] + fn test_check_claim_no_asm_roundtrip() { + let claim = Claim::Check { + const_addr: Address::hash(b"value"), + assumptions: None, + }; assert!(claim_roundtrip(&claim)); } #[test] - fn test_check_claim_roundtrip() { - let claim = Claim::Checks(CheckClaim { value: Address::hash(b"value") }); + fn test_check_claim_with_asm_roundtrip() { + let claim = Claim::Check { + const_addr: Address::hash(b"value"), + assumptions: 
Some(Address::hash(b"asm")), + }; assert!(claim_roundtrip(&claim)); } + #[test] + fn test_check_env_claim_no_asm_roundtrip() { + let claim = + Claim::CheckEnv { root: Address::hash(b"env-root"), assumptions: None }; + assert!(claim_roundtrip(&claim)); + } + + #[test] + fn test_check_env_claim_with_asm_roundtrip() { + let claim = Claim::CheckEnv { + root: Address::hash(b"env-root"), + assumptions: Some(Address::hash(b"asm")), + }; + assert!(claim_roundtrip(&claim)); + } + + #[test] + fn test_contains_claim_roundtrip() { + let claim = Claim::Contains { + tree: Address::hash(b"tree-root"), + const_addr: Address::hash(b"member"), + }; + assert!(claim_roundtrip(&claim)); + } + + // ---------- Per-variant proof roundtrips ---------- + #[test] fn test_eval_proof_roundtrip() { let proof = Proof::new( - Claim::Evals(EvalClaim { + Claim::Eval { input: Address::hash(b"input"), output: Address::hash(b"output"), - }), + assumptions: None, + }, vec![1, 2, 3, 4], ); assert!(proof_roundtrip(&proof)); @@ -1402,19 +1552,59 @@ mod tests { #[test] fn test_check_proof_roundtrip() { let proof = Proof::new( - Claim::Checks(CheckClaim { value: Address::hash(b"value") }), + Claim::Check { + const_addr: Address::hash(b"value"), + assumptions: None, + }, vec![5, 6, 7, 8, 9], ); assert!(proof_roundtrip(&proof)); } + #[test] + fn test_check_env_proof_roundtrip() { + let proof = Proof::new( + Claim::CheckEnv { + root: Address::hash(b"env-root"), + assumptions: None, + }, + vec![0x11, 0x22], + ); + assert!(proof_roundtrip(&proof)); + } + + #[test] + fn test_check_proof_with_asm_roundtrip() { + let proof = Proof::new( + Claim::Check { + const_addr: Address::hash(b"const"), + assumptions: Some(Address::hash(b"asm")), + }, + vec![0xAA, 0xBB, 0xCC], + ); + assert!(proof_roundtrip(&proof)); + } + + #[test] + fn test_contains_proof_roundtrip() { + let proof = Proof::new( + Claim::Contains { + tree: Address::hash(b"tree-root"), + const_addr: Address::hash(b"member"), + }, + vec![0xDE, 0xAD, 0xBE, 
0xEF], + ); + assert!(proof_roundtrip(&proof)); + } + #[test] fn test_empty_proof_data() { let proof = Proof::new( - Claim::Evals(EvalClaim { + Claim::Eval { input: Address::hash(b"c"), output: Address::hash(b"d"), - }), + assumptions: None, + }, vec![], ); assert!(proof_roundtrip(&proof)); @@ -1422,7 +1612,7 @@ mod tests { #[test] fn test_reveal_claim_roundtrip() { - let claim = Claim::Reveals(RevealClaim { + let claim = Claim::Reveal { comm: Address::hash(b"comm"), info: RevealConstantInfo::Defn { kind: Some(DefKind::Definition), @@ -1431,110 +1621,288 @@ mod tests { typ: None, value: None, }, - }); + }; assert!(claim_roundtrip(&claim)); } #[test] fn test_reveal_proof_roundtrip() { let proof = Proof::new( - Claim::Reveals(RevealClaim { + Claim::Reveal { comm: Address::hash(b"comm"), info: RevealConstantInfo::Axio { is_unsafe: Some(false), lvls: None, typ: Some(Address::hash(b"typ")), }, - }), + }, vec![0xAB, 0xCD], ); assert!(proof_roundtrip(&proof)); } - // ========== Tag byte tests ========== + // ---------- Tag4 flag/size dispatch ---------- - #[test] - fn test_claim_tags() { - // EvalClaim should be 0xE4 - let eval_claim = Claim::Evals(EvalClaim { - input: Address::hash(b"a"), - output: Address::hash(b"b"), - }); - let mut buf = Vec::new(); - eval_claim.put(&mut buf); - assert_eq!(buf[0], 0xE4); + fn parse_tag(bytes: &[u8]) -> Tag4 { + Tag4::get(&mut &bytes[..]).unwrap() + } - // CheckClaim should be 0xE3 - let check_claim = Claim::Checks(CheckClaim { value: Address::hash(b"a") }); + fn claim_tag(claim: &Claim) -> Tag4 { let mut buf = Vec::new(); - check_claim.put(&mut buf); - assert_eq!(buf[0], 0xE3); + claim.put(&mut buf); + parse_tag(&buf) + } - // RevealClaim should be 0xE6 - let reveal_claim = Claim::Reveals(RevealClaim { - comm: Address::hash(b"a"), - info: RevealConstantInfo::Defn { - kind: None, - safety: None, - lvls: None, - typ: None, - value: None, - }, - }); + fn proof_tag(proof: &Proof) -> Tag4 { let mut buf = Vec::new(); - reveal_claim.put(&mut 
buf); - assert_eq!(buf[0], 0xE6); + proof.put(&mut buf); + parse_tag(&buf) } #[test] - fn test_proof_tags() { - // EvalProof should be 0xE2 - let eval_proof = Proof::new( - Claim::Evals(EvalClaim { - input: Address::hash(b"a"), - output: Address::hash(b"b"), - }), - vec![1, 2, 3], - ); + fn test_claim_tag_flag_and_size() { + let a = Address::hash(b"a"); + let b = Address::hash(b"b"); + let reveal_info = RevealConstantInfo::Defn { + kind: None, + safety: None, + lvls: None, + typ: None, + value: None, + }; + + let cases: Vec<(Claim, u64)> = vec![ + ( + Claim::Eval { + input: a.clone(), + output: b.clone(), + assumptions: None, + }, + VARIANT_EVAL_CLAIM, + ), + ( + Claim::Check { const_addr: a.clone(), assumptions: None }, + VARIANT_CHECK_CLAIM, + ), + ( + Claim::CheckEnv { root: a.clone(), assumptions: None }, + VARIANT_CHECK_ENV_CLAIM, + ), + (Claim::Reveal { comm: a.clone(), info: reveal_info }, VARIANT_REVEAL_CLAIM), + ( + Claim::Contains { tree: a, const_addr: b }, + VARIANT_CONTAINS_CLAIM, + ), + ]; + + for (claim, expected_size) in cases { + let tag = claim_tag(&claim); + assert_eq!(tag.flag, FLAG_CLAIM, "claim must use flag 0xE"); + assert_eq!(tag.size, expected_size); + } + } + + #[test] + fn test_proof_tag_flag_and_size() { + let a = Address::hash(b"a"); + let b = Address::hash(b"b"); + let reveal_info = RevealConstantInfo::Defn { + kind: None, + safety: None, + lvls: None, + typ: None, + value: None, + }; + + let cases: Vec<(Claim, u64)> = vec![ + ( + Claim::Eval { + input: a.clone(), + output: b.clone(), + assumptions: None, + }, + VARIANT_EVAL_PROOF, + ), + ( + Claim::Check { const_addr: a.clone(), assumptions: None }, + VARIANT_CHECK_PROOF, + ), + ( + Claim::CheckEnv { root: a.clone(), assumptions: None }, + VARIANT_CHECK_ENV_PROOF, + ), + (Claim::Reveal { comm: a.clone(), info: reveal_info }, VARIANT_REVEAL_PROOF), + ( + Claim::Contains { tree: a, const_addr: b }, + VARIANT_CONTAINS_PROOF, + ), + ]; + + for (claim, expected_size) in cases { + let proof 
= Proof::new(claim, vec![0]); + let tag = proof_tag(&proof); + assert_eq!(tag.flag, FLAG_PROOF, "proof must use flag 0xF"); + assert_eq!(tag.size, expected_size); + } + } + + // ---------- Per-variant payload byte lengths ---------- + + fn claim_bytes(claim: &Claim) -> Vec { let mut buf = Vec::new(); - eval_proof.put(&mut buf); - assert_eq!(buf[0], 0xE2); + claim.put(&mut buf); + buf + } - // CheckProof should be 0xE1 - let check_proof = Proof::new( - Claim::Checks(CheckClaim { value: Address::hash(b"a") }), - vec![4, 5, 6], + #[test] + fn test_claim_byte_lengths() { + let a = Address::hash(b"a"); + let b = Address::hash(b"b"); + let asm = Address::hash(b"asm"); + + // Single-byte Tag4 + payload + 1 opt byte (+ 32 if Some). + assert_eq!( + claim_bytes(&Claim::Eval { + input: a.clone(), + output: b.clone(), + assumptions: None + }) + .len(), + 1 + 64 + 1, + "Eval no-asm = 66 bytes" ); - let mut buf = Vec::new(); - check_proof.put(&mut buf); - assert_eq!(buf[0], 0xE1); - - // RevealProof should be 0xE7 - let reveal_proof = Proof::new( - Claim::Reveals(RevealClaim { - comm: Address::hash(b"a"), - info: RevealConstantInfo::Defn { - kind: None, - safety: None, - lvls: None, - typ: None, - value: None, - }, - }), - vec![7, 8], + assert_eq!( + claim_bytes(&Claim::Eval { + input: a.clone(), + output: b.clone(), + assumptions: Some(asm.clone()) + }) + .len(), + 1 + 64 + 1 + 32, + "Eval with-asm = 98 bytes" + ); + assert_eq!( + claim_bytes(&Claim::Check { + const_addr: a.clone(), + assumptions: None + }) + .len(), + 1 + 32 + 1, + "Check no-asm = 34 bytes" + ); + assert_eq!( + claim_bytes(&Claim::Check { + const_addr: a.clone(), + assumptions: Some(asm.clone()) + }) + .len(), + 1 + 32 + 1 + 32, + "Check with-asm = 66 bytes" + ); + assert_eq!( + claim_bytes(&Claim::CheckEnv { + root: a.clone(), + assumptions: None + }) + .len(), + 1 + 32 + 1, + "CheckEnv no-asm = 34 bytes" + ); + assert_eq!( + claim_bytes(&Claim::Contains { tree: a, const_addr: b }).len(), + 1 + 64, + 
"Contains = 65 bytes" ); - let mut buf = Vec::new(); - reveal_proof.put(&mut buf); - assert_eq!(buf[0], 0xE7); + } + + #[test] + fn test_claim_first_byte() { + // Single-byte Tag4 encoding: size 0-7 fits in one byte (0xE0..0xE7). + let a = Address::hash(b"a"); + let b = Address::hash(b"b"); + let reveal_info = RevealConstantInfo::Defn { + kind: None, + safety: None, + lvls: None, + typ: None, + value: None, + }; + + let cases: Vec<(Claim, u8)> = vec![ + ( + Claim::Eval { + input: a.clone(), + output: b.clone(), + assumptions: None, + }, + 0xE3, + ), + ( + Claim::Check { const_addr: a.clone(), assumptions: None }, + 0xE4, + ), + ( + Claim::CheckEnv { root: a.clone(), assumptions: None }, + 0xE5, + ), + (Claim::Reveal { comm: a.clone(), info: reveal_info }, 0xE6), + ( + Claim::Contains { tree: a, const_addr: b }, + 0xE7, + ), + ]; + for (claim, expected_byte) in cases { + let bytes = claim_bytes(&claim); + assert_eq!(bytes[0], expected_byte); + } + } + + #[test] + fn test_proof_first_byte() { + let a = Address::hash(b"a"); + let b = Address::hash(b"b"); + let reveal_info = RevealConstantInfo::Defn { + kind: None, + safety: None, + lvls: None, + typ: None, + value: None, + }; + let cases: Vec<(Claim, u8)> = vec![ + ( + Claim::Eval { + input: a.clone(), + output: b.clone(), + assumptions: None, + }, + 0xF0, + ), + ( + Claim::Check { const_addr: a.clone(), assumptions: None }, + 0xF1, + ), + ( + Claim::CheckEnv { root: a.clone(), assumptions: None }, + 0xF2, + ), + (Claim::Reveal { comm: a.clone(), info: reveal_info }, 0xF3), + (Claim::Contains { tree: a, const_addr: b }, 0xF4), + ]; + for (claim, expected_byte) in cases { + let mut buf = Vec::new(); + Proof::new(claim, vec![]).put(&mut buf); + assert_eq!(buf[0], expected_byte); + } } // ========== Bitmask encoding tests from plan examples ========== + // Reveal claim is variant 6 (single-byte tag 0xE6). 
+ #[test] fn test_reveal_defn_safety() { - // Plan example: Reveal that a committed Definition has safety = Safe // 0xE6 <32 bytes comm> 0x00 0x02 0x01 - let claim = Claim::Reveals(RevealClaim { + let claim = Claim::Reveal { comm: Address::hash(b"test_comm"), info: RevealConstantInfo::Defn { kind: None, @@ -1543,12 +1911,12 @@ mod tests { typ: None, value: None, }, - }); + }; let mut buf = Vec::new(); claim.put(&mut buf); - assert_eq!(buf[0], 0xE6); // Tag4: RevealClaim + assert_eq!(buf[0], 0xE6); // Tag4: flag=0xE, size=6 (Reveal claim) // buf[1..33] = comm_addr (32 bytes) - assert_eq!(buf[33], 0x00); // variant: Definition + assert_eq!(buf[33], 0x00); // RevealConstantInfo variant: Definition assert_eq!(buf[34], 0x02); // mask: bit 1 (safety) assert_eq!(buf[35], 0x01); // DefinitionSafety::Safe assert_eq!(buf.len(), 36); // Total: 1 + 32 + 1 + 1 + 1 = 36 bytes @@ -1556,10 +1924,9 @@ mod tests { #[test] fn test_reveal_defn_typ() { - // Plan example: Reveal a committed Definition's type expression // 0xE6 <32 bytes comm> 0x00 0x08 <32 bytes typ> let typ_addr = Address::hash(b"serialized typ expr"); - let claim = Claim::Reveals(RevealClaim { + let claim = Claim::Reveal { comm: Address::hash(b"test_comm"), info: RevealConstantInfo::Defn { kind: None, @@ -1568,11 +1935,11 @@ mod tests { typ: Some(typ_addr), value: None, }, - }); + }; let mut buf = Vec::new(); claim.put(&mut buf); - assert_eq!(buf[0], 0xE6); // Tag4: RevealClaim - assert_eq!(buf[33], 0x00); // variant: Definition + assert_eq!(buf[0], 0xE6); + assert_eq!(buf[33], 0x00); // RevealConstantInfo variant: Definition assert_eq!(buf[34], 0x08); // mask: bit 3 (typ) // buf[35..67] = typ address (32 bytes) assert_eq!(buf.len(), 67); // Total: 1 + 32 + 1 + 1 + 32 = 67 bytes @@ -1580,9 +1947,8 @@ mod tests { #[test] fn test_reveal_muts_component_safety() { - // Plan example: Reveal a Muts component's safety // 0xE6 <32 comm> 0x08 0x01 0x01 0x02 0x00 0x02 0x01 - let claim = Claim::Reveals(RevealClaim { + let claim 
= Claim::Reveal { comm: Address::hash(b"test_comm"), info: RevealConstantInfo::Muts { components: vec![( @@ -1596,11 +1962,11 @@ mod tests { }, )], }, - }); + }; let mut buf = Vec::new(); claim.put(&mut buf); - assert_eq!(buf[0], 0xE6); // Tag4: RevealClaim - assert_eq!(buf[33], 0x08); // variant: Muts + assert_eq!(buf[0], 0xE6); + assert_eq!(buf[33], 0x08); // RevealConstantInfo variant: Muts assert_eq!(buf[34], 0x01); // mask: bit 0 (components) assert_eq!(buf[35], 0x01); // Tag0: 1 component revealed assert_eq!(buf[36], 0x02); // Tag0: component index 2 diff --git a/src/ix/ixon/serialize.rs b/src/ix/ixon/serialize.rs index 12690d6f..58a4bd20 100644 --- a/src/ix/ixon/serialize.rs +++ b/src/ix/ixon/serialize.rs @@ -1089,6 +1089,7 @@ pub fn get_named_indexed( use super::comm::Comm; use super::env::Env; +use super::merkle::{merkle_root_canonical, zero_address}; impl Env { /// Tag4 flag for Env (0xE), variant 0. @@ -1118,6 +1119,22 @@ impl Env { // Header: Tag4 with flag=0xE, size=0 (Env variant) Tag4::new(Self::FLAG, 0).put(buf); + // ───────────────────────────────────────────────────────────────────── + // Canonical merkle root over consts.keys() + // + // Hoisted before section 1 so we can sort const_addrs once and reuse + // it for section 2 below. Always 32 bytes (non-optional) — empty + // const sets serialize as `zero_address()` (a fixed sentinel that + // cannot collide with any non-empty canonical root since + // `merkle_root_canonical` always returns a Blake3 hash for n>=1). + // Verifiers recompute on deserialize and reject mismatches. + // ───────────────────────────────────────────────────────────────────── + let mut const_addrs: Vec
= + self.consts.iter().map(|e| e.key().clone()).collect(); + const_addrs.par_sort_unstable(); + let root = merkle_root_canonical(&const_addrs).unwrap_or_else(zero_address); + put_address(&root, buf); + // ───────────────────────────────────────────────────────────────────── // Section 1: Blobs (Address -> bytes) // ───────────────────────────────────────────────────────────────────── @@ -1147,14 +1164,14 @@ impl Env { // ───────────────────────────────────────────────────────────────────── // Section 2: Consts (Address -> Constant) + // + // Reuses the already-collected+sorted `const_addrs` from the merkle + // root computation above. // ───────────────────────────────────────────────────────────────────── let sec_start = std::time::Instant::now(); if !quiet { eprintln!("[Env::put] section 2/5 consts: {} entries", self.consts.len(),); } - let mut const_addrs: Vec
= - self.consts.iter().map(|e| e.key().clone()).collect(); - const_addrs.par_sort_unstable(); if !quiet { eprintln!( "[Env::put] section 2/5 consts: collected+sorted in {:.1}s, \ @@ -1315,6 +1332,11 @@ impl Env { )); } + // Canonical merkle root (fixed 32 bytes). For empty const sets the + // stored value is `zero_address()`. Verified against the + // recomputed value at the end of deserialization. + let stored_root = get_address(buf)?; + let env = Env::new(); // Section 1: Blobs @@ -1380,6 +1402,22 @@ impl Env { env.comms.insert(addr, comm); } + // Verify the stored merkle root matches what we'd compute from + // env.consts. Empty const set → expected = zero_address(). + // Rejects any tampering with the header. + let mut const_addrs: Vec
= + env.consts.iter().map(|e| e.key().clone()).collect(); + const_addrs.sort_unstable(); + let computed_root = + merkle_root_canonical(&const_addrs).unwrap_or_else(zero_address); + if computed_root != stored_root { + return Err(format!( + "Env::get: merkle root mismatch (stored={}, computed={})", + stored_root.hex(), + computed_root.hex(), + )); + } + Ok(env) } @@ -1396,8 +1434,15 @@ impl Env { ) -> Result<(usize, usize, usize, usize, usize, usize), String> { let mut buf = Vec::new(); - // Header + // Header + merkle root (matches Env::put layout; root is always + // 32 bytes, with `zero_address()` as the empty-env sentinel). Tag4::new(Self::FLAG, 0).put(&mut buf); + let mut const_addrs: Vec
= + self.consts.iter().map(|e| e.key().clone()).collect(); + const_addrs.sort_unstable(); + let root = + merkle_root_canonical(&const_addrs).unwrap_or_else(zero_address); + put_address(&root, &mut buf); let header_size = buf.len(); // Section 1: Blobs @@ -1794,4 +1839,104 @@ mod tests { assert!(env_roundtrip(&env), "Env roundtrip failed"); } } + + // ---------- Env merkle root tests ---------- + + fn defn_const(refs: Vec
) -> Constant { + use crate::ix::env::DefinitionSafety; + use crate::ix::ixon::constant::{DefKind, Definition}; + Constant::with_tables( + ConstantInfo::Defn(Definition { + kind: DefKind::Definition, + safety: DefinitionSafety::Safe, + lvls: 0, + typ: Expr::sort(0), + value: Expr::var(0), + }), + Vec::new(), + refs, + Vec::new(), + ) + } + + /// Extract the stored merkle root from a serialized env. The Tag4 + /// header byte (`0xE0` for env) is followed by exactly 32 bytes of + /// root (no opt-tag). + fn parse_stored_root(buf: &[u8]) -> Vec { + assert_eq!(buf[0], 0xE0, "env header byte should be 0xE0"); + buf[1..33].to_vec() + } + + #[test] + fn env_root_empty_env_is_zero_address() { + use crate::ix::ixon::merkle::zero_address; + let env = Env::new(); + let mut buf = Vec::new(); + env.put(&mut buf).unwrap(); + let root = parse_stored_root(&buf); + assert_eq!( + root, + zero_address().as_bytes().to_vec(), + "empty env root should be zero_address sentinel" + ); + } + + #[test] + fn env_root_present_when_consts_nonempty() { + use crate::ix::ixon::merkle::zero_address; + let env = Env::new(); + env.store_const(Address::hash(b"a"), defn_const(vec![])); + let mut buf = Vec::new(); + env.put(&mut buf).unwrap(); + let root = parse_stored_root(&buf); + // Non-empty env root must NOT be the zero sentinel. 
+ assert_ne!(root, zero_address().as_bytes().to_vec()); + } + + #[test] + fn env_root_invariant_under_insertion_order() { + let env1 = Env::new(); + let env2 = Env::new(); + let a = Address::hash(b"a"); + let b = Address::hash(b"b"); + let c = Address::hash(b"c"); + env1.store_const(a.clone(), defn_const(vec![])); + env1.store_const(b.clone(), defn_const(vec![])); + env1.store_const(c.clone(), defn_const(vec![])); + env2.store_const(c, defn_const(vec![])); + env2.store_const(b, defn_const(vec![])); + env2.store_const(a, defn_const(vec![])); + + let mut buf1 = Vec::new(); + let mut buf2 = Vec::new(); + env1.put(&mut buf1).unwrap(); + env2.put(&mut buf2).unwrap(); + + assert_eq!(parse_stored_root(&buf1), parse_stored_root(&buf2)); + } + + #[test] + fn env_root_changes_with_extra_const() { + let env = Env::new(); + env.store_const(Address::hash(b"a"), defn_const(vec![])); + let mut buf1 = Vec::new(); + env.put(&mut buf1).unwrap(); + env.store_const(Address::hash(b"b"), defn_const(vec![])); + let mut buf2 = Vec::new(); + env.put(&mut buf2).unwrap(); + + assert_ne!(parse_stored_root(&buf1), parse_stored_root(&buf2)); + } + + #[test] + fn env_root_mismatch_rejected() { + let env = Env::new(); + env.store_const(Address::hash(b"a"), defn_const(vec![])); + let mut buf = Vec::new(); + env.put(&mut buf).unwrap(); + // Tamper with a byte in the root (offset 1..33). + buf[10] ^= 0xFF; + let res = Env::get(&mut buf.as_slice()); + assert!(res.is_err(), "tampered root should be rejected"); + } } diff --git a/src/ix/kernel.rs b/src/ix/kernel.rs index 17f51d75..7b5c3b1c 100644 --- a/src/ix/kernel.rs +++ b/src/ix/kernel.rs @@ -1,5 +1,6 @@ pub mod canonical_check; pub mod check; +pub mod claim; pub mod congruence; pub mod constant; pub mod def_eq; diff --git a/src/ix/kernel/claim.rs b/src/ix/kernel/claim.rs new file mode 100644 index 00000000..b38fd2d8 --- /dev/null +++ b/src/ix/kernel/claim.rs @@ -0,0 +1,282 @@ +//! High-level claim builders that combine kernel results with the +//! 
transitive-dep walker and the merkle root. +//! +//! `TypeChecker::check_const` stays pure (`Result<(), TcError>`); these +//! builders sit above it. They take an `Env` so they can compute the +//! `assumptions` merkle root over the constant's transitive deps. +//! +//! All builders default to the *canonical* merkle builder (sorted + +//! deduped leaves) — when recursive aggregation lands, free-form roots +//! from `merkle_join` will also be acceptable, but builders that start +//! from an env always produce canonical roots. + +use rustc_hash::FxHashSet; + +use crate::ix::address::Address; +use crate::ix::ixon::constant::ConstantInfo; +use crate::ix::ixon::env::Env; +use crate::ix::ixon::merkle::merkle_root_canonical; +use crate::ix::ixon::proof::Claim; + +/// Canonical merkle root over the env's `consts.keys()`. Also called +/// from the env serializer. Returns `None` for an empty const set. +pub fn env_merkle_root(env: &Env) -> Option
{ + let mut addrs: Vec
= + env.consts.iter().map(|e| e.key().clone()).collect(); + addrs.sort_unstable(); + merkle_root_canonical(&addrs) +} + +/// Build a check claim for the constant at `const_addr` in `env`. +/// +/// Sets `assumptions: None` when the constant has no transitive deps, +/// else `Some(root)` where `root` is the canonical merkle root over +/// those deps. +pub fn build_check_claim(env: &Env, const_addr: Address) -> Claim { + let deps = env.transitive_deps_excl(&const_addr); + let assumptions = merkle_root_canonical(&deps); + Claim::Check { const_addr, assumptions } +} + +/// Build an eval claim for the pair `(input, output)` in `env`. +/// +/// Assumptions = canonical merkle root over `transitive_deps(input) ∪ +/// transitive_deps(output) \ {input, output}`. `None` if that set is +/// empty. +pub fn build_eval_claim( + env: &Env, + input: Address, + output: Address, +) -> Claim { + let mut set: FxHashSet
= + env.transitive_deps_excl(&input).into_iter().collect(); + set.extend(env.transitive_deps_excl(&output)); + set.remove(&input); + set.remove(&output); + let mut deps: Vec
= set.into_iter().collect(); + deps.sort_unstable(); + let assumptions = merkle_root_canonical(&deps); + Claim::Eval { input, output, assumptions } +} + +/// Build a whole-env check claim. Subject is the env's canonical merkle +/// root; assumptions are the env's axiom leaves. +/// +/// Returns `None` if the env has an empty const set (no subject root +/// can be formed). +pub fn build_check_env_claim(env: &Env) -> Option { + let root = env_merkle_root(env)?; + let mut axioms: Vec
= env + .consts + .iter() + .filter_map(|e| match &e.value().info { + ConstantInfo::Axio(_) => Some(e.key().clone()), + _ => None, + }) + .collect(); + axioms.sort_unstable(); + let assumptions = merkle_root_canonical(&axioms); + Some(Claim::CheckEnv { root, assumptions }) +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use crate::ix::env::DefinitionSafety; + use crate::ix::ixon::constant::{ + Axiom, Constant, DefKind, Definition, ConstantInfo, + }; + use crate::ix::ixon::expr::Expr; + use crate::ix::ixon::merkle::leaf_hash; + use std::sync::Arc; + + fn axiom_const(refs: Vec
) -> Constant { + Constant::with_tables( + ConstantInfo::Axio(Axiom { + is_unsafe: false, + lvls: 0, + typ: Arc::new(Expr::Sort(0)), + }), + Vec::new(), + refs, + Vec::new(), + ) + } + + fn defn_const(refs: Vec
) -> Constant { + Constant::with_tables( + ConstantInfo::Defn(Definition { + kind: DefKind::Definition, + safety: DefinitionSafety::Safe, + lvls: 0, + typ: Expr::sort(0), + value: Expr::var(0), + }), + Vec::new(), + refs, + Vec::new(), + ) + } + + #[test] + fn check_no_deps_assumptions_none() { + let env = Env::new(); + let a = Address::hash(b"a"); + env.store_const(a.clone(), defn_const(vec![])); + match build_check_claim(&env, a.clone()) { + Claim::Check { const_addr, assumptions: None } => { + assert_eq!(const_addr, a); + }, + other => panic!("expected Check {{ assumptions: None }}, got {other:?}"), + } + } + + #[test] + fn check_with_one_axiom_dep_assumptions_some() { + let env = Env::new(); + let a = Address::hash(b"a"); + let ax = Address::hash(b"ax"); + env.store_const(a.clone(), defn_const(vec![ax.clone()])); + env.store_const(ax.clone(), axiom_const(vec![])); + match build_check_claim(&env, a.clone()) { + Claim::Check { const_addr, assumptions: Some(asm) } => { + assert_eq!(const_addr, a); + assert_eq!(asm, leaf_hash(&ax)); + }, + other => panic!("expected Check Some, got {other:?}"), + } + } + + #[test] + fn check_excludes_subject_from_assumptions() { + // a -> b -> a (cycle). Subject `a` must not be in its own assumption set. 
+ let env = Env::new(); + let a = Address::hash(b"a"); + let b = Address::hash(b"b"); + env.store_const(a.clone(), defn_const(vec![b.clone()])); + env.store_const(b.clone(), defn_const(vec![a.clone()])); + match build_check_claim(&env, a.clone()) { + Claim::Check { const_addr, assumptions: Some(asm) } => { + assert_eq!(const_addr, a); + assert_eq!(asm, leaf_hash(&b)); + }, + other => panic!("expected Check Some, got {other:?}"), + } + } + + #[test] + fn eval_no_deps_assumptions_none() { + let env = Env::new(); + let i = Address::hash(b"i"); + let o = Address::hash(b"o"); + env.store_const(i.clone(), defn_const(vec![])); + env.store_const(o.clone(), defn_const(vec![])); + match build_eval_claim(&env, i.clone(), o.clone()) { + Claim::Eval { input, output, assumptions: None } => { + assert_eq!(input, i); + assert_eq!(output, o); + }, + other => panic!("expected Eval None, got {other:?}"), + } + } + + #[test] + fn eval_excludes_both_endpoints_from_assumptions() { + // i -> d, o -> d. The set is {d}, not containing i or o. 
+ let env = Env::new(); + let i = Address::hash(b"i"); + let o = Address::hash(b"o"); + let d = Address::hash(b"d"); + env.store_const(i.clone(), defn_const(vec![d.clone()])); + env.store_const(o.clone(), defn_const(vec![d.clone()])); + env.store_const(d.clone(), defn_const(vec![])); + match build_eval_claim(&env, i.clone(), o.clone()) { + Claim::Eval { input, output, assumptions: Some(asm) } => { + assert_eq!(input, i); + assert_eq!(output, o); + assert_eq!(asm, leaf_hash(&d)); + }, + other => panic!("expected Eval Some, got {other:?}"), + } + } + + #[test] + fn check_env_axiom_free_assumptions_none() { + let env = Env::new(); + let a = Address::hash(b"a"); + env.store_const(a.clone(), defn_const(vec![])); + match build_check_env_claim(&env).unwrap() { + Claim::CheckEnv { root, assumptions: None } => { + assert_eq!(Some(root), env_merkle_root(&env)); + }, + other => panic!("expected CheckEnv None, got {other:?}"), + } + } + + #[test] + fn check_env_with_axioms_assumptions_some() { + let env = Env::new(); + let a = Address::hash(b"a"); + let ax = Address::hash(b"ax"); + env.store_const(a.clone(), defn_const(vec![ax.clone()])); + env.store_const(ax.clone(), axiom_const(vec![])); + match build_check_env_claim(&env).unwrap() { + Claim::CheckEnv { root, assumptions: Some(asm) } => { + assert_eq!(Some(root), env_merkle_root(&env)); + assert_eq!(asm, leaf_hash(&ax)); + }, + other => panic!("expected CheckEnv Some, got {other:?}"), + } + } + + #[test] + fn check_env_empty_returns_none() { + let env = Env::new(); + assert!(build_check_env_claim(&env).is_none()); + } + + #[test] + fn env_merkle_root_invariant_under_insertion_order() { + let env1 = Env::new(); + let env2 = Env::new(); + let a = Address::hash(b"a"); + let b = Address::hash(b"b"); + let c = Address::hash(b"c"); + env1.store_const(a.clone(), defn_const(vec![])); + env1.store_const(b.clone(), defn_const(vec![])); + env1.store_const(c.clone(), defn_const(vec![])); + // Insert in reverse order. 
+ env2.store_const(c, defn_const(vec![])); + env2.store_const(b, defn_const(vec![])); + env2.store_const(a, defn_const(vec![])); + assert_eq!(env_merkle_root(&env1), env_merkle_root(&env2)); + } + + #[test] + fn env_merkle_root_changes_with_extra_const() { + let env = Env::new(); + let a = Address::hash(b"a"); + env.store_const(a.clone(), defn_const(vec![])); + let root1 = env_merkle_root(&env).unwrap(); + let b = Address::hash(b"b"); + env.store_const(b, defn_const(vec![])); + let root2 = env_merkle_root(&env).unwrap(); + assert_ne!(root1, root2); + } + + #[test] + fn build_claims_are_deterministic() { + let env = Env::new(); + let a = Address::hash(b"a"); + let b = Address::hash(b"b"); + env.store_const(a.clone(), defn_const(vec![b.clone()])); + env.store_const(b, defn_const(vec![])); + let c1 = build_check_claim(&env, a.clone()); + let c2 = build_check_claim(&env, a); + assert_eq!(c1, c2); + } +} From 01a90b0052c7c52f81cf7fc13d0f9e7d184fbd13 Mon Sep 17 00:00:00 2001 From: "John C. Burnham" Date: Sun, 17 May 2026 01:42:57 -0400 Subject: [PATCH 02/12] Add lazy env deserialization and anon-mode kernel FFI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two related changes to reduce kernel memory + lay groundwork for metadata-isolated typechecking. **Lazy constant deserialization.** `Env::consts` now stores `LazyConstant` (`Arc<[u8]>` + `OnceLock>`) instead of `Constant`. Constants are materialized on first access and the structured form is cached. Sparse access patterns (single-constant typecheck, transitive_deps walk, claim builders) only parse the closure they need; non-reachable constants stay as raw bytes. The `.ixe` section-2 layout gains a Tag0 length sidecar before each constant's Tag4 bytes: old: [addr:32] [Tag4 constant bytes] new: [addr:32] [Tag0 length] [Tag4 constant bytes] The length is section-level framing, not part of the constant's content hash. `Address::hash(raw_bytes) == addr` is preserved. 
`Constant::put`/`Constant::get` are unchanged. The Lean side (`Ix.Ixon.putEnv`/`getEnv`) reads/writes the new format. **Anon-mode kernel FFI.** New `rs_kernel_check_consts_anon(path, addrs, quiet)` exposes anonymous-mode typechecking by content address. The kernel runs as `KEnv<Anon>` / `TypeChecker<Anon>` with every `M::MField` erased to `()`, so the typechecking logic structurally cannot read metadata. Useful for zkPCC verifiers that hold only addresses. Supporting pieces: - `KernelMode::HAS_META: bool` for future compile-time gating. - `AnonEnv<'a>` wrapper (`src/ix/kernel/anon_env.rs`) exposing only consts/blobs/transitive walks — no `named`/`names`/`comms`. - `Env::get_anon` reads header + blobs + consts, parse-and-drops metadata sections (3-5), returns an `Env` with empty `named`/`names`/`comms`. Same merkle-root verification as `Env::get`. - `rs_de_env_anon` FFI + `Ix.Ixon.rsDeEnvAnon` Lean wrapper. - `Ix.KernelCheck.rsCheckConstsAnonFFI` Lean binding. Caveat: `ixon_ingress::<Anon>` still consults `Env::named` internally to enumerate work items. The resulting `KEnv<Anon>` is metadata-free so the typechecker is anon, but full ingress-level metadata isolation is a follow-up. Tests: 16 new (9 lazy + sparsity; 3 AnonEnv; 4 get_anon). All 1009 Rust unit tests pass; all 813 Lean tests pass; clippy clean. 
--- Ix/Ixon.lean | 35 +++++- Ix/KernelCheck.lean | 27 +++++ docs/Ixon.md | 24 +++- src/ffi/compile.rs | 14 ++- src/ffi/ixon/env.rs | 38 +++++- src/ffi/kernel.rs | 134 +++++++++++++++++++++- src/ffi/lean_env.rs | 6 +- src/ix/compile.rs | 2 +- src/ix/decompile.rs | 104 ++++++++--------- src/ix/ixon.rs | 2 + src/ix/ixon/env.rs | 108 +++++++++++++++--- src/ix/ixon/lazy.rs | 211 ++++++++++++++++++++++++++++++++++ src/ix/ixon/serialize.rs | 235 ++++++++++++++++++++++++++++++++++++-- src/ix/kernel.rs | 1 + src/ix/kernel/anon_env.rs | 184 +++++++++++++++++++++++++++++ src/ix/kernel/claim.rs | 15 ++- src/ix/kernel/ingress.rs | 12 +- src/ix/kernel/mode.rs | 11 ++ 18 files changed, 1062 insertions(+), 101 deletions(-) create mode 100644 src/ix/ixon/lazy.rs create mode 100644 src/ix/kernel/anon_env.rs diff --git a/Ix/Ixon.lean b/Ix/Ixon.lean index f89fa041..0dd8015c 100644 --- a/Ix/Ixon.lean +++ b/Ix/Ixon.lean @@ -1730,12 +1730,20 @@ def putEnv (env : Env) : PutM Unit := do putTag0 ⟨bytes.size.toUInt64⟩ putBytes bytes - -- Section 2: Consts (Address -> Constant) + -- Section 2: Consts (Address -> Tag0-length-prefixed Tag4 constant bytes) + -- + -- The Tag0 length sidecar is added at the env-section level so a lazy + -- loader can slice each constant without parsing its Tag4 envelope. + -- The length is NOT part of the content-addressed bytes: the address + -- is `Address.hash` over the Tag4 constant body alone (which is + -- exactly what `serConstant` produces). 
let consts := env.consts.toList.toArray.qsort fun a b => (compare a.1 b.1).isLT putTag0 ⟨consts.size.toUInt64⟩ for (addr, constant) in consts do Serialize.put addr - putConstant constant + let bytes := serConstant constant + putTag0 ⟨bytes.size.toUInt64⟩ + putBytes bytes -- Section 3: Names (Address -> Name component) -- Topologically sorted so parents come before children, with ties broken by address @@ -1795,12 +1803,17 @@ def getEnv : GetM Env := do let bytes ← getBytes len.toNat env := { env with blobs := env.blobs.insert addr bytes } - -- Section 2: Consts + -- Section 2: Consts (length-prefixed; see putEnv for rationale) let numConsts := (← getTag0).size for _ in [:numConsts.toNat] do let addr ← Serialize.get - let constant ← getConstant - env := { env with consts := env.consts.insert addr constant } + let len := (← getTag0).size + let bytes ← getBytes len.toNat + match deConstant bytes with + | .ok constant => + env := { env with consts := env.consts.insert addr constant } + | .error e => + throw s!"Env.get: bad constant bytes for addr {reprStr (toString addr)}: {e}" -- Section 3: Names (build lookup table AND reverse index) let numNames := (← getTag0).size @@ -1936,6 +1949,18 @@ opaque rsDeEnvFFI : @& ByteArray → Except String RawEnv def rsDeEnv (bytes : ByteArray) : Except String Env := return (← rsDeEnvFFI bytes).toEnv +/-- Anonymous-only deserialization: keep blobs + consts, parse-and-drop + names/named/comms. Returns a `RawEnv` whose `named`/`names`/`comms` + arrays are empty. -/ +@[extern "rs_de_env_anon"] +opaque rsDeEnvAnonFFI : @& ByteArray → Except String RawEnv + +/-- Anonymous-only `rsDeEnv`. The returned `Env` has empty + `named`/`names`/`comms` (and `addrToName`) and is suitable for + anon-mode kernel workflows. -/ +def rsDeEnvAnon (bytes : ByteArray) : Except String Env := + return (← rsDeEnvAnonFFI bytes).toEnv + /-! 
## Canonical merkle root over consts -/ @[extern "rs_env_merkle_root"] diff --git a/Ix/KernelCheck.lean b/Ix/KernelCheck.lean index b8477ead..c0f711f0 100644 --- a/Ix/KernelCheck.lean +++ b/Ix/KernelCheck.lean @@ -14,6 +14,7 @@ module public import Lean.Data.Name public import Lean.Declaration +public import Ix.Address public section @@ -115,6 +116,32 @@ opaque rsCheckIxonFFI : @[extern "rs_kernel_ixon_names"] opaque rsIxonNamesFFI : @& String → IO (Array Lean.Name) +/-- FFI: anonymous-mode type-check by address. + + Loads the `.ixe` file at the given path, ingresses every constant into a + `KEnv<Anon>` (with all metadata fields erased to `()` at the type level), + then runs the kernel typechecker on each requested address. + + The address-based surface (no Lean.Name input) reflects what the anon + kernel actually consumes: structural, content-addressed identities with + no Lean-side names. Useful for zkPCC verifiers that hold only claim + addresses, and for tests that want to assert metadata-free typechecking. + + Implemented in `src/ffi/kernel.rs::rs_kernel_check_consts_anon`. The + kernel's typechecking logic structurally cannot read metadata when + running in `Anon` mode — every `M::MField` is `()`. + + Note: today's `ixon_ingress` still consults `Env::named`/`Env::names` + internally to enumerate work items, even in Anon mode. The resulting + kernel state has no metadata, so the typechecking step is anon, but + full ingress-level metadata isolation is a follow-up. 
-/ +@[extern "rs_kernel_check_consts_anon"] +opaque rsCheckConstsAnonFFI : + @& String → -- .ixe path + @& Array Address → -- addresses to type-check + @& Bool → -- quiet + IO (Array (Option CheckError)) + end Ix.KernelCheck end diff --git a/docs/Ixon.md b/docs/Ixon.md index c383d648..0121f668 100644 --- a/docs/Ixon.md +++ b/docs/Ixon.md @@ -825,12 +825,32 @@ count (Tag0) [Address (32 bytes) + len (Tag0) + bytes]* ``` -**Section 2: Constants** (Address → Constant) +**Section 2: Constants** (Address → length-prefixed Constant bytes) ``` count (Tag0) -[Address (32 bytes) + Constant]* +[Address (32 bytes) + len (Tag0) + Constant bytes (Tag4-bounded)]* ``` +The Tag0 length sidecar is a **section-level** framing byte: it is not +part of the constant's content-addressed bytes. The address is +computed as `blake3` over only the Tag4 constant body. This layout +lets a lazy loader slice each constant directly into a +[`LazyConstant`](../src/ix/ixon/lazy.rs) without parsing its Tag4 +envelope, deferring full deserialization until first access. The +materialized `Constant` is cached so subsequent accesses are free. + +### Anonymous-only loading + +`Env::get_anon` (`src/ix/ixon/serialize.rs`) is a sibling of +`Env::get` that loads only the anonymous sections — header, blobs, +consts — and parses-and-drops the metadata sections (names, named, +comms). The returned `Env` has empty `named`/`names`/`comms` and is +suitable for anon-mode kernel workflows that never consult metadata. +Steady-state memory for a Mathlib-scale env drops from ~3-4 GB +(structured + metadata) to ~1 GB (lazy bytes only). + +Exposed to Lean via `rs_de_env_anon` (`Ix.Ixon.rsDeEnvAnon`). 
+ **Section 3: Names** (Address → NameComponent, topologically sorted) ``` count (Tag0) diff --git a/src/ffi/compile.rs b/src/ffi/compile.rs index 84156240..b91e19c3 100644 --- a/src/ffi/compile.rs +++ b/src/ffi/compile.rs @@ -424,11 +424,14 @@ pub extern "C" fn rs_compile_phases( .env .consts .iter() - .map(|e| (e.key().clone(), e.value().clone())) + .filter_map(|e| { + let c = e.value().get().ok()?; + Some((e.key().clone(), c)) + }) .collect(); let consts_arr = LeanArray::alloc(consts.len()); for (i, (addr, constant)) in consts.iter().enumerate() { - consts_arr.set(i, build_raw_const(addr, constant)); + consts_arr.set(i, build_raw_const(addr, constant.as_ref())); } let named: Vec<_> = compile_stt @@ -517,11 +520,14 @@ pub extern "C" fn rs_compile_env_to_ixon( .env .consts .iter() - .map(|e| (e.key().clone(), e.value().clone())) + .filter_map(|e| { + let c = e.value().get().ok()?; + Some((e.key().clone(), c)) + }) .collect(); let consts_arr = LeanArray::alloc(consts.len()); for (i, (addr, constant)) in consts.iter().enumerate() { - consts_arr.set(i, build_raw_const(addr, constant)); + consts_arr.set(i, build_raw_const(addr, constant.as_ref())); } let named: Vec<_> = compile_stt diff --git a/src/ffi/ixon/env.rs b/src/ffi/ixon/env.rs index 2c4a3748..233b9479 100644 --- a/src/ffi/ixon/env.rs +++ b/src/ffi/ixon/env.rs @@ -334,13 +334,19 @@ pub fn decoded_to_ixon_env(decoded: &DecodedRawEnv) -> IxonEnv { } /// Convert a Rust IxonEnv to a DecodedRawEnv. +/// +/// Forces materialization of every constant — callers operating on a +/// freshly-loaded lazy env pay the parse cost here. 
pub fn ixon_env_to_decoded(env: &IxonEnv) -> DecodedRawEnv { let consts = env .consts .iter() - .map(|e| DecodedRawConst { - addr: e.key().clone(), - constant: e.value().clone(), + .filter_map(|e| { + let c = e.value().get().ok()?; + Some(DecodedRawConst { + addr: e.key().clone(), + constant: (*c).clone(), + }) }) .collect(); let named = env @@ -436,3 +442,29 @@ pub extern "C" fn rs_de_env( }, } } + +/// FFI: Anonymous-only deserialization (`Env::get_anon`). +/// +/// Reads the header + blobs + consts sections; parses and discards +/// the metadata sections (names / named / comms). The returned +/// `RawEnv` has empty `named`, `names`, `comms` arrays. Useful for +/// anon-mode kernel callers that want to avoid the steady-state +/// memory cost of metadata that they will never consult. +#[unsafe(no_mangle)] +pub extern "C" fn rs_de_env_anon( + obj: LeanByteArray>, +) -> LeanExcept { + let data = obj.as_bytes(); + let mut slice: &[u8] = data; + match IxonEnv::get_anon(&mut slice) { + Ok(env) => { + let decoded = ixon_env_to_decoded(&env); + let raw_env = LeanIxonRawEnv::build(&decoded); + LeanExcept::ok(raw_env) + }, + Err(e) => { + let msg = format!("rs_de_env_anon: {}", e); + LeanExcept::error_string(&msg) + }, + } +} diff --git a/src/ffi/kernel.rs b/src/ffi/kernel.rs index f62a429a..4edcf204 100644 --- a/src/ffi/kernel.rs +++ b/src/ffi/kernel.rs @@ -74,7 +74,11 @@ use crate::ix::kernel::ingress::{ }; #[cfg(feature = "test-ffi")] use crate::ix::kernel::ingress::{ixon_ingress, lean_ingress}; -use crate::ix::kernel::mode::Meta; +use crate::ix::kernel::anon_env::AnonEnv; +use crate::ix::kernel::id::KId; +use crate::ix::kernel::ingress::ixon_ingress; +use crate::ix::kernel::mode::{Anon, Meta}; +use crate::lean::LeanIxAddress; use crate::ix::kernel::tc::TypeChecker; unsafe extern "C" { @@ -694,6 +698,134 @@ pub extern "C" fn rs_kernel_check_ixon( build_result_array(&results) } +/// FFI: anonymous-mode type-check by address. 
+/// +/// Lean signature: +/// ```lean +/// @[extern "rs_kernel_check_consts_anon"] +/// opaque rsCheckConstsAnonFFI : +/// @& String → -- .ixe path +/// @& Array Ix.Address → -- addresses to check +/// @& Bool → -- quiet +/// IO (Array (Option CheckError)) +/// ``` +/// +/// Loads the `.ixe`, builds a `KEnv` via `ixon_ingress::`, +/// and runs `TypeChecker::check_const` for each requested +/// address. All metadata fields (`KConst::name`, +/// `level_params`, binder names, mdata) are erased to `()` at the +/// type level, so the kernel's typechecking logic cannot read +/// metadata even if ingress consulted it. +/// +/// The address surface (not name) makes the anon path's entry +/// independent of Lean.Name → Address resolution — verifiers that +/// only have content addresses (e.g., zkPCC claim subjects) can +/// invoke this directly without metadata. +/// +/// Results come back in input order, paired by the caller with +/// `addrs[i]`. +#[unsafe(no_mangle)] +pub extern "C" fn rs_kernel_check_consts_anon( + env_path: LeanString>, + addrs: LeanArray>, + quiet: LeanBool>, +) -> LeanIOResult { + let total_start = Instant::now(); + let _quiet = quiet.to_bool(); + let path = env_path.to_string(); + let addrs_vec: Vec
= LeanIxAddress::decode_array(addrs); + + // Load env + let t0 = Instant::now(); + let bytes = match std::fs::read(&path) { + Ok(b) => b, + Err(e) => { + return LeanIOResult::error_string(&format!( + "rs_kernel_check_consts_anon: failed to read {path}: {e}" + )); + }, + }; + eprintln!( + "[rs_kernel_check_anon] read env: {:>8.1?} ({} bytes)", + t0.elapsed(), + bytes.len() + ); + + let t1 = Instant::now(); + let mut slice: &[u8] = &bytes; + let ixon_env = match IxonEnv::get(&mut slice) { + Ok(env) => env, + Err(e) => { + return LeanIOResult::error_string(&format!( + "rs_kernel_check_consts_anon: failed to deserialize {path}: {e}" + )); + }, + }; + drop(bytes); + eprintln!( + "[rs_kernel_check_anon] deserialize:{:>8.1?} ({} consts)", + t1.elapsed(), + ixon_env.const_count() + ); + + // Build KEnv via ingress. Note: ixon_ingress consults named/ + // names internally to enumerate work items, but the resulting + // KEnv has `()` for every metadata field — the kernel's + // typechecking logic structurally cannot read metadata. 
+ let t2 = Instant::now(); + let _anon_env_view = AnonEnv::from_env(&ixon_env); + let (mut kenv, _intern) = match ixon_ingress::(&ixon_env) { + Ok(pair) => pair, + Err(e) => { + return build_uniform_error( + addrs_vec.len(), + &format!("[anon ingress] {e}"), + ); + }, + }; + eprintln!("[rs_kernel_check_anon] anon ingress:{:>8.1?}", t2.elapsed()); + + // Per-address check + let t3 = Instant::now(); + let total = addrs_vec.len(); + let mut results: Vec = Vec::with_capacity(total); + for (i, addr) in addrs_vec.iter().enumerate() { + let kid = KId::::new(addr.clone(), ()); + if !kenv.consts.contains_key(&kid) { + results.push(Err(( + ErrKind::Compile, + format!("[anon] no kernel const at {}", addr.hex()), + ))); + continue; + } + let mut tc = TypeChecker::::new(&mut kenv); + let r = tc.check_const(&kid); + let label = format!("[{}/{}] {}", i + 1, total, addr.hex()); + match r { + Ok(()) => { + eprintln!("{label} ok"); + results.push(Ok(())); + }, + Err(e) => { + let msg = format!("{e:?}"); + eprintln!("{label} FAIL: {msg}"); + results.push(Err((ErrKind::Kernel, msg))); + }, + } + } + eprintln!( + "[rs_kernel_check_anon] checks: {:>8.1?} ({} addrs)", + t3.elapsed(), + total + ); + eprintln!( + "[rs_kernel_check_anon] total: {:>8.1?}", + total_start.elapsed() + ); + + build_result_array(&results) +} + /// FFI: list the checkable names in a serialized Ixon environment. 
#[unsafe(no_mangle)] pub extern "C" fn rs_kernel_ixon_names( diff --git a/src/ffi/lean_env.rs b/src/ffi/lean_env.rs index 9ebfba3a..c1fcd4f9 100644 --- a/src/ffi/lean_env.rs +++ b/src/ffi/lean_env.rs @@ -2466,7 +2466,7 @@ extern "C" fn rs_compile_validate_aux( stt: &crate::ix::compile::CompileState, addr: &crate::ix::address::Address, ) -> String { - match stt.env.get_const(addr).map(|c| c.info) { + match stt.env.get_const(addr).map(|c| c.info.clone()) { Some(crate::ix::ixon::constant::ConstantInfo::RPrj(p)) => { format!("RPrj(idx={}, block={:.12})", p.idx, p.block.hex()) }, @@ -2597,7 +2597,7 @@ extern "C" fn rs_compile_validate_aux( } } - let proj = match stt.env.get_const(addr).map(|c| c.info) { + let proj = match stt.env.get_const(addr).map(|c| c.info.clone()) { Some(crate::ix::ixon::constant::ConstantInfo::RPrj(p)) => p, _ => return None, }; @@ -2614,7 +2614,7 @@ extern "C" fn rs_compile_validate_aux( .iter() .chain(stt.name_to_addr.iter()) .filter_map(|entry| { - match stt.env.get_const(entry.value()).map(|c| c.info) { + match stt.env.get_const(entry.value()).map(|c| c.info.clone()) { Some(crate::ix::ixon::constant::ConstantInfo::RPrj(p)) if p.block == proj.block && p.idx == idx => { diff --git a/src/ix/compile.rs b/src/ix/compile.rs index 2bebb759..db6a59cb 100644 --- a/src/ix/compile.rs +++ b/src/ix/compile.rs @@ -4402,7 +4402,7 @@ mod tests { // Check the constant was stored let cnst = stt.env.get_const(&addr); assert!(cnst.is_some()); - match cnst.unwrap() { + match cnst.unwrap().as_ref() { Constant { info: ConstantInfo::Defn(d), .. } => { // Value should be a Rec(0) since it's self-referential in a single-element block match d.value.as_ref() { diff --git a/src/ix/decompile.rs b/src/ix/decompile.rs index 8b1ccf61..65784c50 100644 --- a/src/ix/decompile.rs +++ b/src/ix/decompile.rs @@ -160,11 +160,11 @@ fn read_string( }) } -/// Read a Constant from the const store. +/// Read a Constant from the const store, materializing the lazy entry. 
fn read_const( addr: &Address, stt: &CompileState, -) -> Result { +) -> Result, DecompileError> { stt.env.get_const(addr).ok_or(DecompileError::MissingAddress(addr.clone())) } @@ -1807,12 +1807,12 @@ fn decompile_const( let ctx = all_to_ctx(&all_names); let current_const = name.pretty(); - match cnst { + match cnst.as_ref() { Constant { info: ConstantInfo::Defn(def), - ref sharing, - ref refs, - ref univs, + sharing, + refs, + univs, } => { let mut cache = BlockCache { sharing: sharing.clone(), @@ -1824,15 +1824,15 @@ fn decompile_const( }; cache.load_meta_extensions(&named.meta); let info = - decompile_definition(&def, &named.meta, &mut cache, stt, dstt)?; + decompile_definition(def, &named.meta, &mut cache, stt, dstt)?; dstt.env.insert(name.clone(), info); }, Constant { info: ConstantInfo::Recr(rec), - ref sharing, - ref refs, - ref univs, + sharing, + refs, + univs, } => { let mut cache = BlockCache { sharing: sharing.clone(), @@ -1848,15 +1848,15 @@ fn decompile_const( // `CallSiteEntry::Collapsed.sharing_idx` from the intended // `meta_sharing` slot. cache.load_meta_extensions(&named.meta); - let info = decompile_recursor(&rec, &named.meta, &mut cache, stt, dstt)?; + let info = decompile_recursor(rec, &named.meta, &mut cache, stt, dstt)?; dstt.env.insert(name.clone(), info); }, Constant { info: ConstantInfo::Axio(ax), - ref sharing, - ref refs, - ref univs, + sharing, + refs, + univs, } => { let mut cache = BlockCache { sharing: sharing.clone(), @@ -1869,15 +1869,15 @@ fn decompile_const( // Axioms have only a type (no body), so no surgery today — but // load extensions for consistency with the other branches. 
cache.load_meta_extensions(&named.meta); - let info = decompile_axiom(&ax, &named.meta, &mut cache, stt, dstt)?; + let info = decompile_axiom(ax, &named.meta, &mut cache, stt, dstt)?; dstt.env.insert(name.clone(), info); }, Constant { info: ConstantInfo::Quot(quot), - ref sharing, - ref refs, - ref univs, + sharing, + refs, + univs, } => { let mut cache = BlockCache { sharing: sharing.clone(), @@ -1890,7 +1890,7 @@ fn decompile_const( // Quotient types have only a type signature — same story as // axioms. Load extensions for consistency. cache.load_meta_extensions(&named.meta); - let info = decompile_quotient(", &named.meta, &mut cache, stt, dstt)?; + let info = decompile_quotient(quot, &named.meta, &mut cache, stt, dstt)?; dstt.env.insert(name.clone(), info); }, @@ -3129,50 +3129,44 @@ fn decompile_named_const( // Projections - get the block and decompile ConstantInfo::DPrj(proj) => { - if let Some(Constant { - info: ConstantInfo::Muts(mutuals), - ref sharing, - ref refs, - ref univs, - }) = stt.env.get_const(&proj.block) - { - decompile_projection( - name, named, &cnst, &mutuals, sharing, refs, univs, stt, dstt, - ) - } else { - Err(DecompileError::MissingAddress(proj.block.clone())) + match stt.env.get_const(&proj.block).as_deref() { + Some(Constant { + info: ConstantInfo::Muts(mutuals), + sharing, + refs, + univs, + }) => decompile_projection( + name, named, &cnst, mutuals, sharing, refs, univs, stt, dstt, + ), + _ => Err(DecompileError::MissingAddress(proj.block.clone())), } }, ConstantInfo::IPrj(proj) => { - if let Some(Constant { - info: ConstantInfo::Muts(mutuals), - ref sharing, - ref refs, - ref univs, - }) = stt.env.get_const(&proj.block) - { - decompile_projection( - name, named, &cnst, &mutuals, sharing, refs, univs, stt, dstt, - ) - } else { - Err(DecompileError::MissingAddress(proj.block.clone())) + match stt.env.get_const(&proj.block).as_deref() { + Some(Constant { + info: ConstantInfo::Muts(mutuals), + sharing, + refs, + univs, + }) => 
decompile_projection( + name, named, &cnst, mutuals, sharing, refs, univs, stt, dstt, + ), + _ => Err(DecompileError::MissingAddress(proj.block.clone())), } }, ConstantInfo::RPrj(proj) => { - if let Some(Constant { - info: ConstantInfo::Muts(mutuals), - ref sharing, - ref refs, - ref univs, - }) = stt.env.get_const(&proj.block) - { - decompile_projection( - name, named, &cnst, &mutuals, sharing, refs, univs, stt, dstt, - ) - } else { - Err(DecompileError::MissingAddress(proj.block.clone())) + match stt.env.get_const(&proj.block).as_deref() { + Some(Constant { + info: ConstantInfo::Muts(mutuals), + sharing, + refs, + univs, + }) => decompile_projection( + name, named, &cnst, mutuals, sharing, refs, univs, stt, dstt, + ), + _ => Err(DecompileError::MissingAddress(proj.block.clone())), } }, diff --git a/src/ix/ixon.rs b/src/ix/ixon.rs index 3a0319f0..d27b79a6 100644 --- a/src/ix/ixon.rs +++ b/src/ix/ixon.rs @@ -12,6 +12,7 @@ pub mod constant; pub mod env; pub mod error; pub mod expr; +pub mod lazy; pub mod merkle; pub mod metadata; pub mod proof; @@ -30,6 +31,7 @@ pub use constant::{ pub use env::{Env, Named}; pub use error::{CompileError, DecompileError, SerializeError}; pub use expr::Expr; +pub use lazy::LazyConstant; pub use metadata::{ ConstantMeta, DataValue, ExprMeta, ExprMetaData, KVMap, NameIndex, NameReverseIndex, diff --git a/src/ix/ixon/env.rs b/src/ix/ixon/env.rs index 54e5098b..46777f87 100644 --- a/src/ix/ixon/env.rs +++ b/src/ix/ixon/env.rs @@ -3,12 +3,14 @@ use dashmap::DashMap; use rustc_hash::FxHashSet; use std::collections::VecDeque; +use std::sync::Arc; use crate::ix::address::Address; use crate::ix::env::Name; use super::comm::Comm; use super::constant::Constant; +use super::lazy::LazyConstant; use super::metadata::ConstantMeta; /// A named constant with metadata. @@ -62,15 +64,18 @@ pub struct AuxLayout { /// The Ixon environment. 
/// /// Contains five maps: -/// - `consts`: Alpha-invariant constants indexed by content hash +/// - `consts`: Alpha-invariant constants indexed by content hash, +/// stored lazily as serialized bytes ([`LazyConstant`]) and +/// materialized on demand. /// - `named`: Named references with metadata and mutual context /// - `blobs`: Raw data (strings, nats, files) /// - `names`: Hash-consed Lean.Name components (Address -> Name) /// - `comms`: Cryptographic commitments (secrets) #[derive(Debug, Default)] pub struct Env { - /// Alpha-invariant constants: Address -> Constant - pub consts: DashMap, + /// Alpha-invariant constants: Address -> LazyConstant (raw bytes + + /// optional materialized cache; see [`LazyConstant`]). + pub consts: DashMap, /// Named references: Name -> (constant address, metadata, ctx) pub named: DashMap, /// Raw data blobs: Address -> bytes @@ -104,15 +109,41 @@ impl Env { self.blobs.get(addr).map(|r| r.clone()) } - /// Store a constant and return its content address. - /// Note: The actual hashing/serialization is done elsewhere. + /// Store a structured constant under `addr`. + /// + /// Serializes the constant once and pre-populates the + /// [`LazyConstant`] cache so subsequent `Env::put` is a memcpy and + /// the first `get_const` call is free. pub fn store_const(&self, addr: Address, constant: Constant) { - self.consts.insert(addr, constant); + self.consts.insert(addr, LazyConstant::from_constant(constant)); + } + + /// Store an already-serialized constant under `addr` (lazy load path). + /// `bytes` must be exactly what `Constant::put` produced for `addr`. + pub fn store_const_lazy(&self, addr: Address, bytes: Arc<[u8]>) { + self.consts.insert(addr, LazyConstant::from_bytes(bytes)); } - /// Get a constant by address. - pub fn get_const(&self, addr: &Address) -> Option { - self.consts.get(addr).map(|r| r.clone()) + /// Get a constant by address, materializing on demand. 
+ /// + /// Returns `None` if the address is not present or materialization + /// fails (e.g., corrupt bytes). Use [`Self::try_get_const`] to + /// inspect materialization errors. + pub fn get_const(&self, addr: &Address) -> Option<Arc<Constant>> { + self.consts.get(addr).and_then(|r| r.value().get().ok()) + } + + /// Get a constant by address, returning materialization errors. + pub fn try_get_const( + &self, + addr: &Address, + ) -> Option<Result<Arc<Constant>, String>> { + self.consts.get(addr).map(|r| r.value().get()) + } + + /// Get the raw serialized bytes of a constant without materializing it. + pub fn get_const_bytes(&self, addr: &Address) -> Option<Arc<[u8]>> { + self.consts.get(addr).map(|r| Arc::from(r.value().raw_bytes())) } /// Register a named constant. @@ -176,16 +207,27 @@ impl Env { /// Addresses that are referenced but not present in `self.consts` are /// still added to the set (so verifiers see external assumptions) /// but we cannot recurse into them. + /// + /// Visited constants are materialized via [`LazyConstant::get`]; + /// subsequent BFS passes over the same closure skip re-parsing. pub fn bfs_refs(&self, root: &Address) -> FxHashSet<Address>
{ let mut visited: FxHashSet<Address>
= FxHashSet::default(); let mut queue: VecDeque<Address>
= VecDeque::new(); visited.insert(root.clone()); queue.push_back(root.clone()); while let Some(addr) = queue.pop_front() { - if let Some(entry) = self.consts.get(&addr) { - for r in &entry.value().refs { + // Materialize the constant just long enough to read its refs. + // Drop the DashMap guard before recursing so concurrent BFS + // calls don't deadlock on the same shard. + let refs: Option> = self + .consts + .get(&addr) + .and_then(|r| r.value().get().ok()) + .map(|c| c.refs.clone()); + if let Some(rs) = refs { + for r in rs { if visited.insert(r.clone()) { - queue.push_back(r.clone()); + queue.push_back(r); } } } @@ -273,7 +315,7 @@ mod tests { let addr = Address::hash(b"test-constant"); env.store_const(addr.clone(), constant.clone()); let got = env.get_const(&addr).unwrap(); - assert_eq!(got, constant); + assert_eq!(*got, constant); } #[test] @@ -471,4 +513,44 @@ mod tests { env.store_const(a.clone(), const_with_refs(vec![])); assert!(env.transitive_deps_excl(&a).is_empty()); } + + /// Round-trips an env through serialize → deserialize so the + /// deserialized side holds purely lazy entries, then asserts that + /// only constants reachable from `seed` get materialized after a + /// `transitive_deps_excl(seed)` walk. + #[test] + fn lazy_sparsity_only_materializes_closure() { + // Build a small env: a→b→c, and an independent d. + let env = Env::new(); + let a = Address::hash(b"a"); + let b = Address::hash(b"b"); + let c = Address::hash(b"c"); + let d = Address::hash(b"d"); + env.store_const(a.clone(), const_with_refs(vec![b.clone()])); + env.store_const(b.clone(), const_with_refs(vec![c.clone()])); + env.store_const(c.clone(), const_with_refs(vec![])); + env.store_const(d.clone(), const_with_refs(vec![])); + + // Serialize → deserialize so all entries start unmaterialized. 
+ let mut buf = Vec::new(); + env.put(&mut buf).unwrap(); + let loaded = Env::get(&mut buf.as_slice()).unwrap(); + for entry in loaded.consts.iter() { + assert!( + !entry.value().is_materialized(), + "freshly-loaded entry {:?} should not be materialized", + entry.key() + ); + } + + // Walk closure of `a`. {a, b, c} get materialized; `d` does not. + let _ = loaded.transitive_deps_excl(&a); + assert!(loaded.consts.get(&a).unwrap().value().is_materialized()); + assert!(loaded.consts.get(&b).unwrap().value().is_materialized()); + assert!(loaded.consts.get(&c).unwrap().value().is_materialized()); + assert!( + !loaded.consts.get(&d).unwrap().value().is_materialized(), + "`d` outside `a`'s closure should stay lazy" + ); + } } diff --git a/src/ix/ixon/lazy.rs b/src/ix/ixon/lazy.rs new file mode 100644 index 00000000..bc9f028e --- /dev/null +++ b/src/ix/ixon/lazy.rs @@ -0,0 +1,211 @@ +//! Lazy materialization of `Constant` from on-disk bytes. +//! +//! Stored inside `Env::consts`: `DashMap`. The +//! `.ixe` loader reads each constant's bytes (preceded by a Tag0 +//! length sidecar at the env-section level — see `Env::get`) into a +//! `LazyConstant::from_bytes`, deferring `Constant::get` until first +//! access via [`LazyConstant::get`]. Subsequent accesses return a +//! cached `Arc`. +//! +//! Invariants: +//! - `raw_bytes()` returns exactly what `Constant::put` produces and +//! `Address::hash` consumes — the Tag0 length prefix is *not* +//! included. +//! - `Address::hash(self.raw_bytes()) == addr` for the address this +//! lazy entry was stored under (`verify_address` checks this). +//! - Cache is shared across `Clone`s (`Arc>`) so that +//! materialization done through one handle is visible through all. + +use std::sync::{Arc, OnceLock}; + +use crate::ix::address::Address; + +use super::constant::Constant; + +/// Lazy-materialized `Constant` backed by serialized bytes. 
+#[derive(Debug, Clone)] +pub struct LazyConstant { + /// Tag4-encoded constant bytes (exactly the slice consumed by + /// `Constant::get` and hashed by `Address::hash`). + bytes: Arc<[u8]>, + /// Cached materialization. Shared across clones via `Arc` so the + /// first thread to materialize benefits every subsequent handle. + cache: Arc>>, +} + +impl LazyConstant { + /// Construct from already-serialized bytes (the lazy load path). + /// + /// The caller is responsible for ensuring `bytes` is exactly what + /// `Constant::put` produced for the address this entry is stored + /// under. Use [`Self::verify_address`] for an explicit check. + pub fn from_bytes(bytes: Arc<[u8]>) -> Self { + LazyConstant { bytes, cache: Arc::new(OnceLock::new()) } + } + + /// Construct from a structured `Constant` (the in-memory build path, + /// e.g. from the compiler). Serializes once and pre-populates the + /// cache so `get()` is free and `raw_bytes()` is ready for + /// `Env::put`. + pub fn from_constant(c: Constant) -> Self { + let mut buf = Vec::new(); + c.put(&mut buf); + let bytes: Arc<[u8]> = buf.into(); + let arc: Arc = Arc::new(c); + let cache = OnceLock::new(); + // First `set` always succeeds on a fresh OnceLock. + let _ = cache.set(arc); + LazyConstant { bytes, cache: Arc::new(cache) } + } + + /// Materialize the `Constant`, caching for subsequent calls. + pub fn get(&self) -> Result, String> { + if let Some(c) = self.cache.get() { + return Ok(c.clone()); + } + let mut slice: &[u8] = &self.bytes; + let parsed = Constant::get(&mut slice) + .map_err(|e| format!("LazyConstant::get: {e}"))?; + if !slice.is_empty() { + return Err(format!( + "LazyConstant::get: trailing {} bytes after Constant", + slice.len() + )); + } + let arc = Arc::new(parsed); + // If another thread raced us and set first, that's fine — our + // local `arc` is dropped and we pick up the winner below. 
+ let _ = self.cache.set(arc); + Ok(self.cache.get().expect("cache just set").clone()) + } + + /// Raw serialized bytes (the Tag4 constant body, no length prefix). + pub fn raw_bytes(&self) -> &[u8] { + &self.bytes + } + + /// Whether the structured `Constant` has been materialized. + pub fn is_materialized(&self) -> bool { + self.cache.get().is_some() + } + + /// Verify that `Address::hash(self.raw_bytes()) == *expected`. + pub fn verify_address(&self, expected: &Address) -> bool { + Address::hash(&self.bytes) == *expected + } +} + +/// Bytes are deterministic for a given `Constant`, so byte-equality +/// implies `Constant`-equality. +impl PartialEq for LazyConstant { + fn eq(&self, other: &Self) -> bool { + self.bytes == other.bytes + } +} +impl Eq for LazyConstant {} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ix::env::DefinitionSafety; + use crate::ix::ixon::constant::{Axiom, ConstantInfo, DefKind, Definition}; + use crate::ix::ixon::expr::Expr; + + fn axiom_constant() -> Constant { + Constant::new(ConstantInfo::Axio(Axiom { + is_unsafe: false, + lvls: 0, + typ: Expr::sort(0), + })) + } + + fn defn_constant() -> Constant { + Constant::new(ConstantInfo::Defn(Definition { + kind: DefKind::Definition, + safety: DefinitionSafety::Safe, + lvls: 1, + typ: Expr::sort(0), + value: Expr::var(0), + })) + } + + #[test] + fn from_constant_materializes_immediately() { + let c = axiom_constant(); + let lazy = LazyConstant::from_constant(c.clone()); + assert!(lazy.is_materialized()); + assert_eq!(*lazy.get().unwrap(), c); + } + + #[test] + fn from_bytes_defers_materialization() { + let c = defn_constant(); + let (addr, bytes) = c.commit(); + let lazy = LazyConstant::from_bytes(bytes.into()); + assert!(!lazy.is_materialized()); + assert!(lazy.verify_address(&addr)); + let arc = lazy.get().unwrap(); + assert_eq!(*arc, c); + assert!(lazy.is_materialized()); + } + + #[test] + fn cache_is_shared_across_clones() { + let lazy = 
LazyConstant::from_bytes(axiom_constant().commit().1.into()); + assert!(!lazy.is_materialized()); + let cloned = lazy.clone(); + let _ = cloned.get().unwrap(); + // Materialization through `cloned` is visible through `lazy` + // because both share the same `Arc>` cache slot. + assert!(lazy.is_materialized()); + } + + #[test] + fn raw_bytes_equals_constant_put() { + let c = defn_constant(); + let mut expected = Vec::new(); + c.put(&mut expected); + let lazy = LazyConstant::from_constant(c); + assert_eq!(lazy.raw_bytes(), &expected[..]); + } + + #[test] + fn from_bytes_then_get_roundtrips() { + let c = defn_constant(); + let (_addr, bytes) = c.commit(); + let lazy = LazyConstant::from_bytes(bytes.into()); + let got = lazy.get().unwrap(); + assert_eq!(*got, c); + } + + #[test] + fn verify_address_detects_corruption() { + let c = axiom_constant(); + let (_addr, mut bytes) = c.commit(); + // Flip the last byte + let last = bytes.len() - 1; + bytes[last] ^= 0xFF; + let lazy = LazyConstant::from_bytes(bytes.into()); + // Address of mutated bytes shouldn't match the original + let original_addr = c.commit().0; + assert!(!lazy.verify_address(&original_addr)); + } + + #[test] + fn equality_by_bytes() { + let a = LazyConstant::from_constant(axiom_constant()); + let b = LazyConstant::from_constant(axiom_constant()); + assert_eq!(a, b); + let d = LazyConstant::from_constant(defn_constant()); + assert_ne!(a, d); + } + + #[test] + fn trailing_bytes_rejected() { + let mut bytes = axiom_constant().commit().1; + bytes.push(0xAB); + let lazy = LazyConstant::from_bytes(bytes.into()); + let err = lazy.get().unwrap_err(); + assert!(err.contains("trailing"), "got: {err}"); + } +} diff --git a/src/ix/ixon/serialize.rs b/src/ix/ixon/serialize.rs index 58a4bd20..15abfb36 100644 --- a/src/ix/ixon/serialize.rs +++ b/src/ix/ixon/serialize.rs @@ -1184,7 +1184,13 @@ impl Env { for addr in &const_addrs { if let Some(entry) = self.consts.get(addr) { put_address(addr, buf); - 
entry.value().put(buf); + // Length-prefix sidecar (Tag0) so lazy loaders can slice each + // constant without parsing its Tag4 envelope. The length is + // NOT part of the content-addressed bytes — `Address::hash` is + // computed only over `raw_bytes()`. + let bytes = entry.value().raw_bytes(); + Tag0::new(bytes.len() as u64).put(buf); + buf.extend_from_slice(bytes); } } if !quiet { @@ -1356,12 +1362,21 @@ impl Env { env.blobs.insert(addr, bytes.to_vec()); } - // Section 2: Consts + // Section 2: Consts (lazy: read length prefix, slice bytes, defer parse) let num_consts = get_u64(buf)?; for _ in 0..num_consts { let addr = get_address(buf)?; - let constant = Constant::get(buf)?; - env.consts.insert(addr, constant); + let len = Tag0::get(buf)?.size as usize; + if buf.len() < len { + return Err(format!( + "Env::get: need {} bytes for constant, have {}", + len, + buf.len() + )); + } + let (bytes, rest) = buf.split_at(len); + *buf = rest; + env.store_const_lazy(addr, bytes.into()); } // Section 3: Names (build lookup table and reverse index for metadata) @@ -1421,6 +1436,130 @@ impl Env { Ok(env) } + /// Anonymous-only deserialization: read the header + blobs + + /// consts sections, parse-and-discard the metadata sections + /// (names / named / comms). + /// + /// Returns an `Env` with populated `consts` (lazy) and `blobs`, and + /// **empty** `named` / `names` / `comms`. The merkle-root header is + /// re-verified against the recomputed root over `consts.keys()`, + /// exactly as in [`Env::get`]. + /// + /// Why "parse and discard"? Sections 3-5 lack a section-level length + /// prefix today (only section 2 has one), so we can't byte-skip + /// them without parsing. Parsing into local scopes that drop on + /// return still wins us the steady-state memory: the returned `Env` + /// is metadata-free, and the temporary lookup tables / parsed + /// metadata values are reclaimed before this function returns. 
+ /// + /// Used by the anon-mode kernel path so a verifier holding only + /// content addresses doesn't pay the long-term cost of metadata + /// sections it will never consult. + pub fn get_anon(buf: &mut &[u8]) -> Result { + // Header (same as Env::get) + let tag = Tag4::get(buf)?; + if tag.flag != Self::FLAG { + return Err(format!( + "Env::get_anon: expected flag 0x{:X}, got 0x{:X}", + Self::FLAG, + tag.flag + )); + } + if tag.size != 0 { + return Err(format!( + "Env::get_anon: expected Env variant 0, got {}", + tag.size + )); + } + let stored_root = get_address(buf)?; + + let env = Env::new(); + + // Section 1: Blobs (kept) + let num_blobs = get_u64(buf)?; + for _ in 0..num_blobs { + let addr = get_address(buf)?; + let len = get_u64(buf)? as usize; + if buf.len() < len { + return Err(format!( + "Env::get_anon: need {} bytes for blob, have {}", + len, + buf.len() + )); + } + let (bytes, rest) = buf.split_at(len); + *buf = rest; + env.blobs.insert(addr, bytes.to_vec()); + } + + // Section 2: Consts (kept, lazy) + let num_consts = get_u64(buf)?; + for _ in 0..num_consts { + let addr = get_address(buf)?; + let len = Tag0::get(buf)?.size as usize; + if buf.len() < len { + return Err(format!( + "Env::get_anon: need {} bytes for constant, have {}", + len, + buf.len() + )); + } + let (bytes, rest) = buf.split_at(len); + *buf = rest; + env.store_const_lazy(addr, bytes.into()); + } + + // Section 3: Names — parse and DISCARD. We still need a populated + // `names_lookup` and `name_reverse_index` so section 4's indexed + // metadata parses correctly, but both go out of scope before + // returning so the steady-state `Env` carries no name data. 
+ let num_names = get_u64(buf)?; + let mut names_lookup: FxHashMap = FxHashMap::default(); + let mut name_reverse_index: NameReverseIndex = + Vec::with_capacity(num_names as usize + 1); + let anon_addr = Address::from_blake3_hash(*Name::anon().get_hash()); + names_lookup.insert(anon_addr, Name::anon()); + for _ in 0..num_names { + let addr = get_address(buf)?; + let name = get_name_component(buf, &names_lookup)?; + name_reverse_index.push(addr.clone()); + names_lookup.insert(addr, name); + } + + // Section 4: Named — parse and DISCARD. + let num_named = get_u64(buf)?; + for _ in 0..num_named { + let _name_addr = get_address(buf)?; + let _named = get_named_indexed(buf, &name_reverse_index)?; + } + + // Section 5: Comms — parse and DISCARD. + let num_comms = get_u64(buf)?; + for _ in 0..num_comms { + let _addr = get_address(buf)?; + let _comm = Comm::get(buf)?; + } + + drop(names_lookup); + drop(name_reverse_index); + + // Verify merkle root over loaded consts. + let mut const_addrs: Vec
= + env.consts.iter().map(|e| e.key().clone()).collect(); + const_addrs.sort_unstable(); + let computed_root = + merkle_root_canonical(&const_addrs).unwrap_or_else(zero_address); + if computed_root != stored_root { + return Err(format!( + "Env::get_anon: merkle root mismatch (stored={}, computed={})", + stored_root.hex(), + computed_root.hex(), + )); + } + + Ok(env) + } + /// Calculate the serialized size of an Env. pub fn serialized_size(&self) -> Result { let mut buf = Vec::new(); @@ -1454,12 +1593,14 @@ impl Env { } let blobs_size = buf.len() - header_size; - // Section 2: Consts + // Section 2: Consts (length-prefixed) let before_consts = buf.len(); put_u64(self.consts.len() as u64, &mut buf); for entry in self.consts.iter() { put_address(entry.key(), &mut buf); - entry.value().put(&mut buf); + let bytes = entry.value().raw_bytes(); + Tag0::new(bytes.len() as u64).put(&mut buf); + buf.extend_from_slice(bytes); } let consts_size = buf.len() - before_consts; @@ -1691,7 +1832,7 @@ mod tests { let mut buf = Vec::new(); constant.put(&mut buf); let addr = Address::hash(&buf); - env.consts.insert(addr.clone(), constant); + env.store_const(addr.clone(), constant); // Create a named entry for this constant if !names.is_empty() { @@ -1701,7 +1842,7 @@ mod tests { let original = if bool::arbitrary(g) { let orig_addr = Address::arbitrary(g); // Store the original constant too so the env is self-consistent. 
- env.consts.insert(orig_addr.clone(), gen_constant(g)); + env.store_const(orig_addr.clone(), gen_constant(g)); Some((orig_addr, ConstantMeta::default())) } else { None @@ -1939,4 +2080,82 @@ mod tests { let res = Env::get(&mut buf.as_slice()); assert!(res.is_err(), "tampered root should be rejected"); } + + // --------------------------------------------------------------------------- + // Env::get_anon — anonymous-only deserialization + // --------------------------------------------------------------------------- + + #[test] + fn get_anon_keeps_consts_drops_metadata() { + use crate::ix::ixon::env::Named; + let env = Env::new(); + let a = Address::hash(b"a"); + let b = Address::hash(b"b"); + env.store_const(a.clone(), defn_const(vec![])); + env.store_const(b.clone(), defn_const(vec![a.clone()])); + // Populate metadata sections so we can verify they get dropped. + let blob_addr = env.store_blob(b"hello world".to_vec()); + env.register_name(Name::str(Name::anon(), "MyConst".to_string()), + Named::with_addr(a.clone())); + + let mut buf = Vec::new(); + env.put(&mut buf).unwrap(); + let loaded = Env::get_anon(&mut buf.as_slice()).unwrap(); + + // Anonymous sections preserved + assert_eq!(loaded.const_count(), 2); + assert!(loaded.consts.get(&a).is_some()); + assert!(loaded.consts.get(&b).is_some()); + assert_eq!(loaded.get_blob(&blob_addr), Some(b"hello world".to_vec())); + + // Metadata sections empty + assert_eq!(loaded.named_count(), 0, "named should be empty after get_anon"); + assert_eq!(loaded.name_count(), 0, "names should be empty after get_anon"); + assert_eq!(loaded.comm_count(), 0, "comms should be empty after get_anon"); + } + + #[test] + fn get_anon_merkle_root_verified() { + let env = Env::new(); + env.store_const(Address::hash(b"x"), defn_const(vec![])); + let mut buf = Vec::new(); + env.put(&mut buf).unwrap(); + // Tamper with the root. 
+ buf[10] ^= 0xFF; + let res = Env::get_anon(&mut buf.as_slice()); + assert!(res.is_err(), "get_anon must reject tampered root"); + } + + #[test] + fn get_anon_empty_env_roundtrip() { + let env = Env::new(); + let mut buf = Vec::new(); + env.put(&mut buf).unwrap(); + let loaded = Env::get_anon(&mut buf.as_slice()).unwrap(); + assert_eq!(loaded.const_count(), 0); + assert_eq!(loaded.named_count(), 0); + } + + #[test] + fn get_anon_consts_match_get() { + // Build an env, serialize, then load via both get and get_anon. + // The `consts` map should agree (same addresses, same Constant + // when materialized). + let env = Env::new(); + for i in 0..5u8 { + let addr = Address::hash(&[i]); + env.store_const(addr, defn_const(vec![])); + } + let mut buf = Vec::new(); + env.put(&mut buf).unwrap(); + let full = Env::get(&mut buf.as_slice()).unwrap(); + let anon = Env::get_anon(&mut buf.as_slice()).unwrap(); + assert_eq!(full.const_count(), anon.const_count()); + for entry in full.consts.iter() { + let addr = entry.key(); + let from_full = full.get_const(addr).unwrap(); + let from_anon = anon.get_const(addr).unwrap(); + assert_eq!(*from_full, *from_anon); + } + } } diff --git a/src/ix/kernel.rs b/src/ix/kernel.rs index 7b5c3b1c..9e10f521 100644 --- a/src/ix/kernel.rs +++ b/src/ix/kernel.rs @@ -1,3 +1,4 @@ +pub mod anon_env; pub mod canonical_check; pub mod check; pub mod claim; diff --git a/src/ix/kernel/anon_env.rs b/src/ix/kernel/anon_env.rs new file mode 100644 index 00000000..42e48fbf --- /dev/null +++ b/src/ix/kernel/anon_env.rs @@ -0,0 +1,184 @@ +//! `AnonEnv<'a>` — restricted view over `IxonEnv` exposing only the +//! anonymous (metadata-free) sections. +//! +//! Held in lieu of `&IxonEnv` everywhere the anon-mode kernel needs +//! env access (constant materialization, blob lookup, dependency +//! walking). Crucially, it does **not** expose `named`, `names`, or +//! `comms` — Lean.Name → metadata mappings that the anon kernel must +//! never consult. +//! +//! 
Two construction paths: +//! +//! 1. [`AnonEnv::from_env`] — borrowed view over an existing +//! `IxonEnv`. The full env may still have metadata sections +//! populated, but anon-mode code can't reach them through this +//! wrapper. +//! +//! 2. [`Env::get_anon`] (Phase 3) — built fresh from `.ixe` bytes +//! with the named/names/comms sections skipped during load. +//! +//! Either way, code parameterized over `&AnonEnv<'_>` is structurally +//! prevented from accessing metadata. + +use rustc_hash::FxHashSet; +use std::collections::VecDeque; +use std::sync::Arc; + +use crate::ix::address::Address; +use crate::ix::ixon::constant::Constant; +use crate::ix::ixon::env::Env as IxonEnv; + +/// Anonymous-only view over an `IxonEnv`. +#[derive(Clone, Copy, Debug)] +pub struct AnonEnv<'a> { + inner: &'a IxonEnv, +} + +impl<'a> AnonEnv<'a> { + /// Construct a view that exposes only the anonymous sections. + pub fn from_env(env: &'a IxonEnv) -> Self { + AnonEnv { inner: env } + } + + /// Materialize the constant at `addr`. + pub fn get_const(&self, addr: &Address) -> Option> { + self.inner.get_const(addr) + } + + /// Raw constant bytes (no materialization). + pub fn get_const_bytes(&self, addr: &Address) -> Option> { + self.inner.get_const_bytes(addr) + } + + /// Whether a constant is present in this view. + pub fn contains_const(&self, addr: &Address) -> bool { + self.inner.consts.contains_key(addr) + } + + /// Number of constants. + pub fn const_count(&self) -> usize { + self.inner.const_count() + } + + /// Get a blob by address (needed by anon ingress for string/nat + /// literals embedded in expressions). + pub fn get_blob(&self, addr: &Address) -> Option> { + self.inner.get_blob(addr) + } + + /// BFS-collect all addresses transitively reachable from `root` + /// via `Constant.refs`. The returned set includes `root`. + /// + /// Materializes every visited constant. Constants outside the + /// closure are not touched. + pub fn bfs_refs(&self, root: &Address) -> FxHashSet
{ + let mut visited: FxHashSet
= FxHashSet::default(); + let mut queue: VecDeque
= VecDeque::new(); + visited.insert(root.clone()); + queue.push_back(root.clone()); + while let Some(addr) = queue.pop_front() { + let refs: Option> = + self.get_const(&addr).map(|c| c.refs.clone()); + if let Some(rs) = refs { + for r in rs { + if visited.insert(r.clone()) { + queue.push_back(r); + } + } + } + } + visited + } + + /// Transitive dep addresses of `root`, excluding `root`, sorted. + pub fn transitive_deps_excl(&self, root: &Address) -> Vec
{ + let mut set = self.bfs_refs(root); + set.remove(root); + let mut v: Vec
= set.into_iter().collect(); + v.sort_unstable(); + v + } + + /// **Escape hatch.** Returns the underlying `IxonEnv`. The anon + /// kernel must never call this; it exists solely for adapter code + /// that bridges to legacy APIs taking `&IxonEnv` for their + /// `consts`/`blobs` access. Adapters carry the responsibility to + /// not consult metadata sections. + #[allow(dead_code)] + pub(crate) fn as_ixon_env_unchecked(&self) -> &'a IxonEnv { + self.inner + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ix::env::Name; + use crate::ix::ixon::constant::{Axiom, ConstantInfo}; + use crate::ix::ixon::env::Named; + use crate::ix::ixon::expr::Expr; + + fn axiom_const_with_refs(refs: Vec
) -> Constant { + Constant::with_tables( + ConstantInfo::Axio(Axiom { + is_unsafe: false, + lvls: 0, + typ: Expr::sort(0), + }), + Vec::new(), + refs, + Vec::new(), + ) + } + + fn n(s: &str) -> Name { + Name::str(Name::anon(), s.to_string()) + } + + #[test] + fn from_env_exposes_consts_and_blobs() { + let env = IxonEnv::new(); + let a = Address::hash(b"a"); + env.store_const(a.clone(), axiom_const_with_refs(vec![])); + let blob_addr = env.store_blob(b"hello".to_vec()); + let view = AnonEnv::from_env(&env); + assert!(view.contains_const(&a)); + assert_eq!(view.const_count(), 1); + assert_eq!(view.get_blob(&blob_addr), Some(b"hello".to_vec())); + } + + #[test] + fn bfs_refs_via_view_matches_env() { + let env = IxonEnv::new(); + let a = Address::hash(b"a"); + let b = Address::hash(b"b"); + let c = Address::hash(b"c"); + env.store_const(a.clone(), axiom_const_with_refs(vec![b.clone()])); + env.store_const(b.clone(), axiom_const_with_refs(vec![c.clone()])); + env.store_const(c.clone(), axiom_const_with_refs(vec![])); + let view = AnonEnv::from_env(&env); + let visited = view.bfs_refs(&a); + assert_eq!(visited.len(), 3); + assert!(visited.contains(&a)); + assert!(visited.contains(&b)); + assert!(visited.contains(&c)); + } + + /// The wrapper exposes no API to reach the named map. Compile-time + /// guarantee: this test just checks that we can construct an + /// `AnonEnv` from an env whose `named` map is populated, and the + /// public surface gives us no way to reach those entries. + #[test] + fn no_public_named_access() { + let env = IxonEnv::new(); + let a = Address::hash(b"a"); + env.store_const(a.clone(), axiom_const_with_refs(vec![])); + env.register_name(n("MyConst"), Named::with_addr(a.clone())); + assert_eq!(env.named_count(), 1); + let view = AnonEnv::from_env(&env); + // `view.contains_const(&a)` works — `view.lookup_name(...)` doesn't exist. 
+ assert!(view.contains_const(&a)); + // (No assertion possible against absence of an API — this test + // documents intent.) + } +} diff --git a/src/ix/kernel/claim.rs b/src/ix/kernel/claim.rs index b38fd2d8..a4c531e4 100644 --- a/src/ix/kernel/claim.rs +++ b/src/ix/kernel/claim.rs @@ -64,14 +64,23 @@ pub fn build_eval_claim( /// /// Returns `None` if the env has an empty const set (no subject root /// can be formed). +/// +/// Materializes every constant to inspect its `ConstantInfo` variant. +/// This forces a full env walk; callers that already hold structured +/// constants are unaffected, but a freshly-loaded lazy env will pay +/// the parse cost here. pub fn build_check_env_claim(env: &Env) -> Option { let root = env_merkle_root(env)?; let mut axioms: Vec
= env .consts .iter() - .filter_map(|e| match &e.value().info { - ConstantInfo::Axio(_) => Some(e.key().clone()), - _ => None, + .filter_map(|e| { + let lazy = e.value(); + let c = lazy.get().ok()?; + match &c.info { + ConstantInfo::Axio(_) => Some(e.key().clone()), + _ => None, + } }) .collect(); axioms.sort_unstable(); diff --git a/src/ix/kernel/ingress.rs b/src/ix/kernel/ingress.rs index 21970f6d..d898db48 100644 --- a/src/ix/kernel/ingress.rs +++ b/src/ix/kernel/ingress.rs @@ -3673,7 +3673,7 @@ fn ixon_ingress_inner( ConstantMetaInfo::Indc { .. } | ConstantMetaInfo::Ctor { .. } | ConstantMetaInfo::Rec { .. } => { - if let Some(c) = ixon_env.consts.get(&named.addr) { + if let Some(c) = ixon_env.get_const(&named.addr) { match &c.info { IxonCI::IPrj(_) | IxonCI::CPrj(_) @@ -3687,7 +3687,7 @@ fn ixon_ingress_inner( } }, ConstantMetaInfo::Def { .. } => { - if let Some(c) = ixon_env.consts.get(&named.addr) { + if let Some(c) = ixon_env.get_const(&named.addr) { match &c.info { IxonCI::DPrj(_) => {}, _ => { @@ -3922,7 +3922,13 @@ fn validate_no_reserved_marker_addresses( entry.key().hex() )); } - for (idx, addr) in entry.value().refs.iter().enumerate() { + let constant = entry.value().get().map_err(|e| { + format!( + "validate_no_reserved_marker_addresses: cannot materialize {}: {e}", + entry.key().hex() + ) + })?; + for (idx, addr) in constant.refs.iter().enumerate() { if let Some(marker) = reserved_marker_name(addr) { return Err(format!( "reserved kernel marker address {marker} ({}) used in refs[{idx}] of Ixon constant {}", diff --git a/src/ix/kernel/mode.rs b/src/ix/kernel/mode.rs index e721cc22..07e1c559 100644 --- a/src/ix/kernel/mode.rs +++ b/src/ix/kernel/mode.rs @@ -186,6 +186,13 @@ impl MetaDisplay for () { /// Controls metadata behavior for all zero kernel types. pub trait KernelMode: 'static + Clone + Debug + Send + Sync { + /// `true` iff this mode carries metadata. 
Enables compile-time + /// pruning: code paths gated by `if M::HAS_META { … }` are + /// monomorphized into the dead-branch-eliminated form for the Anon + /// implementation. Use this to guard metadata lookups so they don't + /// execute in Anon mode. + const HAS_META: bool; + /// A metadata field: stores `T` in Meta mode, erased to `()` in Anon mode. type MField: MetaHash + MetaDisplay + PartialEq + Clone + Debug + Hash + Send + Sync; @@ -211,6 +218,8 @@ pub type Meta = ZMode; pub type Anon = ZMode; impl KernelMode for ZMode { + const HAS_META: bool = true; + type MField< T: MetaHash + MetaDisplay + PartialEq + Clone + Debug + Hash + Send + Sync, > = T; @@ -229,6 +238,8 @@ impl KernelMode for ZMode { } impl KernelMode for ZMode { + const HAS_META: bool = false; + type MField< T: MetaHash + MetaDisplay + PartialEq + Clone + Debug + Hash + Send + Sync, > = (); From 12630aa492e87615760a7f646929cb27e054faa9 Mon Sep 17 00:00:00 2001 From: "John C. Burnham" Date: Mon, 18 May 2026 17:46:07 -0400 Subject: [PATCH 03/12] Simplify ix check + add metadata-free anon mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - `ix check` becomes .ixe-only with a positional ``. Drops the `--lean` compile-and-check flow and the `--env` flag; direct Lean → kernel checks remain available through `rsCheckConstsFFI` for tests. Removes the redundant `CheckIxonCmd.lean`. - New `--anon` flag runs a metadata-free kernel check: loads the .ixe via `Env::get_anon` (discards named/names/comms), enumerates work items from `env.consts` alone, and rebuilds member + ctor projection addresses deterministically via `Constant::commit`. Rejects `--consts`/`--ns`/`--consts-file` since it always checks everything. - Compiler: unwrap singleton non-inductive Muts blocks to standalone Defn/Recr. 
`Expr::Rec(0, univs)` already resolves correctly against a one-member block, so the wrapper was pure overhead; this keeps the env structurally uniform and matches `compile_single_def`. The anon ingress for standalones passes `mut_ctx_override = [self_id]` so self-recursive standalones still typecheck. - Kernel: anon parallel runner mirrors `run_checks_parallel_on_large_stacks` (32 workers, slow-detection, RSS tracking). Genericized `check_one_const` / `check_consts_loop` / `format_tc_error` to share the runner. `KernelMode::meta_field_with` / `meta_field_try` gate metadata lookups in expression ingress at compile time. - Anon lazy ingress (`LazyAnonIngress` in tc.rs, helpers in ingress.rs) dedupes blocks via `kenv.blocks.contains_key(&KId(B, ()))` before re-running `ingress_anon_block` (prevents N² re-ingestion when sibling projections fault separately within one worker check). - `Env::get_anon` now harvests `ReducibilityHints` from the otherwise-discarded Named section into a sidecar `Env::anon_hints: FxHashMap`. Anon ingress threads these through a new `hints_override` parameter on `ingress_defn` so the lazy-delta tiebreak in `def_eq::def_rank_id` sees realistic heights instead of `Regular(0)`. Hints are performance advice, not correctness data — supplying them in anon mode preserves the metadata-free trust model. - Regenerate the canonical addresses in `PrimAddrs::new()`; singleton-unwrap changed the content hash of every self-recursive primitive (Nat.add, String, Nat.rec, …). 
End-to-end on compileinitstd.ixe (105,487 constants): meta: 105487/105487 in 20.6s, peak RSS 7.4GB anon: 89010/89010 in 17.8s, peak RSS 4.0GB --- Ix/Cli/CheckCmd.lean | 307 ++++++++--------- Ix/Cli/CheckIxonCmd.lean | 177 ---------- Ix/KernelCheck.lean | 41 +-- Main.lean | 2 - src/ffi/ixon/serialize.rs | 4 +- src/ffi/kernel.rs | 611 ++++++++++++++++++++++++---------- src/ffi/lean_env.rs | 8 +- src/ix/compile.rs | 95 +++++- src/ix/compile/mutual.rs | 90 ++++- src/ix/ixon/env.rs | 28 +- src/ix/ixon/serialize.rs | 17 +- src/ix/kernel/ingress.rs | 651 +++++++++++++++++++++++++++++++++++-- src/ix/kernel/mode.rs | 57 ++++ src/ix/kernel/primitive.rs | 114 +++---- src/ix/kernel/tc.rs | 77 ++++- 15 files changed, 1613 insertions(+), 666 deletions(-) delete mode 100644 Ix/Cli/CheckIxonCmd.lean diff --git a/Ix/Cli/CheckCmd.lean b/Ix/Cli/CheckCmd.lean index 22dbd3ae..9cda25d4 100644 --- a/Ix/Cli/CheckCmd.lean +++ b/Ix/Cli/CheckCmd.lean @@ -1,55 +1,32 @@ /- - `ix check --path `: typecheck a Lean environment through the Rust - kernel. Mirrors the shape of `ix compile` (build the file, load its env, - ship to Rust) but pipes the env through `rs_kernel_check_consts` instead - of `rs_compile_env`. - - Pipeline (Rust side, `src/ffi/kernel.rs`): - Lean env → compile_env → ixon_ingress → TypeChecker.check_const - (one batch of names) - - This is the CLI entry point for "does Mathlib typecheck under Ix?". Use - it like `lake exe ix check --path Benchmarks/Compile/CompileMathlib.lean` - to run the full pipeline against an entire imported environment. - - Flags: - - `--path ` (required): file whose env should be checked. - - `--ns ` (optional, comma-separated): only seed - constants whose name matches one of the prefixes. Transitive deps - are still pulled in so the kernel sees a closed sub-environment, but - we only assert the seeded constants and the closure beneath them. - - `--consts ` (optional, comma-separated): exact constant - names to seed (e.g. 
- `--consts 'Aesop.GoalUnsafe.rec_6,IntermediateField.LinearDisjoint.trace_algebraMap'`). - Same closure semantics as `--ns`. Combine with `--ns` and the seed - set is the union. - - `--consts-file ` (optional): file with one constant name per - line. Useful for long `_private.Mathlib.…` names pasted from a - failing run. Lines starting with `#` and blank lines are ignored. - - `--fail-out ` (optional): write the names of all failing - constants to ``, one per line, with the error message as a - `#`-comment on the previous line. The output is directly consumable - by `--consts-file` so a typical workflow is: - # First run: full env, capture failures - ix check --path X.lean --fail-out fails.txt - # Bisect: re-run only the failures with verbose output - ix check --path X.lean --consts-file fails.txt --verbose - - `--verbose` (optional): one log line per constant - (default is quiet/ephemeral, periodic done/total + ETA). - - The dep-closure helper is the same one used by `ix validate` and the - `kernel-tutorial` test runner — see `Ix.Cli.ValidateCmd.collectDeps`. - - When any of `--ns`, `--consts`, `--consts-file` are set, the *whole* - pipeline (compile → ingress → check) is restricted to the transitive - closure of the matched seeds. This is the fast path for bisecting a - specific failure out of a full-Mathlib run without re-paying the 30s - compile + 130s ingress for the whole environment. + `ix check `: typecheck a serialized `.ixe` environment through + the Ix Rust kernel. Path is positional and required. + + Two kernel modes: + + - Default (Meta): kernel runs with metadata fields populated (Lean.Name, + binder info, mdata). Supports `--ns` / `--consts` / `--consts-file` + for seed filtering and `--fail-out` for bisect-loop workflows. + - `--anon` (metadata-free): the env is loaded via `Env::get_anon` — + `named`/`names`/`comms` sections are discarded at load time, never + reaching the kernel. 
Every kernel-checkable address (every constant + except Muts blocks and projections — projections are covered by + their parent block) is checked. The kernel's typechecking logic + structurally cannot read metadata (`M::MField` is `()` in Anon + mode); progress labels are `@` addresses, not names. + + `--anon` is incompatible with `--ns` / `--consts` / `--consts-file`: + the anon path checks everything in the env. Add `--addrs ` + in the future if address-based filtering is needed. + + Direct Lean → kernel typechecking (compile-and-check from source) is + available via the `rsCheckConstsFFI` API for tests + (`Tests/Ix/Kernel/Tutorial.lean`, `Tests/Ix/Kernel/CheckEnv.lean`). + Use `ix compile ` first, then `ix check `. -/ module public import Cli public import Ix.Common -public import Ix.CompileM public import Ix.KernelCheck public import Ix.Meta public import Ix.Cli.ValidateCmd @@ -62,41 +39,31 @@ open Ix.KernelCheck namespace Ix.Cli.CheckCmd /-- Combined seed selector: prefixes (`--ns`) ∪ exact names - (`--consts`, `--consts-file`). All seeds are intersected with - `env.constants` before the dep walk. -/ + (`--consts`, `--consts-file`). Meta-mode only. -/ private structure SeedSpec where - /-- `--ns` prefix list. Matches via `Lean.Name.isPrefixOf`. -/ prefixes : List Lean.Name := [] - /-- `--consts` + `--consts-file` exact names. Matched against - `env.constants` via structural equality. -/ - exacts : List Lean.Name := [] + exacts : List Lean.Name := [] private def SeedSpec.isEmpty (s : SeedSpec) : Bool := s.prefixes.isEmpty && s.exacts.isEmpty /-- Read one constant name per line from `path`. Blank lines and lines - starting with `#` (after trimming) are ignored. Trailing whitespace - on each line is trimmed before `String.toName`. -/ + starting with `#` (after trimming) are ignored. 
-/ private def readNamesFile (path : String) : IO (List Lean.Name) := do let content ← IO.FS.readFile path let lines := content.splitOn "\n" let names : List Lean.Name := lines.filterMap fun raw => - -- Strip CR (Windows line endings) and surrounding ASCII whitespace. let cs := raw.toList.dropWhile Char.isWhitespace let trimmed := String.ofList (cs.reverse.dropWhile Char.isWhitespace).reverse if trimmed.isEmpty || trimmed.startsWith "#" then none else some trimmed.toName pure names -/-- Build a `SeedSpec` from `--ns`, `--consts`, and `--consts-file`. - Returns `none` if none of the flags were supplied (caller should - check the full env). Returns `some spec` even when individual flags - parsed to empty (with a warning) as long as at least one source - contributed a seed; otherwise warns and falls back to full-env. -/ +/-- Build a `SeedSpec` from `--ns`, `--consts`, and `--consts-file`. -/ private def resolveSeedSpec (p : Cli.Parsed) : IO (Option SeedSpec) := do - let nsFlag := p.flag? "ns" - let constsFlag := p.flag? "consts" - let fileFlag := p.flag? "consts-file" + let nsFlag := p.flag? "ns" + let constsFlag := p.flag? "consts" + let fileFlag := p.flag? "consts-file" if nsFlag.isNone && constsFlag.isNone && fileFlag.isNone then return none let mut prefixes : List Lean.Name := [] @@ -126,30 +93,17 @@ private def resolveSeedSpec (p : Cli.Parsed) : IO (Option SeedSpec) := do return none return some spec -/-- Apply the seed spec (if any) and return both the seed names (the - constants the user explicitly asked about) and the closed list of - `(Name × ConstantInfo)` to ship to Rust. - - Without a filter: every constant in the env is a seed and gets shipped. - With a filter: only constants matching one of the prefixes / exact names - seed the walk, but the *transitive closure* is shipped so the kernel - can resolve every reference. 
-/ -private def selectConsts (leanEnv : Lean.Environment) - (spec : Option SeedSpec) - : IO (Array Lean.Name × List (Lean.Name × Lean.ConstantInfo)) := do +/-- Filter an `.ixe`'s checkable names down to the seed spec. -/ +private def selectNamesIxon (allNames : Array Lean.Name) + (spec : Option SeedSpec) : IO (Array Lean.Name) := do match spec with - | none => - let consts := leanEnv.constants.toList - let names := consts.toArray.map (·.fst) - pure (names, consts) + | none => pure allNames | some s => - -- Verify exact-name seeds exist in the env; warn (don't fail) on misses - -- so a typo or refactored name doesn't abort the run silently. let exactSet : Std.HashSet Lean.Name := s.exacts.foldl (fun acc n => acc.insert n) (Std.HashSet.emptyWithCapacity s.exacts.length) let mut missing : Array Lean.Name := #[] for n in s.exacts do - if !leanEnv.constants.contains n then + if !allNames.contains n then missing := missing.push n if !missing.isEmpty then IO.println s!"[check] warning: {missing.size}/{s.exacts.length} exact name(s) not in env:" @@ -158,139 +112,140 @@ private def selectConsts (leanEnv : Lean.Environment) IO.println s!" - {n}" if missing.size > 20 then IO.println s!" … ({missing.size - 20} more not shown)" - let seeds := leanEnv.constants.toList.filterMap fun (n, _) => - if exactSet.contains n || s.prefixes.any (·.isPrefixOf n) then some n else none - IO.println s!"[check] filter: {s.prefixes.length} prefix(es), {s.exacts.length} exact(s) → {seeds.length} seed constants" - let closed := collectDeps leanEnv seeds - IO.println s!"[check] filter: {closed.length} constants after transitive-dep closure" - -- `seeds` (not the closure) are the names we actually assert on. - -- Transitive deps still need to be in the shipped env so the kernel - -- can resolve references; they're checked implicitly via the seeds - -- that depend on them. 
- pure (seeds.toArray, closed) + let seeds := allNames.filter fun n => + exactSet.contains n || s.prefixes.any (·.isPrefixOf n) + IO.println s!"[check] filter: {s.prefixes.length} prefix(es), {s.exacts.length} exact(s) → {seeds.size} seed constants" + pure seeds /-- Print up to `limit` failures, then a summary line if truncated. -/ -private def reportFailures (failures : Array (Lean.Name × String)) +private def reportFailures (failures : Array (String × String)) (limit : Nat := 30) : IO Unit := do if failures.isEmpty then return IO.println s!"[check] {failures.size} failure(s):" let shown := min limit failures.size - for (name, msg) in failures[:shown] do - IO.println s!" ✗ {name}: {msg}" + for (label, msg) in failures[:shown] do + IO.println s!" ✗ {label}: {msg}" if failures.size > limit then - IO.println s!" … ({failures.size - limit} more failures suppressed; raise the printed limit if needed)" - -/-- Render the error message safely as a single-line `#`-comment. - Newlines (kernel diagnostics often have them) get joined with ` ⏎ ` - so each entry stays one line; this keeps `readNamesFile`'s - "preceding `#` line is a comment" parser happy when the file is fed - back through `--consts-file`. -/ -private def commentLine (msg : String) : String := - let oneLine := msg.replace "\n" " ⏎ " - s!"# {oneLine}" - -/-- Write the failure list to `path` in a format directly consumable by - `--consts-file`. Layout: - # header block (source path, seed count, failure count) - - # - - - # - - … - Always overwrites; always writes (even on zero failures, so callers - have a deterministic "no-news-is-good-news" artifact). 
-/ -private def writeFailuresFile - (path : String) - (sourcePath : String) - (seedCount : Nat) - (failures : Array (Lean.Name × String)) - : IO Unit := do - let mut buf : String := - "# ix check failures — feed this file back via `--consts-file`\n" - ++ s!"# source: {sourcePath}\n" - ++ s!"# seeds: {seedCount}\n" - ++ s!"# failures: {failures.size}\n" - ++ "\n" - for (name, msg) in failures do - buf := buf ++ commentLine msg ++ "\n" ++ s!"{name}\n\n" - IO.FS.writeFile path buf - IO.println s!"[check] wrote {failures.size} failure(s) to {path}" + IO.println s!" … ({failures.size - limit} more failures suppressed)" -def runCheckCmd (p : Cli.Parsed) : IO UInt32 := do - let some path := p.flag? "path" - | p.printError "error: must specify --path" - return 1 - let pathStr := path.as! String +/-- Anon-mode runner: dispatch to `rsCheckAnonFFI`. The Rust side checks + every kernel-checkable address in the env and streams failures to + `failOutPath` (when nonempty). -/ +private def runCheckAnon (envPath : String) (p : Cli.Parsed) : IO UInt32 := do let verbose := p.flag? "verbose" |>.isSome + let failOutPath : String := + match p.flag? "fail-out" with + | some flag => flag.as! String + | none => "" + + IO.println s!"Running Ix kernel check (anon mode) on {envPath}" + let start ← IO.monoMsNow + let results ← rsCheckAnonFFI envPath (!verbose) failOutPath + let elapsed := (← IO.monoMsNow) - start - -- `buildFile` also runs `lake exe cache get` if the target depends on - -- Mathlib, so a fresh checkout works without a prior `lake build`. - buildFile pathStr - let leanEnv ← getFileEnv pathStr + let mut passed := 0 + let mut failures : Array (String × String) := #[] + for i in [:results.size] do + match results[i]! 
with + | none => passed := passed + 1 + | some err => failures := failures.push (s!"#{i}", err.message) + + IO.println s!"[check] checked {results.size} constants in {elapsed.formatMs}" + IO.println s!"[check] {passed}/{results.size} passed" + reportFailures failures - let totalConsts := leanEnv.constants.toList.length - IO.println s!"Running Ix kernel check on {pathStr}" - IO.println s!"Total constants in env: {totalConsts}" + if !failOutPath.isEmpty then + IO.println s!"[check] streamed {failures.size} failure(s) to {failOutPath}" + IO.println s!"##check## {elapsed} {passed} {failures.size} {results.size}" + return if failures.isEmpty then 0 else 1 + +/-- Meta-mode runner: dispatch to `rsCheckIxonFFI` with seed filtering. -/ +private def runCheckMeta (envPath : String) (p : Cli.Parsed) : IO UInt32 := do + let verbose := p.flag? "verbose" |>.isSome + IO.println s!"Running Ix kernel check (meta mode) on {envPath}" let spec ← resolveSeedSpec p - let (seedNames, allConsts) ← selectConsts leanEnv spec - IO.println s!"[check] checking {seedNames.size} seed constant(s) against {allConsts.length} env constants" + let seedNames ← + match spec with + | some s => + if s.prefixes.isEmpty && !s.exacts.isEmpty then + IO.println s!"[check] exact-only filter: {s.exacts.length} name(s); skipping full env name preflight" + pure s.exacts.toArray + else + let namesInEnv ← rsIxonNamesFFI envPath + IO.println s!"Total checkable names in env: {namesInEnv.size}" + selectNamesIxon namesInEnv spec + | none => + let namesInEnv ← rsIxonNamesFFI envPath + IO.println s!"Total checkable names in env: {namesInEnv.size}" + pure namesInEnv + if spec.isSome && seedNames.isEmpty then + IO.println "[check] error: filter resolved to zero constants; refusing to run full-env check" + return 1 + IO.println s!"[check] checking {seedNames.size} seed constant(s)" - -- Every checked constant is expected to typecheck — `expectPass` is just - -- a Rust-side progress-log hint (see 
`src/ffi/kernel.rs::ErrKind`). - -- Defaulting to all-true keeps the `[ok]` / `[FAIL]` lines consistent. let expectPass : Array Bool := Array.replicate seedNames.size true - + let failOutPath : String := + match p.flag? "fail-out" with + | some flag => flag.as! String + | none => "" let start ← IO.monoMsNow - -- `verbose=false` (= `quiet=true` on the FFI side) is the default - -- because full-Mathlib runs ship tens of thousands of constants and - -- per-constant logs swamp the terminal. `--verbose` flips back to - -- per-constant lines for small batches. - let results ← rsCheckConstsFFI allConsts seedNames expectPass (!verbose) + let results ← rsCheckIxonFFI envPath seedNames expectPass (!verbose) failOutPath let elapsed := (← IO.monoMsNow) - start let mut passed := 0 - let mut failures : Array (Lean.Name × String) := #[] + let mut failures : Array (String × String) := #[] for i in [:seedNames.size] do match results[i]! with | none => passed := passed + 1 - | some err => - failures := failures.push (seedNames[i]!, err.message) + | some err => failures := failures.push (toString seedNames[i]!, err.message) IO.println s!"[check] checked {seedNames.size} constants in {elapsed.formatMs}" IO.println s!"[check] {passed}/{seedNames.size} passed" reportFailures failures - -- Persist failures for the bisect-loop workflow described in the - -- module docstring. Always written when `--fail-out` is set, even on - -- zero failures, so an automation can `test -s fails.txt` for a clean - -- pass/fail signal. - if let some flag := p.flag? "fail-out" then - let outPath := flag.as! String - writeFailuresFile outPath pathStr seedNames.size failures + if !failOutPath.isEmpty then + IO.println s!"[check] streamed {failures.size} failure(s) to {failOutPath}" - -- Machine-readable line for CI tracking, matches `ix compile`'s shape. 
IO.println s!"##check## {elapsed} {passed} {failures.size} {seedNames.size}" - return if failures.isEmpty then 0 else 1 +def runCheckCmd (p : Cli.Parsed) : IO UInt32 := do + let some pathArg := p.positionalArg? "path" + | p.printError "error: must specify to a .ixe file" + return 1 + let envPath := pathArg.as! String + + let anon := p.flag? "anon" |>.isSome + if anon then + let hasConsts := p.flag? "consts" |>.isSome + let hasNs := p.flag? "ns" |>.isSome + let hasConstsFile := p.flag? "consts-file" |>.isSome + if hasConsts || hasNs || hasConstsFile then + p.printError "error: --anon checks the entire env; --consts/--ns/--consts-file are unsupported" + return 1 + runCheckAnon envPath p + else + runCheckMeta envPath p + end Ix.Cli.CheckCmd open Ix.Cli.CheckCmd in def checkCmd : Cli.Cmd := `[Cli| check VIA runCheckCmd; - "Typecheck a Lean file's environment through the Ix Rust kernel" + "Typecheck a serialized Ixon environment through the Ix Rust kernel" FLAGS: - path : String; "Path to file whose env should be typechecked" - ns : String; "Comma-separated Lean name prefixes to filter on (e.g. 'Aesop,SetTheory.PGame'). When set, only seeds matching any prefix are asserted; transitive deps are pulled in so the kernel sees a closed env." - consts : String; "Comma-separated EXACT constant names to seed (e.g. 'Aesop.GoalUnsafe.rec_6,IntermediateField.LinearDisjoint.trace_algebraMap'). Transitive deps pulled in. Combine with --ns for a union." - "consts-file" : String; "Path to a file with one constant name per line. '#' comments and blank lines ignored. Useful for long _private.Mathlib.… names pasted from a failing run." - "fail-out" : String; "Write all failing constant names to this path (one per line, error message as preceding '#' comment). Output is directly consumable by --consts-file for a bisect-loop workflow." 
- verbose; "Log every constant on its own line (default: quiet ephemeral progress)" + anon; "Run the kernel in anon mode (no metadata read from .ixe)" + ns : String; "Comma-separated Lean.Name prefixes to filter on (meta mode only)" + consts : String; "Comma-separated EXACT constant names to seed (meta mode only)" + "consts-file" : String; "Path to a file with one constant name per line (meta mode only)" + "fail-out" : String; "Write failing constants to this path (consumable by --consts-file)" + verbose; "Log every constant on its own line (default: quiet)" + + ARGS: + path : String; "Path to a serialized .ixe environment" ] end diff --git a/Ix/Cli/CheckIxonCmd.lean b/Ix/Cli/CheckIxonCmd.lean deleted file mode 100644 index f8e21f78..00000000 --- a/Ix/Cli/CheckIxonCmd.lean +++ /dev/null @@ -1,177 +0,0 @@ -module -public import Cli -public import Ix.Common -public import Ix.KernelCheck -public import Ix.Meta -public import Ix.Cli.ValidateCmd - -public section - -open Ix.KernelCheck - -namespace Ix.Cli.CheckIxonCmd - -private structure SeedSpec where - prefixes : List Lean.Name := [] - exacts : List Lean.Name := [] - -private def SeedSpec.isEmpty (s : SeedSpec) : Bool := - s.prefixes.isEmpty && s.exacts.isEmpty - -private def readNamesFile (path : String) : IO (List Lean.Name) := do - let content ← IO.FS.readFile path - let lines := content.splitOn "\n" - pure <| lines.filterMap fun raw => - let cs := raw.toList.dropWhile Char.isWhitespace - let trimmed := String.ofList (cs.reverse.dropWhile Char.isWhitespace).reverse - if trimmed.isEmpty || trimmed.startsWith "#" then none - else some trimmed.toName - -private def resolveSeedSpec (p : Cli.Parsed) : IO (Option SeedSpec) := do - let nsFlag := p.flag? "ns" - let constsFlag := p.flag? "consts" - let fileFlag := p.flag? 
"consts-file" - if nsFlag.isNone && constsFlag.isNone && fileFlag.isNone then - return none - let mut prefixes : List Lean.Name := [] - let mut exacts : List Lean.Name := [] - if let some flag := nsFlag then - let raw := flag.as! String - prefixes := parsePrefixes raw - if prefixes.isEmpty then - IO.println s!"[check-ixon] warning: --ns '{raw}' parsed to empty list" - if let some flag := constsFlag then - let raw := flag.as! String - let parsed := parsePrefixes raw - if parsed.isEmpty then - IO.println s!"[check-ixon] warning: --consts '{raw}' parsed to empty list" - exacts := exacts ++ parsed - if let some flag := fileFlag then - let path := flag.as! String - let parsed ← readNamesFile path - if parsed.isEmpty then - IO.println s!"[check-ixon] warning: --consts-file '{path}' yielded zero names" - else - IO.println s!"[check-ixon] --consts-file '{path}': read {parsed.length} name(s)" - exacts := exacts ++ parsed - let spec : SeedSpec := { prefixes, exacts } - if spec.isEmpty then - IO.println "[check-ixon] warning: filter flags supplied but parsed to empty selection" - return some spec - -private def selectNames (allNames : Array Lean.Name) - (spec : Option SeedSpec) : IO (Array Lean.Name) := do - match spec with - | none => pure allNames - | some s => - let exactSet : Std.HashSet Lean.Name := - s.exacts.foldl (fun acc n => acc.insert n) (Std.HashSet.emptyWithCapacity s.exacts.length) - let mut missing : Array Lean.Name := #[] - for n in s.exacts do - if !allNames.contains n then - missing := missing.push n - if !missing.isEmpty then - IO.println s!"[check-ixon] warning: {missing.size}/{s.exacts.length} exact name(s) not in env:" - let shown := min 20 missing.size - for n in missing[:shown] do - IO.println s!" - {n}" - if missing.size > 20 then - IO.println s!" ... 
({missing.size - 20} more not shown)" - let seeds := allNames.filter fun n => - exactSet.contains n || s.prefixes.any (·.isPrefixOf n) - IO.println s!"[check-ixon] filter: {s.prefixes.length} prefix(es), {s.exacts.length} exact(s) -> {seeds.size} seed constants" - pure seeds - -private def reportFailures (failures : Array (Lean.Name × String)) - (limit : Nat := 30) : IO Unit := do - if failures.isEmpty then return - IO.println s!"[check-ixon] {failures.size} failure(s):" - let shown := min limit failures.size - for (name, msg) in failures[:shown] do - IO.println s!" x {name}: {msg}" - if failures.size > limit then - IO.println s!" ... ({failures.size - limit} more failures suppressed)" - -def runCheckIxonCmd (p : Cli.Parsed) : IO UInt32 := do - let some env := p.flag? "env" - | p.printError "error: must specify --env" - return 1 - let envPath := env.as! String - let verbose := p.flag? "verbose" |>.isSome - - IO.println s!"Running Ix kernel check on serialized env {envPath}" - let spec ← resolveSeedSpec p - let seedNames ← - match spec with - | some s => - if s.prefixes.isEmpty && !s.exacts.isEmpty then - IO.println s!"[check-ixon] exact-only filter: {s.exacts.length} name(s); skipping full env name preflight" - pure s.exacts.toArray - else - let namesInEnv ← rsIxonNamesFFI envPath - IO.println s!"Total checkable names in env: {namesInEnv.size}" - selectNames namesInEnv spec - | none => - let namesInEnv ← rsIxonNamesFFI envPath - IO.println s!"Total checkable names in env: {namesInEnv.size}" - pure namesInEnv - if spec.isSome && seedNames.isEmpty then - IO.println "[check-ixon] error: filter resolved to zero constants; refusing to run full-env check" - return 1 - IO.println s!"[check-ixon] checking {seedNames.size} seed constant(s)" - - let expectPass : Array Bool := Array.replicate seedNames.size true - -- Pass an empty string when --fail-out is unset; the Rust side treats "" - -- as "no streaming file". 
When the flag is set, Rust opens the file at - -- start-of-run, writes a header, appends one record per failure as it's - -- detected (flushed immediately), and finalises with a footer. That's - -- what makes the file visible to `tail -f` during a long run instead of - -- being dumped only after every constant finishes. - let failOutPath : String := - match p.flag? "fail-out" with - | some flag => flag.as! String - | none => "" - let start ← IO.monoMsNow - let results ← rsCheckIxonFFI envPath seedNames expectPass (!verbose) failOutPath - let elapsed := (← IO.monoMsNow) - start - - let mut passed := 0 - let mut failures : Array (Lean.Name × String) := #[] - for i in [:seedNames.size] do - match results[i]! with - | none => passed := passed + 1 - | some err => failures := failures.push (seedNames[i]!, err.message) - - IO.println s!"[check-ixon] checked {seedNames.size} constants in {elapsed.formatMs}" - IO.println s!"[check-ixon] {passed}/{seedNames.size} passed" - reportFailures failures - - if !failOutPath.isEmpty then - IO.println s!"[check-ixon] streamed {failures.size} failure(s) to {failOutPath}" - - IO.println s!"##check-ixon## {elapsed} {passed} {failures.size} {seedNames.size}" - return if failures.isEmpty then 0 else 1 - -end Ix.Cli.CheckIxonCmd - -open Ix.Cli.CheckIxonCmd in -private def withCmdName (cmd : Cli.Cmd) (name : String) : Cli.Cmd := - match cmd with - | Cli.Cmd.init m run subCmds ext => - Cli.Cmd.init { m with name := name } run subCmds ext - -open Ix.Cli.CheckIxonCmd in -def checkIxonCmd : Cli.Cmd := withCmdName `[Cli| - checkIxon VIA runCheckIxonCmd; - "Typecheck a serialized Ixon environment through the Ix Rust kernel" - - FLAGS: - env : String; "Path to a serialized Ixon.Env file produced by `ix compile --out`" - ns : String; "Comma-separated Lean name prefixes to check" - consts : String; "Comma-separated exact constant names to check" - "consts-file" : String; "Path to a file with one constant name per line. 
'#' comments and blank lines ignored." - "fail-out" : String; "Write failing constant names to this path" - verbose; "Log every constant on its own line (default: quiet ephemeral progress)" -] "check-ixon" - -end diff --git a/Ix/KernelCheck.lean b/Ix/KernelCheck.lean index c0f711f0..a4c54ace 100644 --- a/Ix/KernelCheck.lean +++ b/Ix/KernelCheck.lean @@ -112,34 +112,29 @@ opaque rsCheckIxonFFI : IO (Array (Option CheckError)) /-- FFI: list checkable names from a serialized Ixon env file. Used by the - `check-ixon` CLI to support `--ns` filtering without rebuilding Lean. -/ + `ix check` CLI to support `--ns` filtering without rebuilding Lean. -/ @[extern "rs_kernel_ixon_names"] opaque rsIxonNamesFFI : @& String → IO (Array Lean.Name) -/-- FFI: anonymous-mode type-check by address. - - Loads the `.ixe` file at the given path, ingresses every constant into a - `KEnv` (with all metadata fields erased to `()` at the type level), - then runs the kernel typechecker on each requested address. - - The address-based surface (no Lean.Name input) reflects what the anon - kernel actually consumes: structural, content-addressed identities with - no Lean-side names. Useful for zkPCC verifiers that hold only claim - addresses, and for tests that want to assert metadata-free typechecking. - - Implemented in `src/ffi/kernel.rs::rs_kernel_check_consts_anon`. The - kernel's typechecking logic structurally cannot read metadata when - running in `Anon` mode — every `M::MField` is `()`. - - Note: today's `ixon_ingress` still consults `Env::named`/`Env::names` - internally to enumerate work items, even in Anon mode. The resulting - kernel state has no metadata, so the typechecking step is anon, but - full ingress-level metadata isolation is a follow-up. -/ -@[extern "rs_kernel_check_consts_anon"] -opaque rsCheckConstsAnonFFI : +/-- FFI: metadata-free anonymous-mode type-check of an entire `.ixe`. 
+ + Loads the env via `IxonEnv::get_anon` (which discards the + `named`/`names`/`comms` sections during deserialization), enumerates + every kernel-checkable target by iterating `consts` (skipping + projection constants — they're covered by their parent Muts block + work item, with projection addresses reconstructed deterministically + via `Constant::commit`), and runs `TypeChecker::check_const` + on each. The kernel's typechecking logic structurally cannot read + metadata: every `M::MField` is `()` in Anon mode. + + `fail_out` is a streaming failure log path; pass `""` to disable. + Progress labels use `@` instead of Lean names — the kernel + operates on addresses only. -/ +@[extern "rs_kernel_check_anon"] +opaque rsCheckAnonFFI : @& String → -- .ixe path - @& Array Address → -- addresses to type-check @& Bool → -- quiet + @& String → -- fail-out path ("" = none) IO (Array (Option CheckError)) end Ix.KernelCheck diff --git a/Main.lean b/Main.lean index de08d39c..d1091613 100644 --- a/Main.lean +++ b/Main.lean @@ -1,7 +1,6 @@ --import Ix.Cli.ProveCmd --import Ix.Cli.StoreCmd import Ix.Cli.CheckCmd -import Ix.Cli.CheckIxonCmd import Ix.Cli.CompileCmd import Ix.Cli.IngressCmd import Ix.Cli.ValidateCmd @@ -21,7 +20,6 @@ def ixCmd : Cli.Cmd := `[Cli| --storeCmd; compileCmd; checkCmd; - checkIxonCmd; ingressCmd; validateCmd; serveCmd; diff --git a/src/ffi/ixon/serialize.rs b/src/ffi/ixon/serialize.rs index 94e0facb..5106a9b6 100644 --- a/src/ffi/ixon/serialize.rs +++ b/src/ffi/ixon/serialize.rs @@ -107,7 +107,9 @@ pub extern "C" fn rs_eq_env_serialization( return false; } for rc in &decoded.consts { - match rust_env.consts.get(&rc.addr) { + // Materialize the lazy entry to compare structured `Constant` values. 
+ let stored = rust_env.get_const(&rc.addr); + match stored { Some(c) if *c == rc.constant => {}, Some(_) => { if debug { diff --git a/src/ffi/kernel.rs b/src/ffi/kernel.rs index 4edcf204..cfa19063 100644 --- a/src/ffi/kernel.rs +++ b/src/ffi/kernel.rs @@ -74,11 +74,12 @@ use crate::ix::kernel::ingress::{ }; #[cfg(feature = "test-ffi")] use crate::ix::kernel::ingress::{ixon_ingress, lean_ingress}; -use crate::ix::kernel::anon_env::AnonEnv; use crate::ix::kernel::id::KId; -use crate::ix::kernel::ingress::ixon_ingress; -use crate::ix::kernel::mode::{Anon, Meta}; -use crate::lean::LeanIxAddress; +use crate::ix::kernel::ingress::{ + anon_ctor_proj_addr, anon_defn_proj_addr, anon_indc_proj_addr, + anon_recr_proj_addr, +}; +use crate::ix::kernel::mode::{Anon, CheckDupLevelParams, KernelMode, Meta}; use crate::ix::kernel::tc::TypeChecker; unsafe extern "C" { @@ -318,7 +319,7 @@ pub extern "C" fn rs_kernel_check_consts( // --------------------------------------------------------------------- // Deep recursor expansions push the Rust stack. A dedicated thread with a // large stack matches the old ix_old pattern. - let results = match run_checks_on_large_stack( + let results = match run_checks_on_large_stack::( Arc::clone(&ixon_env), lookups, names_vec.clone(), @@ -421,7 +422,7 @@ pub extern "C" fn rs_kernel_check_malformed_rec_rule_ixon( t2.elapsed() ); - let kid = crate::ix::kernel::id::KId::new(rec_addr, rec_name); + let kid = KId::new(rec_addr, rec_name); let result = { let mut tc = TypeChecker::new(&mut kenv); match tc.check_const(&kid) { @@ -441,9 +442,14 @@ fn poison_second_rec_rule_returns_first_minor( .lookup_name(rec_name) .ok_or_else(|| format!("{}: missing Named entry", rec_name.pretty()))?; let rec_addr = named.addr.clone(); - let mut rec_constant = ixon_env.get_const(&rec_addr).ok_or_else(|| { + // Materialize then clone out of the Arc — we mutate the constant + // in-place to poison a recursor rule before storing it back. 
+ let rec_arc = ixon_env.get_const(&rec_addr).ok_or_else(|| { format!("{}: missing constant {}", rec_name.pretty(), rec_addr.hex()) })?; + let mut rec_constant: crate::ix::ixon::constant::Constant = + (*rec_arc).clone(); + drop(rec_arc); match &mut rec_constant.info { IxonCI::Recr(rec) => { @@ -471,14 +477,16 @@ fn poison_second_rec_rule_returns_first_minor( }, IxonCI::RPrj(proj) => { let block_addr = proj.block.clone(); - let mut block_constant = - ixon_env.get_const(&block_addr).ok_or_else(|| { - format!( - "{}: recursor projection points at missing block {}", - rec_name.pretty(), - block_addr.hex() - ) - })?; + let block_arc = ixon_env.get_const(&block_addr).ok_or_else(|| { + format!( + "{}: recursor projection points at missing block {}", + rec_name.pretty(), + block_addr.hex() + ) + })?; + let mut block_constant: crate::ix::ixon::constant::Constant = + (*block_arc).clone(); + drop(block_arc); match &mut block_constant.info { IxonCI::Muts(members) => { let idx = usize::try_from(proj.idx).map_err(|_e| { @@ -659,7 +667,7 @@ pub extern "C" fn rs_kernel_check_ixon( let total = names_vec.len(); let t3 = Instant::now(); - let results = match run_checks_on_large_stack( + let results = match run_checks_on_large_stack::( ixon_env, lookups, names_vec, @@ -698,134 +706,6 @@ pub extern "C" fn rs_kernel_check_ixon( build_result_array(&results) } -/// FFI: anonymous-mode type-check by address. -/// -/// Lean signature: -/// ```lean -/// @[extern "rs_kernel_check_consts_anon"] -/// opaque rsCheckConstsAnonFFI : -/// @& String → -- .ixe path -/// @& Array Ix.Address → -- addresses to check -/// @& Bool → -- quiet -/// IO (Array (Option CheckError)) -/// ``` -/// -/// Loads the `.ixe`, builds a `KEnv` via `ixon_ingress::`, -/// and runs `TypeChecker::check_const` for each requested -/// address. 
All metadata fields (`KConst::name`, -/// `level_params`, binder names, mdata) are erased to `()` at the -/// type level, so the kernel's typechecking logic cannot read -/// metadata even if ingress consulted it. -/// -/// The address surface (not name) makes the anon path's entry -/// independent of Lean.Name → Address resolution — verifiers that -/// only have content addresses (e.g., zkPCC claim subjects) can -/// invoke this directly without metadata. -/// -/// Results come back in input order, paired by the caller with -/// `addrs[i]`. -#[unsafe(no_mangle)] -pub extern "C" fn rs_kernel_check_consts_anon( - env_path: LeanString>, - addrs: LeanArray>, - quiet: LeanBool>, -) -> LeanIOResult { - let total_start = Instant::now(); - let _quiet = quiet.to_bool(); - let path = env_path.to_string(); - let addrs_vec: Vec
= LeanIxAddress::decode_array(addrs); - - // Load env - let t0 = Instant::now(); - let bytes = match std::fs::read(&path) { - Ok(b) => b, - Err(e) => { - return LeanIOResult::error_string(&format!( - "rs_kernel_check_consts_anon: failed to read {path}: {e}" - )); - }, - }; - eprintln!( - "[rs_kernel_check_anon] read env: {:>8.1?} ({} bytes)", - t0.elapsed(), - bytes.len() - ); - - let t1 = Instant::now(); - let mut slice: &[u8] = &bytes; - let ixon_env = match IxonEnv::get(&mut slice) { - Ok(env) => env, - Err(e) => { - return LeanIOResult::error_string(&format!( - "rs_kernel_check_consts_anon: failed to deserialize {path}: {e}" - )); - }, - }; - drop(bytes); - eprintln!( - "[rs_kernel_check_anon] deserialize:{:>8.1?} ({} consts)", - t1.elapsed(), - ixon_env.const_count() - ); - - // Build KEnv via ingress. Note: ixon_ingress consults named/ - // names internally to enumerate work items, but the resulting - // KEnv has `()` for every metadata field — the kernel's - // typechecking logic structurally cannot read metadata. 
- let t2 = Instant::now(); - let _anon_env_view = AnonEnv::from_env(&ixon_env); - let (mut kenv, _intern) = match ixon_ingress::(&ixon_env) { - Ok(pair) => pair, - Err(e) => { - return build_uniform_error( - addrs_vec.len(), - &format!("[anon ingress] {e}"), - ); - }, - }; - eprintln!("[rs_kernel_check_anon] anon ingress:{:>8.1?}", t2.elapsed()); - - // Per-address check - let t3 = Instant::now(); - let total = addrs_vec.len(); - let mut results: Vec = Vec::with_capacity(total); - for (i, addr) in addrs_vec.iter().enumerate() { - let kid = KId::::new(addr.clone(), ()); - if !kenv.consts.contains_key(&kid) { - results.push(Err(( - ErrKind::Compile, - format!("[anon] no kernel const at {}", addr.hex()), - ))); - continue; - } - let mut tc = TypeChecker::::new(&mut kenv); - let r = tc.check_const(&kid); - let label = format!("[{}/{}] {}", i + 1, total, addr.hex()); - match r { - Ok(()) => { - eprintln!("{label} ok"); - results.push(Ok(())); - }, - Err(e) => { - let msg = format!("{e:?}"); - eprintln!("{label} FAIL: {msg}"); - results.push(Err((ErrKind::Kernel, msg))); - }, - } - } - eprintln!( - "[rs_kernel_check_anon] checks: {:>8.1?} ({} addrs)", - t3.elapsed(), - total - ); - eprintln!( - "[rs_kernel_check_anon] total: {:>8.1?}", - total_start.elapsed() - ); - - build_result_array(&results) -} - /// FFI: list the checkable names in a serialized Ixon environment. 
#[unsafe(no_mangle)] pub extern "C" fn rs_kernel_ixon_names( @@ -1104,7 +984,7 @@ fn check_schedule_block_addr( } } -fn run_checks_on_large_stack( +fn run_checks_on_large_stack( ixon_env: Arc, lookups: Arc, names: Vec, @@ -1112,7 +992,10 @@ fn run_checks_on_large_stack( ungrounded: FxHashMap, quiet: bool, failure_log: Option>, -) -> Result, String> { +) -> Result, String> +where + M::MField>: CheckDupLevelParams, +{ if names.is_empty() { eprintln!("[rs_kernel_check] checking 0 constants..."); return Ok(Vec::new()); @@ -1131,7 +1014,7 @@ fn run_checks_on_large_stack( let worker_count = resolve_kernel_check_workers(work.len(), quiet); if worker_count == 1 { - return run_checks_serial_on_large_stack( + return run_checks_serial_on_large_stack::( ixon_env, lookups, names, @@ -1143,7 +1026,7 @@ fn run_checks_on_large_stack( ); } - run_checks_parallel_on_large_stacks( + run_checks_parallel_on_large_stacks::( ixon_env, lookups, names, @@ -1156,7 +1039,7 @@ fn run_checks_on_large_stack( ) } -fn run_checks_serial_on_large_stack( +fn run_checks_serial_on_large_stack( ixon_env: Arc, lookups: Arc, names: Vec, @@ -1165,11 +1048,14 @@ fn run_checks_serial_on_large_stack( work: Vec, quiet: bool, failure_log: Option>, -) -> Result, String> { +) -> Result, String> +where + M::MField>: CheckDupLevelParams, +{ thread::Builder::new() .stack_size(KERNEL_CHECK_STACK_SIZE) .spawn(move || { - check_consts_loop( + check_consts_loop::( ixon_env, lookups, names, @@ -1188,7 +1074,7 @@ fn run_checks_serial_on_large_stack( // All by-value arguments below are immediately wrapped in `Arc` for sharing // with worker threads — clippy can't see that, so suppress the lint. 
#[allow(clippy::needless_pass_by_value)] -fn run_checks_parallel_on_large_stacks( +fn run_checks_parallel_on_large_stacks( ixon_env: Arc, lookups: Arc, names: Vec, @@ -1198,7 +1084,10 @@ fn run_checks_parallel_on_large_stacks( quiet: bool, worker_count: usize, failure_log: Option>, -) -> Result, String> { +) -> Result, String> +where + M::MField>: CheckDupLevelParams, +{ let total = names.len(); let work_total = work.len(); eprintln!( @@ -1234,7 +1123,7 @@ fn run_checks_parallel_on_large_stacks( .name(format!("ix-kernel-check-{worker_idx}")) .stack_size(KERNEL_CHECK_STACK_SIZE) .spawn(move || { - let mut kenv = KEnv::::new(); + let mut kenv = KEnv::::new(); let clear_every = kernel_check_clear_every(); let mut checks_since_clear = clear_every; let diag_threshold = kernel_check_diag_threshold(); @@ -1363,6 +1252,377 @@ fn resolve_kernel_check_workers_from( if total == 0 { 1 } else { available_parallelism.max(1).min(total) } } +// ============================================================================ +// Anon-mode parallel runner +// ============================================================================ +// +// Companion to `run_checks_parallel_on_large_stacks` for the metadata-free +// anon path. Iterates `env.consts` exactly once to enumerate work items +// (block or standalone), then dispatches to workers each running +// `TypeChecker::::new_with_lazy_anon` against its own `KEnv`. +// The lazy ingress mechanism (in `tc.rs`) handles cross-block faults +// without consulting metadata. + +#[derive(Clone, Debug)] +enum AnonWorkItem { + /// A standalone (non-mutual, non-projection) constant. + Standalone { result_idx: usize, addr: Address }, + /// A Muts block. `primary_addr` is the first member's projection address; + /// `result_idxs` enumerates every kernel-checkable target produced by + /// the block (each member's projection + each ctor's CPrj of inductive + /// members), all sharing the same check result via the kernel's + /// block coordination. 
+ Block { + primary_addr: Address, + result_idxs: Vec, + }, +} + +/// One pass over `env.consts`: enumerate work items + the kernel-checkable +/// target addresses (one per result slot). Skips projection constants +/// (covered by their parent block) and Muts addresses themselves +/// (blocks aren't kernel KIds). +fn build_anon_work( + env: &IxonEnv, +) -> Result<(Vec, Vec
), String> { + use crate::ix::ixon::constant::ConstantInfo as CI; + use crate::ix::ixon::constant::MutConst as MC; + + let mut work: Vec = Vec::new(); + let mut addrs: Vec
= Vec::new(); + + // Sort keys for deterministic ordering across runs. + let mut keys: Vec
= + env.consts.iter().map(|e| e.key().clone()).collect(); + keys.sort_unstable(); + + for addr in keys { + let lc = env.consts.get(&addr).ok_or_else(|| { + format!("build_anon_work: missing const at {}", addr.hex()) + })?; + let constant = lc.value().get().map_err(|e| { + format!("build_anon_work: materialize {}: {e}", addr.hex()) + })?; + match &constant.info { + CI::IPrj(_) | CI::CPrj(_) | CI::RPrj(_) | CI::DPrj(_) => { + // Skip; covered by parent block. + }, + CI::Muts(members) => { + // Compute kernel-checkable targets deterministically. Each + // member contributes its projection address; inductive members + // contribute one CPrj per constructor. + let mut targets: Vec
= Vec::new(); + for (i, member) in members.iter().enumerate() { + let i = i as u64; + let member_addr = match member { + MC::Defn(_) => anon_defn_proj_addr(&addr, i), + MC::Indc(_) => anon_indc_proj_addr(&addr, i), + MC::Recr(_) => anon_recr_proj_addr(&addr, i), + }; + targets.push(member_addr); + if let MC::Indc(ind) = member { + for cidx in 0..ind.ctors.len() as u64 { + targets.push(anon_ctor_proj_addr(&addr, i, cidx)); + } + } + } + if targets.is_empty() { + continue; + } + let primary_addr = targets[0].clone(); + let result_idxs: Vec = + (addrs.len()..addrs.len() + targets.len()).collect(); + addrs.extend(targets); + work.push(AnonWorkItem::Block { primary_addr, result_idxs }); + }, + CI::Defn(_) | CI::Recr(_) | CI::Axio(_) | CI::Quot(_) => { + let result_idx = addrs.len(); + addrs.push(addr.clone()); + work.push(AnonWorkItem::Standalone { result_idx, addr: addr.clone() }); + }, + } + } + + Ok((work, addrs)) +} + +#[allow(clippy::needless_pass_by_value)] +fn run_anon_checks_parallel( + env: Arc, + work: Vec, + addrs: Vec
, + quiet: bool, + failure_log: Option>, +) -> Result, String> { + let total = addrs.len(); + let work_total = work.len(); + let worker_count = resolve_kernel_check_workers(work_total, quiet); + eprintln!( + "[rs_kernel_check_anon] checking {work_total} work item(s) for {total} consts with {worker_count} worker(s)..." + ); + + let work = Arc::new(work); + let addrs = Arc::new(addrs); + let next_index = Arc::new(AtomicUsize::new(0)); + let results: Arc>> = + Arc::new((0..total).map(|_| OnceLock::new()).collect()); + let progress = + Arc::new(ParallelProgress::new(work_total, worker_count, quiet)); + let mut reporter = ParallelProgress::spawn_reporter(Arc::clone(&progress)); + + let mut handles: Vec> = + Vec::with_capacity(worker_count); + for worker_idx in 0..worker_count { + let env = Arc::clone(&env); + let work = Arc::clone(&work); + let addrs = Arc::clone(&addrs); + let next_index = Arc::clone(&next_index); + let results = Arc::clone(&results); + let progress_worker = Arc::clone(&progress); + let failure_log_worker = failure_log.clone(); + + let handle = match thread::Builder::new() + .name(format!("ix-kernel-check-anon-{worker_idx}")) + .stack_size(KERNEL_CHECK_STACK_SIZE) + .spawn(move || { + let mut kenv = KEnv::::new(); + let clear_every = kernel_check_clear_every(); + let mut checks_since_clear = clear_every; + loop { + let work_idx = next_index.fetch_add(1, Ordering::Relaxed); + if work_idx >= work_total { + break; + } + let item = &work[work_idx]; + if checks_since_clear >= clear_every { + kenv.clear_releasing_memory(); + checks_since_clear = 0; + } + let (primary_addr, result_idxs): (Address, Vec) = match item { + AnonWorkItem::Standalone { result_idx, addr } => { + (addr.clone(), vec![*result_idx]) + }, + AnonWorkItem::Block { primary_addr, result_idxs } => { + (primary_addr.clone(), result_idxs.clone()) + }, + }; + let display = format!("@{}", &primary_addr.hex()[..16]); + let prefix = format!(" [{}/{work_total}] {display}", work_idx + 1); + 
progress_worker.begin(worker_idx, &prefix); + + let tc_start = Instant::now(); + let kid = KId::::new(primary_addr.clone(), ()); + let check_res = { + let mut tc = + TypeChecker::::new_with_lazy_anon(&mut kenv, &env); + tc.check_const(&kid) + }; + let elapsed = tc_start.elapsed(); + let result: CheckRes = match check_res { + Ok(()) => Ok(()), + Err(e) => Err((ErrKind::Kernel, format!("{e}"))), + }; + + let outcome = CheckOutcome { + progress_index: work_idx, + progress_total: work_total, + display: display.clone(), + should_pass: true, + result: result.clone(), + status: CheckStatus::Checked, + elapsed: Some(elapsed), + peak: None, + }; + progress_worker.finish(worker_idx, &outcome); + + for &result_idx in &result_idxs { + let _ = results[result_idx].set(result.clone()); + if let (Some(log), Err((_, msg))) = + (failure_log_worker.as_ref(), result.as_ref()) + { + let label = format!("@{}", &addrs[result_idx].hex()[..16]); + log.record(&label, msg); + } + } + checks_since_clear += 1; + } + }) { + Ok(h) => h, + Err(e) => { + progress.stop_reporter(); + if let Some(r) = reporter.take() { + let _ = r.join(); + } + for h in handles { + let _ = h.join(); + } + return Err(format!("spawn anon worker: {e}")); + }, + }; + handles.push(handle); + } + + let mut panicked = false; + for h in handles { + if h.join().is_err() { + panicked = true; + } + } + progress.stop_reporter(); + if let Some(r) = reporter { + let _ = r.join(); + } + progress.log_mem_summary(); + if panicked { + return Err("anon worker panicked".to_string()); + } + + let mut ordered: Vec = Vec::with_capacity(total); + for i in 0..total { + match results[i].get() { + Some(r) => ordered.push(r.clone()), + None => { + return Err(format!("anon worker missed result idx {i}")); + }, + } + } + Ok(ordered) +} + +/// FFI: typecheck every kernel-checkable constant in a `.ixe` using the +/// metadata-free anon kernel. 
+/// +/// - Loads the env via `IxonEnv::get_anon` (discards `named` / `names` / +/// `comms` sections during deserialization). +/// - Enumerates work items by walking `env.consts` once and skipping +/// projection constants (`IPrj`/`CPrj`/`RPrj`/`DPrj`); Muts blocks +/// become block work items whose member + ctor projection addresses +/// are reconstructed deterministically via `Constant::commit`. +/// - Workers each get their own `KEnv` and a +/// `LazyAnonIngress`-backed `TypeChecker`. Deep refs fault in +/// lazily via the anon-mode shallow ingress (`ingress_anon_addr_shallow`). +/// - Returns `Array (Option CheckError)`, one slot per kernel-checkable +/// address discovered during enumeration. +#[unsafe(no_mangle)] +pub extern "C" fn rs_kernel_check_anon( + env_path: LeanString>, + quiet: LeanBool>, + fail_out: LeanString>, +) -> LeanIOResult { + let total_start = Instant::now(); + let quiet = quiet.to_bool(); + let path = env_path.to_string(); + let fail_out_path = fail_out.to_string(); + let fail_out_path = + if fail_out_path.is_empty() { None } else { Some(fail_out_path) }; + + let t0 = Instant::now(); + let bytes = match std::fs::read(&path) { + Ok(b) => b, + Err(e) => { + return LeanIOResult::error_string(&format!( + "rs_kernel_check_anon: failed to read {path}: {e}" + )); + }, + }; + eprintln!( + "[rs_kernel_check_anon] read env: {:>8.1?} ({} bytes)", + t0.elapsed(), + bytes.len() + ); + + let t1 = Instant::now(); + let mut slice: &[u8] = &bytes; + let ixon_env = match IxonEnv::get_anon(&mut slice) { + Ok(env) => env, + Err(e) => { + return LeanIOResult::error_string(&format!( + "rs_kernel_check_anon: failed to deserialize {path}: {e}" + )); + }, + }; + drop(bytes); + eprintln!( + "[rs_kernel_check_anon] deserialize: {:>8.1?} ({} consts; \ + named={} names={} comms={})", + t1.elapsed(), + ixon_env.const_count(), + ixon_env.named_count(), + ixon_env.name_count(), + ixon_env.comm_count(), + ); + + let t2 = Instant::now(); + let (work, addrs) = match 
build_anon_work(&ixon_env) { + Ok(pair) => pair, + Err(e) => { + return LeanIOResult::error_string(&format!( + "rs_kernel_check_anon: build_anon_work: {e}" + )); + }, + }; + eprintln!( + "[rs_kernel_check_anon] build work: {:>8.1?} ({} items, {} targets)", + t2.elapsed(), + work.len(), + addrs.len() + ); + + let failure_log: Option> = match fail_out_path.as_deref() { + None => None, + Some(out_path) => match FailureLog::open(out_path, &path, addrs.len()) { + Ok(log) => { + eprintln!("[rs_kernel_check_anon] streaming failures to {out_path}"); + Some(Arc::new(log)) + }, + Err(e) => { + return LeanIOResult::error_string(&format!( + "rs_kernel_check_anon: failed to open fail-out file {out_path}: {e}" + )); + }, + }, + }; + + let total = addrs.len(); + let t3 = Instant::now(); + let ixon_env_arc = Arc::new(ixon_env); + let results = match run_anon_checks_parallel( + ixon_env_arc, + work, + addrs, + quiet, + failure_log.clone(), + ) { + Ok(r) => r, + Err(msg) => { + if let Some(log) = failure_log.as_ref() { + log.finalize(); + } + return build_uniform_error(total, &format!("[thread] {msg}")); + }, + }; + + let passed = results.iter().filter(|r| r.is_ok()).count(); + let failed = results.iter().filter(|r| r.is_err()).count(); + eprintln!( + "[rs_kernel_check_anon] {passed}/{total} passed, {failed} failed ({:.1?})", + t3.elapsed() + ); + eprintln!( + "[rs_kernel_check_anon] total: {:>8.1?}", + total_start.elapsed() + ); + if let Some(log) = failure_log.as_ref() { + log.finalize(); + eprintln!( + "[rs_kernel_check_anon] streamed {} failure(s) to fail-out", + log.count() + ); + } + + build_result_array(&results) +} + #[cfg(test)] mod tests { use super::{compact_in_flight_label, resolve_kernel_check_workers_from}; @@ -1492,8 +1752,8 @@ fn kernel_check_mem_stats_enabled() -> bool { /// pushed any single cache past `threshold` entries, or when this block /// set a new per-worker peak. Used only with `IX_KERNEL_CHECK_DIAG=1`. 
#[allow(clippy::too_many_arguments)] -fn log_block_diag_if_big( - kenv: &KEnv, +fn log_block_diag_if_big( + kenv: &KEnv, worker_idx: usize, work_idx: usize, work_total: usize, @@ -1618,7 +1878,7 @@ impl CheckOutcome { } } -fn check_one_const( +fn check_one_const( i: usize, progress_index: usize, progress_total: usize, @@ -1627,15 +1887,29 @@ fn check_one_const( names: &[Name], expect_pass: &[bool], ungrounded: &FxHashMap, - kenv: &mut KEnv, + kenv: &mut KEnv, mut before_kernel_check: F, ) -> CheckOutcome where F: FnMut(&str), + M::MField>: CheckDupLevelParams, { let name = &names[i]; let should_pass = expect_pass.get(i).copied().unwrap_or(true); - let display = name.pretty(); + // In anon mode, surface the content address in the per-constant log + // line — the kernel itself doesn't see names. We still accept names + // as input (and resolve them via `ixon_env.named` at the FFI + // scheduling layer), so the user's identifier is preserved in the + // CLI surface; only the kernel-visible progress label switches to a + // hash. Falls back to the name if the address lookup fails. + let display = if M::HAS_META { + name.pretty() + } else { + match ixon_env.lookup_name(name) { + Some(named) => format!("@{}", &named.addr.hex()[..16]), + None => name.pretty(), + } + }; if let Some(msg) = ungrounded.get(name) { return CheckOutcome { @@ -1706,7 +1980,7 @@ where // function body — clippy flags the by-value receivers, but transferring // ownership keeps the call sites simpler. #[allow(clippy::needless_pass_by_value)] -fn check_consts_loop( +fn check_consts_loop( ixon_env: Arc, lookups: Arc, names: Vec, @@ -1715,7 +1989,10 @@ fn check_consts_loop( work: Vec, quiet: bool, failure_log: Option>, -) -> Vec { +) -> Vec +where + M::MField>: CheckDupLevelParams, +{ let total = names.len(); let work_total = work.len(); let mut results: Vec> = vec![None; total]; @@ -1724,7 +2001,7 @@ fn check_consts_loop( // Terminal width is only needed for ephemeral clearing in quiet mode. 
In // verbose mode we never rewrite, so the value is ignored. let mut progress = Progress::new(quiet); - let mut kenv = KEnv::::new(); + let mut kenv = KEnv::::new(); let clear_every = kernel_check_clear_every(); let mut checks_since_clear = clear_every; @@ -2238,8 +2515,8 @@ fn term_cols_stderr() -> usize { /// Format a `TcError` for user-facing Lean-side display. For the two cases we /// hit most often we emit a human-tuned multi-line message; everything else /// falls through to `Debug`. -fn format_tc_error( - e: &TcError, +fn format_tc_error( + e: &TcError, ixon_env: &IxonEnv, lookups: &IxonIngressLookups, ) -> String { @@ -2249,6 +2526,12 @@ fn format_tc_error( }, TcError::FunExpected { .. } => "FunExpected".to_string(), TcError::UnknownConst(addr) => { + // Address-only label in anon mode: the kernel itself doesn't + // know names, so error messages from the anon path shouldn't + // either. + if !M::HAS_META { + return format!("unknown constant ({:.12})", addr.hex()); + } let name = lookups.name_for_addr(addr).map_or_else( || { if ixon_env.consts.contains_key(addr) { diff --git a/src/ffi/lean_env.rs b/src/ffi/lean_env.rs index c1fcd4f9..13296fa9 100644 --- a/src/ffi/lean_env.rs +++ b/src/ffi/lean_env.rs @@ -3897,9 +3897,9 @@ fn analyze_const_size(stt: &crate::ix::compile::CompileState, name_str: &str) { }, }; - // Get the constant - let constant = match stt.env.consts.get(&addr) { - Some(c) => c.clone(), + // Get the constant (materialize from the lazy entry). 
+ let constant = match stt.env.get_const(&addr) { + Some(c) => c, None => { println!("\n=== Size analysis for {} ===", name_str); println!(" Constant data not found at address"); @@ -3915,7 +3915,7 @@ fn analyze_const_size(stt: &crate::ix::compile::CompileState, name_str: &str) { let dep_addrs = stt.env.transitive_deps_excl(&addr); let mut dep_breakdowns: Vec<(String, ConstSizeBreakdown)> = Vec::new(); for dep_addr in dep_addrs { - let Some(dep_const) = stt.env.consts.get(&dep_addr) else { + let Some(dep_const) = stt.env.get_const(&dep_addr) else { continue; }; // Get the name for this dependency (scan named entries) diff --git a/src/ix/compile.rs b/src/ix/compile.rs index db6a59cb..1f5784fe 100644 --- a/src/ix/compile.rs +++ b/src/ix/compile.rs @@ -3553,6 +3553,72 @@ fn compile_mutual( let univs: Vec> = cache.univs.iter().cloned().collect(); let const_count = ixon_mutuals.len(); let name_str = name.pretty(); + + // Singleton non-inductive: emit as a standalone `Defn`/`Recr` + // Constant instead of wrapping in `Muts(vec![one])`. Self-reference + // inside the body still uses `Expr::Rec(0, …)`, which the kernel + // resolves the same way for a single-member block. Eliminating the + // wrapper keeps the env structurally uniform (no degenerate Muts + // wrappers, no extra projection constants) and matches what + // `compile_single_def` produces for a non-mutual Lean Defn. + // + // Inductives are never unwrapped — their projection scheme requires + // the block. 
+ if ixon_mutuals.len() == 1 + && !matches!(&ixon_mutuals[0], IxonMutConst::Indc(_)) + { + let single = ixon_mutuals.pop().unwrap(); + let result = match single { + IxonMutConst::Defn(def) => apply_sharing_to_definition_with_stats( + def, + refs, + univs, + Some(&name_str), + ), + IxonMutConst::Recr(rec) => { + apply_sharing_to_recursor_with_stats(rec, refs, univs) + }, + IxonMutConst::Indc(_) => unreachable!(), + }; + let standalone_constant = result.constant; + let hash_consed_size = result.hash_consed_size; + let mut bytes = Vec::new(); + standalone_constant.put(&mut bytes); + let serialized_size = bytes.len(); + let addr = Address::hash(&bytes); + + if aux { + stt.env.store_const(addr.clone(), standalone_constant); + stt.block_stats.insert( + name.clone(), + BlockSizeStats { + hash_consed_size, + serialized_size, + const_count: 1, + }, + ); + for class in &sorted_classes { + for cnst in class { + let n = cnst.name(); + let meta = all_metas.get(&n).cloned().unwrap_or_default(); + stt + .env + .register_name(n.clone(), Named::new(addr.clone(), meta)); + stt.name_to_addr.insert(n.clone(), addr.clone()); + } + } + } else { + for class in &sorted_classes { + for cnst in class { + let n = cnst.name(); + let meta = all_metas.get(&n).cloned().unwrap_or_default(); + stt.promote_aux(&n, addr.clone(), meta)?; + } + } + } + return Ok(addr); + } + let compiled = compile_mutual_block(ixon_mutuals, refs, univs, Some(&name_str)); let block_addr = compiled.addr.clone(); @@ -4594,17 +4660,15 @@ mod tests { "alpha-equivalent mutual defs should have same projection address" ); - // Verify the block exists and has exactly 1 equivalence class - assert!(!stt.blocks.is_empty(), "Expected at least one block entry"); - for entry in stt.blocks.iter() { - let classes = entry.value(); - assert_eq!( - classes.len(), - 1, - "alpha-equivalent class should produce 1 class, got {}", - classes.len() - ); - } + // Alpha-equivalent mutual defs collapse to a singleton non-inductive + // class, 
which the compiler now unwraps to a standalone `Defn` + // Constant (no Muts wrapper, no block entry). Both Lean names + // point to the same standalone address (asserted above); no Ixon + // block is created. + assert!( + stt.blocks.is_empty(), + "singleton-unwrapped mutual should have no block entry" + ); } /// Test that alpha-equivalent defs in a mutual block with a non-equivalent @@ -5706,8 +5770,13 @@ mod tests { // Compile let stt = compile_env(&lean_env).expect("compile_env failed"); - // Should have a mutual block - assert!(!stt.blocks.is_empty(), "Expected at least one mutual block"); + // f and g are alpha-equivalent (both `λ x => other x` with the same + // type) so they collapse to a singleton non-inductive class. The + // compiler unwraps this to a standalone Defn — no Ixon block entry. + assert!( + stt.blocks.is_empty(), + "singleton-unwrapped mutual should have no block entry" + ); // Decompile let dstt = decompile_env(&stt).expect("decompile_env failed"); diff --git a/src/ix/compile/mutual.rs b/src/ix/compile/mutual.rs index 11f83bef..fa4bd040 100644 --- a/src/ix/compile/mutual.rs +++ b/src/ix/compile/mutual.rs @@ -190,6 +190,65 @@ pub(crate) fn compile_aux_block_with_rename( let block_refs: Vec
= cache.refs.iter().cloned().collect(); let block_univs: Vec> = cache.univs.iter().cloned().collect(); let name_str = aux_consts[0].name().pretty(); + + // Singleton non-inductive aux blocks: emit as a standalone + // `Defn`/`Recr` Constant instead of `Muts(vec![one])`. Same + // rationale as `compile_mutual` in `compile.rs` — `Expr::Rec(0, …)` + // resolves correctly against a single-member block, so the wrapper + // is pure overhead. See that file for the detailed comment. + if ixon_mutuals.len() == 1 + && !matches!(&ixon_mutuals[0], IxonMutConst::Indc(_)) + { + use crate::ix::compile::{ + apply_sharing_to_definition_with_stats, + apply_sharing_to_recursor_with_stats, + }; + let single = ixon_mutuals.pop().unwrap(); + let result = match single { + IxonMutConst::Defn(def) => apply_sharing_to_definition_with_stats( + def, + block_refs, + block_univs, + Some(&name_str), + ), + IxonMutConst::Recr(rec) => apply_sharing_to_recursor_with_stats( + rec, + block_refs, + block_univs, + ), + IxonMutConst::Indc(_) => unreachable!(), + }; + let standalone_addr = content_address(&result.constant); + stt.env.store_const(standalone_addr.clone(), result.constant); + + let mut pending_names: Vec = Vec::new(); + for cnst in &sorted_classes[0] { + let canon_n = cnst.name(); + let n = resolve_name(&canon_n); + let meta = all_metas.remove(&canon_n).unwrap_or_default(); + stt.env.register_name( + n.clone(), + Named::new(standalone_addr.clone(), meta), + ); + stt.aux_name_to_addr.insert(n.clone(), standalone_addr.clone()); + stt.aux_gen_extra_names.insert(n.clone()); + pending_names.push(n); + } + if !pending_names.is_empty() { + stt.aux_gen_pending.lock().unwrap().extend(pending_names); + } + // Ingress all registered aux constants into the kernel environment. 
+ for cnst in aux_consts { + aux_gen::expr_utils::ensure_in_kenv( + &cnst.name(), + lean_env.as_ref(), + stt, + kctx, + ); + } + return Ok(()); + } + let compiled = compile_mutual_block( ixon_mutuals, block_refs, @@ -203,24 +262,19 @@ pub(crate) fn compile_aux_block_with_rename( // Collect names for batched pending-queue push (one lock acquisition). let mut pending_names: Vec = Vec::new(); - let singleton = sorted_classes.len() == 1 - && !aux_consts.iter().any(|c| matches!(c, MutConst::Indc(_))); - - if singleton { - // Single non-inductive class: register directly with block_addr. - for cnst in &sorted_classes[0] { - let canon_n = cnst.name(); - let n = resolve_name(&canon_n); - // Meta was keyed by canonical name during compile; transfer to - // source name at lookup but preserve the meta payload. - let meta = all_metas.remove(&canon_n).unwrap_or_default(); - stt.env.register_name(n.clone(), Named::new(block_addr.clone(), meta)); - stt.aux_name_to_addr.insert(n.clone(), block_addr.clone()); - stt.aux_gen_extra_names.insert(n.clone()); - pending_names.push(n); - } - } else { - // Multi-class or inductive: create projections per member. + // Always create projection constants for every member, even when the + // block alpha-collapses to a single non-inductive class. The earlier + // singleton-collapse optimization (registering the source name + // directly at `block_addr` with no separate projection) saved one + // constant per block, but created a structural anomaly: a Muts block + // whose member had no corresponding projection in `env.consts`. That + // anomaly broke address-based anon ingress, which deterministically + // rebuilds projection addresses and expects them present. Always + // emitting projections keeps the env structurally uniform: every + // kernel-checkable member has its own projection address in + // `env.consts`. The cost is one extra Constant per singleton block — + // negligible at mathlib scale. 
+ { for (idx, class) in sorted_classes.iter().enumerate() { let idx = idx as u64; for cnst in class { diff --git a/src/ix/ixon/env.rs b/src/ix/ixon/env.rs index 46777f87..e765ae7b 100644 --- a/src/ix/ixon/env.rs +++ b/src/ix/ixon/env.rs @@ -1,12 +1,12 @@ //! Environment for storing Ixon data. use dashmap::DashMap; -use rustc_hash::FxHashSet; +use rustc_hash::{FxHashMap, FxHashSet}; use std::collections::VecDeque; use std::sync::Arc; use crate::ix::address::Address; -use crate::ix::env::Name; +use crate::ix::env::{Name, ReducibilityHints}; use super::comm::Comm; use super::constant::Constant; @@ -84,6 +84,20 @@ pub struct Env { pub names: DashMap, /// Cryptographic commitments: commitment Address -> Comm pub comms: DashMap, + /// Reducibility hints sidecar harvested by [`Env::get_anon`] from the + /// otherwise-discarded Named section. Keyed by the constant's + /// projection/standalone address (i.e. `Named.addr` — the address the + /// kernel sees, **not** the name-hash address). Empty for envs loaded + /// via [`Env::get`] / [`Env::new`] / `store_*`; meta-mode ingress + /// pulls hints directly from `Named.meta` and ignores this field. + /// + /// Anon-mode ingress passes these hints through to + /// `ingress_defn` so the kernel's lazy-delta tiebreak + /// (`def_eq::def_rank_id`) sees realistic heights instead of the + /// constant `Regular(0)` fallback. Hints are performance advice — + /// supplying them in anon mode does not relax the kernel's + /// metadata-free correctness model. 
+ pub anon_hints: FxHashMap, } impl Env { @@ -94,6 +108,7 @@ impl Env { blobs: DashMap::new(), names: DashMap::new(), comms: DashMap::new(), + anon_hints: FxHashMap::default(), } } @@ -273,7 +288,14 @@ impl Clone for Env { comms.insert(entry.key().clone(), entry.value().clone()); } - Env { consts, named, blobs, names, comms } + Env { + consts, + named, + blobs, + names, + comms, + anon_hints: self.anon_hints.clone(), + } } } diff --git a/src/ix/ixon/serialize.rs b/src/ix/ixon/serialize.rs index 15abfb36..9dc6b9d0 100644 --- a/src/ix/ixon/serialize.rs +++ b/src/ix/ixon/serialize.rs @@ -1473,7 +1473,7 @@ impl Env { } let stored_root = get_address(buf)?; - let env = Env::new(); + let mut env = Env::new(); // Section 1: Blobs (kept) let num_blobs = get_u64(buf)?; @@ -1526,11 +1526,22 @@ impl Env { names_lookup.insert(addr, name); } - // Section 4: Named — parse and DISCARD. + // Section 4: Named — parse and mostly discard, but harvest + // `ReducibilityHints` from each `Def` variant into `env.anon_hints`. + // Hints are performance advice (lazy-delta tiebreak); the kernel's + // anon-mode correctness model is preserved either way. Without + // them, every Definition is forced to `Regular(0)` and the kernel + // can chew through `MAX_WHNF_FUEL` on definitions Lean would have + // marked `Abbrev`/`Regular(h)`. let num_named = get_u64(buf)?; for _ in 0..num_named { let _name_addr = get_address(buf)?; - let _named = get_named_indexed(buf, &name_reverse_index)?; + let named = get_named_indexed(buf, &name_reverse_index)?; + if let super::metadata::ConstantMetaInfo::Def { hints, .. } = + &named.meta.info + { + env.anon_hints.insert(named.addr.clone(), *hints); + } } // Section 5: Comms — parse and DISCARD. diff --git a/src/ix/kernel/ingress.rs b/src/ix/kernel/ingress.rs index d898db48..ad949205 100644 --- a/src/ix/kernel/ingress.rs +++ b/src/ix/kernel/ingress.rs @@ -776,20 +776,22 @@ fn ingress_expr( ) .ok_or_else(|| format!("invalid Ref index {ref_idx}"))? 
.clone(); - let name = match node { - ExprMetaData::Ref { name: name_addr } => { - resolve_name(name_addr, ctx.names) - }, - _ => { - return Err(format!( + // Meta mode: walk the arena to find the Ref's Lean name. + // Anon mode: the closure is never invoked — no arena walk, + // no name construction, no resolve_name call. + let name_field: M::MField = + M::meta_field_try::(|| match node { + ExprMetaData::Ref { name: name_addr } => { + Ok(resolve_name(name_addr, ctx.names)) + }, + _ => Err(format!( "Ref at index {ref_idx} (addr {}) has no metadata name (node={node:?})", &addr.hex()[..8] - )); - }, - }; + )), + })?; let univs = ingress_univ_args(univ_idxs, ctx, intern, univ_cache, stats)?; - let id = KId::new(addr, M::meta_field(name)); + let id = KId::new(addr, name_field); let hash = KExpr::::cnst_hash(&id, &univs, &mdata); values.push(timed_intern_or_build( intern, @@ -1123,19 +1125,24 @@ fn ingress_expr( format!("invalid Prj type ref index {type_ref_idx}") })? .clone(); - let (struct_name, child_arena) = match node { - ExprMetaData::Prj { struct_name: addr, child } => { - (resolve_name(addr, ctx.names), *child) - }, - _ => { - return Err(format!( + // The arena holds both the struct name and the child arena + // pointer (needed for the projected value's metadata walk). + // Meta mode reads both; Anon mode skips the entire arena + // touch — no name, no child-arena indexing. 
+ let mut child_arena: u64 = 0; + let struct_name_field: M::MField = + M::meta_field_try::(|| match node { + ExprMetaData::Prj { struct_name: addr, child } => { + child_arena = *child; + Ok(resolve_name(addr, ctx.names)) + }, + _ => Err(format!( "Prj at ref index {type_ref_idx} (addr {}) has no metadata name (node={node:?})", &type_addr.hex()[..8] - )); - }, - }; + )), + })?; stack.push(ExprFrame::PrjDone { - type_id: KId::new(type_addr, M::meta_field(struct_name)), + type_id: KId::new(type_addr, struct_name_field), field_idx: *field_idx, mdata, }); @@ -1368,6 +1375,17 @@ fn ingress_defn( block: KId, intern: &mut InternTable, stats: &mut ConvertStats, + // Anon callers compute `mut_ctx` structurally from sibling + // projection addresses; passing `Some(_)` skips the metadata-derived + // `build_mut_ctx` call. Meta callers pass `None`. + mut_ctx_override: Option>>, + // Anon callers may supply `Some(hints)` to override the default + // `Regular(0)` fall-through when the .ixe carries `Env::anon_hints` + // (harvested by `Env::get_anon` from the otherwise-discarded Named + // metadata). Meta callers pass `None` and pull hints from + // `meta.info` like usual. The override only takes effect when + // `meta.info` is not `Def` (i.e. anon path with empty meta). 
+ hints_override: Option, ) -> Result, KConst)>, String> { let mut cache: ExprCache = FxHashMap::default(); let mut univ_cache: UnivCache = FxHashMap::default(); @@ -1395,17 +1413,21 @@ fn ingress_defn( &DEFAULT_ARENA, 0, 0, - ReducibilityHints::Regular(0), + hints_override.unwrap_or(ReducibilityHints::Regular(0)), def.safety, vec![], ), }; + let mut_ctx = match mut_ctx_override { + Some(m) => m, + None => build_mut_ctx(meta, names, name_to_addr)?, + }; let ctx = Ctx { sharing, refs, univs, - mut_ctx: build_mut_ctx(meta, names, name_to_addr)?, + mut_ctx, arena, names, lvls: level_params.clone(), @@ -1473,6 +1495,9 @@ fn ingress_recursor( block: KId, intern: &mut InternTable, stats: &mut ConvertStats, + // Anon callers compute `mut_ctx` structurally; Meta callers pass + // `None` to fall back to the metadata-derived `build_mut_ctx`. + mut_ctx_override: Option>>, ) -> Result, KConst)>, String> { let mut cache: ExprCache = FxHashMap::default(); let mut univ_cache: UnivCache = FxHashMap::default(); @@ -1497,11 +1522,15 @@ fn ingress_recursor( _ => (vec![], &DEFAULT_ARENA, 0, vec![], vec![], vec![]), }; + let mut_ctx = match mut_ctx_override { + Some(m) => m, + None => build_mut_ctx(meta, names, name_to_addr)?, + }; let ctx = Ctx { sharing, refs, univs, - mut_ctx: build_mut_ctx(meta, names, name_to_addr)?, + mut_ctx, arena, names, lvls: level_params.clone(), @@ -1609,6 +1638,8 @@ fn ingress_standalone( self_id, intern, stats, + None, + None, ), IxonCI::Axio(ax) => { @@ -1720,6 +1751,7 @@ fn ingress_standalone( self_id, intern, stats, + None, ), // Projections and Muts are handled in ingress_muts_block @@ -1987,6 +2019,7 @@ fn ingress_muts_block( block_id.clone(), intern, stats, + None, )?); }, IxonMutConst::Defn(def) => { @@ -2003,6 +2036,8 @@ fn ingress_muts_block( block_id.clone(), intern, stats, + None, + None, )?); }, } @@ -3563,7 +3598,10 @@ pub fn ixon_ingress_owned( fn drop_ixon_env(ixon_env: IxonEnv, quiet: bool) { let total_start = Instant::now(); - let IxonEnv 
{ consts, named, blobs, names, comms } = ixon_env; + // `anon_hints` is a small FxHashMap (one entry per Def from the .ixe's + // Named metadata); dropping it inline alongside the bookkeeping below + // is negligible compared to the DashMap dropdance. + let IxonEnv { consts, named, blobs, names, comms, anon_hints: _ } = ixon_env; let consts_len = consts.len(); let named_len = named.len(); let names_len = names.len(); @@ -3952,6 +3990,488 @@ fn validate_no_reserved_marker_addresses( Ok(()) } +// ============================================================================ +// Anonymous-mode ingress +// ============================================================================ +// +// Companion to the Meta-mode ingress above. Uses only `Constant` data — +// never reads `ConstantMeta`, `Env::named`, `Env::names`, or any other +// metadata. Projection addresses are reconstructed deterministically from +// `(block, idx, [cidx])` via `Constant::commit`, which agrees with the +// addresses stored in `env.consts` (the compiler produces these same +// projection constants). +// +// Intended for use with environments loaded via `Env::get_anon`, which +// discards `named`/`names`/`comms` sections. The helpers below do not +// depend on those sections being empty — they simply never consult them. + +use crate::ix::ixon::constant::{ + ConstructorProj, DefinitionProj, InductiveProj, RecursorProj, +}; +use crate::ix::kernel::mode::Anon; + +/// Deterministic IPrj content address for member `idx` of `block`. +pub fn anon_indc_proj_addr(block: &Address, idx: u64) -> Address { + Constant::new(IxonCI::IPrj(InductiveProj { idx, block: block.clone() })) + .commit() + .0 +} + +/// Deterministic DPrj content address for member `idx` of `block`. +pub fn anon_defn_proj_addr(block: &Address, idx: u64) -> Address { + Constant::new(IxonCI::DPrj(DefinitionProj { idx, block: block.clone() })) + .commit() + .0 +} + +/// Deterministic RPrj content address for member `idx` of `block`. 
+pub fn anon_recr_proj_addr(block: &Address, idx: u64) -> Address { + Constant::new(IxonCI::RPrj(RecursorProj { idx, block: block.clone() })) + .commit() + .0 +} + +/// Deterministic CPrj content address for ctor `(idx, cidx)` of `block`. +pub fn anon_ctor_proj_addr(block: &Address, idx: u64, cidx: u64) -> Address { + Constant::new(IxonCI::CPrj(ConstructorProj { + idx, + cidx, + block: block.clone(), + })) + .commit() + .0 +} + +/// Compute deterministic ctor projection addresses for every constructor of +/// an inductive member at position `indc_idx` in a block at `block_addr`. +fn anon_ctor_addrs( + block_addr: &Address, + indc_idx: u64, + ind: &crate::ix::ixon::constant::Inductive, +) -> Vec
{ + (0..ind.ctors.len() as u64) + .map(|cidx| anon_ctor_proj_addr(block_addr, indc_idx, cidx)) + .collect() +} + +/// Anon ingress for a single standalone (non-mutual) constant. +/// +/// For `Defn` and `Recr` we pass an explicit `mut_ctx_override = [self_id]` +/// to support self-recursive standalones that use `Expr::Rec(0, univs)` to +/// refer to themselves. (Singleton-unwrapped self-recursive defs are emitted +/// by the compiler as standalones, with their self-reference encoded as +/// `Rec(0)` — same as a one-member mutual block would have used.) Without +/// the override, `ingress_defn`/`ingress_recursor` falls back to +/// `build_mut_ctx` on the empty meta, yielding an empty `mut_ctx` and +/// "invalid Rec index 0" errors. +/// +/// `Axio` and `Quot` cannot have `Rec`-references so we route them through +/// `ingress_standalone` unchanged. Projections / `Muts` belong to a block +/// and are not valid as standalone entries. +fn ingress_anon_standalone( + kenv: &mut KEnv, + anon_env: &IxonEnv, + addr: &Address, + constant: &Constant, +) -> Result, String> { + let empty_meta = ConstantMeta::default(); + let empty_names: FxHashMap = FxHashMap::default(); + let empty_n2a: FxHashMap = FxHashMap::default(); + let mut convert_stats = ConvertStats::new(false); + let self_id: KId = KId::new(addr.clone(), ()); + let hints_override = anon_env.anon_hints.get(addr).copied(); + + let entries = match &constant.info { + IxonCI::Defn(def) => ingress_defn::( + def, + self_id.clone(), + &empty_meta, + anon_env, + &empty_names, + &empty_n2a, + &constant.sharing, + &constant.refs, + &constant.univs, + self_id.clone(), + &mut kenv.intern, + &mut convert_stats, + Some(vec![self_id.clone()]), + hints_override, + )?, + IxonCI::Recr(rec) => ingress_recursor::( + rec, + self_id.clone(), + &empty_meta, + anon_env, + &empty_names, + &empty_n2a, + &constant.sharing, + &constant.refs, + &constant.univs, + self_id.clone(), + &mut kenv.intern, + &mut convert_stats, + 
Some(vec![self_id.clone()]), + )?, + _ => ingress_standalone::( + &Name::anon(), + addr, + constant, + &empty_meta, + anon_env, + &empty_names, + &empty_n2a, + &mut kenv.intern, + &mut convert_stats, + )?, + }; + insert_standalone_entries(kenv, entries); + Ok(self_id) +} + +/// Anon ingress for a mutual inductive member. Parallel to +/// `ingress_muts_inductive` but takes ctor projection addresses **directly** +/// (caller computes via `anon_ctor_addrs`) and a pre-computed `mut_ctx` +/// (sibling KIds for `Expr::Rec` resolution) instead of going through +/// metadata. Uses `DEFAULT_ARENA` + `type_root=0` + empty level-params +/// for the inductive and every ctor. +#[allow(clippy::too_many_arguments)] +fn ingress_anon_inductive( + ind: &crate::ix::ixon::constant::Inductive, + self_id: &KId, + anon_env: &IxonEnv, + block_constant: &Constant, + block_id: KId, + member_idx: u64, + ctor_addrs: &[Address], + mut_ctx: &[KId], + intern: &mut InternTable, + stats: &mut ConvertStats, +) -> Result, KConst)>, String> { + if ctor_addrs.len() != ind.ctors.len() { + return Err(format!( + "ingress_anon_inductive: ctor_addrs.len()={} but ind.ctors.len()={}", + ctor_addrs.len(), + ind.ctors.len() + )); + } + + let empty_names: FxHashMap = FxHashMap::default(); + let level_params: Vec = Vec::new(); + let mut cache: ExprCache = FxHashMap::default(); + let mut univ_cache: UnivCache = FxHashMap::default(); + let ctx = Ctx:: { + sharing: &block_constant.sharing, + refs: &block_constant.refs, + univs: &block_constant.univs, + mut_ctx: mut_ctx.to_vec(), + arena: &DEFAULT_ARENA, + names: &empty_names, + lvls: level_params.clone(), + synth_counter: Cell::new(0), + }; + + let typ = ingress_expr( + &ind.typ, + 0, + &ctx, + intern, + anon_env, + &mut cache, + &mut univ_cache, + stats, + )?; + + let ctor_ids: Vec> = ctor_addrs + .iter() + .map(|a| KId::::new(a.clone(), ())) + .collect(); + + let mut results = vec![( + self_id.clone(), + KConst::Indc { + name: (), + level_params: (), + lvls: 
ind.lvls, + params: ind.params, + indices: ind.indices, + is_rec: ind.recr, + is_refl: ind.refl, + is_unsafe: ind.is_unsafe, + nested: ind.nested, + block: block_id, + member_idx, + ty: typ, + ctors: ctor_ids.clone(), + lean_all: (), + }, + )]; + + for (cidx, ctor) in ind.ctors.iter().enumerate() { + stats.record_cache_clear(&cache); + cache.clear(); + let ctor_id = ctor_ids[cidx].clone(); + let ctor_ctx = Ctx:: { + sharing: &block_constant.sharing, + refs: &block_constant.refs, + univs: &block_constant.univs, + mut_ctx: mut_ctx.to_vec(), + arena: &DEFAULT_ARENA, + names: &empty_names, + lvls: Vec::new(), + synth_counter: Cell::new(0), + }; + let mut ctor_univ_cache: UnivCache = FxHashMap::default(); + + let ctor_typ = ingress_expr( + &ctor.typ, + 0, + &ctor_ctx, + intern, + anon_env, + &mut cache, + &mut ctor_univ_cache, + stats, + )?; + + results.push(( + ctor_id, + KConst::Ctor { + name: (), + level_params: (), + is_unsafe: ctor.is_unsafe, + lvls: ctor.lvls, + induct: self_id.clone(), + cidx: ctor.cidx, + params: ctor.params, + fields: ctor.fields, + ty: ctor_typ, + }, + )); + } + + Ok(results) +} + +/// Anon ingress for an entire Muts block: ingresses every member (and +/// every constructor of every inductive member) under its deterministic +/// projection address. Verifies that each computed address exists in +/// `anon_env.consts` — missing → error (corrupted .ixe). +/// +/// Returns the KIds in member order. The first KId is the block's +/// "primary" — `check_const` on it relies on the kernel's block +/// coordination (`check_const_member` etc.) to check all members in one +/// pass. 
+pub fn ingress_anon_block( + kenv: &mut KEnv, + anon_env: &IxonEnv, + block_constant: &Constant, + block_addr: &Address, +) -> Result>, String> { + let IxonCI::Muts(members) = &block_constant.info else { + return Err(format!( + "ingress_anon_block: addr {} is not a Muts block (got variant {:?})", + block_addr.hex(), + block_constant.info.variant() + )); + }; + + let block_id = KId::::new(block_addr.clone(), ()); + let mut convert_stats = ConvertStats::new(false); + let empty_meta = ConstantMeta::default(); + let empty_names: FxHashMap = FxHashMap::default(); + let empty_n2a: FxHashMap = FxHashMap::default(); + + // Compute mut_ctx for the entire block: one KId per member, addressed + // by its projection variant. Threaded through every member's ingress + // so `Expr::Rec(idx)` resolves to a sibling correctly. + let mut_ctx: Vec> = members + .iter() + .enumerate() + .map(|(i, m)| { + let i = i as u64; + let addr = match m { + IxonMutConst::Defn(_) => anon_defn_proj_addr(block_addr, i), + IxonMutConst::Indc(_) => anon_indc_proj_addr(block_addr, i), + IxonMutConst::Recr(_) => anon_recr_proj_addr(block_addr, i), + }; + KId::::new(addr, ()) + }) + .collect(); + + let mut all_entries: Vec<(KId, KConst)> = Vec::new(); + let mut member_kids: Vec> = Vec::with_capacity(members.len()); + + for (i, member) in members.iter().enumerate() { + let idx = i as u64; + match member { + IxonMutConst::Defn(def) => { + let proj_addr = anon_defn_proj_addr(block_addr, idx); + if !anon_env.consts.contains_key(&proj_addr) { + return Err(format!( + "ingress_anon_block: computed DPrj address {} not present in env (block {} idx {})", + proj_addr.hex(), + block_addr.hex(), + idx + )); + } + let self_id = KId::::new(proj_addr.clone(), ()); + member_kids.push(self_id.clone()); + let hints_override = anon_env.anon_hints.get(&proj_addr).copied(); + + let entries = ingress_defn::( + def, + self_id, + &empty_meta, + anon_env, + &empty_names, + &empty_n2a, + &block_constant.sharing, + 
&block_constant.refs, + &block_constant.univs, + block_id.clone(), + &mut kenv.intern, + &mut convert_stats, + Some(mut_ctx.clone()), + hints_override, + )?; + all_entries.extend(entries); + }, + IxonMutConst::Recr(rec) => { + let proj_addr = anon_recr_proj_addr(block_addr, idx); + if !anon_env.consts.contains_key(&proj_addr) { + return Err(format!( + "ingress_anon_block: computed RPrj address {} not present in env (block {} idx {})", + proj_addr.hex(), + block_addr.hex(), + idx + )); + } + let self_id = KId::::new(proj_addr.clone(), ()); + member_kids.push(self_id.clone()); + + let entries = ingress_recursor::( + rec, + self_id, + &empty_meta, + anon_env, + &empty_names, + &empty_n2a, + &block_constant.sharing, + &block_constant.refs, + &block_constant.univs, + block_id.clone(), + &mut kenv.intern, + &mut convert_stats, + Some(mut_ctx.clone()), + )?; + all_entries.extend(entries); + }, + IxonMutConst::Indc(ind) => { + let proj_addr = anon_indc_proj_addr(block_addr, idx); + if !anon_env.consts.contains_key(&proj_addr) { + return Err(format!( + "ingress_anon_block: computed IPrj address {} not present in env (block {} idx {})", + proj_addr.hex(), + block_addr.hex(), + idx + )); + } + let self_id = KId::::new(proj_addr.clone(), ()); + member_kids.push(self_id.clone()); + + let ctor_addrs = anon_ctor_addrs(block_addr, idx, ind); + // Verify ctor addresses too — catches corruption early. 
+ for (cidx, c_addr) in ctor_addrs.iter().enumerate() { + if !anon_env.consts.contains_key(c_addr) { + return Err(format!( + "ingress_anon_block: computed CPrj address {} not present in env (block {} idx {} cidx {})", + c_addr.hex(), + block_addr.hex(), + idx, + cidx + )); + } + } + + let entries = ingress_anon_inductive( + ind, + &self_id, + anon_env, + block_constant, + block_id.clone(), + idx, + &ctor_addrs, + &mut_ctx, + &mut kenv.intern, + &mut convert_stats, + )?; + all_entries.extend(entries); + }, + } + } + + insert_muts_entries(kenv, all_entries); + Ok(member_kids) +} + +/// Anon shallow ingress for a single address — used by the lazy fault +/// path in `TypeChecker` (`LazyAnonIngress`). For projections, fetches +/// the parent block and ingresses all of its members; a Muts block +/// address is ingressed the same way. Standalones are ingressed +/// directly. Returns `Ok(false)` if `addr` is absent from the env; +/// errors if a projection's parent block is missing. +pub fn ingress_anon_addr_shallow( + kenv: &mut KEnv, + anon_env: &IxonEnv, + addr: &Address, +) -> Result { + let Some(constant) = anon_env.get_const(addr) else { + return Ok(false); + }; + + // Extract the parent block address if this is a projection. The + // kernel sometimes asks for the block's KId directly (block + // coordination during mutual recursion) — in that case we ingress + // the block at its own address. + let block_addr: Option
= match &constant.info { + IxonCI::DPrj(p) => Some(p.block.clone()), + IxonCI::IPrj(p) => Some(p.block.clone()), + IxonCI::RPrj(p) => Some(p.block.clone()), + IxonCI::CPrj(p) => Some(p.block.clone()), + IxonCI::Muts(_) => Some(addr.clone()), + _ => None, + }; + + if let Some(block_addr) = block_addr { + // Block dedup: `insert_muts_entries` always stores the block under + // `KId(block_addr, ())` in `kenv.blocks`. A prior fault on any + // projection of this block populated all members in `kenv.consts`; + // re-ingressing would be wasted work and leaves heap-allocator + // fragmentation that doesn't return to the OS. The + // `LazyAnonIngress::faulted_addrs` set in `tc.rs` only dedupes at the + // projection-address level, so distinct sibling projections of the + // same block each fault separately and reach here. + let block_kid = KId::::new(block_addr.clone(), ()); + if kenv.blocks.contains_key(&block_kid) { + return Ok(true); + } + let block_const = anon_env.get_const(&block_addr).ok_or_else(|| { + format!( + "ingress_anon_addr_shallow: block {} (parent of {}) absent", + block_addr.hex(), + addr.hex() + ) + })?; + ingress_anon_block(kenv, anon_env, &block_const, &block_addr)?; + return Ok(true); + } + + // Standalone (Defn/Recr/Axio/Quot). + ingress_anon_standalone(kenv, anon_env, addr, &constant)?; + Ok(true) +} + #[cfg(test)] mod tests { use super::*; @@ -4536,4 +5056,87 @@ mod tests { assert_eq!(name_map.get(&nat_addr), Some(&nat_name)); assert_eq!(addr_map.get(&list_name), Some(&list_addr)); } + + // ---- Anon-mode determinism ---- + + #[test] + fn anon_proj_addrs_are_deterministic() { + let block = Address::hash(b"test-block-deterministic"); + // Same inputs → same outputs. 
+ assert_eq!( + anon_defn_proj_addr(&block, 0), + anon_defn_proj_addr(&block, 0) + ); + assert_eq!( + anon_indc_proj_addr(&block, 1), + anon_indc_proj_addr(&block, 1) + ); + assert_eq!( + anon_recr_proj_addr(&block, 2), + anon_recr_proj_addr(&block, 2) + ); + assert_eq!( + anon_ctor_proj_addr(&block, 3, 4), + anon_ctor_proj_addr(&block, 3, 4) + ); + // Different inputs → different outputs (catches accidental aliasing). + assert_ne!( + anon_defn_proj_addr(&block, 0), + anon_defn_proj_addr(&block, 1) + ); + assert_ne!( + anon_defn_proj_addr(&block, 0), + anon_indc_proj_addr(&block, 0) + ); + assert_ne!( + anon_ctor_proj_addr(&block, 0, 0), + anon_ctor_proj_addr(&block, 0, 1) + ); + assert_ne!( + anon_ctor_proj_addr(&block, 0, 0), + anon_ctor_proj_addr(&block, 1, 0) + ); + } + + #[test] + fn anon_proj_addr_matches_constant_commit() { + // The helper must agree with the explicit `Constant::commit` over + // the synthesized projection — that's the contract the rest of the + // anon pipeline depends on (verifying the computed address against + // the address actually stored in `env.consts`). 
+ use crate::ix::ixon::constant::{ + Constant, ConstantInfo, ConstructorProj, DefinitionProj, + InductiveProj, RecursorProj, + }; + let b = Address::hash(b"another-block"); + let (defn_addr, _) = + Constant::new(ConstantInfo::DPrj(DefinitionProj { + idx: 5, + block: b.clone(), + })) + .commit(); + assert_eq!(defn_addr, anon_defn_proj_addr(&b, 5)); + let (indc_addr, _) = + Constant::new(ConstantInfo::IPrj(InductiveProj { + idx: 7, + block: b.clone(), + })) + .commit(); + assert_eq!(indc_addr, anon_indc_proj_addr(&b, 7)); + let (recr_addr, _) = + Constant::new(ConstantInfo::RPrj(RecursorProj { + idx: 9, + block: b.clone(), + })) + .commit(); + assert_eq!(recr_addr, anon_recr_proj_addr(&b, 9)); + let (ctor_addr, _) = + Constant::new(ConstantInfo::CPrj(ConstructorProj { + idx: 2, + cidx: 3, + block: b.clone(), + })) + .commit(); + assert_eq!(ctor_addr, anon_ctor_proj_addr(&b, 2, 3)); + } } diff --git a/src/ix/kernel/mode.rs b/src/ix/kernel/mode.rs index 07e1c559..0477e8f1 100644 --- a/src/ix/kernel/mode.rs +++ b/src/ix/kernel/mode.rs @@ -204,6 +204,29 @@ pub trait KernelMode: 'static + Clone + Debug + Send + Sync { val: T, ) -> Self::MField; + /// Build a metadata field from a closure. Meta runs the closure to + /// produce the wrapped value; Anon discards the closure unevaluated + /// and returns `()`. The fallible counterpart is `meta_field_try`. + /// + /// Use this at metadata-extraction sites in mode-generic code: the + /// closure body — including arena walks, name resolution, and any + /// other work that only matters in Meta mode — is skipped entirely + /// when `M = Anon`, with no `Name::anon`-style placeholder + /// construction at the call site. + fn meta_field_with(f: F) -> Self::MField + where + T: MetaHash + MetaDisplay + PartialEq + Clone + Debug + Hash + Send + Sync, + F: FnOnce() -> T; + + /// Fallible variant of `meta_field_with`. Meta runs the closure (and + /// may return its `Err`); Anon discards the closure and returns + /// `Ok(())`. 
Use for sites where missing metadata is a Meta-mode + /// error but a no-op in Anon mode. + fn meta_field_try(f: F) -> Result, E> + where + T: MetaHash + MetaDisplay + PartialEq + Clone + Debug + Hash + Send + Sync, + F: FnOnce() -> Result; + /// Extract a name from a metadata field when running in Meta mode. fn meta_name(field: &Self::MField) -> Option; } @@ -232,6 +255,22 @@ impl KernelMode for ZMode { val } + fn meta_field_with(f: F) -> T + where + T: MetaHash + MetaDisplay + PartialEq + Clone + Debug + Hash + Send + Sync, + F: FnOnce() -> T, + { + f() + } + + fn meta_field_try(f: F) -> Result + where + T: MetaHash + MetaDisplay + PartialEq + Clone + Debug + Hash + Send + Sync, + F: FnOnce() -> Result, + { + f() + } + fn meta_name(field: &Name) -> Option { Some(field.clone()) } @@ -251,6 +290,24 @@ impl KernelMode for ZMode { ) { } + fn meta_field_with(_f: F) + where + T: MetaHash + MetaDisplay + PartialEq + Clone + Debug + Hash + Send + Sync, + F: FnOnce() -> T, + { + // Anon: closure never invoked — the metadata extraction work it + // would have performed (arena walk, name resolution, etc.) is + // entirely skipped. 
+ } + + fn meta_field_try(_f: F) -> Result<(), E> + where + T: MetaHash + MetaDisplay + PartialEq + Clone + Debug + Hash + Send + Sync, + F: FnOnce() -> Result, + { + Ok(()) + } + fn meta_name(_field: &()) -> Option { None } diff --git a/src/ix/kernel/primitive.rs b/src/ix/kernel/primitive.rs index 5ad99bc5..4514b6ff 100644 --- a/src/ix/kernel/primitive.rs +++ b/src/ix/kernel/primitive.rs @@ -282,52 +282,52 @@ impl PrimAddrs { "7190ce56f6a2a847b944a355e3ec595a4036fb07e3c3db9d9064fc041be72b64", ), nat_add: h( - "f94192058e41bc29e88924d857a6bd33f8b3e0a90f8786828270d1cc1dd0adc6", + "9d83307d552e681f4cceff7f783b5a64e002575edcb1c04fa0c5662ce2dd3438", ), nat_pred: h( - "6b59cf449781f07b04207d665978b5c5ef9688afa7448590a68f7da7ff88c516", + "4ed5fffb03ae5e6b7a0d9f3379aa769e5ca8188cacbdf1e20dca4bad27f25333", ), nat_sub: h( - "fa98dabf44d2a6307b490ac9e811433efc2f958996c67be1398cb4d1b264cf39", + "9e86ff43b15aebafb3df610a96dd4492ff9cd8aab87a82025b617c9a0bbf6280", ), nat_mul: h( - "9b5c57ea1cf2fb1de67ee5bec15e360d20a9635990273014e67851e049ff3619", + "9bc13539b68b0e1c5a53818580aa096a65907f63af4588a1e91e14d34d9e4d86", ), nat_pow: h( - "d015987bb10dd22863ddc41160d27dd3d1ea74f754fb2412432436f3ea5b5071", + "b52c4d0d3878f287719f65d0088a269af0f6e5b1b7ef5629830963dcb75e6cee", ), nat_gcd: h( - "ee8ba9216b3fc81e7968586b43cebea15d0e143d5d4b1fde1bd301a74093f606", + "7436d9fa7cce3ef91bc9903cc5aa32d413da2f6ca7c21a9235b41a2fc482dffc", ), nat_mod: h( - "8ef8b28b4e9e0a59f3822e243e71299f06bb6e7afdb6cdd97976fb290b667bb4", + "6ea1a44f7378e372feb58fb52c8084626057b3f387495e7600b971a38b244276", ), nat_div: h( - "fa583794c8ef368eff6881e816a4e889f95061116ce49b154056d38fce4b7f52", + "d0919570f8932ddf5dff4300ab7667d1baab9324dbc136ac9c81292ed1c81fe9", ), nat_bitwise: h( - "f21d747aca3e08f5290093bf8f4020838d8e1742a78b3e1f48d83ef159395e6a", + "0b69fbfb2ef3c7733ad2f6bd7707820c32603a79603501a77fbbef74df855a32", ), nat_beq: h( - "e8b7149d8a7d12414b06252f318d408204723ca4c02f3a38edfa37792448c0da", + 
"49a16714bd7b82037cd8e776331d8262829bc70c8ee363c866c7060bf366cd9b", ), nat_ble: h( - "2275080a89c327904e3ad127ba44370a7c6c1bef3aa74792079f8f3159636957", + "f5bb245767fdbc683bee9e1ca8d9a7247426fb24c67b2c3f227de51b5f839b26", ), nat_land: h( - "a0db90e68ee3b7a166e35f619bd7b02c0896efd60eb46914ff3e4fb81252fb94", + "44514320bd9335a08942e77de8077e383f11a0f6150c000c9823c87467589965", ), nat_lor: h( - "d14419aaa47a03bf9a46938bf72e40f96cab853f9cc5869879e7699f45171773", + "184ca6a932a4c5fd0a2c169501d2d5048bb743bd166f96ffec9d4101e54e982b", ), nat_xor: h( - "ae68fd416ecb9ce20612272d43c2f86eaf21d9547f565968391e9e12e39372dc", + "163a8c2800ca51daaffe1b71575127942a05300440524b145c8fcdcc5ee008b6", ), nat_shift_left: h( - "f606b7c23180a20ace60fe24d52bc0ea3854698d2d14da05c4837a97e1ab4469", + "16bd10365ee6fa40b4a1ddc0dd26c8a49db8f8b1eb56b2ac2a179ea2440598d7", ), nat_shift_right: h( - "d860b560156da68e801c8bd51d892e557fbe3526d7d198696ffb4d551ae04bb7", + "6fe21e35a9a308deafe53210db5b2856c185dc147ef2717c0e73a0fa3ad31690", ), bool_type: h( "6405a455ba70c2b2179c7966c6f610bf3417bd0f3dd2ba7a522533c2cd9e1d0b", @@ -339,32 +339,32 @@ impl PrimAddrs { "c127f89f92e0481f7a3e0631c5615fe7f6cbbf439d5fd7eba400fb0603aedf2f", ), string: h( - "cb1bca7fc5dbb1bdfbf6319df89da9fda3a679d22554b8a9d5dd4663c0a97312", + "422658d043ee482f7102d2f6ea6596664808e899abad628080478a1e9189f0aa", ), string_mk: h( - "63d95a0fd6a1144348d0f20e20cc5c3af61ac955923f45f42a782de933aad594", + "405d36f5f6479c40216ff7bbba10b077848ec33af03ef4040bfa4f82930de4ba", ), char_type: h( - "38aa12059fad3afa1e1e8740dc9470a47c26986350f6cb3bea1fae1276d7b5f1", + "2f96b8da29a38b177fc32553d538d5d450212fd3e6fed95d61c817837d29a34f", ), char_mk: h( - "e62238c54b91395c2c06192cfccb5e80fce41ed11d1bf6db142d2c39d7c81a20", + "316fe91ede33079f2330cc9921ee117f9aca023efa14f5b1fe024ddbe625fe86", ), char_of_nat: h( - "7a5754386b30bb86f0b6f70fd368bb50e603273a50ad79d8c17fc3cb59f80fac", + "28dc1b3d3d2e011529c71c9d4418248f6060dbfb1c7e97db1c572a565787ef61", ), // NOTE: 
`String.ofList` and `String.mk` share the canonical content-hash // because both compile to the same Ixon form (a one-constructor `String` // built from `List Char`). The Lean-side deprecation of `String.mk` in // favor of `String.ofList` is orthogonal to the compiled representation. string_of_list: h( - "63d95a0fd6a1144348d0f20e20cc5c3af61ac955923f45f42a782de933aad594", + "405d36f5f6479c40216ff7bbba10b077848ec33af03ef4040bfa4f82930de4ba", ), string_to_byte_array: h( - "65f644286bc49464cc7a36b7d7952f8543ab67564cd509ee878a95375609069b", + "a07736ec999fdcb8753067497f9f97b461f2a14e8169cd11287dc73cdfd742aa", ), byte_array_empty: h( - "d97417c49206c61fe28cbb7a0b6095f722cdfbc213e034aa59de51b9218af074", + "c07f1589bd7dcc556e384e42bab142a84f7a6255d39b59b0f900198047252296", ), list: h( "abed9ff1aba4634abc0bd3af76ca544285a32dcfe43dc27b129aea8867457620", @@ -414,16 +414,16 @@ impl PrimAddrs { "ad58c3656044d7faef697637f516d72674d35b18663cb263f7ccca8cdd2e6f00", ), nat_dec_le: h( - "e08c5141c44b27653957ae00a926a2dd68dcd7779c4fdf850e668fdc92b408de", + "e2d8292e1adf54b0d39300847718dc89a63332e8f31b41c9e80f8c9a2017278d", ), nat_dec_eq: h( - "38323fd9e17e9d1f17536dbb7f196b94b5ba19e4bf625d9e7c607c47365c15ad", + "8629519632e18c49097936fc220c9e03d6263fddeba4462af5b717dd11e4bef4", ), nat_dec_lt: h( - "f445084f6805faf9be62aa328415651343c98ffe52db159dfb1b9a14cb28cf23", + "4295c071a9485af2d998e3947be5988077531f02bf091b870d53ce589d4ef5b1", ), decidable_rec: h( - "f323a549ad4df6b2f32899237a281136f34d431ed72b33857c085e6c4d852738", + "7a18ca84a113b0c2ad0cc0e825a55d767e77a89e8d0e1d82eb9104859f53d095", ), decidable_is_true: h( "3ae2c71da2bf34179a5a8808857c34a3b7662ff5654d8c247c43e85a7cde493f", @@ -432,22 +432,22 @@ impl PrimAddrs { "10ac5f48798b3ff01b0f74c0b544d22796c9775f6d43d328316bbb3aa1638999", ), nat_le_of_ble_eq_true: h( - "7e5d1f1118a89f77f89d469a27731a754de336a05e33f383056bc92b36947812", + "bab37a8bd9860d3bfe31f1a1752fe7008a224c6ad8af623c7fb8bd192be5c07e", ), 
nat_not_le_of_not_ble_eq_true: h( - "c1e23b8dafb3778b996312068a2bec3dcbcc72132efbf43c235e573084668241", + "981b00b7c45899f726c3de35328074cb3f72a09225743da81f5031ff6e647ba9", ), nat_eq_of_beq_eq_true: h( - "b9acc81f2801af89b95e0962aa9d7390a3acfe8fb760559a811a82ed7443dbb5", + "a57b8180288701cebfb1d6dd29f160cc4acc3c6aba9834e46b65f1c5aa7217e2", ), nat_ne_of_beq_eq_false: h( - "248779884109eed00600a0bd968f740db7f3d924fb2b1706ab552e7876062855", + "6e9b3c1ca5d9f09b902321b155edf4524c3e32de1d690db917bfbaaaac3f8f82", ), fin: h( "272aa9e16c03e9ad7337e706d73efd14ccf1da10e2f8367dd34374b60e1556fa", ), bool_no_confusion: h( - "473b2c948ddbce4ddb4b369e5cf6199ff185b64e9fbb1e90901d746de55190ef", + "5b94718322c633ad163592db4fb7432360f5d023adfa7749f5cf5175798d16ca", ), // Int primitives — canonical content-hashes from // `lake test -- rust-kernel-build-primitives`. @@ -461,43 +461,43 @@ impl PrimAddrs { "25bbcd756b52eb78bce170410defa4c15b238dedef5f7b89691621dcbe919780", ), int_add: h( - "d8e6cdc988d4288e48cc6092730bc5387176cff6592471a328cc4354f1878412", + "e4adffd6df782a658a014fc10d5783078ab08df86c6dfa98abf0467f1eff1778", ), int_sub: h( - "93b2d12d7797fd62c20bec255336c1e91ca1cef7a6951071296fc1ab5bd1d8c8", + "117a355713696bfe4a5e52303e09a64402169ed0e47ca3286ddac66829d0c909", ), int_mul: h( - "9ad6ee18ef6d7d74bbe449ab61aa31f84a0e78951e9560d28fd82e0c3b071d01", + "55a20a6208057a7b3e7fdc0422c44426c09795fd4ab7bc8416693d1929feddd8", ), int_neg: h( - "8c3f64e6b5baaaa125f0637d7a824df627dbede0115968f3c80c55e022554462", + "cda7d330fc5071197cb6237132aef80504cfc2cfa8079cfd1eab8758e9962054", ), int_emod: h( - "7cdb112725d3a4f542bfb0cd309268641bd89ddc9890c7221ed01f99b6a00b63", + "2bca87e317612b6b01a6ea2737c96de2c77403949f56cec0814c8fdb73c16844", ), int_ediv: h( - "ba194c0a3674e67b9968d0a65cdda3a4ddb9dcdce48ad6c62e91d478a10a3ddd", + "b96aac54f81f2f2e30d16b843f9b1bfde70d24a3391dd22edacec651b7885d71", ), int_bmod: h( - "c8431b7adb918967aa05ba6fd8297f33e97d67003e4138021d912ea92cc1887f", + 
"b7ed12c1ce5af35ce2a954ff9dc4aedfcb434ca13139d67033a51df88bdfe004", ), int_bdiv: h( - "ab72477254d1ca4738123ad612eae4dfb9126ef78310ed7d2ebde8100963bfb1", + "e07fe014c7a8148bf5b679684b3731933cd9f5450e8e393a1ff47b5bb31ded0b", ), int_nat_abs: h( - "60662e33224f55be9e367683378c7bf6093c125c04ff7c4e3eca370112e1c562", + "ea837737db22feb8ed0234ba5d359e82b1a752d352019d291c642fae92e793e9", ), int_pow: h( - "0dfe8f22bd6cb67d538a2f018f0e406fc0b5d730caa63e1a798dfa9ad78bab07", + "4274644acc93cec33c8ff16f5fb4c9cca63fba1bb0745ff68b941716e9aae2a3", ), int_dec_eq: h( - "42d9b7a94aefc77a6616936be31264eaf8bed7bd80f5d34967fc42afaf29a7fd", + "19e01bc9a3264b9b8b940cf172a209bb774ad36f6410fa742f0048046808c0b6", ), int_dec_le: h( - "ee0370e426a400c8b16782fabfa0e43ff87ecac1a0c1c765cc5179fc423ab1bd", + "7e048ef303ecdc836467cdd4d892f7fe26fbbd7b62ae1d1746543f4e3098c6e1", ), int_dec_lt: h( - "15070e920204272369f0f2e80ff3f5035c05b39efa714ec8e6bbfce9950637af", + "6cc2d63da1fd07e2533fcb08cbb38c2d67f7512a7efd15236a4b0e57bb1fcd53", ), punit: h( "16a2dc76a2cfcc9440f443c666536f2fa99c0250b642fd3971fbad25d531262a", @@ -511,25 +511,25 @@ impl PrimAddrs { // Names previously matched via `is_const_named` in whnf.rs. // Canonical content-hashes from `lake test -- rust-kernel-build-primitives`. 
nat_rec: h( - "6e855f04485df8d97767f8aa89f223bcac977e2a155c45c66d6e094ec3163194", + "43619510ee8a583db72b9d71b84e7ea13a198fe33d73963cf0cc1ebf68a68ad6", ), nat_cases_on: h( - "9a6b32af194fdf0b447633077d9fa89c249d6d7df243d300b89dd9b14d92bb03", + "28096d7ca6b3f96bd250cc8b8fee00c36bbc36dd1dd2040854041ec13993ba34", ), bit_vec: h( - "cf55115c75343f824fdd932178b0cbc75a86e5052de93db98f05b37885ffb09b", + "33d94a2d250a1a5aa022e3befdca1c86f45d70071db038eff9b8980dc5160b76", ), bit_vec_to_nat: h( - "7834865c1c6cd963b9365cb06500623880de4d9930343e96e19e62a026e7cace", + "f94271482ffdfd7802d42e22271c89e21dee456b050859c5d12e3d1d699bb4ea", ), bit_vec_of_nat: h( - "a08acf4cedb4c05eddb55bff366cd952d5b7b88602c3fc6d875e8ea732a3c2f4", + "2acb8942f3587d0aebf1795df90426eaff54e7f3ccce36c589d5d14716a78fad", ), bit_vec_ult: h( - "6a3f262c2f4a2c517a616fbae54a31eccb85998ad9c1f93be8cc590d97117c04", + "068a88410ef445d31ae58e0e11b3684143e472288bd4a884d5f928c3d2019bec", ), decidable_decide: h( - "6ddaaed263740b5d5d67e6c12ecfadb24ad8867d4a09fe784b59dac7f72754ab", + "f4cdbc5ed9a1ab5928f9931f5c2390239e7f47df6d20e84ea465c9707b84cdc1", ), lt_lt: h( "01d871bcdfb2e769e1aca00e7a3b3a21a8d902cc273707c892eb867b7fc78ae2", @@ -541,19 +541,19 @@ impl PrimAddrs { "211bf5ed2f4c51d45750e75b891fa267db4d4e6f46c2079282fa2be3e88781a1", ), punit_size_of_1: h( - "8c2cbfe328910bfe7feb60072b46f7487692cb37599681b137a31dd99e708f03", + "489187e9cd03abebc12a1335c628d642ea2a48bdc262c85f848f1011e73f610a", ), size_of_size_of: h( "7105eaf4c52ce3a19372a87fac57a8f9598a246334ce6effaee3e48e7e6d3aad", ), string_back: h( - "11baba55cbdf3649fc1b696c2e775696e995c38ef313cf276553e1898da45e0f", + "f6066fc62491fd4c48d4daf3b9beba72e2a0b8040fcbd99fb729abf56a9c07c4", ), string_legacy_back: h( - "998c3e640c8b3a35c627200dcd694f67f8b1d41e68760c90e361da24734d39bc", + "d5e543a5b6bde88dc3854d4c2b9a12ac270976bf4102a6b33f55a90db324268f", ), string_utf8_byte_size: h( - "11ea1432562b1132853f173fda9add591b0606a8dee36b00f71bec2967fb6447", + 
"cc6cdc73e0df404ba7685c733ebbe7c1aecc6ef46503d10aad58bf70f84a4858", ), } } diff --git a/src/ix/kernel/tc.rs b/src/ix/kernel/tc.rs index 4acda9bd..a6089d09 100644 --- a/src/ix/kernel/tc.rs +++ b/src/ix/kernel/tc.rs @@ -22,6 +22,7 @@ use super::expr::{ExprData, FVarId, KExpr}; use super::id::KId; use super::ingress::{ IxonIngressLookups, ingress_addr_shallow_into_kenv_with_lookups, + ingress_anon_addr_shallow, }; use super::lctx::LocalDecl; use super::level::{KUniv, UnivData}; @@ -90,6 +91,16 @@ pub struct LazyIxonIngress<'a> { faulted_addrs: FxHashSet
<Address>, } +/// Lazy on-demand ingress for the anon kernel. Faults a missing +/// constant into the worker's `KEnv` using only `Constant` data — +/// no metadata access. Intended for use with envs loaded via +/// `Env::get_anon` (which discards metadata sections), but the +/// implementation never consults those sections regardless. +pub struct LazyAnonIngress<'a> { + anon_env: &'a IxonEnv, + faulted_addrs: FxHashSet<Address>
, +} + /// Thread-local type-checking handle. Cheap to create — only allocates empty /// vectors and counters. Kernel state lives in the borrowed worker `KEnv`. pub struct TypeChecker<'a, M: KernelMode> { @@ -98,6 +109,11 @@ pub struct TypeChecker<'a, M: KernelMode> { /// Optional read-only Ixon source used to fault constants into `env` when /// typechecking discovers a missing address. lazy_ixon: Option>, + /// Optional metadata-free lazy ingress (anon mode). Mutually exclusive with + /// `lazy_ixon` — `new_with_lazy_anon` constructs Anon-mode checkers; the + /// `lazy_ingress_addr` dispatcher uses whichever is set. The anon path + /// never reads `Env::named`/`Env::names`. + lazy_anon: Option>, /// Primitive constant KIds. Copied from `env.prims()` at construction; /// overridable for tests via `tc.prims = custom`. pub prims: Primitives, @@ -175,6 +191,7 @@ impl<'a, M: KernelMode> TypeChecker<'a, M> { TypeChecker { env, lazy_ixon: None, + lazy_anon: None, prims, ctx: Vec::new(), let_vals: Vec::new(), @@ -217,6 +234,7 @@ impl<'a, M: KernelMode> TypeChecker<'a, M> { tc } + pub fn try_get_const( &mut self, id: &KId, @@ -224,7 +242,7 @@ impl<'a, M: KernelMode> TypeChecker<'a, M> { if let Some(c) = self.env.get(id) { return Ok(Some(c)); } - let lazy_enabled = self.lazy_ixon.is_some(); + let lazy_enabled = self.lazy_ixon.is_some() || self.lazy_anon.is_some(); self.lazy_ingress_addr(&id.addr)?; match self.env.get(id) { Some(c) => Ok(Some(c)), @@ -255,6 +273,28 @@ impl<'a, M: KernelMode> TypeChecker<'a, M> { } fn lazy_ingress_addr(&mut self, addr: &Address) -> Result<(), TcError> { + // Anon-mode lazy path. The `lazy_anon` field is only ever set by + // `TypeChecker::::new_with_lazy_anon` (defined in a non-generic + // impl block below), so its presence is a compile-time witness that + // `M = Anon`. The transmute below relies on this invariant. 
+ if let Some(lazy) = self.lazy_anon.as_mut() { + if !lazy.faulted_addrs.insert(addr.clone()) { + return Ok(()); + } + // SAFETY: `lazy_anon` is only set by the `M = Anon` constructor, + // so this cast is the identity when reached. `KEnv` and + // `KEnv` have the same layout iff `M == Anon`; both + // `KernelMode` impls (`ZMode`, `ZMode`) are layout- + // compatible per-field only when `M` matches. The invariant holds. + let env_anon: &mut KEnv = unsafe { + &mut *(self.env as *mut KEnv).cast::>() + }; + return ingress_anon_addr_shallow(env_anon, lazy.anon_env, addr) + .map(|_| ()) + .map_err(|msg| { + TcError::Other(format!("lazy anon ingress {}: {msg}", addr.hex())) + }); + } let Some(lazy) = self.lazy_ixon.as_mut() else { return Ok(()); }; @@ -935,6 +975,41 @@ impl<'a, M: KernelMode> TypeChecker<'a, M> { } } +// ----------------------------------------------------------------------- +// Anon-only constructors +// ----------------------------------------------------------------------- +// +// `new_with_lazy_anon` lives here so the `M = Anon` constraint is enforced +// at compile time. The dispatcher in `lazy_ingress_addr` relies on this: +// `lazy_anon` being `Some` is a witness that `M = Anon`, which makes the +// `&mut KEnv` → `&mut KEnv` cast a no-op transmute. + +impl<'a> TypeChecker<'a, super::mode::Anon> { + /// Construct an anon-mode typechecker with lazy on-demand ingress. + /// `anon_env` is expected to come from `Env::get_anon` (or otherwise + /// have empty metadata maps). The dispatcher in `lazy_ingress_addr` + /// never consults metadata regardless. + /// + /// Primitives are resolved by address only — `Primitives::from_addr_names` + /// receives a closure that always returns `None`, so the + /// `M::MField` slot ends up as `()` (always, in Anon mode). 
+ pub fn new_with_lazy_anon( + env: &'a mut KEnv, + anon_env: &'a IxonEnv, + ) -> Self { + if !env.has_prims() { + let prims = Primitives::from_addr_names(|_addr| None); + let _ = env.set_prims(prims); + } + let mut tc = Self::new(env); + tc.lazy_anon = Some(LazyAnonIngress { + anon_env, + faulted_addrs: FxHashSet::default(), + }); + tc + } +} + // ----------------------------------------------------------------------- // Free-standing helpers // ----------------------------------------------------------------------- From 4ee0c80ffc9d407b55d07d90976dceb384c43df9 Mon Sep 17 00:00:00 2001 From: "John C. Burnham" Date: Mon, 18 May 2026 18:18:34 -0400 Subject: [PATCH 04/12] Clean up cargo clippy --all-targets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 11× `cloned_ref_to_slice_refs`: `&[x.clone()]` → `std::slice::from_ref(&x)` in `assumption_tree.rs` + `merkle.rs` test modules. Same allocator footprint, no clone, matches the lint's recommended idiom. - 2× `useless_vec`: `vec![0xE3, 0x00, 0x00]` → `[0xE3, 0x00, 0x00]` in serde-reject tests where the buffer is only borrowed. All 1011 unit tests pass. `cargo clippy --all-targets` is now clean. 
--- src/ix/ixon/assumption_tree.rs | 16 ++++++++-------- src/ix/ixon/merkle.rs | 10 +++++----- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/ix/ixon/assumption_tree.rs b/src/ix/ixon/assumption_tree.rs index 7b890133..5eb6ed2e 100644 --- a/src/ix/ixon/assumption_tree.rs +++ b/src/ix/ixon/assumption_tree.rs @@ -279,7 +279,7 @@ mod tests { #[test] fn canonical_single_leaf() { let a = addr(b"only"); - let t = AssumptionTree::canonical(&[a.clone()]).unwrap(); + let t = AssumptionTree::canonical(std::slice::from_ref(&a)).unwrap(); assert_eq!(t, AssumptionTree::Leaf(a)); } @@ -322,7 +322,7 @@ mod tests { #[test] fn canonical_root_matches_merkle_root_canonical_single() { let a = addr(b"only"); - let t = AssumptionTree::canonical(&[a.clone()]).unwrap(); + let t = AssumptionTree::canonical(std::slice::from_ref(&a)).unwrap(); assert_eq!(Some(t.root()), merkle_root_canonical(&[a])); } @@ -353,8 +353,8 @@ mod tests { fn join_root_matches_merkle_join() { let a = addr(b"a"); let b = addr(b"b"); - let l = AssumptionTree::canonical(&[a.clone()]).unwrap(); - let r = AssumptionTree::canonical(&[b.clone()]).unwrap(); + let l = AssumptionTree::canonical(std::slice::from_ref(&a)).unwrap(); + let r = AssumptionTree::canonical(std::slice::from_ref(&b)).unwrap(); let joined = AssumptionTree::join(l.clone(), r.clone()); assert_eq!(joined.root(), merkle_join(&l.root(), &r.root())); } @@ -390,7 +390,7 @@ mod tests { #[test] fn merkle_proof_single_leaf_empty_path() { let a = addr(b"only"); - let t = AssumptionTree::canonical(&[a.clone()]).unwrap(); + let t = AssumptionTree::canonical(std::slice::from_ref(&a)).unwrap(); let path = t.merkle_proof(&a).unwrap(); assert!(path.is_empty()); assert!(verify_merkle_proof(&t.root(), &a, &path)); @@ -430,7 +430,7 @@ mod tests { let b = addr(b"b"); let c = addr(b"c"); let left = AssumptionTree::canonical(&[a.clone(), b.clone()]).unwrap(); - let right = AssumptionTree::canonical(&[c.clone()]).unwrap(); + let right = 
AssumptionTree::canonical(std::slice::from_ref(&c)).unwrap(); let joined = AssumptionTree::join(left, right); for leaf in [a, b, c] { let path = joined.merkle_proof(&leaf).expect("leaf present in join"); @@ -497,14 +497,14 @@ mod tests { #[test] fn serde_rejects_wrong_tag() { // Tag4(0xE, 3) = Eval claim, not AssumptionTree. - let bytes = vec![0xE3, 0x00, 0x00]; + let bytes = [0xE3, 0x00, 0x00]; assert!(AssumptionTree::get(&mut &bytes[..]).is_err()); } #[test] fn serde_rejects_invalid_body_tag() { // 0xE2 outer + 0x99 invalid body tag - let bytes = vec![0xE2, 0x99]; + let bytes = [0xE2, 0x99]; assert!(AssumptionTree::get(&mut &bytes[..]).is_err()); } } diff --git a/src/ix/ixon/merkle.rs b/src/ix/ixon/merkle.rs index b846385a..5df1b4e1 100644 --- a/src/ix/ixon/merkle.rs +++ b/src/ix/ixon/merkle.rs @@ -211,7 +211,7 @@ mod tests { #[test] fn canonical_single() { let a = addr(b"only"); - let root = merkle_root_canonical(&[a.clone()]).unwrap(); + let root = merkle_root_canonical(std::slice::from_ref(&a)).unwrap(); assert_eq!(root, leaf_hash(&a)); } @@ -238,7 +238,7 @@ mod tests { fn canonical_distinguishes() { let a = addr(b"a"); let b = addr(b"b"); - let r1 = merkle_root_canonical(&[a.clone()]).unwrap(); + let r1 = merkle_root_canonical(std::slice::from_ref(&a)).unwrap(); let r2 = merkle_root_canonical(&[a, b]).unwrap(); assert_ne!(r1, r2); } @@ -293,7 +293,7 @@ mod tests { let b = addr(b"b"); let c = addr(b"c"); let left = merkle_root_canonical(&[a.clone(), b.clone()]).unwrap(); - let right = merkle_root_canonical(&[c.clone()]).unwrap(); + let right = merkle_root_canonical(std::slice::from_ref(&c)).unwrap(); let joined = merkle_join(&left, &right); let canonical = merkle_root_canonical(&[a, b, c]).unwrap(); assert_ne!(joined, canonical); @@ -331,8 +331,8 @@ mod tests { #[test] fn proof_single_leaf() { let a = addr(b"only"); - let root = merkle_root_canonical(&[a.clone()]).unwrap(); - let path = merkle_proof_canonical(&[a.clone()], &a).unwrap(); + let root = 
merkle_root_canonical(std::slice::from_ref(&a)).unwrap(); + let path = merkle_proof_canonical(std::slice::from_ref(&a), &a).unwrap(); assert!(path.is_empty()); assert!(verify_merkle_proof(&root, &a, &path)); } From 028ff8b6004a1adb9e5ad5711bb9dd980d880688 Mon Sep 17 00:00:00 2001 From: "John C. Burnham" Date: Mon, 18 May 2026 18:21:10 -0400 Subject: [PATCH 05/12] ix check: print full content hashes + add --workers flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - The anon-mode progress / failure-log labels and the meta-mode hash-display fallback were truncating addresses to 16 hex chars (`@1f4b195aefa10e26`). Drop the `[..16]` slice so the full 64-char Blake3 hex is printed (`@1f4b195aefa10e2690d13c5b98b3d9124d3fbb5c…`), matching what tooling like `--fail-out` records and what the metadata-free workflow actually needs to identify a constant. - `--workers N` flag on `ix check` plumbs through the existing `IX_KERNEL_CHECK_WORKERS` env var that `resolve_kernel_check_workers` in `src/ffi/kernel.rs` reads. Useful for isolating per-worker memory cost (`--workers 1` lets you see the env's static footprint without per-worker overhead) and for capping concurrency in resource-tight contexts. --- Ix/Cli/CheckCmd.lean | 13 +++++++++++++ src/ffi/kernel.rs | 6 +++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/Ix/Cli/CheckCmd.lean b/Ix/Cli/CheckCmd.lean index 9cda25d4..939f7498 100644 --- a/Ix/Cli/CheckCmd.lean +++ b/Ix/Cli/CheckCmd.lean @@ -30,6 +30,7 @@ public import Ix.Common public import Ix.KernelCheck public import Ix.Meta public import Ix.Cli.ValidateCmd +public import Std.Internal.UV.System public section @@ -217,6 +218,17 @@ def runCheckCmd (p : Cli.Parsed) : IO UInt32 := do return 1 let envPath := pathArg.as! String + -- `--workers N` is plumbed through the existing + -- `IX_KERNEL_CHECK_WORKERS` env var that `resolve_kernel_check_workers` + -- (`src/ffi/kernel.rs`) reads. 
Setting `1` forces a single-threaded + -- runner, useful for isolating per-worker memory usage and timing. + if let some flag := p.flag? "workers" then + let n := flag.as! Nat + if n == 0 then + p.printError "error: --workers must be > 0" + return 1 + Std.Internal.UV.System.osSetenv "IX_KERNEL_CHECK_WORKERS" (toString n) + let anon := p.flag? "anon" |>.isSome if anon then let hasConsts := p.flag? "consts" |>.isSome @@ -242,6 +254,7 @@ def checkCmd : Cli.Cmd := `[Cli| consts : String; "Comma-separated EXACT constant names to seed (meta mode only)" "consts-file" : String; "Path to a file with one constant name per line (meta mode only)" "fail-out" : String; "Write failing constants to this path (consumable by --consts-file)" + workers : Nat; "Number of parallel kernel-check workers; 1 disables parallelism (default: available_parallelism). Plumbs via IX_KERNEL_CHECK_WORKERS env var." verbose; "Log every constant on its own line (default: quiet)" ARGS: diff --git a/src/ffi/kernel.rs b/src/ffi/kernel.rs index cfa19063..61938042 100644 --- a/src/ffi/kernel.rs +++ b/src/ffi/kernel.rs @@ -1406,7 +1406,7 @@ fn run_anon_checks_parallel( (primary_addr.clone(), result_idxs.clone()) }, }; - let display = format!("@{}", &primary_addr.hex()[..16]); + let display = format!("@{}", primary_addr.hex()); let prefix = format!(" [{}/{work_total}] {display}", work_idx + 1); progress_worker.begin(worker_idx, &prefix); @@ -1440,7 +1440,7 @@ fn run_anon_checks_parallel( if let (Some(log), Err((_, msg))) = (failure_log_worker.as_ref(), result.as_ref()) { - let label = format!("@{}", &addrs[result_idx].hex()[..16]); + let label = format!("@{}", addrs[result_idx].hex()); log.record(&label, msg); } } @@ -1906,7 +1906,7 @@ where name.pretty() } else { match ixon_env.lookup_name(name) { - Some(named) => format!("@{}", &named.addr.hex()[..16]), + Some(named) => format!("@{}", named.addr.hex()), None => name.pretty(), } }; From 373cb64395191e68cebf1cfb1e497b45498c053f Mon Sep 17 00:00:00 2001 From: 
"John C. Burnham" Date: Mon, 18 May 2026 19:18:47 -0400 Subject: [PATCH 06/12] Mmap .ixe + cache-free LazyConstant for anon mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `lake exe ix check compilemathlib.ixe --anon` on a 3.2 GB env now peaks at ~11 GB RSS, down from ~40 GB; build_anon_work is 50× faster. Three intertwined changes: 1. Memory-map the .ixe (avoids ~3.2 GB heap copy of bytes) - Cargo: add memmap2 = "0.9". - `BytesSource` enum in `src/ix/ixon/lazy.rs` distinguishes heap-resident `Arc<[u8]>` from `(Arc, offset, len)` windows into a memory-mapped file. `LazyConstant::raw_bytes`, `verify_address`, `PartialEq` route through `BytesSource::as_slice` so every existing consumer is mmap-transparent. - `Env::get_anon_mmap(path)` in `src/ix/ixon/serialize.rs` opens the file, mmaps it, and stores Section-2 consts as mmap-backed `LazyConstant`s. Sections 3-4 (names + named) are still parsed transiently to harvest hints, then dropped before return. - `rs_kernel_check_anon` (`src/ffi/kernel.rs`) switches to `get_anon_mmap`; the old `std::fs::read` + `Env::get_anon` heap path is gone for the anon FFI. 2. Strip the persistent `LazyConstant` parsed-Constant cache - `LazyConstant.cache` was `Arc>>` and accumulated forever — for mathlib that meant ~30 GB of parsed `Arc` trees pinned in the env across the entire run. - New shape: `cache: Option>`. Populated only by `from_constant` (compile-side, where we already own the parsed value). `from_bytes`/`from_mmap_slice` set `None`, and their `get()` parses fresh on every call without storing. - Re-parse cost is bounded by the existing per-worker dedup: `kenv.consts` already addresses each ingressed constant once per work item, and `clear_releasing_memory()` drops the kenv between items. The kenv is now the only persistent materialization layer. 3. 
`LazyConstant::peek_variant` for `build_anon_work` - One-byte read of the outer Tag4 head to identify the `ConstantInfo` variant — no body parse, no allocation. - `build_anon_work` now dispatches on `peek_variant()`; only `Muts` blocks trigger `lc.get()` (we need the member list for projection-address enumeration), and that `Arc<Constant>` drops at the end of the match arm. - Previously every constant was fully materialized at startup just to read its variant tag. With ~95% of the env being standalone/projection, the work-enumeration pass now skims the env at near-IO speed. Test updates: - `mmap_slice_roundtrips` (already added with the earlier mmap scaffolding) exercises the mmap window roundtrip. - New `from_bytes_does_not_cache` and `from_constant_clones_share_cache` document the new caching contract. - `peek_variant_*` tests cover every variant + empty-bytes and unknown-flag error paths. - `lazy_sparsity_only_materializes_closure` in `src/ix/ixon/env.rs` reframed to assert BFS-of-closure correctness instead of cache side-effects (`is_materialized()` no longer fires for lazy loads). 
End-to-end on compilemathlib.ixe (--anon, 32 workers): before: 640658/640658 in 266s, peak RSS ~40 GB after: 640658/640658 in 223s, peak RSS 11.3 GB --- Cargo.lock | 18 +- Cargo.toml | 1 + src/ffi/kernel.rs | 74 ++++---- src/ix/ixon/env.rs | 58 ++++-- src/ix/ixon/lazy.rs | 373 ++++++++++++++++++++++++++++++++++----- src/ix/ixon/serialize.rs | 153 ++++++++++++++++ 6 files changed, 590 insertions(+), 87 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 78ec1f00..758ea397 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -811,7 +811,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -1733,6 +1733,7 @@ dependencies = [ "iroh-base", "itertools 0.14.0", "lean-ffi", + "memmap2", "mimalloc", "multi-stark", "n0-error", @@ -1891,6 +1892,15 @@ version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +[[package]] +name = "memmap2" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" +dependencies = [ + "libc", +] + [[package]] name = "mimalloc" version = "0.1.50" @@ -2207,7 +2217,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -3211,7 +3221,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b55fb86dfd3a2f5f76ea78310a88f96c4ea21a3031f8d212443d56123fd0521" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -3390,7 +3400,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 3efdfe00..a547b64b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,7 @@ rayon = "1" rustc-hash = "2" tiny-keccak = { version = "2", features = ["keccak"] } dashmap = { version = "6.1.0", features = ["rayon"] } +memmap2 = "0.9" sha2 = "0.10" # Iroh dependencies bytes = { version = "1.10.1", optional = true } diff --git a/src/ffi/kernel.rs b/src/ffi/kernel.rs index 61938042..234cce6b 100644 --- a/src/ffi/kernel.rs +++ b/src/ffi/kernel.rs @@ -1287,6 +1287,7 @@ fn build_anon_work( ) -> Result<(Vec, Vec
<Address>), String> { use crate::ix::ixon::constant::ConstantInfo as CI; use crate::ix::ixon::constant::MutConst as MC; + use crate::ix::ixon::lazy::ConstVariantTag as Tag; let mut work: Vec<AnonWorkItem> = Vec::new(); let mut addrs: Vec<Address>
= Vec::new(); @@ -1296,18 +1297,48 @@ fn build_anon_work( env.consts.iter().map(|e| e.key().clone()).collect(); keys.sort_unstable(); + // Dispatch on the outer Tag4 byte via `peek_variant` — no body + // parse, no allocation. Only `Muts` blocks require a full + // materialization (to enumerate members for projection-address + // computation); the resulting `Arc` drops at the end of + // that match arm. Standalones (Defn/Recr/Axio/Quot, ~95% of the + // env) and projection skips don't even touch the body. + // + // Before this change, every constant was fully materialized here + // and (worse) pinned forever in `LazyConstant.cache`'s OnceLock. + // For mathlib that pinned ~30 GB of parsed `Arc` trees in + // the shared `Arc` before kernel checking even started. + // The cache-free `LazyConstant` policy + this peek path keep + // env-side memory bounded to "bytes (mmap'd) + per-const headers". for addr in keys { let lc = env.consts.get(&addr).ok_or_else(|| { format!("build_anon_work: missing const at {}", addr.hex()) })?; - let constant = lc.value().get().map_err(|e| { - format!("build_anon_work: materialize {}: {e}", addr.hex()) + let tag = lc.value().peek_variant().map_err(|e| { + format!("build_anon_work: peek_variant {}: {e}", addr.hex()) })?; - match &constant.info { - CI::IPrj(_) | CI::CPrj(_) | CI::RPrj(_) | CI::DPrj(_) => { + match tag { + Tag::IPrj | Tag::CPrj | Tag::RPrj | Tag::DPrj => { // Skip; covered by parent block. }, - CI::Muts(members) => { + Tag::Defn | Tag::Recr | Tag::Axio | Tag::Quot => { + let result_idx = addrs.len(); + addrs.push(addr.clone()); + work.push(AnonWorkItem::Standalone { result_idx, addr: addr.clone() }); + }, + Tag::Muts => { + // Materialize once to enumerate members; the `Arc` + // drops at the end of this arm — no cache retention. 
+ let constant = lc.value().get().map_err(|e| { + format!("build_anon_work: materialize Muts {}: {e}", addr.hex()) + })?; + let CI::Muts(members) = &constant.info else { + return Err(format!( + "build_anon_work: Tag::Muts but ConstantInfo is {:?} at {}", + constant.info.variant(), + addr.hex() + )); + }; // Compute kernel-checkable targets deterministically. Each // member contributes its projection address; inductive members // contribute one CPrj per constructor. @@ -1335,11 +1366,6 @@ fn build_anon_work( addrs.extend(targets); work.push(AnonWorkItem::Block { primary_addr, result_idxs }); }, - CI::Defn(_) | CI::Recr(_) | CI::Axio(_) | CI::Quot(_) => { - let result_idx = addrs.len(); - addrs.push(addr.clone()); - work.push(AnonWorkItem::Standalone { result_idx, addr: addr.clone() }); - }, } } @@ -1516,34 +1542,22 @@ pub extern "C" fn rs_kernel_check_anon( let fail_out_path = if fail_out_path.is_empty() { None } else { Some(fail_out_path) }; - let t0 = Instant::now(); - let bytes = match std::fs::read(&path) { - Ok(b) => b, - Err(e) => { - return LeanIOResult::error_string(&format!( - "rs_kernel_check_anon: failed to read {path}: {e}" - )); - }, - }; - eprintln!( - "[rs_kernel_check_anon] read env: {:>8.1?} ({} bytes)", - t0.elapsed(), - bytes.len() - ); - + // mmap the .ixe directly. Section 2 consts become zero-copy windows + // into the mapping (`LazyConstant::from_mmap_slice`), avoiding the + // ~3 GB heap copy that `std::fs::read` would impose on mathlib. + // Sections 3-4 (names + named) are still parse-and-discard, but + // their decoded forms drop before `get_anon_mmap` returns. 
let t1 = Instant::now(); - let mut slice: &[u8] = &bytes; - let ixon_env = match IxonEnv::get_anon(&mut slice) { + let ixon_env = match IxonEnv::get_anon_mmap(std::path::Path::new(&path)) { Ok(env) => env, Err(e) => { return LeanIOResult::error_string(&format!( - "rs_kernel_check_anon: failed to deserialize {path}: {e}" + "rs_kernel_check_anon: failed to mmap+deserialize {path}: {e}" )); }, }; - drop(bytes); eprintln!( - "[rs_kernel_check_anon] deserialize: {:>8.1?} ({} consts; \ + "[rs_kernel_check_anon] mmap+parse: {:>8.1?} ({} consts; \ named={} names={} comms={})", t1.elapsed(), ixon_env.const_count(), diff --git a/src/ix/ixon/env.rs b/src/ix/ixon/env.rs index e765ae7b..8b713efb 100644 --- a/src/ix/ixon/env.rs +++ b/src/ix/ixon/env.rs @@ -139,6 +139,22 @@ impl Env { self.consts.insert(addr, LazyConstant::from_bytes(bytes)); } + /// Store a constant as a window into a memory-mapped `.ixe` file. + /// `(mmap, offset, len)` must reference exactly what `Constant::put` + /// produced for `addr`. Used by [`Env::get_anon_mmap`] to avoid + /// heap-copying on-disk bytes — the OS page cache backs the slice. + pub fn store_const_lazy_mmap( + &self, + addr: Address, + mmap: Arc, + offset: usize, + len: usize, + ) { + self + .consts + .insert(addr, LazyConstant::from_mmap_slice(mmap, offset, len)); + } + /// Get a constant by address, materializing on demand. /// /// Returns `None` if the address is not present or materialization @@ -538,8 +554,16 @@ mod tests { /// Round-trips an env through serialize → deserialize so the /// deserialized side holds purely lazy entries, then asserts that - /// only constants reachable from `seed` get materialized after a - /// `transitive_deps_excl(seed)` walk. + /// a `transitive_deps_excl(seed)` walk only touches constants + /// reachable from `seed` (correctness of the BFS). 
+ /// + /// Lazy-loaded `LazyConstant`s no longer cache materialized values + /// (see `src/ix/ixon/lazy.rs` "Cache policy" docs), so we can't + /// observe materialization via `is_materialized()` after a walk — + /// that observable was always-false. Instead we assert the BFS + /// returns exactly the closure, and that `is_materialized()` stays + /// false everywhere (proving the load path doesn't accidentally + /// pre-populate the cache). #[test] fn lazy_sparsity_only_materializes_closure() { // Build a small env: a→b→c, and an independent d. @@ -553,7 +577,7 @@ mod tests { env.store_const(c.clone(), const_with_refs(vec![])); env.store_const(d.clone(), const_with_refs(vec![])); - // Serialize → deserialize so all entries start unmaterialized. + // Serialize → deserialize so all entries are lazy-from-bytes. let mut buf = Vec::new(); env.put(&mut buf).unwrap(); let loaded = Env::get(&mut buf.as_slice()).unwrap(); @@ -565,14 +589,24 @@ mod tests { ); } - // Walk closure of `a`. {a, b, c} get materialized; `d` does not. - let _ = loaded.transitive_deps_excl(&a); - assert!(loaded.consts.get(&a).unwrap().value().is_materialized()); - assert!(loaded.consts.get(&b).unwrap().value().is_materialized()); - assert!(loaded.consts.get(&c).unwrap().value().is_materialized()); - assert!( - !loaded.consts.get(&d).unwrap().value().is_materialized(), - "`d` outside `a`'s closure should stay lazy" - ); + // BFS the closure of `a`; should hit {a, b, c} but not `d`. + let deps = loaded.transitive_deps_excl(&a); + let dep_set: FxHashSet
= deps.iter().cloned().collect(); + assert!(dep_set.contains(&b), "`b` reachable from `a`"); + assert!(dep_set.contains(&c), "`c` reachable from `a` via `b`"); + assert!(!dep_set.contains(&d), "`d` should not be in `a`'s closure"); + assert!(!dep_set.contains(&a), "deps_excl excludes the seed"); + + // Even after the BFS, no entries should report as materialized: + // lazy-loaded `LazyConstant`s parse fresh on each `get()` and + // don't cache (env-side caching is what kept mathlib's RSS at + // ~30GB; the cache-free policy is what made `--anon` viable). + for entry in loaded.consts.iter() { + assert!( + !entry.value().is_materialized(), + "entry {:?} should remain unmaterialized after BFS", + entry.key() + ); + } } } diff --git a/src/ix/ixon/lazy.rs b/src/ix/ixon/lazy.rs index bc9f028e..2b1e1dc7 100644 --- a/src/ix/ixon/lazy.rs +++ b/src/ix/ixon/lazy.rs @@ -4,8 +4,22 @@ //! `.ixe` loader reads each constant's bytes (preceded by a Tag0 //! length sidecar at the env-section level — see `Env::get`) into a //! `LazyConstant::from_bytes`, deferring `Constant::get` until first -//! access via [`LazyConstant::get`]. Subsequent accesses return a -//! cached `Arc`. +//! access via [`LazyConstant::get`]. +//! +//! Cache policy: +//! - The compile-side constructor [`LazyConstant::from_constant`] +//! pre-populates `cache: Some(Arc::new(c))` so subsequent `get()`s +//! are free; re-serializing the structured value would be wasteful. +//! - The lazy load constructors [`LazyConstant::from_bytes`] and +//! [`LazyConstant::from_mmap_slice`] leave `cache: None`. Every +//! `get()` parses fresh from `bytes` and returns the parsed value +//! *without* storing it. Callers (typically kernel ingress) consume +//! the returned `Arc` immediately to build a `KConst` in +//! the worker's `KEnv`, then drop it. The `KEnv` is the only +//! long-lived materialization; it is cleared between work items by +//! `clear_releasing_memory()`. This keeps env-level memory bounded +//! 
to "bytes + mmap header" regardless of how much of the env the +//! workers eventually visit. //! //! Invariants: //! - `raw_bytes()` returns exactly what `Constant::put` produces and @@ -13,34 +27,107 @@ //! included. //! - `Address::hash(self.raw_bytes()) == addr` for the address this //! lazy entry was stored under (`verify_address` checks this). -//! - Cache is shared across `Clone`s (`Arc>`) so that -//! materialization done through one handle is visible through all. +//! +//! Storage backend: +//! - [`BytesSource::Heap`] owns an `Arc<[u8]>`; used by the +//! compile-side path and tests. +//! - [`BytesSource::Mmap`] is a `(Arc, offset, len)` window +//! into a memory-mapped `.ixe` file. The mmap stays alive as long +//! as any [`LazyConstant`] (or `Env`) holds a clone of the +//! `Arc`; on Linux the OS handles paging — no heap copy of +//! the on-disk bytes. + +use std::sync::Arc; -use std::sync::{Arc, OnceLock}; +use memmap2::Mmap; use crate::ix::address::Address; -use super::constant::Constant; +use super::constant::{Constant, ConstantInfo}; + +/// Backing storage for a `LazyConstant`'s serialized bytes. +#[derive(Debug, Clone)] +pub enum BytesSource { + /// Heap-allocated bytes. Produced by `store_const`, + /// `LazyConstant::from_constant`, and the eager + /// `Env::get`/`Env::get_anon` paths that `std::fs::read` the file + /// into a `Vec` first. + Heap(Arc<[u8]>), + /// Slice into a memory-mapped `.ixe` file. Produced by + /// `Env::get_anon_mmap`. The `Arc` is shared across every + /// `LazyConstant` from the same load, so the mapping is kept + /// alive as long as any constant entry references it. + Mmap { mmap: Arc, offset: usize, len: usize }, +} + +impl BytesSource { + /// View the bytes as a `&[u8]`. Zero-copy for both variants: + /// `Heap` derefs the `Arc<[u8]>`; `Mmap` slices the mapping. 
+ pub fn as_slice(&self) -> &[u8] { + match self { + BytesSource::Heap(arc) => arc, + BytesSource::Mmap { mmap, offset, len } => &mmap[*offset..*offset + *len], + } + } +} + +/// Tag identifying which `ConstantInfo` variant a `LazyConstant` +/// holds, derivable from one byte of the serialized prefix. Used by +/// `LazyConstant::peek_variant` so callers can dispatch without +/// parsing the body. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum ConstVariantTag { + Defn, + Recr, + Axio, + Quot, + Muts, + IPrj, + CPrj, + RPrj, + DPrj, +} /// Lazy-materialized `Constant` backed by serialized bytes. #[derive(Debug, Clone)] pub struct LazyConstant { /// Tag4-encoded constant bytes (exactly the slice consumed by /// `Constant::get` and hashed by `Address::hash`). - bytes: Arc<[u8]>, - /// Cached materialization. Shared across clones via `Arc` so the - /// first thread to materialize benefits every subsequent handle. - cache: Arc>>, + bytes: BytesSource, + /// Pre-materialized `Constant`. Populated *only* by + /// `from_constant` (the compile-side path, where we already own + /// the parsed value). `from_bytes` and `from_mmap_slice` leave + /// this `None`; their `get()` parses fresh and does not store — + /// see the module-level "Cache policy" docs. + cache: Option>, } impl LazyConstant { - /// Construct from already-serialized bytes (the lazy load path). + /// Construct from already-serialized heap-resident bytes (the + /// `std::fs::read` lazy load path and the test-FFI path). /// /// The caller is responsible for ensuring `bytes` is exactly what /// `Constant::put` produced for the address this entry is stored /// under. Use [`Self::verify_address`] for an explicit check. pub fn from_bytes(bytes: Arc<[u8]>) -> Self { - LazyConstant { bytes, cache: Arc::new(OnceLock::new()) } + LazyConstant { bytes: BytesSource::Heap(bytes), cache: None } + } + + /// Construct from a memory-mapped window. 
The `Arc` keeps + /// the underlying mapping alive; cloning the `LazyConstant` only + /// bumps the refcount. + /// + /// Used by `Env::get_anon_mmap` to avoid heap-copying the on-disk + /// byte stream — the OS page cache is the source of truth. + pub fn from_mmap_slice( + mmap: Arc, + offset: usize, + len: usize, + ) -> Self { + LazyConstant { + bytes: BytesSource::Mmap { mmap, offset, len }, + cache: None, + } } /// Construct from a structured `Constant` (the in-memory build path, @@ -50,20 +137,24 @@ impl LazyConstant { pub fn from_constant(c: Constant) -> Self { let mut buf = Vec::new(); c.put(&mut buf); - let bytes: Arc<[u8]> = buf.into(); - let arc: Arc = Arc::new(c); - let cache = OnceLock::new(); - // First `set` always succeeds on a fresh OnceLock. - let _ = cache.set(arc); - LazyConstant { bytes, cache: Arc::new(cache) } + LazyConstant { + bytes: BytesSource::Heap(buf.into()), + cache: Some(Arc::new(c)), + } } - /// Materialize the `Constant`, caching for subsequent calls. + /// Materialize the `Constant`. + /// + /// If this entry was built via [`Self::from_constant`], returns the + /// pre-populated cached `Arc` (zero-cost clone). Otherwise parses + /// fresh from `bytes` on every call and returns a new `Arc` + /// without storing it — see the module-level "Cache policy" docs + /// for why. pub fn get(&self) -> Result, String> { - if let Some(c) = self.cache.get() { + if let Some(c) = &self.cache { return Ok(c.clone()); } - let mut slice: &[u8] = &self.bytes; + let mut slice: &[u8] = self.bytes.as_slice(); let parsed = Constant::get(&mut slice) .map_err(|e| format!("LazyConstant::get: {e}"))?; if !slice.is_empty() { @@ -72,26 +163,77 @@ impl LazyConstant { slice.len() )); } - let arc = Arc::new(parsed); - // If another thread raced us and set first, that's fine — our - // local `arc` is dropped and we pick up the winner below. 
- let _ = self.cache.set(arc); - Ok(self.cache.get().expect("cache just set").clone()) + Ok(Arc::new(parsed)) + } + + /// Identify the `ConstantInfo` variant by reading just the outer + /// `Tag4` head byte — no allocation, no body parse. + /// + /// `Tag4` encoding (see `src/ix/ixon/tag.rs:64-70`): head is + /// `[flag:4][large:1][size:3]`. For `Constant`: + /// - `flag = 0xC` (`Constant::FLAG_MUTS`) → Muts block (size field + /// encodes the entry count, possibly in large form; we ignore it + /// here — knowing it's Muts is enough). + /// - `flag = 0xD` (`Constant::FLAG`) → non-Muts variant; index + /// 0..=7 in the `size` field. All non-Muts variants fit in 3 + /// bits, so `large=0` always; the index is read directly. + /// + /// Used by `build_anon_work` to dispatch on variant without + /// materializing the entire `Arc` body. For the ~95% of + /// constants that are standalone or projections, this is the only + /// byte we ever read at enumeration time. + pub fn peek_variant(&self) -> Result { + let bytes = self.bytes.as_slice(); + let head = *bytes + .first() + .ok_or_else(|| "LazyConstant::peek_variant: empty bytes".to_string())?; + let flag = head >> 4; + let large = head & 0b1000 != 0; + let small = head & 0b0111; + match flag { + Constant::FLAG_MUTS => Ok(ConstVariantTag::Muts), + Constant::FLAG => { + if large { + return Err(format!( + "LazyConstant::peek_variant: unexpected large-form Tag4 for non-Muts constant (head=0x{head:02X})" + )); + } + match u64::from(small) { + ConstantInfo::CONST_DEFN => Ok(ConstVariantTag::Defn), + ConstantInfo::CONST_RECR => Ok(ConstVariantTag::Recr), + ConstantInfo::CONST_AXIO => Ok(ConstVariantTag::Axio), + ConstantInfo::CONST_QUOT => Ok(ConstVariantTag::Quot), + ConstantInfo::CONST_CPRJ => Ok(ConstVariantTag::CPrj), + ConstantInfo::CONST_RPRJ => Ok(ConstVariantTag::RPrj), + ConstantInfo::CONST_IPRJ => Ok(ConstVariantTag::IPrj), + ConstantInfo::CONST_DPRJ => Ok(ConstVariantTag::DPrj), + n => Err(format!( + 
"LazyConstant::peek_variant: unknown variant index {n}" + )), + } + }, + _ => Err(format!( + "LazyConstant::peek_variant: unexpected Tag4 flag 0x{flag:X} (expected 0xC or 0xD)" + )), + } } /// Raw serialized bytes (the Tag4 constant body, no length prefix). pub fn raw_bytes(&self) -> &[u8] { - &self.bytes + self.bytes.as_slice() } - /// Whether the structured `Constant` has been materialized. + /// Whether a pre-materialized `Constant` is cached. True only for + /// entries built via [`Self::from_constant`]; the lazy-load paths + /// (`from_bytes`, `from_mmap_slice`) always return `false` — see + /// the module-level "Cache policy" docs. pub fn is_materialized(&self) -> bool { - self.cache.get().is_some() + self.cache.is_some() } /// Verify that `Address::hash(self.raw_bytes()) == *expected`. pub fn verify_address(&self, expected: &Address) -> bool { - Address::hash(&self.bytes) == *expected + Address::hash(self.bytes.as_slice()) == *expected } } @@ -99,7 +241,7 @@ impl LazyConstant { /// implies `Constant`-equality. impl PartialEq for LazyConstant { fn eq(&self, other: &Self) -> bool { - self.bytes == other.bytes + self.bytes.as_slice() == other.bytes.as_slice() } } impl Eq for LazyConstant {} @@ -138,26 +280,34 @@ mod tests { } #[test] - fn from_bytes_defers_materialization() { + fn from_bytes_does_not_cache() { let c = defn_constant(); let (addr, bytes) = c.commit(); let lazy = LazyConstant::from_bytes(bytes.into()); assert!(!lazy.is_materialized()); assert!(lazy.verify_address(&addr)); - let arc = lazy.get().unwrap(); - assert_eq!(*arc, c); - assert!(lazy.is_materialized()); + let a1 = lazy.get().unwrap(); + let a2 = lazy.get().unwrap(); + // Both decoded to equal Constants... + assert_eq!(*a1, c); + assert_eq!(*a2, c); + // ...but no caching, so distinct Arc allocations each call. + assert!(!Arc::ptr_eq(&a1, &a2)); + // Never materialized — there's nothing to materialize into. 
+ assert!(!lazy.is_materialized()); } #[test] - fn cache_is_shared_across_clones() { - let lazy = LazyConstant::from_bytes(axiom_constant().commit().1.into()); - assert!(!lazy.is_materialized()); + fn from_constant_clones_share_cache() { + let c = axiom_constant(); + let lazy = LazyConstant::from_constant(c); let cloned = lazy.clone(); - let _ = cloned.get().unwrap(); - // Materialization through `cloned` is visible through `lazy` - // because both share the same `Arc>` cache slot. + let a1 = lazy.get().unwrap(); + let a2 = cloned.get().unwrap(); + // Both came from the pre-populated cache; same Arc. + assert!(Arc::ptr_eq(&a1, &a2)); assert!(lazy.is_materialized()); + assert!(cloned.is_materialized()); } #[test] @@ -208,4 +358,145 @@ mod tests { let err = lazy.get().unwrap_err(); assert!(err.contains("trailing"), "got: {err}"); } + + #[test] + fn mmap_slice_roundtrips() { + use std::fs::File; + use std::io::Write; + let tmp = std::env::temp_dir().join("ix_lazy_mmap_test.bin"); + let c = defn_constant(); + let mut payload = Vec::new(); + // Some leading bytes so offset != 0 exercises the windowing. + payload.extend_from_slice(&[0xDE, 0xAD, 0xBE, 0xEF]); + c.put(&mut payload); + { + let mut f = File::create(&tmp).unwrap(); + f.write_all(&payload).unwrap(); + } + let file = File::open(&tmp).unwrap(); + let mmap = unsafe { Mmap::map(&file).unwrap() }; + let mmap = Arc::new(mmap); + let len = payload.len() - 4; + let lazy = LazyConstant::from_mmap_slice(Arc::clone(&mmap), 4, len); + // Mmap-backed entries are never marked materialized — they parse + // fresh on every `get()` and don't cache. 
+ assert!(!lazy.is_materialized()); + assert_eq!(lazy.raw_bytes().len(), len); + let got = lazy.get().unwrap(); + assert_eq!(*got, c); + assert!(!lazy.is_materialized()); + std::fs::remove_file(&tmp).ok(); + } + + // -------------------- peek_variant -------------------- + + /// Build a Muts constant with two trivial Defn members so we can + /// roundtrip it through `Constant::put`/`Constant::get` for the + /// `peek_variant` Muts test. + fn muts_constant() -> Constant { + use crate::ix::ixon::constant::{ConstantInfo, MutConst}; + let m1 = MutConst::Defn(Definition { + kind: DefKind::Definition, + safety: DefinitionSafety::Safe, + lvls: 0, + typ: Expr::sort(0), + value: Expr::var(0), + }); + let m2 = MutConst::Defn(Definition { + kind: DefKind::Definition, + safety: DefinitionSafety::Safe, + lvls: 0, + typ: Expr::sort(0), + value: Expr::var(0), + }); + Constant::new(ConstantInfo::Muts(vec![m1, m2])) + } + + fn peek_for(c: &Constant) -> Result { + let (_addr, bytes) = c.commit(); + LazyConstant::from_bytes(bytes.into()).peek_variant() + } + + #[test] + fn peek_variant_defn() { + assert_eq!(peek_for(&defn_constant()).unwrap(), ConstVariantTag::Defn); + } + + #[test] + fn peek_variant_axio() { + assert_eq!(peek_for(&axiom_constant()).unwrap(), ConstVariantTag::Axio); + } + + #[test] + fn peek_variant_muts() { + assert_eq!(peek_for(&muts_constant()).unwrap(), ConstVariantTag::Muts); + } + + #[test] + fn peek_variant_quot() { + use crate::ix::env::QuotKind; + use crate::ix::ixon::constant::{ConstantInfo, Quotient}; + let q = Constant::new(ConstantInfo::Quot(Quotient { + kind: QuotKind::Type, + lvls: 1, + typ: Expr::sort(0), + })); + assert_eq!(peek_for(&q).unwrap(), ConstVariantTag::Quot); + } + + #[test] + fn peek_variant_dprj() { + use crate::ix::ixon::constant::{ConstantInfo, DefinitionProj}; + let p = Constant::new(ConstantInfo::DPrj(DefinitionProj { + idx: 0, + block: Address::hash(b"some-block"), + })); + assert_eq!(peek_for(&p).unwrap(), ConstVariantTag::DPrj); + 
} + + #[test] + fn peek_variant_iprj() { + use crate::ix::ixon::constant::{ConstantInfo, InductiveProj}; + let p = Constant::new(ConstantInfo::IPrj(InductiveProj { + idx: 0, + block: Address::hash(b"some-block"), + })); + assert_eq!(peek_for(&p).unwrap(), ConstVariantTag::IPrj); + } + + #[test] + fn peek_variant_rprj() { + use crate::ix::ixon::constant::{ConstantInfo, RecursorProj}; + let p = Constant::new(ConstantInfo::RPrj(RecursorProj { + idx: 0, + block: Address::hash(b"some-block"), + })); + assert_eq!(peek_for(&p).unwrap(), ConstVariantTag::RPrj); + } + + #[test] + fn peek_variant_cprj() { + use crate::ix::ixon::constant::{ConstantInfo, ConstructorProj}; + let p = Constant::new(ConstantInfo::CPrj(ConstructorProj { + idx: 0, + cidx: 0, + block: Address::hash(b"some-block"), + })); + assert_eq!(peek_for(&p).unwrap(), ConstVariantTag::CPrj); + } + + #[test] + fn peek_variant_empty_bytes_error() { + let lazy = LazyConstant::from_bytes(Arc::from(&[][..])); + let err = lazy.peek_variant().unwrap_err(); + assert!(err.contains("empty"), "got: {err}"); + } + + #[test] + fn peek_variant_unknown_flag_error() { + // 0x10 = flag 0x1 (not 0xC or 0xD) + let lazy = LazyConstant::from_bytes(Arc::from(&[0x10][..])); + let err = lazy.peek_variant().unwrap_err(); + assert!(err.contains("flag"), "got: {err}"); + } } diff --git a/src/ix/ixon/serialize.rs b/src/ix/ixon/serialize.rs index 9dc6b9d0..468a8da1 100644 --- a/src/ix/ixon/serialize.rs +++ b/src/ix/ixon/serialize.rs @@ -1571,6 +1571,159 @@ impl Env { Ok(env) } + /// Memory-mapped sibling of [`Env::get_anon`]. Opens the `.ixe` + /// file with `mmap`, parses the header + section layout, and stores + /// every Section-2 constant as a [`LazyConstant`] window into the + /// mapping. No heap copy of constant bytes — the OS page cache is + /// the source of truth, paged in on demand. 
+ /// + /// The returned `Env` carries an internal `Arc` (via each + /// `LazyConstant`'s [`super::lazy::BytesSource::Mmap`] variant), so + /// the mapping stays alive as long as any consumer holds the env or + /// any clone of a `LazyConstant` from it. + /// + /// Sections 1 (blobs), 3 (names), 4 (named), and 5 (comms) are + /// handled the same way as `get_anon`: blobs are heap-copied (they + /// are small and consumed eagerly), names and named are + /// parse-and-discard (with hints harvested into `env.anon_hints`), + /// comms are skipped. + /// + /// On Linux, the kernel's adaptive readahead handles the linear + /// scan during section parsing efficiently; subsequent random + /// access from worker kernel-check threads pages in as needed. + pub fn get_anon_mmap(path: &std::path::Path) -> Result { + let file = std::fs::File::open(path).map_err(|e| { + format!("Env::get_anon_mmap: open {}: {e}", path.display()) + })?; + // SAFETY: We treat the mapping as read-only and never alias it + // mutably. Other processes truncating or replacing the file while + // it is mapped would invalidate our slices; that is a contract + // the caller is expected to honor (don't modify the .ixe + // underfoot). + let mmap = unsafe { + memmap2::Mmap::map(&file).map_err(|e| { + format!("Env::get_anon_mmap: mmap {}: {e}", path.display()) + })? + }; + let mmap = Arc::new(mmap); + + // `buf` is a moving cursor over `mmap[..]`. We compute byte + // offsets via `mmap.len() - buf.len()` so we can record per-const + // (offset, len) windows for `LazyConstant::from_mmap_slice`. 
+ let mmap_full: &[u8] = &mmap[..]; + let mut buf: &[u8] = mmap_full; + + // Header (same shape as Env::get_anon) + let tag = Tag4::get(&mut buf)?; + if tag.flag != Self::FLAG { + return Err(format!( + "Env::get_anon_mmap: expected flag 0x{:X}, got 0x{:X}", + Self::FLAG, + tag.flag + )); + } + if tag.size != 0 { + return Err(format!( + "Env::get_anon_mmap: expected Env variant 0, got {}", + tag.size + )); + } + let stored_root = get_address(&mut buf)?; + + let mut env = Env::new(); + + // Section 1: Blobs (heap-copied; small, eagerly consumed) + let num_blobs = get_u64(&mut buf)?; + for _ in 0..num_blobs { + let addr = get_address(&mut buf)?; + let len = get_u64(&mut buf)? as usize; + if buf.len() < len { + return Err(format!( + "Env::get_anon_mmap: need {} bytes for blob, have {}", + len, + buf.len() + )); + } + let (bytes, rest) = buf.split_at(len); + buf = rest; + env.blobs.insert(addr, bytes.to_vec()); + } + + // Section 2: Consts (mmap-backed lazy windows) + let num_consts = get_u64(&mut buf)?; + for _ in 0..num_consts { + let addr = get_address(&mut buf)?; + let len = Tag0::get(&mut buf)?.size as usize; + if buf.len() < len { + return Err(format!( + "Env::get_anon_mmap: need {} bytes for constant, have {}", + len, + buf.len() + )); + } + // `buf` is a suffix of `mmap_full`; the constant's bytes start + // at the current cursor and span `len` bytes. + let offset = mmap_full.len() - buf.len(); + env.store_const_lazy_mmap(addr, Arc::clone(&mmap), offset, len); + buf = &buf[len..]; + } + + // Section 3: Names — parse and DISCARD (needed transiently so + // section 4's indexed metadata can be decoded). 
+ let num_names = get_u64(&mut buf)?; + let mut names_lookup: FxHashMap = FxHashMap::default(); + let mut name_reverse_index: NameReverseIndex = + Vec::with_capacity(num_names as usize + 1); + let anon_addr = Address::from_blake3_hash(*Name::anon().get_hash()); + names_lookup.insert(anon_addr, Name::anon()); + for _ in 0..num_names { + let addr = get_address(&mut buf)?; + let name = get_name_component(&mut buf, &names_lookup)?; + name_reverse_index.push(addr.clone()); + names_lookup.insert(addr, name); + } + + // Section 4: Named — harvest `ReducibilityHints` from `Def` + // entries into `env.anon_hints`; discard the rest. See `get_anon` + // for the rationale. + let num_named = get_u64(&mut buf)?; + for _ in 0..num_named { + let _name_addr = get_address(&mut buf)?; + let named = get_named_indexed(&mut buf, &name_reverse_index)?; + if let super::metadata::ConstantMetaInfo::Def { hints, .. } = + &named.meta.info + { + env.anon_hints.insert(named.addr.clone(), *hints); + } + } + + // Section 5: Comms — parse and DISCARD. + let num_comms = get_u64(&mut buf)?; + for _ in 0..num_comms { + let _addr = get_address(&mut buf)?; + let _comm = Comm::get(&mut buf)?; + } + + drop(names_lookup); + drop(name_reverse_index); + + // Verify merkle root over loaded consts (same as get_anon). + let mut const_addrs: Vec
= + env.consts.iter().map(|e| e.key().clone()).collect(); + const_addrs.sort_unstable(); + let computed_root = + merkle_root_canonical(&const_addrs).unwrap_or_else(zero_address); + if computed_root != stored_root { + return Err(format!( + "Env::get_anon_mmap: merkle root mismatch (stored={}, computed={})", + stored_root.hex(), + computed_root.hex(), + )); + } + + Ok(env) + } + /// Calculate the serialized size of an Env. pub fn serialized_size(&self) -> Result { let mut buf = Vec::new(); From f792102bbf1ecfb001bd8dbae3414f69f8082d80 Mon Sep 17 00:00:00 2001 From: "John C. Burnham" Date: Mon, 18 May 2026 19:53:33 -0400 Subject: [PATCH 07/12] Address review findings: correctness + small refactors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Code-review pass over the anon-mode / mmap / cache-strip work. This commit lands the highest-value subset — the correctness fixes and the small refactors that prevent future drift — and leaves the larger items (format-version bump, sealed marker trait, AnonEnv audit) for separate follow-ups. Correctness: - Verify per-constant address on load (#1). `LazyConstant::verify_address` existed but was never invoked from `Env::get`, `get_anon`, or `get_anon_mmap`. The env-level merkle root catches missing/extra entries but not byte-tampering of a constant whose key is intact; without this check, corruption surfaced much later inside `LazyConstant::get` with a misleading parse error. Inline the `Address::hash(bytes) == addr` check in each loader's Section-2 loop. Added 3 corruption-detection tests (`env_const_bytes_tampering_*`). This required updating a handful of existing tests that stored constants under fake `Address::hash(b"a")` keys instead of their true content hashes — round-tripping such envs now correctly rejects. 
Added a `store_canonical(env, c) -> Address` test helper for the canonical pattern, and `*_discriminator(refs, n)` so tests can produce content-distinct constants when the same ref-set would otherwise collide. - Hard-error in `ixon_env_to_decoded` on parse failure (#6). `src/ffi/ixon/env.rs` used `filter_map` to silently drop any const whose bytes failed `LazyConstant::get` — the Lean caller had no signal of the lost entries. Switch to a Result-collecting loop and propagate the first parse error; update both FFI call sites (`rs_de_env`, `rs_de_env_anon`). - Doc fixes (#4, #5). `docs/Ixon.md`'s AssumptionTree section described only `Leaf=0x00` and `Node=0x01`, missing `Padding=0x01` (and the docs' `Node` tag collided with the real `Padding` tag — the real `Node` is `0x02`). Also: the env-section table cell said "opt-tagged merkle root" but the implementation writes a bare 32-byte address. Refactors that prevent future drift: - Canonical projection-address helpers (#9). Four production sites reconstructed `Constant::new(IxonCI::{D,I,R,C}Prj{...}).commit().0` by hand; the anon pipeline silently breaks if any one drifts. Extract `defn_proj_address` / `indc_proj_address` / `recr_proj_address` / `ctor_proj_address` (plus `_constant` variants) in `src/ix/ixon/constant.rs`. Update `compile.rs`, `compile/mutual.rs`, and the anon `*_proj_addr` helpers to call them. - `verify_proj_addr_in_env` helper (#21). `ingress_anon_block` had the same "computed-address not in env" check repeated four times (DPrj/RPrj/IPrj/CPrj). DRY into one helper that produces a consistent error format. UX: - Anon display labels switched to `#` everywhere (#18). Rust was emitting `@` in progress/fail-out; Lean's `runCheckAnon` was emitting `#{i}` (result index). Standardize on `#` so the CLI failure summary is joinable with the fail-out file. 
This required exposing addresses per result slot to Lean — `rsCheckAnonFFI` now returns `Array (String × Option CheckError)` pairing each result with its content-address hex string. Rust builds the pairs via a new `build_anon_result_array` helper. - Stale "check-ixon" header in FailureLog (#16). One-line rename in the doc comment + `# ix check-ixon failures` writeln to `# ix check failures`. Tests: - `testPrimitivesParity` (PrimAddrs regen-parity test). Catches silent drift between hardcoded primitive addresses in `PrimAddrs::new()` and what `rsCompileEnvFFI` produces from the live Lean primitives. Plumbing: new `PrimAddrs::lean_parity_table()` returns `(lean_name, hex)` pairs; new `rs_prim_addrs_canonical` FFI exposes them to Lean; new test in `BuildPrimitives.lean` iterates `kernelPrimitives`, compares against the hardcoded table, and fails with a printable diff on mismatch (with instructions to regenerate). Skipped: `eagerReduce` — synthetic kernel marker whose PrimAddrs value (`0xff…3`) intentionally diverges from its compiled content hash (which collides with `id`). - 3 new corruption-detection tests in `serialize.rs` exercise the Section-2 verify check for `Env::get`, `Env::get_anon`, `Env::get_anon_mmap`. Verification: `cargo test --lib` 1025 passing (3 new), `cargo clippy --lib --all-targets` clean, `lake build` clean, `lake exe ix check compileinitstd.ixe` 105487/105487 in ~21s, `--anon` 89010/89010 with peak RSS 1.4 GB, `.lake/build/bin/IxTests --ignored rust-kernel-build-primitives` shows both `build primitives dump` and `primitive address parity (PrimAddrs vs live compile)` pass. 
Out of scope (deferred to follow-ups): - #2 Format version bump (Env::FLAG) - #3 rs_kernel_check_consts_anon still uses Env::get on main - #8 Sealed marker trait for the lazy_anon transmute - #11/#12 AnonEnv audit (vestigial wrapper) - #13, #15, #17, #19, #20, #22-25 Various quality cleanups --- Ix/Cli/CheckCmd.lean | 10 +- Ix/KernelCheck.lean | 33 +++--- Tests/Ix/Kernel/BuildPrimitives.lean | 76 ++++++++++++- docs/Ixon.md | 15 ++- src/ffi/ixon/env.rs | 53 +++++---- src/ffi/kernel.rs | 67 ++++++++++-- src/ix/compile.rs | 30 ++--- src/ix/compile/mutual.rs | 25 +---- src/ix/ixon/constant.rs | 56 ++++++++++ src/ix/ixon/env.rs | 45 ++++++-- src/ix/ixon/serialize.rs | 157 ++++++++++++++++++++++++--- src/ix/kernel/ingress.rs | 110 ++++++++++--------- src/ix/kernel/primitive.rs | 111 +++++++++++++++++++ 13 files changed, 619 insertions(+), 169 deletions(-) diff --git a/Ix/Cli/CheckCmd.lean b/Ix/Cli/CheckCmd.lean index 939f7498..c06ede9f 100644 --- a/Ix/Cli/CheckCmd.lean +++ b/Ix/Cli/CheckCmd.lean @@ -146,10 +146,14 @@ private def runCheckAnon (envPath : String) (p : Cli.Parsed) : IO UInt32 := do let mut passed := 0 let mut failures : Array (String × String) := #[] - for i in [:results.size] do - match results[i]! with + for (hex, res) in results do + match res with | none => passed := passed + 1 - | some err => failures := failures.push (s!"#{i}", err.message) + -- Label with the full content address (`#`) to match the + -- Rust-side progress / fail-out output. Pre-#48 we emitted + -- `#{i}` (result index), which made the CLI summary unjoinable + -- with the fail-out file's `#` entries. 
+ | some err => failures := failures.push (s!"#{hex}", err.message) IO.println s!"[check] checked {results.size} constants in {elapsed.formatMs}" IO.println s!"[check] {passed}/{results.size} passed" diff --git a/Ix/KernelCheck.lean b/Ix/KernelCheck.lean index a4c54ace..b5cf3a05 100644 --- a/Ix/KernelCheck.lean +++ b/Ix/KernelCheck.lean @@ -118,24 +118,29 @@ opaque rsIxonNamesFFI : @& String → IO (Array Lean.Name) /-- FFI: metadata-free anonymous-mode type-check of an entire `.ixe`. - Loads the env via `IxonEnv::get_anon` (which discards the - `named`/`names`/`comms` sections during deserialization), enumerates - every kernel-checkable target by iterating `consts` (skipping - projection constants — they're covered by their parent Muts block - work item, with projection addresses reconstructed deterministically - via `Constant::commit`), and runs `TypeChecker::check_const` - on each. The kernel's typechecking logic structurally cannot read - metadata: every `M::MField` is `()` in Anon mode. + Loads the env via `IxonEnv::get_anon_mmap` (which mmaps the file + and discards the `named`/`names`/`comms` sections during + deserialization), enumerates every kernel-checkable target by + iterating `consts` (skipping projection constants — they're + covered by their parent Muts block work item, with projection + addresses reconstructed deterministically via `Constant::commit`), + and runs `TypeChecker::check_const` on each. The kernel's + typechecking logic structurally cannot read metadata: every + `M::MField` is `()` in Anon mode. `fail_out` is a streaming failure log path; pass `""` to disable. - Progress labels use `@` instead of Lean names — the kernel - operates on addresses only. -/ + + Returns an array of `(hex_address, Option CheckError)` pairs — one + per kernel-checkable target. The kernel has no name to associate + with each slot, so each result carries its address back, letting the + Lean CLI label failures with `#` followed by that hex address, + matching the Rust progress and fail-out output. 
-/ @[extern "rs_kernel_check_anon"] opaque rsCheckAnonFFI : - @& String → -- .ixe path - @& Bool → -- quiet - @& String → -- fail-out path ("" = none) - IO (Array (Option CheckError)) + @& String → -- .ixe path + @& Bool → -- quiet + @& String → -- fail-out path ("" = none) + IO (Array (String × Option CheckError)) end Ix.KernelCheck diff --git a/Tests/Ix/Kernel/BuildPrimitives.lean b/Tests/Ix/Kernel/BuildPrimitives.lean index 674fb86b..9bde20b8 100644 --- a/Tests/Ix/Kernel/BuildPrimitives.lean +++ b/Tests/Ix/Kernel/BuildPrimitives.lean @@ -181,6 +181,80 @@ def testBuildPrimitives : TestSeq := return (missing.isEmpty, found, missing.size, msg) ) .done -def suite : List TestSeq := [testBuildPrimitives] +/-- FFI: expose `PrimAddrs::new()` from Rust as + `(lean_name, hex_address)` pairs in the same order as + `kernelPrimitives` below. Used to detect drift between hardcoded + addresses and what `rsCompileEnvFFI` would produce today. -/ +@[extern "rs_prim_addrs_canonical"] +opaque rsPrimAddrsCanonicalFFI : IO (Array (String × String)) + +/-- Parity check: every primitive's content address as stored in + Rust's `PrimAddrs::new()` must match the address produced by + compiling that primitive's Lean declaration through the live + `rsCompileEnvFFI` pipeline. A mismatch means someone changed the + compile/serialize logic in a way that altered a primitive's + content hash — Aiur and downstream kernel primitive resolution + will silently break if `PrimAddrs::new()` isn't updated. + + On failure: re-run `lake test --ignored + rust-kernel-build-primitives` to dump the fresh table, then + paste over `PrimAddrs::new` in `src/ix/kernel/primitive.rs` + (and update `PrimAddrs::lean_parity_table` to keep it in lock-step). + + Exception: `eagerReduce` is a synthetic kernel marker whose + PrimAddrs value (`0xff…3`) intentionally diverges from its + compiled content hash (which collides with `id`). Skip the check + for it.
-/ +def testPrimitivesParity : TestSeq := + .individualIO "primitive address parity (PrimAddrs vs live compile)" none (do + -- Reuse the same Lean → Ixon compile pipeline as testBuildPrimitives. + let leanEnv ← get_env! + let roots := kernelPrimitives.map parseNameToLean + let needed := collectDeps leanEnv roots + let filtered := leanEnv.constants.toList.filter fun (name, _) => + needed.contains name + let rawEnv ← Ix.CompileM.rsCompileEnvFFI filtered + let env : Ixon.Env := rawEnv.toEnv + + let hardcoded ← rsPrimAddrsCanonicalFFI + let lookup : Std.HashMap String String := + hardcoded.foldl (init := {}) fun m (n, h) => m.insert n h + + let mut mismatches : Array String := #[] + let mut missing : Array String := #[] + for primName in kernelPrimitives do + -- The synthetic eagerReduce marker is intentionally unequal to + -- the compiled `id` hash; skip parity for it. + if primName == "eagerReduce" then continue + let ixName := parseIxName primName + match env.named[ixName]? with + | none => + missing := missing.push primName + | some named => + let computed := toString named.addr + match lookup[primName]? with + | none => + mismatches := mismatches.push s!"{primName}: missing from PrimAddrs::lean_parity_table" + | some hardcoded_hex => + if computed != hardcoded_hex then + mismatches := mismatches.push + s!"{primName}: live={computed} PrimAddrs={hardcoded_hex}" + + if !missing.isEmpty then + IO.eprintln s!"primitive parity: {missing.size} primitive(s) missing from compiled env:" + for n in missing do IO.eprintln s!" {n}" + if !mismatches.isEmpty then + IO.eprintln s!"primitive parity: {mismatches.size} address mismatch(es):" + for m in mismatches do IO.eprintln s!" {m}" + IO.eprintln "Regenerate via `lake test --ignored rust-kernel-build-primitives` and paste into src/ix/kernel/primitive.rs (PrimAddrs::new + lean_parity_table)." 
+ + let total_problems := missing.size + mismatches.size + let msg : Option String := + if total_problems == 0 then none + else some s!"{mismatches.size} mismatch(es), {missing.size} missing" + return (total_problems == 0, kernelPrimitives.size - total_problems, total_problems, msg) + ) .done + +def suite : List TestSeq := [testBuildPrimitives, testPrimitivesParity] end Tests.Ix.Kernel.BuildPrimitives diff --git a/docs/Ixon.md b/docs/Ixon.md index 0121f668..f8a2798e 100644 --- a/docs/Ixon.md +++ b/docs/Ixon.md @@ -881,7 +881,7 @@ fits in single-byte tags (sizes 0..=7 per flag). | Size | Byte | Type | Payload | |------|------|------|---------| -| 0 | `0xE0` | Environment | opt-tagged merkle root + sections | +| 0 | `0xE0` | Environment | bare 32-byte merkle root + 5 sections | | 1 | `0xE1` | Commitment | 2 addr: secret, payload | | 2 | `0xE2` | AssumptionTree | recursive merkle-tree body (see below) | | 3 | `0xE3` | Eval claim | 2 addr (input, output) + opt assumptions | @@ -931,15 +931,24 @@ pub enum Claim { A serializable merkle tree over `Address` leaves, used to recover the leaf set committed to by a conditional claim's `assumptions` root. +The body has **three** variants — leaves, internal nodes, and an +explicit `Padding` sentinel that the canonical builder inserts at +odd-count levels so the byte-tree matches `merkle_root_canonical`'s +zero-mixing shape (a `Padding` hashes to `zero_address()`). + ``` [Tag4(0xE, 2) = 0xE2] [body] body recursive: Leaf(addr): [0x00] [addr:32] - Node(l, r): [0x01] [body l] [body r] + Padding: [0x01] + Node(l, r): [0x02] [body l] [body r] ``` -Size: `34N - 1` bytes for N leaves + 1 byte top-level tag. +Size is shape-dependent: each `Leaf` is 33 bytes, each `Padding` is +1 byte, each `Node` is 1 byte plus its children, and the top-level +`Tag4` adds 1 byte. For example, `N = 1` ⇒ 34 bytes; `N = 2` ⇒ 68 +bytes; `N = 3` (padded to 4) ⇒ 104 bytes. 
### Commitment Hashing diff --git a/src/ffi/ixon/env.rs b/src/ffi/ixon/env.rs index 233b9479..fbd58708 100644 --- a/src/ffi/ixon/env.rs +++ b/src/ffi/ixon/env.rs @@ -336,19 +336,24 @@ pub fn decoded_to_ixon_env(decoded: &DecodedRawEnv) -> IxonEnv { /// Convert a Rust IxonEnv to a DecodedRawEnv. /// /// Forces materialization of every constant — callers operating on a -/// freshly-loaded lazy env pay the parse cost here. -pub fn ixon_env_to_decoded(env: &IxonEnv) -> DecodedRawEnv { - let consts = env - .consts - .iter() - .filter_map(|e| { - let c = e.value().get().ok()?; - Some(DecodedRawConst { - addr: e.key().clone(), - constant: (*c).clone(), - }) - }) - .collect(); +/// freshly-loaded lazy env pay the parse cost here. Returns `Err` +/// on the first const that fails to materialize (corrupt bytes, +/// trailing data, etc.); silently dropping such entries would leave +/// the Lean caller with no signal of the lost data. +pub fn ixon_env_to_decoded(env: &IxonEnv) -> Result { + let mut consts: Vec = Vec::with_capacity(env.consts.len()); + for e in env.consts.iter() { + let c = e.value().get().map_err(|err| { + format!( + "ixon_env_to_decoded: failed to materialize const {}: {err}", + e.key().hex() + ) + })?; + consts.push(DecodedRawConst { + addr: e.key().clone(), + constant: (*c).clone(), + }); + } let named = env .named .iter() @@ -376,7 +381,7 @@ pub fn ixon_env_to_decoded(env: &IxonEnv) -> DecodedRawEnv { name: e.value().clone(), }) .collect(); - DecodedRawEnv { consts, named, blobs, comms, names } + Ok(DecodedRawEnv { consts, named, blobs, comms, names }) } // ============================================================================= @@ -431,10 +436,12 @@ pub extern "C" fn rs_de_env( let data = obj.as_bytes(); let mut slice: &[u8] = data; match IxonEnv::get(&mut slice) { - Ok(env) => { - let decoded = ixon_env_to_decoded(&env); - let raw_env = LeanIxonRawEnv::build(&decoded); - LeanExcept::ok(raw_env) + Ok(env) => match ixon_env_to_decoded(&env) { + 
Ok(decoded) => { + let raw_env = LeanIxonRawEnv::build(&decoded); + LeanExcept::ok(raw_env) + }, + Err(e) => LeanExcept::error_string(&format!("rs_de_env: {e}")), }, Err(e) => { let msg = format!("rs_de_env: {}", e); @@ -457,10 +464,12 @@ pub extern "C" fn rs_de_env_anon( let data = obj.as_bytes(); let mut slice: &[u8] = data; match IxonEnv::get_anon(&mut slice) { - Ok(env) => { - let decoded = ixon_env_to_decoded(&env); - let raw_env = LeanIxonRawEnv::build(&decoded); - LeanExcept::ok(raw_env) + Ok(env) => match ixon_env_to_decoded(&env) { + Ok(decoded) => { + let raw_env = LeanIxonRawEnv::build(&decoded); + LeanExcept::ok(raw_env) + }, + Err(e) => LeanExcept::error_string(&format!("rs_de_env_anon: {e}")), }, Err(e) => { let msg = format!("rs_de_env_anon: {}", e); diff --git a/src/ffi/kernel.rs b/src/ffi/kernel.rs index 234cce6b..b5b4cbc9 100644 --- a/src/ffi/kernel.rs +++ b/src/ffi/kernel.rs @@ -42,7 +42,7 @@ use rustc_hash::FxHashMap; use lean_ffi::object::{ LeanArray, LeanBool, LeanBorrowed, LeanIOResult, LeanList, LeanOption, - LeanOwned, LeanRef, LeanString, + LeanOwned, LeanProd, LeanRef, LeanString, }; use crate::lean::LeanIxCheckError; @@ -116,7 +116,7 @@ const KERNEL_EXCEPTION_TAG: u8 = 0; const COMPILE_ERROR_TAG: u8 = 1; /// Streaming writer for the `--fail-out` file used by `lake exe ix -/// check-ixon`. +/// check`. /// /// The previous implementation buffered all failures in Lean and dumped them /// once at the very end of the run, which meant a long-running full-env @@ -141,7 +141,7 @@ impl FailureLog { /// `# seeds`), and return a handle ready to record per-failure entries. 
fn open(path: &str, env_path: &str, seeds: usize) -> std::io::Result { let mut file = File::create(path)?; - writeln!(file, "# ix check-ixon failures")?; + writeln!(file, "# ix check failures")?; writeln!(file, "# env: {env_path}")?; writeln!(file, "# seeds: {seeds}")?; writeln!(file)?; @@ -733,6 +733,29 @@ pub extern "C" fn rs_kernel_ixon_names( LeanIOResult::ok(build_lean_name_array(&names)) } +/// FFI: expose the canonical primitive `(lean_name, hex_address)` +/// table from `PrimAddrs::new()` to the Lean test suite. +/// +/// Used by `testPrimitivesParity` +/// (`Tests/Ix/Kernel/BuildPrimitives.lean`) to detect drift between +/// the hardcoded `PrimAddrs::new()` addresses and the freshly +/// compiled addresses produced by `rsCompileEnvFFI`. Any future +/// compile/serialize change that touches a primitive's content hash +/// will fail this test with a diff before the breakage propagates +/// to downstream consumers (Aiur, kernel primitive resolution). +#[unsafe(no_mangle)] +pub extern "C" fn rs_prim_addrs_canonical() -> LeanIOResult { + let table = crate::ix::kernel::primitive::PrimAddrs::lean_parity_table(); + let arr = LeanArray::alloc(table.len()); + for (i, (name, hex)) in table.iter().enumerate() { + let name_obj: LeanOwned = LeanString::new(name).into(); + let hex_obj: LeanOwned = LeanString::new(hex).into(); + let pair: LeanOwned = LeanProd::new(name_obj, hex_obj).into(); + arr.set(i, pair); + } + LeanIOResult::ok(arr) +} + fn all_checkable_ixon_names(ixon_env: &IxonEnv) -> Vec { let mut names = Vec::with_capacity(ixon_env.named_count()); for entry in ixon_env.named.iter() { @@ -1432,7 +1455,7 @@ fn run_anon_checks_parallel( (primary_addr.clone(), result_idxs.clone()) }, }; - let display = format!("@{}", primary_addr.hex()); + let display = format!("#{}", primary_addr.hex()); let prefix = format!(" [{}/{work_total}] {display}", work_idx + 1); progress_worker.begin(worker_idx, &prefix); @@ -1466,7 +1489,7 @@ fn run_anon_checks_parallel( if let 
(Some(log), Err((_, msg))) = (failure_log_worker.as_ref(), result.as_ref()) { - let label = format!("@{}", addrs[result_idx].hex()); + let label = format!("#{}", addrs[result_idx].hex()); log.record(&label, msg); } } @@ -1598,6 +1621,9 @@ pub extern "C" fn rs_kernel_check_anon( }; let total = addrs.len(); + // Keep our own copy for the per-slot `(hex, result)` FFI return; + // the runner clones internally for worker dispatch. + let addrs_for_return = addrs.clone(); let t3 = Instant::now(); let ixon_env_arc = Arc::new(ixon_env); let results = match run_anon_checks_parallel( @@ -1634,7 +1660,7 @@ pub extern "C" fn rs_kernel_check_anon( ); } - build_result_array(&results) + build_anon_result_array(&addrs_for_return, &results) } #[cfg(test)] @@ -1920,7 +1946,7 @@ where name.pretty() } else { match ixon_env.lookup_name(name) { - Some(named) => format!("@{}", named.addr.hex()), + Some(named) => format!("#{}", named.addr.hex()), None => name.pretty(), } }; @@ -2597,6 +2623,33 @@ fn build_result_array(results: &[CheckRes]) -> LeanIOResult { LeanIOResult::ok(arr) } +/// Build an `IO (Array (String × Option CheckError))` from Rust +/// `(address, result)` pairs. +/// +/// Used by `rs_kernel_check_anon` to return per-result content +/// addresses alongside the check outcome — the anon CLI has no +/// Lean-side name to associate with each result slot, so the +/// `#<hex>` address label has to come back through the FFI to keep +/// `--fail-out` and `[check]` summary lines content-addressed.
+fn build_anon_result_array( + addrs: &[Address], + results: &[CheckRes], +) -> LeanIOResult { + debug_assert_eq!( + addrs.len(), + results.len(), + "build_anon_result_array: addrs/results length mismatch" + ); + let arr = LeanArray::alloc(results.len()); + for (i, result) in results.iter().enumerate() { + let hex: LeanOwned = LeanString::new(&addrs[i].hex()).into(); + let res_obj = build_option_result(result); + let pair: LeanOwned = LeanProd::new(hex, res_obj).into(); + arr.set(i, pair); + } + LeanIOResult::ok(arr) +} + /// Build a result array of length `count` where every slot is the same /// compile-kind error. Used when compile/ingress/thread setup fails /// before per-constant checking can begin — the error arose before the diff --git a/src/ix/compile.rs b/src/ix/compile.rs index 1f5784fe..e5a5228a 100644 --- a/src/ix/compile.rs +++ b/src/ix/compile.rs @@ -28,9 +28,10 @@ use crate::{ ix::ixon::{ CompileError, Tag0, constant::{ - Axiom, Constant, ConstantInfo, Constructor, ConstructorProj, Definition, - DefinitionProj, Inductive, InductiveProj, MutConst as IxonMutConst, - Quotient, Recursor, RecursorProj, RecursorRule, + Axiom, Constant, ConstantInfo, Constructor, Definition, Inductive, + MutConst as IxonMutConst, Quotient, Recursor, RecursorRule, + ctor_proj_constant, defn_proj_constant, indc_proj_constant, + recr_proj_constant, }, env::{Env as IxonEnv, Named}, expr::Expr, @@ -3658,18 +3659,10 @@ fn compile_mutual( let meta = all_metas.get(&n).cloned().unwrap_or_default(); let proj = match cnst { - MutConst::Defn(_) => { - Constant::new(ConstantInfo::DPrj(DefinitionProj { - idx, - block: block_addr.clone(), - })) - }, + MutConst::Defn(_) => defn_proj_constant(idx, block_addr.clone()), MutConst::Indc(ind) => { // Inductive projection - let indc_proj = Constant::new(ConstantInfo::IPrj(InductiveProj { - idx, - block: block_addr.clone(), - })); + let indc_proj = indc_proj_constant(idx, block_addr.clone()); let mut proj_bytes = Vec::new(); indc_proj.put(&mut 
proj_bytes); let proj_addr = Address::hash(&proj_bytes); @@ -3689,11 +3682,7 @@ fn compile_mutual( let ctor_meta = all_metas.get(&ctor.cnst.name).cloned().unwrap_or_default(); let ctor_proj = - Constant::new(ConstantInfo::CPrj(ConstructorProj { - idx, - cidx: cidx as u64, - block: block_addr.clone(), - })); + ctor_proj_constant(idx, cidx as u64, block_addr.clone()); let mut ctor_bytes = Vec::new(); ctor_proj.put(&mut ctor_bytes); let ctor_addr = Address::hash(&ctor_bytes); @@ -3711,10 +3700,7 @@ fn compile_mutual( continue; }, - MutConst::Recr(_) => Constant::new(ConstantInfo::RPrj(RecursorProj { - idx, - block: block_addr.clone(), - })), + MutConst::Recr(_) => recr_proj_constant(idx, block_addr.clone()), }; let mut proj_bytes = Vec::new(); diff --git a/src/ix/compile/mutual.rs b/src/ix/compile/mutual.rs index fa4bd040..21793c07 100644 --- a/src/ix/compile/mutual.rs +++ b/src/ix/compile/mutual.rs @@ -34,8 +34,8 @@ use crate::ix::env::{ use crate::ix::ixon::{ CompileError, constant::{ - Constant, ConstantInfo, ConstructorProj, DefKind, DefinitionProj, - InductiveProj, MutConst as IxonMutConst, RecursorProj, + Constant, DefKind, MutConst as IxonMutConst, ctor_proj_constant, + defn_proj_constant, indc_proj_constant, recr_proj_constant, }, env::Named, metadata::{ConstantMeta, ConstantMetaInfo}, @@ -285,10 +285,7 @@ pub(crate) fn compile_aux_block_with_rename( match cnst { MutConst::Indc(ind) => { // Inductive projection - let indc_proj = Constant::new(ConstantInfo::IPrj(InductiveProj { - idx, - block: block_addr.clone(), - })); + let indc_proj = indc_proj_constant(idx, block_addr.clone()); let proj_addr = content_address(&indc_proj); stt.env.store_const(proj_addr.clone(), indc_proj); stt @@ -307,11 +304,7 @@ pub(crate) fn compile_aux_block_with_rename( let ctor_meta = all_metas.get(&ctor.cnst.name).cloned().unwrap_or_default(); let ctor_proj = - Constant::new(ConstantInfo::CPrj(ConstructorProj { - idx, - cidx: cidx as u64, - block: block_addr.clone(), - })); + 
ctor_proj_constant(idx, cidx as u64, block_addr.clone()); let ctor_addr = content_address(&ctor_proj); stt.env.store_const(ctor_addr.clone(), ctor_proj); stt.env.register_name( @@ -326,10 +319,7 @@ pub(crate) fn compile_aux_block_with_rename( } }, MutConst::Recr(_) => { - let proj = Constant::new(ConstantInfo::RPrj(RecursorProj { - idx, - block: block_addr.clone(), - })); + let proj = recr_proj_constant(idx, block_addr.clone()); let proj_addr = content_address(&proj); stt.env.store_const(proj_addr.clone(), proj); stt @@ -340,10 +330,7 @@ pub(crate) fn compile_aux_block_with_rename( pending_names.push(n); }, MutConst::Defn(_) => { - let proj = Constant::new(ConstantInfo::DPrj(DefinitionProj { - idx, - block: block_addr.clone(), - })); + let proj = defn_proj_constant(idx, block_addr.clone()); let proj_addr = content_address(&proj); stt.env.store_const(proj_addr.clone(), proj); stt diff --git a/src/ix/ixon/constant.rs b/src/ix/ixon/constant.rs index a7979dde..0f311566 100644 --- a/src/ix/ixon/constant.rs +++ b/src/ix/ixon/constant.rs @@ -251,6 +251,62 @@ impl Constant { } } +// ============================================================================ +// Canonical projection-address helpers +// ============================================================================ +// +// The compile and ingress pipelines independently derive projection +// addresses for members of a mutual block — each member's +// `D/I/R/CPrj` wrapper has a deterministic content hash that becomes +// the kernel's lookup key. Centralizing the wrapper construction + +// `.commit().0` here keeps all callers in lock-step: drift between +// the compile-side address (what gets stored in `env.consts`) and +// the kernel-side address (what `LazyAnonIngress` looks up) is the +// kind of bug that surfaces only on full-env runs and is painful to +// diagnose. + +/// Deterministic `DPrj` content address for member `idx` of `block`. 
+pub fn defn_proj_address(idx: u64, block: &Address) -> Address { + defn_proj_constant(idx, block.clone()).commit().0 +} + +/// Deterministic `IPrj` content address for member `idx` of `block`. +pub fn indc_proj_address(idx: u64, block: &Address) -> Address { + indc_proj_constant(idx, block.clone()).commit().0 +} + +/// Deterministic `RPrj` content address for member `idx` of `block`. +pub fn recr_proj_address(idx: u64, block: &Address) -> Address { + recr_proj_constant(idx, block.clone()).commit().0 +} + +/// Deterministic `CPrj` content address for ctor `(idx, cidx)` of `block`. +pub fn ctor_proj_address(idx: u64, cidx: u64, block: &Address) -> Address { + ctor_proj_constant(idx, cidx, block.clone()).commit().0 +} + +/// Canonical `DPrj` `Constant` for member `idx` of `block`. Use this +/// (rather than ad-hoc `Constant::new(ConstantInfo::DPrj { ... })`) +/// so all callers share one construction path. +pub fn defn_proj_constant(idx: u64, block: Address) -> Constant { + Constant::new(ConstantInfo::DPrj(DefinitionProj { idx, block })) +} + +/// Canonical `IPrj` `Constant` for member `idx` of `block`. +pub fn indc_proj_constant(idx: u64, block: Address) -> Constant { + Constant::new(ConstantInfo::IPrj(InductiveProj { idx, block })) +} + +/// Canonical `RPrj` `Constant` for member `idx` of `block`. +pub fn recr_proj_constant(idx: u64, block: Address) -> Constant { + Constant::new(ConstantInfo::RPrj(RecursorProj { idx, block })) +} + +/// Canonical `CPrj` `Constant` for ctor `(idx, cidx)` of `block`. +pub fn ctor_proj_constant(idx: u64, cidx: u64, block: Address) -> Constant { + Constant::new(ConstantInfo::CPrj(ConstructorProj { idx, cidx, block })) +} + #[cfg(test)] pub mod tests { use super::*; diff --git a/src/ix/ixon/env.rs b/src/ix/ixon/env.rs index 8b713efb..c3fbbb6f 100644 --- a/src/ix/ixon/env.rs +++ b/src/ix/ixon/env.rs @@ -440,12 +440,22 @@ mod tests { assert_eq!(addr1, addr3); } - /// Build a constant with the given refs (for BFS tests). 
+ /// Build a constant with the given refs (for BFS tests). `discriminator` + /// is folded into `lvls` so callers can produce content-distinct + /// constants when the same ref-set would otherwise collide (e.g. + /// two independent leaf nodes both with empty refs). fn const_with_refs(refs: Vec
) -> Constant { + const_with_refs_discriminator(refs, 0) + } + + fn const_with_refs_discriminator( + refs: Vec
, + discriminator: u64, + ) -> Constant { Constant::with_tables( ConstantInfo::Axio(Axiom { is_unsafe: false, - lvls: 0, + lvls: discriminator, typ: Arc::new(Expr::Sort(0)), }), Vec::new(), @@ -454,6 +464,17 @@ mod tests { ) } + /// Store a constant at its true content address and return that + /// address. Use this instead of `store_const(Address::hash(b"a"), + /// ...)` for tests that round-trip through `Env::put`/`Env::get`; + /// the load path verifies `Address::hash(bytes) == addr` per + /// entry, so fake keys are rejected. + fn store_canonical(env: &Env, c: Constant) -> Address { + let (addr, _) = c.commit(); + env.store_const(addr.clone(), c); + addr + } + #[test] fn bfs_refs_singleton_no_deps() { let env = Env::new(); @@ -566,16 +587,18 @@ mod tests { /// pre-populate the cache). #[test] fn lazy_sparsity_only_materializes_closure() { - // Build a small env: a→b→c, and an independent d. + // Build a small env: a→b→c, and an independent d. Each const is + // stored at its true content address (round-trip through `put`+`get` + // verifies `Address::hash(bytes) == addr`). The `d` discriminator + // avoids a content-hash collision with `c` (both have empty refs). let env = Env::new(); - let a = Address::hash(b"a"); - let b = Address::hash(b"b"); - let c = Address::hash(b"c"); - let d = Address::hash(b"d"); - env.store_const(a.clone(), const_with_refs(vec![b.clone()])); - env.store_const(b.clone(), const_with_refs(vec![c.clone()])); - env.store_const(c.clone(), const_with_refs(vec![])); - env.store_const(d.clone(), const_with_refs(vec![])); + let c = store_canonical(&env, const_with_refs(vec![])); + let b = store_canonical(&env, const_with_refs(vec![c.clone()])); + let a = store_canonical(&env, const_with_refs(vec![b.clone()])); + let d = store_canonical( + &env, + const_with_refs_discriminator(vec![], 1), + ); // Serialize → deserialize so all entries are lazy-from-bytes. 
let mut buf = Vec::new(); diff --git a/src/ix/ixon/serialize.rs b/src/ix/ixon/serialize.rs index 468a8da1..cbe3bbc6 100644 --- a/src/ix/ixon/serialize.rs +++ b/src/ix/ixon/serialize.rs @@ -1364,7 +1364,7 @@ impl Env { // Section 2: Consts (lazy: read length prefix, slice bytes, defer parse) let num_consts = get_u64(buf)?; - for _ in 0..num_consts { + for i in 0..num_consts { let addr = get_address(buf)?; let len = Tag0::get(buf)?.size as usize; if buf.len() < len { @@ -1376,6 +1376,20 @@ impl Env { } let (bytes, rest) = buf.split_at(len); *buf = rest; + // Per-entry integrity: hash the bytes and compare with the + // stored address. The env-level merkle root over `consts.keys()` + // catches missing/extra entries but not byte-tampering of a + // constant whose key is intact; without this check, corruption + // would slip past `Env::get` and surface much later as a + // misleading parse error inside `LazyConstant::get`. + let computed = Address::hash(bytes); + if computed != addr { + return Err(format!( + "Env::get: const at idx {i} bytes hash to {} but stored under {}", + computed.hex(), + addr.hex() + )); + } env.store_const_lazy(addr, bytes.into()); } @@ -1494,7 +1508,7 @@ impl Env { // Section 2: Consts (kept, lazy) let num_consts = get_u64(buf)?; - for _ in 0..num_consts { + for i in 0..num_consts { let addr = get_address(buf)?; let len = Tag0::get(buf)?.size as usize; if buf.len() < len { @@ -1506,6 +1520,14 @@ impl Env { } let (bytes, rest) = buf.split_at(len); *buf = rest; + let computed = Address::hash(bytes); + if computed != addr { + return Err(format!( + "Env::get_anon: const at idx {i} bytes hash to {} but stored under {}", + computed.hex(), + addr.hex() + )); + } env.store_const_lazy(addr, bytes.into()); } @@ -1651,7 +1673,7 @@ impl Env { // Section 2: Consts (mmap-backed lazy windows) let num_consts = get_u64(&mut buf)?; - for _ in 0..num_consts { + for i in 0..num_consts { let addr = get_address(&mut buf)?; let len = Tag0::get(&mut buf)?.size as 
usize; if buf.len() < len { @@ -1664,6 +1686,15 @@ impl Env { // `buf` is a suffix of `mmap_full`; the constant's bytes start // at the current cursor and span `len` bytes. let offset = mmap_full.len() - buf.len(); + let bytes = &mmap_full[offset..offset + len]; + let computed = Address::hash(bytes); + if computed != addr { + return Err(format!( + "Env::get_anon_mmap: const at idx {i} bytes hash to {} but stored under {}", + computed.hex(), + addr.hex() + )); + } env.store_const_lazy_mmap(addr, Arc::clone(&mmap), offset, len); buf = &buf[len..]; } @@ -2004,9 +2035,14 @@ mod tests { let meta = ConstantMeta::default(); // Sometimes generate a Named.original to exercise that serialization path. let original = if bool::arbitrary(g) { - let orig_addr = Address::arbitrary(g); - // Store the original constant too so the env is self-consistent. - env.store_const(orig_addr.clone(), gen_constant(g)); + // Store the original constant under its true content address — + // `Env::get` verifies `Address::hash(bytes) == addr` per entry, + // so a random `Address::arbitrary` would be rejected on load. + let orig_constant = gen_constant(g); + let mut orig_buf = Vec::new(); + orig_constant.put(&mut orig_buf); + let orig_addr = Address::hash(&orig_buf); + env.store_const(orig_addr.clone(), orig_constant); Some((orig_addr, ConstantMeta::default())) } else { None @@ -2148,13 +2184,17 @@ mod tests { // ---------- Env merkle root tests ---------- fn defn_const(refs: Vec
) -> Constant { + defn_const_discriminator(refs, 0) + } + + fn defn_const_discriminator(refs: Vec
, lvls: u64) -> Constant { use crate::ix::env::DefinitionSafety; use crate::ix::ixon::constant::{DefKind, Definition}; Constant::with_tables( ConstantInfo::Defn(Definition { kind: DefKind::Definition, safety: DefinitionSafety::Safe, - lvls: 0, + lvls, typ: Expr::sort(0), value: Expr::var(0), }), @@ -2164,6 +2204,16 @@ mod tests { ) } + /// Store `c` at its true content address; returns the address. + /// Tests that serialize+deserialize through `Env::put`/`Env::get` + /// must use canonical addresses because the load path verifies + /// `Address::hash(bytes) == addr` per entry. + fn store_canonical(env: &Env, c: Constant) -> Address { + let (addr, _) = c.commit(); + env.store_const(addr.clone(), c); + addr + } + /// Extract the stored merkle root from a serialized env. The Tag4 /// header byte (`0xE0` for env) is followed by exactly 32 bytes of /// root (no opt-tag). @@ -2245,6 +2295,81 @@ mod tests { assert!(res.is_err(), "tampered root should be rejected"); } + /// Flip a byte inside the first const's payload (not its stored + /// address). The const is stored under its true content address, so + /// the env is valid before the flip and the merkle root over + /// `consts.keys()` still validates: only the per-entry + /// `Address::hash(bytes) == addr` check can reject the flipped byte. + /// Without it the failure would surface later in `LazyConstant::get`. + /// + /// Header layout for an env with empty blobs and one const: + /// [0] Tag4 (0xE0) + /// [1..33) merkle root (32 bytes) + /// [33] Section 1 (blobs) count = 0 (Tag0) + /// [34] Section 2 (consts) count = 1 (Tag0) + /// [35..67) const address (32 bytes) + /// [67] Tag0 length of const bytes + /// [68..] const bytes (target for tampering) + #[test] + fn env_const_bytes_tampering_rejected_by_get() { + let env = Env::new(); + store_canonical(&env, defn_const(vec![])); // key == hash(bytes) + let mut buf = Vec::new(); + env.put(&mut buf).unwrap(); + // Flip a byte well inside the const payload.
+ let off = 68 + 3; + assert!(off < buf.len(), "expected const bytes at offset {off}"); + buf[off] ^= 0xFF; + let res = Env::get(&mut buf.as_slice()); + let err = res.expect_err("tampered const bytes should be rejected"); + assert!( + err.contains("bytes hash to") && err.contains("but stored under"), + "expected per-entry verify error, got: {err}" + ); + } + + #[test] + fn env_const_bytes_tampering_rejected_by_get_anon() { + let env = Env::new(); + store_canonical(&env, defn_const(vec![])); // key == hash(bytes) + let mut buf = Vec::new(); + env.put(&mut buf).unwrap(); + let off = 68 + 3; + assert!(off < buf.len()); + buf[off] ^= 0xFF; + let res = Env::get_anon(&mut buf.as_slice()); + let err = res.expect_err("tampered const bytes should be rejected"); + assert!( + err.contains("bytes hash to") && err.contains("but stored under"), + "expected per-entry verify error, got: {err}" + ); + } + + #[test] + fn env_const_bytes_tampering_rejected_by_get_anon_mmap() { + use std::io::Write; + let env = Env::new(); + store_canonical(&env, defn_const(vec![])); // key == hash(bytes) + let mut buf = Vec::new(); + env.put(&mut buf).unwrap(); + let off = 68 + 3; + assert!(off < buf.len()); + buf[off] ^= 0xFF; + // mmap requires a real file + let tmp = std::env::temp_dir().join("ix_env_tamper_mmap_test.ixe"); + { + let mut f = std::fs::File::create(&tmp).unwrap(); + f.write_all(&buf).unwrap(); + } + let res = Env::get_anon_mmap(&tmp); + let err = res.expect_err("tampered const bytes should be rejected"); + assert!( + err.contains("bytes hash to") && err.contains("but stored under"), + "expected per-entry verify error, got: {err}" + ); + std::fs::remove_file(&tmp).ok(); + } + // --------------------------------------------------------------------------- // Env::get_anon — anonymous-only deserialization // --------------------------------------------------------------------------- @@ -2253,10 +2378,11 @@ mod tests { fn get_anon_keeps_consts_drops_metadata() { use crate::ix::ixon::env::Named; let
env = Env::new(); - let a = Address::hash(b"a"); - let b = Address::hash(b"b"); - env.store_const(a.clone(), defn_const(vec![])); - env.store_const(b.clone(), defn_const(vec![a.clone()])); + // Round-trip tests must use canonical addresses (see store_canonical + // helper); `Env::get`/`get_anon` reject entries whose bytes don't + // hash to their stored address. + let a = store_canonical(&env, defn_const(vec![])); + let b = store_canonical(&env, defn_const(vec![a.clone()])); // Populate metadata sections so we can verify they get dropped. let blob_addr = env.store_blob(b"hello world".to_vec()); env.register_name(Name::str(Name::anon(), "MyConst".to_string()), @@ -2281,7 +2407,7 @@ mod tests { #[test] fn get_anon_merkle_root_verified() { let env = Env::new(); - env.store_const(Address::hash(b"x"), defn_const(vec![])); + store_canonical(&env, defn_const(vec![])); let mut buf = Vec::new(); env.put(&mut buf).unwrap(); // Tamper with the root. @@ -2304,11 +2430,12 @@ mod tests { fn get_anon_consts_match_get() { // Build an env, serialize, then load via both get and get_anon. // The `consts` map should agree (same addresses, same Constant - // when materialized). + // when materialized). Use a discriminator per const so they're + // content-distinct (otherwise 5 identical Defns would collapse + // to one entry). let env = Env::new(); for i in 0..5u8 { - let addr = Address::hash(&[i]); - env.store_const(addr, defn_const(vec![])); + store_canonical(&env, defn_const_discriminator(vec![], u64::from(i))); } let mut buf = Vec::new(); env.put(&mut buf).unwrap(); diff --git a/src/ix/kernel/ingress.rs b/src/ix/kernel/ingress.rs index ad949205..329a5da7 100644 --- a/src/ix/kernel/ingress.rs +++ b/src/ix/kernel/ingress.rs @@ -4005,41 +4005,63 @@ fn validate_no_reserved_marker_addresses( // discards `named`/`names`/`comms` sections. The helpers below do not // depend on those sections being empty — they simply never consult them. 
-use crate::ix::ixon::constant::{ - ConstructorProj, DefinitionProj, InductiveProj, RecursorProj, -}; use crate::ix::kernel::mode::Anon; +/// Verify that a projection address computed from a block's structure +/// is actually present in the env's consts. Wrapped here so the four +/// dispatch arms in `ingress_anon_block` (DPrj/RPrj/IPrj/CPrj) all +/// produce the same error format. +fn verify_proj_addr_in_env( + kind: &'static str, + proj_addr: &Address, + block_addr: &Address, + member_idx: u64, + ctor_idx: Option, + anon_env: &IxonEnv, +) -> Result<(), String> { + if anon_env.consts.contains_key(proj_addr) { + return Ok(()); + } + match ctor_idx { + None => Err(format!( + "ingress_anon_block: computed {kind} address {} not present in env (block {} idx {})", + proj_addr.hex(), + block_addr.hex(), + member_idx + )), + Some(cidx) => Err(format!( + "ingress_anon_block: computed {kind} address {} not present in env (block {} idx {} cidx {})", + proj_addr.hex(), + block_addr.hex(), + member_idx, + cidx + )), + } +} + /// Deterministic IPrj content address for member `idx` of `block`. +/// +/// Thin re-export of `crate::ix::ixon::constant::indc_proj_address` — +/// the canonical projection-address helper used by both compile and +/// ingress paths. Keep the `anon_` alias so existing call sites read +/// naturally in the anon-mode pipeline. pub fn anon_indc_proj_addr(block: &Address, idx: u64) -> Address { - Constant::new(IxonCI::IPrj(InductiveProj { idx, block: block.clone() })) - .commit() - .0 + crate::ix::ixon::constant::indc_proj_address(idx, block) } /// Deterministic DPrj content address for member `idx` of `block`. pub fn anon_defn_proj_addr(block: &Address, idx: u64) -> Address { - Constant::new(IxonCI::DPrj(DefinitionProj { idx, block: block.clone() })) - .commit() - .0 + crate::ix::ixon::constant::defn_proj_address(idx, block) } /// Deterministic RPrj content address for member `idx` of `block`. 
pub fn anon_recr_proj_addr(block: &Address, idx: u64) -> Address { - Constant::new(IxonCI::RPrj(RecursorProj { idx, block: block.clone() })) - .commit() - .0 + crate::ix::ixon::constant::recr_proj_address(idx, block) } /// Deterministic CPrj content address for ctor `(idx, cidx)` of `block`. pub fn anon_ctor_proj_addr(block: &Address, idx: u64, cidx: u64) -> Address { - Constant::new(IxonCI::CPrj(ConstructorProj { - idx, - cidx, - block: block.clone(), - })) - .commit() - .0 + crate::ix::ixon::constant::ctor_proj_address(idx, cidx, block) } /// Compute deterministic ctor projection addresses for every constructor of @@ -4307,14 +4329,9 @@ pub fn ingress_anon_block( match member { IxonMutConst::Defn(def) => { let proj_addr = anon_defn_proj_addr(block_addr, idx); - if !anon_env.consts.contains_key(&proj_addr) { - return Err(format!( - "ingress_anon_block: computed DPrj address {} not present in env (block {} idx {})", - proj_addr.hex(), - block_addr.hex(), - idx - )); - } + verify_proj_addr_in_env( + "DPrj", &proj_addr, block_addr, idx, None, anon_env, + )?; let self_id = KId::::new(proj_addr.clone(), ()); member_kids.push(self_id.clone()); let hints_override = anon_env.anon_hints.get(&proj_addr).copied(); @@ -4339,14 +4356,9 @@ pub fn ingress_anon_block( }, IxonMutConst::Recr(rec) => { let proj_addr = anon_recr_proj_addr(block_addr, idx); - if !anon_env.consts.contains_key(&proj_addr) { - return Err(format!( - "ingress_anon_block: computed RPrj address {} not present in env (block {} idx {})", - proj_addr.hex(), - block_addr.hex(), - idx - )); - } + verify_proj_addr_in_env( + "RPrj", &proj_addr, block_addr, idx, None, anon_env, + )?; let self_id = KId::::new(proj_addr.clone(), ()); member_kids.push(self_id.clone()); @@ -4369,29 +4381,23 @@ pub fn ingress_anon_block( }, IxonMutConst::Indc(ind) => { let proj_addr = anon_indc_proj_addr(block_addr, idx); - if !anon_env.consts.contains_key(&proj_addr) { - return Err(format!( - "ingress_anon_block: computed IPrj address 
{} not present in env (block {} idx {})", - proj_addr.hex(), - block_addr.hex(), - idx - )); - } + verify_proj_addr_in_env( + "IPrj", &proj_addr, block_addr, idx, None, anon_env, + )?; let self_id = KId::::new(proj_addr.clone(), ()); member_kids.push(self_id.clone()); let ctor_addrs = anon_ctor_addrs(block_addr, idx, ind); // Verify ctor addresses too — catches corruption early. for (cidx, c_addr) in ctor_addrs.iter().enumerate() { - if !anon_env.consts.contains_key(c_addr) { - return Err(format!( - "ingress_anon_block: computed CPrj address {} not present in env (block {} idx {} cidx {})", - c_addr.hex(), - block_addr.hex(), - idx, - cidx - )); - } + verify_proj_addr_in_env( + "CPrj", + c_addr, + block_addr, + idx, + Some(cidx as u64), + anon_env, + )?; } let entries = ingress_anon_inductive( diff --git a/src/ix/kernel/primitive.rs b/src/ix/kernel/primitive.rs index 4514b6ff..270ebea6 100644 --- a/src/ix/kernel/primitive.rs +++ b/src/ix/kernel/primitive.rs @@ -558,6 +558,117 @@ impl PrimAddrs { } } + /// `(lean_name, canonical_address_hex)` table from `Self::new()`, + /// in the same order as `Tests/Ix/Kernel/BuildPrimitives.lean`'s + /// `kernelPrimitives` array. Used by the regen-parity test + /// (`testPrimitivesParity`) to detect drift between hardcoded + /// addresses and freshly-compiled ones: if any future + /// compile/serialize change touches a primitive's content hash, + /// the parity test fails with a printable diff before the + /// breakage propagates to downstream consumers. + /// + /// Keep entries in lock-step with `kernelPrimitives` (same names, + /// same order). The `eager_reduce` entry is intentionally a + /// synthetic kernel marker — not the compiled Lean content hash — + /// because the real `eagerReduce` canonicalizes to the same + /// address as `id`; see the comment on the field in `new()`. 
+ pub fn lean_parity_table() -> Vec<(&'static str, String)> { + let p = Self::new(); + vec![ + ("Nat", p.nat.hex()), + ("Nat.zero", p.nat_zero.hex()), + ("Nat.succ", p.nat_succ.hex()), + ("Nat.add", p.nat_add.hex()), + ("Nat.pred", p.nat_pred.hex()), + ("Nat.sub", p.nat_sub.hex()), + ("Nat.mul", p.nat_mul.hex()), + ("Nat.pow", p.nat_pow.hex()), + ("Nat.gcd", p.nat_gcd.hex()), + ("Nat.mod", p.nat_mod.hex()), + ("Nat.div", p.nat_div.hex()), + ("Nat.bitwise", p.nat_bitwise.hex()), + ("Nat.beq", p.nat_beq.hex()), + ("Nat.ble", p.nat_ble.hex()), + ("Nat.land", p.nat_land.hex()), + ("Nat.lor", p.nat_lor.hex()), + ("Nat.xor", p.nat_xor.hex()), + ("Nat.shiftLeft", p.nat_shift_left.hex()), + ("Nat.shiftRight", p.nat_shift_right.hex()), + ("Bool", p.bool_type.hex()), + ("Bool.true", p.bool_true.hex()), + ("Bool.false", p.bool_false.hex()), + ("String", p.string.hex()), + ("String.mk", p.string_mk.hex()), + ("Char", p.char_type.hex()), + ("Char.mk", p.char_mk.hex()), + ("Char.ofNat", p.char_of_nat.hex()), + ("String.ofList", p.string_of_list.hex()), + ("List", p.list.hex()), + ("List.nil", p.list_nil.hex()), + ("List.cons", p.list_cons.hex()), + ("Eq", p.eq.hex()), + ("Eq.refl", p.eq_refl.hex()), + ("Quot", p.quot_type.hex()), + ("Quot.mk", p.quot_ctor.hex()), + ("Quot.lift", p.quot_lift.hex()), + ("Quot.ind", p.quot_ind.hex()), + ("Lean.reduceBool", p.reduce_bool.hex()), + ("Lean.reduceNat", p.reduce_nat.hex()), + ("eagerReduce", p.eager_reduce.hex()), + ("System.Platform.numBits", p.system_platform_num_bits.hex()), + ("System.Platform.getNumBits", p.system_platform_get_num_bits.hex()), + ("Subtype.val", p.subtype_val.hex()), + ("String.toByteArray", p.string_to_byte_array.hex()), + ("ByteArray.empty", p.byte_array_empty.hex()), + ("Nat.decLe", p.nat_dec_le.hex()), + ("Nat.decEq", p.nat_dec_eq.hex()), + ("Nat.decLt", p.nat_dec_lt.hex()), + ("Decidable.rec", p.decidable_rec.hex()), + ("Decidable.isTrue", p.decidable_is_true.hex()), + ("Decidable.isFalse", 
p.decidable_is_false.hex()), + ("Nat.le_of_ble_eq_true", p.nat_le_of_ble_eq_true.hex()), + ("Nat.not_le_of_not_ble_eq_true", p.nat_not_le_of_not_ble_eq_true.hex()), + ("Nat.eq_of_beq_eq_true", p.nat_eq_of_beq_eq_true.hex()), + ("Nat.ne_of_beq_eq_false", p.nat_ne_of_beq_eq_false.hex()), + ("Fin", p.fin.hex()), + ("Bool.noConfusion", p.bool_no_confusion.hex()), + ("Int", p.int.hex()), + ("Int.ofNat", p.int_of_nat.hex()), + ("Int.negSucc", p.int_neg_succ.hex()), + ("Int.add", p.int_add.hex()), + ("Int.sub", p.int_sub.hex()), + ("Int.mul", p.int_mul.hex()), + ("Int.neg", p.int_neg.hex()), + ("Int.emod", p.int_emod.hex()), + ("Int.ediv", p.int_ediv.hex()), + ("Int.bmod", p.int_bmod.hex()), + ("Int.bdiv", p.int_bdiv.hex()), + ("Int.natAbs", p.int_nat_abs.hex()), + ("Int.pow", p.int_pow.hex()), + ("Int.decEq", p.int_dec_eq.hex()), + ("Int.decLe", p.int_dec_le.hex()), + ("Int.decLt", p.int_dec_lt.hex()), + ("PUnit", p.punit.hex()), + ("PProd", p.pprod.hex()), + ("PProd.mk", p.pprod_mk.hex()), + ("Nat.rec", p.nat_rec.hex()), + ("Nat.casesOn", p.nat_cases_on.hex()), + ("BitVec", p.bit_vec.hex()), + ("BitVec.toNat", p.bit_vec_to_nat.hex()), + ("BitVec.ofNat", p.bit_vec_of_nat.hex()), + ("BitVec.ult", p.bit_vec_ult.hex()), + ("Decidable.decide", p.decidable_decide.hex()), + ("LT.lt", p.lt_lt.hex()), + ("OfNat.ofNat", p.of_nat_of_nat.hex()), + ("Unit", p.unit.hex()), + ("PUnit._sizeOf_1", p.punit_size_of_1.hex()), + ("SizeOf.sizeOf", p.size_of_size_of.hex()), + ("String.back", p.string_back.hex()), + ("String.Legacy.back", p.string_legacy_back.hex()), + ("String.utf8ByteSize", p.string_utf8_byte_size.hex()), + ] + } + /// LEON content-hash addresses, hardcoded from /// `ConstantInfo::get_hash()` applied to each primitive's original /// (pre-compile) Lean declaration. Used by `Primitives::from_env_orig` From 5a4c783219ed300109a3f6ed7470cad9c11431c9 Mon Sep 17 00:00:00 2001 From: "John C. 
Burnham" Date: Mon, 18 May 2026 20:06:09 -0400 Subject: [PATCH 08/12] Round-2 review fixes: mmap defense + small cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Six small items from the round-2 review of the anon-mode work: - N1: stat-at-open defense in `Env::get_anon_mmap` (`src/ix/ixon/serialize.rs`). Capture the file size before mmap; if the mapped length disagrees (file resized between our stat and the map) bail with a clear error instead of letting workers SIGBUS deep in `LazyConstant::get`. Truncate-in-place under a live mapping is still unspecified per POSIX — documented as a caller contract — but the open-time check catches the common case. - New `get_anon_mmap_survives_file_unlink` test: load via mmap, unlink the path, then materialize both already-touched and not-yet-touched constants. Locks in the inode-retention invariant that the SIGBUS analysis depends on; a future refactor that switched to `mmap_anonymous` or copied bytes into a tmpfile would fail loudly here instead of letting workers SIGBUS in production. - #11: delete `AnonEnv::as_ixon_env_unchecked` (`src/ix/kernel/anon_env.rs`). Dead `pub(crate)` escape hatch marked `#[allow(dead_code)]` — confirmed zero callers and removed. - #13: `debug_assert!` on `OnceLock::set` for both result vectors (`src/ffi/kernel.rs`, meta + anon paths). The previous `let _ = results[idx].set(...)` silently dropped a re-set. If a future `build_*_work` dedup refactor breaks the one-write-per-slot invariant, debug builds now panic with the slot index instead of silently losing results. - #19: `allNames.contains` O(n·m) preflight → `Std.HashSet` lookup (`Ix/Cli/CheckCmd.lean`). At mathlib scale (~700k env names × thousands of seed names) the previous linear scan-per-name spent measurable seconds on missing-name preflight alone. 
- N4: doc imprecision in `src/ix/ixon/lazy.rs` — the cache-policy preamble said the worker `KEnv` is "cleared between work items" but the actual cadence is `clear_every` items (`IX_KERNEL_CHECK_CLEAR_EVERY`, default 1 but tunable). Refined wording so the doc matches the code. Verification: `cargo test --lib` 1026 passing (1 new), `cargo clippy --lib --all-targets` clean, `lake build` clean, `lake exe ix check compileinitstd.ixe --anon` 89010/89010 in 15.2s peak RSS 1.4 GB (regression-clean). Out of scope, separate follow-ups: - #2 Format version bump - #3 rs_kernel_check_consts_anon zkPCC FFI (Env::get → mmap) - #8 Sealed marker trait for the lazy_anon transmute - #10 child_arena closure side-effect (widen MField to tuple) - #12 AnonEnv duplicates bfs_refs / transitive_deps_excl - #14 anon worker bypasses M-generic display helpers - #17 Anon --fail-out docstring claims --consts-file compat - #20 Singleton-unwrap duplicated across compile paths - #22-24 Lean `partial def`, `.get!` in tests, unguarded as-u64 casts - N2/N3 Lean is_materialized() callers + compute_const_size_breakdown - Test gaps: rs_kernel_check_anon integration, concurrent mmap, verify_address-on-mmap-corruption, etc. 
--- Ix/Cli/CheckCmd.lean | 8 +++- src/ffi/kernel.rs | 26 ++++++++++++- src/ix/ixon/lazy.rs | 12 ++++-- src/ix/ixon/serialize.rs | 79 ++++++++++++++++++++++++++++++++++++++- src/ix/kernel/anon_env.rs | 9 ----- 5 files changed, 117 insertions(+), 17 deletions(-) diff --git a/Ix/Cli/CheckCmd.lean b/Ix/Cli/CheckCmd.lean index c06ede9f..8a75afda 100644 --- a/Ix/Cli/CheckCmd.lean +++ b/Ix/Cli/CheckCmd.lean @@ -102,9 +102,15 @@ private def selectNamesIxon (allNames : Array Lean.Name) | some s => let exactSet : Std.HashSet Lean.Name := s.exacts.foldl (fun acc n => acc.insert n) (Std.HashSet.emptyWithCapacity s.exacts.length) + -- O(|allNames|) build-up to O(1)-per-check; the previous + -- `allNames.contains n` was O(|allNames|) per missing-name check, + -- which at mathlib scale (~700k env × thousands of seed names) + -- could spend seconds on the preflight alone. + let allNamesSet : Std.HashSet Lean.Name := + allNames.foldl (fun acc n => acc.insert n) (Std.HashSet.emptyWithCapacity allNames.size) let mut missing : Array Lean.Name := #[] for n in s.exacts do - if !allNames.contains n then + if !allNamesSet.contains n then missing := missing.push n if !missing.isEmpty then IO.println s!"[check] warning: {missing.size}/{s.exacts.length} exact name(s) not in env:" diff --git a/src/ffi/kernel.rs b/src/ffi/kernel.rs index b5b4cbc9..7257f416 100644 --- a/src/ffi/kernel.rs +++ b/src/ffi/kernel.rs @@ -1189,7 +1189,18 @@ where } let result = outcome.result.clone(); for &result_idx in &item.aliases { - let _ = results[result_idx].set(result.clone()); + // Each result slot should be written exactly once. The + // work-item dedup in `dedup_by_primary` ensures we + // never schedule the same alias twice. If this fires, + // a future build_check_work refactor has broken that + // invariant — surface it instead of silently dropping + // the second result. 
+ if results[result_idx].set(result.clone()).is_err() { + debug_assert!( + false, + "meta work-item dedup invariant: result slot {result_idx} set twice" + ); + } // Stream this seed's failure to the fail-out file (if any) as // soon as it's known, so a long full-env run grows the file // incrementally instead of dropping everything at the end. @@ -1485,7 +1496,18 @@ fn run_anon_checks_parallel( progress_worker.finish(worker_idx, &outcome); for &result_idx in &result_idxs { - let _ = results[result_idx].set(result.clone()); + // Each result slot is written exactly once across the + // entire run. `build_anon_work` assigns disjoint + // `result_idxs` per work item; if this assertion fires, + // that invariant has been broken (likely by a future + // dedup refactor) and we'd silently drop the second + // write rather than expose the bug. + if results[result_idx].set(result.clone()).is_err() { + debug_assert!( + false, + "anon work-item dedup invariant: result slot {result_idx} set twice" + ); + } if let (Some(log), Err((_, msg))) = (failure_log_worker.as_ref(), result.as_ref()) { diff --git a/src/ix/ixon/lazy.rs b/src/ix/ixon/lazy.rs index 2b1e1dc7..c2143a5f 100644 --- a/src/ix/ixon/lazy.rs +++ b/src/ix/ixon/lazy.rs @@ -16,10 +16,14 @@ //! *without* storing it. Callers (typically kernel ingress) consume //! the returned `Arc` immediately to build a `KConst` in //! the worker's `KEnv`, then drop it. The `KEnv` is the only -//! long-lived materialization; it is cleared between work items by -//! `clear_releasing_memory()`. This keeps env-level memory bounded -//! to "bytes + mmap header" regardless of how much of the env the -//! workers eventually visit. +//! long-lived materialization layer; it is reset periodically by +//! `clear_releasing_memory()` — every `clear_every` work items +//! (see `kernel_check_clear_every`, default 1 but tunable via +//! `IX_KERNEL_CHECK_CLEAR_EVERY`). The result is that env-level +//! 
memory stays bounded to "bytes + mmap header" regardless of +//! how much of the env the workers eventually visit; the only +//! meaningful working set is the union of currently-checking +//! work items' ingress closures. //! //! Invariants: //! - `raw_bytes()` returns exactly what `Constant::put` produces and diff --git a/src/ix/ixon/serialize.rs b/src/ix/ixon/serialize.rs index cbe3bbc6..de130250 100644 --- a/src/ix/ixon/serialize.rs +++ b/src/ix/ixon/serialize.rs @@ -1617,16 +1617,47 @@ impl Env { let file = std::fs::File::open(path).map_err(|e| { format!("Env::get_anon_mmap: open {}: {e}", path.display()) })?; + // Capture the file size at open so we can validate the mapping + // covers the bytes we believe we're parsing. `memmap2` sizes the + // mapping from its own stat, so it can be smaller than we expect (if + // the file was truncated between `open` and `mmap`); without + // this check our cursor-vs-len bounds tests in the section + // parsers would still catch most mismatches, but anyone reading + // past the truncation point through a `LazyConstant` window + // later would SIGBUS with no diagnostic. If `metadata()` fails + // we proceed without the check — better to attempt the mmap + // than to refuse the load on a transient stat error. + // + // Caveat: this is a snapshot at-open. If the file is replaced + // (rather than truncated in-place) while we hold the mmap, our + // pages remain valid (mmap pins the inode). If it's truncated + // in-place, subsequent page faults beyond the new EOF SIGBUS; + // we accept that as a contract violation (don't rewrite the + // .ixe underneath a running check) and document it. + let expected_len = file + .metadata() + .ok() + .map(|m| m.len() as usize); // SAFETY: We treat the mapping as read-only and never alias it // mutably. Other processes truncating or replacing the file while // it is mapped would invalidate our slices; that is a contract // the caller is expected to honor (don't modify the .ixe - // 
+ // underfoot). See the `expected_len` check below for the + // open-time size sanity guard. let mmap = unsafe { memmap2::Mmap::map(&file).map_err(|e| { format!("Env::get_anon_mmap: mmap {}: {e}", path.display()) })? }; + if let Some(expected) = expected_len + && mmap.len() != expected + { + return Err(format!( + "Env::get_anon_mmap: file size changed under us \ + (stat={expected} bytes, mmap={} bytes); refuse to load", + mmap.len() + )); + } let mmap = Arc::new(mmap); // `buf` is a moving cursor over `mmap[..]`. We compute byte @@ -2449,4 +2480,50 @@ mod tests { assert_eq!(*from_full, *from_anon); } } + + /// Lock in the mmap inode-retention invariant: after `get_anon_mmap` + /// opens and maps the file, removing the path from the filesystem + /// MUST NOT invalidate the mapping. On Linux this works because + /// `unlink` only decrements the directory link count; the inode + /// stays alive while any open fd or mmap reference exists, and + /// `MAP_PRIVATE` keeps its pages. + /// + /// This is the invariant our SIGBUS analysis relies on. A future + /// change that, say, switched to `mmap_anonymous` or copied bytes + /// into a tmpfile would break this — making the test fail loudly + /// instead of letting workers SIGBUS in production. + #[test] + fn get_anon_mmap_survives_file_unlink() { + let env = Env::new(); + let a = store_canonical(&env, defn_const(vec![])); + let b = store_canonical(&env, defn_const_discriminator(vec![], 1)); + let mut buf = Vec::new(); + env.put(&mut buf).unwrap(); + + let tmp = std::env::temp_dir().join("ix_get_anon_mmap_unlink_test.ixe"); + { + use std::io::Write; + let mut f = std::fs::File::create(&tmp).unwrap(); + f.write_all(&buf).unwrap(); + } + + let mmap_env = Env::get_anon_mmap(&tmp) + .expect("open should succeed before unlink"); + // Materializing once before unlink makes sure we have known-good + // baseline behavior; the real test is materializing AFTER unlink. 
+ let pre_a = mmap_env.get_const(&a).expect("pre-unlink fetch of `a`"); + + std::fs::remove_file(&tmp).expect("unlink should succeed"); + + // After unlink, the mmap'd pages must still be readable. We + // didn't yet touch `b`'s bytes — if they hadn't been faulted in + // before, the OS still pages them from the now-unlinked inode + // because we hold a clone of `Arc` (via the env's + // `LazyConstant`s). + let post_a = mmap_env.get_const(&a).expect("post-unlink fetch of `a`"); + let post_b = mmap_env.get_const(&b).expect("post-unlink fetch of `b`"); + + assert_eq!(pre_a, post_a, "`a` content should be stable across unlink"); + assert_ne!(post_a, post_b, "discriminators should still differentiate"); + } } diff --git a/src/ix/kernel/anon_env.rs b/src/ix/kernel/anon_env.rs index 42e48fbf..98cd13dd 100644 --- a/src/ix/kernel/anon_env.rs +++ b/src/ix/kernel/anon_env.rs @@ -99,15 +99,6 @@ impl<'a> AnonEnv<'a> { v } - /// **Escape hatch.** Returns the underlying `IxonEnv`. The anon - /// kernel must never call this; it exists solely for adapter code - /// that bridges to legacy APIs taking `&IxonEnv` for their - /// `consts`/`blobs` access. Adapters carry the responsibility to - /// not consult metadata sections. - #[allow(dead_code)] - pub(crate) fn as_ixon_env_unchecked(&self) -> &'a IxonEnv { - self.inner - } } #[cfg(test)] From e8b067a91d049a2e0cfd06bd71915b99151906e1 Mon Sep 17 00:00:00 2001 From: "John C. 
Burnham" Date: Mon, 18 May 2026 20:46:01 -0400 Subject: [PATCH 09/12] rustfmt --- src/ffi/ixon/env.rs | 6 +- src/ffi/kernel.rs | 11 +-- src/ix/compile.rs | 10 +-- src/ix/compile/mutual.rs | 15 ++-- src/ix/decompile.rs | 31 ++------ src/ix/ixon.rs | 11 +-- src/ix/ixon/assumption_tree.rs | 36 ++++----- src/ix/ixon/env.rs | 14 +--- src/ix/ixon/lazy.rs | 11 +-- src/ix/ixon/merkle.rs | 6 +- src/ix/ixon/proof.rs | 111 ++++++++------------------- src/ix/ixon/serialize.rs | 18 ++--- src/ix/kernel/anon_env.rs | 1 - src/ix/kernel/claim.rs | 8 +- src/ix/kernel/ingress.rs | 132 +++++++++++++++------------------ src/ix/kernel/tc.rs | 7 +- 16 files changed, 150 insertions(+), 278 deletions(-) diff --git a/src/ffi/ixon/env.rs b/src/ffi/ixon/env.rs index fbd58708..02d3b27b 100644 --- a/src/ffi/ixon/env.rs +++ b/src/ffi/ixon/env.rs @@ -349,10 +349,8 @@ pub fn ixon_env_to_decoded(env: &IxonEnv) -> Result { e.key().hex() ) })?; - consts.push(DecodedRawConst { - addr: e.key().clone(), - constant: (*c).clone(), - }); + consts + .push(DecodedRawConst { addr: e.key().clone(), constant: (*c).clone() }); } let named = env .named diff --git a/src/ffi/kernel.rs b/src/ffi/kernel.rs index 7257f416..6d8bb3a2 100644 --- a/src/ffi/kernel.rs +++ b/src/ffi/kernel.rs @@ -68,17 +68,17 @@ use crate::ix::ixon::metadata::ConstantMetaInfo; use crate::ix::kernel::egress::{ixon_egress, lean_egress}; use crate::ix::kernel::env::KEnv; use crate::ix::kernel::error::TcError; +use crate::ix::kernel::id::KId; use crate::ix::kernel::ingress::{ IxonIngressLookups, build_ixon_ingress_lookups, ingress_const_shallow_into_kenv_with_lookups, ixon_ingress_owned, }; -#[cfg(feature = "test-ffi")] -use crate::ix::kernel::ingress::{ixon_ingress, lean_ingress}; -use crate::ix::kernel::id::KId; use crate::ix::kernel::ingress::{ anon_ctor_proj_addr, anon_defn_proj_addr, anon_indc_proj_addr, anon_recr_proj_addr, }; +#[cfg(feature = "test-ffi")] +use crate::ix::kernel::ingress::{ixon_ingress, lean_ingress}; use 
crate::ix::kernel::mode::{Anon, CheckDupLevelParams, KernelMode, Meta}; use crate::ix::kernel::tc::TypeChecker; @@ -1306,10 +1306,7 @@ enum AnonWorkItem { /// the block (each member's projection + each ctor's CPrj of inductive /// members), all sharing the same check result via the kernel's /// block coordination. - Block { - primary_addr: Address, - result_idxs: Vec, - }, + Block { primary_addr: Address, result_idxs: Vec }, } /// One pass over `env.consts`: enumerate work items + the kernel-checkable diff --git a/src/ix/compile.rs b/src/ix/compile.rs index e5a5228a..a16b2bc1 100644 --- a/src/ix/compile.rs +++ b/src/ix/compile.rs @@ -3592,19 +3592,13 @@ fn compile_mutual( stt.env.store_const(addr.clone(), standalone_constant); stt.block_stats.insert( name.clone(), - BlockSizeStats { - hash_consed_size, - serialized_size, - const_count: 1, - }, + BlockSizeStats { hash_consed_size, serialized_size, const_count: 1 }, ); for class in &sorted_classes { for cnst in class { let n = cnst.name(); let meta = all_metas.get(&n).cloned().unwrap_or_default(); - stt - .env - .register_name(n.clone(), Named::new(addr.clone(), meta)); + stt.env.register_name(n.clone(), Named::new(addr.clone(), meta)); stt.name_to_addr.insert(n.clone(), addr.clone()); } } diff --git a/src/ix/compile/mutual.rs b/src/ix/compile/mutual.rs index 21793c07..fa7ef7d9 100644 --- a/src/ix/compile/mutual.rs +++ b/src/ix/compile/mutual.rs @@ -211,11 +211,9 @@ pub(crate) fn compile_aux_block_with_rename( block_univs, Some(&name_str), ), - IxonMutConst::Recr(rec) => apply_sharing_to_recursor_with_stats( - rec, - block_refs, - block_univs, - ), + IxonMutConst::Recr(rec) => { + apply_sharing_to_recursor_with_stats(rec, block_refs, block_univs) + }, IxonMutConst::Indc(_) => unreachable!(), }; let standalone_addr = content_address(&result.constant); @@ -226,10 +224,9 @@ pub(crate) fn compile_aux_block_with_rename( let canon_n = cnst.name(); let n = resolve_name(&canon_n); let meta = 
all_metas.remove(&canon_n).unwrap_or_default(); - stt.env.register_name( - n.clone(), - Named::new(standalone_addr.clone(), meta), - ); + stt + .env + .register_name(n.clone(), Named::new(standalone_addr.clone(), meta)); stt.aux_name_to_addr.insert(n.clone(), standalone_addr.clone()); stt.aux_gen_extra_names.insert(n.clone()); pending_names.push(n); diff --git a/src/ix/decompile.rs b/src/ix/decompile.rs index 65784c50..3a954a7b 100644 --- a/src/ix/decompile.rs +++ b/src/ix/decompile.rs @@ -1808,12 +1808,7 @@ fn decompile_const( let current_const = name.pretty(); match cnst.as_ref() { - Constant { - info: ConstantInfo::Defn(def), - sharing, - refs, - univs, - } => { + Constant { info: ConstantInfo::Defn(def), sharing, refs, univs } => { let mut cache = BlockCache { sharing: sharing.clone(), refs: refs.clone(), @@ -1823,17 +1818,11 @@ fn decompile_const( ..Default::default() }; cache.load_meta_extensions(&named.meta); - let info = - decompile_definition(def, &named.meta, &mut cache, stt, dstt)?; + let info = decompile_definition(def, &named.meta, &mut cache, stt, dstt)?; dstt.env.insert(name.clone(), info); }, - Constant { - info: ConstantInfo::Recr(rec), - sharing, - refs, - univs, - } => { + Constant { info: ConstantInfo::Recr(rec), sharing, refs, univs } => { let mut cache = BlockCache { sharing: sharing.clone(), refs: refs.clone(), @@ -1852,12 +1841,7 @@ fn decompile_const( dstt.env.insert(name.clone(), info); }, - Constant { - info: ConstantInfo::Axio(ax), - sharing, - refs, - univs, - } => { + Constant { info: ConstantInfo::Axio(ax), sharing, refs, univs } => { let mut cache = BlockCache { sharing: sharing.clone(), refs: refs.clone(), @@ -1873,12 +1857,7 @@ fn decompile_const( dstt.env.insert(name.clone(), info); }, - Constant { - info: ConstantInfo::Quot(quot), - sharing, - refs, - univs, - } => { + Constant { info: ConstantInfo::Quot(quot), sharing, refs, univs } => { let mut cache = BlockCache { sharing: sharing.clone(), refs: refs.clone(), diff --git 
a/src/ix/ixon.rs b/src/ix/ixon.rs index d27b79a6..5c540514 100644 --- a/src/ix/ixon.rs +++ b/src/ix/ixon.rs @@ -345,10 +345,8 @@ mod doc_examples { #[test] fn check_claim_tag() { // Check claim -> Tag4 { flag: 0xE, size: 4 } -> 0xE4 - let claim = Claim::Check { - const_addr: Address::hash(b"value"), - assumptions: None, - }; + let claim = + Claim::Check { const_addr: Address::hash(b"value"), assumptions: None }; let mut buf = Vec::new(); claim.put(&mut buf); assert_eq!(buf[0], 0xE4, "Check claim should start with 0xE4"); @@ -359,10 +357,7 @@ mod doc_examples { fn check_proof_tag() { // Check proof -> Tag4 { flag: 0xF, size: 1 } -> 0xF1 let proof = Proof::new( - Claim::Check { - const_addr: Address::hash(b"value"), - assumptions: None, - }, + Claim::Check { const_addr: Address::hash(b"value"), assumptions: None }, vec![5, 6, 7], ); let mut buf = Vec::new(); diff --git a/src/ix/ixon/assumption_tree.rs b/src/ix/ixon/assumption_tree.rs index 5eb6ed2e..4cf61d2b 100644 --- a/src/ix/ixon/assumption_tree.rs +++ b/src/ix/ixon/assumption_tree.rs @@ -38,9 +38,7 @@ use crate::ix::address::Address; -use super::merkle::{ - MerklePath, leaf_hash, node_hash, zero_address, -}; +use super::merkle::{MerklePath, leaf_hash, node_hash, zero_address}; use super::proof::{FLAG_CLAIM, VARIANT_ASSUMPTION_TREE}; use super::tag::Tag4; @@ -149,11 +147,7 @@ impl AssumptionTree { /// order expected by `verify_merkle_proof`. 
pub fn merkle_proof(&self, target: &Address) -> Option { let mut path: MerklePath = Vec::new(); - if self.search_path(target, &mut path) { - Some(path) - } else { - None - } + if self.search_path(target, &mut path) { Some(path) } else { None } } /// Recursive helper: if `target` is in this subtree, push the sibling @@ -220,7 +214,8 @@ impl AssumptionTree { } fn get_body(buf: &mut &[u8]) -> Result { - let (tag, rest) = buf.split_first().ok_or("AssumptionTree: EOF reading body tag")?; + let (tag, rest) = + buf.split_first().ok_or("AssumptionTree: EOF reading body tag")?; *buf = rest; match *tag { BODY_LEAF => { @@ -260,10 +255,10 @@ impl AssumptionTree { #[cfg(test)] mod tests { - use super::*; use super::super::merkle::{ merkle_join, merkle_root_canonical, verify_merkle_proof, }; + use super::*; fn addr(seed: &[u8]) -> Address { Address::hash(seed) @@ -329,9 +324,8 @@ mod tests { #[test] fn canonical_root_matches_merkle_root_canonical_pairs() { for n in 2..=10 { - let leaves: Vec
= (0..n) - .map(|i| addr(format!("leaf-{i}").as_bytes())) - .collect(); + let leaves: Vec
= + (0..n).map(|i| addr(format!("leaf-{i}").as_bytes())).collect(); let t = AssumptionTree::canonical(&leaves).unwrap(); assert_eq!( Some(t.root()), @@ -345,7 +339,8 @@ mod tests { fn canonical_root_dedups_like_primitive() { let a = addr(b"a"); let b = addr(b"b"); - let t = AssumptionTree::canonical(&[a.clone(), a.clone(), b.clone()]).unwrap(); + let t = + AssumptionTree::canonical(&[a.clone(), a.clone(), b.clone()]).unwrap(); assert_eq!(Some(t.root()), merkle_root_canonical(&[a, b])); } @@ -378,7 +373,8 @@ mod tests { let b = addr(b"b"); let c = addr(b"c"); let absent = addr(b"absent"); - let t = AssumptionTree::canonical(&[a.clone(), b.clone(), c.clone()]).unwrap(); + let t = + AssumptionTree::canonical(&[a.clone(), b.clone(), c.clone()]).unwrap(); assert!(t.contains(&a)); assert!(t.contains(&b)); assert!(t.contains(&c)); @@ -399,9 +395,8 @@ mod tests { #[test] fn merkle_proof_roundtrip_all_leaves() { for n in 1..=8 { - let leaves: Vec
= (0..n) - .map(|i| addr(format!("leaf-{i}").as_bytes())) - .collect(); + let leaves: Vec
= + (0..n).map(|i| addr(format!("leaf-{i}").as_bytes())).collect(); let t = AssumptionTree::canonical(&leaves).unwrap(); let root = t.root(); for leaf in t.leaves() { @@ -483,9 +478,8 @@ mod tests { #[test] fn serde_roundtrip_canonical_trees() { for n in 1..=10 { - let leaves: Vec
= (0..n) - .map(|i| addr(format!("leaf-{i}").as_bytes())) - .collect(); + let leaves: Vec
= + (0..n).map(|i| addr(format!("leaf-{i}").as_bytes())).collect(); let t = AssumptionTree::canonical(&leaves).unwrap(); let bytes = t.ser(); let parsed = AssumptionTree::get(&mut &bytes[..]).unwrap(); diff --git a/src/ix/ixon/env.rs b/src/ix/ixon/env.rs index c3fbbb6f..e8363774 100644 --- a/src/ix/ixon/env.rs +++ b/src/ix/ixon/env.rs @@ -150,9 +150,7 @@ impl Env { offset: usize, len: usize, ) { - self - .consts - .insert(addr, LazyConstant::from_mmap_slice(mmap, offset, len)); + self.consts.insert(addr, LazyConstant::from_mmap_slice(mmap, offset, len)); } /// Get a constant by address, materializing on demand. @@ -553,9 +551,8 @@ mod tests { let a = Address::hash(b"a"); // Use multiple deps; the returned Vec should be in sorted order // regardless of how the BFS visited them. - let mut refs: Vec
= (0..16) - .map(|i| Address::hash(format!("dep-{i}").as_bytes())) - .collect(); + let mut refs: Vec
= + (0..16).map(|i| Address::hash(format!("dep-{i}").as_bytes())).collect(); env.store_const(a.clone(), const_with_refs(refs.clone())); for r in &refs { env.store_const(r.clone(), const_with_refs(vec![])); @@ -595,10 +592,7 @@ mod tests { let c = store_canonical(&env, const_with_refs(vec![])); let b = store_canonical(&env, const_with_refs(vec![c.clone()])); let a = store_canonical(&env, const_with_refs(vec![b.clone()])); - let d = store_canonical( - &env, - const_with_refs_discriminator(vec![], 1), - ); + let d = store_canonical(&env, const_with_refs_discriminator(vec![], 1)); // Serialize → deserialize so all entries are lazy-from-bytes. let mut buf = Vec::new(); diff --git a/src/ix/ixon/lazy.rs b/src/ix/ixon/lazy.rs index c2143a5f..269a82eb 100644 --- a/src/ix/ixon/lazy.rs +++ b/src/ix/ixon/lazy.rs @@ -123,15 +123,8 @@ impl LazyConstant { /// /// Used by `Env::get_anon_mmap` to avoid heap-copying the on-disk /// byte stream — the OS page cache is the source of truth. - pub fn from_mmap_slice( - mmap: Arc, - offset: usize, - len: usize, - ) -> Self { - LazyConstant { - bytes: BytesSource::Mmap { mmap, offset, len }, - cache: None, - } + pub fn from_mmap_slice(mmap: Arc, offset: usize, len: usize) -> Self { + LazyConstant { bytes: BytesSource::Mmap { mmap, offset, len }, cache: None } } /// Construct from a structured `Constant` (the in-memory build path, diff --git a/src/ix/ixon/merkle.rs b/src/ix/ixon/merkle.rs index 5df1b4e1..081f89e9 100644 --- a/src/ix/ixon/merkle.rs +++ b/src/ix/ixon/merkle.rs @@ -151,7 +151,8 @@ pub fn merkle_proof_canonical( let mut path: MerklePath = Vec::new(); while level.len() > 1 { let sibling_idx = pos ^ 1; - let sibling = level.get(sibling_idx).cloned().unwrap_or_else(|| zero.clone()); + let sibling = + level.get(sibling_idx).cloned().unwrap_or_else(|| zero.clone()); let is_left = pos & 1 == 1; path.push((sibling, is_left)); // Build next level. 
@@ -228,8 +229,7 @@ mod tests { fn canonical_dedup() { let a = addr(b"a"); let b = addr(b"b"); - let r1 = - merkle_root_canonical(&[a.clone(), a.clone(), b.clone()]).unwrap(); + let r1 = merkle_root_canonical(&[a.clone(), a.clone(), b.clone()]).unwrap(); let r2 = merkle_root_canonical(&[a, b]).unwrap(); assert_eq!(r1, r2); } diff --git a/src/ix/ixon/proof.rs b/src/ix/ixon/proof.rs index 8eef684f..805f0017 100644 --- a/src/ix/ixon/proof.rs +++ b/src/ix/ixon/proof.rs @@ -154,11 +154,7 @@ pub enum RevealConstantInfo { #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub enum Claim { /// `input` evaluates to `output`, optionally modulo `assumptions`. - Eval { - input: Address, - output: Address, - assumptions: Option
, - }, + Eval { input: Address, output: Address, assumptions: Option<Address>
}, /// The constant at `const_addr` is well-typed, optionally modulo /// `assumptions`. Check { const_addr: Address, assumptions: Option<Address>
}, @@ -1037,9 +1033,9 @@ impl Claim { let const_addr = get_address(buf)?; Ok(Claim::Contains { tree, const_addr }) }, - x => Err(format!( - "Claim::get: invalid claim variant {x} under flag 0xE", - )), + x => { + Err(format!("Claim::get: invalid claim variant {x} under flag 0xE",)) + }, } } @@ -1140,9 +1136,11 @@ impl Proof { let const_addr = get_address(buf)?; Claim::Contains { tree, const_addr } }, - x => return Err(format!( - "Proof::get: invalid proof variant {x} under flag 0xF" - )), + x => { + return Err(format!( + "Proof::get: invalid proof variant {x} under flag 0xF" + )); + }, }; // Opaque ZK proof bytes @@ -1493,10 +1491,8 @@ mod tests { #[test] fn test_check_claim_no_asm_roundtrip() { - let claim = Claim::Check { - const_addr: Address::hash(b"value"), - assumptions: None, - }; + let claim = + Claim::Check { const_addr: Address::hash(b"value"), assumptions: None }; assert!(claim_roundtrip(&claim)); } @@ -1552,10 +1548,7 @@ mod tests { #[test] fn test_check_proof_roundtrip() { let proof = Proof::new( - Claim::Check { - const_addr: Address::hash(b"value"), - assumptions: None, - }, + Claim::Check { const_addr: Address::hash(b"value"), assumptions: None }, vec![5, 6, 7, 8, 9], ); assert!(proof_roundtrip(&proof)); @@ -1564,10 +1557,7 @@ mod tests { #[test] fn test_check_env_proof_roundtrip() { let proof = Proof::new( - Claim::CheckEnv { - root: Address::hash(b"env-root"), - assumptions: None, - }, + Claim::CheckEnv { root: Address::hash(b"env-root"), assumptions: None }, vec![0x11, 0x22], ); assert!(proof_roundtrip(&proof)); @@ -1673,11 +1663,7 @@ mod tests { let cases: Vec<(Claim, u64)> = vec![ ( - Claim::Eval { - input: a.clone(), - output: b.clone(), - assumptions: None, - }, + Claim::Eval { input: a.clone(), output: b.clone(), assumptions: None }, VARIANT_EVAL_CLAIM, ), ( @@ -1688,11 +1674,11 @@ mod tests { Claim::CheckEnv { root: a.clone(), assumptions: None }, VARIANT_CHECK_ENV_CLAIM, ), - (Claim::Reveal { comm: a.clone(), info: reveal_info }, 
VARIANT_REVEAL_CLAIM), ( - Claim::Contains { tree: a, const_addr: b }, - VARIANT_CONTAINS_CLAIM, + Claim::Reveal { comm: a.clone(), info: reveal_info }, + VARIANT_REVEAL_CLAIM, ), + (Claim::Contains { tree: a, const_addr: b }, VARIANT_CONTAINS_CLAIM), ]; for (claim, expected_size) in cases { @@ -1716,11 +1702,7 @@ mod tests { let cases: Vec<(Claim, u64)> = vec![ ( - Claim::Eval { - input: a.clone(), - output: b.clone(), - assumptions: None, - }, + Claim::Eval { input: a.clone(), output: b.clone(), assumptions: None }, VARIANT_EVAL_PROOF, ), ( @@ -1731,11 +1713,11 @@ mod tests { Claim::CheckEnv { root: a.clone(), assumptions: None }, VARIANT_CHECK_ENV_PROOF, ), - (Claim::Reveal { comm: a.clone(), info: reveal_info }, VARIANT_REVEAL_PROOF), ( - Claim::Contains { tree: a, const_addr: b }, - VARIANT_CONTAINS_PROOF, + Claim::Reveal { comm: a.clone(), info: reveal_info }, + VARIANT_REVEAL_PROOF, ), + (Claim::Contains { tree: a, const_addr: b }, VARIANT_CONTAINS_PROOF), ]; for (claim, expected_size) in cases { @@ -1782,11 +1764,8 @@ mod tests { "Eval with-asm = 98 bytes" ); assert_eq!( - claim_bytes(&Claim::Check { - const_addr: a.clone(), - assumptions: None - }) - .len(), + claim_bytes(&Claim::Check { const_addr: a.clone(), assumptions: None }) + .len(), 1 + 32 + 1, "Check no-asm = 34 bytes" ); @@ -1800,11 +1779,8 @@ mod tests { "Check with-asm = 66 bytes" ); assert_eq!( - claim_bytes(&Claim::CheckEnv { - root: a.clone(), - assumptions: None - }) - .len(), + claim_bytes(&Claim::CheckEnv { root: a.clone(), assumptions: None }) + .len(), 1 + 32 + 1, "CheckEnv no-asm = 34 bytes" ); @@ -1830,26 +1806,13 @@ mod tests { let cases: Vec<(Claim, u8)> = vec![ ( - Claim::Eval { - input: a.clone(), - output: b.clone(), - assumptions: None, - }, + Claim::Eval { input: a.clone(), output: b.clone(), assumptions: None }, 0xE3, ), - ( - Claim::Check { const_addr: a.clone(), assumptions: None }, - 0xE4, - ), - ( - Claim::CheckEnv { root: a.clone(), assumptions: None }, - 0xE5, - ), + 
(Claim::Check { const_addr: a.clone(), assumptions: None }, 0xE4), + (Claim::CheckEnv { root: a.clone(), assumptions: None }, 0xE5), (Claim::Reveal { comm: a.clone(), info: reveal_info }, 0xE6), - ( - Claim::Contains { tree: a, const_addr: b }, - 0xE7, - ), + (Claim::Contains { tree: a, const_addr: b }, 0xE7), ]; for (claim, expected_byte) in cases { let bytes = claim_bytes(&claim); @@ -1870,21 +1833,11 @@ mod tests { }; let cases: Vec<(Claim, u8)> = vec![ ( - Claim::Eval { - input: a.clone(), - output: b.clone(), - assumptions: None, - }, + Claim::Eval { input: a.clone(), output: b.clone(), assumptions: None }, 0xF0, ), - ( - Claim::Check { const_addr: a.clone(), assumptions: None }, - 0xF1, - ), - ( - Claim::CheckEnv { root: a.clone(), assumptions: None }, - 0xF2, - ), + (Claim::Check { const_addr: a.clone(), assumptions: None }, 0xF1), + (Claim::CheckEnv { root: a.clone(), assumptions: None }, 0xF2), (Claim::Reveal { comm: a.clone(), info: reveal_info }, 0xF3), (Claim::Contains { tree: a, const_addr: b }, 0xF4), ]; diff --git a/src/ix/ixon/serialize.rs b/src/ix/ixon/serialize.rs index de130250..ecbed0bf 100644 --- a/src/ix/ixon/serialize.rs +++ b/src/ix/ixon/serialize.rs @@ -1634,10 +1634,7 @@ impl Env { // in-place, subsequent page faults beyond the new EOF SIGBUS; // we accept that as a contract violation (don't rewrite the // .ixe underneath a running check) and document it. - let expected_len = file - .metadata() - .ok() - .map(|m| m.len() as usize); + let expected_len = file.metadata().ok().map(|m| m.len() as usize); // SAFETY: We treat the mapping as read-only and never alias it // mutably. Other processes truncating or replacing the file while // it is mapped would invalidate our slices; that is a contract @@ -1805,8 +1802,7 @@ impl Env { let mut const_addrs: Vec
= self.consts.iter().map(|e| e.key().clone()).collect(); const_addrs.sort_unstable(); - let root = - merkle_root_canonical(&const_addrs).unwrap_or_else(zero_address); + let root = merkle_root_canonical(&const_addrs).unwrap_or_else(zero_address); put_address(&root, &mut buf); let header_size = buf.len(); @@ -2416,8 +2412,10 @@ mod tests { let b = store_canonical(&env, defn_const(vec![a.clone()])); // Populate metadata sections so we can verify they get dropped. let blob_addr = env.store_blob(b"hello world".to_vec()); - env.register_name(Name::str(Name::anon(), "MyConst".to_string()), - Named::with_addr(a.clone())); + env.register_name( + Name::str(Name::anon(), "MyConst".to_string()), + Named::with_addr(a.clone()), + ); let mut buf = Vec::new(); env.put(&mut buf).unwrap(); @@ -2507,8 +2505,8 @@ mod tests { f.write_all(&buf).unwrap(); } - let mmap_env = Env::get_anon_mmap(&tmp) - .expect("open should succeed before unlink"); + let mmap_env = + Env::get_anon_mmap(&tmp).expect("open should succeed before unlink"); // Materializing once before unlink makes sure we have known-good // baseline behavior; the real test is materializing AFTER unlink. let pre_a = mmap_env.get_const(&a).expect("pre-unlink fetch of `a`"); diff --git a/src/ix/kernel/anon_env.rs b/src/ix/kernel/anon_env.rs index 98cd13dd..cfc18168 100644 --- a/src/ix/kernel/anon_env.rs +++ b/src/ix/kernel/anon_env.rs @@ -98,7 +98,6 @@ impl<'a> AnonEnv<'a> { v.sort_unstable(); v } - } #[cfg(test)] diff --git a/src/ix/kernel/claim.rs b/src/ix/kernel/claim.rs index a4c531e4..301300c7 100644 --- a/src/ix/kernel/claim.rs +++ b/src/ix/kernel/claim.rs @@ -43,11 +43,7 @@ pub fn build_check_claim(env: &Env, const_addr: Address) -> Claim { /// Assumptions = canonical merkle root over `transitive_deps(input) ∪ /// transitive_deps(output) \ {input, output}`. `None` if that set is /// empty. 
-pub fn build_eval_claim( - env: &Env, - input: Address, - output: Address, -) -> Claim { +pub fn build_eval_claim(env: &Env, input: Address, output: Address) -> Claim { let mut set: FxHashSet<Address>
= env.transitive_deps_excl(&input).into_iter().collect(); set.extend(env.transitive_deps_excl(&output)); @@ -97,7 +93,7 @@ mod tests { use super::*; use crate::ix::env::DefinitionSafety; use crate::ix::ixon::constant::{ - Axiom, Constant, DefKind, Definition, ConstantInfo, + Axiom, Constant, ConstantInfo, DefKind, Definition, }; use crate::ix::ixon::expr::Expr; use crate::ix::ixon::merkle::leaf_hash; diff --git a/src/ix/kernel/ingress.rs b/src/ix/kernel/ingress.rs index 329a5da7..38bedb6e 100644 --- a/src/ix/kernel/ingress.rs +++ b/src/ix/kernel/ingress.rs @@ -779,16 +779,19 @@ fn ingress_expr( // Meta mode: walk the arena to find the Ref's Lean name. // Anon mode: the closure is never invoked — no arena walk, // no name construction, no resolve_name call. - let name_field: M::MField = - M::meta_field_try::(|| match node { - ExprMetaData::Ref { name: name_addr } => { - Ok(resolve_name(name_addr, ctx.names)) - }, - _ => Err(format!( - "Ref at index {ref_idx} (addr {}) has no metadata name (node={node:?})", - &addr.hex()[..8] - )), - })?; + let name_field: M::MField = M::meta_field_try::< + Name, + _, + String, + >(|| match node { + ExprMetaData::Ref { name: name_addr } => { + Ok(resolve_name(name_addr, ctx.names)) + }, + _ => Err(format!( + "Ref at index {ref_idx} (addr {}) has no metadata name (node={node:?})", + &addr.hex()[..8] + )), + })?; let univs = ingress_univ_args(univ_idxs, ctx, intern, univ_cache, stats)?; let id = KId::new(addr, name_field); @@ -1130,17 +1133,23 @@ fn ingress_expr( // Meta mode reads both; Anon mode skips the entire arena // touch — no name, no child-arena indexing. 
let mut child_arena: u64 = 0; - let struct_name_field: M::MField = - M::meta_field_try::(|| match node { - ExprMetaData::Prj { struct_name: addr, child } => { - child_arena = *child; - Ok(resolve_name(addr, ctx.names)) - }, - _ => Err(format!( - "Prj at ref index {type_ref_idx} (addr {}) has no metadata name (node={node:?})", - &type_addr.hex()[..8] - )), - })?; + let struct_name_field: M::MField = M::meta_field_try::< + Name, + _, + String, + >( + || match node + { + ExprMetaData::Prj { struct_name: addr, child } => { + child_arena = *child; + Ok(resolve_name(addr, ctx.names)) + }, + _ => Err(format!( + "Prj at ref index {type_ref_idx} (addr {}) has no metadata name (node={node:?})", + &type_addr.hex()[..8] + )), + } + )?; stack.push(ExprFrame::PrjDone { type_id: KId::new(type_addr, struct_name_field), field_idx: *field_idx, @@ -4204,10 +4213,8 @@ fn ingress_anon_inductive( stats, )?; - let ctor_ids: Vec> = ctor_addrs - .iter() - .map(|a| KId::::new(a.clone(), ())) - .collect(); + let ctor_ids: Vec> = + ctor_addrs.iter().map(|a| KId::::new(a.clone(), ())).collect(); let mut results = vec![( self_id.clone(), @@ -5069,31 +5076,16 @@ mod tests { fn anon_proj_addrs_are_deterministic() { let block = Address::hash(b"test-block-deterministic"); // Same inputs → same outputs. - assert_eq!( - anon_defn_proj_addr(&block, 0), - anon_defn_proj_addr(&block, 0) - ); - assert_eq!( - anon_indc_proj_addr(&block, 1), - anon_indc_proj_addr(&block, 1) - ); - assert_eq!( - anon_recr_proj_addr(&block, 2), - anon_recr_proj_addr(&block, 2) - ); + assert_eq!(anon_defn_proj_addr(&block, 0), anon_defn_proj_addr(&block, 0)); + assert_eq!(anon_indc_proj_addr(&block, 1), anon_indc_proj_addr(&block, 1)); + assert_eq!(anon_recr_proj_addr(&block, 2), anon_recr_proj_addr(&block, 2)); assert_eq!( anon_ctor_proj_addr(&block, 3, 4), anon_ctor_proj_addr(&block, 3, 4) ); // Different inputs → different outputs (catches accidental aliasing). 
- assert_ne!( - anon_defn_proj_addr(&block, 0), - anon_defn_proj_addr(&block, 1) - ); - assert_ne!( - anon_defn_proj_addr(&block, 0), - anon_indc_proj_addr(&block, 0) - ); + assert_ne!(anon_defn_proj_addr(&block, 0), anon_defn_proj_addr(&block, 1)); + assert_ne!(anon_defn_proj_addr(&block, 0), anon_indc_proj_addr(&block, 0)); assert_ne!( anon_ctor_proj_addr(&block, 0, 0), anon_ctor_proj_addr(&block, 0, 1) @@ -5111,38 +5103,34 @@ mod tests { // anon pipeline depends on (verifying the computed address against // the address actually stored in `env.consts`). use crate::ix::ixon::constant::{ - Constant, ConstantInfo, ConstructorProj, DefinitionProj, - InductiveProj, RecursorProj, + Constant, ConstantInfo, ConstructorProj, DefinitionProj, InductiveProj, + RecursorProj, }; let b = Address::hash(b"another-block"); - let (defn_addr, _) = - Constant::new(ConstantInfo::DPrj(DefinitionProj { - idx: 5, - block: b.clone(), - })) - .commit(); + let (defn_addr, _) = Constant::new(ConstantInfo::DPrj(DefinitionProj { + idx: 5, + block: b.clone(), + })) + .commit(); assert_eq!(defn_addr, anon_defn_proj_addr(&b, 5)); - let (indc_addr, _) = - Constant::new(ConstantInfo::IPrj(InductiveProj { - idx: 7, - block: b.clone(), - })) - .commit(); + let (indc_addr, _) = Constant::new(ConstantInfo::IPrj(InductiveProj { + idx: 7, + block: b.clone(), + })) + .commit(); assert_eq!(indc_addr, anon_indc_proj_addr(&b, 7)); - let (recr_addr, _) = - Constant::new(ConstantInfo::RPrj(RecursorProj { - idx: 9, - block: b.clone(), - })) - .commit(); + let (recr_addr, _) = Constant::new(ConstantInfo::RPrj(RecursorProj { + idx: 9, + block: b.clone(), + })) + .commit(); assert_eq!(recr_addr, anon_recr_proj_addr(&b, 9)); - let (ctor_addr, _) = - Constant::new(ConstantInfo::CPrj(ConstructorProj { - idx: 2, - cidx: 3, - block: b.clone(), - })) - .commit(); + let (ctor_addr, _) = Constant::new(ConstantInfo::CPrj(ConstructorProj { + idx: 2, + cidx: 3, + block: b.clone(), + })) + .commit(); assert_eq!(ctor_addr, 
anon_ctor_proj_addr(&b, 2, 3)); } } diff --git a/src/ix/kernel/tc.rs b/src/ix/kernel/tc.rs index a6089d09..3ab0bb12 100644 --- a/src/ix/kernel/tc.rs +++ b/src/ix/kernel/tc.rs @@ -234,7 +234,6 @@ impl<'a, M: KernelMode> TypeChecker<'a, M> { tc } - pub fn try_get_const( &mut self, id: &KId, @@ -1002,10 +1001,8 @@ impl<'a> TypeChecker<'a, super::mode::Anon> { let _ = env.set_prims(prims); } let mut tc = Self::new(env); - tc.lazy_anon = Some(LazyAnonIngress { - anon_env, - faulted_addrs: FxHashSet::default(), - }); + tc.lazy_anon = + Some(LazyAnonIngress { anon_env, faulted_addrs: FxHashSet::default() }); tc } } From 6e238fa4f45016a0f04cc0779681ea54088387ee Mon Sep 17 00:00:00 2001 From: "John C. Burnham" Date: Mon, 18 May 2026 21:05:46 -0400 Subject: [PATCH 10/12] Tests: derive RawConst addrs from content (verify_address fix) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `Address::hash(bytes) == addr` defense added in f792102 rejects `RawConst { addr, const }` pairs where `addr` is uncorrelated with `serConstant const`. The Rust-side tests in `serialize.rs` were updated at the time (`store_canonical` helper), but three Lean-side sites still produced mismatched pairs and surface now as: × ∃: Env serialization Lean==Rust × ∃: serde RawEnv with data × ∃: serde RawEnv roundtrip deserialization failed: rs_de_env: Env::get: const at idx 0 bytes hash to 6110b739… but stored under b177ec1b… Three fixes, all derive `addr := Address.blake3 (serConstant c)`: - `Tests/Gen/Ixon.lean::genRawConst` — property-test generator was `RawConst.mk <$> genAddress <*> genConstant` (uncorrelated). Now generates the const first, then derives addr from `serConstant`. - `Tests/FFI/Lifecycle.lean::serdeTests` (`withData` unit case) — was using one `testAddr := Address.blake3 #[1,2,3]` for both the const and unrelated blob/comm slots. 
Split into `testAddr` (still used for the content-hash-free blob/comm) and `testConstAddr := Address.blake3 (serConstant testConst)`. Added a name entry for the new canonical addr. - `Tests/FFI/Lifecycle.lean::genSerdeRawEnv` — the "pool of addresses" pattern picked const addrs from a random pool that couldn't possibly match content hashes. Restructured: consts derive canonical addrs from content, each gets its own name-table entry appended to the pool-derived entries so the serde pipeline's "all addresses resolvable" invariant still holds. --- Tests/FFI/Lifecycle.lean | 34 ++++++++++++++++++++++++---------- Tests/Gen/Ixon.lean | 11 ++++++++--- 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/Tests/FFI/Lifecycle.lean b/Tests/FFI/Lifecycle.lean index 798286be..79099038 100644 --- a/Tests/FFI/Lifecycle.lean +++ b/Tests/FFI/Lifecycle.lean @@ -137,7 +137,10 @@ def serdeTests : TestSeq := -- Empty RawEnv. Only data construction happens eagerly; FFI is deferred -- inside `mkSerdeRoundtripTest`. let empty : RawEnv := { consts := #[], named := #[], blobs := #[], comms := #[] } - -- RawEnv with data (include name entries for all referenced addresses) + -- RawEnv with data. The const's `addr` must be the canonical content + -- hash (`Address.blake3 (serConstant c)`) — the Rust loader verifies + -- `Address::hash(bytes) == addr` on load. Blobs and comms don't carry + -- a content-hash invariant, so `testAddr` is fine there. 
let testAddr := Address.blake3 (ByteArray.mk #[1, 2, 3]) let testExpr : Expr := .sort 0 let testDef : Definition := { @@ -147,21 +150,24 @@ def serdeTests : TestSeq := let testConst : Constant := { info := .defn testDef, sharing := #[], refs := #[], univs := #[] } - let testRawConst : RawConst := { addr := testAddr, const := testConst } + let testConstAddr := Address.blake3 (serConstant testConst) + let testRawConst : RawConst := { addr := testConstAddr, const := testConst } let testComm : Comm := { secret := testAddr, payload := testAddr } let testRawComm : RawComm := { addr := testAddr, comm := testComm } let testRawBlob : RawBlob := { addr := testAddr, bytes := ByteArray.mk #[1, 2, 3] } let testName := Ix.Name.mkStr Ix.Name.mkAnon "test" let testRawNamed : RawNamed := { - name := testName, addr := testAddr, constMeta := .empty + name := testName, addr := testConstAddr, constMeta := .empty } let testNameEntry : RawNameEntry := { addr := testAddr, name := testName } + let testConstNameEntry : RawNameEntry := + { addr := testConstAddr, name := Ix.Name.mkStr Ix.Name.mkAnon "testConst" } let withData : RawEnv := { consts := #[testRawConst], named := #[testRawNamed], blobs := #[testRawBlob], comms := #[testRawComm], - names := #[testNameEntry] + names := #[testNameEntry, testConstNameEntry] } mkSerdeRoundtripTest "serde empty RawEnv" empty ++ mkSerdeRoundtripTest "serde RawEnv with data" withData @@ -190,15 +196,23 @@ private def genSerdeRawEnv : Gen RawEnv := do let pickAddr : Gen Address := do let idx ← Gen.choose Nat 0 (pool.size - 1) pure pool[idx]!.1 - -- Name entries for every address in the pool - let names : Array RawNameEntry := pool.map fun (addr, name) => { addr, name } - -- Consts: pool addresses, empty refs/sharing/univs (no extra address lookups) + -- Consts: derive addr from content (`Env::get` verifies + -- `Address::hash(bytes) == addr` on load). 
Each canonical address + -- gets its own name entry appended to the names table so the serde + -- pipeline can resolve it. let numConsts ← Gen.choose Nat 0 3 let mut consts : Array RawConst := #[] - for _ in [:numConsts] do - let addr ← pickAddr + let mut constNames : Array RawNameEntry := #[] + for i in [:numConsts] do let info ← genSimpleConstantInfo - consts := consts.push { addr, const := { info, sharing := #[], refs := #[], univs := #[] } } + let c : Constant := { info, sharing := #[], refs := #[], univs := #[] } + let addr := Address.blake3 (serConstant c) + consts := consts.push { addr, const := c } + constNames := constNames.push + { addr, name := Ix.Name.mkNat Ix.Name.mkAnon (poolSize + i) } + -- Name entries for every address in the pool + every canonical const addr + let names : Array RawNameEntry := + (pool.map fun (addr, name) => { addr, name }) ++ constNames -- Named: pool addresses with empty metadata let numNamed ← Gen.choose Nat 0 3 let mut named : Array RawNamed := #[] diff --git a/Tests/Gen/Ixon.lean b/Tests/Gen/Ixon.lean index 7847c9fd..24a7df99 100644 --- a/Tests/Gen/Ixon.lean +++ b/Tests/Gen/Ixon.lean @@ -443,9 +443,14 @@ def genIxName : Nat → Gen Ix.Name pure (Ix.Name.mkNat parent n)) ] (pure Ix.Name.mkAnon) -/-- Generate a RawConst -/ -def genRawConst : Gen RawConst := - RawConst.mk <$> genAddress <*> genConstant +/-- Generate a RawConst whose `addr` is the canonical content hash of + `const`. The Rust loader (`Env::get`) verifies + `Address::hash(serConstant c) == addr` on load (added by the + address-tampering defense in the anon-mode work), so generated + pairs with arbitrary addresses are now rejected. -/ +def genRawConst : Gen RawConst := do + let c ← genConstant + pure { addr := Address.blake3 (serConstant c), const := c } /-- Generate a RawNamed with empty metadata (matching Rust test generator). Metadata addresses must reference valid names in env.names for indexed serialization. 
-/ From 778e946b69f072ae7e6125909113a2e27f5e6929 Mon Sep 17 00:00:00 2001 From: Arthur Paulino Date: Wed, 20 May 2026 04:08:07 -0700 Subject: [PATCH 11/12] Regenerate Aiur primitive addresses for singleton-unwrap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiler's singleton non-inductive Muts unwrap (12630aa) changed the content hashes of 48 primitives (`Nat.rec`, `Nat.add`, `Nat.mul`, …). The Aiur kernel's `Ix/IxVM/Kernel/Primitive.lean` still held the pre-unwrap blake3 bytes, so primitive dispatch missed every affected constant and fell through to structural `Nat.rec` unfolding. Tests like `IxVMPrim.nat_mul_big` then drove millions of Nat.succ unfoldings inside the Aiur trace, OOMing the prover. Addresses regenerated from `PrimAddrs::lean_parity_table()` (which the branch had already updated in `src/ix/kernel/primitive.rs`). --- Ix/IxVM/Kernel/Primitive.lean | 384 +++++++++++++++++----------------- 1 file changed, 192 insertions(+), 192 deletions(-) diff --git a/Ix/IxVM/Kernel/Primitive.lean b/Ix/IxVM/Kernel/Primitive.lean index 9e8e811f..199a94cf 100644 --- a/Ix/IxVM/Kernel/Primitive.lean +++ b/Ix/IxVM/Kernel/Primitive.lean @@ -71,38 +71,38 @@ def primitive := ⟦ } fn bit_vec_addr() -> Addr { - store([0xcf, 0x55, 0x11, 0x5c, 0x75, 0x34, 0x3f, 0x82, - 0x4f, 0xdd, 0x93, 0x21, 0x78, 0xb0, 0xcb, 0xc7, - 0x5a, 0x86, 0xe5, 0x05, 0x2d, 0xe9, 0x3d, 0xb9, - 0x8f, 0x05, 0xb3, 0x78, 0x85, 0xff, 0xb0, 0x9b]) + store([0x33, 0xd9, 0x4a, 0x2d, 0x25, 0x0a, 0x1a, 0x5a, + 0xa0, 0x22, 0xe3, 0xbe, 0xfd, 0xca, 0x1c, 0x86, + 0xf4, 0x5d, 0x70, 0x07, 0x1d, 0xb0, 0x38, 0xef, + 0xf9, 0xb8, 0x98, 0x0d, 0xc5, 0x16, 0x0b, 0x76]) } fn bit_vec_to_nat_addr() -> Addr { - store([0x78, 0x34, 0x86, 0x5c, 0x1c, 0x6c, 0xd9, 0x63, - 0xb9, 0x36, 0x5c, 0xb0, 0x65, 0x00, 0x62, 0x38, - 0x80, 0xde, 0x4d, 0x99, 0x30, 0x34, 0x3e, 0x96, - 0xe1, 0x9e, 0x62, 0xa0, 0x26, 0xe7, 0xca, 0xce]) + store([0xf9, 0x42, 0x71, 0x48, 0x2f, 0xfd, 0xfd, 0x78, + 0x02, 0xd4, 
0x2e, 0x22, 0x27, 0x1c, 0x89, 0xe2, + 0x1d, 0xee, 0x45, 0x6b, 0x05, 0x08, 0x59, 0xc5, + 0xd1, 0x2e, 0x3d, 0x1d, 0x69, 0x9b, 0xb4, 0xea]) } fn bit_vec_of_nat_addr() -> Addr { - store([0xa0, 0x8a, 0xcf, 0x4c, 0xed, 0xb4, 0xc0, 0x5e, - 0xdd, 0xb5, 0x5b, 0xff, 0x36, 0x6c, 0xd9, 0x52, - 0xd5, 0xb7, 0xb8, 0x86, 0x02, 0xc3, 0xfc, 0x6d, - 0x87, 0x5e, 0x8e, 0xa7, 0x32, 0xa3, 0xc2, 0xf4]) + store([0x2a, 0xcb, 0x89, 0x42, 0xf3, 0x58, 0x7d, 0x0a, + 0xeb, 0xf1, 0x79, 0x5d, 0xf9, 0x04, 0x26, 0xea, + 0xff, 0x54, 0xe7, 0xf3, 0xcc, 0xce, 0x36, 0xc5, + 0x89, 0xd5, 0xd1, 0x47, 0x16, 0xa7, 0x8f, 0xad]) } fn bit_vec_ult_addr() -> Addr { - store([0x6a, 0x3f, 0x26, 0x2c, 0x2f, 0x4a, 0x2c, 0x51, - 0x7a, 0x61, 0x6f, 0xba, 0xe5, 0x4a, 0x31, 0xec, - 0xcb, 0x85, 0x99, 0x8a, 0xd9, 0xc1, 0xf9, 0x3b, - 0xe8, 0xcc, 0x59, 0x0d, 0x97, 0x11, 0x7c, 0x04]) + store([0x06, 0x8a, 0x88, 0x41, 0x0e, 0xf4, 0x45, 0xd3, + 0x1a, 0xe5, 0x8e, 0x0e, 0x11, 0xb3, 0x68, 0x41, + 0x43, 0xe4, 0x72, 0x28, 0x8b, 0xd4, 0xa8, 0x84, + 0xd5, 0xf9, 0x28, 0xc3, 0xd2, 0x01, 0x9b, 0xec]) } fn decidable_decide_addr() -> Addr { - store([0x6d, 0xda, 0xae, 0xd2, 0x63, 0x74, 0x0b, 0x5d, - 0x5d, 0x67, 0xe6, 0xc1, 0x2e, 0xcf, 0xad, 0xb2, - 0x4a, 0xd8, 0x86, 0x7d, 0x4a, 0x09, 0xfe, 0x78, - 0x4b, 0x59, 0xda, 0xc7, 0xf7, 0x27, 0x54, 0xab]) + store([0xf4, 0xcd, 0xbc, 0x5e, 0xd9, 0xa1, 0xab, 0x59, + 0x28, 0xf9, 0x93, 0x1f, 0x5c, 0x23, 0x90, 0x23, + 0x9e, 0x7f, 0x47, 0xdf, 0x6d, 0x20, 0xe8, 0x4e, + 0xa4, 0x65, 0xc9, 0x70, 0x7b, 0x84, 0xcd, 0xc1]) } fn lt_lt_addr() -> Addr { @@ -134,45 +134,45 @@ def primitive := ⟦ } fn nat_dec_le_addr() -> Addr { - store([0xe0, 0x8c, 0x51, 0x41, 0xc4, 0x4b, 0x27, 0x65, - 0x39, 0x57, 0xae, 0x00, 0xa9, 0x26, 0xa2, 0xdd, - 0x68, 0xdc, 0xd7, 0x77, 0x9c, 0x4f, 0xdf, 0x85, - 0x0e, 0x66, 0x8f, 0xdc, 0x92, 0xb4, 0x08, 0xde]) + store([0xe2, 0xd8, 0x29, 0x2e, 0x1a, 0xdf, 0x54, 0xb0, + 0xd3, 0x93, 0x00, 0x84, 0x77, 0x18, 0xdc, 0x89, + 0xa6, 0x33, 0x32, 0xe8, 0xf3, 0x1b, 0x41, 0xc9, + 0xe8, 0x0f, 0x8c, 0x9a, 0x20, 0x17, 
0x27, 0x8d]) } fn nat_dec_eq_addr() -> Addr { - store([0x38, 0x32, 0x3f, 0xd9, 0xe1, 0x7e, 0x9d, 0x1f, - 0x17, 0x53, 0x6d, 0xbb, 0x7f, 0x19, 0x6b, 0x94, - 0xb5, 0xba, 0x19, 0xe4, 0xbf, 0x62, 0x5d, 0x9e, - 0x7c, 0x60, 0x7c, 0x47, 0x36, 0x5c, 0x15, 0xad]) + store([0x86, 0x29, 0x51, 0x96, 0x32, 0xe1, 0x8c, 0x49, + 0x09, 0x79, 0x36, 0xfc, 0x22, 0x0c, 0x9e, 0x03, + 0xd6, 0x26, 0x3f, 0xdd, 0xeb, 0xa4, 0x46, 0x2a, + 0xf5, 0xb7, 0x17, 0xdd, 0x11, 0xe4, 0xbe, 0xf4]) } fn nat_dec_lt_addr() -> Addr { - store([0xf4, 0x45, 0x08, 0x4f, 0x68, 0x05, 0xfa, 0xf9, - 0xbe, 0x62, 0xaa, 0x32, 0x84, 0x15, 0x65, 0x13, - 0x43, 0xc9, 0x8f, 0xfe, 0x52, 0xdb, 0x15, 0x9d, - 0xfb, 0x1b, 0x9a, 0x14, 0xcb, 0x28, 0xcf, 0x23]) + store([0x42, 0x95, 0xc0, 0x71, 0xa9, 0x48, 0x5a, 0xf2, + 0xd9, 0x98, 0xe3, 0x94, 0x7b, 0xe5, 0x98, 0x80, + 0x77, 0x53, 0x1f, 0x02, 0xbf, 0x09, 0x1b, 0x87, + 0x0d, 0x53, 0xce, 0x58, 0x9d, 0x4e, 0xf5, 0xb1]) } fn int_dec_eq_addr() -> Addr { - store([0x42, 0xd9, 0xb7, 0xa9, 0x4a, 0xef, 0xc7, 0x7a, - 0x66, 0x16, 0x93, 0x6b, 0xe3, 0x12, 0x64, 0xea, - 0xf8, 0xbe, 0xd7, 0xbd, 0x80, 0xf5, 0xd3, 0x49, - 0x67, 0xfc, 0x42, 0xaf, 0xaf, 0x29, 0xa7, 0xfd]) + store([0x19, 0xe0, 0x1b, 0xc9, 0xa3, 0x26, 0x4b, 0x9b, + 0x8b, 0x94, 0x0c, 0xf1, 0x72, 0xa2, 0x09, 0xbb, + 0x77, 0x4a, 0xd3, 0x6f, 0x64, 0x10, 0xfa, 0x74, + 0x2f, 0x00, 0x48, 0x04, 0x68, 0x08, 0xc0, 0xb6]) } fn int_dec_le_addr() -> Addr { - store([0xee, 0x03, 0x70, 0xe4, 0x26, 0xa4, 0x00, 0xc8, - 0xb1, 0x67, 0x82, 0xfa, 0xbf, 0xa0, 0xe4, 0x3f, - 0xf8, 0x7e, 0xca, 0xc1, 0xa0, 0xc1, 0xc7, 0x65, - 0xcc, 0x51, 0x79, 0xfc, 0x42, 0x3a, 0xb1, 0xbd]) + store([0x7e, 0x04, 0x8e, 0xf3, 0x03, 0xec, 0xdc, 0x83, + 0x64, 0x67, 0xcd, 0xd4, 0xd8, 0x92, 0xf7, 0xfe, + 0x26, 0xfb, 0xbd, 0x7b, 0x62, 0xae, 0x1d, 0x17, + 0x46, 0x54, 0x3f, 0x4e, 0x30, 0x98, 0xc6, 0xe1]) } fn int_dec_lt_addr() -> Addr { - store([0x15, 0x07, 0x0e, 0x92, 0x02, 0x04, 0x27, 0x23, - 0x69, 0xf0, 0xf2, 0xe8, 0x0f, 0xf3, 0xf5, 0x03, - 0x5c, 0x05, 0xb3, 0x9e, 0xfa, 0x71, 0x4e, 0xc8, 
- 0xe6, 0xbb, 0xfc, 0xe9, 0x95, 0x06, 0x37, 0xaf]) + store([0x6c, 0xc2, 0xd6, 0x3d, 0xa1, 0xfd, 0x07, 0xe2, + 0x53, 0x3f, 0xcb, 0x08, 0xcb, 0xb3, 0x8c, 0x2d, + 0x67, 0xf7, 0x51, 0x2a, 0x7e, 0xfd, 0x15, 0x23, + 0x6a, 0x4b, 0x0e, 0x57, 0xbb, 0x1f, 0xcd, 0x53]) } fn int_of_nat_addr() -> Addr { @@ -271,38 +271,38 @@ def primitive := ⟦ } fn bool_no_confusion_addr() -> Addr { - store([0x47, 0x3b, 0x2c, 0x94, 0x8d, 0xdb, 0xce, 0x4d, - 0xdb, 0x4b, 0x36, 0x9e, 0x5c, 0xf6, 0x19, 0x9f, - 0xf1, 0x85, 0xb6, 0x4e, 0x9f, 0xbb, 0x1e, 0x90, - 0x90, 0x1d, 0x74, 0x6d, 0xe5, 0x51, 0x90, 0xef]) + store([0x5b, 0x94, 0x71, 0x83, 0x22, 0xc6, 0x33, 0xad, + 0x16, 0x35, 0x92, 0xdb, 0x4f, 0xb7, 0x43, 0x23, + 0x60, 0xf5, 0xd0, 0x23, 0xad, 0xfa, 0x77, 0x49, + 0xf5, 0xcf, 0x51, 0x75, 0x79, 0x8d, 0x16, 0xca]) } fn char_mk_addr() -> Addr { - store([0xe6, 0x22, 0x38, 0xc5, 0x4b, 0x91, 0x39, 0x5c, - 0x2c, 0x06, 0x19, 0x2c, 0xfc, 0xcb, 0x5e, 0x80, - 0xfc, 0xe4, 0x1e, 0xd1, 0x1d, 0x1b, 0xf6, 0xdb, - 0x14, 0x2d, 0x2c, 0x39, 0xd7, 0xc8, 0x1a, 0x20]) + store([0x31, 0x6f, 0xe9, 0x1e, 0xde, 0x33, 0x07, 0x9f, + 0x23, 0x30, 0xcc, 0x99, 0x21, 0xee, 0x11, 0x7f, + 0x9a, 0xca, 0x02, 0x3e, 0xfa, 0x14, 0xf5, 0xb1, + 0xfe, 0x02, 0x4d, 0xdb, 0xe6, 0x25, 0xfe, 0x86]) } fn nat_bitwise_addr() -> Addr { - store([0xf2, 0x1d, 0x74, 0x7a, 0xca, 0x3e, 0x08, 0xf5, - 0x29, 0x00, 0x93, 0xbf, 0x8f, 0x40, 0x20, 0x83, - 0x8d, 0x8e, 0x17, 0x42, 0xa7, 0x8b, 0x3e, 0x1f, - 0x48, 0xd8, 0x3e, 0xf1, 0x59, 0x39, 0x5e, 0x6a]) + store([0x0b, 0x69, 0xfb, 0xfb, 0x2e, 0xf3, 0xc7, 0x73, + 0x3a, 0xd2, 0xf6, 0xbd, 0x77, 0x07, 0x82, 0x0c, + 0x32, 0x60, 0x3a, 0x79, 0x60, 0x35, 0x01, 0xa7, + 0x7f, 0xbb, 0xef, 0x74, 0xdf, 0x85, 0x5a, 0x32]) } fn nat_rec_addr() -> Addr { - store([0x6e, 0x85, 0x5f, 0x04, 0x48, 0x5d, 0xf8, 0xd9, - 0x77, 0x67, 0xf8, 0xaa, 0x89, 0xf2, 0x23, 0xbc, - 0xac, 0x97, 0x7e, 0x2a, 0x15, 0x5c, 0x45, 0xc6, - 0x6d, 0x6e, 0x09, 0x4e, 0xc3, 0x16, 0x31, 0x94]) + store([0x43, 0x61, 0x95, 0x10, 0xee, 0x8a, 0x58, 0x3d, + 0xb7, 0x2b, 0x9d, 
0x71, 0xb8, 0x4e, 0x7e, 0xa1, + 0x3a, 0x19, 0x8f, 0xe3, 0x3d, 0x73, 0x96, 0x3c, + 0xf0, 0xcc, 0x1e, 0xbf, 0x68, 0xa6, 0x8a, 0xd6]) } fn nat_cases_on_addr() -> Addr { - store([0x9a, 0x6b, 0x32, 0xaf, 0x19, 0x4f, 0xdf, 0x0b, - 0x44, 0x76, 0x33, 0x07, 0x7d, 0x9f, 0xa8, 0x9c, - 0x24, 0x9d, 0x6d, 0x7d, 0xf2, 0x43, 0xd3, 0x00, - 0xb8, 0x9d, 0xd9, 0xb1, 0x4d, 0x92, 0xbb, 0x03]) + store([0x28, 0x09, 0x6d, 0x7c, 0xa6, 0xb3, 0xf9, 0x6b, + 0xd2, 0x50, 0xcc, 0x8b, 0x8f, 0xee, 0x00, 0xc3, + 0x6b, 0xbc, 0x36, 0xdd, 0x1d, 0xd2, 0x04, 0x08, + 0x54, 0x04, 0x1e, 0xc1, 0x39, 0x93, 0xba, 0x34]) } fn list_addr() -> Addr { @@ -313,17 +313,17 @@ def primitive := ⟦ } fn string_addr() -> Addr { - store([0xcb, 0x1b, 0xca, 0x7f, 0xc5, 0xdb, 0xb1, 0xbd, - 0xfb, 0xf6, 0x31, 0x9d, 0xf8, 0x9d, 0xa9, 0xfd, - 0xa3, 0xa6, 0x79, 0xd2, 0x25, 0x54, 0xb8, 0xa9, - 0xd5, 0xdd, 0x46, 0x63, 0xc0, 0xa9, 0x73, 0x12]) + store([0x42, 0x26, 0x58, 0xd0, 0x43, 0xee, 0x48, 0x2f, + 0x71, 0x02, 0xd2, 0xf6, 0xea, 0x65, 0x96, 0x66, + 0x48, 0x08, 0xe8, 0x99, 0xab, 0xad, 0x62, 0x80, + 0x80, 0x47, 0x8a, 0x1e, 0x91, 0x89, 0xf0, 0xaa]) } fn string_mk_addr() -> Addr { - store([0x63, 0xd9, 0x5a, 0x0f, 0xd6, 0xa1, 0x14, 0x43, - 0x48, 0xd0, 0xf2, 0x0e, 0x20, 0xcc, 0x5c, 0x3a, - 0xf6, 0x1a, 0xc9, 0x55, 0x92, 0x3f, 0x45, 0xf4, - 0x2a, 0x78, 0x2d, 0xe9, 0x33, 0xaa, 0xd5, 0x94]) + store([0x40, 0x5d, 0x36, 0xf5, 0xf6, 0x47, 0x9c, 0x40, + 0x21, 0x6f, 0xf7, 0xbb, 0xba, 0x10, 0xb0, 0x77, + 0x84, 0x8e, 0xc3, 0x3a, 0xf0, 0x3e, 0xf4, 0x04, + 0x0b, 0xfa, 0x4f, 0x82, 0x93, 0x0d, 0xe4, 0xba]) } fn of_nat_of_nat_addr() -> Addr { @@ -362,10 +362,10 @@ def primitive := ⟦ } fn decidable_rec_addr() -> Addr { - store([0xf3, 0x23, 0xa5, 0x49, 0xad, 0x4d, 0xf6, 0xb2, - 0xf3, 0x28, 0x99, 0x23, 0x7a, 0x28, 0x11, 0x36, - 0xf3, 0x4d, 0x43, 0x1e, 0xd7, 0x2b, 0x33, 0x85, - 0x7c, 0x08, 0x5e, 0x6c, 0x4d, 0x85, 0x27, 0x38]) + store([0x7a, 0x18, 0xca, 0x84, 0xa1, 0x13, 0xb0, 0xc2, + 0xad, 0x0c, 0xc0, 0xe8, 0x25, 0xa5, 0x5d, 0x76, + 0x7e, 0x77, 0xa8, 0x9e, 
0x8d, 0x0e, 0x1d, 0x82, + 0xeb, 0x91, 0x04, 0x85, 0x9f, 0x53, 0xd0, 0x95]) } fn decidable_is_true_addr() -> Addr { @@ -383,31 +383,31 @@ def primitive := ⟦ } fn nat_le_of_ble_eq_true_addr() -> Addr { - store([0x7e, 0x5d, 0x1f, 0x11, 0x18, 0xa8, 0x9f, 0x77, - 0xf8, 0x9d, 0x46, 0x9a, 0x27, 0x73, 0x1a, 0x75, - 0x4d, 0xe3, 0x36, 0xa0, 0x5e, 0x33, 0xf3, 0x83, - 0x05, 0x6b, 0xc9, 0x2b, 0x36, 0x94, 0x78, 0x12]) + store([0xba, 0xb3, 0x7a, 0x8b, 0xd9, 0x86, 0x0d, 0x3b, + 0xfe, 0x31, 0xf1, 0xa1, 0x75, 0x2f, 0xe7, 0x00, + 0x8a, 0x22, 0x4c, 0x6a, 0xd8, 0xaf, 0x62, 0x3c, + 0x7f, 0xb8, 0xbd, 0x19, 0x2b, 0xe5, 0xc0, 0x7e]) } fn nat_not_le_of_not_ble_eq_true_addr() -> Addr { - store([0xc1, 0xe2, 0x3b, 0x8d, 0xaf, 0xb3, 0x77, 0x8b, - 0x99, 0x63, 0x12, 0x06, 0x8a, 0x2b, 0xec, 0x3d, - 0xcb, 0xcc, 0x72, 0x13, 0x2e, 0xfb, 0xf4, 0x3c, - 0x23, 0x5e, 0x57, 0x30, 0x84, 0x66, 0x82, 0x41]) + store([0x98, 0x1b, 0x00, 0xb7, 0xc4, 0x58, 0x99, 0xf7, + 0x26, 0xc3, 0xde, 0x35, 0x32, 0x80, 0x74, 0xcb, + 0x3f, 0x72, 0xa0, 0x92, 0x25, 0x74, 0x3d, 0xa8, + 0x1f, 0x50, 0x31, 0xff, 0x6e, 0x64, 0x7b, 0xa9]) } fn nat_eq_of_beq_eq_true_addr() -> Addr { - store([0xb9, 0xac, 0xc8, 0x1f, 0x28, 0x01, 0xaf, 0x89, - 0xb9, 0x5e, 0x09, 0x62, 0xaa, 0x9d, 0x73, 0x90, - 0xa3, 0xac, 0xfe, 0x8f, 0xb7, 0x60, 0x55, 0x9a, - 0x81, 0x1a, 0x82, 0xed, 0x74, 0x43, 0xdb, 0xb5]) + store([0xa5, 0x7b, 0x81, 0x80, 0x28, 0x87, 0x01, 0xce, + 0xbf, 0xb1, 0xd6, 0xdd, 0x29, 0xf1, 0x60, 0xcc, + 0x4a, 0xcc, 0x3c, 0x6a, 0xba, 0x98, 0x34, 0xe4, + 0x6b, 0x65, 0xf1, 0xc5, 0xaa, 0x72, 0x17, 0xe2]) } fn nat_ne_of_beq_eq_false_addr() -> Addr { - store([0x24, 0x87, 0x79, 0x88, 0x41, 0x09, 0xee, 0xd0, - 0x06, 0x00, 0xa0, 0xbd, 0x96, 0x8f, 0x74, 0x0d, - 0xb7, 0xf3, 0xd9, 0x24, 0xfb, 0x2b, 0x17, 0x06, - 0xab, 0x55, 0x2e, 0x78, 0x76, 0x06, 0x28, 0x55]) + store([0x6e, 0x9b, 0x3c, 0x1c, 0xa5, 0xd9, 0xf0, 0x9b, + 0x90, 0x23, 0x21, 0xb1, 0x55, 0xed, 0xf4, 0x52, + 0x4c, 0x3e, 0x32, 0xde, 0x1d, 0x69, 0x0d, 0xb9, + 0x17, 0xbf, 0xba, 0xaa, 0xac, 0x3f, 0x8f, 
0x82]) } fn reduce_bool_addr() -> Addr { @@ -446,10 +446,10 @@ def primitive := ⟦ } fn punit_size_of_1_addr() -> Addr { - store([0x8c, 0x2c, 0xbf, 0xe3, 0x28, 0x91, 0x0b, 0xfe, - 0x7f, 0xeb, 0x60, 0x07, 0x2b, 0x46, 0xf7, 0x48, - 0x76, 0x92, 0xcb, 0x37, 0x59, 0x96, 0x81, 0xb1, - 0x37, 0xa3, 0x1d, 0xd9, 0x9e, 0x70, 0x8f, 0x03]) + store([0x48, 0x91, 0x87, 0xe9, 0xcd, 0x03, 0xab, 0xeb, + 0xc1, 0x2a, 0x13, 0x35, 0xc6, 0x28, 0xd6, 0x42, + 0xea, 0x2a, 0x48, 0xbd, 0xc2, 0x62, 0xc8, 0x5f, + 0x84, 0x8f, 0x10, 0x11, 0xe7, 0x3f, 0x61, 0x0a]) } fn size_of_size_of_addr() -> Addr { @@ -495,171 +495,171 @@ def primitive := ⟦ } fn nat_pred_addr() -> Addr { - store([0x6b, 0x59, 0xcf, 0x44, 0x97, 0x81, 0xf0, 0x7b, - 0x04, 0x20, 0x7d, 0x66, 0x59, 0x78, 0xb5, 0xc5, - 0xef, 0x96, 0x88, 0xaf, 0xa7, 0x44, 0x85, 0x90, - 0xa6, 0x8f, 0x7d, 0xa7, 0xff, 0x88, 0xc5, 0x16]) + store([0x4e, 0xd5, 0xff, 0xfb, 0x03, 0xae, 0x5e, 0x6b, + 0x7a, 0x0d, 0x9f, 0x33, 0x79, 0xaa, 0x76, 0x9e, + 0x5c, 0xa8, 0x18, 0x8c, 0xac, 0xbd, 0xf1, 0xe2, + 0x0d, 0xca, 0x4b, 0xad, 0x27, 0xf2, 0x53, 0x33]) } fn nat_add_addr() -> Addr { - store([0xf9, 0x41, 0x92, 0x05, 0x8e, 0x41, 0xbc, 0x29, - 0xe8, 0x89, 0x24, 0xd8, 0x57, 0xa6, 0xbd, 0x33, - 0xf8, 0xb3, 0xe0, 0xa9, 0x0f, 0x87, 0x86, 0x82, - 0x82, 0x70, 0xd1, 0xcc, 0x1d, 0xd0, 0xad, 0xc6]) + store([0x9d, 0x83, 0x30, 0x7d, 0x55, 0x2e, 0x68, 0x1f, + 0x4c, 0xce, 0xff, 0x7f, 0x78, 0x3b, 0x5a, 0x64, + 0xe0, 0x02, 0x57, 0x5e, 0xdc, 0xb1, 0xc0, 0x4f, + 0xa0, 0xc5, 0x66, 0x2c, 0xe2, 0xdd, 0x34, 0x38]) } fn nat_sub_addr() -> Addr { - store([0xfa, 0x98, 0xda, 0xbf, 0x44, 0xd2, 0xa6, 0x30, - 0x7b, 0x49, 0x0a, 0xc9, 0xe8, 0x11, 0x43, 0x3e, - 0xfc, 0x2f, 0x95, 0x89, 0x96, 0xc6, 0x7b, 0xe1, - 0x39, 0x8c, 0xb4, 0xd1, 0xb2, 0x64, 0xcf, 0x39]) + store([0x9e, 0x86, 0xff, 0x43, 0xb1, 0x5a, 0xeb, 0xaf, + 0xb3, 0xdf, 0x61, 0x0a, 0x96, 0xdd, 0x44, 0x92, + 0xff, 0x9c, 0xd8, 0xaa, 0xb8, 0x7a, 0x82, 0x02, + 0x5b, 0x61, 0x7c, 0x9a, 0x0b, 0xbf, 0x62, 0x80]) } fn nat_mul_addr() -> Addr { - 
store([0x9b, 0x5c, 0x57, 0xea, 0x1c, 0xf2, 0xfb, 0x1d, - 0xe6, 0x7e, 0xe5, 0xbe, 0xc1, 0x5e, 0x36, 0x0d, - 0x20, 0xa9, 0x63, 0x59, 0x90, 0x27, 0x30, 0x14, - 0xe6, 0x78, 0x51, 0xe0, 0x49, 0xff, 0x36, 0x19]) + store([0x9b, 0xc1, 0x35, 0x39, 0xb6, 0x8b, 0x0e, 0x1c, + 0x5a, 0x53, 0x81, 0x85, 0x80, 0xaa, 0x09, 0x6a, + 0x65, 0x90, 0x7f, 0x63, 0xaf, 0x45, 0x88, 0xa1, + 0xe9, 0x1e, 0x14, 0xd3, 0x4d, 0x9e, 0x4d, 0x86]) } fn nat_pow_addr() -> Addr { - store([0xd0, 0x15, 0x98, 0x7b, 0xb1, 0x0d, 0xd2, 0x28, - 0x63, 0xdd, 0xc4, 0x11, 0x60, 0xd2, 0x7d, 0xd3, - 0xd1, 0xea, 0x74, 0xf7, 0x54, 0xfb, 0x24, 0x12, - 0x43, 0x24, 0x36, 0xf3, 0xea, 0x5b, 0x50, 0x71]) + store([0xb5, 0x2c, 0x4d, 0x0d, 0x38, 0x78, 0xf2, 0x87, + 0x71, 0x9f, 0x65, 0xd0, 0x08, 0x8a, 0x26, 0x9a, + 0xf0, 0xf6, 0xe5, 0xb1, 0xb7, 0xef, 0x56, 0x29, + 0x83, 0x09, 0x63, 0xdc, 0xb7, 0x5e, 0x6c, 0xee]) } fn nat_gcd_addr() -> Addr { - store([0xee, 0x8b, 0xa9, 0x21, 0x6b, 0x3f, 0xc8, 0x1e, - 0x79, 0x68, 0x58, 0x6b, 0x43, 0xce, 0xbe, 0xa1, - 0x5d, 0x0e, 0x14, 0x3d, 0x5d, 0x4b, 0x1f, 0xde, - 0x1b, 0xd3, 0x01, 0xa7, 0x40, 0x93, 0xf6, 0x06]) + store([0x74, 0x36, 0xd9, 0xfa, 0x7c, 0xce, 0x3e, 0xf9, + 0x1b, 0xc9, 0x90, 0x3c, 0xc5, 0xaa, 0x32, 0xd4, + 0x13, 0xda, 0x2f, 0x6c, 0xa7, 0xc2, 0x1a, 0x92, + 0x35, 0xb4, 0x1a, 0x2f, 0xc4, 0x82, 0xdf, 0xfc]) } fn nat_mod_addr() -> Addr { - store([0x8e, 0xf8, 0xb2, 0x8b, 0x4e, 0x9e, 0x0a, 0x59, - 0xf3, 0x82, 0x2e, 0x24, 0x3e, 0x71, 0x29, 0x9f, - 0x06, 0xbb, 0x6e, 0x7a, 0xfd, 0xb6, 0xcd, 0xd9, - 0x79, 0x76, 0xfb, 0x29, 0x0b, 0x66, 0x7b, 0xb4]) + store([0x6e, 0xa1, 0xa4, 0x4f, 0x73, 0x78, 0xe3, 0x72, + 0xfe, 0xb5, 0x8f, 0xb5, 0x2c, 0x80, 0x84, 0x62, + 0x60, 0x57, 0xb3, 0xf3, 0x87, 0x49, 0x5e, 0x76, + 0x00, 0xb9, 0x71, 0xa3, 0x8b, 0x24, 0x42, 0x76]) } fn nat_div_addr() -> Addr { - store([0xfa, 0x58, 0x37, 0x94, 0xc8, 0xef, 0x36, 0x8e, - 0xff, 0x68, 0x81, 0xe8, 0x16, 0xa4, 0xe8, 0x89, - 0xf9, 0x50, 0x61, 0x11, 0x6c, 0xe4, 0x9b, 0x15, - 0x40, 0x56, 0xd3, 0x8f, 0xce, 0x4b, 0x7f, 0x52]) + 
store([0xd0, 0x91, 0x95, 0x70, 0xf8, 0x93, 0x2d, 0xdf, + 0x5d, 0xff, 0x43, 0x00, 0xab, 0x76, 0x67, 0xd1, + 0xba, 0xab, 0x93, 0x24, 0xdb, 0xc1, 0x36, 0xac, + 0x9c, 0x81, 0x29, 0x2e, 0xd1, 0xc8, 0x1f, 0xe9]) } fn nat_land_addr() -> Addr { - store([0xa0, 0xdb, 0x90, 0xe6, 0x8e, 0xe3, 0xb7, 0xa1, - 0x66, 0xe3, 0x5f, 0x61, 0x9b, 0xd7, 0xb0, 0x2c, - 0x08, 0x96, 0xef, 0xd6, 0x0e, 0xb4, 0x69, 0x14, - 0xff, 0x3e, 0x4f, 0xb8, 0x12, 0x52, 0xfb, 0x94]) + store([0x44, 0x51, 0x43, 0x20, 0xbd, 0x93, 0x35, 0xa0, + 0x89, 0x42, 0xe7, 0x7d, 0xe8, 0x07, 0x7e, 0x38, + 0x3f, 0x11, 0xa0, 0xf6, 0x15, 0x0c, 0x00, 0x0c, + 0x98, 0x23, 0xc8, 0x74, 0x67, 0x58, 0x99, 0x65]) } fn nat_lor_addr() -> Addr { - store([0xd1, 0x44, 0x19, 0xaa, 0xa4, 0x7a, 0x03, 0xbf, - 0x9a, 0x46, 0x93, 0x8b, 0xf7, 0x2e, 0x40, 0xf9, - 0x6c, 0xab, 0x85, 0x3f, 0x9c, 0xc5, 0x86, 0x98, - 0x79, 0xe7, 0x69, 0x9f, 0x45, 0x17, 0x17, 0x73]) + store([0x18, 0x4c, 0xa6, 0xa9, 0x32, 0xa4, 0xc5, 0xfd, + 0x0a, 0x2c, 0x16, 0x95, 0x01, 0xd2, 0xd5, 0x04, + 0x8b, 0xb7, 0x43, 0xbd, 0x16, 0x6f, 0x96, 0xff, + 0xec, 0x9d, 0x41, 0x01, 0xe5, 0x4e, 0x98, 0x2b]) } fn nat_xor_addr() -> Addr { - store([0xae, 0x68, 0xfd, 0x41, 0x6e, 0xcb, 0x9c, 0xe2, - 0x06, 0x12, 0x27, 0x2d, 0x43, 0xc2, 0xf8, 0x6e, - 0xaf, 0x21, 0xd9, 0x54, 0x7f, 0x56, 0x59, 0x68, - 0x39, 0x1e, 0x9e, 0x12, 0xe3, 0x93, 0x72, 0xdc]) + store([0x16, 0x3a, 0x8c, 0x28, 0x00, 0xca, 0x51, 0xda, + 0xaf, 0xfe, 0x1b, 0x71, 0x57, 0x51, 0x27, 0x94, + 0x2a, 0x05, 0x30, 0x04, 0x40, 0x52, 0x4b, 0x14, + 0x5c, 0x8f, 0xcd, 0xcc, 0x5e, 0xe0, 0x08, 0xb6]) } fn nat_shift_left_addr() -> Addr { - store([0xf6, 0x06, 0xb7, 0xc2, 0x31, 0x80, 0xa2, 0x0a, - 0xce, 0x60, 0xfe, 0x24, 0xd5, 0x2b, 0xc0, 0xea, - 0x38, 0x54, 0x69, 0x8d, 0x2d, 0x14, 0xda, 0x05, - 0xc4, 0x83, 0x7a, 0x97, 0xe1, 0xab, 0x44, 0x69]) + store([0x16, 0xbd, 0x10, 0x36, 0x5e, 0xe6, 0xfa, 0x40, + 0xb4, 0xa1, 0xdd, 0xc0, 0xdd, 0x26, 0xc8, 0xa4, + 0x9d, 0xb8, 0xf8, 0xb1, 0xeb, 0x56, 0xb2, 0xac, + 0x2a, 0x17, 0x9e, 0xa2, 0x44, 0x05, 0x98, 0xd7]) } 
fn nat_shift_right_addr() -> Addr { - store([0xd8, 0x60, 0xb5, 0x60, 0x15, 0x6d, 0xa6, 0x8e, - 0x80, 0x1c, 0x8b, 0xd5, 0x1d, 0x89, 0x2e, 0x55, - 0x7f, 0xbe, 0x35, 0x26, 0xd7, 0xd1, 0x98, 0x69, - 0x6f, 0xfb, 0x4d, 0x55, 0x1a, 0xe0, 0x4b, 0xb7]) + store([0x6f, 0xe2, 0x1e, 0x35, 0xa9, 0xa3, 0x08, 0xde, + 0xaf, 0xe5, 0x32, 0x10, 0xdb, 0x5b, 0x28, 0x56, + 0xc1, 0x85, 0xdc, 0x14, 0x7e, 0xf2, 0x71, 0x7c, + 0x0e, 0x73, 0xa0, 0xfa, 0x3a, 0xd3, 0x16, 0x90]) } fn nat_beq_addr() -> Addr { - store([0xe8, 0xb7, 0x14, 0x9d, 0x8a, 0x7d, 0x12, 0x41, - 0x4b, 0x06, 0x25, 0x2f, 0x31, 0x8d, 0x40, 0x82, - 0x04, 0x72, 0x3c, 0xa4, 0xc0, 0x2f, 0x3a, 0x38, - 0xed, 0xfa, 0x37, 0x79, 0x24, 0x48, 0xc0, 0xda]) + store([0x49, 0xa1, 0x67, 0x14, 0xbd, 0x7b, 0x82, 0x03, + 0x7c, 0xd8, 0xe7, 0x76, 0x33, 0x1d, 0x82, 0x62, + 0x82, 0x9b, 0xc7, 0x0c, 0x8e, 0xe3, 0x63, 0xc8, + 0x66, 0xc7, 0x06, 0x0b, 0xf3, 0x66, 0xcd, 0x9b]) } fn nat_ble_addr() -> Addr { - store([0x22, 0x75, 0x08, 0x0a, 0x89, 0xc3, 0x27, 0x90, - 0x4e, 0x3a, 0xd1, 0x27, 0xba, 0x44, 0x37, 0x0a, - 0x7c, 0x6c, 0x1b, 0xef, 0x3a, 0xa7, 0x47, 0x92, - 0x07, 0x9f, 0x8f, 0x31, 0x59, 0x63, 0x69, 0x57]) + store([0xf5, 0xbb, 0x24, 0x57, 0x67, 0xfd, 0xbc, 0x68, + 0x3b, 0xee, 0x9e, 0x1c, 0xa8, 0xd9, 0xa7, 0x24, + 0x74, 0x26, 0xfb, 0x24, 0xc6, 0x7b, 0x2c, 0x3f, + 0x22, 0x7d, 0xe5, 0x1b, 0x5f, 0x83, 0x9b, 0x26]) } fn str_addr() -> Addr { - store([0xcb, 0x1b, 0xca, 0x7f, 0xc5, 0xdb, 0xb1, 0xbd, - 0xfb, 0xf6, 0x31, 0x9d, 0xf8, 0x9d, 0xa9, 0xfd, - 0xa3, 0xa6, 0x79, 0xd2, 0x25, 0x54, 0xb8, 0xa9, - 0xd5, 0xdd, 0x46, 0x63, 0xc0, 0xa9, 0x73, 0x12]) + store([0x42, 0x26, 0x58, 0xd0, 0x43, 0xee, 0x48, 0x2f, + 0x71, 0x02, 0xd2, 0xf6, 0xea, 0x65, 0x96, 0x66, + 0x48, 0x08, 0xe8, 0x99, 0xab, 0xad, 0x62, 0x80, + 0x80, 0x47, 0x8a, 0x1e, 0x91, 0x89, 0xf0, 0xaa]) } fn string_utf8_byte_size_addr() -> Addr { - store([0x11, 0xea, 0x14, 0x32, 0x56, 0x2b, 0x11, 0x32, - 0x85, 0x3f, 0x17, 0x3f, 0xda, 0x9a, 0xdd, 0x59, - 0x1b, 0x06, 0x06, 0xa8, 0xde, 0xe3, 0x6b, 0x00, - 0xf7, 
0x1b, 0xec, 0x29, 0x67, 0xfb, 0x64, 0x47]) + store([0xcc, 0x6c, 0xdc, 0x73, 0xe0, 0xdf, 0x40, 0x4b, + 0xa7, 0x68, 0x5c, 0x73, 0x3e, 0xbb, 0xe7, 0xc1, + 0xae, 0xcc, 0x6e, 0xf4, 0x65, 0x03, 0xd1, 0x0a, + 0xad, 0x58, 0xbf, 0x70, 0xf8, 0x4a, 0x48, 0x58]) } fn string_back_addr() -> Addr { - store([0x11, 0xba, 0xba, 0x55, 0xcb, 0xdf, 0x36, 0x49, - 0xfc, 0x1b, 0x69, 0x6c, 0x2e, 0x77, 0x56, 0x96, - 0xe9, 0x95, 0xc3, 0x8e, 0xf3, 0x13, 0xcf, 0x27, - 0x65, 0x53, 0xe1, 0x89, 0x8d, 0xa4, 0x5e, 0x0f]) + store([0xf6, 0x06, 0x6f, 0xc6, 0x24, 0x91, 0xfd, 0x4c, + 0x48, 0xd4, 0xda, 0xf3, 0xb9, 0xbe, 0xba, 0x72, + 0xe2, 0xa0, 0xb8, 0x04, 0x0f, 0xcb, 0xd9, 0x9f, + 0xb7, 0x29, 0xab, 0xf5, 0x6a, 0x9c, 0x07, 0xc4]) } fn string_legacy_back_addr() -> Addr { - store([0x99, 0x8c, 0x3e, 0x64, 0x0c, 0x8b, 0x3a, 0x35, - 0xc6, 0x27, 0x20, 0x0d, 0xcd, 0x69, 0x4f, 0x67, - 0xf8, 0xb1, 0xd4, 0x1e, 0x68, 0x76, 0x0c, 0x90, - 0xe3, 0x61, 0xda, 0x24, 0x73, 0x4d, 0x39, 0xbc]) + store([0xd5, 0xe5, 0x43, 0xa5, 0xb6, 0xbd, 0xe8, 0x8d, + 0xc3, 0x85, 0x4d, 0x4c, 0x2b, 0x9a, 0x12, 0xac, + 0x27, 0x09, 0x76, 0xbf, 0x41, 0x02, 0xa6, 0xb3, + 0x3f, 0x55, 0xa9, 0x0d, 0xb3, 0x24, 0x26, 0x8f]) } fn string_to_byte_array_addr() -> Addr { - store([0x65, 0xf6, 0x44, 0x28, 0x6b, 0xc4, 0x94, 0x64, - 0xcc, 0x7a, 0x36, 0xb7, 0xd7, 0x95, 0x2f, 0x85, - 0x43, 0xab, 0x67, 0x56, 0x4c, 0xd5, 0x09, 0xee, - 0x87, 0x8a, 0x95, 0x37, 0x56, 0x09, 0x06, 0x9b]) + store([0xa0, 0x77, 0x36, 0xec, 0x99, 0x9f, 0xdc, 0xb8, + 0x75, 0x30, 0x67, 0x49, 0x7f, 0x9f, 0x97, 0xb4, + 0x61, 0xf2, 0xa1, 0x4e, 0x81, 0x69, 0xcd, 0x11, + 0x28, 0x7d, 0xc7, 0x3c, 0xdf, 0xd7, 0x42, 0xaa]) } fn byte_array_empty_addr() -> Addr { - store([0xd9, 0x74, 0x17, 0xc4, 0x92, 0x06, 0xc6, 0x1f, - 0xe2, 0x8c, 0xbb, 0x7a, 0x0b, 0x60, 0x95, 0xf7, - 0x22, 0xcd, 0xfb, 0xc2, 0x13, 0xe0, 0x34, 0xaa, - 0x59, 0xde, 0x51, 0xb9, 0x21, 0x8a, 0xf0, 0x74]) + store([0xc0, 0x7f, 0x15, 0x89, 0xbd, 0x7d, 0xcc, 0x55, + 0x6e, 0x38, 0x4e, 0x42, 0xba, 0xb1, 0x42, 0xa8, + 0x4f, 0x7a, 0x62, 0x55, 
0xd3, 0x9b, 0x59, 0xb0, + 0xf9, 0x00, 0x19, 0x80, 0x47, 0x25, 0x22, 0x96]) } fn char_of_nat_addr() -> Addr { - store([0x7a, 0x57, 0x54, 0x38, 0x6b, 0x30, 0xbb, 0x86, - 0xf0, 0xb6, 0xf7, 0x0f, 0xd3, 0x68, 0xbb, 0x50, - 0xe6, 0x03, 0x27, 0x3a, 0x50, 0xad, 0x79, 0xd8, - 0xc1, 0x7f, 0xc3, 0xcb, 0x59, 0xf8, 0x0f, 0xac]) + store([0x28, 0xdc, 0x1b, 0x3d, 0x3d, 0x2e, 0x01, 0x15, + 0x29, 0xc7, 0x1c, 0x9d, 0x44, 0x18, 0x24, 0x8f, + 0x60, 0x60, 0xdb, 0xfb, 0x1c, 0x7e, 0x97, 0xdb, + 0x1c, 0x57, 0x2a, 0x56, 0x57, 0x87, 0xef, 0x61]) } fn char_type_addr() -> Addr { - store([0x38, 0xaa, 0x12, 0x05, 0x9f, 0xad, 0x3a, 0xfa, - 0x1e, 0x1e, 0x87, 0x40, 0xdc, 0x94, 0x70, 0xa4, - 0x7c, 0x26, 0x98, 0x63, 0x50, 0xf6, 0xcb, 0x3b, - 0xea, 0x1f, 0xae, 0x12, 0x76, 0xd7, 0xb5, 0xf1]) + store([0x2f, 0x96, 0xb8, 0xda, 0x29, 0xa3, 0x8b, 0x17, + 0x7f, 0xc3, 0x25, 0x53, 0xd5, 0x38, 0xd5, 0xd4, + 0x50, 0x21, 0x2f, 0xd3, 0xe6, 0xfe, 0xd9, 0x5d, + 0x61, 0xc8, 0x17, 0x83, 0x7d, 0x29, 0xa3, 0x4f]) } fn string_of_list_addr() -> Addr { - store([0x63, 0xd9, 0x5a, 0x0f, 0xd6, 0xa1, 0x14, 0x43, - 0x48, 0xd0, 0xf2, 0x0e, 0x20, 0xcc, 0x5c, 0x3a, - 0xf6, 0x1a, 0xc9, 0x55, 0x92, 0x3f, 0x45, 0xf4, - 0x2a, 0x78, 0x2d, 0xe9, 0x33, 0xaa, 0xd5, 0x94]) + store([0x40, 0x5d, 0x36, 0xf5, 0xf6, 0x47, 0x9c, 0x40, + 0x21, 0x6f, 0xf7, 0xbb, 0xba, 0x10, 0xb0, 0x77, + 0x84, 0x8e, 0xc3, 0x3a, 0xf0, 0x3e, 0xf4, 0x04, + 0x0b, 0xfa, 0x4f, 0x82, 0x93, 0x0d, 0xe4, 0xba]) } fn list_nil_addr() -> Addr { From 88151e9286cd7e568a4e334b4b4ad2f0b7d6d2ce Mon Sep 17 00:00:00 2001 From: Arthur Paulino Date: Wed, 20 May 2026 05:13:14 -0700 Subject: [PATCH 12/12] Fix standalone Recr ingress: typ-based inductive lookup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `find_matching_block_addr` heuristic in the standalone-Recr branch of `build_convert_inputs_walk` picks the inductive block by matching ctor count to rule count. 
When multiple in-scope inductives share the same ctor count, it returns the wrong block — the stored rules' `ctor_idx` then points to a sibling's ctor, while `populate_rules` (canonical) derives `ctor_idx` from the right inductive's `ctor_indices`. The mismatch surfaces as `compare_rules`'s `assert_eq!(s_ctor, c_ctor)` panic (e.g. `38 != 40` for `IxVMPrim.nat_land_lit`). Switch the standalone-Recr path to the same typ-based resolution `build_aux_recr_ctor_idxs` already uses for aux Recr blocks: peel `params + motives + minors + indices` foralls of `recr.typ`, take the major's head Ref, look it up in `refs` to get the inductive's address, then resolve `IPrj.block` for the Muts wrapper. Slice ctor positions for the specific member via `extract_member_ctor_idxs`. Also store the resolved Muts `block_addr` in `CKRecr` (was passing the heuristic's wrong address through to `convert_recursor`). --- Ix/IxVM/Ingress.lean | 44 +++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/Ix/IxVM/Ingress.lean b/Ix/IxVM/Ingress.lean index 9466eaa7..299df997 100644 --- a/Ix/IxVM/Ingress.lean +++ b/Ix/IxVM/Ingress.lean @@ -1156,22 +1156,36 @@ def ingress := ⟦ ConstantInfo.Recr(recr) => let ref_idxs = build_ref_idxs_mapped(refs, all_addrs, pos_map); let lit_blobs = build_lit_blobs(refs, all_addrs); - let nrules = recr_rule_count(recr); - let block_addr = find_matching_block_addr(refs, all_addrs, nrules); - let block_const = load_verified_constant(block_addr); - match block_const { - Constant.Mk(block_info, _, _, _) => - match block_info { - ConstantInfo.Muts(members) => - let recur_idxs = store(ListNode.Cons(pos, store(ListNode.Nil))); - let bs = lookup_block_start(block_addr, block_addrs, block_starts); - let rule_ctor_idxs = build_rule_ctor_idxs(members, bs, 0); - let ctx = ConvertCtx.Mk(sharing, ref_idxs, recur_idxs, lit_blobs, univs); - let input = ConvertInput.Mk(ctx, ConvertKind.CKRecr(recr, rule_ctor_idxs, block_addr)); - 
store(ListNode.Cons(store(input), - build_convert_inputs_walk(rest, rest_addrs, all_addrs, pos_map, canon_addrs, block_addrs, block_starts, pos + 1, seen_mptrs))), + -- Resolve the recursor's inductive via typ-based lookup: + -- peel n_skip foralls of `recr.typ` to reach the major's + -- type, take its head, lookup `refs[head_ref_idx]`. The + -- ctor-count heuristic in `find_matching_block_addr` picks + -- the wrong block when multiple in-scope inductives share + -- the same ctor count. + let rule_ctor_idxs = build_aux_recr_ctor_idxs( + recr, refs, sharing, all_addrs, block_addrs, block_starts); + let n_skip = match recr { + Recursor.Mk(_, _, _, params, indices, motives, minors, _, _) => + ((flatten_u64(params) + flatten_u64(motives)) + + flatten_u64(minors)) + flatten_u64(indices), + }; + let typ = match recr { + Recursor.Mk(_, _, _, _, _, _, _, &typ, _) => typ, + }; + let ind_addr = rec_typ_to_inductive_addr(typ, n_skip, refs, sharing); + let ind_const = load_verified_constant(ind_addr); + let block_addr = match ind_const { + Constant.Mk(info, _, _, _) => + match info { + ConstantInfo.IPrj(prj) => + match prj { InductiveProj.Mk(_, ba) => ba, }, }, - }, + }; + let recur_idxs = store(ListNode.Cons(pos, store(ListNode.Nil))); + let ctx = ConvertCtx.Mk(sharing, ref_idxs, recur_idxs, lit_blobs, univs); + let input = ConvertInput.Mk(ctx, ConvertKind.CKRecr(recr, rule_ctor_idxs, block_addr)); + store(ListNode.Cons(store(input), + build_convert_inputs_walk(rest, rest_addrs, all_addrs, pos_map, canon_addrs, block_addrs, block_starts, pos + 1, seen_mptrs))), }, }, },