From 42d816019cad275efa866e2a5735779a70918beb Mon Sep 17 00:00:00 2001
From: frisitano <francesco.risitano95@gmail.com>
Date: Fri, 15 May 2026 15:07:29 +0100
Subject: [PATCH 1/2] Add systematic hash OOD barycentric construction

---
 crates/lean-da/src/main.rs                    | 347 ++++++++++++++++--
 crates/lean-da/zkdsl_implem/lean_da_ood.py    | 256 +++++++++++++
 .../zkdsl_implem/lean_da_ood_column_major.py  | 322 ++++++++++++++++
 .../lean-da/zkdsl_implem/lean_da_ood_tiled.py | 295 +++++++++++++++
 .../lean-da/zkdsl_implem/ood_barycentric.py   | 119 ++++++
 crates/lean_compiler/snark_lib.py             |   6 +-
 .../lean_compiler/src/a_simplify_lang/mod.rs  |  49 ++-
 .../lean_compiler/src/instruction_encoder.rs  |   8 +-
 crates/lean_compiler/tests/test_compiler.rs   |  35 +-
 crates/lean_vm/src/isa/instruction.rs         |  19 +-
 crates/lean_vm/src/tables/execution/air.rs    |  16 +-
 crates/lean_vm/src/tables/execution/mod.rs    |   3 +-
 crates/lean_vm/src/tables/extension_op/air.rs |  30 +-
 .../lean_vm/src/tables/extension_op/exec.rs   |   6 +-
 crates/lean_vm/src/tables/extension_op/mod.rs |  22 +-
 crates/lean_vm/src/tables/mod.rs              |   4 +-
 crates/lean_vm/src/tables/poseidon_16/mod.rs  |  30 +-
 crates/lean_vm/src/tables/table_enum.rs       |   2 +-
 18 files changed, 1496 insertions(+), 73 deletions(-)
 create mode 100644 crates/lean-da/zkdsl_implem/lean_da_ood.py
 create mode 100644 crates/lean-da/zkdsl_implem/lean_da_ood_column_major.py
 create mode 100644 crates/lean-da/zkdsl_implem/lean_da_ood_tiled.py
 create mode 100644 crates/lean-da/zkdsl_implem/ood_barycentric.py
diff --git a/crates/lean-da/src/main.rs b/crates/lean-da/src/main.rs
index 3fade99ca..65bec9a77 100644
--- a/crates/lean-da/src/main.rs
+++ b/crates/lean-da/src/main.rs
@@ -3,8 +3,8 @@ mod cache;
 use std::collections::{BTreeMap, HashMap};
 use std::time::Instant;
 
-use backend::{Algebra, BasedVectorSpace, PrimeCharacteristicRing, Proof, TwoAdicField};
-use clap::Parser;
+use backend::{Algebra, BasedVectorSpace, Field, PrimeCharacteristicRing, Proof, TwoAdicField};
+use clap::{Parser, ValueEnum};
 use lean_compiler::{CompilationFlags, ProgramSource, compile_program_with_flags};
 use lean_prover::{
     default_whir_config,
@@ -26,10 +26,40 @@ pub const DEFAULT_N_BLOBS: usize = 8;
 struct Cli {
     #[arg(long, help = "Number of blobs to commit", default_value_t = DEFAULT_N_BLOBS)]
     n_blobs: usize,
+    #[arg(long, value_enum, default_value_t = Construction::Baseline, help = "leanDA construction to run")]
+    construction: Construction,
     #[arg(long, help = "Enable tracing")]
     tracing: bool,
 }
 
+#[derive(Clone, Copy, Debug, Eq, PartialEq, ValueEnum)]
+pub enum Construction {
+    Baseline,
+    OodRow,
+    OodRowColumnMajor,
+    OodRowTiled,
+}
+
+impl Construction {
+    fn entry(self) -> &'static str {
+        match self {
+            Self::Baseline => "lean_da.py",
+            Self::OodRow => "lean_da_ood.py",
+            Self::OodRowColumnMajor => "lean_da_ood_column_major.py",
+            Self::OodRowTiled => "lean_da_ood_tiled.py",
+        }
+    }
+
+    fn label(self) -> &'static str {
+        match self {
+            Self::Baseline => "baseline",
+            Self::OodRow => "ood-row",
+            Self::OodRowColumnMajor => "ood-row-column-major",
+            Self::OodRowTiled => "ood-row-tiled",
+        }
+    }
+}
+
 fn main() {
     // cargo run --release -p lean-da -- --n-blobs 48
     let cli = Cli::parse();
@@ -37,16 +67,36 @@ fn main() {
         utils::init_tracing();
     }
 
-    let bytecode = compile_lean_da_bytecode(cli.n_blobs);
-    let public_input: Vec<F> = vec![];
-    let proof = prove_lean_da(&bytecode, &public_input, cli.n_blobs);
+    let bytecode = compile_lean_da_bytecode(cli.n_blobs, cli.construction);
+    let (witness, public_input) = build_instance(cli.n_blobs, cli.construction);
+    let proof = prove_lean_da(&bytecode, &public_input, &witness, cli.n_blobs, cli.construction);
     verify_lean_da(&bytecode, &public_input, proof.proof);
 }
 
-pub fn compile_lean_da_bytecode(n_blobs: usize) -> Bytecode {
+pub fn compile_lean_da_bytecode(n_blobs: usize, construction: Construction) -> Bytecode {
+    assert!(n_blobs > 0, "n_blobs must be nonzero");
+    if matches!(
+        construction,
+        Construction::OodRow | Construction::OodRowColumnMajor | Construction::OodRowTiled
+    ) {
+        assert!(
+            n_blobs.is_power_of_two(),
+            "ood-row constructions require n_blobs to be a power of two"
+        );
+    }
+
     let mut replacements = BTreeMap::new();
+    replacements.insert("LEAN_DA_ENTRY".to_string(), construction.entry().to_string());
     replacements.insert("LOG_M_PLACEHOLDER".to_string(), LOG_M.to_string());
     replacements.insert("N_BLOBS_PLACEHOLDER".to_string(), n_blobs.to_string());
+    replacements.insert(
+        "LOG_N_BLOBS_PLACEHOLDER".to_string(),
+        if n_blobs.is_power_of_two() {
+            log2_exact_power_of_two(n_blobs).to_string()
+        } else {
+            "0".to_string()
+        },
+    );
 
     if let Some((bytecode, _)) = cache::try_load(&EMBEDDED_ZK_DSL, &replacements) {
         println!("(Compilation cache hit)");
@@ -55,7 +105,7 @@ pub fn compile_lean_da_bytecode(n_blobs: usize) -> Bytecode {
 
     let time = Instant::now();
     let source = ProgramSource::Embedded {
-        entry: "lean_da.py".to_string(),
+        entry: construction.entry().to_string(),
         dir: &EMBEDDED_ZK_DSL,
     };
     let bytecode = compile_program_with_flags(
@@ -113,45 +163,250 @@ fn rs_encode<A: Algebra<F> + Copy>(message: &[A]) -> Vec<A> {
     codeword
 }
 
-fn build_witness(n_blobs: usize) -> ExecutionWitness {
+fn log2_exact_power_of_two(n: usize) -> usize {
+    assert!(n > 0 && n.is_power_of_two());
+    n.trailing_zeros() as usize
+}
+
+fn generate_codewords(n_blobs: usize) -> Vec<Vec<EF>> {
     let m = 1 << LOG_M;
-    let dim = <EF as BasedVectorSpace<F>>::DIMENSION;
     let mut rng = StdRng::seed_from_u64(0);
-    let mut codeword_blobs: Vec<Vec<F>> = Vec::with_capacity(n_blobs);
+    let mut codewords = Vec::with_capacity(n_blobs);
     for _ in 0..n_blobs {
         let message: Vec<EF> = (0..m).map(|_| rng.random()).collect();
-        let codeword_ef = rs_encode(&message);
-        let mut codeword_f: Vec<F> = Vec::with_capacity(2 * m * dim);
-        for j in 0..m {
-            codeword_f.extend_from_slice(<EF as BasedVectorSpace<F>>::as_basis_coefficients_slice(
-                &codeword_ef[2 * j],
-            ));
+        codewords.push(rs_encode(&message));
+    }
+    codewords
+}
+
+fn push_ext(out: &mut Vec<F>, value: &EF) {
+    out.extend_from_slice(<EF as BasedVectorSpace<F>>::as_basis_coefficients_slice(value));
+}
+
+fn serialize_codeword_for_witness(codeword: &[EF], construction: Construction) -> Vec<F> {
+    let m = 1 << LOG_M;
+    let dim = <EF as BasedVectorSpace<F>>::DIMENSION;
+    let mut codeword_f = Vec::with_capacity(2 * m * dim);
+    match construction {
+        Construction::Baseline => {
+            for j in 0..m {
+                push_ext(&mut codeword_f, &codeword[2 * j]);
+            }
+            for j in 0..m {
+                push_ext(&mut codeword_f, &codeword[2 * j + 1]);
+            }
         }
-        for j in 0..m {
-            codeword_f.extend_from_slice(<EF as BasedVectorSpace<F>>::as_basis_coefficients_slice(
-                &codeword_ef[2 * j + 1],
-            ));
+        Construction::OodRow | Construction::OodRowColumnMajor | Construction::OodRowTiled => {
+            for value in codeword {
+                push_ext(&mut codeword_f, value);
+            }
+        }
+    }
+    codeword_f
+}
+
+fn serialize_codewords_column_major(codewords: &[Vec<EF>]) -> Vec<F> {
+    let dim = <EF as BasedVectorSpace<F>>::DIMENSION;
+    let row_len = codewords[0].len();
+    let mut out = Vec::with_capacity(codewords.len() * row_len * dim);
+    for j in 0..row_len {
+        for codeword in codewords {
+            push_ext(&mut out, &codeword[j]);
+        }
+    }
+    out
+}
+
+fn serialize_codewords_tiled(codewords: &[Vec<EF>]) -> Vec<F> {
+    let dim = <EF as BasedVectorSpace<F>>::DIMENSION;
+    let tile_len_ext = 64;
+    let row_len = codewords[0].len();
+    assert_eq!(row_len % tile_len_ext, 0);
+
+    let mut out = Vec::with_capacity(codewords.len() * row_len * dim);
+    for tile_start in (0..row_len).step_by(tile_len_ext) {
+        for codeword in codewords {
+            for e in 0..tile_len_ext {
+                push_ext(&mut out, &codeword[tile_start + e]);
+            }
         }
-        codeword_blobs.push(codeword_f);
     }
+    out
+}
+
+fn serialize_ext_vec(values: &[EF]) -> Vec<F> {
+    let dim = <EF as BasedVectorSpace<F>>::DIMENSION;
+    let mut out = Vec::with_capacity(values.len() * dim);
+    for value in values {
+        push_ext(&mut out, value);
+    }
+    out
+}
 
+fn build_witness_from_codewords(codewords: &[Vec<EF>], construction: Construction) -> ExecutionWitness {
     let mut hints: HashMap<String, Vec<Vec<F>>> = HashMap::new();
-    hints.insert("codeword".to_string(), codeword_blobs);
+    match construction {
+        Construction::OodRowColumnMajor => {
+            hints.insert(
+                "codewords_column_major".to_string(),
+                vec![serialize_codewords_column_major(codewords)],
+            );
+        }
+        Construction::OodRowTiled => {
+            hints.insert(
+                "codewords_tiled".to_string(),
+                vec![serialize_codewords_tiled(codewords)],
+            );
+        }
+        Construction::Baseline | Construction::OodRow => {
+            let codeword_blobs = codewords
+                .iter()
+                .map(|codeword| serialize_codeword_for_witness(codeword, construction))
+                .collect();
+            hints.insert("codeword".to_string(), codeword_blobs);
+        }
+    }
     ExecutionWitness {
         preamble_memory_len: 0,
         hints,
     }
 }
 
-pub fn prove_lean_da(bytecode: &Bytecode, public_input: &[F], n_blobs: usize) -> ExecutionProof {
+fn build_instance(n_blobs: usize, construction: Construction) -> (ExecutionWitness, Vec<F>) {
+    let codewords = generate_codewords(n_blobs);
+    match construction {
+        Construction::Baseline => {
+            let witness = build_witness_from_codewords(&codewords, construction);
+            (witness, vec![])
+        }
+        Construction::OodRow | Construction::OodRowColumnMajor | Construction::OodRowTiled => {
+            let public_input = build_ood_public_input(&codewords);
+            let witness = build_witness_from_codewords(&codewords, construction);
+            (witness, public_input)
+        }
+    }
+}
+
+fn build_ood_public_input(codewords: &[Vec<EF>]) -> Vec<F> {
+    let n_blobs = codewords.len();
+    let dim = <EF as BasedVectorSpace<F>>::DIMENSION;
+    let m = 1 << LOG_M;
+    let mut public_input = Vec::with_capacity(n_blobs * 8 + 8 + dim + n_blobs * dim + 8);
+
+    let mut row_digests = Vec::with_capacity(n_blobs);
+    for codeword in codewords {
+        let mut systematic = Vec::with_capacity(m * dim);
+        for value in codeword.iter().take(m) {
+            push_ext(&mut systematic, value);
+        }
+        let digest = utils::poseidon_compress_slice(&systematic, false);
+        public_input.extend_from_slice(&digest);
+        row_digests.push(digest);
+    }
+
+    let chain_digest = chain_digest_array(&row_digests);
+    public_input.extend_from_slice(&chain_digest);
+
+    let z_digest = derive_z_digest(&chain_digest);
+    public_input.extend_from_slice(&z_digest[..dim]);
+    let z = EF::from_basis_coefficients_slice(&z_digest[..dim]).unwrap();
+
+    let row_coeffs = build_scaled_ood_coeffs(n_blobs, z);
+    public_input.extend(serialize_ext_vec(&row_coeffs));
+
+    let ood_row = build_scaled_ood_row(codewords, &row_coeffs);
+    let ood_root = merkle_root_from_exts(&ood_row);
+    public_input.extend_from_slice(&ood_root);
+
+    public_input
+}
+
+fn chain_digest_array(digests: &[[F; 8]]) -> [F; 8] {
+    let mut state = [F::ZERO; 8];
+    for digest in digests {
+        state = utils::poseidon16_compress_pair(&state, digest);
+    }
+    state
+}
+
+fn tag_digest(tag: u64) -> [F; 8] {
+    let mut digest = [F::ZERO; 8];
+    digest[0] = F::from_u64(tag);
+    digest
+}
+
+fn derive_z_digest(chain_digest: &[F; 8]) -> [F; 8] {
+    utils::poseidon16_compress_pair(chain_digest, &tag_digest(1))
+}
+
+fn build_scaled_ood_coeffs(n_blobs: usize, z: EF) -> Vec<EF> {
+    let log_n_blobs = log2_exact_power_of_two(n_blobs);
+    let row_root_inv = EF::from(F::two_adic_generator(log_n_blobs).inverse());
+    let numerator = z.exp_u64(n_blobs as u64) - EF::ONE;
+
+    let mut row_coeffs = Vec::with_capacity(n_blobs);
+    let mut point = EF::ONE;
+    for _ in 0..n_blobs {
+        let denominator = z * point - EF::ONE;
+        row_coeffs.push(numerator / denominator);
+        point *= row_root_inv;
+    }
+    row_coeffs
+}
+
+fn build_scaled_ood_row(codewords: &[Vec<EF>], row_coeffs: &[EF]) -> Vec<EF> {
+    assert_eq!(codewords.len(), row_coeffs.len());
+    let row_len = codewords[0].len();
+    let mut ood_row = vec![EF::ZERO; row_len];
+    for j in 0..row_len {
+        let mut acc = EF::ZERO;
+        for (codeword, coeff) in codewords.iter().zip(row_coeffs) {
+            acc += codeword[j] * *coeff;
+        }
+        ood_row[j] = acc;
+    }
+    ood_row
+}
+
+fn merkle_root_from_exts(row: &[EF]) -> [F; 8] {
+    let data = serialize_ext_vec(row);
+    merkle_root_from_base(&data)
+}
+
+fn merkle_root_from_base(data: &[F]) -> [F; 8] {
+    let dim = <EF as BasedVectorSpace<F>>::DIMENSION;
+    let leaf_len_ext = 1 << 4;
+    let leaf_len = leaf_len_ext * dim;
+    assert_eq!(data.len() % leaf_len, 0);
+    let mut layer: Vec<[F; 8]> = data
+        .chunks_exact(leaf_len)
+        .map(|leaf| utils::poseidon_compress_slice(leaf, false))
+        .collect();
+    assert!(layer.len().is_power_of_two());
+
+    while layer.len() > 1 {
+        layer = layer
+            .chunks_exact(2)
+            .map(|pair| utils::poseidon16_compress_pair(&pair[0], &pair[1]))
+            .collect();
+    }
+    layer[0]
+}
+
+pub fn prove_lean_da(
+    bytecode: &Bytecode,
+    public_input: &[F],
+    witness: &ExecutionWitness,
+    n_blobs: usize,
+    construction: Construction,
+) -> ExecutionProof {
     const F_BITS: usize = 31;
 
-    let witness = build_witness(n_blobs);
     let t0 = Instant::now();
     let proof = prove_execution(
         bytecode,
         public_input,
-        &witness,
+        witness,
         &default_whir_config(STARTING_LOG_INV_RATE),
         false,
     )
@@ -165,6 +420,7 @@ pub fn prove_lean_da(bytecode: &Bytecode, public_input: &[F], n_blobs: usize) ->
     let blob_size_fe = (1 << LOG_M) * <EF as BasedVectorSpace<F>>::DIMENSION;
     let total_data_kib = (n_blobs * blob_size_fe * F_BITS) as f64 / (8.0 * 1024.0);
     let throughput_kib_per_s = total_data_kib / proving_time.as_secs_f64();
+    println!("Construction:     {}", construction.label());
     println!("Bytecode size:    {}", meta.bytecode_size);
     println!("Cycles:           {}", meta.cycles);
     println!("Poseidon16 calls: {}", meta.n_poseidons);
@@ -193,9 +449,44 @@ mod tests {
     #[test]
     fn test_compile_prove_verify() {
         // cargo test --release --package lean-da -- tests::test_compile_prove_verify --nocapture
-        let bytecode = compile_lean_da_bytecode(DEFAULT_N_BLOBS);
-        let public_input: Vec<F> = vec![];
-        let proof = prove_lean_da(&bytecode, &public_input, DEFAULT_N_BLOBS);
+        let bytecode = compile_lean_da_bytecode(DEFAULT_N_BLOBS, Construction::Baseline);
+        let (witness, public_input) = build_instance(DEFAULT_N_BLOBS, Construction::Baseline);
+        let proof = prove_lean_da(
+            &bytecode,
+            &public_input,
+            &witness,
+            DEFAULT_N_BLOBS,
+            Construction::Baseline,
+        );
+        verify_lean_da(&bytecode, &public_input, proof.proof);
+    }
+
+    #[test]
+    fn test_ood_public_input_shape() {
+        let (_witness, public_input) = build_instance(DEFAULT_N_BLOBS, Construction::OodRow);
+        let dim = <EF as BasedVectorSpace<F>>::DIMENSION;
+        let expected_len = DEFAULT_N_BLOBS * 8 + 8 + dim + DEFAULT_N_BLOBS * dim + 8;
+        assert_eq!(public_input.len(), expected_len);
+
+        let (_witness, public_input) = build_instance(DEFAULT_N_BLOBS, Construction::OodRowColumnMajor);
+        assert_eq!(public_input.len(), expected_len);
+
+        let (_witness, public_input) = build_instance(DEFAULT_N_BLOBS, Construction::OodRowTiled);
+        assert_eq!(public_input.len(), expected_len);
+    }
+
+    #[test]
+    #[ignore = "runs the full OOD-row proof benchmark path"]
+    fn test_compile_prove_verify_ood_row() {
+        let bytecode = compile_lean_da_bytecode(DEFAULT_N_BLOBS, Construction::OodRow);
+        let (witness, public_input) = build_instance(DEFAULT_N_BLOBS, Construction::OodRow);
+        let proof = prove_lean_da(
+            &bytecode,
+            &public_input,
+            &witness,
+            DEFAULT_N_BLOBS,
+            Construction::OodRow,
+        );
         verify_lean_da(&bytecode, &public_input, proof.proof);
     }
 
diff --git a/crates/lean-da/zkdsl_implem/lean_da_ood.py b/crates/lean-da/zkdsl_implem/lean_da_ood.py
new file mode 100644
index 000000000..274bdf140
--- /dev/null
+++ b/crates/lean-da/zkdsl_implem/lean_da_ood.py
@@ -0,0 +1,256 @@
+from snark_lib import *
+from ood_barycentric import *
+
+# Natural-order OOD-row construction for leanDA.
+#
+# Each blob witness is a length-2M Reed-Solomon codeword in natural evaluation
+# order: C_i[j] = P_i(w^j).  The public input commits to each systematic prefix
+# d_i = H(C_i[0..M)), then to D = H(d_0, ..., d_{n-1}).  The row challenge z is
+# derived from D, and the OOD row is the column-wise row-domain evaluation
+#   OOD[j] = sum_i ((z^n - 1) / (z*h^{-i} - 1)) * C_i[j].
+# The 1/n Lagrange factor is intentionally omitted: it scales the whole OOD row
+# by a nonzero public constant and does not affect the parity identity.
+#
+# The OOD row is computed in-circuit with one strided extension-field dot
+# product for each row-codeword position.
+
+TWO_ADDICITY = 24
+ROOT_24 = 1791270792  # root of unity of order 2^24
+
+LOG_M = LOG_M_PLACEHOLDER
+M = 2 ** LOG_M
+DIM = 5
+
+N_BLOBS = N_BLOBS_PLACEHOLDER
+LOG_N_BLOBS = LOG_N_BLOBS_PLACEHOLDER
+
+W = ROOT_24**(2**(TWO_ADDICITY - LOG_M - 1))  # primitive 2M-th root of unity
+U = W * W
+U_INV = U ** (M - 1)
+W_INV = W ** (2 * M - 1)
+ROW_W = ROOT_24**(2**(TWO_ADDICITY - LOG_N_BLOBS))  # primitive N_BLOBS-th root
+ROW_W_INV = ROW_W ** (N_BLOBS - 1)
+
+DIGEST_LEN = 8
+LOG_LEAF_LEN_EXT = 4
+LEAF_LEN_EXT = 2 ** LOG_LEAF_LEN_EXT
+LEAF_LEN = LEAF_LEN_EXT * DIM
+LEAF_NUM_CHUNKS = LEAF_LEN / DIGEST_LEN
+LOG_NUM_LEAVES = LOG_M + 1 - LOG_LEAF_LEN_EXT
+NUM_LEAVES = 2 ** LOG_NUM_LEAVES
+SYSTEMATIC_NUM_CHUNKS = M * DIM / DIGEST_LEN
+
+PUB_DIGESTS = 0
+PUB_D = PUB_DIGESTS + N_BLOBS * DIGEST_LEN
+PUB_Z = PUB_D + DIGEST_LEN
+PUB_ROW_COEFFS = PUB_Z + DIM
+PUB_OOD_ROOT = PUB_ROW_COEFFS + N_BLOBS * DIM
+
+
+def main():
+    debug_assert(LEAF_LEN % DIGEST_LEN == 0)
+    debug_assert((M * DIM) % DIGEST_LEN == 0)
+
+    all_codewords = Array(N_BLOBS * 2 * M * DIM)
+    row_digests = Array(N_BLOBS * DIGEST_LEN)
+    for i in unroll(0, N_BLOBS):
+        row = all_codewords + i * 2 * M * DIM
+        hint_witness("codeword", row)
+    for i in unroll(0, N_BLOBS):
+        hash_systematic_part(all_codewords + i * 2 * M * DIM, row_digests + i * DIGEST_LEN)
+        assert_eq_digest(row_digests + i * DIGEST_LEN, PUB_DIGESTS + i * DIGEST_LEN)
+
+    D = Array(DIGEST_LEN)
+    chain_digests(row_digests, N_BLOBS, D)
+    assert_eq_digest(D, PUB_D)
+
+    z_digest = Array(DIGEST_LEN)
+    derive_z(D, z_digest)
+    assert_eq_ext(z_digest, PUB_Z)
+    verify_row_coeffs(PUB_Z, PUB_ROW_COEFFS)
+
+    ood_row = compute_ood_row(all_codewords, PUB_ROW_COEFFS)
+    ood_root = merkle_root_from_data(ood_row)
+    assert_eq_digest(ood_root, PUB_OOD_ROOT)
+
+    r_digest = Array(DIGEST_LEN)
+    derive_ood_barycentric_challenge(D, PUB_Z, ood_root, r_digest)
+    barycentric_check_natural_order(ood_row, r_digest, M, LOG_M, U_INV, W_INV)
+
+    return
+
+
+def assert_eq_digest(a, b):
+    for i in unroll(0, DIGEST_LEN):
+        assert a[i] == b[i]
+    return
+
+
+@inline
+def assert_eq_ext(a, b):
+    for i in unroll(0, DIM):
+        assert a[i] == b[i]
+    return
+
+
+@inline
+def copy_ext(src, dest):
+    for i in unroll(0, DIM):
+        dest[i] = src[i]
+    return
+
+
+@inline
+def copy_digest(src, dest):
+    for i in unroll(0, DIGEST_LEN):
+        dest[i] = src[i]
+    return
+
+
+@inline
+def zero_digest():
+    zero = Array(DIGEST_LEN)
+    for i in unroll(0, DIGEST_LEN):
+        zero[i] = 0
+    return zero
+
+
+def hash_systematic_part(data, dest):
+    states = Array((SYSTEMATIC_NUM_CHUNKS - 2) * DIGEST_LEN)
+    poseidon16_compress(data, data + DIGEST_LEN, states)
+    for j in unroll(1, SYSTEMATIC_NUM_CHUNKS - 2):
+        poseidon16_compress(
+            states + (j - 1) * DIGEST_LEN,
+            data + (j + 1) * DIGEST_LEN,
+            states + j * DIGEST_LEN,
+        )
+    poseidon16_compress(
+        states + (SYSTEMATIC_NUM_CHUNKS - 3) * DIGEST_LEN,
+        data + (SYSTEMATIC_NUM_CHUNKS - 1) * DIGEST_LEN,
+        dest,
+    )
+    return
+
+
+def chain_digests(digests, n_digests: Const, dest):
+    state: Mut = zero_digest()
+    for i in unroll(0, n_digests):
+        new_state = Array(DIGEST_LEN)
+        poseidon16_compress(state, digests + i * DIGEST_LEN, new_state)
+        state = new_state
+    copy_digest(state, dest)
+    return
+
+
+def domain_tag(tag: Const):
+    tag_ptr = Array(DIGEST_LEN)
+    tag_ptr[0] = tag
+    for i in unroll(1, DIGEST_LEN):
+        tag_ptr[i] = 0
+    return tag_ptr
+
+
+@inline
+def derive_z(D, dest):
+    tag_z = domain_tag(1)
+    poseidon16_compress(D, tag_z, dest)
+    return
+
+
+def derive_ood_barycentric_challenge(D, z, ood_root, dest):
+    tag_ood = domain_tag(2)
+    state = Array(DIGEST_LEN)
+    poseidon16_compress(D, tag_ood, state)
+
+    z_digest = Array(DIGEST_LEN)
+    for d in unroll(0, DIM):
+        z_digest[d] = z[d]
+    for d in unroll(DIM, DIGEST_LEN):
+        z_digest[d] = 0
+
+    state_2 = Array(DIGEST_LEN)
+    poseidon16_compress(state, z_digest, state_2)
+    poseidon16_compress(state_2, ood_root, dest)
+    return
+
+
+def compute_ood_row(codewords_base, row_coeffs):
+    ood_row = Array(2 * M * DIM)
+    for j in unroll(0, 2 * M):
+        dot_product_ee_strided_a(
+            codewords_base + j * DIM,
+            row_coeffs,
+            ood_row + j * DIM,
+            N_BLOBS,
+            2 * M * DIM,
+        )
+    return ood_row
+
+
+def verify_row_coeffs(z, row_coeffs):
+    z_pow_log = Array((LOG_N_BLOBS + 1) * DIM)
+    copy_ext(z, z_pow_log)
+    for k in unroll(1, LOG_N_BLOBS + 1):
+        dot_product_ee(z_pow_log + (k - 1) * DIM,
+                       z_pow_log + (k - 1) * DIM,
+                       z_pow_log + k * DIM)
+    zn = z_pow_log + LOG_N_BLOBS * DIM
+
+    numerator = Array(DIM)
+    numerator[0] = zn[0] - 1
+    for d in unroll(1, DIM):
+        numerator[d] = zn[d]
+
+    row_w_inv_ext = Array(DIM)
+    row_w_inv_ext[0] = ROW_W_INV
+    for d in unroll(1, DIM):
+        row_w_inv_ext[d] = 0
+    points = Array(N_BLOBS * DIM)
+    copy_ext(z, points)
+    for i in unroll(1, N_BLOBS):
+        dot_product_ee(row_w_inv_ext, points + (i - 1) * DIM, points + i * DIM)
+
+    dens = Array(N_BLOBS * DIM)
+    for i in unroll(0, N_BLOBS):
+        dens[i * DIM] = points[i * DIM] - 1
+        for d in unroll(1, DIM):
+            dens[i * DIM + d] = points[i * DIM + d]
+        dot_product_ee(dens + i * DIM, row_coeffs + i * DIM, numerator)
+    return
+
+
+def hash_leaf(leaf, dest):
+    states = Array((LEAF_NUM_CHUNKS - 2) * DIGEST_LEN)
+    poseidon16_compress(leaf, leaf + DIGEST_LEN, states)
+    for j in unroll(1, LEAF_NUM_CHUNKS - 2):
+        poseidon16_compress(
+            states + (j - 1) * DIGEST_LEN,
+            leaf + (j + 1) * DIGEST_LEN,
+            states + j * DIGEST_LEN,
+        )
+    poseidon16_compress(
+        states + (LEAF_NUM_CHUNKS - 3) * DIGEST_LEN,
+        leaf + (LEAF_NUM_CHUNKS - 1) * DIGEST_LEN,
+        dest,
+    )
+    return
+
+
+def merkle_root_from_data(data):
+    leaves = Array(NUM_LEAVES * DIGEST_LEN)
+    for i in unroll(0, NUM_LEAVES):
+        hash_leaf(data + i * LEAF_LEN, leaves + i * DIGEST_LEN)
+
+    layer: Mut = leaves
+    for k in unroll(1, LOG_NUM_LEAVES + 1):
+        layer_size = 2 ** (LOG_NUM_LEAVES - k)
+        new_layer = Array(layer_size * DIGEST_LEN)
+        for i in unroll(0, layer_size):
+            poseidon16_compress(
+                layer + (2 * i) * DIGEST_LEN,
+                layer + (2 * i + 1) * DIGEST_LEN,
+                new_layer + i * DIGEST_LEN,
+            )
+        layer = new_layer
+
+    return layer
diff --git a/crates/lean-da/zkdsl_implem/lean_da_ood_column_major.py b/crates/lean-da/zkdsl_implem/lean_da_ood_column_major.py
new file mode 100644
index 000000000..a0a39bc7e
--- /dev/null
+++ b/crates/lean-da/zkdsl_implem/lean_da_ood_column_major.py
@@ -0,0 +1,322 @@
+from snark_lib import *
+from ood_barycentric import *
+
+# Natural-order OOD-row construction for leanDA, with column-major witness
+# memory.
+#
+# Each blob witness is a length-2M Reed-Solomon codeword in natural evaluation
+# order: C_i[j] = P_i(w^j).  The public input commits to each systematic prefix
+# d_i = H(C_i[0..M)), then to D = H(d_0, ..., d_{n-1}).  The row challenge z is
+# derived from D, and the OOD row is the column-wise row-domain evaluation
+#   OOD[j] = sum_i ((z^n - 1) / (z*h^{-i} - 1)) * C_i[j].
+# The 1/n Lagrange factor is intentionally omitted: it scales the whole OOD row
+# by a nonzero public constant and does not affect the parity identity.
+#
+# The canonical witness layout is:
+#   all_codewords[j][i] = C_i[j]
+# where `j` is the row-codeword evaluation index and `i` is the blob index.
+# This makes OOD-row composition a contiguous extension-field dot product.  The
+# per-blob systematic hash is recovered by gathering a strided row stream into
+# contiguous scratch before feeding the existing Poseidon chain hash.
+
+TWO_ADDICITY = 24
+ROOT_24 = 1791270792  # root of unity of order 2^24
+
+LOG_M = LOG_M_PLACEHOLDER
+M = 2 ** LOG_M
+DIM = 5
+
+N_BLOBS = N_BLOBS_PLACEHOLDER
+LOG_N_BLOBS = LOG_N_BLOBS_PLACEHOLDER
+
+W = ROOT_24**(2**(TWO_ADDICITY - LOG_M - 1))  # primitive 2M-th root of unity
+U = W * W
+U_INV = U ** (M - 1)
+W_INV = W ** (2 * M - 1)
+ROW_W = ROOT_24**(2**(TWO_ADDICITY - LOG_N_BLOBS))  # primitive N_BLOBS-th root
+ROW_W_INV = ROW_W ** (N_BLOBS - 1)
+
+DIGEST_LEN = 8
+LOG_LEAF_LEN_EXT = 4
+LEAF_LEN_EXT = 2 ** LOG_LEAF_LEN_EXT
+LEAF_LEN = LEAF_LEN_EXT * DIM
+LEAF_NUM_CHUNKS = LEAF_LEN / DIGEST_LEN
+LOG_NUM_LEAVES = LOG_M + 1 - LOG_LEAF_LEN_EXT
+NUM_LEAVES = 2 ** LOG_NUM_LEAVES
+SYSTEMATIC_NUM_CHUNKS = M * DIM / DIGEST_LEN
+SYSTEMATIC_HASH_GROUPS = SYSTEMATIC_NUM_CHUNKS / 5
+
+PUB_DIGESTS = 0
+PUB_D = PUB_DIGESTS + N_BLOBS * DIGEST_LEN
+PUB_Z = PUB_D + DIGEST_LEN
+PUB_ROW_COEFFS = PUB_Z + DIM
+PUB_OOD_ROOT = PUB_ROW_COEFFS + N_BLOBS * DIM
+
+
+def main():
+    debug_assert(LEAF_LEN % DIGEST_LEN == 0)
+    debug_assert((M * DIM) % DIGEST_LEN == 0)
+
+    all_codewords = Array(N_BLOBS * 2 * M * DIM)
+    row_digests = Array(N_BLOBS * DIGEST_LEN)
+    hint_witness("codewords_column_major", all_codewords)
+    for i in unroll(0, N_BLOBS):
+        hash_systematic_part_column_major(all_codewords, i, row_digests + i * DIGEST_LEN)
+        assert_eq_digest(row_digests + i * DIGEST_LEN, PUB_DIGESTS + i * DIGEST_LEN)
+
+    D = Array(DIGEST_LEN)
+    chain_digests(row_digests, N_BLOBS, D)
+    assert_eq_digest(D, PUB_D)
+
+    z_digest = Array(DIGEST_LEN)
+    derive_z(D, z_digest)
+    assert_eq_ext(z_digest, PUB_Z)
+    verify_row_coeffs(PUB_Z, PUB_ROW_COEFFS)
+
+    ood_row = compute_ood_row_column_major(all_codewords, PUB_ROW_COEFFS)
+    ood_root = merkle_root_from_data(ood_row)
+    assert_eq_digest(ood_root, PUB_OOD_ROOT)
+
+    r_digest = Array(DIGEST_LEN)
+    derive_ood_barycentric_challenge(D, PUB_Z, ood_root, r_digest)
+    barycentric_check_natural_order(ood_row, r_digest, M, LOG_M, U_INV, W_INV)
+
+    return
+
+
+
+def gather_systematic_part_column_major(data, row_idx, dest):
+    # Five 8-FE hash chunks contain exactly eight 5-limb extension elements.
+    # This avoids floor division/modulo in the zkDSL while preserving row-major
+    # systematic hash order: C_i[0], C_i[1], ..., C_i[M-1].
+    for g in unroll(0, SYSTEMATIC_HASH_GROUPS):
+        eval_base = g * 8
+        out = g * 5 * DIGEST_LEN
+
+        dest[out + 0] = data[(eval_base + 0) * N_BLOBS * DIM + row_idx * DIM + (0)]
+        dest[out + 1] = data[(eval_base + 0) * N_BLOBS * DIM + row_idx * DIM + (1)]
+        dest[out + 2] = data[(eval_base + 0) * N_BLOBS * DIM + row_idx * DIM + (2)]
+        dest[out + 3] = data[(eval_base + 0) * N_BLOBS * DIM + row_idx * DIM + (3)]
+        dest[out + 4] = data[(eval_base + 0) * N_BLOBS * DIM + row_idx * DIM + (4)]
+        dest[out + 5] = data[(eval_base + 1) * N_BLOBS * DIM + row_idx * DIM + (0)]
+        dest[out + 6] = data[(eval_base + 1) * N_BLOBS * DIM + row_idx * DIM + (1)]
+        dest[out + 7] = data[(eval_base + 1) * N_BLOBS * DIM + row_idx * DIM + (2)]
+
+        dest[out + 8] = data[(eval_base + 1) * N_BLOBS * DIM + row_idx * DIM + (3)]
+        dest[out + 9] = data[(eval_base + 1) * N_BLOBS * DIM + row_idx * DIM + (4)]
+        dest[out + 10] = data[(eval_base + 2) * N_BLOBS * DIM + row_idx * DIM + (0)]
+        dest[out + 11] = data[(eval_base + 2) * N_BLOBS * DIM + row_idx * DIM + (1)]
+        dest[out + 12] = data[(eval_base + 2) * N_BLOBS * DIM + row_idx * DIM + (2)]
+        dest[out + 13] = data[(eval_base + 2) * N_BLOBS * DIM + row_idx * DIM + (3)]
+        dest[out + 14] = data[(eval_base + 2) * N_BLOBS * DIM + row_idx * DIM + (4)]
+        dest[out + 15] = data[(eval_base + 3) * N_BLOBS * DIM + row_idx * DIM + (0)]
+
+        dest[out + 16] = data[(eval_base + 3) * N_BLOBS * DIM + row_idx * DIM + (1)]
+        dest[out + 17] = data[(eval_base + 3) * N_BLOBS * DIM + row_idx * DIM + (2)]
+        dest[out + 18] = data[(eval_base + 3) * N_BLOBS * DIM + row_idx * DIM + (3)]
+        dest[out + 19] = data[(eval_base + 3) * N_BLOBS * DIM + row_idx * DIM + (4)]
+        dest[out + 20] = data[(eval_base + 4) * N_BLOBS * DIM + row_idx * DIM + (0)]
+        dest[out + 21] = data[(eval_base + 4) * N_BLOBS * DIM + row_idx * DIM + (1)]
+        dest[out + 22] = data[(eval_base + 4) * N_BLOBS * DIM + row_idx * DIM + (2)]
+        dest[out + 23] = data[(eval_base + 4) * N_BLOBS * DIM + row_idx * DIM + (3)]
+
+        dest[out + 24] = data[(eval_base + 4) * N_BLOBS * DIM + row_idx * DIM + (4)]
+        dest[out + 25] = data[(eval_base + 5) * N_BLOBS * DIM + row_idx * DIM + (0)]
+        dest[out + 26] = data[(eval_base + 5) * N_BLOBS * DIM + row_idx * DIM + (1)]
+        dest[out + 27] = data[(eval_base + 5) * N_BLOBS * DIM + row_idx * DIM + (2)]
+        dest[out + 28] = data[(eval_base + 5) * N_BLOBS * DIM + row_idx * DIM + (3)]
+        dest[out + 29] = data[(eval_base + 5) * N_BLOBS * DIM + row_idx * DIM + (4)]
+        dest[out + 30] = data[(eval_base + 6) * N_BLOBS * DIM + row_idx * DIM + (0)]
+        dest[out + 31] = data[(eval_base + 6) * N_BLOBS * DIM + row_idx * DIM + (1)]
+
+        dest[out + 32] = data[(eval_base + 6) * N_BLOBS * DIM + row_idx * DIM + (2)]
+        dest[out + 33] = data[(eval_base + 6) * N_BLOBS * DIM + row_idx * DIM + (3)]
+        dest[out + 34] = data[(eval_base + 6) * N_BLOBS * DIM + row_idx * DIM + (4)]
+        dest[out + 35] = data[(eval_base + 7) * N_BLOBS * DIM + row_idx * DIM + (0)]
+        dest[out + 36] = data[(eval_base + 7) * N_BLOBS * DIM + row_idx * DIM + (1)]
+        dest[out + 37] = data[(eval_base + 7) * N_BLOBS * DIM + row_idx * DIM + (2)]
+        dest[out + 38] = data[(eval_base + 7) * N_BLOBS * DIM + row_idx * DIM + (3)]
+        dest[out + 39] = data[(eval_base + 7) * N_BLOBS * DIM + row_idx * DIM + (4)]
+    return
+
+
+def hash_systematic_part_column_major(data, row_idx, dest):
+    systematic = Array(M * DIM)
+    gather_systematic_part_column_major(data, row_idx, systematic)
+    hash_systematic_part(systematic, dest)
+    return
+
+
+def assert_eq_digest(a, b):
+    for i in unroll(0, DIGEST_LEN):
+        assert a[i] == b[i]
+    return
+
+
+@inline
+def assert_eq_ext(a, b):
+    for i in unroll(0, DIM):
+        assert a[i] == b[i]
+    return
+
+
+@inline
+def copy_ext(src, dest):
+    for i in unroll(0, DIM):
+        dest[i] = src[i]
+    return
+
+
+@inline
+def copy_digest(src, dest):
+    for i in unroll(0, DIGEST_LEN):
+        dest[i] = src[i]
+    return
+
+
+@inline
+def zero_digest():
+    zero = Array(DIGEST_LEN)
+    for i in unroll(0, DIGEST_LEN):
+        zero[i] = 0
+    return zero
+
+
+def hash_systematic_part(data, dest):
+    states = Array((SYSTEMATIC_NUM_CHUNKS - 2) * DIGEST_LEN)
+    poseidon16_compress(data, data + DIGEST_LEN, states)
+    for j in unroll(1, SYSTEMATIC_NUM_CHUNKS - 2):
+        poseidon16_compress(
+            states + (j - 1) * DIGEST_LEN,
+            data + (j + 1) * DIGEST_LEN,
+            states + j * DIGEST_LEN,
+        )
+    poseidon16_compress(
+        states + (SYSTEMATIC_NUM_CHUNKS - 3) * DIGEST_LEN,
+        data + (SYSTEMATIC_NUM_CHUNKS - 1) * DIGEST_LEN,
+        dest,
+    )
+    return
+
+
+def chain_digests(digests, n_digests: Const, dest):
+    state: Mut = zero_digest()
+    for i in unroll(0, n_digests):
+        new_state = Array(DIGEST_LEN)
+        poseidon16_compress(state, digests + i * DIGEST_LEN, new_state)
+        state = new_state
+    copy_digest(state, dest)
+    return
+
+
+def domain_tag(tag: Const):
+    tag_ptr = Array(DIGEST_LEN)
+    tag_ptr[0] = tag
+    for i in unroll(1, DIGEST_LEN):
+        tag_ptr[i] = 0
+    return tag_ptr
+
+
+@inline
+def derive_z(D, dest):
+    tag_z = domain_tag(1)
+    poseidon16_compress(D, tag_z, dest)
+    return
+
+
+def derive_ood_barycentric_challenge(D, z, ood_root, dest):
+    tag_ood = domain_tag(2)
+    state = Array(DIGEST_LEN)
+    poseidon16_compress(D, tag_ood, state)
+
+    z_digest = Array(DIGEST_LEN)
+    for d in unroll(0, DIM):
+        z_digest[d] = z[d]
+    for d in unroll(DIM, DIGEST_LEN):
+        z_digest[d] = 0
+
+    state_2 = Array(DIGEST_LEN)
+    poseidon16_compress(state, z_digest, state_2)
+    poseidon16_compress(state_2, ood_root, dest)
+    return
+
+
+def compute_ood_row_column_major(codewords_base, row_coeffs):
+    ood_row = Array(2 * M * DIM)
+    for j in unroll(0, 2 * M):
+        dot_product_ee(
+            codewords_base + j * N_BLOBS * DIM,
+            row_coeffs,
+            ood_row + j * DIM,
+            N_BLOBS,
+        )
+    return ood_row
+
+
+def verify_row_coeffs(z, row_coeffs):
+    z_pow_log = Array((LOG_N_BLOBS + 1) * DIM)
+    copy_ext(z, z_pow_log)
+    for k in unroll(1, LOG_N_BLOBS + 1):
+        dot_product_ee(z_pow_log + (k - 1) * DIM,
+                       z_pow_log + (k - 1) * DIM,
+                       z_pow_log + k * DIM)
+    zn = z_pow_log + LOG_N_BLOBS * DIM
+
+    numerator = Array(DIM)
+    numerator[0] = zn[0] - 1
+    for d in unroll(1, DIM):
+        numerator[d] = zn[d]
+
+    row_w_inv_ext = Array(DIM)
+    row_w_inv_ext[0] = ROW_W_INV
+    for d in unroll(1, DIM):
+        row_w_inv_ext[d] = 0
+    points = Array(N_BLOBS * DIM)
+    copy_ext(z, points)
+    for i in unroll(1, N_BLOBS):
+        dot_product_ee(row_w_inv_ext, points + (i - 1) * DIM, points + i * DIM)
+
+    dens = Array(N_BLOBS * DIM)
+    for i in unroll(0, N_BLOBS):
+        dens[i * DIM] = points[i * DIM] - 1
+        for d in unroll(1, DIM):
+            dens[i * DIM + d] = points[i * DIM + d]
+        dot_product_ee(dens + i * DIM, row_coeffs + i * DIM, numerator)
+    return
+
+
+def hash_leaf(leaf, dest):
+    states = Array((LEAF_NUM_CHUNKS - 2) * DIGEST_LEN)
+    poseidon16_compress(leaf, leaf + DIGEST_LEN, states)
+    for j in unroll(1, LEAF_NUM_CHUNKS - 2):
+        poseidon16_compress(
+            states + (j - 1) * DIGEST_LEN,
+            leaf + (j + 1) * DIGEST_LEN,
+            states + j * DIGEST_LEN,
+        )
+    poseidon16_compress(
+        states + (LEAF_NUM_CHUNKS - 3) * DIGEST_LEN,
+        leaf + (LEAF_NUM_CHUNKS - 1) * DIGEST_LEN,
+        dest,
+    )
+    return
+
+
+def merkle_root_from_data(data):
+    leaves = Array(NUM_LEAVES * DIGEST_LEN)
+    for i in unroll(0, NUM_LEAVES):
+        hash_leaf(data + i * LEAF_LEN, leaves + i * DIGEST_LEN)
+
+    layer: Mut = leaves
+    for k in unroll(1, LOG_NUM_LEAVES + 1):
+        layer_size = 2 ** (LOG_NUM_LEAVES - k)
+        new_layer = Array(layer_size * DIGEST_LEN)
+        for i in unroll(0, layer_size):
+            poseidon16_compress(
+                layer + (2 * i) * DIGEST_LEN,
+                layer + (2 * i + 1) * DIGEST_LEN,
+                new_layer + i * DIGEST_LEN,
+            )
+        layer = new_layer
+
+    return layer
diff --git a/crates/lean-da/zkdsl_implem/lean_da_ood_tiled.py b/crates/lean-da/zkdsl_implem/lean_da_ood_tiled.py
new file mode 100644
index 000000000..6e7e7f137
--- /dev/null
+++ b/crates/lean-da/zkdsl_implem/lean_da_ood_tiled.py
@@ -0,0 +1,295 @@
+from snark_lib import *
+from ood_barycentric import *
+
+# Natural-order OOD-row construction for leanDA, with a roughly 1 KiB cell
+# witness layout.
+#
+# Each blob witness is a length-2M Reed-Solomon codeword in natural evaluation
+# order: C_i[j] = P_i(w^j).  The public input commits to each systematic prefix
+# d_i = H(C_i[0..M)), then to D = H(d_0, ..., d_{n-1}).  The row challenge z is
+# derived from D, and the OOD row is the column-wise row-domain evaluation
+#   OOD[j] = sum_i ((z^n - 1) / (z*h^{-i} - 1)) * C_i[j].
+# The 1/n Lagrange factor is intentionally omitted: it scales the whole OOD row
+# by a nonzero public constant and does not affect the parity identity.
+#
+# The canonical witness is one flat `all_codewords` vector.  Pointer arithmetic
+# interprets it as tile-major:
+#   addr(C_i[j]) =
+#     tile * N_BLOBS * TILE_LEN
+#     + i * TILE_LEN
+#     + local_eval * DIM
+# where tile = floor(j / TILE_LEN_EXT) and local_eval = j % TILE_LEN_EXT.
+# This keeps each blob's systematic hash chunks contiguous inside a roughly
+# 1 KiB cell, while OOD composition reads across blobs with one fixed stride.
+
+TWO_ADDICITY = 24
+ROOT_24 = 1791270792  # root of unity of order 2^24
+
+LOG_M = LOG_M_PLACEHOLDER
+M = 2 ** LOG_M
+DIM = 5
+
+N_BLOBS = N_BLOBS_PLACEHOLDER
+LOG_N_BLOBS = LOG_N_BLOBS_PLACEHOLDER
+
+W = ROOT_24**(2**(TWO_ADDICITY - LOG_M - 1))  # primitive 2M-th root of unity
+U = W * W
+U_INV = U ** (M - 1)
+W_INV = W ** (2 * M - 1)
+ROW_W = ROOT_24**(2**(TWO_ADDICITY - LOG_N_BLOBS))  # primitive N_BLOBS-th root
+ROW_W_INV = ROW_W ** (N_BLOBS - 1)
+
+DIGEST_LEN = 8
+LOG_LEAF_LEN_EXT = 4
+LEAF_LEN_EXT = 2 ** LOG_LEAF_LEN_EXT
+LEAF_LEN = LEAF_LEN_EXT * DIM
+LEAF_NUM_CHUNKS = LEAF_LEN / DIGEST_LEN
+LOG_NUM_LEAVES = LOG_M + 1 - LOG_LEAF_LEN_EXT
+NUM_LEAVES = 2 ** LOG_NUM_LEAVES
+SYSTEMATIC_NUM_CHUNKS = M * DIM / DIGEST_LEN
+LOG_TILE_LEN_EXT = 6
+TILE_LEN_EXT = 2 ** LOG_TILE_LEN_EXT
+TILE_LEN = TILE_LEN_EXT * DIM
+SYSTEMATIC_NUM_TILES = M / TILE_LEN_EXT
+NUM_TILES = 2 * M / TILE_LEN_EXT
+TILE_NUM_HASH_CHUNKS = TILE_LEN / DIGEST_LEN
+
+PUB_DIGESTS = 0
+PUB_D = PUB_DIGESTS + N_BLOBS * DIGEST_LEN
+PUB_Z = PUB_D + DIGEST_LEN
+PUB_ROW_COEFFS = PUB_Z + DIM
+PUB_OOD_ROOT = PUB_ROW_COEFFS + N_BLOBS * DIM
+
+
+def main():
+    debug_assert(LEAF_LEN % DIGEST_LEN == 0)
+    debug_assert((M * DIM) % DIGEST_LEN == 0)
+    debug_assert(TILE_LEN % DIGEST_LEN == 0)
+
+    all_codewords = Array(N_BLOBS * 2 * M * DIM)
+    row_digests = Array(N_BLOBS * DIGEST_LEN)
+    hint_witness("codewords_tiled", all_codewords)
+    for i in unroll(0, N_BLOBS):
+        hash_systematic_part_tiled(all_codewords, i, row_digests + i * DIGEST_LEN)
+        assert_eq_digest(row_digests + i * DIGEST_LEN, PUB_DIGESTS + i * DIGEST_LEN)
+
+    D = Array(DIGEST_LEN)
+    chain_digests(row_digests, N_BLOBS, D)
+    assert_eq_digest(D, PUB_D)
+
+    z_digest = Array(DIGEST_LEN)
+    derive_z(D, z_digest)
+    assert_eq_ext(z_digest, PUB_Z)
+    verify_row_coeffs(PUB_Z, PUB_ROW_COEFFS)
+
+    ood_row = compute_ood_row(all_codewords, PUB_ROW_COEFFS)
+    ood_root = merkle_root_from_data(ood_row)
+    assert_eq_digest(ood_root, PUB_OOD_ROOT)
+
+    r_digest = Array(DIGEST_LEN)
+    derive_ood_barycentric_challenge(D, PUB_Z, ood_root, r_digest)
+    barycentric_check_natural_order(ood_row, r_digest, M, LOG_M, U_INV, W_INV)
+
+    return
+
+
+def assert_eq_digest(a, b):
+    for i in unroll(0, DIGEST_LEN):
+        assert a[i] == b[i]
+    return
+
+
+@inline
+def assert_eq_ext(a, b):
+    for i in unroll(0, DIM):
+        assert a[i] == b[i]
+    return
+
+
+@inline
+def copy_ext(src, dest):
+    for i in unroll(0, DIM):
+        dest[i] = src[i]
+    return
+
+
+@inline
+def copy_digest(src, dest):
+    for i in unroll(0, DIGEST_LEN):
+        dest[i] = src[i]
+    return
+
+
+@inline
+def zero_digest():
+    zero = Array(DIGEST_LEN)
+    for i in unroll(0, DIGEST_LEN):
+        zero[i] = 0
+    return zero
+
+
+def hash_systematic_part(data, dest):
+    states = Array((SYSTEMATIC_NUM_CHUNKS - 2) * DIGEST_LEN)
+    poseidon16_compress(data, data + DIGEST_LEN, states)
+    for j in unroll(1, SYSTEMATIC_NUM_CHUNKS - 2):
+        poseidon16_compress(
+            states + (j - 1) * DIGEST_LEN,
+            data + (j + 1) * DIGEST_LEN,
+            states + j * DIGEST_LEN,
+        )
+    poseidon16_compress(
+        states + (SYSTEMATIC_NUM_CHUNKS - 3) * DIGEST_LEN,
+        data + (SYSTEMATIC_NUM_CHUNKS - 1) * DIGEST_LEN,
+        dest,
+    )
+    return
+
+
+def hash_systematic_part_tiled(data, row_idx, dest):
+    first_tile = data + row_idx * TILE_LEN
+    state: Mut = Array(DIGEST_LEN)
+    poseidon16_compress(first_tile, first_tile + DIGEST_LEN, state)
+
+    for chunk in unroll(2, TILE_NUM_HASH_CHUNKS):
+        new_state = Array(DIGEST_LEN)
+        poseidon16_compress(state, first_tile + chunk * DIGEST_LEN, new_state)
+        state = new_state
+
+    for tile in unroll(1, SYSTEMATIC_NUM_TILES):
+        tile_base = data + tile * N_BLOBS * TILE_LEN + row_idx * TILE_LEN
+        for chunk in unroll(0, TILE_NUM_HASH_CHUNKS):
+            new_state = Array(DIGEST_LEN)
+            poseidon16_compress(state, tile_base + chunk * DIGEST_LEN, new_state)
+            state = new_state
+
+    copy_digest(state, dest)
+    return
+
+
+def chain_digests(digests, n_digests: Const, dest):
+    state: Mut = zero_digest()
+    for i in unroll(0, n_digests):
+        new_state = Array(DIGEST_LEN)
+        poseidon16_compress(state, digests + i * DIGEST_LEN, new_state)
+        state = new_state
+    copy_digest(state, dest)
+    return
+
+
+def domain_tag(tag: Const):
+    tag_ptr = Array(DIGEST_LEN)
+    tag_ptr[0] = tag
+    for i in unroll(1, DIGEST_LEN):
+        tag_ptr[i] = 0
+    return tag_ptr
+
+
+@inline
+def derive_z(D, dest):
+    tag_z = domain_tag(1)
+    poseidon16_compress(D, tag_z, dest)
+    return
+
+
+def derive_ood_barycentric_challenge(D, z, ood_root, dest):
+    tag_ood = domain_tag(2)
+    state = Array(DIGEST_LEN)
+    poseidon16_compress(D, tag_ood, state)
+
+    z_digest = Array(DIGEST_LEN)
+    for d in unroll(0, DIM):
+        z_digest[d] = z[d]
+    for d in unroll(DIM, DIGEST_LEN):
+        z_digest[d] = 0
+
+    state_2 = Array(DIGEST_LEN)
+    poseidon16_compress(state, z_digest, state_2)
+    poseidon16_compress(state_2, ood_root, dest)
+    return
+
+
+def compute_ood_row(codewords_base, row_coeffs):
+    ood_row = Array(2 * M * DIM)
+    for tile in unroll(0, NUM_TILES):
+        tile_base = codewords_base + tile * N_BLOBS * TILE_LEN
+        out_tile = ood_row + tile * TILE_LEN
+        for local_eval in unroll(0, TILE_LEN_EXT):
+            lane_offset = local_eval * DIM
+            dot_product_ee_strided_a(
+                tile_base + lane_offset,
+                row_coeffs,
+                out_tile + lane_offset,
+                N_BLOBS,
+                TILE_LEN,
+            )
+    return ood_row
+
+
+def verify_row_coeffs(z, row_coeffs):
+    z_pow_log = Array((LOG_N_BLOBS + 1) * DIM)
+    copy_ext(z, z_pow_log)
+    for k in unroll(1, LOG_N_BLOBS + 1):
+        dot_product_ee(z_pow_log + (k - 1) * DIM,
+                       z_pow_log + (k - 1) * DIM,
+                       z_pow_log + k * DIM)
+    zn = z_pow_log + LOG_N_BLOBS * DIM
+
+    numerator = Array(DIM)
+    numerator[0] = zn[0] - 1
+    for d in unroll(1, DIM):
+        numerator[d] = zn[d]
+
+    row_w_inv_ext = Array(DIM)
+    row_w_inv_ext[0] = ROW_W_INV
+    for d in unroll(1, DIM):
+        row_w_inv_ext[d] = 0
+    points = Array(N_BLOBS * DIM)
+    copy_ext(z, points)
+    for i in unroll(1, N_BLOBS):
+        dot_product_ee(row_w_inv_ext, points + (i - 1) * DIM, points + i * DIM)
+
+    dens = Array(N_BLOBS * DIM)
+    for i in unroll(0, N_BLOBS):
+        dens[i * DIM] = points[i * DIM] - 1
+        for d in unroll(1, DIM):
+            dens[i * DIM + d] = points[i * DIM + d]
+        dot_product_ee(dens + i * DIM, row_coeffs + i * DIM, numerator)
+    return
+
+
+@inline
+def hash_leaf(leaf, dest):
+    states = Array((LEAF_NUM_CHUNKS - 2) * DIGEST_LEN)
+    poseidon16_compress(leaf, leaf + DIGEST_LEN, states)
+    for j in unroll(1, LEAF_NUM_CHUNKS - 2):
+        poseidon16_compress(
+            states + (j - 1) * DIGEST_LEN,
+            leaf + (j + 1) * DIGEST_LEN,
+            states + j * DIGEST_LEN,
+        )
+    poseidon16_compress(
+        states + (LEAF_NUM_CHUNKS - 3) * DIGEST_LEN,
+        leaf + (LEAF_NUM_CHUNKS - 1) * DIGEST_LEN,
+        dest,
+    )
+    return
+
+
+def merkle_root_from_data(data):
+    leaves = Array(NUM_LEAVES * DIGEST_LEN)
+    for i in unroll(0, NUM_LEAVES):
+        hash_leaf(data + i * LEAF_LEN, leaves + i * DIGEST_LEN)
+
+    layer: Mut = leaves
+    for k in unroll(1, LOG_NUM_LEAVES + 1):
+        layer_size = 2 ** (LOG_NUM_LEAVES - k)
+        new_layer = Array(layer_size * DIGEST_LEN)
+        for i in unroll(0, layer_size):
+            poseidon16_compress(
+                layer + (2 * i) * DIGEST_LEN,
+                layer + (2 * i + 1) * DIGEST_LEN,
+                new_layer + i * DIGEST_LEN,
+            )
+        layer = new_layer
+
+    return layer
diff --git a/crates/lean-da/zkdsl_implem/ood_barycentric.py b/crates/lean-da/zkdsl_implem/ood_barycentric.py
new file mode 100644
index 000000000..c692e29ea
--- /dev/null
+++ b/crates/lean-da/zkdsl_implem/ood_barycentric.py
@@ -0,0 +1,119 @@
+from snark_lib import *
+
+# Extension-only barycentric low-degree check for a natural-order length-2M
+# row codeword. This is the upstream evens/odds barycentric identity, but with
+# base-domain constants embedded as extension elements and with strided reads
+# from the natural-order row. Domain constants are passed as compile-time
+# arguments to avoid colliding with the importing file's declarations.
+#
+#   sum_j s_L[j] * row[2j] = sum_j s_R[j] * row[2j + 1]
+#
+# where
+#   s_L[j] = (r^M - 1) / (r * u^{-j} - 1)
+#   s_R[j] = -(r^M + 1) / (r * w^{-1} * u^{-j} - 1)
+
+OOD_BARY_DIM = 5
+
+
+def base_const_ext(value: Const):
+    ptr = Array(OOD_BARY_DIM)
+    ptr[0] = value
+    for d in unroll(1, OOD_BARY_DIM):
+        ptr[d] = 0
+    return ptr
+
+
+def barycentric_slices_ext(
+    r,
+    m: Const,
+    log_m: Const,
+    u_inv_value: Const,
+    w_inv_value: Const,
+):
+    slices = Array(2 * m * OOD_BARY_DIM)
+    slice_L = slices
+    slice_R = slices + m * OOD_BARY_DIM
+
+    r_pow_log = Array((log_m + 1) * OOD_BARY_DIM)
+    for d in unroll(0, OOD_BARY_DIM):
+        r_pow_log[d] = r[d]
+    for k in unroll(1, log_m + 1):
+        dot_product_ee(
+            r_pow_log + (k - 1) * OOD_BARY_DIM,
+            r_pow_log + (k - 1) * OOD_BARY_DIM,
+            r_pow_log + k * OOD_BARY_DIM,
+        )
+    rm = r_pow_log + log_m * OOD_BARY_DIM
+
+    const_L = Array(OOD_BARY_DIM)
+    const_L[0] = rm[0] - 1
+    for d in unroll(1, OOD_BARY_DIM):
+        const_L[d] = rm[d]
+
+    const_R = Array(OOD_BARY_DIM)
+    const_R[0] = (0 - rm[0]) - 1
+    for d in unroll(1, OOD_BARY_DIM):
+        const_R[d] = 0 - rm[d]
+
+    u_inv = base_const_ext(u_inv_value)
+    w_inv = base_const_ext(w_inv_value)
+    s_L = Array(m * OOD_BARY_DIM)
+    s_R = Array(m * OOD_BARY_DIM)
+    for d in unroll(0, OOD_BARY_DIM):
+        s_L[d] = r[d]
+    dot_product_ee(w_inv, r, s_R)
+    for j in unroll(1, m):
+        dot_product_ee(
+            u_inv,
+            s_L + (j - 1) * OOD_BARY_DIM,
+            s_L + j * OOD_BARY_DIM,
+        )
+        dot_product_ee(
+            u_inv,
+            s_R + (j - 1) * OOD_BARY_DIM,
+            s_R + j * OOD_BARY_DIM,
+        )
+
+    dens_L = Array(m * OOD_BARY_DIM)
+    dens_R = Array(m * OOD_BARY_DIM)
+    for j in unroll(0, m):
+        dens_L[j * OOD_BARY_DIM] = s_L[j * OOD_BARY_DIM] - 1
+        dens_R[j * OOD_BARY_DIM] = s_R[j * OOD_BARY_DIM] - 1
+        for d in unroll(1, OOD_BARY_DIM):
+            dens_L[j * OOD_BARY_DIM + d] = s_L[j * OOD_BARY_DIM + d]
+            dens_R[j * OOD_BARY_DIM + d] = s_R[j * OOD_BARY_DIM + d]
+
+    for j in unroll(0, m):
+        dot_product_ee(
+            dens_L + j * OOD_BARY_DIM,
+            slice_L + j * OOD_BARY_DIM,
+            const_L,
+        )
+        dot_product_ee(
+            dens_R + j * OOD_BARY_DIM,
+            slice_R + j * OOD_BARY_DIM,
+            const_R,
+        )
+
+    return slice_L, slice_R
+
+
+def barycentric_check_natural_order(
+    row,
+    r,
+    m: Const,
+    log_m: Const,
+    u_inv_value: Const,
+    w_inv_value: Const,
+):
+    slice_L, slice_R = barycentric_slices_ext(r, m, log_m, u_inv_value, w_inv_value)
+    eval_check = Array(OOD_BARY_DIM)
+    dot_product_ee_strided_a(row, slice_L, eval_check, m, 2 * OOD_BARY_DIM)
+    dot_product_ee_strided_a(
+        row + OOD_BARY_DIM,
+        slice_R,
+        eval_check,
+        m,
+        2 * OOD_BARY_DIM,
+    )
+    return
diff --git a/crates/lean_compiler/snark_lib.py b/crates/lean_compiler/snark_lib.py
index 5d13b761f..01d3af168 100644
--- a/crates/lean_compiler/snark_lib.py
+++ b/crates/lean_compiler/snark_lib.py
@@ -105,6 +105,10 @@ def dot_product_ee(a, b, result, length=None):
     _ = a, b, result, length
 
 
+def dot_product_ee_strided_a(a, b, result, length, stride_a):
+    _ = a, b, result, length, stride_a
+
+
 def poly_eq_be(a, b, result, length=None):
     _ = a, b, result, length
 
@@ -175,4 +179,4 @@ def hint_log2_ceil(n):
 
 def hint_witness(name, destination):
     """Write the next witness entry for `name` into `destination`."""
-    _ = (name, destination)
\ No newline at end of file
+    _ = (name, destination)
diff --git a/crates/lean_compiler/src/a_simplify_lang/mod.rs b/crates/lean_compiler/src/a_simplify_lang/mod.rs
index b6cb43e3e..bdee8490d 100644
--- a/crates/lean_compiler/src/a_simplify_lang/mod.rs
+++ b/crates/lean_compiler/src/a_simplify_lang/mod.rs
@@ -7,8 +7,8 @@ use crate::{
 use backend::PrimeCharacteristicRing;
 use lean_vm::{
     ALL_POSEIDON16_NAMES, Boolean, BooleanExpr, CustomHint, ExtensionOpMode, FunctionName,
-    POSEIDON16_HALF_HARDCODED_LEFT_NAME, POSEIDON16_HALF_NAME, POSEIDON16_HARDCODED_LEFT_NAME, PrecompileArgs,
-    PrecompileCompTimeArgs, SourceLocation,
+    POSEIDON16_HALF_HARDCODED_LEFT_NAME, POSEIDON16_HALF_NAME, POSEIDON16_HARDCODED_LEFT_NAME,
+    PrecompileArgs, PrecompileCompTimeArgs, SourceLocation,
 };
 use std::{
     collections::{BTreeMap, BTreeSet},
@@ -2238,6 +2238,45 @@ fn simplify_lines(
                             continue;
                         }
 
+                        // Special handling for strided extension_op precompile
+                        // Signature: func_strided_a(ptr_a, ptr_b, ptr_res, length, stride_a)
+                        if let Some(stripped_name) = function_name.strip_suffix("_strided_a")
+                            && let Some(mode) = ExtensionOpMode::from_name(stripped_name)
+                        {
+                            if !targets.is_empty() {
+                                return Err(format!(
+                                    "Precompile {function_name} should not return values, at {location}"
+                                ));
+                            }
+                            if args.len() != 5 {
+                                return Err(format!(
+                                    "Precompile {function_name} expects 5 arguments (a, b, result, length, stride_a), got {}, at {location}",
+                                    args.len()
+                                ));
+                            }
+                            let simplified_args = args[..3]
+                                .iter()
+                                .map(|arg| simplify_expr(ctx, state, const_malloc, arg, &mut res))
+                                .collect::<Result<Vec<_>, _>>()?;
+                            let size = simplify_expr(ctx, state, const_malloc, &args[3], &mut res)?
+                                .as_constant()
+                                .expect("extension op size must be a constant");
+                            let stride_a = simplify_expr(ctx, state, const_malloc, &args[4], &mut res)?
+                                .as_constant()
+                                .expect("extension op stride_a must be a constant");
+                            res.push(SimpleLine::Precompile(PrecompileArgs {
+                                arg_0: simplified_args[0].clone(),
+                                arg_1: simplified_args[1].clone(),
+                                res: simplified_args[2].clone(),
+                                data: PrecompileCompTimeArgs::ExtensionOp {
+                                    size,
+                                    mode,
+                                    stride_a: Some(stride_a),
+                                },
+                            }));
+                            continue;
+                        }
+
                         // Special handling for extension_op precompile
                         // Signature: func(ptr_a, ptr_b, ptr_res) or func(ptr_a, ptr_b, ptr_res, length)
                         if let Some(mode) = ExtensionOpMode::from_name(function_name) {
@@ -2268,7 +2307,11 @@ fn simplify_lines(
                                 arg_0: simplified_args[0].clone(),
                                 arg_1: simplified_args[1].clone(),
                                 res: simplified_args[2].clone(),
-                                data: PrecompileCompTimeArgs::ExtensionOp { size, mode },
+                                data: PrecompileCompTimeArgs::ExtensionOp {
+                                    size,
+                                    mode,
+                                    stride_a: None,
+                                },
                             }));
                             continue;
                         }
diff --git a/crates/lean_compiler/src/instruction_encoder.rs b/crates/lean_compiler/src/instruction_encoder.rs
index 1060e3be4..d7a2b67e4 100644
--- a/crates/lean_compiler/src/instruction_encoder.rs
+++ b/crates/lean_compiler/src/instruction_encoder.rs
@@ -59,12 +59,18 @@ pub fn field_representation(instr: &Instruction) -> [F; N_INSTRUCTION_COLUMNS] {
                         + POSEIDON_HARDCODED_LEFT_4_FLAG_SHIFT * flag_left
                         + POSEIDON_HARDCODED_LEFT_4_OFFSET_SHIFT * hardcoded_offset_left_val
                 }
-                PrecompileCompTimeArgs::ExtensionOp { size, mode } => {
+                PrecompileCompTimeArgs::ExtensionOp { size, mode, .. } => {
                     assert!(*size >= 1, "invalid extension_op size={size}");
                     mode.flag_encoding() + EXT_OP_LEN_MULTIPLIER * size
                 }
             };
             fields[instr_idx(COL_PRECOMPILE_DATA)] = F::from_usize(precompile_data);
+            fields[instr_idx(COL_PRECOMPILE_EXTRA)] = match &precompile.data {
+                PrecompileCompTimeArgs::ExtensionOp { mode, stride_a, .. } => {
+                    F::from_usize(stride_a.unwrap_or_else(|| mode.default_stride_a()))
+                }
+                PrecompileCompTimeArgs::Poseidon16 { .. } => F::ZERO,
+            };
             match (precompile.arg_0, precompile.arg_1) {
                 (MemOrFpOrConstant::FpRelative { offset: off_a }, MemOrFpOrConstant::FpRelative { offset: off_b }) => {
                     fields[instr_idx(COL_FLAG_AB_FP)] = F::ONE;
diff --git a/crates/lean_compiler/tests/test_compiler.rs b/crates/lean_compiler/tests/test_compiler.rs
index 2c187a08e..92673bcd6 100644
--- a/crates/lean_compiler/tests/test_compiler.rs
+++ b/crates/lean_compiler/tests/test_compiler.rs
@@ -1,6 +1,6 @@
 use std::time::Instant;
 
-use backend::BasedVectorSpace;
+use backend::{BasedVectorSpace, PrimeCharacteristicRing};
 use lean_compiler::*;
 use lean_vm::*;
 use rand::{RngExt, SeedableRng, rngs::StdRng};
@@ -68,6 +68,39 @@ def div_ext_2(n, d):
     compile_and_run(&ProgramSource::Raw(program.to_string()), &public_input, false);
 }
 
+#[test]
+fn test_strided_dot_product_ee() {
+    let program = r#"
+DIM = 5
+N = 3
+STRIDE = 2 * DIM
+
+def main():
+    a = 0
+    b = N * STRIDE
+    expected = b + N * DIM
+    dot_product_ee_strided_a(a, b, expected, N, STRIDE)
+    return
+    "#;
+
+    let mut rng = StdRng::seed_from_u64(11);
+    let a_values: Vec<EF> = (0..3).map(|_| rng.random()).collect();
+    let b_values: Vec<EF> = (0..3).map(|_| rng.random()).collect();
+    let expected: EF = a_values.iter().zip(&b_values).map(|(&a, &b)| a * b).sum();
+
+    let mut public_input = vec![];
+    for value in &a_values {
+        public_input.extend(value.as_basis_coefficients_slice());
+        public_input.extend(EF::ZERO.as_basis_coefficients_slice());
+    }
+    for value in &b_values {
+        public_input.extend(value.as_basis_coefficients_slice());
+    }
+    public_input.extend(expected.as_basis_coefficients_slice());
+
+    compile_and_run(&ProgramSource::Raw(program.to_string()), &public_input, false);
+}
+
 fn test_data_dir() -> String {
     let manifest_dir = env!("CARGO_MANIFEST_DIR");
     format!("{manifest_dir}/tests/test_data")
diff --git a/crates/lean_vm/src/isa/instruction.rs b/crates/lean_vm/src/isa/instruction.rs
index a7a81af23..b577cf0c3 100644
--- a/crates/lean_vm/src/isa/instruction.rs
+++ b/crates/lean_vm/src/isa/instruction.rs
@@ -73,6 +73,7 @@ pub enum PrecompileCompTimeArgs<S> {
     ExtensionOp {
         size: S,
         mode: ExtensionOpMode,
+        stride_a: Option<S>,
     },
 }
 
@@ -93,7 +94,11 @@ impl<S> PrecompileCompTimeArgs<S> {
                 half_output,
                 hardcoded_offset_left: hardcoded_left_4.map(&mut f),
             },
-            Self::ExtensionOp { size, mode } => PrecompileCompTimeArgs::ExtensionOp { size: f(size), mode },
+            Self::ExtensionOp { size, mode, stride_a } => PrecompileCompTimeArgs::ExtensionOp {
+                size: f(size),
+                mode,
+                stride_a: stride_a.map(&mut f),
+            },
         }
     }
 }
@@ -263,8 +268,16 @@ impl<V: Display, S: Display> Display for PrecompileArgs<V, S> {
                     "{POSEIDON16_NAME}({arg_0}, {arg_1}, {res}, half, hardcoded_left_4={off})"
                 ),
             },
-            PrecompileCompTimeArgs::ExtensionOp { size, mode } => {
-                write!(f, "{}({arg_0}, {arg_1}, {res}, {size})", mode.name())
+            PrecompileCompTimeArgs::ExtensionOp { size, mode, stride_a } => {
+                if let Some(stride_a) = stride_a {
+                    write!(
+                        f,
+                        "{}_strided_a({arg_0}, {arg_1}, {res}, {size}, {stride_a})",
+                        mode.name()
+                    )
+                } else {
+                    write!(f, "{}({arg_0}, {arg_1}, {res}, {size})", mode.name())
+                }
             }
         }
     }
diff --git a/crates/lean_vm/src/tables/execution/air.rs b/crates/lean_vm/src/tables/execution/air.rs
index b20366efd..8b2b55aab 100644
--- a/crates/lean_vm/src/tables/execution/air.rs
+++ b/crates/lean_vm/src/tables/execution/air.rs
@@ -2,7 +2,7 @@ use crate::{EF, ExecutionTable, ExtraDataForBuses, eval_virtual_bus_column};
 use backend::*;
 
 pub const N_RUNTIME_COLUMNS: usize = 8;
-pub const N_INSTRUCTION_COLUMNS: usize = 12;
+pub const N_INSTRUCTION_COLUMNS: usize = 13;
 pub const N_TOTAL_EXECUTION_COLUMNS: usize = N_INSTRUCTION_COLUMNS + N_RUNTIME_COLUMNS;
 
 // Committed columns (IMPORTANT: they must be the first columns)
@@ -28,13 +28,14 @@ pub const COL_MUL: usize = 16;
 pub const COL_JUMP: usize = 17;
 pub const COL_AUX: usize = 18;
 pub const COL_PRECOMPILE_DATA: usize = 19;
+pub const COL_PRECOMPILE_EXTRA: usize = 20;
 
 // Temporary columns (stored to avoid duplicate computations)
 pub const N_TEMPORARY_EXEC_COLUMNS: usize = 4;
-pub const COL_IS_PRECOMPILE: usize = 20;
-pub const COL_EXEC_NU_A: usize = 21;
-pub const COL_EXEC_NU_B: usize = 22;
-pub const COL_EXEC_NU_C: usize = 23;
+pub const COL_IS_PRECOMPILE: usize = 21;
+pub const COL_EXEC_NU_A: usize = 22;
+pub const COL_EXEC_NU_B: usize = 23;
+pub const COL_EXEC_NU_C: usize = 24;
 
 impl<const BUS: bool> Air for ExecutionTable<BUS> {
     type ExtraData = ExtraDataForBuses<EF>;
@@ -68,6 +69,7 @@ impl<const BUS: bool> Air for ExecutionTable<BUS> {
         let jump = up[COL_JUMP];
         let aux = up[COL_AUX];
         let precompile_data = up[COL_PRECOMPILE_DATA];
+        let precompile_extra = up[COL_PRECOMPILE_EXTRA];
 
         let (value_a, value_b, value_c) = (up[COL_MEM_VALUE_A], up[COL_MEM_VALUE_B], up[COL_MEM_VALUE_C]);
         let pc = up[COL_PC];
@@ -96,11 +98,11 @@ impl<const BUS: bool> Air for ExecutionTable<BUS> {
             builder.eval_virtual_column(eval_virtual_bus_column::<AB, EF>(
                 extra_data,
                 is_precompile,
-                &[precompile_data, nu_a, nu_b, nu_c],
+                &[precompile_data, precompile_extra, nu_a, nu_b, nu_c],
             ));
         } else {
             builder.declare_values(&[is_precompile]);
-            builder.declare_values(&[precompile_data, nu_a, nu_b, nu_c]);
+            builder.declare_values(&[precompile_data, precompile_extra, nu_a, nu_b, nu_c]);
         }
 
         builder.assert_zero(one_minus_flag_a_and_flag_ab_fp * (addr_a - fp_plus_operand_a));
diff --git a/crates/lean_vm/src/tables/execution/mod.rs b/crates/lean_vm/src/tables/execution/mod.rs
index 10b854c04..ae90bbaa0 100644
--- a/crates/lean_vm/src/tables/execution/mod.rs
+++ b/crates/lean_vm/src/tables/execution/mod.rs
@@ -44,8 +44,9 @@ impl<const BUS: bool> TableT for ExecutionTable<BUS> {
 
     #[allow(clippy::vec_init_then_push)] // https://github.com/leanEthereum/leanMultisig/issues/198
     fn bus(&self) -> Bus {
-        let mut data = Vec::with_capacity(4);
+        let mut data = Vec::with_capacity(5);
         data.push(BusData::Column(COL_PRECOMPILE_DATA));
+        data.push(BusData::Column(COL_PRECOMPILE_EXTRA));
         data.push(BusData::Column(COL_EXEC_NU_A));
         data.push(BusData::Column(COL_EXEC_NU_B));
         data.push(BusData::Column(COL_EXEC_NU_C));
diff --git a/crates/lean_vm/src/tables/extension_op/air.rs b/crates/lean_vm/src/tables/extension_op/air.rs
index 2f39a6681..0de0b8162 100644
--- a/crates/lean_vm/src/tables/extension_op/air.rs
+++ b/crates/lean_vm/src/tables/extension_op/air.rs
@@ -15,19 +15,20 @@ pub(super) const COL_LEN: usize = 5;
 pub(super) const COL_IDX_A: usize = 6;
 pub(super) const COL_IDX_B: usize = 7;
 pub(super) const COL_IDX_RES: usize = 8;
+pub(super) const COL_STRIDE_A: usize = 9;
 
 /// value_a coordinates (5 columns)
-pub(super) const COL_VA: usize = 9;
+pub(super) const COL_VA: usize = 10;
 /// value_b coordinates (5 columns)
-pub(super) const COL_VB: usize = 14;
+pub(super) const COL_VB: usize = 15;
 /// result coordinates (5 columns).
-pub(super) const COL_VRES: usize = 19;
+pub(super) const COL_VRES: usize = 20;
 /// computation coordinates (5 columns)
-pub(super) const COL_COMP: usize = 24;
+pub(super) const COL_COMP: usize = 25;
 
 // Virtual columns (not explicitely in AIR)
-pub(super) const COL_ACTIVATION_FLAG: usize = 29;
-pub(super) const COL_AUX_EXTENSION_OP: usize = 30;
+pub(super) const COL_ACTIVATION_FLAG: usize = 30;
+pub(super) const COL_AUX_EXTENSION_OP: usize = 31;
 
 use backend::quintic_extension::extension::quintic_mul;
 
@@ -42,13 +43,13 @@ impl<const BUS: bool> Air for ExtensionOpPrecompile<BUS> {
     type ExtraData = ExtraDataForBuses<EF>;
 
     fn n_columns(&self) -> usize {
-        29
+        30
     }
     fn degree_air(&self) -> usize {
         6
     }
     fn n_constraints(&self) -> usize {
-        33
+        34
     }
     fn down_column_indexes(&self) -> Vec<usize> {
         vec![
@@ -60,6 +61,7 @@ impl<const BUS: bool> Air for ExtensionOpPrecompile<BUS> {
             COL_FLAG_POLY_EQ,
             COL_IDX_A,
             COL_IDX_B,
+            COL_STRIDE_A,
             COL_COMP,
             COL_COMP + 1,
             COL_COMP + 2,
@@ -81,6 +83,7 @@ impl<const BUS: bool> Air for ExtensionOpPrecompile<BUS> {
         let len = up[COL_LEN];
         let idx_a = up[COL_IDX_A];
         let idx_b = up[COL_IDX_B];
+        let stride_a = up[COL_STRIDE_A];
 
         let va: [AB::IF; 5] = std::array::from_fn(|k| up[COL_VA + k]);
         let vb: [AB::IF; 5] = std::array::from_fn(|k| up[COL_VB + k]);
@@ -95,7 +98,8 @@ impl<const BUS: bool> Air for ExtensionOpPrecompile<BUS> {
         let flag_poly_eq_down = down[5]; // COL_FLAG_POLY_EQ
         let idx_a_down = down[6]; // COL_IDX_A
         let idx_b_down = down[7]; // COL_IDX_B
-        let comp_down: [AB::IF; 5] = std::array::from_fn(|k| down[8 + k]); // COL_COMP+0..5
+        let stride_a_down = down[8]; // COL_STRIDE_A
+        let comp_down: [AB::IF; 5] = std::array::from_fn(|k| down[9 + k]); // COL_COMP+0..5
 
         let active = flag_add + flag_mul + flag_poly_eq;
         let activation_flag = start * active;
@@ -112,11 +116,11 @@ impl<const BUS: bool> Air for ExtensionOpPrecompile<BUS> {
             builder.eval_virtual_column(eval_virtual_bus_column::<AB, EF>(
                 extra_data,
                 activation_flag,
-                &[aux, idx_a, idx_b, idx_r],
+                &[aux, stride_a, idx_a, idx_b, idx_r],
             ));
         } else {
             builder.declare_values(&[activation_flag]);
-            builder.declare_values(&[aux, idx_a, idx_b, idx_r]);
+            builder.declare_values(&[aux, stride_a, idx_a, idx_b, idx_r]);
         }
 
         let is_ee = -(is_be - AB::F::ONE);
@@ -167,8 +171,8 @@ impl<const BUS: bool> Air for ExtensionOpPrecompile<BUS> {
         builder.assert_zero(not_start_down * (flag_add - flag_add_down));
         builder.assert_zero(not_start_down * (flag_mul - flag_mul_down));
         builder.assert_zero(not_start_down * (flag_poly_eq - flag_poly_eq_down));
-        let a_increment = is_be + is_ee * AB::F::from_usize(crate::DIMENSION);
-        builder.assert_zero(not_start_down * (idx_a_down - idx_a - a_increment));
+        builder.assert_zero(not_start_down * (stride_a - stride_a_down));
+        builder.assert_zero(not_start_down * (idx_a_down - idx_a - stride_a));
         builder.assert_zero(not_start_down * (idx_b_down - idx_b - AB::F::from_usize(crate::DIMENSION)));
 
         builder.assert_zero(start_down * (len - AB::F::ONE));
diff --git a/crates/lean_vm/src/tables/extension_op/exec.rs b/crates/lean_vm/src/tables/extension_op/exec.rs
index d0770d9b9..5b9953d3b 100644
--- a/crates/lean_vm/src/tables/extension_op/exec.rs
+++ b/crates/lean_vm/src/tables/extension_op/exec.rs
@@ -98,6 +98,7 @@ pub(super) fn exec_multi_row(
     size: usize,
     is_be: bool,
     op: ExtensionOp,
+    stride_a: usize,
     memory: &mut impl MemoryAccess,
     trace: &mut TableTrace,
 ) -> Result<(), RunnerError> {
@@ -107,8 +108,6 @@ pub(super) fn exec_multi_row(
         solve_unknowns(ptr_a, ptr_b, ptr_res, is_be, op, memory)?;
     }
 
-    let a_stride = if is_be { 1 } else { DIMENSION };
-
     // 1. Read all operands and compute elem values
     let mut elems = Vec::with_capacity(size);
     let mut v_bs = Vec::with_capacity(size);
@@ -116,7 +115,7 @@ pub(super) fn exec_multi_row(
     let mut idx_bs = Vec::with_capacity(size);
 
     for i in 0..size {
-        let addr_a = ptr_a.to_usize() + i * a_stride;
+        let addr_a = ptr_a.to_usize() + i * stride_a;
         let addr_b = ptr_b.to_usize() + i * DIMENSION;
         let idx_a_f = F::from_usize(addr_a);
         let idx_b_f = F::from_usize(addr_b);
@@ -164,6 +163,7 @@ pub(super) fn exec_multi_row(
         trace.columns[COL_FLAG_MUL].push(flag_mul_f);
         trace.columns[COL_FLAG_POLY_EQ].push(flag_poly_eq_f);
         trace.columns[COL_LEN].push(F::from_usize(current_len));
+        trace.columns[COL_STRIDE_A].push(F::from_usize(stride_a));
         trace.columns[COL_IDX_A].push(idx_as[i]);
         trace.columns[COL_IDX_B].push(idx_bs[i]);
         trace.columns[COL_IDX_RES].push(ptr_res);
diff --git a/crates/lean_vm/src/tables/extension_op/mod.rs b/crates/lean_vm/src/tables/extension_op/mod.rs
index af017407e..e9d3c9ee7 100644
--- a/crates/lean_vm/src/tables/extension_op/mod.rs
+++ b/crates/lean_vm/src/tables/extension_op/mod.rs
@@ -64,6 +64,10 @@ impl ExtensionOpMode {
         self.op.flag() + self.is_be as usize * EXT_OP_FLAG_IS_BE
     }
 
+    pub const fn default_stride_a(self) -> usize {
+        if self.is_be { 1 } else { DIMENSION }
+    }
+
     pub const fn name(self) -> &'static str {
         match (self.op, self.is_be) {
             (ExtensionOp::Add, false) => "add_ee",
@@ -107,8 +111,9 @@ impl<const BUS: bool> TableT for ExtensionOpPrecompile<BUS> {
 
     #[allow(clippy::vec_init_then_push)] // https://github.com/leanEthereum/leanMultisig/issues/198
     fn bus(&self) -> Bus {
-        let mut data = Vec::with_capacity(4);
+        let mut data = Vec::with_capacity(5);
         data.push(BusData::Column(COL_AUX_EXTENSION_OP));
+        data.push(BusData::Column(COL_STRIDE_A));
         data.push(BusData::Column(COL_IDX_A));
         data.push(BusData::Column(COL_IDX_B));
         data.push(BusData::Column(COL_IDX_RES));
@@ -127,6 +132,7 @@ impl<const BUS: bool> TableT for ExtensionOpPrecompile<BUS> {
         let mut row = vec![F::ZERO; self.n_columns_total()];
         row[COL_START] = F::ONE;
         row[COL_LEN] = F::ONE;
+        row[COL_STRIDE_A] = F::from_usize(DIMENSION);
         row[COL_AUX_EXTENSION_OP] = F::from_usize(EXT_OP_LEN_MULTIPLIER);
         row[COL_IDX_A] = F::from_usize(zero_vec_ptr);
         row[COL_IDX_B] = F::from_usize(zero_vec_ptr);
@@ -143,10 +149,20 @@ impl<const BUS: bool> TableT for ExtensionOpPrecompile<BUS> {
         args: PrecompileCompTimeArgs<usize>,
         ctx: &mut InstructionContext<'_, M>,
     ) -> Result<(), RunnerError> {
-        let PrecompileCompTimeArgs::ExtensionOp { size, mode } = args else {
+        let PrecompileCompTimeArgs::ExtensionOp { size, mode, stride_a } = args else {
             unreachable!("ExtensionOp table called with non-ExtensionOp args");
         };
         let trace = ctx.traces.get_mut(&self.table()).unwrap();
-        exec_multi_row(arg_a, arg_b, arg_c, size, mode.is_be, mode.op, ctx.memory, trace)
+        exec_multi_row(
+            arg_a,
+            arg_b,
+            arg_c,
+            size,
+            mode.is_be,
+            mode.op,
+            stride_a.unwrap_or_else(|| mode.default_stride_a()),
+            ctx.memory,
+            trace,
+        )
     }
 }
diff --git a/crates/lean_vm/src/tables/mod.rs b/crates/lean_vm/src/tables/mod.rs
index bf9523291..e6cb21c2c 100644
--- a/crates/lean_vm/src/tables/mod.rs
+++ b/crates/lean_vm/src/tables/mod.rs
@@ -16,8 +16,8 @@ pub use execution::*;
 mod utils;
 pub(crate) use utils::*;
 
-// `PRECOMPILE_DATA` is the bus discriminator separating the two precompile
-// tables. Disjointness is by parity of bit 0:
+// `PRECOMPILE_DATA` is the bus discriminator separating the precompile tables.
+// Disjointness is by parity of bit 0:
 //
 //   Poseidon16  (odd):  1 + 2·flag_half + 4·flag_left + 8·flag_left·offset_left
 //   ExtensionOp (even): 4·is_be + 8·flag_add + 16·flag_mul + 32·flag_poly_eq + 64·len
diff --git a/crates/lean_vm/src/tables/poseidon_16/mod.rs b/crates/lean_vm/src/tables/poseidon_16/mod.rs
index 5cffe5194..096a60812 100644
--- a/crates/lean_vm/src/tables/poseidon_16/mod.rs
+++ b/crates/lean_vm/src/tables/poseidon_16/mod.rs
@@ -108,6 +108,7 @@ pub const POSEIDON_16_COL_OUTPUT_START: ColIndex = num_cols_poseidon_16() - 8;
 /// Non-committed columns ("virtual"):
 pub const POSEIDON_16_COL_INDEX_INPUT_LEFT: ColIndex = num_cols_poseidon_16();
 pub const POSEIDON_16_COL_PRECOMPILE_DATA: ColIndex = num_cols_poseidon_16() + 1;
+pub const POSEIDON_16_COL_PRECOMPILE_EXTRA: ColIndex = num_cols_poseidon_16() + 2;
 
 pub const POSEIDON16_NAME: &str = "poseidon16_compress";
 pub const POSEIDON16_HALF_NAME: &str = "poseidon16_compress_half";
@@ -162,8 +163,9 @@ impl<const BUS: bool> TableT for Poseidon16Precompile<BUS> {
 
     #[allow(clippy::vec_init_then_push)] // https://github.com/leanEthereum/leanMultisig/issues/198
     fn bus(&self) -> Bus {
-        let mut data = Vec::with_capacity(4);
+        let mut data = Vec::with_capacity(5);
         data.push(BusData::Column(POSEIDON_16_COL_PRECOMPILE_DATA));
+        data.push(BusData::Column(POSEIDON_16_COL_PRECOMPILE_EXTRA));
         data.push(BusData::Column(POSEIDON_16_COL_INDEX_INPUT_LEFT));
         data.push(BusData::Column(POSEIDON_16_COL_INDEX_INPUT_RIGHT));
         data.push(BusData::Column(POSEIDON_16_COL_INDEX_INPUT_RES));
@@ -193,6 +195,7 @@ impl<const BUS: bool> TableT for Poseidon16Precompile<BUS> {
         // Non-committed columns
         row[POSEIDON_16_COL_INDEX_INPUT_LEFT] = F::from_usize(zero_vec_ptr);
         row[POSEIDON_16_COL_PRECOMPILE_DATA] = F::from_usize(POSEIDON_PRECOMPILE_DATA);
+        row[POSEIDON_16_COL_PRECOMPILE_EXTRA] = F::ZERO;
 
         generate_trace_rows_for_perm(perm);
         row
@@ -267,6 +270,7 @@ impl<const BUS: bool> TableT for Poseidon16Precompile<BUS> {
             + POSEIDON_HARDCODED_LEFT_4_FLAG_SHIFT * (flag_hardcoded as usize)
             + POSEIDON_HARDCODED_LEFT_4_OFFSET_SHIFT * hardcoded_offset_left_val;
         trace.columns[POSEIDON_16_COL_PRECOMPILE_DATA].push(F::from_usize(precompile_data));
+        trace.columns[POSEIDON_16_COL_PRECOMPILE_EXTRA].push(F::ZERO);
 
         // the rest of the trace is filled at the end of the execution (to get parallelism + SIMD)
 
@@ -314,16 +318,30 @@ impl<const BUS: bool> Air for Poseidon16Precompile<BUS> {
         let index_a =
             cols.effective_index_left_second - one_minus_flag_hardcoded_left * AB::F::from_usize(HALF_DIGEST_LEN);
 
-        // Bus data: [precompile_data, a, b, res]
+        let precompile_extra = AB::IF::ZERO;
+
+        // Bus data: [precompile_data, extra, a, b, res]
         if BUS {
             builder.eval_virtual_column(eval_virtual_bus_column::<AB, EF>(
                 extra_data,
                 cols.flag_active,
-                &[precompile_data_reconstructed, index_a, cols.index_b, cols.index_res],
+                &[
+                    precompile_data_reconstructed,
+                    precompile_extra,
+                    index_a,
+                    cols.index_b,
+                    cols.index_res,
+                ],
             ));
         } else {
             builder.declare_values(std::slice::from_ref(&cols.flag_active));
-            builder.declare_values(&[precompile_data_reconstructed, index_a, cols.index_b, cols.index_res]);
+            builder.declare_values(&[
+                precompile_data_reconstructed,
+                precompile_extra,
+                index_a,
+                cols.index_b,
+                cols.index_res,
+            ]);
         }
 
         builder.assert_bool(cols.flag_active);
@@ -425,8 +443,8 @@ pub const fn num_cols_poseidon_16() -> usize {
 }
 
 pub const fn num_cols_total_poseidon_16() -> usize {
-    // +2 for non-committed columns: POSEIDON_16_COL_INDEX_INPUT_LEFT, POSEIDON_16_COL_PRECOMPILE_DATA
-    num_cols_poseidon_16() + 2
+    // +3 for non-committed columns: input-left index, precompile data, precompile extra.
+    num_cols_poseidon_16() + 3
 }
 
 #[inline]
diff --git a/crates/lean_vm/src/tables/table_enum.rs b/crates/lean_vm/src/tables/table_enum.rs
index 55be30e28..3db2923e0 100644
--- a/crates/lean_vm/src/tables/table_enum.rs
+++ b/crates/lean_vm/src/tables/table_enum.rs
@@ -5,7 +5,7 @@ use crate::*;
 
 pub const N_TABLES: usize = 3;
 pub const ALL_TABLES: [Table; N_TABLES] = [Table::execution(), Table::extension_op(), Table::poseidon16()];
-pub const MAX_PRECOMPILE_BUS_WIDTH: usize = 4;
+pub const MAX_PRECOMPILE_BUS_WIDTH: usize = 5;
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
 #[repr(usize)]

From 97c38eb5113130d0821e066c1dbf083a4a69bfc6 Mon Sep 17 00:00:00 2001
From: frisitano <francesco.risitano95@gmail.com>
Date: Fri, 15 May 2026 18:36:43 +0100
Subject: [PATCH 2/2] Use row hashes for OOD row domain

---
 crates/lean-da/src/main.rs                    | 93 ++++++++++++++----
 crates/lean-da/zkdsl_implem/lean_da_ood.py    | 98 ++++++++++++-------
 .../zkdsl_implem/lean_da_ood_column_major.py  | 98 ++++++++++++-------
 .../lean-da/zkdsl_implem/lean_da_ood_tiled.py | 98 ++++++++++++-------
 4 files changed, 264 insertions(+), 123 deletions(-)

diff --git a/crates/lean-da/src/main.rs b/crates/lean-da/src/main.rs
index 65bec9a77..cff06a3d6 100644
--- a/crates/lean-da/src/main.rs
+++ b/crates/lean-da/src/main.rs
@@ -3,7 +3,7 @@ mod cache;
 use std::collections::{BTreeMap, HashMap};
 use std::time::Instant;
 
-use backend::{Algebra, BasedVectorSpace, Field, PrimeCharacteristicRing, Proof, TwoAdicField};
+use backend::{Algebra, BasedVectorSpace, PrimeCharacteristicRing, Proof, TwoAdicField};
 use clap::{Parser, ValueEnum};
 use lean_compiler::{CompilationFlags, ProgramSource, compile_program_with_flags};
 use lean_prover::{
@@ -75,16 +75,6 @@ fn main() {
 
 pub fn compile_lean_da_bytecode(n_blobs: usize, construction: Construction) -> Bytecode {
     assert!(n_blobs > 0, "n_blobs must be nonzero");
-    if matches!(
-        construction,
-        Construction::OodRow | Construction::OodRowColumnMajor | Construction::OodRowTiled
-    ) {
-        assert!(
-            n_blobs.is_power_of_two(),
-            "ood-row constructions require n_blobs to be a power of two"
-        );
-    }
-
     let mut replacements = BTreeMap::new();
     replacements.insert("LEAN_DA_ENTRY".to_string(), construction.entry().to_string());
     replacements.insert("LOG_M_PLACEHOLDER".to_string(), LOG_M.to_string());
@@ -311,7 +301,7 @@ fn build_ood_public_input(codewords: &[Vec<EF>]) -> Vec<F> {
     public_input.extend_from_slice(&z_digest[..dim]);
     let z = EF::from_basis_coefficients_slice(&z_digest[..dim]).unwrap();
 
-    let row_coeffs = build_scaled_ood_coeffs(n_blobs, z);
+    let row_coeffs = build_hash_domain_ood_coeffs(&row_digests, &chain_digest, z);
     public_input.extend(serialize_ext_vec(&row_coeffs));
 
     let ood_row = build_scaled_ood_row(codewords, &row_coeffs);
@@ -339,17 +329,43 @@ fn derive_z_digest(chain_digest: &[F; 8]) -> [F; 8] {
     utils::poseidon16_compress_pair(chain_digest, &tag_digest(1))
 }
 
-fn build_scaled_ood_coeffs(n_blobs: usize, z: EF) -> Vec<EF> {
-    let log_n_blobs = log2_exact_power_of_two(n_blobs);
-    let row_root_inv = EF::from(F::two_adic_generator(log_n_blobs).inverse());
-    let numerator = z.exp_u64(n_blobs as u64) - EF::ONE;
+fn build_hash_domain_ood_coeffs(row_digests: &[[F; 8]], chain_digest: &[F; 8], z: EF) -> Vec<EF> {
+    let row_points = build_hash_domain_row_points(row_digests, chain_digest);
+    build_lagrange_coeffs_at_z(&row_points, z)
+}
 
-    let mut row_coeffs = Vec::with_capacity(n_blobs);
-    let mut point = EF::ONE;
-    for _ in 0..n_blobs {
-        let denominator = z * point - EF::ONE;
+fn build_hash_domain_row_points(row_digests: &[[F; 8]], chain_digest: &[F; 8]) -> Vec<EF> {
+    row_digests
+        .iter()
+        .enumerate()
+        .map(|(row_idx, digest)| digest_to_ext(&derive_row_point_digest(chain_digest, digest, row_idx)))
+        .collect()
+}
+
+fn derive_row_point_digest(chain_digest: &[F; 8], row_digest: &[F; 8], row_idx: usize) -> [F; 8] {
+    let state = utils::poseidon16_compress_pair(chain_digest, &tag_digest(3));
+    let state = utils::poseidon16_compress_pair(&state, row_digest);
+    utils::poseidon16_compress_pair(&state, &tag_digest(1_000 + row_idx as u64))
+}
+
+fn digest_to_ext(digest: &[F; 8]) -> EF {
+    let dim = <EF as BasedVectorSpace<F>>::DIMENSION;
+    EF::from_basis_coefficients_slice(&digest[..dim]).unwrap()
+}
+
+fn build_lagrange_coeffs_at_z(row_points: &[EF], z: EF) -> Vec<EF> {
+    let mut row_coeffs = Vec::with_capacity(row_points.len());
+    for (i, &x_i) in row_points.iter().enumerate() {
+        let mut numerator = EF::ONE;
+        let mut denominator = EF::ONE;
+        for (k, &x_k) in row_points.iter().enumerate() {
+            if k == i {
+                continue;
+            }
+            numerator *= z - x_k;
+            denominator *= x_i - x_k;
+        }
         row_coeffs.push(numerator / denominator);
-        point *= row_root_inv;
     }
     row_coeffs
 }
@@ -475,6 +491,41 @@ mod tests {
         assert_eq!(public_input.len(), expected_len);
     }
 
+    #[test]
+    fn test_hash_domain_ood_coeffs_are_lagrange() {
+        let n_blobs = 3;
+        let dim = <EF as BasedVectorSpace<F>>::DIMENSION;
+        let m = 1 << LOG_M;
+        let codewords = generate_codewords(n_blobs);
+
+        let mut row_digests = Vec::with_capacity(n_blobs);
+        for codeword in &codewords {
+            let mut systematic = Vec::with_capacity(m * dim);
+            for value in codeword.iter().take(m) {
+                push_ext(&mut systematic, value);
+            }
+            row_digests.push(utils::poseidon_compress_slice(&systematic, false));
+        }
+
+        let chain_digest = chain_digest_array(&row_digests);
+        let z = digest_to_ext(&derive_z_digest(&chain_digest));
+        let row_points = build_hash_domain_row_points(&row_digests, &chain_digest);
+        let row_coeffs = build_hash_domain_ood_coeffs(&row_digests, &chain_digest, z);
+
+        for (i, &x_i) in row_points.iter().enumerate() {
+            let mut numerator = EF::ONE;
+            let mut denominator = EF::ONE;
+            for (k, &x_k) in row_points.iter().enumerate() {
+                if k == i {
+                    continue;
+                }
+                numerator *= z - x_k;
+                denominator *= x_i - x_k;
+            }
+            assert_eq!(row_coeffs[i] * denominator, numerator);
+        }
+    }
+
     #[test]
     #[ignore = "runs the full OOD-row proof benchmark path"]
     fn test_compile_prove_verify_ood_row() {
diff --git a/crates/lean-da/zkdsl_implem/lean_da_ood.py b/crates/lean-da/zkdsl_implem/lean_da_ood.py
index 274bdf140..dccd904c3 100644
--- a/crates/lean-da/zkdsl_implem/lean_da_ood.py
+++ b/crates/lean-da/zkdsl_implem/lean_da_ood.py
@@ -5,11 +5,10 @@
 #
 # Each blob witness is a length-2M Reed-Solomon codeword in natural evaluation
 # order: C_i[j] = P_i(w^j).  The public input commits to each systematic prefix
-# d_i = H(C_i[0..M)), then to D = H(d_0, ..., d_{n-1}).  The row challenge z is
-# derived from D, and the OOD row is the column-wise row-domain evaluation
-#   OOD[j] = sum_i ((z^n - 1) / (z*h^{-i} - 1)) * C_i[j].
-# The 1/n Lagrange factor is intentionally omitted: it scales the whole OOD row
-# by a nonzero public constant and does not affect the parity identity.
+# d_i = H(C_i[0..M)), then to D = H(d_0, ..., d_{n-1}).  Each row coordinate
+# x_i is derived from D, d_i, and i.  The OOD challenge z is derived from D,
+# and the OOD row is the hash-domain column evaluation
+#   OOD[j] = sum_i L_i(z) * C_i[j], where L_i is over the points x_i.
 #
 # The OOD row is computed in-circuit with one strided extension-field dot
 # product for each row-codeword position.
@@ -22,14 +21,11 @@
 DIM = 5
 
 N_BLOBS = N_BLOBS_PLACEHOLDER
-LOG_N_BLOBS = LOG_N_BLOBS_PLACEHOLDER
 
 W = ROOT_24**(2**(TWO_ADDICITY - LOG_M - 1))  # primitive 2M-th root of unity
 U = W * W
 U_INV = U ** (M - 1)
 W_INV = W ** (2 * M - 1)
-ROW_W = ROOT_24**(2**(TWO_ADDICITY - LOG_N_BLOBS))  # primitive N_BLOBS-th root
-ROW_W_INV = ROW_W ** (N_BLOBS - 1)
 
 DIGEST_LEN = 8
 LOG_LEAF_LEN_EXT = 4
@@ -67,7 +63,7 @@ def main():
     z_digest = Array(DIGEST_LEN)
     derive_z(D, z_digest)
     assert_eq_ext(z_digest, PUB_Z)
-    verify_row_coeffs(PUB_Z, PUB_ROW_COEFFS)
+    verify_row_coeffs(D, row_digests, PUB_Z, PUB_ROW_COEFFS)
 
     ood_row = compute_ood_row(all_codewords, PUB_ROW_COEFFS)
     ood_root = merkle_root_from_data(ood_row)
@@ -187,35 +183,69 @@ def compute_ood_row(codewords_base, row_coeffs):
     return ood_row
 
 
-def verify_row_coeffs(z, row_coeffs):
-    z_pow_log = Array((LOG_N_BLOBS + 1) * DIM)
-    copy_ext(z, z_pow_log)
-    for k in unroll(1, LOG_N_BLOBS + 1):
-        dot_product_ee(z_pow_log + (k - 1) * DIM,
-                       z_pow_log + (k - 1) * DIM,
-                       z_pow_log + k * DIM)
-    zn = z_pow_log + LOG_N_BLOBS * DIM
+def verify_row_coeffs(D, row_digests, z, row_coeffs):
+    row_points = Array(N_BLOBS * DIM)
+    derive_row_points(D, row_digests, row_points)
 
-    numerator = Array(DIM)
-    numerator[0] = zn[0] - 1
-    for d in unroll(1, DIM):
-        numerator[d] = zn[d]
+    for i in unroll(0, N_BLOBS):
+        numerator: Mut = one_ext()
+        denominator: Mut = one_ext()
+        for k in unroll(0, N_BLOBS):
+            if k != i:
+                z_minus_x = Array(DIM)
+                sub_ext(z, row_points + k * DIM, z_minus_x)
+                new_numerator = Array(DIM)
+                dot_product_ee(numerator, z_minus_x, new_numerator)
+                numerator = new_numerator
+
+                x_i_minus_x_k = Array(DIM)
+                sub_ext(row_points + i * DIM, row_points + k * DIM, x_i_minus_x_k)
+                new_denominator = Array(DIM)
+                dot_product_ee(denominator, x_i_minus_x_k, new_denominator)
+                denominator = new_denominator
+
+        lhs = Array(DIM)
+        dot_product_ee(denominator, row_coeffs + i * DIM, lhs)
+        assert_eq_ext(lhs, numerator)
+    return
 
-    row_w_inv_ext = Array(DIM)
-    row_w_inv_ext[0] = ROW_W_INV
-    for d in unroll(1, DIM):
-        row_w_inv_ext[d] = 0
-    points = Array(N_BLOBS * DIM)
-    copy_ext(z, points)
-    for i in unroll(1, N_BLOBS):
-        dot_product_ee(row_w_inv_ext, points + (i - 1) * DIM, points + i * DIM)
 
-    dens = Array(N_BLOBS * DIM)
+def derive_row_points(D, row_digests, row_points):
     for i in unroll(0, N_BLOBS):
-        dens[i * DIM] = points[i * DIM] - 1
-        for d in unroll(1, DIM):
-            dens[i * DIM + d] = points[i * DIM + d]
-        dot_product_ee(dens + i * DIM, row_coeffs + i * DIM, numerator)
+        derive_row_point(D, row_digests + i * DIGEST_LEN, i, row_points + i * DIM)
+    return
+
+
+def derive_row_point(D, row_digest, row_idx: Const, dest):
+    tag_row_x = domain_tag(3)
+    state = Array(DIGEST_LEN)
+    poseidon16_compress(D, tag_row_x, state)
+
+    state_2 = Array(DIGEST_LEN)
+    poseidon16_compress(state, row_digest, state_2)
+
+    tag_idx = domain_tag(1000 + row_idx)
+    point_digest = Array(DIGEST_LEN)
+    poseidon16_compress(state_2, tag_idx, point_digest)
+
+    for d in unroll(0, DIM):
+        dest[d] = point_digest[d]
+    return
+
+
+@inline
+def one_ext():
+    one = Array(DIM)
+    one[0] = 1
+    for d in unroll(1, DIM):
+        one[d] = 0
+    return one
+
+
+@inline
+def sub_ext(a, b, dest):
+    for d in unroll(0, DIM):
+        dest[d] = a[d] - b[d]
     return
 
 
diff --git a/crates/lean-da/zkdsl_implem/lean_da_ood_column_major.py b/crates/lean-da/zkdsl_implem/lean_da_ood_column_major.py
index a0a39bc7e..94dc836bf 100644
--- a/crates/lean-da/zkdsl_implem/lean_da_ood_column_major.py
+++ b/crates/lean-da/zkdsl_implem/lean_da_ood_column_major.py
@@ -6,11 +6,10 @@
 #
 # Each blob witness is a length-2M Reed-Solomon codeword in natural evaluation
 # order: C_i[j] = P_i(w^j).  The public input commits to each systematic prefix
-# d_i = H(C_i[0..M)), then to D = H(d_0, ..., d_{n-1}).  The row challenge z is
-# derived from D, and the OOD row is the column-wise row-domain evaluation
-#   OOD[j] = sum_i ((z^n - 1) / (z*h^{-i} - 1)) * C_i[j].
-# The 1/n Lagrange factor is intentionally omitted: it scales the whole OOD row
-# by a nonzero public constant and does not affect the parity identity.
+# d_i = H(C_i[0..M)), then to D = H(d_0, ..., d_{n-1}).  Each row coordinate
+# x_i is derived from D, d_i, and i.  The OOD challenge z is derived from D,
+# and the OOD row is the hash-domain column evaluation
+#   OOD[j] = sum_i L_i(z) * C_i[j], where L_i is over the points x_i.
 #
 # The canonical witness layout is:
 #   all_codewords[j][i] = C_i[j]
@@ -27,14 +26,11 @@
 DIM = 5
 
 N_BLOBS = N_BLOBS_PLACEHOLDER
-LOG_N_BLOBS = LOG_N_BLOBS_PLACEHOLDER
 
 W = ROOT_24**(2**(TWO_ADDICITY - LOG_M - 1))  # primitive 2M-th root of unity
 U = W * W
 U_INV = U ** (M - 1)
 W_INV = W ** (2 * M - 1)
-ROW_W = ROOT_24**(2**(TWO_ADDICITY - LOG_N_BLOBS))  # primitive N_BLOBS-th root
-ROW_W_INV = ROW_W ** (N_BLOBS - 1)
 
 DIGEST_LEN = 8
 LOG_LEAF_LEN_EXT = 4
@@ -71,7 +67,7 @@ def main():
     z_digest = Array(DIGEST_LEN)
     derive_z(D, z_digest)
     assert_eq_ext(z_digest, PUB_Z)
-    verify_row_coeffs(PUB_Z, PUB_ROW_COEFFS)
+    verify_row_coeffs(D, row_digests, PUB_Z, PUB_ROW_COEFFS)
 
     ood_row = compute_ood_row_column_major(all_codewords, PUB_ROW_COEFFS)
     ood_root = merkle_root_from_data(ood_row)
@@ -253,35 +249,69 @@ def compute_ood_row_column_major(codewords_base, row_coeffs):
     return ood_row
 
 
-def verify_row_coeffs(z, row_coeffs):
-    z_pow_log = Array((LOG_N_BLOBS + 1) * DIM)
-    copy_ext(z, z_pow_log)
-    for k in unroll(1, LOG_N_BLOBS + 1):
-        dot_product_ee(z_pow_log + (k - 1) * DIM,
-                       z_pow_log + (k - 1) * DIM,
-                       z_pow_log + k * DIM)
-    zn = z_pow_log + LOG_N_BLOBS * DIM
+def verify_row_coeffs(D, row_digests, z, row_coeffs):
+    row_points = Array(N_BLOBS * DIM)
+    derive_row_points(D, row_digests, row_points)
 
-    numerator = Array(DIM)
-    numerator[0] = zn[0] - 1
-    for d in unroll(1, DIM):
-        numerator[d] = zn[d]
+    for i in unroll(0, N_BLOBS):
+        numerator: Mut = one_ext()
+        denominator: Mut = one_ext()
+        for k in unroll(0, N_BLOBS):
+            if k != i:
+                z_minus_x = Array(DIM)
+                sub_ext(z, row_points + k * DIM, z_minus_x)
+                new_numerator = Array(DIM)
+                dot_product_ee(numerator, z_minus_x, new_numerator)
+                numerator = new_numerator
+
+                x_i_minus_x_k = Array(DIM)
+                sub_ext(row_points + i * DIM, row_points + k * DIM, x_i_minus_x_k)
+                new_denominator = Array(DIM)
+                dot_product_ee(denominator, x_i_minus_x_k, new_denominator)
+                denominator = new_denominator
+
+        lhs = Array(DIM)
+        dot_product_ee(denominator, row_coeffs + i * DIM, lhs)
+        assert_eq_ext(lhs, numerator)
+    return
 
-    row_w_inv_ext = Array(DIM)
-    row_w_inv_ext[0] = ROW_W_INV
-    for d in unroll(1, DIM):
-        row_w_inv_ext[d] = 0
-    points = Array(N_BLOBS * DIM)
-    copy_ext(z, points)
-    for i in unroll(1, N_BLOBS):
-        dot_product_ee(row_w_inv_ext, points + (i - 1) * DIM, points + i * DIM)
 
-    dens = Array(N_BLOBS * DIM)
+def derive_row_points(D, row_digests, row_points):
     for i in unroll(0, N_BLOBS):
-        dens[i * DIM] = points[i * DIM] - 1
-        for d in unroll(1, DIM):
-            dens[i * DIM + d] = points[i * DIM + d]
-        dot_product_ee(dens + i * DIM, row_coeffs + i * DIM, numerator)
+        derive_row_point(D, row_digests + i * DIGEST_LEN, i, row_points + i * DIM)
+    return
+
+
+def derive_row_point(D, row_digest, row_idx: Const, dest):
+    tag_row_x = domain_tag(3)
+    state = Array(DIGEST_LEN)
+    poseidon16_compress(D, tag_row_x, state)
+
+    state_2 = Array(DIGEST_LEN)
+    poseidon16_compress(state, row_digest, state_2)
+
+    tag_idx = domain_tag(1000 + row_idx)
+    point_digest = Array(DIGEST_LEN)
+    poseidon16_compress(state_2, tag_idx, point_digest)
+
+    for d in unroll(0, DIM):
+        dest[d] = point_digest[d]
+    return
+
+
+@inline
+def one_ext():
+    one = Array(DIM)
+    one[0] = 1
+    for d in unroll(1, DIM):
+        one[d] = 0
+    return one
+
+
+@inline
+def sub_ext(a, b, dest):
+    for d in unroll(0, DIM):
+        dest[d] = a[d] - b[d]
     return
 
 
diff --git a/crates/lean-da/zkdsl_implem/lean_da_ood_tiled.py b/crates/lean-da/zkdsl_implem/lean_da_ood_tiled.py
index 6e7e7f137..e49d61af0 100644
--- a/crates/lean-da/zkdsl_implem/lean_da_ood_tiled.py
+++ b/crates/lean-da/zkdsl_implem/lean_da_ood_tiled.py
@@ -6,11 +6,10 @@
 #
 # Each blob witness is a length-2M Reed-Solomon codeword in natural evaluation
 # order: C_i[j] = P_i(w^j).  The public input commits to each systematic prefix
-# d_i = H(C_i[0..M)), then to D = H(d_0, ..., d_{n-1}).  The row challenge z is
-# derived from D, and the OOD row is the column-wise row-domain evaluation
-#   OOD[j] = sum_i ((z^n - 1) / (z*h^{-i} - 1)) * C_i[j].
-# The 1/n Lagrange factor is intentionally omitted: it scales the whole OOD row
-# by a nonzero public constant and does not affect the parity identity.
+# d_i = H(C_i[0..M)), then to D = H(d_0, ..., d_{n-1}).  Each row coordinate
+# x_i is derived from D, d_i, and i.  The OOD challenge z is derived from D,
+# and the OOD row is the hash-domain column evaluation
+#   OOD[j] = sum_i L_i(z) * C_i[j], where L_i is over the points x_i.
 #
 # The canonical witness is one flat `all_codewords` vector.  Pointer arithmetic
 # interprets it as tile-major:
@@ -30,14 +29,11 @@
 DIM = 5
 
 N_BLOBS = N_BLOBS_PLACEHOLDER
-LOG_N_BLOBS = LOG_N_BLOBS_PLACEHOLDER
 
 W = ROOT_24**(2**(TWO_ADDICITY - LOG_M - 1))  # primitive 2M-th root of unity
 U = W * W
 U_INV = U ** (M - 1)
 W_INV = W ** (2 * M - 1)
-ROW_W = ROOT_24**(2**(TWO_ADDICITY - LOG_N_BLOBS))  # primitive N_BLOBS-th root
-ROW_W_INV = ROW_W ** (N_BLOBS - 1)
 
 DIGEST_LEN = 8
 LOG_LEAF_LEN_EXT = 4
@@ -80,7 +76,7 @@ def main():
     z_digest = Array(DIGEST_LEN)
     derive_z(D, z_digest)
     assert_eq_ext(z_digest, PUB_Z)
-    verify_row_coeffs(PUB_Z, PUB_ROW_COEFFS)
+    verify_row_coeffs(D, row_digests, PUB_Z, PUB_ROW_COEFFS)
 
     ood_row = compute_ood_row(all_codewords, PUB_ROW_COEFFS)
     ood_root = merkle_root_from_data(ood_row)
@@ -225,35 +221,69 @@ def compute_ood_row(codewords_base, row_coeffs):
     return ood_row
 
 
-def verify_row_coeffs(z, row_coeffs):
-    z_pow_log = Array((LOG_N_BLOBS + 1) * DIM)
-    copy_ext(z, z_pow_log)
-    for k in unroll(1, LOG_N_BLOBS + 1):
-        dot_product_ee(z_pow_log + (k - 1) * DIM,
-                       z_pow_log + (k - 1) * DIM,
-                       z_pow_log + k * DIM)
-    zn = z_pow_log + LOG_N_BLOBS * DIM
+def verify_row_coeffs(D, row_digests, z, row_coeffs):
+    row_points = Array(N_BLOBS * DIM)
+    derive_row_points(D, row_digests, row_points)
 
-    numerator = Array(DIM)
-    numerator[0] = zn[0] - 1
-    for d in unroll(1, DIM):
-        numerator[d] = zn[d]
+    for i in unroll(0, N_BLOBS):
+        numerator: Mut = one_ext()
+        denominator: Mut = one_ext()
+        for k in unroll(0, N_BLOBS):
+            if k != i:
+                z_minus_x = Array(DIM)
+                sub_ext(z, row_points + k * DIM, z_minus_x)
+                new_numerator = Array(DIM)
+                dot_product_ee(numerator, z_minus_x, new_numerator)
+                numerator = new_numerator
+
+                x_i_minus_x_k = Array(DIM)
+                sub_ext(row_points + i * DIM, row_points + k * DIM, x_i_minus_x_k)
+                new_denominator = Array(DIM)
+                dot_product_ee(denominator, x_i_minus_x_k, new_denominator)
+                denominator = new_denominator
+
+        lhs = Array(DIM)
+        dot_product_ee(denominator, row_coeffs + i * DIM, lhs)
+        assert_eq_ext(lhs, numerator)
+    return
 
-    row_w_inv_ext = Array(DIM)
-    row_w_inv_ext[0] = ROW_W_INV
-    for d in unroll(1, DIM):
-        row_w_inv_ext[d] = 0
-    points = Array(N_BLOBS * DIM)
-    copy_ext(z, points)
-    for i in unroll(1, N_BLOBS):
-        dot_product_ee(row_w_inv_ext, points + (i - 1) * DIM, points + i * DIM)
 
-    dens = Array(N_BLOBS * DIM)
+def derive_row_points(D, row_digests, row_points):
     for i in unroll(0, N_BLOBS):
-        dens[i * DIM] = points[i * DIM] - 1
-        for d in unroll(1, DIM):
-            dens[i * DIM + d] = points[i * DIM + d]
-        dot_product_ee(dens + i * DIM, row_coeffs + i * DIM, numerator)
+        derive_row_point(D, row_digests + i * DIGEST_LEN, i, row_points + i * DIM)
+    return
+
+
+def derive_row_point(D, row_digest, row_idx: Const, dest):
+    tag_row_x = domain_tag(3)
+    state = Array(DIGEST_LEN)
+    poseidon16_compress(D, tag_row_x, state)
+
+    state_2 = Array(DIGEST_LEN)
+    poseidon16_compress(state, row_digest, state_2)
+
+    tag_idx = domain_tag(1000 + row_idx)
+    point_digest = Array(DIGEST_LEN)
+    poseidon16_compress(state_2, tag_idx, point_digest)
+
+    for d in unroll(0, DIM):
+        dest[d] = point_digest[d]
+    return
+
+
+@inline
+def one_ext():
+    one = Array(DIM)
+    one[0] = 1
+    for d in unroll(1, DIM):
+        one[d] = 0
+    return one
+
+
+@inline
+def sub_ext(a, b, dest):
+    for d in unroll(0, DIM):
+        dest[d] = a[d] - b[d]
     return