From cafa2d0ddeeb927850a8cb3e42917b11be79309e Mon Sep 17 00:00:00 2001 From: vladfdp Date: Fri, 15 May 2026 21:35:31 +0200 Subject: [PATCH 1/3] update twiddles now doesn't recompute --- crates/whir/src/dft.rs | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/crates/whir/src/dft.rs b/crates/whir/src/dft.rs index 277597eb8..cce7e5056 100644 --- a/crates/whir/src/dft.rs +++ b/crates/whir/src/dft.rs @@ -62,13 +62,35 @@ impl EvalsDft { } pub(crate) fn update_twiddles(&self, fft_len: usize) { - // TODO: This recomputes the entire table from scratch if we - // need it to be larger, which is wasteful. let mut guard = self.twiddles.write().unwrap(); - let curr_max_fft_len = 1 << guard.len(); - if fft_len > curr_max_fft_len { + + let lg_n = log2_strict_usize(fft_len); + + //if the current size is already big enough we don't do anything + if lg_n <= guard.len() { + return; + } + + //if current twiddles is empty we compute from nothing + if guard.is_empty() { *guard = self.roots_of_unity_table(fft_len); + return; + } + + let diff_log = lg_n - guard.len(); + let nb_steps = 1 << diff_log; //number of missing points between each preexisting points + + let generator = F::two_adic_generator(lg_n); + + let table = guard[0].clone(); + let mut nth_root = Vec::with_capacity(table.len() * nb_steps); + for &base in &table { + nth_root.extend(generator.shifted_powers(base).take(nb_steps)); } + + *guard = (0..lg_n) + .map(|i| nth_root.iter().step_by(1 << i).copied().collect()) + .collect(); } } From 729ecfceea5fa4bf18fc71673e278d0643c0dc41 Mon Sep 17 00:00:00 2001 From: vladfdp Date: Fri, 15 May 2026 22:17:46 +0200 Subject: [PATCH 2/3] reusing old twiddles instead of reslicing everything --- crates/whir/src/dft.rs | 43 ++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/crates/whir/src/dft.rs b/crates/whir/src/dft.rs index cce7e5056..284f4a2ed 100644 --- a/crates/whir/src/dft.rs +++ b/crates/whir/src/dft.rs @@ -88,9 +88,12 @@ impl EvalsDft { nth_root.extend(generator.shifted_powers(base).take(nb_steps)); } - *guard = (0..lg_n) - .map(|i| nth_root.iter().step_by(1 << i).copied().collect()) - .collect(); + let old_twiddles = std::mem::take(&mut *guard); + let mut twiddles = Vec::with_capacity(diff_log + old_twiddles.len()); + twiddles.extend((0..diff_log).map(|i| nth_root.iter().step_by(1 << i).copied().collect::>())); + twiddles.extend(old_twiddles); + + *guard = twiddles; } } @@ -406,8 +409,8 @@ fn fft_double_layer_single_twiddle>( block: &mut DoubleLayerBlockDecomposition<'_, F>, butterfly: Fly, ) { - butterfly.apply_to_rows(block.0.0, block.0.1); - butterfly.apply_to_rows(block.1.0, block.1.1); + butterfly.apply_to_rows(block.0 .0, block.0 .1); + butterfly.apply_to_rows(block.1 .0, block.1 .1); } #[inline] @@ -416,8 +419,8 @@ fn fft_double_layer_double_twiddle, Fly1: Butterfly fly0: Fly0, fly1: Fly1, ) { - fly0.apply_to_rows(block.0.0, block.1.0); - fly1.apply_to_rows(block.0.1, block.1.1); + fly0.apply_to_rows(block.0 .0, block.1 .0); + fly1.apply_to_rows(block.0 .1, block.1 .1); } /// A type representing a decomposition of an FFT block into eight sub-blocks. @@ -432,10 +435,10 @@ fn fft_triple_layer_single_twiddle>( block: &mut TripleLayerBlockDecomposition<'_, F>, butterfly: Fly, ) { - butterfly.apply_to_rows(block.0.0.0, block.0.0.1); - butterfly.apply_to_rows(block.0.1.0, block.0.1.1); - butterfly.apply_to_rows(block.1.0.0, block.1.0.1); - butterfly.apply_to_rows(block.1.1.0, block.1.1.1); + butterfly.apply_to_rows(block.0 .0 .0, block.0 .0 .1); + butterfly.apply_to_rows(block.0 .1 .0, block.0 .1 .1); + butterfly.apply_to_rows(block.1 .0 .0, block.1 .0 .1); + butterfly.apply_to_rows(block.1 .1 .0, block.1 .1 .1); } #[inline] @@ -444,10 +447,10 @@ fn fft_triple_layer_double_twiddle, Fly1: Butterfly fly0: Fly0, fly1: Fly1, ) { - fly0.apply_to_rows(block.0.0.0, block.0.1.0); - fly1.apply_to_rows(block.0.0.1, block.0.1.1); - fly0.apply_to_rows(block.1.0.0, block.1.1.0); - fly1.apply_to_rows(block.1.0.1, block.1.1.1); + fly0.apply_to_rows(block.0 .0 .0, block.0 .1 .0); + fly1.apply_to_rows(block.0 .0 .1, block.0 .1 .1); + fly0.apply_to_rows(block.1 .0 .0, block.1 .1 .0); + fly1.apply_to_rows(block.1 .0 .1, block.1 .1 .1); } #[inline] @@ -458,10 +461,10 @@ fn fft_triple_layer_quad_twiddle>( fly2: Fly, fly3: Fly, ) { - fly0.apply_to_rows(block.0.0.0, block.1.0.0); - fly1.apply_to_rows(block.0.0.1, block.1.0.1); - fly2.apply_to_rows(block.0.1.0, block.1.1.0); - fly3.apply_to_rows(block.0.1.1, block.1.1.1); + fly0.apply_to_rows(block.0 .0 .0, block.1 .0 .0); + fly1.apply_to_rows(block.0 .0 .1, block.1 .0 .1); + fly2.apply_to_rows(block.0 .1 .0, block.1 .1 .0); + fly3.apply_to_rows(block.0 .1 .1, block.1 .1 .1); } /// Estimates the optimal workload size for `T` to fit in L1 cache. @@ -594,7 +597,7 @@ mod tests { use field::{PrimeCharacteristicRing, TwoAdicField}; use koala_bear::{KoalaBear, QuinticExtensionFieldKB}; use poly::*; - use rand::{RngExt, SeedableRng, rngs::StdRng}; + use rand::{rngs::StdRng, RngExt, SeedableRng}; use crate::*; From ae4b1dfd795f45f122c498f7d7ed4d68e7f0ab83 Mon Sep 17 00:00:00 2001 From: vladfdp Date: Fri, 15 May 2026 22:35:41 +0200 Subject: [PATCH 3/3] fixed weird fmt --- crates/whir/src/dft.rs | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/crates/whir/src/dft.rs b/crates/whir/src/dft.rs index 284f4a2ed..8f2901c2e 100644 --- a/crates/whir/src/dft.rs +++ b/crates/whir/src/dft.rs @@ -409,8 +409,8 @@ fn fft_double_layer_single_twiddle>( block: &mut DoubleLayerBlockDecomposition<'_, F>, butterfly: Fly, ) { - butterfly.apply_to_rows(block.0 .0, block.0 .1); - butterfly.apply_to_rows(block.1 .0, block.1 .1); + butterfly.apply_to_rows(block.0.0, block.0.1); + butterfly.apply_to_rows(block.1.0, block.1.1); } #[inline] @@ -419,8 +419,8 @@ fn fft_double_layer_double_twiddle, Fly1: Butterfly fly0: Fly0, fly1: Fly1, ) { - fly0.apply_to_rows(block.0 .0, block.1 .0); - fly1.apply_to_rows(block.0 .1, block.1 .1); + fly0.apply_to_rows(block.0.0, block.1.0); + fly1.apply_to_rows(block.0.1, block.1.1); } /// A type representing a decomposition of an FFT block into eight sub-blocks. @@ -435,10 +435,10 @@ fn fft_triple_layer_single_twiddle>( block: &mut TripleLayerBlockDecomposition<'_, F>, butterfly: Fly, ) { - butterfly.apply_to_rows(block.0 .0 .0, block.0 .0 .1); - butterfly.apply_to_rows(block.0 .1 .0, block.0 .1 .1); - butterfly.apply_to_rows(block.1 .0 .0, block.1 .0 .1); - butterfly.apply_to_rows(block.1 .1 .0, block.1 .1 .1); + butterfly.apply_to_rows(block.0.0.0, block.0.0.1); + butterfly.apply_to_rows(block.0.1.0, block.0.1.1); + butterfly.apply_to_rows(block.1.0.0, block.1.0.1); + butterfly.apply_to_rows(block.1.1.0, block.1.1.1); } #[inline] @@ -447,10 +447,10 @@ fn fft_triple_layer_double_twiddle, Fly1: Butterfly fly0: Fly0, fly1: Fly1, ) { - fly0.apply_to_rows(block.0 .0 .0, block.0 .1 .0); - fly1.apply_to_rows(block.0 .0 .1, block.0 .1 .1); - fly0.apply_to_rows(block.1 .0 .0, block.1 .1 .0); - fly1.apply_to_rows(block.1 .0 .1, block.1 .1 .1); + fly0.apply_to_rows(block.0.0.0, block.0.1.0); + fly1.apply_to_rows(block.0.0.1, block.0.1.1); + fly0.apply_to_rows(block.1.0.0, block.1.1.0); + fly1.apply_to_rows(block.1.0.1, block.1.1.1); } #[inline] @@ -461,10 +461,10 @@ fn fft_triple_layer_quad_twiddle>( fly2: Fly, fly3: Fly, ) { - fly0.apply_to_rows(block.0 .0 .0, block.1 .0 .0); - fly1.apply_to_rows(block.0 .0 .1, block.1 .0 .1); - fly2.apply_to_rows(block.0 .1 .0, block.1 .1 .0); - fly3.apply_to_rows(block.0 .1 .1, block.1 .1 .1); + fly0.apply_to_rows(block.0.0.0, block.1.0.0); + fly1.apply_to_rows(block.0.0.1, block.1.0.1); + fly2.apply_to_rows(block.0.1.0, block.1.1.0); + fly3.apply_to_rows(block.0.1.1, block.1.1.1); } /// Estimates the optimal workload size for `T` to fit in L1 cache. @@ -597,7 +597,7 @@ mod tests { use field::{PrimeCharacteristicRing, TwoAdicField}; use koala_bear::{KoalaBear, QuinticExtensionFieldKB}; use poly::*; - use rand::{rngs::StdRng, RngExt, SeedableRng}; + use rand::{RngExt, SeedableRng, rngs::StdRng}; use crate::*;