diff --git a/src/hpc/audio/bands.rs b/src/hpc/audio/bands.rs new file mode 100644 index 00000000..97526bd4 --- /dev/null +++ b/src/hpc/audio/bands.rs @@ -0,0 +1,140 @@ +//! Opus CELT band energy computation. +//! +//! 21 quasi-Bark critical bands at 48kHz. Each band's energy is the +//! gain component of gain-shape quantization. The normalized coefficients +//! (after dividing by band energy) are the shape component → PVQ. +//! +//! Band boundaries from Opus `celt/modes.c` eBands48. + +/// Opus CELT band boundaries at 48kHz, 960-sample frames (480 MDCT bins). +/// 22 boundaries define 21 bands. Bin index = frequency / (48000 / 960). +/// Band 0: bins 0-3 (~0-200 Hz), Band 20: bins 400-480 (~20-24 kHz). +pub const CELT_BANDS_48K: [usize; 22] = [ + 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 44, 52, 60, 68, 80, 96, + 112, 136, 160, 200, 256, 480, +]; + +/// Number of critical bands. +pub const N_BANDS: usize = 21; + +/// Compute band energies from MDCT coefficients. +/// +/// Returns 21 f32 energies (sqrt of sum-of-squares per band). +/// These are the "gain" in gain-shape quantization. +pub fn band_energies(coeffs: &[f32]) -> [f32; N_BANDS] { + let mut energies = [0.0f32; N_BANDS]; + for band in 0..N_BANDS { + let lo = CELT_BANDS_48K[band]; + let hi = CELT_BANDS_48K[band + 1].min(coeffs.len()); + let mut sum_sq = 0.0f32; + for i in lo..hi { + if i < coeffs.len() { + sum_sq += coeffs[i] * coeffs[i]; + } + } + energies[band] = sum_sq.sqrt(); + } + energies +} + +/// Normalize MDCT coefficients by band energy (produce unit-energy shape). +/// +/// After normalization, each band has unit energy. The shape encodes +/// the spectral tilt within the band. PVQ quantizes this shape. +pub fn normalize_bands(coeffs: &[f32], energies: &[f32; N_BANDS]) -> Vec { + let mut normalized = coeffs.to_vec(); + for band in 0..N_BANDS { + let lo = CELT_BANDS_48K[band]; + let hi = CELT_BANDS_48K[band + 1].min(normalized.len()); + let e = energies[band].max(1e-10); + for i in lo..hi { + if i < normalized.len() { + normalized[i] /= e; + } + } + } + normalized +} + +/// Denormalize: multiply shape coefficients by band energies. +/// +/// Inverse of normalize_bands. Used in the decoder path: +/// PVQ-decoded shape × band energies → MDCT coefficients → iMDCT → PCM. +pub fn denormalize_bands(shape: &[f32], energies: &[f32; N_BANDS]) -> Vec { + let mut coeffs = shape.to_vec(); + for band in 0..N_BANDS { + let lo = CELT_BANDS_48K[band]; + let hi = CELT_BANDS_48K[band + 1].min(coeffs.len()); + let e = energies[band]; + for i in lo..hi { + if i < coeffs.len() { + coeffs[i] *= e; + } + } + } + coeffs +} + +/// Pack band energies to BF16 (21 × 2 bytes = 42 bytes). +pub fn energies_to_bf16(energies: &[f32; N_BANDS]) -> [u16; N_BANDS] { + let mut bf16 = [0u16; N_BANDS]; + for i in 0..N_BANDS { + let bits = energies[i].to_bits(); + let lsb = (bits >> 16) & 1; + let biased = bits.wrapping_add(0x7FFF).wrapping_add(lsb); + bf16[i] = (biased >> 16) as u16; + } + bf16 +} + +/// Unpack BF16 band energies to f32. +pub fn bf16_to_energies(bf16: &[u16; N_BANDS]) -> [f32; N_BANDS] { + let mut energies = [0.0f32; N_BANDS]; + for i in 0..N_BANDS { + energies[i] = f32::from_bits((bf16[i] as u32) << 16); + } + energies +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn band_count() { + assert_eq!(CELT_BANDS_48K.len(), N_BANDS + 1); + } + + #[test] + fn band_energies_nonzero() { + let coeffs: Vec = (0..480).map(|i| (i as f32 * 0.05).sin()).collect(); + let e = band_energies(&coeffs); + let total: f32 = e.iter().sum(); + assert!(total > 0.1, "Total band energy too low: {}", total); + } + + #[test] + fn normalize_denormalize_roundtrip() { + let coeffs: Vec = (0..480).map(|i| (i as f32 * 0.1).sin() * 2.0).collect(); + let e = band_energies(&coeffs); + let shape = normalize_bands(&coeffs, &e); + let recovered = denormalize_bands(&shape, &e); + + for (orig, rec) in coeffs.iter().zip(recovered.iter()) { + assert!((orig - rec).abs() < 0.01, + "Roundtrip mismatch: {} vs {}", orig, rec); + } + } + + #[test] + fn bf16_energy_roundtrip() { + let e = [1.0, 0.5, 2.0, 0.001, 100.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]; + let bf16 = energies_to_bf16(&e); + let recovered = bf16_to_energies(&bf16); + for i in 0..5 { + let err = (e[i] - recovered[i]).abs() / e[i].max(1e-6); + assert!(err < 0.02, "BF16 roundtrip error for band {}: {:.4}", i, err); + } + } +} diff --git a/src/hpc/audio/codec.rs b/src/hpc/audio/codec.rs new file mode 100644 index 00000000..02526415 --- /dev/null +++ b/src/hpc/audio/codec.rs @@ -0,0 +1,152 @@ +//! AudioFrame: 48-byte codec for one frame of audio. +//! +//! The complete encode/decode pipeline: +//! encode: PCM → MDCT → band energies (gain) + PVQ (shape) → AudioFrame +//! decode: AudioFrame → band energies × PVQ shape → iMDCT → PCM +//! +//! One AudioFrame = one graph node in lance-graph. 48 bytes = CAM-compatible. + +use super::mdct; +use super::bands; +use super::pvq; + +/// One audio frame: 42 bytes gain + 6 bytes shape = 48 bytes. +/// +/// Maps to SPO: +/// Subject = spectral (WHAT frequencies) → band energies +/// Predicate = temporal (WHEN they happen) → PVQ summary bytes 2-3 +/// Object = harmonic (HOW they ring) → PVQ summary bytes 4-5 +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct AudioFrame { + /// 21 band energies as BF16 (42 bytes). The gain component. + pub band_energies: [u16; bands::N_BANDS], + /// PVQ shape fingerprint (6 bytes). HEEL/HIP/TWIG levels. + pub pvq_summary: [u8; 6], +} + +impl AudioFrame { + /// Total byte size: 42 (energies) + 6 (pvq) = 48. + pub const BYTE_SIZE: usize = bands::N_BANDS * 2 + 6; + + /// Encode one frame of PCM audio. + /// + /// `pcm`: mono f32 samples (padded to power of 2 internally). + /// `pvq_k`: PVQ pulse budget per band (higher = better quality, more bits). + pub fn encode(pcm: &[f32], pvq_k: u32) -> Self { + // MDCT: time → frequency + let coeffs = mdct::mdct_forward(pcm); + + // Band energies (gain) + let energies = bands::band_energies(&coeffs); + let bf16_energies = bands::energies_to_bf16(&energies); + + // Normalize bands (remove gain, keep shape) + let shape = bands::normalize_bands(&coeffs, &energies); + + // PVQ encode the shape of the first (most important) band + // For production: encode all 21 bands. For the POC: just first band's summary. + let first_band_end = bands::CELT_BANDS_48K[1].min(shape.len()); + let pulses = pvq::pvq_encode(&shape[..first_band_end], pvq_k); + let summary = pvq::pvq_summary(&pulses); + + AudioFrame { + band_energies: bf16_energies, + pvq_summary: summary, + } + } + + /// Decode: reconstruct PCM from AudioFrame + optional full PVQ data. + /// + /// Without PVQ data: uses band energies only (coarse reconstruction). + /// The PVQ summary gives the HHTL routing info, not the full shape. + /// For full quality: pass the per-band PVQ pulse vectors. + pub fn decode_coarse(&self) -> Vec { + let energies = bands::bf16_to_energies(&self.band_energies); + + // Synthesize a simple spectral envelope from band energies + // Each band gets a flat spectrum at its energy level + let n2 = bands::CELT_BANDS_48K[bands::N_BANDS].min(480); + let mut coeffs = vec![0.0f32; n2]; + for band in 0..bands::N_BANDS { + let lo = bands::CELT_BANDS_48K[band]; + let hi = bands::CELT_BANDS_48K[band + 1].min(n2); + let n_bins = (hi - lo).max(1); + let per_bin = energies[band] / (n_bins as f32).sqrt(); + for i in lo..hi { + // Alternate signs for a more natural-sounding shape + let sign = if (i - lo) % 2 == 0 { 1.0 } else { -1.0 }; + coeffs[i] = per_bin * sign; + } + } + + // iMDCT: frequency → time + mdct::mdct_backward(&coeffs) + } + + /// Serialize to 48 bytes. + pub fn to_bytes(&self) -> [u8; Self::BYTE_SIZE] { + let mut bytes = [0u8; Self::BYTE_SIZE]; + for i in 0..bands::N_BANDS { + let b = self.band_energies[i].to_le_bytes(); + bytes[i * 2] = b[0]; + bytes[i * 2 + 1] = b[1]; + } + bytes[42..48].copy_from_slice(&self.pvq_summary); + bytes + } + + /// Deserialize from 48 bytes. + pub fn from_bytes(bytes: &[u8; Self::BYTE_SIZE]) -> Self { + let mut band_energies = [0u16; bands::N_BANDS]; + for i in 0..bands::N_BANDS { + band_energies[i] = u16::from_le_bytes([bytes[i * 2], bytes[i * 2 + 1]]); + } + let mut pvq_summary = [0u8; 6]; + pvq_summary.copy_from_slice(&bytes[42..48]); + AudioFrame { band_energies, pvq_summary } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use core::f32::consts::PI; + + #[test] + fn frame_48_bytes() { + assert_eq!(AudioFrame::BYTE_SIZE, 48); + } + + #[test] + fn encode_decode_nonzero() { + // 440Hz sine at 48kHz, 1024 samples + let pcm: Vec = (0..1024) + .map(|i| (2.0 * PI * 440.0 * i as f32 / 48000.0).sin()) + .collect(); + + let frame = AudioFrame::encode(&pcm, 8); + + // Band energies should be nonzero (at least the band containing 440Hz) + let total_energy: f32 = frame.band_energies.iter() + .map(|&b| f32::from_bits((b as u32) << 16)) + .sum(); + assert!(total_energy > 0.01, "Encoded frame has no energy: {}", total_energy); + + // Decode + let decoded = frame.decode_coarse(); + assert!(!decoded.is_empty()); + let decoded_energy: f32 = decoded.iter().map(|s| s * s).sum(); + assert!(decoded_energy > 1e-10, "Decoded has no energy: {}", decoded_energy); + } + + #[test] + fn serialize_roundtrip() { + let frame = AudioFrame { + band_energies: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21], + pvq_summary: [0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF], + }; + let bytes = frame.to_bytes(); + let recovered = AudioFrame::from_bytes(&bytes); + assert_eq!(frame, recovered); + } +} diff --git a/src/hpc/audio/mdct.rs b/src/hpc/audio/mdct.rs new file mode 100644 index 00000000..970a969b --- /dev/null +++ b/src/hpc/audio/mdct.rs @@ -0,0 +1,184 @@ +//! MDCT / iMDCT: Modified Discrete Cosine Transform. +//! +//! Transcoded from Opus CELT `celt/mdct.c`. +//! Forward: 960 PCM samples → 480 frequency coefficients. +//! Inverse: 480 coefficients → 960 PCM samples (with overlap-add). +//! +//! MDCT is: window → fold → FFT → post-rotate. +//! iMDCT is the reverse: pre-rotate → IFFT → unfold → window. +//! +//! Uses `hpc::fft::fft_f32` / `ifft_f32` internally. No external deps. + +use crate::hpc::fft; +use core::f32::consts::PI; + +/// Opus CELT frame size at 48kHz: 960 samples = 20ms. +pub const FRAME_SIZE: usize = 960; +/// MDCT output: N/2 = 480 frequency coefficients. +pub const MDCT_SIZE: usize = FRAME_SIZE / 2; + +/// Sine window for MDCT (Opus uses a sine window for CELT mode). +/// w[n] = sin(π/N × (n + 0.5)) +pub fn sine_window(n: usize) -> Vec { + (0..n).map(|i| (PI / n as f32 * (i as f32 + 0.5)).sin()).collect() +} + +/// Forward MDCT: time-domain → frequency-domain. +/// +/// Input: `pcm` — N samples (N must be power of 2, typically 960 padded to 1024). +/// Output: N/2 frequency coefficients. +/// +/// Algorithm (Type-IV DCT via FFT): +/// 1. Window the input +/// 2. Fold: combine first and second half with sign flips +/// 3. Pre-rotate by exp(-j·π/2N·(2n+1+N/2)) +/// 4. FFT of N/4 complex values +/// 5. Post-rotate and extract real parts +pub fn mdct_forward(pcm: &[f32]) -> Vec { + // Pad to next power of 2 if needed + let n = pcm.len().next_power_of_two(); + let n2 = n / 2; + let n4 = n / 4; + + // Window + let window = sine_window(n); + let mut windowed = vec![0.0f32; n]; + for i in 0..pcm.len().min(n) { + windowed[i] = pcm[i] * window[i]; + } + + // Fold + pre-rotate → N/4 complex values for FFT + let mut fft_buf = vec![0.0f32; n4 * 2]; // interleaved complex + + for k in 0..n4 { + // Folding indices (from CELT mdct.c) + let cos_val = (PI / n as f32 * (2.0 * k as f32 + 1.0 + n2 as f32 / 2.0) * 0.5).cos(); + let sin_val = (PI / n as f32 * (2.0 * k as f32 + 1.0 + n2 as f32 / 2.0) * 0.5).sin(); + + // Combine samples from symmetric positions + let a = windowed.get(2 * k).copied().unwrap_or(0.0); + let b = windowed.get(n - 1 - 2 * k).copied().unwrap_or(0.0); + let c = windowed.get(n2 + 2 * k).copied().unwrap_or(0.0); + let d = if n2 > 2 * k + 1 { windowed[n2 - 1 - 2 * k] } else { 0.0 }; + + let re = a - c; + let im = d + b; // deliberate sign from MDCT folding + + // Pre-rotate + fft_buf[2 * k] = re * cos_val + im * sin_val; + fft_buf[2 * k + 1] = im * cos_val - re * sin_val; + } + + // FFT + fft::fft_f32(&mut fft_buf, n4); + + // Post-rotate → extract MDCT coefficients + let mut output = vec![0.0f32; n2]; + for k in 0..n4 { + let cos_val = (PI / n as f32 * (2.0 * k as f32 + 1.0 + n2 as f32 / 2.0) * 0.5).cos(); + let sin_val = (PI / n as f32 * (2.0 * k as f32 + 1.0 + n2 as f32 / 2.0) * 0.5).sin(); + + let re = fft_buf[2 * k]; + let im = fft_buf[2 * k + 1]; + + // Two output coefficients per FFT bin + output[2 * k] = re * cos_val + im * sin_val; + output[2 * k + 1] = im * cos_val - re * sin_val; + } + + output +} + +/// Inverse MDCT: frequency-domain → time-domain. +/// +/// Input: N/2 frequency coefficients. +/// Output: N time-domain samples (needs overlap-add with previous frame). +pub fn mdct_backward(coeffs: &[f32]) -> Vec { + // Pad to power of 2 if needed + let n2_raw = coeffs.len(); + let n = (n2_raw * 2).next_power_of_two(); + let n2 = n / 2; + let n4 = n / 4; + let mut padded = vec![0.0f32; n2]; + padded[..n2_raw.min(n2)].copy_from_slice(&coeffs[..n2_raw.min(n2)]); + + // Pre-rotate → N/4 complex values + let mut fft_buf = vec![0.0f32; n4 * 2]; + for k in 0..n4 { + let cos_val = (PI / n as f32 * (2.0 * k as f32 + 1.0 + n2 as f32 / 2.0) * 0.5).cos(); + let sin_val = (PI / n as f32 * (2.0 * k as f32 + 1.0 + n2 as f32 / 2.0) * 0.5).sin(); + + let a = padded.get(2 * k).copied().unwrap_or(0.0); + let b = padded.get(2 * k + 1).copied().unwrap_or(0.0); + + fft_buf[2 * k] = a * cos_val + b * sin_val; + fft_buf[2 * k + 1] = b * cos_val - a * sin_val; + } + + // Inverse FFT + fft::ifft_f32(&mut fft_buf, n4); + + // Post-rotate + unfold → N time-domain samples + let window = sine_window(n); + let mut output = vec![0.0f32; n]; + + for k in 0..n4 { + let cos_val = (PI / n as f32 * (2.0 * k as f32 + 1.0 + n2 as f32 / 2.0) * 0.5).cos(); + let sin_val = (PI / n as f32 * (2.0 * k as f32 + 1.0 + n2 as f32 / 2.0) * 0.5).sin(); + + let re = fft_buf[2 * k]; + let im = fft_buf[2 * k + 1]; + + let y_re = re * cos_val + im * sin_val; + let y_im = im * cos_val - re * sin_val; + + // Unfold to symmetric positions + let idx_a = 2 * k; + let idx_b = n - 1 - 2 * k; + if idx_a < n { output[idx_a] = y_re * window[idx_a]; } + if idx_b < n { output[idx_b] = y_im * window[idx_b]; } + } + + // Scale (MDCT normalization: 2/N) + let scale = 2.0 / n as f32; + for s in &mut output { *s *= scale; } + + output +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn mdct_round_trip() { + // Generate a simple test signal (sum of two sinusoids) + let n = 1024; // power of 2 + let pcm: Vec = (0..n) + .map(|i| { + let t = i as f32 / 48000.0; + (2.0 * PI * 440.0 * t).sin() + 0.5 * (2.0 * PI * 880.0 * t).sin() + }) + .collect(); + + let coeffs = mdct_forward(&pcm); + assert_eq!(coeffs.len(), n / 2); + + let reconstructed = mdct_backward(&coeffs); + assert_eq!(reconstructed.len(), n); + + // Check non-trivial output (not all zeros) + // Note: perfect reconstruction requires overlap-add of consecutive frames. + // Single-frame roundtrip preserves energy but not exact waveform. + let energy: f32 = reconstructed.iter().map(|s| s * s).sum(); + assert!(energy > 1e-6, "Reconstructed signal has no energy: {}", energy); + } + + #[test] + fn mdct_coeffs_nonzero() { + let pcm: Vec = (0..512).map(|i| (i as f32 * 0.1).sin()).collect(); + let coeffs = mdct_forward(&pcm); + let max_coeff = coeffs.iter().map(|c| c.abs()).fold(0.0f32, f32::max); + assert!(max_coeff > 0.01, "MDCT coefficients are all near zero"); + } +} diff --git a/src/hpc/audio/mod.rs b/src/hpc/audio/mod.rs new file mode 100644 index 00000000..d156944b --- /dev/null +++ b/src/hpc/audio/mod.rs @@ -0,0 +1,11 @@ +//! Audio primitives transcoded from Opus CELT. +//! +//! MDCT, band energy extraction, PVQ, and AudioFrame for the +//! HHTL cascade → waveform synthesis pipeline. +//! +//! Zero external dependencies — uses `hpc::fft` internally. + +pub mod mdct; +pub mod bands; +pub mod pvq; +pub mod codec; diff --git a/src/hpc/audio/pvq.rs b/src/hpc/audio/pvq.rs new file mode 100644 index 00000000..1733a055 --- /dev/null +++ b/src/hpc/audio/pvq.rs @@ -0,0 +1,161 @@ +//! Pyramid Vector Quantizer (PVQ) — from Opus CELT `celt/vq.c`. +//! +//! Distributes K integer pulses across N dimensions on the L1 hypersphere. +//! sum(|pulse_i|) = K. Algebraic — no trained codebook. +//! +//! The number of valid codewords is C(N+K-1, K), indexed combinatorially +//! (CWRS encoding). This IS the shape component of gain-shape quantization. +//! +//! For the HHTL pipeline: PVQ encodes the normalized band shape at TWIG level. + +/// Encode: project a unit-energy band onto the PVQ lattice. +/// +/// Input: `band` — normalized band coefficients (unit L2 norm). +/// `k`: number of pulses to distribute. +/// Returns: integer pulse vector, sum(|pulses|) == k. +pub fn pvq_encode(band: &[f32], k: u32) -> Vec { + let n = band.len(); + if n == 0 || k == 0 { + return vec![0; n]; + } + + // Greedy pulse allocation (from Opus alg_quant): + // Repeatedly place the next pulse at the dimension that + // maximizes the inner product with the target. + let mut pulses = vec![0i32; n]; + let mut remaining = k as i32; + + while remaining > 0 { + // Find dimension with largest residual magnitude + let mut best_dim = 0; + let mut best_val = 0.0f32; + for d in 0..n { + let target = band[d]; + let current = pulses[d] as f32; + // How much would adding ±1 pulse improve alignment? + let benefit = (target.abs() - current.abs()).abs(); + if benefit > best_val || (benefit == best_val && target.abs() > band[best_dim].abs()) { + best_val = benefit; + best_dim = d; + } + } + // Place pulse with same sign as target + if band[best_dim] >= 0.0 { + pulses[best_dim] += 1; + } else { + pulses[best_dim] -= 1; + } + remaining -= 1; + } + + pulses +} + +/// Decode: convert pulse vector back to unit-energy coefficients. +/// +/// Normalizes the pulse vector to unit L2 norm. +pub fn pvq_decode(pulses: &[i32]) -> Vec { + let n = pulses.len(); + let mut output = vec![0.0f32; n]; + let mut norm_sq = 0.0f64; + for i in 0..n { + output[i] = pulses[i] as f32; + norm_sq += (pulses[i] as f64) * (pulses[i] as f64); + } + let norm = (norm_sq.sqrt()).max(1e-10) as f32; + for v in &mut output { + *v /= norm; + } + output +} + +/// Compute the L1 norm of a pulse vector (should equal K). +pub fn pvq_l1_norm(pulses: &[i32]) -> u32 { + pulses.iter().map(|&p| p.unsigned_abs()).sum() +} + +/// PVQ summary: compress pulse vector to 6-byte fingerprint for HHTL. +/// +/// Maps to SPO: Subject = spectral, Predicate = temporal, Object = harmonic. +/// Bytes 0-1 (HEEL): coarse spectral category (sign pattern of dominant dims) +/// Bytes 2-3 (HIP): energy distribution pattern +/// Bytes 4-5 (TWIG): fine harmonic structure +pub fn pvq_summary(pulses: &[i32]) -> [u8; 6] { + let n = pulses.len(); + let mut summary = [0u8; 6]; + + // HEEL (bytes 0-1): sign pattern of first 16 dims → 16 bits + let mut sign_bits = 0u16; + for i in 0..n.min(16) { + if pulses[i] > 0 { sign_bits |= 1 << i; } + } + summary[0] = sign_bits as u8; + summary[1] = (sign_bits >> 8) as u8; + + // HIP (bytes 2-3): which quarter has most energy + let q = n / 4; + let mut quarter_energy = [0u32; 4]; + for i in 0..n { + let qi = (i * 4 / n).min(3); + quarter_energy[qi] += pulses[i].unsigned_abs(); + } + let total = quarter_energy.iter().sum::().max(1); + for i in 0..4 { + let frac = (quarter_energy[i] * 255 / total) as u8; + if i < 2 { summary[2] |= frac >> (4 * (1 - i)); } + else { summary[3] |= frac >> (4 * (3 - i)); } + } + + // TWIG (bytes 4-5): max pulse position + magnitude + let (max_pos, max_val) = pulses.iter().enumerate() + .max_by_key(|(_, &p)| p.unsigned_abs()) + .map(|(i, &p)| (i, p.unsigned_abs())) + .unwrap_or((0, 0)); + summary[4] = (max_pos % 256) as u8; + summary[5] = (max_val % 256) as u8; + + summary +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn pvq_l1_correct() { + let band = vec![0.5, -0.3, 0.7, -0.1, 0.2]; + let pulses = pvq_encode(&band, 8); + assert_eq!(pvq_l1_norm(&pulses), 8, "L1 norm should equal K=8"); + } + + #[test] + fn pvq_signs_match() { + let band = vec![1.0, -0.5, 0.3, -0.8, 0.0]; + let pulses = pvq_encode(&band, 10); + // Dominant pulse signs should match input signs + for i in 0..band.len() { + if band[i].abs() > 0.3 { + assert_eq!(pulses[i].signum(), band[i].signum() as i32, + "Sign mismatch at dim {}: pulse={}, band={}", i, pulses[i], band[i]); + } + } + } + + #[test] + fn pvq_decode_unit_norm() { + let pulses = vec![3, -2, 1, 0, -4]; + let decoded = pvq_decode(&pulses); + let norm: f32 = decoded.iter().map(|v| v * v).sum::().sqrt(); + assert!((norm - 1.0).abs() < 0.01, "Decoded should be unit norm: {}", norm); + } + + #[test] + fn pvq_summary_deterministic() { + let pulses = vec![3, -2, 1, 0, -4, 2, 0, 1]; + let s1 = pvq_summary(&pulses); + let s2 = pvq_summary(&pulses); + assert_eq!(s1, s2); + // Should be non-trivial + assert!(s1.iter().any(|&b| b != 0)); + } +} diff --git a/src/hpc/mod.rs b/src/hpc/mod.rs index a30598a7..e8cfa50b 100644 --- a/src/hpc/mod.rs +++ b/src/hpc/mod.rs @@ -226,6 +226,9 @@ pub mod jitson; pub mod jitson_cranelift; pub mod ocr_simd; pub mod ocr_felt; +/// Audio primitives: MDCT, band energies, PVQ, AudioFrame codec. +/// Transcoded from Opus CELT for the HHTL cascade → waveform pipeline. +pub mod audio; #[cfg(test)] mod e2e_tests {