Skip to content
19 changes: 19 additions & 0 deletions src/simd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,25 @@ mod scalar {
for i in 0..16 { out[i] = self.0[i].abs(); }
Self(out)
}
/// Sign-extend the first 16 lanes of an `i16` slice into an `I32x16`.
///
/// Panics if `s` holds fewer than 16 elements.
#[inline(always)]
pub fn from_i16_slice(s: &[i16]) -> Self {
    assert!(s.len() >= 16);
    let mut widened = [0i32; 16];
    for (dst, &src) in widened.iter_mut().zip(&s[..16]) {
        *dst = i32::from(src);
    }
    Self(widened)
}
/// Narrow each lane to `i16` by truncation (identical to `as i16` per lane).
#[inline(always)]
pub fn to_i16_array(self) -> [i16; 16] {
    let mut narrowed = [0i16; 16];
    for (dst, &lane) in narrowed.iter_mut().zip(self.0.iter()) {
        *dst = lane as i16;
    }
    narrowed
}
/// Bitmask with bit `i` set when lane `i` is non-negative.
#[inline(always)]
pub fn cmpge_zero_mask(self) -> u16 {
    self.0
        .iter()
        .enumerate()
        .filter(|&(_, &lane)| lane >= 0)
        .fold(0u16, |m, (i, _)| m | (1u16 << i))
}
}

impl Mul for I32x16 {
Expand Down
25 changes: 25 additions & 0 deletions src/simd_avx2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -843,6 +843,31 @@ impl I32x16 {
/// Lane-wise maximum of `self` and `other`.
#[inline(always)]
pub fn simd_max(self, other: Self) -> Self {
    let mut maxed = [0i32; 16];
    for (i, slot) in maxed.iter_mut().enumerate() {
        *slot = self.0[i].max(other.0[i]);
    }
    Self(maxed)
}
/// Convert each `i32` lane to `f32` (round-to-nearest for values above 2^24).
#[inline(always)]
pub fn cast_f32(self) -> F32x16 {
    let mut floats = [0.0f32; 16];
    for (i, slot) in floats.iter_mut().enumerate() {
        *slot = self.0[i] as f32;
    }
    F32x16::from_array(floats)
}
/// Lane-wise absolute value.
#[inline(always)]
pub fn abs(self) -> Self {
    let mut lanes = self.0;
    for lane in lanes.iter_mut() {
        *lane = lane.abs();
    }
    Self(lanes)
}

/// Load 16 × i16, sign-extend to 16 × i32.
///
/// Panics if `s` holds fewer than 16 elements.
#[inline(always)]
pub fn from_i16_slice(s: &[i16]) -> Self {
    assert!(s.len() >= 16);
    let mut widened = [0i32; 16];
    for (dst, &src) in widened.iter_mut().zip(&s[..16]) {
        *dst = i32::from(src);
    }
    Self(widened)
}

/// Narrow 16 × i32 to 16 × i16 (truncation).
#[inline(always)]
pub fn to_i16_array(self) -> [i16; 16] {
    let mut narrowed = [0i16; 16];
    for (dst, &lane) in narrowed.iter_mut().zip(self.0.iter()) {
        *dst = lane as i16;
    }
    narrowed
}

/// Mask: bit i set where lane i >= 0.
#[inline(always)]
pub fn cmpge_zero_mask(self) -> u16 {
    self.0
        .iter()
        .enumerate()
        .fold(0u16, |m, (i, &lane)| if lane >= 0 { m | (1u16 << i) } else { m })
}
}
impl Mul for I32x16 {
    type Output = Self;

    /// Lane-wise wrapping multiplication (matches hardware vpmulld semantics).
    #[inline(always)]
    fn mul(self, r: Self) -> Self {
        let mut prod = [0i32; 16];
        for (i, slot) in prod.iter_mut().enumerate() {
            *slot = self.0[i].wrapping_mul(r.0[i]);
        }
        Self(prod)
    }
}
impl MulAssign for I32x16 {
    /// Lane-wise wrapping multiply-assign; delegates to the `Mul` impl.
    #[inline(always)]
    fn mul_assign(&mut self, r: Self) {
        *self = *self * r;
    }
}
Expand Down
39 changes: 34 additions & 5 deletions src/simd_avx512.rs
Original file line number Diff line number Diff line change
Expand Up @@ -758,6 +758,40 @@ impl I32x16 {
unsafe { _mm512_reduce_max_epi32(self.0) }
}

// ── Base17 i16[17] operations: load-widen, abs, narrow ──────────────
// Used by bgz17_bridge.rs for L1 distance, weighted L1, sign agreement, xor_bind.

/// Load 16 × i16 from slice, sign-extend to 16 × i32.
/// This is the first step of every Base17 kernel: i16 → i32 to avoid overflow.
///
/// Panics if `s` holds fewer than 16 elements.
#[inline(always)]
pub fn from_i16_slice(s: &[i16]) -> Self {
    assert!(s.len() >= 16);
    // SAFETY: the assert above guarantees at least 16 i16s (32 bytes) are
    // readable at `s.as_ptr()`. `_mm256_loadu_si256` permits unaligned
    // addresses, and `_mm512_cvtepi16_epi32` (vpmovsxwd) sign-extends each
    // i16 lane to i32 entirely in-register.
    Self(unsafe { _mm512_cvtepi16_epi32(_mm256_loadu_si256(s.as_ptr() as *const __m256i)) })
}

/// Absolute value per lane.
#[inline(always)]
pub fn abs(self) -> Self {
    // SAFETY: register-only lane-wise operation (vpabsd); no memory access.
    Self(unsafe { _mm512_abs_epi32(self.0) })
}

/// Narrow 16 × i32 back to 16 × i16 (truncation, no saturation).
#[inline(always)]
pub fn to_i16_array(self) -> [i16; 16] {
    unsafe {
        // SAFETY: `_mm512_cvtepi32_epi16` (vpmovdw) truncates each i32 lane
        // to its low 16 bits with no saturation. The 32-byte unaligned store
        // targets `arr`, a local [i16; 16] which is exactly 32 bytes, so the
        // write stays in bounds.
        let packed = _mm512_cvtepi32_epi16(self.0);
        let mut arr = [0i16; 16];
        _mm256_storeu_si256(arr.as_mut_ptr() as *mut __m256i, packed);
        arr
    }
}

/// Compare >= 0: returns 16-bit mask. Bit i set where lane i >= 0.
#[inline(always)]
pub fn cmpge_zero_mask(self) -> u16 {
    // SAFETY: register-only compare against a zeroed vector; the resulting
    // __mmask16 converts directly to u16.
    unsafe { _mm512_cmpge_epi32_mask(self.0, _mm512_setzero_si512()) }
}

#[inline(always)]
pub fn simd_min(self, other: Self) -> Self {
Self(unsafe { _mm512_min_epi32(self.0, other.0) })
Expand All @@ -773,11 +807,6 @@ impl I32x16 {
// Convert each i32 lane to f32 (vcvtdq2ps).
pub fn cast_f32(self) -> F32x16 {
    // SAFETY: in-register lane-wise conversion; no memory access.
    F32x16(unsafe { _mm512_cvtepi32_ps(self.0) })
}

#[inline(always)]
pub fn abs(self) -> Self {
    // SAFETY: register-only lane-wise absolute value (vpabsd); no memory access.
    Self(unsafe { _mm512_abs_epi32(self.0) })
}
}

impl_bin_op!(I32x16, Add, add, _mm512_add_epi32);
Expand Down
Loading