|
| 1 | +//! SIMD capability singleton — detect once, dispatch forever. |
| 2 | +//! |
| 3 | +//! Replaces per-call `is_x86_feature_detected!` (hidden `AtomicU8` load each time) |
| 4 | +//! with a single `LazyLock<SimdCaps>` detected at first access. Every HPC module |
| 5 | +//! calls `simd_caps()` which is one pointer deref to a frozen `Copy` struct. |
| 6 | +//! |
| 7 | +//! ```text |
| 8 | +//! is_x86_feature_detected!("avx512f") → ~3ns (atomic load + branch) |
| 9 | +//! simd_caps().avx512f → ~1ns (LazyLock deref + bool read) |
| 10 | +//! ``` |
| 11 | +
|
| 12 | +use std::sync::LazyLock; |
| 13 | + |
/// Snapshot of the SIMD instruction-set extensions reported for the running CPU.
///
/// Each field is an independent flag. The struct holds eight `bool`s (8 bytes),
/// is `Copy`, and is meant to be passed by value.
///
/// `Default` yields a snapshot with every flag cleared, i.e. "no SIMD
/// capabilities". `PartialEq`/`Eq`/`Hash` allow whole snapshots to be compared
/// or used as map keys instead of comparing field by field.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub struct SimdCaps {
    /// AVX2 (256-bit integer/FP SIMD).
    pub avx2: bool,
    /// AVX-512 Foundation (512-bit).
    pub avx512f: bool,
    /// AVX-512 Byte/Word operations.
    pub avx512bw: bool,
    /// AVX-512 Vector Length extensions.
    pub avx512vl: bool,
    /// AVX-512 VPOPCNTDQ (hardware popcount on 512-bit).
    pub avx512vpopcntdq: bool,
    /// SSE 4.1.
    pub sse41: bool,
    /// SSE2 (baseline on x86_64, but explicit for clarity).
    pub sse2: bool,
    /// FMA (fused multiply-add).
    pub fma: bool,
}
| 37 | + |
| 38 | +/// Global singleton — detected once at first access via `LazyLock`. |
| 39 | +static CAPS: LazyLock<SimdCaps> = LazyLock::new(SimdCaps::detect); |
| 40 | + |
| 41 | +/// Get the detected SIMD capabilities. First call detects; all subsequent |
| 42 | +/// calls are a single pointer deref with no atomic operations. |
| 43 | +#[inline(always)] |
| 44 | +pub fn simd_caps() -> SimdCaps { |
| 45 | + *CAPS |
| 46 | +} |
| 47 | + |
| 48 | +impl SimdCaps { |
| 49 | + /// Detect CPU capabilities at runtime. |
| 50 | + #[cfg(target_arch = "x86_64")] |
| 51 | + fn detect() -> Self { |
| 52 | + Self { |
| 53 | + avx2: is_x86_feature_detected!("avx2"), |
| 54 | + avx512f: is_x86_feature_detected!("avx512f"), |
| 55 | + avx512bw: is_x86_feature_detected!("avx512bw"), |
| 56 | + avx512vl: is_x86_feature_detected!("avx512vl"), |
| 57 | + avx512vpopcntdq: is_x86_feature_detected!("avx512vpopcntdq"), |
| 58 | + sse41: is_x86_feature_detected!("sse4.1"), |
| 59 | + sse2: is_x86_feature_detected!("sse2"), |
| 60 | + fma: is_x86_feature_detected!("fma"), |
| 61 | + } |
| 62 | + } |
| 63 | + |
| 64 | + /// Non-x86: all false. |
| 65 | + #[cfg(not(target_arch = "x86_64"))] |
| 66 | + fn detect() -> Self { |
| 67 | + Self { |
| 68 | + avx2: false, |
| 69 | + avx512f: false, |
| 70 | + avx512bw: false, |
| 71 | + avx512vl: false, |
| 72 | + avx512vpopcntdq: false, |
| 73 | + sse41: false, |
| 74 | + sse2: false, |
| 75 | + fma: false, |
| 76 | + } |
| 77 | + } |
| 78 | + |
| 79 | + /// True if AVX-512 Foundation + VPOPCNTDQ are both available. |
| 80 | + #[inline(always)] |
| 81 | + pub fn has_avx512_popcnt(self) -> bool { |
| 82 | + self.avx512f && self.avx512vpopcntdq |
| 83 | + } |
| 84 | + |
| 85 | + /// True if AVX-512 BW + VPOPCNTDQ are both available. |
| 86 | + #[inline(always)] |
| 87 | + pub fn has_avx512_bw_popcnt(self) -> bool { |
| 88 | + self.avx512bw && self.avx512vpopcntdq |
| 89 | + } |
| 90 | +} |
| 91 | + |
#[cfg(test)]
mod tests {
    use super::*;

    /// Flattens every capability flag into an array so that whole snapshots
    /// can be compared in one assertion, covering all eight fields.
    fn flags(caps: SimdCaps) -> [bool; 8] {
        [
            caps.avx2,
            caps.avx512f,
            caps.avx512bw,
            caps.avx512vl,
            caps.avx512vpopcntdq,
            caps.sse41,
            caps.sse2,
            caps.fma,
        ]
    }

    /// A snapshot with every flag cleared, used as a base for synthetic cases.
    fn no_caps() -> SimdCaps {
        SimdCaps {
            avx2: false,
            avx512f: false,
            avx512bw: false,
            avx512vl: false,
            avx512vpopcntdq: false,
            sse41: false,
            sse2: false,
            fma: false,
        }
    }

    #[test]
    fn detect_does_not_panic() {
        // On any platform, simd_caps() should succeed and expose every flag.
        let caps = simd_caps();
        let _ = flags(caps);
    }

    #[test]
    fn simd_caps_is_copy() {
        let a = simd_caps();
        let b = a; // Copy
        let c = a; // Still valid
        assert_eq!(flags(a), flags(b));
        assert_eq!(flags(b), flags(c));
    }

    #[test]
    fn simd_caps_deterministic() {
        // Two reads of the singleton must agree on all eight flags.
        assert_eq!(flags(simd_caps()), flags(simd_caps()));
    }

    #[test]
    fn convenience_methods() {
        // The helpers must agree with the raw flags of the detected snapshot.
        let caps = simd_caps();
        assert_eq!(
            caps.has_avx512_popcnt(),
            caps.avx512f && caps.avx512vpopcntdq
        );
        assert_eq!(
            caps.has_avx512_bw_popcnt(),
            caps.avx512bw && caps.avx512vpopcntdq
        );
    }

    #[test]
    fn convenience_methods_on_synthetic_snapshots() {
        // Platform-independent checks using hand-built snapshots.
        let none = no_caps();
        assert!(!none.has_avx512_popcnt());
        assert!(!none.has_avx512_bw_popcnt());

        let f_popcnt = SimdCaps {
            avx512f: true,
            avx512vpopcntdq: true,
            ..none
        };
        assert!(f_popcnt.has_avx512_popcnt());
        assert!(!f_popcnt.has_avx512_bw_popcnt());

        let bw_popcnt = SimdCaps {
            avx512bw: true,
            avx512vpopcntdq: true,
            ..none
        };
        assert!(bw_popcnt.has_avx512_bw_popcnt());
        assert!(!bw_popcnt.has_avx512_popcnt());

        // One flag alone is never enough for either helper.
        let popcnt_only = SimdCaps {
            avx512vpopcntdq: true,
            ..none
        };
        assert!(!popcnt_only.has_avx512_popcnt());
        assert!(!popcnt_only.has_avx512_bw_popcnt());
    }
}
0 commit comments