Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/hpc/byte_scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@
// ---------------------------------------------------------------------------

#[cfg(target_arch = "x86_64")]
mod simd_impl {
pub(crate) mod simd_impl {
use core::arch::x86_64::*;

/// Find all positions of `needle` in `haystack` using AVX2 (32 bytes/iter).
///
/// # Safety
/// Caller must ensure AVX2 is available.
#[target_feature(enable = "avx2")]
pub(super) unsafe fn byte_find_all_avx2(haystack: &[u8], needle: u8) -> Vec<usize> {
pub(crate) unsafe fn byte_find_all_avx2(haystack: &[u8], needle: u8) -> Vec<usize> {
let mut result = Vec::new();
let n = haystack.len();
let ptr = haystack.as_ptr();
Expand Down Expand Up @@ -52,7 +52,7 @@ mod simd_impl {
/// # Safety
/// Caller must ensure AVX-512 BW is available.
#[target_feature(enable = "avx512bw")]
pub(super) unsafe fn byte_find_all_avx512(haystack: &[u8], needle: u8) -> Vec<usize> {
pub(crate) unsafe fn byte_find_all_avx512(haystack: &[u8], needle: u8) -> Vec<usize> {
let mut result = Vec::new();
let n = haystack.len();
let ptr = haystack.as_ptr();
Expand Down Expand Up @@ -84,7 +84,7 @@ mod simd_impl {
/// # Safety
/// Caller must ensure AVX2 is available.
#[target_feature(enable = "avx2")]
pub(super) unsafe fn byte_count_avx2(haystack: &[u8], needle: u8) -> usize {
pub(crate) unsafe fn byte_count_avx2(haystack: &[u8], needle: u8) -> usize {
let n = haystack.len();
let ptr = haystack.as_ptr();
let needle_v = _mm256_set1_epi8(needle as i8);
Expand All @@ -111,7 +111,7 @@ mod simd_impl {
/// # Safety
/// Caller must ensure AVX-512 BW is available.
#[target_feature(enable = "avx512bw")]
pub(super) unsafe fn byte_count_avx512(haystack: &[u8], needle: u8) -> usize {
pub(crate) unsafe fn byte_count_avx512(haystack: &[u8], needle: u8) -> usize {
let n = haystack.len();
let ptr = haystack.as_ptr();
let needle_v = _mm512_set1_epi8(needle as i8);
Expand Down
4 changes: 2 additions & 2 deletions src/hpc/distance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ fn sq_dist_f64(a: [f64; 3], b: [f64; 3]) -> f64 {
// ---------------------------------------------------------------------------

#[cfg(target_arch = "x86_64")]
mod simd_impl {
pub(crate) mod simd_impl {
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

Expand All @@ -39,7 +39,7 @@ mod simd_impl {
/// # Safety
/// Caller must ensure AVX2 is available.
#[target_feature(enable = "avx2")]
pub(super) unsafe fn squared_distances_avx2(
pub(crate) unsafe fn squared_distances_avx2(
query: [f32; 3],
points: &[[f32; 3]],
out: &mut Vec<f32>,
Expand Down
2 changes: 2 additions & 0 deletions src/hpc/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@

// SIMD capability singleton — detect once, all modules share
pub mod simd_caps;
// LazyLock frozen SIMD dispatch — function pointers selected once at startup
pub mod simd_dispatch;

pub mod blas_level1;
pub mod blas_level2;
Expand Down
8 changes: 4 additions & 4 deletions src/hpc/nibble.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ pub fn nibble_unpack(packed: &[u8], count: usize) -> Vec<u8> {
out
}

fn nibble_unpack_scalar(packed: &[u8], count: usize, out: &mut Vec<u8>) {
pub(crate) fn nibble_unpack_scalar(packed: &[u8], count: usize, out: &mut Vec<u8>) {
for i in 0..count {
let byte = packed[i / 2];
let val = if i & 1 == 0 { byte & 0x0F } else { byte >> 4 };
Expand All @@ -54,7 +54,7 @@ fn nibble_unpack_scalar(packed: &[u8], count: usize, out: &mut Vec<u8>) {
/// Caller must ensure AVX2 is available and `count >= 32`.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn nibble_unpack_avx2(packed: &[u8], count: usize, out: &mut Vec<u8>) {
pub(crate) unsafe fn nibble_unpack_avx2(packed: &[u8], count: usize, out: &mut Vec<u8>) {
use core::arch::x86_64::*;

let low_mask = _mm_set1_epi8(0x0F);
Expand Down Expand Up @@ -252,7 +252,7 @@ pub fn nibble_above_threshold(packed: &[u8], threshold: u8) -> Vec<usize> {
nibble_above_threshold_scalar(packed, threshold)
}

fn nibble_above_threshold_scalar(packed: &[u8], threshold: u8) -> Vec<usize> {
pub(crate) fn nibble_above_threshold_scalar(packed: &[u8], threshold: u8) -> Vec<usize> {
let mut result = Vec::new();
let count = packed.len() * 2;
for i in 0..count {
Expand All @@ -272,7 +272,7 @@ fn nibble_above_threshold_scalar(packed: &[u8], threshold: u8) -> Vec<usize> {
/// Caller must ensure AVX2 is available and `packed.len() >= 16`.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn nibble_above_threshold_avx2(packed: &[u8], threshold: u8) -> Vec<usize> {
pub(crate) unsafe fn nibble_above_threshold_avx2(packed: &[u8], threshold: u8) -> Vec<usize> {
use core::arch::x86_64::*;

let mut result = Vec::new();
Expand Down
Loading
Loading