Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 21 additions & 6 deletions src/hpc/gguf_indexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -519,19 +519,34 @@ pub fn stream_index_gguf_bf16<R: Read + Seek, W: Write>(
octave_stride: usize,
callback: Option<&dyn Fn(&str, &LayerType, usize, usize)>,
) -> Result<IndexStats, String> {
let gguf_header = gguf::read_gguf_header(reader)?;
let header = gguf::read_gguf_header(reader)?;
stream_index_gguf_bf16_with_header(reader, writer, &header, octave_stride, callback)
}

/// Core BF16-direct indexer — works with any pre-parsed header (GGUF or safetensors).
///
/// The header must have:
/// - `tensor_data_offset`: absolute byte offset where tensor data starts
/// - `tensors`: Vec<TensorInfo> with name, dimensions, dtype, offset (relative to data start)
pub fn stream_index_gguf_bf16_with_header<R: Read + Seek, W: Write>(
reader: &mut R,
writer: &mut W,
header: &gguf::GgufFile,
octave_stride: usize,
callback: Option<&dyn Fn(&str, &LayerType, usize, usize)>,
) -> Result<IndexStats, String> {
let mut stats = IndexStats::default();
stats.tensors_total = gguf_header.tensors.len();
stats.tensors_total = header.tensors.len();

writer.write_all(b"BGZ7").map_err(|e| e.to_string())?;
writer.write_all(&(gguf_header.tensors.len() as u32).to_le_bytes()).map_err(|e| e.to_string())?;
writer.write_all(&(header.tensors.len() as u32).to_le_bytes()).map_err(|e| e.to_string())?;

// Reusable buffer — capped at 128 MB (64M u16 elements).
// Tensors larger than this are read in row batches.
const MAX_BUF_ELEMS: usize = 64 * 1024 * 1024; // 128 MB of u16
let mut bf16_buf: Vec<u16> = Vec::new();

for tensor in &gguf_header.tensors {
for tensor in &header.tensors {
let layer_type = classify_tensor(&tensor.name, &tensor.dimensions);

if matches!(layer_type, LayerType::Skip | LayerType::Norm) {
Expand Down Expand Up @@ -559,7 +574,7 @@ pub fn stream_index_gguf_bf16<R: Read + Seek, W: Write>(
}

// Seek to tensor start
let abs_offset = gguf_header.tensor_data_offset + tensor.offset;
let abs_offset = header.tensor_data_offset + tensor.offset;
reader.seek(std::io::SeekFrom::Start(abs_offset)).map_err(|e| e.to_string())?;

let mut rows: Vec<Base17> = Vec::with_capacity(n_rows);
Expand Down Expand Up @@ -636,7 +651,7 @@ pub fn stream_index_gguf_bf16<R: Read + Seek, W: Write>(
}
} else {
// FALLBACK: non-BF16 — use original f32 path
let data = gguf::read_tensor_f32(reader, &gguf_header, tensor)?;
let data = gguf::read_tensor_f32(reader, &header, tensor)?;
let tensor_bytes = data.len() as u64 * 4;
if tensor_bytes > stats.peak_tensor_bytes {
stats.peak_tensor_bytes = tensor_bytes;
Expand Down
4 changes: 4 additions & 0 deletions src/hpc/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,10 @@ pub mod gguf;
#[allow(missing_docs)]
pub mod gguf_indexer;

/// Safetensors header parser + streaming indexer for BF16 model weights.
#[allow(missing_docs)]
pub mod safetensors;

/// HTTP range reader — Read + Seek over HTTP for streaming GGUF from HuggingFace.
#[allow(missing_docs)]
pub mod http_reader;
Expand Down
Loading
Loading