Skip to content

Commit dceffdc

Browse files
authored
Merge pull request #50 from AdaWorldAPI/claude/transcode-deepnsm-rust-oNa1Z
data: Llama 4 Scout BF16 shard 2/5 → bgz17 https://claude.ai/code/session_01Y69Vnw751w75iVSBRws7o7
2 parents f7dd8d6 + 303c216 commit dceffdc

3 files changed

Lines changed: 31 additions & 16 deletions

File tree

src/hpc/gguf_indexer.rs

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -636,25 +636,26 @@ mod tests {
636636
assert!(stats.tensors_indexed > 0);
637637
}
638638

639-
#[test]
640-
#[ignore] // Streams BF16 shard 5 (18.2 GB) from HuggingFace
641-
fn test_stream_index_llama4_bf16_shard5() {
639+
/// Run one shard of Llama 4 Scout BF16 through the streaming indexer.
640+
/// Returns the output path and the index stats on success.
641+
fn run_llama4_shard(shard: u32) -> Option<(String, IndexStats)> {
642642
use super::super::http_reader::HttpRangeReader;
643643
use std::io::BufWriter;
644644

645645
let repo = "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF";
646-
let filename = "BF16/Llama-4-Scout-17B-16E-Instruct-BF16-00005-of-00005.gguf";
647-
let size: u64 = 18_220_000_000; // ~18.2 GB from metadata
646+
let filename = format!(
647+
"BF16/Llama-4-Scout-17B-16E-Instruct-BF16-{:05}-of-00005.gguf", shard
648+
);
649+
// Shards are ~18-44 GB each; use a conservative 44 GB estimate
650+
let size: u64 = 44_000_000_000;
648651

649652
let url = format!("https://huggingface.co/{}/resolve/main/{}", repo, filename);
650-
eprintln!("Streaming shard 5: {:.2} GB", size as f64 / 1e9);
651-
eprintln!(" URL: {}", url);
653+
eprintln!("Streaming shard {}/5: {}", shard, filename);
652654

653-
// 16 MB chunks for fewer HTTP round-trips
654655
let mut reader = HttpRangeReader::with_chunk_size(url, size, 256 * 1024 * 1024);
655656

656-
let out_path = "/tmp/llama4_scout_shard5.bgz7";
657-
let out = std::fs::File::create(out_path).expect("create output");
657+
let out_path = format!("/tmp/llama4_scout_shard{}.bgz7", shard);
658+
let out = std::fs::File::create(&out_path).expect("create output");
658659
let mut writer = BufWriter::new(out);
659660

660661
let stats = stream_index_gguf(
@@ -668,12 +669,11 @@ mod tests {
668669
).expect("stream_index_gguf");
669670

670671
drop(writer);
671-
let out_size = std::fs::metadata(out_path).map(|m| m.len()).unwrap_or(0);
672+
let out_size = std::fs::metadata(&out_path).map(|m| m.len()).unwrap_or(0);
672673

673674
eprintln!();
674-
eprintln!("=== Llama 4 Scout BF16 Shard 5 → bgz17 ===");
675-
eprintln!(" Source: {:.2} GB (BF16, streamed from HF)", size as f64 / 1e9);
676-
eprintln!(" Output: {:.2} MB", out_size as f64 / 1e6);
675+
eprintln!("=== Llama 4 Scout BF16 Shard {}/5 → bgz17 ===", shard);
676+
eprintln!(" Output: {:.2} MB ({})", out_size as f64 / 1e6, out_path);
677677
eprintln!(" Downloaded: {:.2} GB", reader.bytes_downloaded() as f64 / 1e9);
678678
eprintln!(" Tensors: {} indexed, {} skipped",
679679
stats.tensors_indexed, stats.tensors_skipped);
@@ -693,7 +693,22 @@ mod tests {
693693
}
694694

695695
assert!(stats.tensors_indexed > 0);
696-
// BF16 dequant to f32 doubles the size, so original_bytes > source size
697-
assert!(stats.original_bytes > 0);
696+
Some((out_path, stats))
698697
}
698+
699+
#[test]
700+
#[ignore]
701+
fn test_stream_index_llama4_bf16_shard1() { run_llama4_shard(1); }
702+
#[test]
703+
#[ignore]
704+
fn test_stream_index_llama4_bf16_shard2() { run_llama4_shard(2); }
705+
#[test]
706+
#[ignore]
707+
fn test_stream_index_llama4_bf16_shard3() { run_llama4_shard(3); }
708+
#[test]
709+
#[ignore]
710+
fn test_stream_index_llama4_bf16_shard4() { run_llama4_shard(4); }
711+
#[test]
712+
#[ignore]
713+
fn test_stream_index_llama4_bf16_shard5() { run_llama4_shard(5); }
699714
}
11.2 MB
Binary file not shown.
7.93 MB
Binary file not shown.

0 commit comments

Comments
 (0)