@@ -636,25 +636,26 @@ mod tests {
636636 assert ! ( stats. tensors_indexed > 0 ) ;
637637 }
638638
639- # [ test ]
640- # [ ignore ] // Streams BF16 shard 5 (18.2 GB) from HuggingFace
641- fn test_stream_index_llama4_bf16_shard5 ( ) {
639+ /// Run one shard of Llama 4 Scout BF16 through the streaming indexer.
640+ /// Returns the output path and the index stats on success.
641+ fn run_llama4_shard ( shard : u32 ) -> Option < ( String , IndexStats ) > {
642642 use super :: super :: http_reader:: HttpRangeReader ;
643643 use std:: io:: BufWriter ;
644644
645645 let repo = "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF" ;
646- let filename = "BF16/Llama-4-Scout-17B-16E-Instruct-BF16-00005-of-00005.gguf" ;
647- let size: u64 = 18_220_000_000 ; // ~18.2 GB from metadata
646+ let filename = format ! (
647+ "BF16/Llama-4-Scout-17B-16E-Instruct-BF16-{:05}-of-00005.gguf" , shard
648+ ) ;
649+ // Shards are ~18-44 GB each; use conservative 44 GB estimate
650+ let size: u64 = 44_000_000_000 ;
648651
649652 let url = format ! ( "https://huggingface.co/{}/resolve/main/{}" , repo, filename) ;
650- eprintln ! ( "Streaming shard 5: {:.2} GB" , size as f64 / 1e9 ) ;
651- eprintln ! ( " URL: {}" , url) ;
653+ eprintln ! ( "Streaming shard {}/5: {}" , shard, filename) ;
652654
653- // 16 MB chunks for fewer HTTP round-trips
654655 let mut reader = HttpRangeReader :: with_chunk_size ( url, size, 256 * 1024 * 1024 ) ;
655656
656- let out_path = "/tmp/llama4_scout_shard5 .bgz7" ;
657- let out = std:: fs:: File :: create ( out_path) . expect ( "create output" ) ;
657+ let out_path = format ! ( "/tmp/llama4_scout_shard{} .bgz7" , shard ) ;
658+ let out = std:: fs:: File :: create ( & out_path) . expect ( "create output" ) ;
658659 let mut writer = BufWriter :: new ( out) ;
659660
660661 let stats = stream_index_gguf (
@@ -668,12 +669,11 @@ mod tests {
668669 ) . expect ( "stream_index_gguf" ) ;
669670
670671 drop ( writer) ;
671- let out_size = std:: fs:: metadata ( out_path) . map ( |m| m. len ( ) ) . unwrap_or ( 0 ) ;
672+ let out_size = std:: fs:: metadata ( & out_path) . map ( |m| m. len ( ) ) . unwrap_or ( 0 ) ;
672673
673674 eprintln ! ( ) ;
674- eprintln ! ( "=== Llama 4 Scout BF16 Shard 5 → bgz17 ===" ) ;
675- eprintln ! ( " Source: {:.2} GB (BF16, streamed from HF)" , size as f64 / 1e9 ) ;
676- eprintln ! ( " Output: {:.2} MB" , out_size as f64 / 1e6 ) ;
675+ eprintln ! ( "=== Llama 4 Scout BF16 Shard {}/5 → bgz17 ===" , shard) ;
676+ eprintln ! ( " Output: {:.2} MB ({})" , out_size as f64 / 1e6 , out_path) ;
677677 eprintln ! ( " Downloaded: {:.2} GB" , reader. bytes_downloaded( ) as f64 / 1e9 ) ;
678678 eprintln ! ( " Tensors: {} indexed, {} skipped" ,
679679 stats. tensors_indexed, stats. tensors_skipped) ;
@@ -693,7 +693,22 @@ mod tests {
693693 }
694694
695695 assert ! ( stats. tensors_indexed > 0 ) ;
696- // BF16 dequant to f32 doubles the size, so original_bytes > source size
697- assert ! ( stats. original_bytes > 0 ) ;
696+ Some ( ( out_path, stats) )
698697 }
698+
699+ #[ test]
700+ #[ ignore] // Streams BF16 shard 1 of 5 from HuggingFace
701+ fn test_stream_index_llama4_bf16_shard1 ( ) { run_llama4_shard ( 1 ) ; }
702+ #[ test]
703+ #[ ignore] // Streams BF16 shard 2 of 5 from HuggingFace
704+ fn test_stream_index_llama4_bf16_shard2 ( ) { run_llama4_shard ( 2 ) ; }
705+ #[ test]
706+ #[ ignore] // Streams BF16 shard 3 of 5 from HuggingFace
707+ fn test_stream_index_llama4_bf16_shard3 ( ) { run_llama4_shard ( 3 ) ; }
708+ #[ test]
709+ #[ ignore] // Streams BF16 shard 4 of 5 from HuggingFace
710+ fn test_stream_index_llama4_bf16_shard4 ( ) { run_llama4_shard ( 4 ) ; }
711+ #[ test]
712+ #[ ignore] // Streams BF16 shard 5 of 5 from HuggingFace
713+ fn test_stream_index_llama4_bf16_shard5 ( ) { run_llama4_shard ( 5 ) ; }
699714}
0 commit comments