From 75d0127a01cd3df01410fff8fe8877995cfeb538 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Mon, 8 Jun 2026 10:56:11 +0000 Subject: [PATCH 01/17] perf: add microbenchmarks for crc32c and MRD reads --- .../perf/microbenchmarks/benchmark_crc32c.py | 107 +++++++++++ .../microbenchmarks/benchmark_mrd_reads.py | 168 ++++++++++++++++++ 2 files changed, 275 insertions(+) create mode 100644 packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_crc32c.py create mode 100644 packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py diff --git a/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_crc32c.py b/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_crc32c.py new file mode 100644 index 000000000000..ff995063bf31 --- /dev/null +++ b/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_crc32c.py @@ -0,0 +1,107 @@ +import argparse +import os +import statistics +import sys +import time + +try: + import google_crc32c +except ImportError: + print("Error: google_crc32c package is not installed in the python environment.", file=sys.stderr) + sys.exit(1) + + +def parse_size(size_str: str) -> int: + size_str = size_str.strip().upper() + if size_str.endswith("KIB"): + return int(float(size_str[:-3]) * 1024) + elif size_str.endswith("MIB"): + return int(float(size_str[:-3]) * 1024 * 1024) + elif size_str.endswith("GIB"): + return int(float(size_str[:-3]) * 1024 * 1024 * 1024) + elif size_str.endswith("KB"): + return int(float(size_str[:-2]) * 1000) + elif size_str.endswith("MB"): + return int(float(size_str[:-2]) * 1000 * 1000) + elif size_str.endswith("GB"): + return int(float(size_str[:-2]) * 1000 * 1000 * 1000) + elif size_str.endswith("B"): + return int(size_str[:-1]) + else: + try: + return int(size_str) + except ValueError: + raise ValueError(f"Unknown size format: {size_str}") + + +def format_time(seconds: float) -> str: + if seconds < 1e-6: + return f"{seconds * 1e9:.2f} ns" + elif seconds < 1e-3: + return f"{seconds * 1e6:.2f} \u03bcs" + elif seconds < 1.0: + return f"{seconds * 1e3:.2f} ms" + else: + return f"{seconds:.2f} s" + + +def main(): + parser = argparse.ArgumentParser(description="Benchmark google_crc32c.value execution time.") + parser.add_argument( + "--sizes", + type=str, + default="1KiB,100KiB,2MiB", + help="Comma-separated list of sizes (e.g. '1KiB,100KiB,2MiB')" + ) + parser.add_argument( + "--iterations", + type=int, + default=100, + help="Number of iterations for benchmark (default: 100)" + ) + args = parser.parse_args() + + # Ensure google_crc32c uses accelerated C code + impl = getattr(google_crc32c, "implementation", None) + print(f"google_crc32c implementation: {impl}") + if impl != "c": + print(f"Error: google_crc32c is not using the accelerated C code (got '{impl}').", file=sys.stderr) + sys.exit(1) + + sizes_to_test = [] + for s in args.sizes.split(","): + try: + sizes_to_test.append((s.strip(), parse_size(s))) + except ValueError as e: + print(f"Error parsing size '{s}': {e}", file=sys.stderr) + sys.exit(1) + + print(f"Benchmarking google_crc32c.value(data) with {args.iterations} iterations:") + print("-" * 80) + print(f"{'Size (String)':<15} | {'Size (Bytes)':<12} | {'Min':<10} | {'Max':<10} | {'Mean':<10} | {'Median':<10}") + print("-" * 80) + + for size_str, size_bytes in sizes_to_test: + data = os.urandom(size_bytes) + + durations = [] + for _ in range(args.iterations): + start = time.perf_counter() + _ = google_crc32c.value(data) + end = time.perf_counter() + durations.append(end - start) + + min_time = min(durations) + max_time = max(durations) + mean_time = statistics.mean(durations) + median_time = statistics.median(durations) + + print( + f"{size_str:<15} | {size_bytes:<12} | " + f"{format_time(min_time):<10} | {format_time(max_time):<10} | " + f"{format_time(mean_time):<10} | {format_time(median_time):<10}" + ) + + +if __name__ == "__main__": + main() diff --git a/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py b/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py new file mode 100644 index 000000000000..ec7bd29788a7 --- /dev/null +++ b/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py @@ -0,0 +1,168 @@ +import argparse +import asyncio +import os +import random +import statistics +import sys +import time + +try: + import google_crc32c +except ImportError: + print("Error: google_crc32c package is not installed in the python environment.", file=sys.stderr) + sys.exit(1) + +from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage.asyncio.async_multi_range_downloader import AsyncMultiRangeDownloader + + +class VoidBuffer: + """A writeable file-like object that discards written data to save memory.""" + def __init__(self): + self.size = 0 + + def write(self, data: bytes) -> int: + n = len(data) + self.size += n + return n + + def tell(self) -> int: + return self.size + + +def parse_size(size_str: str) -> int: + size_str = size_str.strip().upper() + if size_str.endswith("KIB"): + return int(float(size_str[:-3]) * 1024) + elif size_str.endswith("MIB"): + return int(float(size_str[:-3]) * 1024 * 1024) + elif size_str.endswith("GIB"): + return int(float(size_str[:-3]) * 1024 * 1024 * 1024) + elif size_str.endswith("KB"): + return int(float(size_str[:-2]) * 1000) + elif size_str.endswith("MB"): + return int(float(size_str[:-2]) * 1000 * 1000) + elif size_str.endswith("GB"): + return int(float(size_str[:-2]) * 1000 * 1000 * 1000) + elif size_str.endswith("B"): + return int(size_str[:-1]) + else: + try: + return int(size_str) + except ValueError: + raise ValueError(f"Unknown size format: {size_str}") + + +def format_time(seconds: float) -> str: + if seconds < 1e-6: + return f"{seconds * 1e9:.2f} ns" + elif seconds < 1e-3: + return f"{seconds * 1e6:.2f} \u03bcs" + elif seconds < 1.0: + return f"{seconds * 1e3:.2f} ms" + else: + return f"{seconds:.2f} s" + + +async def download_range( + grpc_client: AsyncGrpcClient, + bucket_name: str, + object_name: str, + start_byte: int, + size: int, + enable_checksum: bool, +) -> float: + mrd = AsyncMultiRangeDownloader(grpc_client, bucket_name, object_name) + try: + await mrd.open() + output_buffer = VoidBuffer() + start = time.perf_counter() + await mrd.download_ranges( + [(start_byte, size, output_buffer)], + enable_checksum=enable_checksum, + ) + end = time.perf_counter() + return end - start + finally: + if mrd.is_stream_open: + await mrd.close() + + +async def run_benchmark(): + parser = argparse.ArgumentParser(description="Benchmark GCS Object Range Downloads using MRD.") + parser.add_argument("--bucket", type=str, default="chandrasiri-benchmarks-zb", help="Bucket name") + parser.add_argument("--object", type=str, default="large_20260507_10737418240", help="Object name (10GiB size)") + parser.add_argument("--sizes", type=str, default="1KiB,2MiB,10MiB,100MiB,1GiB", help="Sizes to benchmark") + parser.add_argument("--iterations", type=int, default=5, help="Number of iterations per size") + args = parser.parse_args() + + impl = getattr(google_crc32c, "implementation", None) + print(f"google_crc32c implementation: {impl}") + if impl != "c": + print(f"Error: google_crc32c implementation is '{impl}', expected 'c'", file=sys.stderr) + sys.exit(1) + + sizes_to_test = [] + for s in args.sizes.split(","): + try: + sizes_to_test.append((s.strip(), parse_size(s))) + except ValueError as e: + print(f"Error parsing size '{s}': {e}", file=sys.stderr) + sys.exit(1) + + # 10 GiB in bytes + object_size_bytes = 10 * 1024 * 1024 * 1024 + + grpc_client = AsyncGrpcClient() + + print(f"Benchmarking MRD Reads on gs://{args.bucket}/{args.object} with {args.iterations} iterations:") + print("-" * 125) + print(f"{'Size (String)':<15} | {'Checksum':<10} | {'Size (Bytes)':<12} | {'Min':<12} | {'Max':<12} | {'Mean':<12} | {'Median':<12} | {'Avg Throughput':<15}") + print("-" * 125) + + for size_str, size_bytes in sizes_to_test: + # Pre-generate random offsets so that both Enabled and Disabled configurations run on the exact same offsets + offsets = [random.randint(0, object_size_bytes - size_bytes) for _ in range(args.iterations)] + + for enable_chk in [True, False]: + chk_label = "Enabled" if enable_chk else "Disabled" + durations = [] + + for i, start_byte in enumerate(offsets): + print(f" [{size_str} - Checksum {chk_label}] Iteration {i+1}/{args.iterations}: Downloading from offset {start_byte}...", end="", flush=True) + + try: + duration = await download_range(grpc_client, args.bucket, args.object, start_byte, size_bytes, enable_checksum=enable_chk) + durations.append(duration) + print(f" Done in {format_time(duration)}") + except Exception as e: + print(f" Failed: {e}") + continue + + if not durations: + print(f"{size_str:<15} | {chk_label:<10} | {size_bytes:<12} | {'FAILED':<12} | {'FAILED':<12} | {'FAILED':<12} | {'FAILED':<12} | {'N/A':<15}") + continue + + min_time = min(durations) + max_time = max(durations) + mean_time = statistics.mean(durations) + median_time = statistics.median(durations) + + # Throughput in MiB/s + avg_throughput = (size_bytes / (1024 * 1024)) / mean_time + + print( + f"{size_str:<15} | {chk_label:<10} | {size_bytes:<12} | " + f"{format_time(min_time):<12} | {format_time(max_time):<12} | " + f"{format_time(mean_time):<12} | {format_time(median_time):<12} | " + f"{avg_throughput:.2f} MiB/s" + ) + print("-" * 125) + + +def main(): + asyncio.run(run_benchmark()) + + +if __name__ == "__main__": + main() From 02fa59c334818f474f621dea5a2e71e4abbe77dc Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Mon, 8 Jun 2026 12:23:27 +0000 Subject: [PATCH 02/17] perf: add warmup phase to MRD reads benchmark --- .../microbenchmarks/benchmark_mrd_reads.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py b/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py index ec7bd29788a7..fb2ae8e06575 100644 --- a/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py +++ b/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py @@ -115,6 +115,28 @@ async def run_benchmark(): grpc_client = AsyncGrpcClient() + # Warmup phase + print("Warming up for 10 seconds to establish connections...") + warmup_start = time.perf_counter() + warmup_downloads = 0 + while time.perf_counter() - warmup_start < 10.0: + # download a small chunk (1MiB) to warm up channels + start_byte = random.randint(0, object_size_bytes - 1024 * 1024) + try: + await download_range( + grpc_client, + args.bucket, + args.object, + start_byte, + 1024 * 1024, + enable_checksum=True, + ) + warmup_downloads += 1 + except Exception: + pass + print(f"Warmup complete. Completed {warmup_downloads} warmup downloads.") + print() + print(f"Benchmarking MRD Reads on gs://{args.bucket}/{args.object} with {args.iterations} iterations:") print("-" * 125) print(f"{'Size (String)':<15} | {'Checksum':<10} | {'Size (Bytes)':<12} | {'Min':<12} | {'Max':<12} | {'Mean':<12} | {'Median':<12} | {'Avg Throughput':<15}") From 891a981d3974abadd1b11507247d6a084ed3abce Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Mon, 8 Jun 2026 13:14:46 +0000 Subject: [PATCH 03/17] perf: update warmup chunk size to 10MiB --- .../tests/perf/microbenchmarks/benchmark_mrd_reads.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py b/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py index fb2ae8e06575..f5b5ad55180a 100644 --- a/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py +++ b/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py @@ -120,15 +120,15 @@ async def run_benchmark(): warmup_start = time.perf_counter() warmup_downloads = 0 while time.perf_counter() - warmup_start < 10.0: - # download a small chunk (1MiB) to warm up channels - start_byte = random.randint(0, object_size_bytes - 1024 * 1024) + # download a small chunk (10MiB) to warm up channels + start_byte = random.randint(0, object_size_bytes - 10 * 1024 * 1024) try: await download_range( grpc_client, args.bucket, args.object, start_byte, - 1024 * 1024, + 10 * 1024 * 1024, enable_checksum=True, ) warmup_downloads += 1 From 88f0e215a5115a2b2656b3e9b9039009e26bc5c4 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Mon, 8 Jun 2026 13:25:23 +0000 Subject: [PATCH 04/17] perf: make GCS object size configurable in MRD reads benchmark --- .../tests/perf/microbenchmarks/benchmark_mrd_reads.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py b/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py index f5b5ad55180a..13e31d0ca092 100644 --- a/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py +++ b/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py @@ -94,6 +94,7 @@ async def run_benchmark(): parser.add_argument("--object", type=str, default="large_20260507_10737418240", help="Object name (10GiB size)") parser.add_argument("--sizes", type=str, default="1KiB,2MiB,10MiB,100MiB,1GiB", help="Sizes to benchmark") parser.add_argument("--iterations", type=int, default=5, help="Number of iterations per size") + parser.add_argument("--object-size", type=str, default="10GiB", help="Size of the target GCS object (default: '10GiB')") args = parser.parse_args() impl = getattr(google_crc32c, "implementation", None) @@ -110,8 +111,11 @@ async def run_benchmark(): print(f"Error parsing size '{s}': {e}", file=sys.stderr) sys.exit(1) - # 10 GiB in bytes - object_size_bytes = 10 * 1024 * 1024 * 1024 + try: + object_size_bytes = parse_size(args.object_size) + except ValueError as e: + print(f"Error parsing object-size '{args.object_size}': {e}", file=sys.stderr) + sys.exit(1) grpc_client = AsyncGrpcClient() From 1dbb5f21eaf3dcc1a89b857da8f89d7155734139 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Mon, 8 Jun 2026 13:25:39 +0000 Subject: [PATCH 05/17] perf: add % change when checksum disabled column to MRD reads report --- .../microbenchmarks/benchmark_mrd_reads.py | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py b/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py index 13e31d0ca092..e77257574ef8 100644 --- a/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py +++ b/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py @@ -142,13 +142,14 @@ async def run_benchmark(): print() print(f"Benchmarking MRD Reads on gs://{args.bucket}/{args.object} with {args.iterations} iterations:") - print("-" * 125) - print(f"{'Size (String)':<15} | {'Checksum':<10} | {'Size (Bytes)':<12} | {'Min':<12} | {'Max':<12} | {'Mean':<12} | {'Median':<12} | {'Avg Throughput':<15}") - print("-" * 125) + print("-" * 150) + print(f"{'Size (String)':<15} | {'Checksum':<10} | {'Size (Bytes)':<12} | {'Min':<12} | {'Max':<12} | {'Mean':<12} | {'Median':<12} | {'Avg Throughput':<15} | {'% Chk-Disabled Change':<22}") + print("-" * 150) for size_str, size_bytes in sizes_to_test: # Pre-generate random offsets so that both Enabled and Disabled configurations run on the exact same offsets offsets = [random.randint(0, object_size_bytes - size_bytes) for _ in range(args.iterations)] + enabled_throughput = None for enable_chk in [True, False]: chk_label = "Enabled" if enable_chk else "Disabled" @@ -166,7 +167,7 @@ async def run_benchmark(): continue if not durations: - print(f"{size_str:<15} | {chk_label:<10} | {size_bytes:<12} | {'FAILED':<12} | {'FAILED':<12} | {'FAILED':<12} | {'FAILED':<12} | {'N/A':<15}") + print(f"{size_str:<15} | {chk_label:<10} | {size_bytes:<12} | {'FAILED':<12} | {'FAILED':<12} | {'FAILED':<12} | {'FAILED':<12} | {'N/A':<15} | {'N/A':<22}") continue min_time = min(durations) @@ -177,13 +178,25 @@ async def run_benchmark(): # Throughput in MiB/s avg_throughput = (size_bytes / (1024 * 1024)) / mean_time + percent_diff_str = "" + if enable_chk: + enabled_throughput = avg_throughput + percent_diff_str = "N/A" + else: + if enabled_throughput is not None and enabled_throughput > 0: + percent_increase = ((avg_throughput - enabled_throughput) / enabled_throughput) * 100 + percent_diff_str = f"{percent_increase:+.2f}%" + else: + percent_diff_str = "N/A" + print( f"{size_str:<15} | {chk_label:<10} | {size_bytes:<12} | " f"{format_time(min_time):<12} | {format_time(max_time):<12} | " f"{format_time(mean_time):<12} | {format_time(median_time):<12} | " - f"{avg_throughput:.2f} MiB/s" + f"{avg_throughput:.2f} MiB/s | " + f"{percent_diff_str:<22}" ) - print("-" * 125) + print("-" * 150) def main(): From fc85a68757cddda30aa792630ddcfde86e033da0 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Mon, 8 Jun 2026 13:47:11 +0000 Subject: [PATCH 06/17] perf: support pre-upload and full range downloads in MRD reads benchmark --- .../microbenchmarks/benchmark_mrd_reads.py | 84 +++++++++++++++++-- 1 file changed, 77 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py b/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py index e77257574ef8..4e173b902edb 100644 --- a/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py +++ b/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py @@ -14,6 +14,7 @@ from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient from google.cloud.storage.asyncio.async_multi_range_downloader import AsyncMultiRangeDownloader +from google.cloud.storage.asyncio.async_appendable_object_writer import AsyncAppendableObjectWriter class VoidBuffer: @@ -88,6 +89,42 @@ async def download_range( await mrd.close() +async def upload_random_object( + grpc_client: AsyncGrpcClient, + bucket_name: str, + object_name: str, + total_size_bytes: int, + chunk_size_bytes: int, +): + print(f"Uploading a new random object of size {total_size_bytes} bytes to gs://{bucket_name}/{object_name}...") + print(f"Upload chunk size: {chunk_size_bytes} bytes") + + writer = AsyncAppendableObjectWriter( + client=grpc_client, + bucket_name=bucket_name, + object_name=object_name, + generation=0, + ) + + await writer.open() + + uploaded_bytes = 0 + # Generate 10MiB of random buffer to slice from to avoid CPU urandom overhead + buffer_size = min(10 * 1024 * 1024, total_size_bytes) + random_buffer = os.urandom(buffer_size) + + while uploaded_bytes < total_size_bytes: + bytes_to_write = min(chunk_size_bytes, total_size_bytes - uploaded_bytes) + slice_start = (uploaded_bytes) % (buffer_size - bytes_to_write + 1) + data = random_buffer[slice_start : slice_start + bytes_to_write] + + await writer.append(data) + uploaded_bytes += bytes_to_write + + object_resource = await writer.finalize() + print(f"Appendable object {object_name} created and finalized. Uploaded {uploaded_bytes} bytes.") + + async def run_benchmark(): parser = argparse.ArgumentParser(description="Benchmark GCS Object Range Downloads using MRD.") parser.add_argument("--bucket", type=str, default="chandrasiri-benchmarks-zb", help="Bucket name") @@ -95,6 +132,8 @@ async def run_benchmark(): parser.add_argument("--sizes", type=str, default="1KiB,2MiB,10MiB,100MiB,1GiB", help="Sizes to benchmark") parser.add_argument("--iterations", type=int, default=5, help="Number of iterations per size") parser.add_argument("--object-size", type=str, default="10GiB", help="Size of the target GCS object (default: '10GiB')") + parser.add_argument("--upload", action="store_true", help="Upload a new random object before running the benchmark") + parser.add_argument("--upload-chunk-size", type=str, default="2MiB", help="Chunk size for the upload (default: 2MiB, max: 100MiB)") args = parser.parse_args() impl = getattr(google_crc32c, "implementation", None) @@ -105,11 +144,15 @@ async def run_benchmark(): sizes_to_test = [] for s in args.sizes.split(","): - try: - sizes_to_test.append((s.strip(), parse_size(s))) - except ValueError as e: - print(f"Error parsing size '{s}': {e}", file=sys.stderr) - sys.exit(1) + s_clean = s.strip().upper() + if s_clean == "FULL": + sizes_to_test.append(("full", 0)) + else: + try: + sizes_to_test.append((s.strip(), parse_size(s))) + except ValueError as e: + print(f"Error parsing size '{s}': {e}", file=sys.stderr) + sys.exit(1) try: object_size_bytes = parse_size(args.object_size) @@ -117,8 +160,31 @@ async def run_benchmark(): print(f"Error parsing object-size '{args.object_size}': {e}", file=sys.stderr) sys.exit(1) + try: + upload_chunk_size_bytes = parse_size(args.upload_chunk_size) + except ValueError as e: + print(f"Error parsing upload-chunk-size '{args.upload_chunk_size}': {e}", file=sys.stderr) + sys.exit(1) + + if upload_chunk_size_bytes > 100 * 1024 * 1024: + print("Error: max upload-chunk-size is 100MiB", file=sys.stderr) + sys.exit(1) + grpc_client = AsyncGrpcClient() + if args.upload: + try: + await upload_random_object( + grpc_client, + args.bucket, + args.object, + object_size_bytes, + upload_chunk_size_bytes, + ) + except Exception as e: + print(f"Upload failed: {e}", file=sys.stderr) + sys.exit(1) + # Warmup phase print("Warming up for 10 seconds to establish connections...") warmup_start = time.perf_counter() @@ -148,7 +214,10 @@ async def run_benchmark(): for size_str, size_bytes in sizes_to_test: # Pre-generate random offsets so that both Enabled and Disabled configurations run on the exact same offsets - offsets = [random.randint(0, object_size_bytes - size_bytes) for _ in range(args.iterations)] + if size_bytes == 0: + offsets = [0 for _ in range(args.iterations)] + else: + offsets = [random.randint(0, object_size_bytes - size_bytes) for _ in range(args.iterations)] enabled_throughput = None for enable_chk in [True, False]: @@ -176,7 +245,8 @@ async def run_benchmark(): median_time = statistics.median(durations) # Throughput in MiB/s - avg_throughput = (size_bytes / (1024 * 1024)) / mean_time + actual_size = object_size_bytes if size_bytes == 0 else size_bytes + avg_throughput = (actual_size / (1024 * 1024)) / mean_time percent_diff_str = "" if enable_chk: From 0334d2c3a5731555319a46d01e61ad83484a6383 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Mon, 8 Jun 2026 14:40:59 +0000 Subject: [PATCH 07/17] perf: align upload/download sizes and use random temp objects in MRD benchmark --- .../microbenchmarks/benchmark_mrd_reads.py | 260 +++++++++++------- 1 file changed, 156 insertions(+), 104 deletions(-) diff --git a/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py b/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py index 4e173b902edb..435645b4c0be 100644 --- a/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py +++ b/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py @@ -5,6 +5,7 @@ import statistics import sys import time +import uuid try: import google_crc32c @@ -128,11 +129,8 @@ async def upload_random_object( async def run_benchmark(): parser = argparse.ArgumentParser(description="Benchmark GCS Object Range Downloads using MRD.") parser.add_argument("--bucket", type=str, default="chandrasiri-benchmarks-zb", help="Bucket name") - parser.add_argument("--object", type=str, default="large_20260507_10737418240", help="Object name (10GiB size)") - parser.add_argument("--sizes", type=str, default="1KiB,2MiB,10MiB,100MiB,1GiB", help="Sizes to benchmark") + parser.add_argument("--sizes", type=str, default="1KiB,100KiB,1MiB,16MiB,100MiB,1GiB", help="Sizes to benchmark") parser.add_argument("--iterations", type=int, default=5, help="Number of iterations per size") - parser.add_argument("--object-size", type=str, default="10GiB", help="Size of the target GCS object (default: '10GiB')") - parser.add_argument("--upload", action="store_true", help="Upload a new random object before running the benchmark") parser.add_argument("--upload-chunk-size", type=str, default="2MiB", help="Chunk size for the upload (default: 2MiB, max: 100MiB)") args = parser.parse_args() @@ -144,21 +142,11 @@ async def run_benchmark(): sizes_to_test = [] for s in args.sizes.split(","): - s_clean = s.strip().upper() - if s_clean == "FULL": - sizes_to_test.append(("full", 0)) - else: - try: - sizes_to_test.append((s.strip(), parse_size(s))) - except ValueError as e: - print(f"Error parsing size '{s}': {e}", file=sys.stderr) - sys.exit(1) - - try: - object_size_bytes = parse_size(args.object_size) - except ValueError as e: - print(f"Error parsing object-size '{args.object_size}': {e}", file=sys.stderr) - sys.exit(1) + try: + sizes_to_test.append((s.strip(), parse_size(s))) + except ValueError as e: + print(f"Error parsing size '{s}': {e}", file=sys.stderr) + sys.exit(1) try: upload_chunk_size_bytes = parse_size(args.upload_chunk_size) @@ -172,101 +160,165 @@ async def run_benchmark(): grpc_client = AsyncGrpcClient() - if args.upload: + print(f"Benchmarking MRD Reads on gs://{args.bucket}/checksum_benchmarking__ with {args.iterations} iterations:") + print("-" * 150) + print(f"{'Size (String)':<15} | {'Checksum':<10} | {'Size (Bytes)':<12} | {'Min':<12} | {'Max':<12} | {'Mean':<12} | {'Median':<12} | {'Avg Throughput':<15} | {'% Chk-Disabled Change':<22}") + print("-" * 150) + + for size_str, size_bytes in sizes_to_test: + object_name = f"checksum_benchmarking_{size_str}_{uuid.uuid4().hex[:12]}" + try: + chunk_size = min(upload_chunk_size_bytes, size_bytes) await upload_random_object( grpc_client, args.bucket, - args.object, - object_size_bytes, - upload_chunk_size_bytes, + object_name, + size_bytes, + chunk_size, ) except Exception as e: - print(f"Upload failed: {e}", file=sys.stderr) + print(f"Upload failed for size {size_str}: {e}", file=sys.stderr) sys.exit(1) - # Warmup phase - print("Warming up for 10 seconds to establish connections...") - warmup_start = time.perf_counter() - warmup_downloads = 0 - while time.perf_counter() - warmup_start < 10.0: - # download a small chunk (10MiB) to warm up channels - start_byte = random.randint(0, object_size_bytes - 10 * 1024 * 1024) try: - await download_range( - grpc_client, - args.bucket, - args.object, - start_byte, - 10 * 1024 * 1024, - enable_checksum=True, - ) - warmup_downloads += 1 - except Exception: - pass - print(f"Warmup complete. Completed {warmup_downloads} warmup downloads.") - print() - - print(f"Benchmarking MRD Reads on gs://{args.bucket}/{args.object} with {args.iterations} iterations:") - print("-" * 150) - print(f"{'Size (String)':<15} | {'Checksum':<10} | {'Size (Bytes)':<12} | {'Min':<12} | {'Max':<12} | {'Mean':<12} | {'Median':<12} | {'Avg Throughput':<15} | {'% Chk-Disabled Change':<22}") - print("-" * 150) - - for size_str, size_bytes in sizes_to_test: - # Pre-generate random offsets so that both Enabled and Disabled configurations run on the exact same offsets - if size_bytes == 0: - offsets = [0 for _ in range(args.iterations)] - else: - offsets = [random.randint(0, object_size_bytes - size_bytes) for _ in range(args.iterations)] - enabled_throughput = None - - for enable_chk in [True, False]: - chk_label = "Enabled" if enable_chk else "Disabled" - durations = [] - - for i, start_byte in enumerate(offsets): - print(f" [{size_str} - Checksum {chk_label}] Iteration {i+1}/{args.iterations}: Downloading from offset {start_byte}...", end="", flush=True) - - try: - duration = await download_range(grpc_client, args.bucket, args.object, start_byte, size_bytes, enable_checksum=enable_chk) - durations.append(duration) - print(f" Done in {format_time(duration)}") - except Exception as e: - print(f" Failed: {e}") - continue - - if not durations: - print(f"{size_str:<15} | {chk_label:<10} | {size_bytes:<12} | {'FAILED':<12} | {'FAILED':<12} | {'FAILED':<12} | {'FAILED':<12} | {'N/A':<15} | {'N/A':<22}") - continue - - min_time = min(durations) - max_time = max(durations) - mean_time = statistics.mean(durations) - median_time = statistics.median(durations) - - # Throughput in MiB/s - actual_size = object_size_bytes if size_bytes == 0 else size_bytes - avg_throughput = (actual_size / (1024 * 1024)) / mean_time - - percent_diff_str = "" - if enable_chk: - enabled_throughput = avg_throughput - percent_diff_str = "N/A" - else: - if enabled_throughput is not None and enabled_throughput > 0: - percent_increase = ((avg_throughput - enabled_throughput) / enabled_throughput) * 100 - percent_diff_str = f"{percent_increase:+.2f}%" + enabled_throughput_full = None + enabled_throughput_minus_1 = None + + for enable_chk in [True, False]: + chk_label = "Enabled" if enable_chk else "Disabled" + + # Warmup phase using the uploaded object + print(f"Warming up for 10 seconds using object {object_name} (Checksum {chk_label})...") + warmup_start = time.perf_counter() + warmup_downloads = 0 + warmup_chunk_size = min(10 * 1024 * 1024, size_bytes) + while time.perf_counter() - warmup_start < 10.0: + if size_bytes > warmup_chunk_size: + start_byte = random.randint(0, size_bytes - warmup_chunk_size) + else: + start_byte = 0 + try: + await download_range( + grpc_client, + args.bucket, + object_name, + start_byte, + warmup_chunk_size, + enable_checksum=enable_chk, + ) + warmup_downloads += 1 + except Exception: + pass + print(f"Warmup complete. Completed {warmup_downloads} warmup downloads.") + print() + + durations_full = [] + durations_minus_1 = [] + + for i in range(args.iterations): + # Download entire object + print(f" [{size_str} (Full) - Checksum {chk_label}] Iteration {i+1}/{args.iterations}: Downloading from offset 0...", end="", flush=True) + try: + duration = await download_range( + grpc_client, + args.bucket, + object_name, + 0, + size_bytes, + enable_checksum=enable_chk, + ) + durations_full.append(duration) + print(f" Done in {format_time(duration)}") + except Exception as e: + print(f" Failed: {e}") + + # Download entire object - 1 byte + if size_bytes > 1: + print(f" [{size_str} (Full-1) - Checksum {chk_label}] Iteration {i+1}/{args.iterations}: Downloading from offset 0...", end="", flush=True) + try: + duration = await download_range( + grpc_client, + args.bucket, + object_name, + 0, + size_bytes - 1, + enable_checksum=enable_chk, + ) + durations_minus_1.append(duration) + print(f" Done in {format_time(duration)}") + except Exception as e: + print(f" Failed: {e}") + + print() + + # Reporting for Full + if not durations_full: + print(f"{f'{size_str} (Full)':<15} | {chk_label:<10} | {size_bytes:<12} | {'FAILED':<12} | {'FAILED':<12} | {'FAILED':<12} | {'FAILED':<12} | {'N/A':<15} | {'N/A':<22}") else: - percent_diff_str = "N/A" - - print( - f"{size_str:<15} | {chk_label:<10} | {size_bytes:<12} | " - f"{format_time(min_time):<12} | {format_time(max_time):<12} | " - f"{format_time(mean_time):<12} | {format_time(median_time):<12} | " - f"{avg_throughput:.2f} MiB/s | " - f"{percent_diff_str:<22}" - ) - print("-" * 150) + min_time = min(durations_full) + max_time = max(durations_full) + mean_time = statistics.mean(durations_full) + median_time = statistics.median(durations_full) + avg_throughput = (size_bytes / (1024 * 1024)) / mean_time + + percent_diff_str = "" + if enable_chk: + enabled_throughput_full = avg_throughput + percent_diff_str = "N/A" + else: + if enabled_throughput_full is not None and enabled_throughput_full > 0: + percent_increase = ((avg_throughput - enabled_throughput_full) / enabled_throughput_full) * 100 + percent_diff_str = f"{percent_increase:+.2f}%" + else: + percent_diff_str = "N/A" + + print( + f"{f'{size_str} (Full)':<15} | {chk_label:<10} | {size_bytes:<12} | " + f"{format_time(min_time):<12} | {format_time(max_time):<12} | " + f"{format_time(mean_time):<12} | {format_time(median_time):<12} | " + f"{avg_throughput:.2f} MiB/s | " + f"{percent_diff_str:<22}" + ) + + # Reporting for Full-1 + if size_bytes > 1: + if not durations_minus_1: + print(f"{f'{size_str} (Full-1)':<15} | {chk_label:<10} | {size_bytes - 1:<12} | {'FAILED':<12} | {'FAILED':<12} | {'FAILED':<12} | {'FAILED':<12} | {'N/A':<15} | {'N/A':<22}") + else: + min_time = min(durations_minus_1) + max_time = max(durations_minus_1) + mean_time = statistics.mean(durations_minus_1) + median_time = statistics.median(durations_minus_1) + avg_throughput = ((size_bytes - 1) / (1024 * 1024)) / mean_time + + percent_diff_str = "" + if enable_chk: + enabled_throughput_minus_1 = avg_throughput + percent_diff_str = "N/A" + else: + if enabled_throughput_minus_1 is not None and enabled_throughput_minus_1 > 0: + percent_increase = ((avg_throughput - enabled_throughput_minus_1) / enabled_throughput_minus_1) * 100 + percent_diff_str = f"{percent_increase:+.2f}%" + else: + percent_diff_str = "N/A" + + print( + f"{f'{size_str} (Full-1)':<15} | {chk_label:<10} | {size_bytes - 1:<12} | " + f"{format_time(min_time):<12} | {format_time(max_time):<12} | " + f"{format_time(mean_time):<12} | {format_time(median_time):<12} | " + f"{avg_throughput:.2f} MiB/s | " + f"{percent_diff_str:<22}" + ) + + print("-" * 150) + finally: + try: + print(f"Cleaning up object gs://{args.bucket}/{object_name}...") + await grpc_client.delete_object(args.bucket, object_name) + print(f"Cleanup complete.") + except Exception as e: + print(f"Warning: failed to delete test object {object_name}: {e}", file=sys.stderr) def main(): From f268e4fdfc61cea360b837fdf9da1f9d9582fe9a Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Mon, 8 Jun 2026 14:48:26 +0000 Subject: [PATCH 08/17] perf: migrate prints to stderr logging, introducing --debug option for progress output --- .../microbenchmarks/benchmark_mrd_reads.py | 46 +++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py b/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py index 435645b4c0be..bf9f59b492ea 100644 --- a/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py +++ b/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py @@ -6,6 +6,7 @@ import sys import time import uuid +import logging try: import google_crc32c @@ -97,8 +98,8 @@ async def upload_random_object( total_size_bytes: int, chunk_size_bytes: int, ): - print(f"Uploading a new random object of size {total_size_bytes} bytes to gs://{bucket_name}/{object_name}...") - print(f"Upload chunk size: {chunk_size_bytes} bytes") + logging.debug(f"Uploading a new random object of size {total_size_bytes} bytes to gs://{bucket_name}/{object_name}...") + logging.debug(f"Upload chunk size: {chunk_size_bytes} bytes") writer = AsyncAppendableObjectWriter( client=grpc_client, @@ -123,7 +124,7 @@ async def upload_random_object( uploaded_bytes += bytes_to_write object_resource = await writer.finalize() - print(f"Appendable object {object_name} created and finalized. Uploaded {uploaded_bytes} bytes.") + logging.debug(f"Appendable object {object_name} created and finalized. Uploaded {uploaded_bytes} bytes.") async def run_benchmark(): @@ -132,12 +133,16 @@ async def run_benchmark(): parser.add_argument("--sizes", type=str, default="1KiB,100KiB,1MiB,16MiB,100MiB,1GiB", help="Sizes to benchmark") parser.add_argument("--iterations", type=int, default=5, help="Number of iterations per size") parser.add_argument("--upload-chunk-size", type=str, default="2MiB", help="Chunk size for the upload (default: 2MiB, max: 100MiB)") + parser.add_argument("--debug", action="store_true", help="Print debug/progress logs") args = parser.parse_args() + log_level = logging.DEBUG if args.debug else logging.WARNING + logging.basicConfig(level=log_level, format="%(asctime)s [%(levelname)s] %(message)s") + impl = getattr(google_crc32c, "implementation", None) - print(f"google_crc32c implementation: {impl}") + logging.info(f"google_crc32c implementation: {impl}") if impl != "c": - print(f"Error: google_crc32c implementation is '{impl}', expected 'c'", file=sys.stderr) + logging.error(f"Error: google_crc32c implementation is '{impl}', expected 'c'") sys.exit(1) sizes_to_test = [] @@ -145,17 +150,17 @@ async def run_benchmark(): try: sizes_to_test.append((s.strip(), parse_size(s))) except ValueError as e: - print(f"Error parsing size '{s}': {e}", file=sys.stderr) + logging.error(f"Error parsing size '{s}': {e}") sys.exit(1) try: upload_chunk_size_bytes = parse_size(args.upload_chunk_size) except ValueError as e: - print(f"Error parsing upload-chunk-size '{args.upload_chunk_size}': {e}", file=sys.stderr) + logging.error(f"Error parsing upload-chunk-size '{args.upload_chunk_size}': {e}") sys.exit(1) if upload_chunk_size_bytes > 100 * 1024 * 1024: - print("Error: max upload-chunk-size is 100MiB", file=sys.stderr) + logging.error("Error: max upload-chunk-size is 100MiB") sys.exit(1) grpc_client = AsyncGrpcClient() @@ -178,7 +183,7 @@ async def run_benchmark(): chunk_size, ) except Exception as e: - print(f"Upload failed for size {size_str}: {e}", file=sys.stderr) + logging.error(f"Upload failed for size {size_str}: {e}") sys.exit(1) try: @@ -189,7 +194,7 @@ async def run_benchmark(): chk_label = "Enabled" if enable_chk else "Disabled" # Warmup phase using the uploaded object - print(f"Warming up for 10 seconds using object {object_name} (Checksum {chk_label})...") + logging.info(f"Warming up for 10 seconds using object {object_name} (Checksum {chk_label})...") warmup_start = time.perf_counter() warmup_downloads = 0 warmup_chunk_size = min(10 * 1024 * 1024, size_bytes) @@ -210,15 +215,13 @@ async def run_benchmark(): warmup_downloads += 1 except Exception: pass - print(f"Warmup complete. Completed {warmup_downloads} warmup downloads.") - print() + logging.info(f"Warmup complete. Completed {warmup_downloads} warmup downloads.") durations_full = [] durations_minus_1 = [] for i in range(args.iterations): # Download entire object - print(f" [{size_str} (Full) - Checksum {chk_label}] Iteration {i+1}/{args.iterations}: Downloading from offset 0...", end="", flush=True) try: duration = await download_range( grpc_client, @@ -229,13 +232,12 @@ async def run_benchmark(): enable_checksum=enable_chk, ) durations_full.append(duration) - print(f" Done in {format_time(duration)}") + logging.debug(f" [{size_str} (Full) - Checksum {chk_label}] Iteration {i+1}/{args.iterations}: Done in {format_time(duration)}") except Exception as e: - print(f" Failed: {e}") + logging.error(f" [{size_str} (Full) - Checksum {chk_label}] Iteration {i+1}/{args.iterations}: Failed: {e}") # Download entire object - 1 byte if size_bytes > 1: - print(f" [{size_str} (Full-1) - Checksum {chk_label}] Iteration {i+1}/{args.iterations}: Downloading from offset 0...", end="", flush=True) try: duration = await download_range( grpc_client, @@ -246,11 +248,9 @@ async def run_benchmark(): enable_checksum=enable_chk, ) durations_minus_1.append(duration) - print(f" Done in {format_time(duration)}") + logging.debug(f" [{size_str} (Full-1) - Checksum {chk_label}] Iteration {i+1}/{args.iterations}: Done in {format_time(duration)}") except Exception as e: - print(f" Failed: {e}") - - print() + logging.error(f" [{size_str} (Full-1) - Checksum {chk_label}] Iteration {i+1}/{args.iterations}: Failed: {e}") # Reporting for Full if not durations_full: @@ -314,11 +314,11 @@ async def run_benchmark(): print("-" * 150) finally: try: - print(f"Cleaning up object gs://{args.bucket}/{object_name}...") + logging.info(f"Cleaning up object gs://{args.bucket}/{object_name}...") await grpc_client.delete_object(args.bucket, object_name) - print(f"Cleanup complete.") + logging.info(f"Cleanup complete.") except Exception as e: - print(f"Warning: failed to delete test object {object_name}: {e}", file=sys.stderr) + logging.warning(f"Warning: failed to delete test object {object_name}: {e}") def main(): From 1fe6c6921f5087822b0fea32946b36ce4fb5cc71 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Mon, 8 Jun 2026 14:52:09 +0000 Subject: [PATCH 09/17] perf: skip Full-1 case when checksum validation is disabled --- .../tests/perf/microbenchmarks/benchmark_mrd_reads.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py b/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py index bf9f59b492ea..89db3248cd06 100644 --- a/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py +++ b/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py @@ -237,7 +237,7 @@ async def run_benchmark(): logging.error(f" [{size_str} (Full) - Checksum {chk_label}] Iteration {i+1}/{args.iterations}: Failed: {e}") # Download entire object - 1 byte - if size_bytes > 1: + if size_bytes > 1 and enable_chk: try: duration = await download_range( grpc_client, @@ -282,7 +282,7 @@ async def run_benchmark(): ) # Reporting for Full-1 - if size_bytes > 1: + if size_bytes > 1 and enable_chk: if not durations_minus_1: print(f"{f'{size_str} (Full-1)':<15} | {chk_label:<10} | {size_bytes - 1:<12} | {'FAILED':<12} | {'FAILED':<12} | {'FAILED':<12} | {'FAILED':<12} | {'N/A':<15} | {'N/A':<22}") else: From 9403ed4aab43b2a9a7d71dfa183feee32392022c Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Mon, 8 Jun 2026 15:12:02 +0000 Subject: [PATCH 10/17] perf: add pytest-benchmark test for checksum overhead in MRD reads --- .../microbenchmarks/test_checksum_overhead.py | 176 ++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 packages/google-cloud-storage/tests/perf/microbenchmarks/test_checksum_overhead.py diff --git a/packages/google-cloud-storage/tests/perf/microbenchmarks/test_checksum_overhead.py b/packages/google-cloud-storage/tests/perf/microbenchmarks/test_checksum_overhead.py new file mode 100644 index 000000000000..eb930212ae89 --- /dev/null +++ b/packages/google-cloud-storage/tests/perf/microbenchmarks/test_checksum_overhead.py @@ -0,0 +1,176 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import asyncio +import os +import random +import time +import uuid +import pytest + +from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage.asyncio.async_multi_range_downloader import AsyncMultiRangeDownloader +from google.cloud.storage.asyncio.async_appendable_object_writer import AsyncAppendableObjectWriter + +DEFAULT_BUCKET = os.environ.get("DEFAULT_RAPID_ZONAL_BUCKET", "chandrasiri-benchmarks-zb") + + +class VoidBuffer: + """A writeable file-like object that discards written data to save memory.""" + def __init__(self): + self.size = 0 + + def write(self, data: bytes) -> int: + n = len(data) + self.size += n + return n + + def tell(self) -> int: + return self.size + + +async def download_range( + grpc_client: AsyncGrpcClient, + bucket_name: str, + object_name: str, + start_byte: int, + size: int, + enable_checksum: bool, +): + mrd = AsyncMultiRangeDownloader(grpc_client, bucket_name, object_name) + try: + await mrd.open() + output_buffer = VoidBuffer() + await mrd.download_ranges( + [(start_byte, size, output_buffer)], + enable_checksum=enable_checksum, + ) + finally: + if mrd.is_stream_open: + await mrd.close() + + +async def upload_random_object( + grpc_client: AsyncGrpcClient, + bucket_name: str, + object_name: str, + total_size_bytes: int, + chunk_size_bytes: int = 2 * 1024 * 1024, +): + writer = AsyncAppendableObjectWriter( + client=grpc_client, + bucket_name=bucket_name, + object_name=object_name, + generation=0, + ) + await writer.open() + uploaded_bytes = 0 + buffer_size = min(10 * 1024 * 1024, total_size_bytes) + random_buffer = os.urandom(buffer_size) + while uploaded_bytes < total_size_bytes: + bytes_to_write = min(chunk_size_bytes, total_size_bytes - uploaded_bytes) + slice_start = (uploaded_bytes) % (buffer_size - bytes_to_write + 1) + data = random_buffer[slice_start : slice_start + bytes_to_write] + await writer.append(data) + uploaded_bytes += bytes_to_write + await writer.finalize() + + +@pytest.mark.parametrize( + "download_size", + [ + 1024, # 1KiB + 100 * 1024, # 100KiB + 1024 * 1024, # 1MiB + 16 * 1024 * 1024, # 16MiB + 100 * 1024 * 1024, # 100MiB + 1024 * 1024 * 1024, # 1GiB + ], + ids=["1KiB", "100KiB", "1MiB", "16MiB", "100MiB", "1GiB"], +) +@pytest.mark.parametrize("enable_checksum", [True, False], ids=["checksum_enabled", "checksum_disabled"]) +def test_checksum_overhead(benchmark, download_size, enable_checksum): + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + grpc_client = AsyncGrpcClient() + + object_name = f"checksum_benchmarking_{download_size}_{uuid.uuid4().hex[:12]}" + + # 1. upload an object + chunk_size = min(2 * 1024 * 1024, download_size) + loop.run_until_complete( + upload_random_object( + grpc_client, + DEFAULT_BUCKET, + object_name, + download_size, + chunk_size, + ) + ) + + try: + # 2. warmup + warmup_start = time.perf_counter() + warmup_chunk_size = min(10 * 1024 * 1024, download_size) + while time.perf_counter() - warmup_start < 10.0: + if download_size > warmup_chunk_size: + start_byte = random.randint(0, download_size - warmup_chunk_size) + else: + start_byte = 0 + try: + loop.run_until_complete( + download_range( + grpc_client, + DEFAULT_BUCKET, + object_name, + start_byte, + warmup_chunk_size, + enable_checksum, + ) + ) + except Exception: + pass + + # 3. download range (0, download_size) for 5 rounds + def run_download(): + loop.run_until_complete( + download_range( + grpc_client, + DEFAULT_BUCKET, + object_name, + 0, + download_size, + enable_checksum, + ) + ) + + benchmark.pedantic( + target=run_download, + iterations=1, + rounds=5, + ) + + finally: + # 4. delete the object + try: + loop.run_until_complete(grpc_client.delete_object(DEFAULT_BUCKET, object_name)) + except Exception: + pass + + tasks = asyncio.all_tasks(loop=loop) + for task in tasks: + task.cancel() + if tasks: + loop.run_until_complete(asyncio.gather(*tasks, return_exceptions=True)) + loop.close() From bc0ac8d99559c9245b4f7c5283ac1ccf4e8829f6 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Mon, 8 Jun 2026 15:15:42 +0000 Subject: [PATCH 11/17] perf: calculate and report average throughput in test_checksum_overhead --- .../perf/microbenchmarks/test_checksum_overhead.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/packages/google-cloud-storage/tests/perf/microbenchmarks/test_checksum_overhead.py b/packages/google-cloud-storage/tests/perf/microbenchmarks/test_checksum_overhead.py index eb930212ae89..4d152f5796ea 100644 --- a/packages/google-cloud-storage/tests/perf/microbenchmarks/test_checksum_overhead.py +++ b/packages/google-cloud-storage/tests/perf/microbenchmarks/test_checksum_overhead.py @@ -105,6 +105,9 @@ def test_checksum_overhead(benchmark, download_size, enable_checksum): asyncio.set_event_loop(loop) grpc_client = AsyncGrpcClient() + download_bytes_list = [] + download_elapsed_times = [] + object_name = f"checksum_benchmarking_{download_size}_{uuid.uuid4().hex[:12]}" # 1. upload an object @@ -144,6 +147,7 @@ def test_checksum_overhead(benchmark, download_size, enable_checksum): # 3. download range (0, download_size) for 5 rounds def run_download(): + start_time = time.perf_counter() loop.run_until_complete( download_range( grpc_client, @@ -154,6 +158,9 @@ def run_download(): enable_checksum, ) ) + elapsed = time.perf_counter() - start_time + download_bytes_list.append(download_size) + download_elapsed_times.append(elapsed) benchmark.pedantic( target=run_download, @@ -162,6 +169,13 @@ def run_download(): ) finally: + if download_elapsed_times: + total_bytes = sum(download_bytes_list) + total_time = sum(download_elapsed_times) + throughput_mib_s = (total_bytes / total_time) / (1024 * 1024) + benchmark.extra_info["avg_throughput_mib_s"] = f"{throughput_mib_s:.2f}" + print(f"\nAvg Throughput: {throughput_mib_s:.2f} MiB/s") + # 4. delete the object try: loop.run_until_complete(grpc_client.delete_object(DEFAULT_BUCKET, object_name)) From 3efb0110089007f003f314c6e7af24a8da5a9491 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Mon, 8 Jun 2026 16:55:14 +0000 Subject: [PATCH 12/17] perf: convert test parameter to (object_size, download_size) tuple, testing Full and Full-1 ranges --- .../microbenchmarks/test_checksum_overhead.py | 48 ++++++++++++------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/packages/google-cloud-storage/tests/perf/microbenchmarks/test_checksum_overhead.py b/packages/google-cloud-storage/tests/perf/microbenchmarks/test_checksum_overhead.py index 4d152f5796ea..6af26ea04082 100644 --- a/packages/google-cloud-storage/tests/perf/microbenchmarks/test_checksum_overhead.py +++ b/packages/google-cloud-storage/tests/perf/microbenchmarks/test_checksum_overhead.py @@ -23,7 +23,7 @@ from google.cloud.storage.asyncio.async_multi_range_downloader import AsyncMultiRangeDownloader from google.cloud.storage.asyncio.async_appendable_object_writer import AsyncAppendableObjectWriter -DEFAULT_BUCKET = os.environ.get("DEFAULT_RAPID_ZONAL_BUCKET", "chandrasiri-benchmarks-zb") +DEFAULT_BUCKET = os.environ.get("DEFAULT_RAPID_ZONAL_BUCKET", "chandrasiri-gcsfs-zb") class VoidBuffer: @@ -88,19 +88,35 @@ async def upload_random_object( @pytest.mark.parametrize( - "download_size", + "object_size,download_size", [ - 1024, # 1KiB - 100 * 1024, # 100KiB - 1024 * 1024, # 1MiB - 16 * 1024 * 1024, # 16MiB - 100 * 1024 * 1024, # 100MiB - 1024 * 1024 * 1024, # 1GiB + (1024, 1024), # 1KiB Full + (1024, 1024 - 1), # 1KiB Full-1 + (100 * 1024, 100 * 1024), # 100KiB Full + (100 * 1024, 100 * 1024 - 1), # 100KiB Full-1 + (1024 * 1024, 1024 * 1024), # 1MiB Full + (1024 * 1024, 1024 * 1024 - 1),# 1MiB Full-1 + (16 * 1024 * 1024, 16 * 1024 * 1024), # 16MiB Full + (16 * 1024 * 1024, 16 * 1024 * 1024 - 1), # 16MiB Full-1 + (100 * 1024 * 1024, 100 * 1024 * 1024), # 100MiB Full + (100 * 1024 * 1024, 100 * 1024 * 1024 - 1),# 100MiB Full-1 + (1024 * 1024 * 1024, 1024 * 1024 * 1024), # 1GiB Full + (1024 * 1024 * 1024, 1024 * 1024 * 1024 - 1),# 1GiB Full-1 + ], + ids=[ + "1KiB-Full", "1KiB-Full-1", + "100KiB-Full", "100KiB-Full-1", + "1MiB-Full", "1MiB-Full-1", + "16MiB-Full", "16MiB-Full-1", + "100MiB-Full", "100MiB-Full-1", + "1GiB-Full", "1GiB-Full-1" ], - ids=["1KiB", "100KiB", "1MiB", "16MiB", "100MiB", "1GiB"], ) @pytest.mark.parametrize("enable_checksum", [True, False], ids=["checksum_enabled", "checksum_disabled"]) -def test_checksum_overhead(benchmark, download_size, enable_checksum): +def test_checksum_overhead(benchmark, object_size, download_size, enable_checksum): + if not enable_checksum and download_size == object_size - 1: + pytest.skip("Skip Full-1 range download when checksum is disabled") + loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) grpc_client = AsyncGrpcClient() @@ -108,16 +124,16 @@ def test_checksum_overhead(benchmark, download_size, enable_checksum): download_bytes_list = [] download_elapsed_times = [] - object_name = f"checksum_benchmarking_{download_size}_{uuid.uuid4().hex[:12]}" + object_name = f"checksum_benchmarking_{object_size}_{uuid.uuid4().hex[:12]}" # 1. upload an object - chunk_size = min(2 * 1024 * 1024, download_size) + chunk_size = min(2 * 1024 * 1024, object_size) loop.run_until_complete( upload_random_object( grpc_client, DEFAULT_BUCKET, object_name, - download_size, + object_size, chunk_size, ) ) @@ -125,10 +141,10 @@ def test_checksum_overhead(benchmark, download_size, enable_checksum): try: # 2. warmup warmup_start = time.perf_counter() - warmup_chunk_size = min(10 * 1024 * 1024, download_size) + warmup_chunk_size = min(10 * 1024 * 1024, object_size) while time.perf_counter() - warmup_start < 10.0: - if download_size > warmup_chunk_size: - start_byte = random.randint(0, download_size - warmup_chunk_size) + if object_size > warmup_chunk_size: + start_byte = random.randint(0, object_size - warmup_chunk_size) else: start_byte = 0 try: From 183e29727580557b4e26169d95c3ecb59ce967c2 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Mon, 8 Jun 2026 17:08:53 +0000 Subject: [PATCH 13/17] perf: upload fresh object for each enable_chk iteration in MRD reads benchmark --- .../microbenchmarks/benchmark_mrd_reads.py | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py b/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py index 89db3248cd06..596549bd4fea 100644 --- a/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py +++ b/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py @@ -171,26 +171,26 @@ async def run_benchmark(): print("-" * 150) for size_str, size_bytes in sizes_to_test: - object_name = f"checksum_benchmarking_{size_str}_{uuid.uuid4().hex[:12]}" + enabled_throughput_full = None + enabled_throughput_minus_1 = None - try: - chunk_size = min(upload_chunk_size_bytes, size_bytes) - await upload_random_object( - grpc_client, - args.bucket, - object_name, - size_bytes, - chunk_size, - ) - except Exception as e: - logging.error(f"Upload failed for size {size_str}: {e}") - sys.exit(1) + for enable_chk in [True, False]: + object_name = f"checksum_benchmarking_{size_str}_{uuid.uuid4().hex[:12]}" - try: - enabled_throughput_full = None - enabled_throughput_minus_1 = None + try: + chunk_size = min(upload_chunk_size_bytes, size_bytes) + await upload_random_object( + grpc_client, + args.bucket, + object_name, + size_bytes, + chunk_size, + ) + except Exception as e: + logging.error(f"Upload failed for size {size_str}: {e}") + sys.exit(1) - for enable_chk in [True, False]: + try: chk_label = "Enabled" if enable_chk else "Disabled" # Warmup phase using the uploaded object @@ -312,13 +312,13 @@ async def run_benchmark(): ) print("-" * 150) - finally: - try: - logging.info(f"Cleaning up object gs://{args.bucket}/{object_name}...") - await grpc_client.delete_object(args.bucket, object_name) - logging.info(f"Cleanup complete.") - except Exception as e: - logging.warning(f"Warning: failed to delete test object {object_name}: {e}") + finally: + try: + logging.info(f"Cleaning up object gs://{args.bucket}/{object_name}...") + await grpc_client.delete_object(args.bucket, object_name) + logging.info(f"Cleanup complete.") + except Exception as e: + logging.warning(f"Warning: failed to delete test object {object_name}: {e}") def main(): From 39d633c5b80f5d4058cbc9151f9924b505ac9a67 Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Mon, 8 Jun 2026 17:16:37 +0000 Subject: [PATCH 14/17] perf: compare Full-1 throughput with Full baseline in MRD reads benchmark --- .../tests/perf/microbenchmarks/benchmark_mrd_reads.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py b/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py index 596549bd4fea..b887d2e0f73b 100644 --- a/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py +++ b/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py @@ -295,7 +295,11 @@ async def run_benchmark(): percent_diff_str = "" if enable_chk: enabled_throughput_minus_1 = avg_throughput - percent_diff_str = "N/A" + if enabled_throughput_full is not None and enabled_throughput_full > 0: + percent_increase = ((enabled_throughput_minus_1 - enabled_throughput_full) / enabled_throughput_full) * 100 + percent_diff_str = f"{percent_increase:+.2f}%" + else: + percent_diff_str = "N/A" else: if enabled_throughput_minus_1 is not None and enabled_throughput_minus_1 > 0: percent_increase = ((avg_throughput - enabled_throughput_minus_1) / enabled_throughput_minus_1) * 100 From ec174cde38534618e9d3518989b85c89d9398a0f Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Tue, 9 Jun 2026 04:14:06 +0000 Subject: [PATCH 15/17] perf: make test_checksum_overhead rounds configurable via BENCHMARK_ROUNDS env var --- .../tests/perf/microbenchmarks/test_checksum_overhead.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-storage/tests/perf/microbenchmarks/test_checksum_overhead.py b/packages/google-cloud-storage/tests/perf/microbenchmarks/test_checksum_overhead.py index 6af26ea04082..1d90a9947cdd 100644 --- a/packages/google-cloud-storage/tests/perf/microbenchmarks/test_checksum_overhead.py +++ b/packages/google-cloud-storage/tests/perf/microbenchmarks/test_checksum_overhead.py @@ -24,6 +24,7 @@ from google.cloud.storage.asyncio.async_appendable_object_writer import AsyncAppendableObjectWriter DEFAULT_BUCKET = os.environ.get("DEFAULT_RAPID_ZONAL_BUCKET", "chandrasiri-gcsfs-zb") +DEFAULT_ROUNDS = int(os.environ.get("BENCHMARK_ROUNDS", "5")) class VoidBuffer: @@ -181,7 +182,7 @@ def run_download(): benchmark.pedantic( target=run_download, iterations=1, - rounds=5, + rounds=DEFAULT_ROUNDS, ) finally: From 9db8398178b0197b9717b9c19ba40e5c5d885a8e Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Tue, 9 Jun 2026 04:15:27 +0000 Subject: [PATCH 16/17] perf: calculate standard deviation in throughput and elapsed time, adding them to benchmark.extra_info --- .../perf/microbenchmarks/test_checksum_overhead.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/packages/google-cloud-storage/tests/perf/microbenchmarks/test_checksum_overhead.py b/packages/google-cloud-storage/tests/perf/microbenchmarks/test_checksum_overhead.py index 1d90a9947cdd..eba6d951c6ef 100644 --- a/packages/google-cloud-storage/tests/perf/microbenchmarks/test_checksum_overhead.py +++ b/packages/google-cloud-storage/tests/perf/microbenchmarks/test_checksum_overhead.py @@ -17,6 +17,7 @@ import random import time import uuid +import statistics import pytest from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient @@ -193,6 +194,18 @@ def run_download(): benchmark.extra_info["avg_throughput_mib_s"] = f"{throughput_mib_s:.2f}" print(f"\nAvg Throughput: {throughput_mib_s:.2f} MiB/s") + if len(download_elapsed_times) > 1: + stdev_time = statistics.stdev(download_elapsed_times) + throughputs = [download_size / t / (1024 * 1024) for t in download_elapsed_times] + stdev_throughput = statistics.stdev(throughputs) + else: + stdev_time = 0.0 + stdev_throughput = 0.0 + + benchmark.extra_info["stdev_throughput_mib_s"] = f"{stdev_throughput:.2f}" + benchmark.extra_info["avg_elapsed_time_s"] = f"{total_time / len(download_elapsed_times):.4f}" + benchmark.extra_info["stdev_elapsed_time_s"] = f"{stdev_time:.4f}" + # 4. delete the object try: loop.run_until_complete(grpc_client.delete_object(DEFAULT_BUCKET, object_name)) From 56c39686f994113c6198cdd24b4c82e99f8a486e Mon Sep 17 00:00:00 2001 From: Chandra Sirimala Date: Tue, 9 Jun 2026 04:33:33 +0000 Subject: [PATCH 17/17] perf: add standard deviation to throughput reporting in MRD reads benchmark --- .../microbenchmarks/benchmark_mrd_reads.py | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py b/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py index b887d2e0f73b..556ce45231b8 100644 --- a/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py +++ b/packages/google-cloud-storage/tests/perf/microbenchmarks/benchmark_mrd_reads.py @@ -166,9 +166,9 @@ async def run_benchmark(): grpc_client = AsyncGrpcClient() print(f"Benchmarking MRD Reads on gs://{args.bucket}/checksum_benchmarking__ with {args.iterations} iterations:") - print("-" * 150) - print(f"{'Size (String)':<15} | {'Checksum':<10} | {'Size (Bytes)':<12} | {'Min':<12} | {'Max':<12} | {'Mean':<12} | {'Median':<12} | {'Avg Throughput':<15} | {'% Chk-Disabled Change':<22}") - print("-" * 150) + print("-" * 161) + print(f"{'Size (String)':<15} | {'Checksum':<10} | {'Size (Bytes)':<12} | {'Min':<12} | {'Max':<12} | {'Mean':<12} | {'Median':<12} | {'Avg Throughput (\u00b1 StdDev)':<26} | {'% Chk-Disabled Change':<22}") + print("-" * 161) for size_str, size_bytes in sizes_to_test: enabled_throughput_full = None @@ -254,13 +254,16 @@ async def run_benchmark(): # Reporting for Full if not durations_full: - print(f"{f'{size_str} (Full)':<15} | {chk_label:<10} | {size_bytes:<12} | {'FAILED':<12} | {'FAILED':<12} | {'FAILED':<12} | {'FAILED':<12} | {'N/A':<15} | {'N/A':<22}") + print(f"{f'{size_str} (Full)':<15} | {chk_label:<10} | {size_bytes:<12} | {'FAILED':<12} | {'FAILED':<12} | {'FAILED':<12} | {'FAILED':<12} | {'N/A':<26} | {'N/A':<22}") else: min_time = min(durations_full) max_time = max(durations_full) mean_time = statistics.mean(durations_full) median_time = statistics.median(durations_full) + throughputs = [(size_bytes / (1024 * 1024)) / d for d in durations_full] avg_throughput = (size_bytes / (1024 * 1024)) / mean_time + std_dev = statistics.stdev(throughputs) if len(throughputs) >= 2 else 0.0 + throughput_str = f"{avg_throughput:.2f} \u00b1 {std_dev:.2f} MiB/s" percent_diff_str = "" if enable_chk: @@ -277,20 +280,23 @@ async def run_benchmark(): f"{f'{size_str} (Full)':<15} | {chk_label:<10} | {size_bytes:<12} | " f"{format_time(min_time):<12} | {format_time(max_time):<12} | " f"{format_time(mean_time):<12} | {format_time(median_time):<12} | " - f"{avg_throughput:.2f} MiB/s | " + f"{throughput_str:<26} | " f"{percent_diff_str:<22}" ) # Reporting for Full-1 if size_bytes > 1 and enable_chk: if not durations_minus_1: - print(f"{f'{size_str} (Full-1)':<15} | {chk_label:<10} | {size_bytes - 1:<12} | {'FAILED':<12} | {'FAILED':<12} | {'FAILED':<12} | {'FAILED':<12} | {'N/A':<15} | {'N/A':<22}") + print(f"{f'{size_str} (Full-1)':<15} | {chk_label:<10} | {size_bytes - 1:<12} | {'FAILED':<12} | {'FAILED':<12} | {'FAILED':<12} | {'FAILED':<12} | {'N/A':<26} | {'N/A':<22}") else: min_time = min(durations_minus_1) max_time = max(durations_minus_1) mean_time = statistics.mean(durations_minus_1) median_time = statistics.median(durations_minus_1) + throughputs = [((size_bytes - 1) / (1024 * 1024)) / d for d in durations_minus_1] avg_throughput = ((size_bytes - 1) / (1024 * 1024)) / mean_time + std_dev = statistics.stdev(throughputs) if len(throughputs) >= 2 else 0.0 + throughput_str = f"{avg_throughput:.2f} \u00b1 {std_dev:.2f} MiB/s" percent_diff_str = "" if enable_chk: @@ -311,11 +317,11 @@ async def run_benchmark(): f"{f'{size_str} (Full-1)':<15} | {chk_label:<10} | {size_bytes - 1:<12} | " f"{format_time(min_time):<12} | {format_time(max_time):<12} | " f"{format_time(mean_time):<12} | {format_time(median_time):<12} | " - f"{avg_throughput:.2f} MiB/s | " + f"{throughput_str:<26} | " f"{percent_diff_str:<22}" ) - print("-" * 150) + print("-" * 161) finally: try: logging.info(f"Cleaning up object gs://{args.bucket}/{object_name}...")