Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit. Hold Shift + click to select a range.
75d0127
perf: add microbenchmarks for crc32c and MRD reads
chandra-siri Jun 8, 2026
02fa59c
perf: add warmup phase to MRD reads benchmark
chandra-siri Jun 8, 2026
891a981
perf: update warmup chunk size to 10MiB
chandra-siri Jun 8, 2026
88f0e21
perf: make GCS object size configurable in MRD reads benchmark
chandra-siri Jun 8, 2026
1dbb5f2
perf: add % change when checksum disabled column to MRD reads report
chandra-siri Jun 8, 2026
fc85a68
perf: support pre-upload and full range downloads in MRD reads benchmark
chandra-siri Jun 8, 2026
0334d2c
perf: align upload/download sizes and use random temp objects in MRD …
chandra-siri Jun 8, 2026
f268e4f
perf: migrate prints to stderr logging, introducing --debug option fo…
chandra-siri Jun 8, 2026
1fe6c69
perf: skip Full-1 case when checksum validation is disabled
chandra-siri Jun 8, 2026
9403ed4
perf: add pytest-benchmark test for checksum overhead in MRD reads
chandra-siri Jun 8, 2026
bc0ac8d
perf: calculate and report average throughput in test_checksum_overhead
chandra-siri Jun 8, 2026
3efb011
perf: convert test parameter to (object_size, download_size) tuple, t…
chandra-siri Jun 8, 2026
183e297
perf: upload fresh object for each enable_chk iteration in MRD reads …
chandra-siri Jun 8, 2026
39d633c
perf: compare Full-1 throughput with Full baseline in MRD reads bench…
chandra-siri Jun 8, 2026
ec174cd
perf: make test_checksum_overhead rounds configurable via BENCHMARK_R…
chandra-siri Jun 9, 2026
9db8398
perf: calculate standard deviation in throughput and elapsed time, ad…
chandra-siri Jun 9, 2026
56c3968
perf: add standard deviation to throughput reporting in MRD reads ben…
chandra-siri Jun 9, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import argparse
import os
import statistics
import sys
import time

try:
import google_crc32c
except ImportError:
print("Error: google_crc32c package is not installed in the python environment.", file=sys.stderr)
sys.exit(1)


def parse_size(size_str: str) -> int:
    """Convert a human-readable size string into a number of bytes.

    Surrounding whitespace is ignored and matching is case-insensitive.
    Supported forms:

    * ``KiB`` / ``MiB`` / ``GiB`` -- multiples of 1024; fractions allowed.
    * ``KB`` / ``MB`` / ``GB`` -- multiples of 1000; fractions allowed.
    * ``B`` or no suffix -- an integer byte count.

    Fractional results are truncated toward zero.

    Args:
        size_str: The size to parse, e.g. ``"100KiB"``, ``"1.5MB"``, ``"42"``.

    Returns:
        The size in bytes as a non-negative integer.

    Raises:
        ValueError: If the string is not a recognised size, its numeric part
            is malformed or non-finite, or the resulting size is negative.
    """
    # (suffix, base, exponent). Longer suffixes need no special ordering among
    # themselves, but the bare "B" case must be tried only after all of them.
    units = (
        ("KIB", 1024, 1),
        ("MIB", 1024, 2),
        ("GIB", 1024, 3),
        ("KB", 1000, 1),
        ("MB", 1000, 2),
        ("GB", 1000, 3),
    )
    text = size_str.strip().upper()

    try:
        for suffix, base, exponent in units:
            if text.endswith(suffix):
                scaled = float(text[: -len(suffix)])
                # Scale one step at a time (x * base * base ...) so that
                # fractional inputs round the same way as a chained product.
                for _ in range(exponent):
                    scaled *= base
                size = int(scaled)
                break
        else:
            size = int(text[:-1] if text.endswith("B") else text)
    except (ValueError, OverflowError):
        # OverflowError comes from int(inf); report it like any other bad
        # input so callers only ever have to handle ValueError.
        raise ValueError(f"Unknown size format: {text}") from None

    if size < 0:
        raise ValueError(f"Size must be non-negative: {text}")
    return size


def format_time(seconds: float) -> str:
    """Format a duration in seconds as a short string with a fitting unit.

    Durations below one microsecond are shown in ns, below one millisecond
    in microseconds, below one second in ms, and everything else in s. The
    value is always rendered with two decimal places.

    Args:
        seconds: The duration, in seconds.

    Returns:
        The formatted duration, e.g. ``"250.00 ns"`` or ``"2.00 s"``.
    """
    # (exclusive upper bound in seconds, multiplier to the unit, unit label)
    scales = (
        (1e-6, 1e9, "ns"),
        (1e-3, 1e6, "\u03bcs"),
        (1.0, 1e3, "ms"),
    )
    for upper_bound, multiplier, unit in scales:
        if seconds < upper_bound:
            return f"{seconds * multiplier:.2f} {unit}"
    return f"{seconds:.2f} s"


def main():
    """Time ``google_crc32c.value`` on random buffers and print a table.

    Command-line options:
        --sizes: Comma-separated buffer sizes, each parsed by ``parse_size``.
        --iterations: Number of timed calls per size; must be at least 1.

    For every size a random buffer is generated once with ``os.urandom`` and
    checksummed ``--iterations`` times. The min, max, mean and median of the
    per-call wall-clock durations are printed to stdout.

    Exits with status 1 if ``google_crc32c`` does not report the ``"c"``
    implementation or a size cannot be parsed. An invalid ``--iterations``
    is reported through ``argparse`` (usage message, exit status 2).
    """
    parser = argparse.ArgumentParser(description="Benchmark google_crc32c.value execution time.")
    parser.add_argument(
        "--sizes",
        type=str,
        default="1KiB,100KiB,2MiB",
        help="Comma-separated list of sizes (e.g. '1KiB,100KiB,2MiB')",
    )
    parser.add_argument(
        "--iterations",
        type=int,
        default=100,
        help="Number of iterations for benchmark (default: 100)",
    )
    args = parser.parse_args()

    # With zero (or negative) iterations no samples are collected and the
    # min/max/mean/median calls below would fail on an empty list.
    if args.iterations < 1:
        parser.error("--iterations must be at least 1")

    # Ensure google_crc32c uses accelerated C code
    impl = getattr(google_crc32c, "implementation", None)
    print(f"google_crc32c implementation: {impl}")
    if impl != "c":
        print(f"Error: google_crc32c is not using the accelerated C code (got '{impl}').", file=sys.stderr)
        sys.exit(1)

    # Parse every requested size up front so bad input fails before any
    # benchmarking work is done.
    sizes_to_test = []
    for s in args.sizes.split(","):
        try:
            sizes_to_test.append((s.strip(), parse_size(s)))
        except ValueError as e:
            print(f"Error parsing size '{s}': {e}", file=sys.stderr)
            sys.exit(1)

    print(f"Benchmarking google_crc32c.value(data) with {args.iterations} iterations:")
    print("-" * 80)
    print(f"{'Size (String)':<15} | {'Size (Bytes)':<12} | {'Min':<10} | {'Max':<10} | {'Mean':<10} | {'Median':<10}")
    print("-" * 80)

    for size_str, size_bytes in sizes_to_test:
        # One random buffer per size; the same bytes are reused for each call.
        data = os.urandom(size_bytes)

        durations = []
        for _ in range(args.iterations):
            start = time.perf_counter()
            google_crc32c.value(data)
            end = time.perf_counter()
            durations.append(end - start)

        min_time = min(durations)
        max_time = max(durations)
        mean_time = statistics.mean(durations)
        median_time = statistics.median(durations)

        print(
            f"{size_str:<15} | {size_bytes:<12} | "
            f"{format_time(min_time):<10} | {format_time(max_time):<10} | "
            f"{format_time(mean_time):<10} | {format_time(median_time):<10}"
        )


# Script entry point: run the benchmark only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
Loading
Loading