From e956c9105122cfa47b5532825bddd49a875d43a8 Mon Sep 17 00:00:00 2001
From: Farid Yagubbayli <faridyagubbayli@gmail.com>
Date: Thu, 14 May 2026 14:16:05 +0000
Subject: [PATCH 1/2] Migrate Benchmark

---
 benchmarks/README.md    |  64 +++++++++
 benchmarks/__init__.py  |   1 +
 benchmarks/benchmark.py | 297 ++++++++++++++++++++++++++++++++++++++++
 tests/test_benchmark.py | 109 +++++++++++++++
 4 files changed, 471 insertions(+)
 create mode 100644 benchmarks/README.md
 create mode 100644 benchmarks/__init__.py
 create mode 100644 benchmarks/benchmark.py
 create mode 100644 tests/test_benchmark.py

diff --git a/benchmarks/README.md b/benchmarks/README.md
new file mode 100644
index 00000000..9878f6a5
--- /dev/null
+++ b/benchmarks/README.md
@@ -0,0 +1,64 @@
+# Benchmarks
+
+This directory contains performance benchmarks for k-wave-python. These are not standard examples: they are intended to measure runtime and memory behavior and can become expensive as the grid size grows.
+
+## 3D Solver Scaling Benchmark
+
+`benchmark.py` ports MATLAB k-Wave's `benchmark.m`. It runs `kspaceFirstOrder` on a sequence of 3D grids with increasing sizes, averages runtime over repeated runs, and saves partial results after each run.
+
+The default benchmark uses:
+
+- heterogeneous absorbing medium
+- smoothed initial pressure ball source
+- binary sensor mask built from 100 Cartesian points on a sphere
+- 1000 time steps
+- 3 averages per grid size
+- grid sizes based on MATLAB's original scale arrays, starting at `32 x 32 x 32`
+
+By default, this can run for a long time and may stop once memory limits are reached.
+
+## Usage
+
+Run a small smoke benchmark:
+
+```bash
+uv run benchmarks/benchmark.py --max-cases 1 --num-averages 1 --number-time-points 20
+```
+
+Run the default CPU benchmark:
+
+```bash
+uv run benchmarks/benchmark.py
+```
+
+Run with single-precision arrays:
+
+```bash
+uv run benchmarks/benchmark.py --data-cast single
+```
+
+Run on the Python GPU backend:
+
+```bash
+uv run benchmarks/benchmark.py --device gpu
+```
+
+Choose an output file:
+
+```bash
+uv run benchmarks/benchmark.py --output-path benchmark_data.json
+```
+
+## Output
+
+The benchmark writes a JSON file containing:
+
+- `comp_size`: total grid points for each completed grid size
+- `comp_time`: rolling average elapsed seconds for each grid size
+- `options`: benchmark settings and environment metadata
+- `output_path`: path to the JSON output file
+- `error_reached`: whether the benchmark stopped after an exception
+- `error_message`: exception message, if any
+- `mem_usage`: optional process peak memory estimate when `--report-mem-usage` is set
+
+Partial results are saved after each run so completed timings are preserved if a later grid fails.
diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py
new file mode 100644
index 00000000..522d2cce
--- /dev/null
+++ b/benchmarks/__init__.py
@@ -0,0 +1 @@
+"""Performance benchmarks ported from MATLAB k-Wave."""
diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py
new file mode 100644
index 00000000..d59550c3
--- /dev/null
+++ b/benchmarks/benchmark.py
@@ -0,0 +1,297 @@
+"""
+k-Wave 3D Performance Benchmark
+
+Ported from: k-Wave/benchmark.m
+
+Runs a sequence of 3D initial-value simulations with increasing grid sizes and
+records average execution time for each grid.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import platform
+import sys
+from dataclasses import asdict, dataclass
+from datetime import datetime
+from pathlib import Path
+from time import perf_counter
+from typing import Any, Callable
+
+import numpy as np
+
+import kwave
+from kwave.data import Vector
+from kwave.kgrid import kWaveGrid
+from kwave.kmedium import kWaveMedium
+from kwave.ksensor import kSensor
+from kwave.ksource import kSource
+from kwave.kspaceFirstOrder import kspaceFirstOrder
+from kwave.utils.conversion import cart2grid
+from kwave.utils.filters import smooth
+from kwave.utils.mapgen import make_ball, make_cart_sphere
+
+
+@dataclass(frozen=True)
+class BenchmarkOptions:
+    data_cast: str = "off"
+    heterogeneous_media: bool = True
+    absorbing_media: bool = True
+    nonlinear_media: bool = False
+    binary_sensor_mask: bool = True
+    number_sensor_points: int = 100
+    number_time_points: int = 1000
+    num_averages: int = 3
+    start_size: int = 32
+    x_scale_array: tuple[int, ...] = (1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16)
+    y_scale_array: tuple[int, ...] = (1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16)
+    z_scale_array: tuple[int, ...] = (1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8)
+    domain_size: float = 22e-3
+    sensor_radius: float = 10e-3
+    pml_size: int = 10
+    pml_inside: bool = True
+    report_mem_usage: bool = False
+
+    def __post_init__(self) -> None:
+        if self.data_cast not in {"off", "single"}:
+            raise ValueError("data_cast must be 'off' or 'single'. Use device='gpu' to run on a GPU.")
+        if not (len(self.x_scale_array) == len(self.y_scale_array) == len(self.z_scale_array)):
+            raise ValueError("scale arrays must have the same length")
+        if self.number_time_points <= 0 or self.num_averages <= 0:
+            raise ValueError("number_time_points and num_averages must be positive")
+        if self.number_sensor_points <= 1:
+            raise ValueError("number_sensor_points must be greater than 1")
+
+    @property
+    def dtype(self) -> type[np.floating[Any]]:
+        return np.float32 if self.data_cast == "single" else np.float64
+
+
+def grid_sizes(options: BenchmarkOptions = BenchmarkOptions()) -> list[tuple[int, int, int, int]]:
+    return [
+        (
+            options.start_size * xscale,
+            options.start_size * yscale,
+            options.start_size * zscale,
+            min(xscale, yscale, zscale),
+        )
+        for xscale, yscale, zscale in zip(options.x_scale_array, options.y_scale_array, options.z_scale_array)
+    ]
+
+
+def build_case(options: BenchmarkOptions, nx: int, ny: int, nz: int, scale: int) -> tuple[kWaveGrid, kWaveMedium, kSource, kSensor]:
+    dtype = options.dtype
+    dx = options.domain_size / nx
+    dy = options.domain_size / ny
+    dz = options.domain_size / nz
+    kgrid = kWaveGrid(Vector([nx, ny, nz]), Vector([dx, dy, dz]))
+
+    c0 = dtype(1500)
+    rho0 = dtype(1000)
+    alpha_coeff = dtype(0.75)
+    alpha_power = dtype(1.5)
+    bon_a = dtype(6)
+
+    if options.heterogeneous_media:
+        sound_speed = c0 * np.ones((nx, ny, nz), dtype=dtype)
+        sound_speed[: nx // 4, :, :] = c0 * dtype(1.2)
+        density = rho0 * np.ones((nx, ny, nz), dtype=dtype)
+        density[:, max(ny // 4 - 1, 0) :, :] = rho0 * dtype(1.2)
+    else:
+        sound_speed = np.array(c0, dtype=dtype)
+        density = np.array(rho0, dtype=dtype)
+
+    medium = kWaveMedium(sound_speed=sound_speed, density=density)
+    if options.absorbing_media:
+        medium.alpha_coeff = alpha_coeff
+        medium.alpha_power = alpha_power
+    if options.nonlinear_media:
+        medium.BonA = bon_a
+
+    source = kSource()
+    source.p0 = dtype(10) * make_ball(Vector([nx, ny, nz]), Vector([nx // 2, ny // 2, nz // 2]), 2 * scale)
+    source.p0 = smooth(source.p0.astype(dtype, copy=False), restore_max=True).astype(dtype, copy=False)
+
+    sensor_mask = make_cart_sphere(options.sensor_radius, options.number_sensor_points)
+    if options.binary_sensor_mask:
+        sensor_mask, _, _ = cart2grid(kgrid, sensor_mask, order="C")
+        sensor_mask = sensor_mask.astype(bool)
+    sensor = kSensor(mask=sensor_mask)
+    sensor.record = ["p"]
+
+    kgrid.makeTime(np.max(np.asarray(medium.sound_speed)))
+    kgrid.setTime(options.number_time_points, kgrid.dt)
+
+    return kgrid, medium, source, sensor
+
+
+def default_output_path(options: BenchmarkOptions) -> Path:
+    computer_name = platform.node() or "unknown-computer"
+    date = datetime.now().strftime("%Y%m%d-%H%M%S")
+    return Path(f"benchmark_data-{computer_name}-{options.data_cast}-{date}.json")
+
+
+def run(
+    options: BenchmarkOptions = BenchmarkOptions(),
+    *,
+    backend: str = "python",
+    device: str = "cpu",
+    max_cases: int | None = None,
+    output_path: str | Path | None = None,
+    quiet: bool = True,
+    solver: Callable[..., Any] | None = None,
+    timer: Callable[[], float] = perf_counter,
+) -> dict[str, Any]:
+    solver = kspaceFirstOrder if solver is None else solver
+    cases = grid_sizes(options)
+    if max_cases is not None:
+        if max_cases <= 0:
+            raise ValueError("max_cases must be positive")
+        cases = cases[:max_cases]
+
+    path = default_output_path(options) if output_path is None else Path(output_path)
+    result: dict[str, Any] = {
+        "comp_size": [],
+        "comp_time": [],
+        "options": _options_payload(options, backend, device),
+        "output_path": str(path),
+        "error_reached": False,
+        "error_message": "",
+    }
+    if options.report_mem_usage:
+        result["mem_usage"] = []
+
+    for nx, ny, nz, scale in cases:
+        loop_time = 0.0
+        loop_mem_usage = 0.0
+        try:
+            kgrid, medium, source, sensor = build_case(options, nx, ny, nz, scale)
+            for loop_num in range(1, options.num_averages + 1):
+                start = timer()
+                solver(
+                    kgrid,
+                    medium,
+                    source,
+                    sensor,
+                    backend=backend,
+                    device=device,
+                    quiet=quiet,
+                    pml_size=options.pml_size,
+                    pml_inside=options.pml_inside,
+                    smooth_p0=False,
+                )
+                elapsed_time = timer() - start
+                loop_time = _rolling_average(loop_time, elapsed_time, loop_num)
+                if options.report_mem_usage:
+                    loop_mem_usage = _rolling_average(loop_mem_usage, _peak_memory_bytes(), loop_num)
+                _store_case_result(result, nx * ny * nz, loop_time, loop_mem_usage, options.report_mem_usage)
+                _save_results(path, result)
+        except Exception as exc:
+            result["error_reached"] = True
+            result["error_message"] = str(exc)
+            _save_results(path, result)
+            break
+
+    return result
+
+
+def _rolling_average(previous_average: float, new_value: float, count: int) -> float:
+    return (previous_average * (count - 1) + new_value) / count
+
+
+def _store_case_result(result: dict[str, Any], comp_size: int, comp_time: float, mem_usage: float, report_mem_usage: bool) -> None:
+    if len(result["comp_size"]) == 0 or result["comp_size"][-1] != comp_size:
+        result["comp_size"].append(comp_size)
+        result["comp_time"].append(comp_time)
+        if report_mem_usage:
+            result["mem_usage"].append(mem_usage)
+        return
+
+    result["comp_time"][-1] = comp_time
+    if report_mem_usage:
+        result["mem_usage"][-1] = mem_usage
+
+
+def _save_results(path: Path, result: dict[str, Any]) -> None:
+    path.parent.mkdir(parents=True, exist_ok=True)
+    payload = {
+        "comp_size": [int(size) for size in result["comp_size"]],
+        "comp_time": [float(time) for time in result["comp_time"]],
+        "options": result["options"],
+        "output_path": result["output_path"],
+        "error_reached": bool(result["error_reached"]),
+        "error_message": result["error_message"],
+    }
+    if "mem_usage" in result:
+        payload["mem_usage"] = [float(usage) for usage in result["mem_usage"]]
+    path.write_text(json.dumps(payload, indent=2) + "\n")
+
+
+def _options_payload(options: BenchmarkOptions, backend: str, device: str) -> dict[str, Any]:
+    payload = asdict(options)
+    payload.update(
+        {
+            "backend": backend,
+            "device": device,
+            "computer_name": platform.node(),
+            "python_version": platform.python_version(),
+            "platform": platform.platform(),
+            "kwave_python_version": kwave.__version__,
+        }
+    )
+    return payload
+
+
+def _peak_memory_bytes() -> float:
+    try:
+        import resource
+    except ImportError:
+        return float("nan")
+
+    usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
+    if platform.system().lower() == "darwin":
+        return float(usage)
+    return float(usage * 1024)
+
+
+def _parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description="Run the k-Wave 3D performance benchmark.")
+    parser.add_argument("--data-cast", choices=("off", "single"), default="off")
+    parser.add_argument("--backend", choices=("python", "cpp"), default="python")
+    parser.add_argument("--device", choices=("cpu", "gpu"), default="cpu")
+    parser.add_argument("--max-cases", type=int, default=None)
+    parser.add_argument("--num-averages", type=int, default=3)
+    parser.add_argument("--number-time-points", type=int, default=1000)
+    parser.add_argument("--output-path", type=Path, default=None)
+    parser.add_argument("--report-mem-usage", action="store_true")
+    parser.add_argument("--verbose", action="store_true")
+    return parser.parse_args()
+
+
+def main() -> int:
+    args = _parse_args()
+    benchmark_options = BenchmarkOptions(
+        data_cast=args.data_cast,
+        num_averages=args.num_averages,
+        number_time_points=args.number_time_points,
+        report_mem_usage=args.report_mem_usage,
+    )
+    result = run(
+        benchmark_options,
+        backend=args.backend,
+        device=args.device,
+        max_cases=args.max_cases,
+        output_path=args.output_path,
+        quiet=not args.verbose,
+    )
+    print(f"Benchmark results saved to {result['output_path']}")
+    if result["error_reached"]:
+        print("Memory limit reached or error encountered, exiting benchmark. Error message:", file=sys.stderr)
+        print(f"  {result['error_message']}", file=sys.stderr)
+        return 1
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py
new file mode 100644
index 00000000..37c4a0ed
--- /dev/null
+++ b/tests/test_benchmark.py
@@ -0,0 +1,109 @@
+import json
+from pathlib import Path
+
+import numpy as np
+import pytest
+
+from benchmarks.benchmark import BenchmarkOptions, build_case, grid_sizes, run
+
+
+def small_options(**overrides):
+    values = {
+        "start_size": 8,
+        "x_scale_array": (1,),
+        "y_scale_array": (1,),
+        "z_scale_array": (1,),
+        "number_sensor_points": 8,
+        "number_time_points": 5,
+        "num_averages": 1,
+        "sensor_radius": 1e-3,
+    }
+    values.update(overrides)
+    return BenchmarkOptions(**values)
+
+
+def test_grid_sizes_match_matlab_scale_arrays():
+    sizes = grid_sizes(BenchmarkOptions())
+
+    assert sizes[0] == (32, 32, 32, 1)
+    assert sizes[-1] == (512, 512, 256, 8)
+    assert len(sizes) == 12
+
+
+def test_build_case_matches_benchmark_defaults_for_small_grid():
+    options = small_options()
+
+    kgrid, medium, source, sensor = build_case(options, 8, 8, 8, 1)
+
+    assert tuple(kgrid.N) == (8, 8, 8)
+    assert np.isclose(kgrid.dx, options.domain_size / 8)
+    assert kgrid.Nt == options.number_time_points
+    assert medium.sound_speed.shape == (8, 8, 8)
+    assert np.all(medium.sound_speed[:2, :, :] == 1800)
+    assert np.all(medium.sound_speed[2:, :, :] == 1500)
+    assert np.all(medium.density[:, :1, :] == 1000)
+    assert np.all(medium.density[:, 1:, :] == 1200)
+    assert medium.alpha_coeff == pytest.approx(0.75)
+    assert medium.alpha_power == pytest.approx(1.5)
+    assert source.p0.shape == (8, 8, 8)
+    assert np.max(source.p0) == pytest.approx(10)
+    assert sensor.mask.shape == (8, 8, 8)
+    assert sensor.mask.dtype == bool
+    assert 0 < np.count_nonzero(sensor.mask) <= options.number_sensor_points
+    assert sensor.record == ["p"]
+
+
+def test_single_data_cast_uses_float32_arrays():
+    options = small_options(data_cast="single")
+
+    _, medium, source, _ = build_case(options, 8, 8, 8, 1)
+
+    assert medium.sound_speed.dtype == np.float32
+    assert medium.density.dtype == np.float32
+    assert source.p0.dtype == np.float32
+
+
+def test_run_aggregates_timings_and_saves_json_file(tmp_path: Path):
+    options = small_options(num_averages=2)
+    output_path = tmp_path / "benchmark.json"
+    times = iter([0.0, 1.0, 1.0, 3.0])
+    calls = []
+
+    def fake_solver(kgrid, medium, source, sensor, **kwargs):
+        calls.append((kgrid, medium, source, sensor, kwargs))
+        return {"p": np.zeros((1, 1))}
+
+    result = run(options, max_cases=1, output_path=output_path, solver=fake_solver, timer=lambda: next(times))
+
+    assert len(calls) == 2
+    assert calls[0][4]["pml_size"] == options.pml_size
+    assert calls[0][4]["pml_inside"] is True
+    assert calls[0][4]["smooth_p0"] is False
+    assert result["comp_size"] == [8 * 8 * 8]
+    assert result["comp_time"] == [pytest.approx(1.5)]
+    assert result["output_path"] == str(output_path)
+    assert result["error_reached"] is False
+    assert output_path.exists()
+
+    saved = json.loads(output_path.read_text())
+    assert saved["comp_size"] == [8 * 8 * 8]
+    assert saved["comp_time"] == [pytest.approx(1.5)]
+    assert saved["output_path"] == str(output_path)
+    assert saved["options"]["start_size"] == 8
+
+
+def test_run_saves_partial_results_after_solver_error(tmp_path: Path):
+    options = small_options()
+    output_path = tmp_path / "benchmark.json"
+
+    def failing_solver(*args, **kwargs):
+        raise RuntimeError("solver failed")
+
+    result = run(options, max_cases=1, output_path=output_path, solver=failing_solver)
+
+    assert result["error_reached"] is True
+    assert result["error_message"] == "solver failed"
+    assert output_path.exists()
+    saved = json.loads(output_path.read_text())
+    assert saved["error_reached"] is True
+    assert saved["error_message"] == "solver failed"

From 968b5f8f00fc148b9bca19c973144abf6a987ef1 Mon Sep 17 00:00:00 2001
From: Farid Yagubbayli <faridyagubbayli@gmail.com>
Date: Thu, 14 May 2026 14:31:48 +0000
Subject: [PATCH 2/2] helpers

---
 benchmarks/benchmark.py | 196 ++++------------------------------------
 benchmarks/helpers.py   | 178 ++++++++++++++++++++++++++++++++++++
 2 files changed, 195 insertions(+), 179 deletions(-)
 create mode 100644 benchmarks/helpers.py

diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py
index d59550c3..23ec6f5e 100644
--- a/benchmarks/benchmark.py
+++ b/benchmarks/benchmark.py
@@ -10,126 +10,23 @@
 from __future__ import annotations
 
 import argparse
-import json
-import platform
 import sys
-from dataclasses import asdict, dataclass
-from datetime import datetime
 from pathlib import Path
 from time import perf_counter
 from typing import Any, Callable
 
-import numpy as np
-
-import kwave
-from kwave.data import Vector
-from kwave.kgrid import kWaveGrid
-from kwave.kmedium import kWaveMedium
-from kwave.ksensor import kSensor
-from kwave.ksource import kSource
+from benchmarks.helpers import (
+    BenchmarkOptions,
+    build_case,
+    default_output_path,
+    grid_sizes,
+    options_payload,
+    peak_memory_bytes,
+    rolling_average,
+    save_results,
+    store_case_result,
+)
 from kwave.kspaceFirstOrder import kspaceFirstOrder
-from kwave.utils.conversion import cart2grid
-from kwave.utils.filters import smooth
-from kwave.utils.mapgen import make_ball, make_cart_sphere
-
-
-@dataclass(frozen=True)
-class BenchmarkOptions:
-    data_cast: str = "off"
-    heterogeneous_media: bool = True
-    absorbing_media: bool = True
-    nonlinear_media: bool = False
-    binary_sensor_mask: bool = True
-    number_sensor_points: int = 100
-    number_time_points: int = 1000
-    num_averages: int = 3
-    start_size: int = 32
-    x_scale_array: tuple[int, ...] = (1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16)
-    y_scale_array: tuple[int, ...] = (1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16)
-    z_scale_array: tuple[int, ...] = (1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8)
-    domain_size: float = 22e-3
-    sensor_radius: float = 10e-3
-    pml_size: int = 10
-    pml_inside: bool = True
-    report_mem_usage: bool = False
-
-    def __post_init__(self) -> None:
-        if self.data_cast not in {"off", "single"}:
-            raise ValueError("data_cast must be 'off' or 'single'. Use device='gpu' to run on a GPU.")
-        if not (len(self.x_scale_array) == len(self.y_scale_array) == len(self.z_scale_array)):
-            raise ValueError("scale arrays must have the same length")
-        if self.number_time_points <= 0 or self.num_averages <= 0:
-            raise ValueError("number_time_points and num_averages must be positive")
-        if self.number_sensor_points <= 1:
-            raise ValueError("number_sensor_points must be greater than 1")
-
-    @property
-    def dtype(self) -> type[np.floating[Any]]:
-        return np.float32 if self.data_cast == "single" else np.float64
-
-
-def grid_sizes(options: BenchmarkOptions = BenchmarkOptions()) -> list[tuple[int, int, int, int]]:
-    return [
-        (
-            options.start_size * xscale,
-            options.start_size * yscale,
-            options.start_size * zscale,
-            min(xscale, yscale, zscale),
-        )
-        for xscale, yscale, zscale in zip(options.x_scale_array, options.y_scale_array, options.z_scale_array)
-    ]
-
-
-def build_case(options: BenchmarkOptions, nx: int, ny: int, nz: int, scale: int) -> tuple[kWaveGrid, kWaveMedium, kSource, kSensor]:
-    dtype = options.dtype
-    dx = options.domain_size / nx
-    dy = options.domain_size / ny
-    dz = options.domain_size / nz
-    kgrid = kWaveGrid(Vector([nx, ny, nz]), Vector([dx, dy, dz]))
-
-    c0 = dtype(1500)
-    rho0 = dtype(1000)
-    alpha_coeff = dtype(0.75)
-    alpha_power = dtype(1.5)
-    bon_a = dtype(6)
-
-    if options.heterogeneous_media:
-        sound_speed = c0 * np.ones((nx, ny, nz), dtype=dtype)
-        sound_speed[: nx // 4, :, :] = c0 * dtype(1.2)
-        density = rho0 * np.ones((nx, ny, nz), dtype=dtype)
-        density[:, max(ny // 4 - 1, 0) :, :] = rho0 * dtype(1.2)
-    else:
-        sound_speed = np.array(c0, dtype=dtype)
-        density = np.array(rho0, dtype=dtype)
-
-    medium = kWaveMedium(sound_speed=sound_speed, density=density)
-    if options.absorbing_media:
-        medium.alpha_coeff = alpha_coeff
-        medium.alpha_power = alpha_power
-    if options.nonlinear_media:
-        medium.BonA = bon_a
-
-    source = kSource()
-    source.p0 = dtype(10) * make_ball(Vector([nx, ny, nz]), Vector([nx // 2, ny // 2, nz // 2]), 2 * scale)
-    source.p0 = smooth(source.p0.astype(dtype, copy=False), restore_max=True).astype(dtype, copy=False)
-
-    sensor_mask = make_cart_sphere(options.sensor_radius, options.number_sensor_points)
-    if options.binary_sensor_mask:
-        sensor_mask, _, _ = cart2grid(kgrid, sensor_mask, order="C")
-        sensor_mask = sensor_mask.astype(bool)
-    sensor = kSensor(mask=sensor_mask)
-    sensor.record = ["p"]
-
-    kgrid.makeTime(np.max(np.asarray(medium.sound_speed)))
-    kgrid.setTime(options.number_time_points, kgrid.dt)
-
-    return kgrid, medium, source, sensor
-
-
-def default_output_path(options: BenchmarkOptions) -> Path:
-    computer_name = platform.node() or "unknown-computer"
-    date = datetime.now().strftime("%Y%m%d-%H%M%S")
-    return Path(f"benchmark_data-{computer_name}-{options.data_cast}-{date}.json")
 
 
 def run(
@@ -154,7 +51,7 @@ def run(
     result: dict[str, Any] = {
         "comp_size": [],
         "comp_time": [],
-        "options": _options_payload(options, backend, device),
+        "options": options_payload(options, backend, device),
         "output_path": str(path),
         "error_reached": False,
         "error_message": "",
@@ -182,79 +79,20 @@ def run(
                     smooth_p0=False,
                 )
                 elapsed_time = timer() - start
-                loop_time = _rolling_average(loop_time, elapsed_time, loop_num)
+                loop_time = rolling_average(loop_time, elapsed_time, loop_num)
                 if options.report_mem_usage:
-                    loop_mem_usage = _rolling_average(loop_mem_usage, _peak_memory_bytes(), loop_num)
-                _store_case_result(result, nx * ny * nz, loop_time, loop_mem_usage, options.report_mem_usage)
-                _save_results(path, result)
+                    loop_mem_usage = rolling_average(loop_mem_usage, peak_memory_bytes(), loop_num)
+                store_case_result(result, nx * ny * nz, loop_time, loop_mem_usage, options.report_mem_usage)
+                save_results(path, result)
         except Exception as exc:
             result["error_reached"] = True
             result["error_message"] = str(exc)
-            _save_results(path, result)
+            save_results(path, result)
             break
 
     return result
 
 
-def _rolling_average(previous_average: float, new_value: float, count: int) -> float:
-    return (previous_average * (count - 1) + new_value) / count
-
-
-def _store_case_result(result: dict[str, Any], comp_size: int, comp_time: float, mem_usage: float, report_mem_usage: bool) -> None:
-    if len(result["comp_size"]) == 0 or result["comp_size"][-1] != comp_size:
-        result["comp_size"].append(comp_size)
-        result["comp_time"].append(comp_time)
-        if report_mem_usage:
-            result["mem_usage"].append(mem_usage)
-        return
-
-    result["comp_time"][-1] = comp_time
-    if report_mem_usage:
-        result["mem_usage"][-1] = mem_usage
-
-
-def _save_results(path: Path, result: dict[str, Any]) -> None:
-    path.parent.mkdir(parents=True, exist_ok=True)
-    payload = {
-        "comp_size": [int(size) for size in result["comp_size"]],
-        "comp_time": [float(time) for time in result["comp_time"]],
-        "options": result["options"],
-        "output_path": result["output_path"],
-        "error_reached": bool(result["error_reached"]),
-        "error_message": result["error_message"],
-    }
-    if "mem_usage" in result:
-        payload["mem_usage"] = [float(usage) for usage in result["mem_usage"]]
-    path.write_text(json.dumps(payload, indent=2) + "\n")
-
-
-def _options_payload(options: BenchmarkOptions, backend: str, device: str) -> dict[str, Any]:
-    payload = asdict(options)
-    payload.update(
-        {
-            "backend": backend,
-            "device": device,
-            "computer_name": platform.node(),
-            "python_version": platform.python_version(),
-            "platform": platform.platform(),
-            "kwave_python_version": kwave.__version__,
-        }
-    )
-    return payload
-
-
-def _peak_memory_bytes() -> float:
-    try:
-        import resource
-    except ImportError:
-        return float("nan")
-
-    usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
-    if platform.system().lower() == "darwin":
-        return float(usage)
-    return float(usage * 1024)
-
-
 def _parse_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser(description="Run the k-Wave 3D performance benchmark.")
     parser.add_argument("--data-cast", choices=("off", "single"), default="off")
diff --git a/benchmarks/helpers.py b/benchmarks/helpers.py
new file mode 100644
index 00000000..1843bc5a
--- /dev/null
+++ b/benchmarks/helpers.py
@@ -0,0 +1,178 @@
+from __future__ import annotations
+
+import json
+import platform
+from dataclasses import asdict, dataclass
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+
+import kwave
+from kwave.data import Vector
+from kwave.kgrid import kWaveGrid
+from kwave.kmedium import kWaveMedium
+from kwave.ksensor import kSensor
+from kwave.ksource import kSource
+from kwave.utils.conversion import cart2grid
+from kwave.utils.filters import smooth
+from kwave.utils.mapgen import make_ball, make_cart_sphere
+
+
+@dataclass(frozen=True)
+class BenchmarkOptions:
+    data_cast: str = "off"
+    heterogeneous_media: bool = True
+    absorbing_media: bool = True
+    nonlinear_media: bool = False
+    binary_sensor_mask: bool = True
+    number_sensor_points: int = 100
+    number_time_points: int = 1000
+    num_averages: int = 3
+    start_size: int = 32
+    x_scale_array: tuple[int, ...] = (1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16)
+    y_scale_array: tuple[int, ...] = (1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16)
+    z_scale_array: tuple[int, ...] = (1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8)
+    domain_size: float = 22e-3
+    sensor_radius: float = 10e-3
+    pml_size: int = 10
+    pml_inside: bool = True
+    report_mem_usage: bool = False
+
+    def __post_init__(self) -> None:
+        if self.data_cast not in {"off", "single"}:
+            raise ValueError("data_cast must be 'off' or 'single'. Use device='gpu' to run on a GPU.")
+        if not (len(self.x_scale_array) == len(self.y_scale_array) == len(self.z_scale_array)):
+            raise ValueError("scale arrays must have the same length")
+        if self.number_time_points <= 0 or self.num_averages <= 0:
+            raise ValueError("number_time_points and num_averages must be positive")
+        if self.number_sensor_points <= 1:
+            raise ValueError("number_sensor_points must be greater than 1")
+
+    @property
+    def dtype(self) -> type[np.floating[Any]]:
+        return np.float32 if self.data_cast == "single" else np.float64
+
+
+def grid_sizes(options: BenchmarkOptions = BenchmarkOptions()) -> list[tuple[int, int, int, int]]:
+    return [
+        (
+            options.start_size * xscale,
+            options.start_size * yscale,
+            options.start_size * zscale,
+            min(xscale, yscale, zscale),
+        )
+        for xscale, yscale, zscale in zip(options.x_scale_array, options.y_scale_array, options.z_scale_array)
+    ]
+
+
+def build_case(options: BenchmarkOptions, nx: int, ny: int, nz: int, scale: int) -> tuple[kWaveGrid, kWaveMedium, kSource, kSensor]:
+    dtype = options.dtype
+    dx = options.domain_size / nx
+    dy = options.domain_size / ny
+    dz = options.domain_size / nz
+    kgrid = kWaveGrid(Vector([nx, ny, nz]), Vector([dx, dy, dz]))
+
+    c0 = dtype(1500)
+    rho0 = dtype(1000)
+    alpha_coeff = dtype(0.75)
+    alpha_power = dtype(1.5)
+    bon_a = dtype(6)
+
+    if options.heterogeneous_media:
+        sound_speed = c0 * np.ones((nx, ny, nz), dtype=dtype)
+        sound_speed[: nx // 4, :, :] = c0 * dtype(1.2)
+        density = rho0 * np.ones((nx, ny, nz), dtype=dtype)
+        density[:, max(ny // 4 - 1, 0) :, :] = rho0 * dtype(1.2)
+    else:
+        sound_speed = np.array(c0, dtype=dtype)
+        density = np.array(rho0, dtype=dtype)
+
+    medium = kWaveMedium(sound_speed=sound_speed, density=density)
+    if options.absorbing_media:
+        medium.alpha_coeff = alpha_coeff
+        medium.alpha_power = alpha_power
+    if options.nonlinear_media:
+        medium.BonA = bon_a
+
+    source = kSource()
+    source.p0 = dtype(10) * make_ball(Vector([nx, ny, nz]), Vector([nx // 2, ny // 2, nz // 2]), 2 * scale)
+    source.p0 = smooth(source.p0.astype(dtype, copy=False), restore_max=True).astype(dtype, copy=False)
+
+    sensor_mask = make_cart_sphere(options.sensor_radius, options.number_sensor_points)
+    if options.binary_sensor_mask:
+        sensor_mask, _, _ = cart2grid(kgrid, sensor_mask, order="C")
+        sensor_mask = sensor_mask.astype(bool)
+    sensor = kSensor(mask=sensor_mask)
+    sensor.record = ["p"]
+
+    kgrid.makeTime(np.max(np.asarray(medium.sound_speed)))
+    kgrid.setTime(options.number_time_points, kgrid.dt)
+
+    return kgrid, medium, source, sensor
+
+
+def default_output_path(options: BenchmarkOptions) -> Path:
+    computer_name = platform.node() or "unknown-computer"
+    date = datetime.now().strftime("%Y%m%d-%H%M%S")
+    return Path(f"benchmark_data-{computer_name}-{options.data_cast}-{date}.json")
+
+
+def rolling_average(previous_average: float, new_value: float, count: int) -> float:
+    return (previous_average * (count - 1) + new_value) / count
+
+
+def store_case_result(result: dict[str, Any], comp_size: int, comp_time: float, mem_usage: float, report_mem_usage: bool) -> None:
+    if len(result["comp_size"]) == 0 or result["comp_size"][-1] != comp_size:
+        result["comp_size"].append(comp_size)
+        result["comp_time"].append(comp_time)
+        if report_mem_usage:
+            result["mem_usage"].append(mem_usage)
+        return
+
+    result["comp_time"][-1] = comp_time
+    if report_mem_usage:
+        result["mem_usage"][-1] = mem_usage
+
+
+def save_results(path: Path, result: dict[str, Any]) -> None:
+    path.parent.mkdir(parents=True, exist_ok=True)
+    payload = {
+        "comp_size": [int(size) for size in result["comp_size"]],
+        "comp_time": [float(time) for time in result["comp_time"]],
+        "options": result["options"],
+        "output_path": result["output_path"],
+        "error_reached": bool(result["error_reached"]),
+        "error_message": result["error_message"],
+    }
+    if "mem_usage" in result:
+        payload["mem_usage"] = [float(usage) for usage in result["mem_usage"]]
+    path.write_text(json.dumps(payload, indent=2) + "\n")
+
+
+def options_payload(options: BenchmarkOptions, backend: str, device: str) -> dict[str, Any]:
+    payload = asdict(options)
+    payload.update(
+        {
+            "backend": backend,
+            "device": device,
+            "computer_name": platform.node(),
+            "python_version": platform.python_version(),
+            "platform": platform.platform(),
+            "kwave_python_version": kwave.__version__,
+        }
+    )
+    return payload
+
+
+def peak_memory_bytes() -> float:
+    try:
+        import resource
+    except ImportError:
+        return float("nan")
+
+    usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
+    if platform.system().lower() == "darwin":
+        return float(usage)
+    return float(usage * 1024)