64 changes: 64 additions & 0 deletions benchmarks/README.md
@@ -0,0 +1,64 @@
# Benchmarks

This directory contains performance benchmarks for k-wave-python. These are not standard examples: they are intended to measure runtime and memory behavior and can become expensive as the grid size grows.

## 3D Solver Scaling Benchmark

`benchmark.py` ports MATLAB k-Wave's `benchmark.m`. It runs `kspaceFirstOrder` on a sequence of 3D grids of increasing size, averages the runtime over repeated runs, and saves partial results after each run.

The default benchmark uses:

- heterogeneous absorbing medium
- smoothed initial pressure ball source
- binary sensor mask built from 100 Cartesian points on a sphere
- 1000 time steps
- 3 averages per grid size
- grid sizes based on MATLAB's original scale arrays, starting at `32 x 32 x 32`

With the default settings the full sweep can run for a long time and typically stops only once a grid no longer fits in memory.
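
The grid sequence comes from `grid_sizes` in `benchmarks/helpers.py`. As an illustration (not part of the benchmark itself), the first few cases with default options are:

```python
from benchmarks.helpers import BenchmarkOptions, grid_sizes

# start_size=32 combined with the MATLAB scale arrays yields
# 32x32x32, 64x32x32, 64x64x32, 64x64x64, ... up to the largest grid.
for nx, ny, nz, scale in grid_sizes(BenchmarkOptions())[:4]:
    print(f"{nx} x {ny} x {nz}")
```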

## Usage

Run a small smoke benchmark:

```bash
uv run benchmarks/benchmark.py --max-cases 1 --num-averages 1 --number-time-points 20
```

Run the default CPU benchmark:

```bash
uv run benchmarks/benchmark.py
```

Run with single-precision arrays:

```bash
uv run benchmarks/benchmark.py --data-cast single
```

Run on the Python GPU backend:

```bash
uv run benchmarks/benchmark.py --device gpu
```

Choose an output file:

```bash
uv run benchmarks/benchmark.py --output-path benchmark_data.json
```
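
The benchmark can also be driven programmatically through the `run` entry point; a minimal sketch mirroring the smoke-test invocation above:

```python
from benchmarks.benchmark import run
from benchmarks.helpers import BenchmarkOptions

# Single small case, one run, 20 time steps.
result = run(
    BenchmarkOptions(num_averages=1, number_time_points=20),
    max_cases=1,
)
print(result["comp_size"], result["comp_time"])
```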

## Output

The benchmark writes a JSON file containing:

- `comp_size`: total grid points for each completed grid size
- `comp_time`: rolling average elapsed seconds for each grid size
- `options`: benchmark settings and environment metadata
- `output_path`: path to the JSON output file
- `error_reached`: whether the benchmark stopped after an exception
- `error_message`: exception message, if any
- `mem_usage`: optional process peak memory estimate when `--report-mem-usage` is set

Partial results are saved after each run so completed timings are preserved if a later grid fails.
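
Because the output is plain JSON, completed timings can be inspected with the standard library (the filename here assumes `--output-path benchmark_data.json`):

```python
import json

with open("benchmark_data.json") as f:
    data = json.load(f)

for points, seconds in zip(data["comp_size"], data["comp_time"]):
    print(f"{points:>12,} grid points: {seconds:8.2f} s")
```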
1 change: 1 addition & 0 deletions benchmarks/__init__.py
@@ -0,0 +1 @@
"""Performance benchmarks ported from MATLAB k-Wave."""
135 changes: 135 additions & 0 deletions benchmarks/benchmark.py
@@ -0,0 +1,135 @@
"""
k-Wave 3D Performance Benchmark

Ported from: k-Wave/benchmark.m

Runs a sequence of 3D initial-value simulations with increasing grid sizes and
records average execution time for each grid.
"""

from __future__ import annotations

import argparse
import sys
from pathlib import Path
from time import perf_counter
from typing import Any, Callable

from benchmarks.helpers import (
BenchmarkOptions,
build_case,
default_output_path,
grid_sizes,
options_payload,
peak_memory_bytes,
rolling_average,
save_results,
store_case_result,
)
from kwave.kspaceFirstOrder import kspaceFirstOrder


def run(
options: BenchmarkOptions = BenchmarkOptions(),
*,
backend: str = "python",
device: str = "cpu",
max_cases: int | None = None,
output_path: str | Path | None = None,
quiet: bool = True,
solver: Callable[..., Any] | None = None,
timer: Callable[[], float] = perf_counter,
) -> dict[str, Any]:
solver = kspaceFirstOrder if solver is None else solver
cases = grid_sizes(options)
if max_cases is not None:
if max_cases <= 0:
raise ValueError("max_cases must be positive")
cases = cases[:max_cases]

path = default_output_path(options) if output_path is None else Path(output_path)
result: dict[str, Any] = {
"comp_size": [],
"comp_time": [],
"options": options_payload(options, backend, device),
"output_path": str(path),
"error_reached": False,
"error_message": "",
}
if options.report_mem_usage:
result["mem_usage"] = []

for nx, ny, nz, scale in cases:
loop_time = 0.0
loop_mem_usage = 0.0
try:
kgrid, medium, source, sensor = build_case(options, nx, ny, nz, scale)
for loop_num in range(1, options.num_averages + 1):
start = timer()
solver(
kgrid,
medium,
source,
sensor,
backend=backend,
device=device,
quiet=quiet,
pml_size=options.pml_size,
pml_inside=options.pml_inside,
smooth_p0=False,
)
elapsed_time = timer() - start
loop_time = rolling_average(loop_time, elapsed_time, loop_num)
if options.report_mem_usage:
loop_mem_usage = rolling_average(loop_mem_usage, peak_memory_bytes(), loop_num)
store_case_result(result, nx * ny * nz, loop_time, loop_mem_usage, options.report_mem_usage)
save_results(path, result)
except Exception as exc:
result["error_reached"] = True
result["error_message"] = str(exc)
save_results(path, result)
break

return result


def _parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description="Run the k-Wave 3D performance benchmark.")
parser.add_argument("--data-cast", choices=("off", "single"), default="off")
parser.add_argument("--backend", choices=("python", "cpp"), default="python")
parser.add_argument("--device", choices=("cpu", "gpu"), default="cpu")
parser.add_argument("--max-cases", type=int, default=None)
parser.add_argument("--num-averages", type=int, default=3)
parser.add_argument("--number-time-points", type=int, default=1000)
parser.add_argument("--output-path", type=Path, default=None)
parser.add_argument("--report-mem-usage", action="store_true")
parser.add_argument("--verbose", action="store_true")
return parser.parse_args()


def main() -> int:
args = _parse_args()
benchmark_options = BenchmarkOptions(
data_cast=args.data_cast,
num_averages=args.num_averages,
number_time_points=args.number_time_points,
report_mem_usage=args.report_mem_usage,
)
result = run(
benchmark_options,
backend=args.backend,
device=args.device,
max_cases=args.max_cases,
output_path=args.output_path,
quiet=not args.verbose,
)
print(f"Benchmark results saved to {result['output_path']}")
if result["error_reached"]:
print("Memory limit reached or error encountered, exiting benchmark. Error message:", file=sys.stderr)
print(f" {result['error_message']}", file=sys.stderr)
return 1
return 0


if __name__ == "__main__":
raise SystemExit(main())
178 changes: 178 additions & 0 deletions benchmarks/helpers.py
@@ -0,0 +1,178 @@
from __future__ import annotations

import json
import platform
from dataclasses import asdict, dataclass
from datetime import datetime
from pathlib import Path
from typing import Any

import numpy as np

import kwave
from kwave.data import Vector
from kwave.kgrid import kWaveGrid
from kwave.kmedium import kWaveMedium
from kwave.ksensor import kSensor
from kwave.ksource import kSource
from kwave.utils.conversion import cart2grid
from kwave.utils.filters import smooth
from kwave.utils.mapgen import make_ball, make_cart_sphere


@dataclass(frozen=True)
class BenchmarkOptions:
data_cast: str = "off"
heterogeneous_media: bool = True
absorbing_media: bool = True
nonlinear_media: bool = False
binary_sensor_mask: bool = True
number_sensor_points: int = 100
number_time_points: int = 1000
num_averages: int = 3
start_size: int = 32
x_scale_array: tuple[int, ...] = (1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16)
y_scale_array: tuple[int, ...] = (1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16)
z_scale_array: tuple[int, ...] = (1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8)
domain_size: float = 22e-3
sensor_radius: float = 10e-3
pml_size: int = 10
pml_inside: bool = True
report_mem_usage: bool = False

def __post_init__(self) -> None:
if self.data_cast not in {"off", "single"}:
raise ValueError("data_cast must be 'off' or 'single'. Use device='gpu' to run on a GPU.")
if not (len(self.x_scale_array) == len(self.y_scale_array) == len(self.z_scale_array)):
raise ValueError("scale arrays must have the same length")
if self.number_time_points <= 0 or self.num_averages <= 0:
raise ValueError("number_time_points and num_averages must be positive")
if self.number_sensor_points <= 1:
raise ValueError("number_sensor_points must be greater than 1")

@property
def dtype(self) -> type[np.floating[Any]]:
return np.float32 if self.data_cast == "single" else np.float64


def grid_sizes(options: BenchmarkOptions = BenchmarkOptions()) -> list[tuple[int, int, int, int]]:
return [
(
options.start_size * xscale,
options.start_size * yscale,
options.start_size * zscale,
min(xscale, yscale, zscale),
)
for xscale, yscale, zscale in zip(options.x_scale_array, options.y_scale_array, options.z_scale_array)
]


def build_case(options: BenchmarkOptions, nx: int, ny: int, nz: int, scale: int) -> tuple[kWaveGrid, kWaveMedium, kSource, kSensor]:
dtype = options.dtype
dx = options.domain_size / nx
dy = options.domain_size / ny
dz = options.domain_size / nz
kgrid = kWaveGrid(Vector([nx, ny, nz]), Vector([dx, dy, dz]))

c0 = dtype(1500)
rho0 = dtype(1000)
alpha_coeff = dtype(0.75)
alpha_power = dtype(1.5)
bon_a = dtype(6)

if options.heterogeneous_media:
sound_speed = c0 * np.ones((nx, ny, nz), dtype=dtype)
sound_speed[: nx // 4, :, :] = c0 * dtype(1.2)
density = rho0 * np.ones((nx, ny, nz), dtype=dtype)
density[:, max(ny // 4 - 1, 0) :, :] = rho0 * dtype(1.2)
else:
sound_speed = np.array(c0, dtype=dtype)
density = np.array(rho0, dtype=dtype)

medium = kWaveMedium(sound_speed=sound_speed, density=density)
if options.absorbing_media:
medium.alpha_coeff = alpha_coeff
medium.alpha_power = alpha_power
if options.nonlinear_media:
medium.BonA = bon_a

source = kSource()
source.p0 = dtype(10) * make_ball(Vector([nx, ny, nz]), Vector([nx // 2, ny // 2, nz // 2]), 2 * scale)
source.p0 = smooth(source.p0.astype(dtype, copy=False), restore_max=True).astype(dtype, copy=False)

sensor_mask = make_cart_sphere(options.sensor_radius, options.number_sensor_points)
if options.binary_sensor_mask:
sensor_mask, _, _ = cart2grid(kgrid, sensor_mask, order="C")
sensor_mask = sensor_mask.astype(bool)
sensor = kSensor(mask=sensor_mask)
sensor.record = ["p"]

kgrid.makeTime(np.max(np.asarray(medium.sound_speed)))
kgrid.setTime(options.number_time_points, kgrid.dt)

return kgrid, medium, source, sensor


def default_output_path(options: BenchmarkOptions) -> Path:
computer_name = platform.node() or "unknown-computer"
date = datetime.now().strftime("%Y%m%d-%H%M%S")
return Path(f"benchmark_data-{computer_name}-{options.data_cast}-{date}.json")


def rolling_average(previous_average: float, new_value: float, count: int) -> float:
    """Incremental mean, e.g. rolling_average(4.0, 6.0, count=2) == 5.0."""
    return (previous_average * (count - 1) + new_value) / count


def store_case_result(result: dict[str, Any], comp_size: int, comp_time: float, mem_usage: float, report_mem_usage: bool) -> None:
    """Append an entry for a new grid size, or refresh the latest entry as further averaging runs complete."""
if len(result["comp_size"]) == 0 or result["comp_size"][-1] != comp_size:
result["comp_size"].append(comp_size)
result["comp_time"].append(comp_time)
if report_mem_usage:
result["mem_usage"].append(mem_usage)
return

result["comp_time"][-1] = comp_time
if report_mem_usage:
result["mem_usage"][-1] = mem_usage


def save_results(path: Path, result: dict[str, Any]) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
payload = {
"comp_size": [int(size) for size in result["comp_size"]],
"comp_time": [float(time) for time in result["comp_time"]],
"options": result["options"],
"output_path": result["output_path"],
"error_reached": bool(result["error_reached"]),
"error_message": result["error_message"],
}
if "mem_usage" in result:
payload["mem_usage"] = [float(usage) for usage in result["mem_usage"]]
path.write_text(json.dumps(payload, indent=2) + "\n")


def options_payload(options: BenchmarkOptions, backend: str, device: str) -> dict[str, Any]:
payload = asdict(options)
payload.update(
{
"backend": backend,
"device": device,
"computer_name": platform.node(),
"python_version": platform.python_version(),
"platform": platform.platform(),
"kwave_python_version": kwave.__version__,
}
)
return payload


def peak_memory_bytes() -> float:
    try:
        import resource
    except ImportError:
        # The resource module is POSIX-only; return a NaN sentinel on Windows.
        return float("nan")

    usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    # ru_maxrss is reported in bytes on macOS but in kilobytes on other POSIX systems.
    if platform.system().lower() == "darwin":
        return float(usage)
    return float(usage * 1024)
Comment on lines +169 to +178 (Contributor):
P1: nan sentinel silently produces invalid JSON on Windows

`peak_memory_bytes()` returns `float("nan")` on Windows, where the `resource` module isn't available. That nan flows unguarded through `rolling_average` and `store_case_result` into `result["mem_usage"]`. Python's `json.dumps` serialises nan as the bare token `NaN`, which is not valid JSON, so strict parsers will reject the file; any run with `--report-mem-usage` on Windows silently writes output that standards-compliant tools cannot read back.

Consider raising early (`ValueError: report_mem_usage is not supported on this platform`) when `resource` is unavailable, or filtering/skipping the nan entries in `save_results`.
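
For example, the filtering option could look like this (a sketch only, not a tested patch):

```python
import json
import math
from pathlib import Path
from typing import Any


def save_results(path: Path, result: dict[str, Any]) -> None:
    path.parent.mkdir(parents=True, exist_ok=True)
    keys = ("comp_size", "comp_time", "options", "output_path", "error_reached", "error_message")
    payload = {key: result[key] for key in keys}
    if "mem_usage" in result:
        # Drop NaN sentinels (e.g. Windows, where `resource` is unavailable).
        payload["mem_usage"] = [u for u in result["mem_usage"] if not math.isnan(u)]
    # allow_nan=False turns any remaining NaN into a loud ValueError
    # instead of silently emitting invalid JSON.
    path.write_text(json.dumps(payload, indent=2, allow_nan=False) + "\n")
```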
