diff --git a/MODULE.bazel.lock b/MODULE.bazel.lock index bf06b782..badf4c28 100644 --- a/MODULE.bazel.lock +++ b/MODULE.bazel.lock @@ -1502,7 +1502,7 @@ "bzlTransitiveDigest": "WyF2tLBpR8cyete55VmU+AwLC/mhDbnIlEdbVymbm04=", "usagesDigest": "DXk0VBJUK7HdLXJE59BVPG4lCTTPnHGNQg/SH9dTuVE=", "recordedFileInputs": { - "@@//deps/pip/requirements_3_11.txt": "01687dd0690641cecfe0cfa3a5a8ba2186f65112c34b4ab7b64caa55a416ae56", + "@@//deps/pip/requirements_3_11.txt": "845ddefb3a79c099a05f39880752241d3c30cefb3774c0611f253d5bff3f774f", "@@//deps/pip/requirements_3_8.txt": "94c254d645dc1d0555f48e66fa499aa2efecfc5e547500cd4ed409aee28e8880", "@@rules_fuzzing+//fuzzing/requirements.txt": "ab04664be026b632a0d2a2446c4f65982b7654f5b6851d2f9d399a19b7242a5b", "@@rules_python+//tools/publish/requirements_darwin.txt": "095d4a4f3d639dce831cd493367631cd51b53665292ab20194bac2c0c6458fa8", diff --git a/deps/pip/BUILD.bazel b/deps/pip/BUILD.bazel index 1e9c3e3d..becbd300 100644 --- a/deps/pip/BUILD.bazel +++ b/deps/pip/BUILD.bazel @@ -34,6 +34,7 @@ pip_compile( size = "medium", args = PIP_COMPILE_ARGS, data = [ + ":requirements_argoverse2.in", ":requirements_colmap.in", ":requirements_docs.in", ":requirements_ncore.in", diff --git a/deps/pip/requirements_3_11.in b/deps/pip/requirements_3_11.in index e7c68a14..b31bf737 100644 --- a/deps/pip/requirements_3_11.in +++ b/deps/pip/requirements_3_11.in @@ -21,6 +21,7 @@ -r requirements_waymo.in -r requirements_colmap.in -r requirements_nuscenes.in +-r requirements_argoverse2.in -r requirements_pai.in # Public API restrictions for 3.11 diff --git a/deps/pip/requirements_3_11.txt b/deps/pip/requirements_3_11.txt index 37fe4126..d620e757 100644 --- a/deps/pip/requirements_3_11.txt +++ b/deps/pip/requirements_3_11.txt @@ -1853,7 +1853,9 @@ pyarrow==23.0.1 \ --hash=sha256:f4b0dbfa124c0bb161f8b5ebb40f1a680b70279aa0c9901d44a2b5a20806039f \ --hash=sha256:fa8e51cb04b9f8c9c5ace6bab63af9a1f88d35c0d6cbf53e8c17c098552285e1 \ --hash=sha256:fed7020203e9ef273360b9e45be52a2a47d3103caf156a30ace5247ffb51bdbd - # via -r deps/pip/requirements_pai.in + # via + # -r deps/pip/requirements_argoverse2.in + # -r deps/pip/requirements_pai.in pycocotools==2.0.11 \ --hash=sha256:04480330df5013f6edd94891a0ee8294274185f1b5093d1b0f23d51778f0c0e9 \ --hash=sha256:08c79789fd79e801ae4ecfcfeec32b31e36254e7a2b4019af28c104975d5e730 \ diff --git a/deps/pip/requirements_argoverse2.in b/deps/pip/requirements_argoverse2.in new file mode 100644 index 00000000..da09d455 --- /dev/null +++ b/deps/pip/requirements_argoverse2.in @@ -0,0 +1,20 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Argoverse 2 converter dependencies. +# The converter reads the AV2 Feather files directly and intentionally avoids the +# heavy `av2` devkit (torch, kornia, numba, polars, PyAV). Quaternion handling uses +# scipy (already an ncore dependency), so no extra dependency is needed here. +pyarrow diff --git a/docs/conversions/argoverse2/argoverse2.rst b/docs/conversions/argoverse2/argoverse2.rst new file mode 100644 index 00000000..0900181a --- /dev/null +++ b/docs/conversions/argoverse2/argoverse2.rst @@ -0,0 +1,135 @@ +.. SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. SPDX-License-Identifier: Apache-2.0 + +Argoverse 2 Dataset +=================== + +The NCore Argoverse 2 tool converts data from the +`Argoverse 2 `_ Sensor Dataset into NCore +V4 format. The converter reads the Argoverse 2 on-disk Apache Feather files +directly with ``pyarrow`` and deliberately avoids the heavy ``av2`` devkit +(which pulls in torch, kornia, numba, polars and PyAV). Quaternion handling uses +``scipy`` (already an ncore dependency), so no extra dependency is introduced. + +.. _argoverse2_data_conventions: + +Conventions +----------- + +Argoverse 2 provides data from 9 cameras and 2 lidars; it has no radar. The +converter handles all sensor modalities and 3D cuboid annotations. + +Camera Sensors +^^^^^^^^^^^^^^ + 1. **ring_front_center** -- 2048x1550 (portrait) + 2. **ring_front_left** -- 1550x2048 + 3. **ring_front_right** -- 1550x2048 + 4. **ring_side_left** -- 1550x2048 + 5. **ring_side_right** -- 1550x2048 + 6. **ring_rear_left** -- 1550x2048 + 7. **ring_rear_right** -- 1550x2048 + 8. **stereo_front_left** -- 1550x2048 + 9. **stereo_front_right** -- 1550x2048 + +The released imagery for all nine cameras is already undistorted -- the official +av2 devkit projects with the intrinsic matrix ``K`` only and does not load the +distortion columns -- so camera intrinsics are stored using +:class:`~ncore.data.IdealPinholeCameraModelParameters`. Because the imagery is +already undistorted, **global shutter is assumed** (``ShutterType.GLOBAL``). The +``k1, k2, k3`` coefficients present in ``intrinsics.feather`` describe the +original lens (for re-distorting into the raw frame) and are intentionally not +applied to the released images; they are preserved per camera in the camera +component ``generic_meta_data`` under ``av2_original_distortion`` so the original +calibration is not lost. + +LiDAR Sensors +^^^^^^^^^^^^^ + 1. **up_lidar** -- Velodyne VLP-32C, 32 beams, 10 Hz + 2. **down_lidar** -- Velodyne VLP-32C, 32 beams, 10 Hz + +Argoverse 2 sweeps are egomotion-compensated to the sweep reference timestamp +and provided in the egovehicle frame, with real per-point timestamps +(``offset_ns``). The two stacked VLP-32C units are stored separately, each with +its own static extrinsic. Points are split per unit by ``laser_number``, +mapped into the unit's own sensor frame, and decompensated using the real +per-point timestamps so that NCore stores raw per-point-time ray directions. +Because the sensor extrinsic is static, this decompensation is independent of +whether the source data applied ego-motion before or after the sensor +transform. + +A structured VLP-32C model is stored per unit as lidar intrinsics, with per-point +``model_element`` (row, column). Argoverse 2 provides no native firing-column +index, so the firing pattern is reconstructed from ``offset_ns`` (firing columns -- +one VLP-32C revolution at 10 Hz) and ``laser_number`` (the beam, mapped to an +elevation-sorted row). The geometry is derived per log from the *decompensated* +reference sweep: elevations, the laser-to-row map, column timing, per-column +azimuths, and per-row azimuth offsets (the 32 beams of a firing column span several +degrees of azimuth, so the per-row offset is fit empirically). The two stacked +units fire in opposite phase, so they spin oppositely in their own frames (one +``cw``, one ``ccw``), which is detected from the data. The column grid is upsampled +4x so per-frame alignment is not column-quantized, and each sweep is re-aligned to +the model by a per-frame affine column remap -- a constant phase (the spin phase at +a given ``offset_ns`` drifts ~1 deg between sweeps) plus a linear term (the spin +rate drifts slightly within a sweep on some scenes). Steep downward beams that only +return at near range (no far data) have their azimuth offset fit from near-range +returns. Deriving from the decompensated cloud (not the ego-motion-smeared +compensated one) plus these steps gives ~0.03 deg median far-range reconstruction +across scenes (validated on 38 val logs / 76 units, all sub-0.08 deg median with no +systematic azimuth or elevation bias), on par with native-column sensors. Pass +``--lidar-model-source none`` to store raw ray bundles only. + +The ``laser_number`` to up/down unit split is not documented by Argoverse 2. The +two units occupy the two laser-number halves (``< 32`` and ``>= 32``); the unit +*label* is recovered from extrinsic geometry by per-beam elevation flatness -- a +laser ring traces a constant-elevation cone only in its own sensor frame, so the +wrong extrinsic tilts the cone and inflates the per-ring elevation spread. The +decision is made once per log and is stable with a wide (~2-10x) margin. + +Annotations +^^^^^^^^^^^ + +3D cuboid annotations are native to the egovehicle frame at the sweep reference +time. They are stored in the ``rig`` frame at that timestamp with no ego pose +baked in, so the egovehicle motion stays out of the stored coordinates and +remains swappable downstream (a V4 feature); the pose graph places the cuboids +using the active ego trajectory. The full 3-DOF box orientation is preserved (the +AV2 quaternion is converted to the ``BBox3`` ``xyz``-Euler convention, not reduced +to yaw). The ``track_uuid`` is used as the track ID. + +Coordinate Frames +^^^^^^^^^^^^^^^^^ + +The first ego pose's ``city_SE3_egovehicle`` is stored as the static +``world -> world_global`` pose, so ``world_global`` is the Argoverse 2 city +frame. All absolute city coordinates remain recoverable for later alignment +with the Argoverse 2 HD map (which the converter does not export). + +Usage +----- + +.. code-block:: bash + + bazel run //tools/data_converter/argoverse2 -- \ + --root-dir /path/to/argoverse2/sensor \ + --output-dir /path/to/output \ + argoverse2-v4 \ + --split val + +Convert a single log: + +.. code-block:: bash + + bazel run //tools/data_converter/argoverse2 -- \ + --root-dir /path/to/argoverse2/sensor \ + --output-dir /path/to/output \ + argoverse2-v4 \ + --split val \ + --log-id 02678d04-cc9f-3148-9f95-1ba66347dff9 + +Testing +------- + +.. code-block:: bash + + AV2_DIR=/path/to/argoverse2/sensor AV2_SPLIT=val \ + bazel test //tools/data_converter/argoverse2:pytest_converter diff --git a/docs/conversions/index.rst b/docs/conversions/index.rst index be50bee1..9bc84d1d 100644 --- a/docs/conversions/index.rst +++ b/docs/conversions/index.rst @@ -6,13 +6,14 @@ Data Conversions NCore provides conversion tools for importing 3rd-party dataset formats into the NCore V4 component-based format. Supported formats include KITTI, nuScenes, -Waymo, COLMAP (including ScanNet++), and PAI. +Argoverse 2, Waymo, COLMAP (including ScanNet++), and PAI. .. toctree:: :maxdepth: 1 kitti/kitti nuscenes/nuscenes + argoverse2/argoverse2 waymo/waymo colmap/colmap pai/pai diff --git a/tools/data_converter/argoverse2/BUILD.bazel b/tools/data_converter/argoverse2/BUILD.bazel new file mode 100644 index 00000000..a645eb51 --- /dev/null +++ b/tools/data_converter/argoverse2/BUILD.bazel @@ -0,0 +1,97 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load("@ncore_pip_deps//:requirements.bzl", "requirement") +load("@rules_python//python:defs.bzl", "py_binary", "py_library") +load("//bazel/pytest:defs.bzl", "pytest_test") + +# Argoverse 2 specific utilities: feather readers, sensor maps, lidar unit split +py_library( + name = "pylib_utils", + srcs = ["utils.py"], + deps = [ + requirement("numpy"), + requirement("pyarrow"), + requirement("scipy"), + requirement("universal_pathlib"), + "//ncore:pylib", + "//tools/data_converter:pylib_structured_lidar_model", + ], +) + +# Converter library (config, converter class, CLI registration) +py_library( + name = "pylib", + srcs = [ + "converter.py", + ], + deps = [ + ":pylib_utils", + requirement("click"), + requirement("numpy"), + requirement("pyarrow"), + requirement("scipy"), + requirement("tqdm"), + requirement("universal_pathlib"), + "//ncore:pylib", + "//tools/data_converter:pylib_cli", + "//tools/data_converter:pylib_structured_lidar_model", + ], +) + +# Standalone CLI binary +py_binary( + name = "convert", + srcs = ["main.py"], + main = "main.py", + deps = [":pylib"], +) + +alias( + name = "argoverse2", + actual = ":convert", +) + +# Integration test for the Argoverse 2 converter (requires AV2_DIR env var) +pytest_test( + name = "pytest_converter", + srcs = ["converter_test.py"], + python_versions = ["3.11"], + tags = ["manual"], # Only run when explicitly requested (needs external data) + deps = [ + ":pylib", + requirement("numpy"), + requirement("parameterized"), + requirement("pyarrow"), + requirement("torch"), + requirement("universal_pathlib"), + "//ncore:pylib", + ], +) + +# Data-free unit test for the VLP-32C lidar-model derivation (runs in CI). +pytest_test( + name = "pytest_utils", + srcs = ["utils_test.py"], + python_versions = ["3.11"], + deps = [ + ":pylib_utils", + requirement("numpy"), + requirement("pyarrow"), + requirement("torch"), + requirement("universal_pathlib"), + "//ncore:pylib", + ], +) diff --git a/tools/data_converter/argoverse2/NOTICE b/tools/data_converter/argoverse2/NOTICE new file mode 100644 index 00000000..e16f1b37 --- /dev/null +++ b/tools/data_converter/argoverse2/NOTICE @@ -0,0 +1,12 @@ +Argoverse 2 Dataset +Copyright (c) 2021 Argo AI, LLC + +This converter processes data from the Argoverse 2 (AV2) Sensor Dataset. +The Argoverse 2 dataset is released under the Creative Commons +Attribution-NonCommercial-ShareAlike 4.0 International License (CC BY-NC-SA 4.0). + +Users must agree to the Argoverse Terms of Use before downloading or using the dataset: +https://www.argoverse.org/about.html#terms-of-use + +This converter reads the Argoverse 2 on-disk Feather files directly and does not +depend on the av2 devkit. The av2 devkit is released under the MIT License. diff --git a/tools/data_converter/argoverse2/README.md b/tools/data_converter/argoverse2/README.md new file mode 100644 index 00000000..033d7952 --- /dev/null +++ b/tools/data_converter/argoverse2/README.md @@ -0,0 +1,117 @@ +# Argoverse 2 to NCore V4 Converter + +Converts [Argoverse 2](https://www.argoverse.org/av2.html) Sensor Dataset logs to +NCore V4 format. + +The converter reads the Argoverse 2 on-disk Apache Feather files directly with +`pyarrow`, deliberately avoiding the heavy `av2` devkit (which pulls in torch, +kornia, numba, polars and PyAV). Quaternion handling uses `scipy` (already an +ncore dependency), so no extra dependency is introduced. + +## Requirements + +- Argoverse 2 Sensor Dataset downloaded locally, organised as + `{root}/{split}/{log_id}/...` +- Python packages: `pyarrow` (plus `scipy`, already an ncore dependency) + +## Usage + +```bash +bazel run //tools/data_converter/argoverse2 -- \ + --root-dir /path/to/argoverse2/sensor \ + --output-dir /path/to/output \ + argoverse2-v4 \ + --split val +``` + +### Convert a single log + +```bash +bazel run //tools/data_converter/argoverse2 -- \ + --root-dir /path/to/argoverse2/sensor \ + --output-dir /path/to/output \ + argoverse2-v4 \ + --split val \ + --log-id 02678d04-cc9f-3148-9f95-1ba66347dff9 +``` + +## Options + +| Option | Default | Description | +|--------|---------|-------------| +| `--split` | val | Split directory under `--root-dir` (train, val, test) | +| `--log-id` | None | Filter to a single log by ID | +| `--store-type` | itar | Output store format (itar or directory) | +| `--profile` | separate-sensors | Component group assignment profile | +| `--sequence-meta/--no-sequence-meta` | enabled | Generate sequence meta JSON | + +## Sensor Assumptions + +- **Cameras**: 9 cameras (7 ring + 2 stereo). AV2 imagery is shipped already + undistorted -- the official av2 devkit projects with the intrinsic matrix `K` + only and does not load the `k1, k2, k3` columns -- so the stored model is an + ideal (distortion-free) pinhole (`IdealPinholeCameraModelParameters`). Because + the imagery is already undistorted, global shutter is assumed + (`ShutterType.GLOBAL`). The `k1, k2, k3` coefficients in `intrinsics.feather` + describe the original lens (for re-distorting into the raw frame) and must not be + applied to the released images, so they are not used for projection -- but they + are preserved per camera in the camera component `generic_meta_data` under + `av2_original_distortion` so the original calibration is not lost. +- **Lidar**: two stacked Velodyne VLP-32C units (`up_lidar` / `down_lidar`, 10 Hz). + The source sweep is egomotion-compensated to the sweep reference timestamp and + expressed in the egovehicle frame. Real per-point timestamps are available via + `offset_ns`. Each unit is stored separately with its own extrinsic. Points are + mapped into each unit's sensor frame and decompensated using the real per-point + timestamps so the stored directions are raw per-point-time measurements. Because + the extrinsic is static, this is independent of whether AV2 applied ego-motion + before or after the sensor transform. + - A structured VLP-32C model is stored per unit as lidar intrinsics, with + per-point `model_element` (row, column). AV2 provides no native firing-column + index, so the firing pattern is reconstructed from `offset_ns` (firing columns -- + one VLP-32C revolution at 10 Hz) and `laser_number` (the beam, mapped to an + elevation-sorted row). The geometry is derived per log from the *decompensated* + reference sweep: elevations, the laser->row map, column timing, per-column + azimuths, and per-row azimuth offsets (the 32 beams of a firing column span + several degrees of azimuth, so the per-row offset is fit empirically rather than + assumed). The two stacked units fire in opposite phase, so they spin oppositely + in their own frames (one `cw`, one `ccw`); this is detected from the data. The + column grid is upsampled 4x so the per-frame alignment is not column-quantized. + Each sweep is re-aligned to the model by a per-frame affine column remap (a + constant phase plus a linear term): the spin phase at a given `offset_ns` drifts + ~1 deg between sweeps (the constant), and the spin rate drifts slightly within a + sweep on some scenes (the linear term). A fixed mapping, or a phase-only shift, + would leave some frames off by up to ~1 deg / ~0.25 deg respectively. + Steep downward beams that only return at near range (no far data, e.g. the + lowest laser at ~-25 deg) have their azimuth offset fit from near-range returns. + Deriving from the decompensated cloud (not the ego-motion-smeared compensated + one) plus these steps gives ~0.03-0.05 deg median far-range reconstruction across + scenes, on par with native-column sensors. Pass `--lidar-model-source none` to + store raw ray bundles only. + - The `laser_number` to up/down unit split is not documented by AV2. The two + units occupy the two laser-number halves (`< 32` and `>= 32`); the unit *label* + is recovered from extrinsic geometry by per-beam elevation flatness (a laser + ring traces a constant-elevation cone only in its own sensor frame, so the + wrong extrinsic tilts the cone and inflates the per-ring elevation spread). The + decision is made once per log and is stable with a wide (~2-10x) margin. +- **Radar**: AV2 has no radar. +- **Cuboid annotations**: native to the egovehicle frame at the sweep reference + time, stored in the `rig` frame at that timestamp with no ego pose baked in. This + keeps the egovehicle motion out of the stored coordinates so it stays swappable + downstream (a V4 feature); the pose graph places the cuboids using the active ego + trajectory. The full 3-DOF box orientation is preserved (the AV2 quaternion is + converted to the BBox3 `xyz`-Euler convention, not reduced to yaw). `track_uuid` + is used as track ID. + +## Coordinate frames + +The first ego pose's `city_SE3_egovehicle` is stored as the static +`world -> world_global` pose, so `world_global` is the AV2 city frame. All absolute +city coordinates remain recoverable for later alignment with the AV2 HD map (which +the converter does not export). + +## Testing + +```bash +AV2_DIR=/path/to/argoverse2/sensor AV2_SPLIT=val \ + bazel test //tools/data_converter/argoverse2:pytest_converter +``` diff --git a/tools/data_converter/argoverse2/converter.py b/tools/data_converter/argoverse2/converter.py new file mode 100644 index 00000000..7c999e1a --- /dev/null +++ b/tools/data_converter/argoverse2/converter.py @@ -0,0 +1,776 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Argoverse 2 Sensor Dataset to NCore V4 converter.""" + +from __future__ import annotations + +import json +import logging + +from dataclasses import dataclass +from typing import Dict, List, Literal, Optional + +import click +import numpy as np +import tqdm + +from scipy.spatial.transform import Rotation as R +from upath import UPath + +from ncore.impl.common.transformations import ( + HalfClosedInterval, + MotionCompensator, + se3_inverse, +) +from ncore.impl.data.types import ( + BBox3, + CuboidTrackObservation, + LabelSource, + RowOffsetStructuredSpinningLidarModelParameters, +) +from ncore.impl.data.v4.components import ( + CameraSensorComponent, + CuboidsComponent, + IntrinsicsComponent, + LidarSensorComponent, + MasksComponent, + PosesComponent, + RadarSensorComponent, # noqa: F401 -- imported for parity; AV2 has no radar + SequenceComponentGroupsReader, + SequenceComponentGroupsWriter, +) +from ncore.impl.data.v4.types import ComponentGroupAssignments +from ncore.impl.data_converter.base import FileBasedDataConverter, FileBasedDataConverterConfig +from tools.data_converter.argoverse2.utils import ( + AV2_CATEGORY_MAP, + CAMERA_NAMES, + LIDAR_NAMES, + VLP32C_N_BEAMS, + Vlp32cGeometry, + assign_lidar_units, + build_vlp32c_model, + derive_vlp32c_geometry, + list_log_ids, + list_sensor_timestamps, + read_annotations, + read_city_se3_ego, + read_ego_se3_sensor, + read_intrinsics, + read_lidar_sweep, + reconstruct_model_elements, +) +from tools.data_converter.cli import cli + + +# Argoverse 2 timestamps are nanoseconds; NCore V4 uses microseconds. +NS_PER_US = 1000 + + +def _ns_to_us(value_ns: int) -> int: + return int(value_ns) // NS_PER_US + + +# ----------------------------------------------------------------------------- +# Config +# ----------------------------------------------------------------------------- + + +@dataclass(kw_only=True, slots=True) +class Argoverse2Converter4Config(FileBasedDataConverterConfig): + """Configuration for Argoverse 2 to NCore V4 conversion.""" + + split: str = "val" + log_id: Optional[str] = None + store_type: Literal["itar", "directory"] = "itar" + component_group_profile: Literal["default", "separate-sensors", "separate-all"] = "separate-sensors" + store_sequence_meta: bool = True + # Lidar model: "empirical" derives a VLP-32C structured model (per unit) and stores + # per-point model elements + intrinsics; "none" stores raw ray bundles only. + lidar_model_source: Literal["empirical", "none"] = "empirical" + + +# ----------------------------------------------------------------------------- +# Converter +# ----------------------------------------------------------------------------- + + +class Argoverse2Converter4(FileBasedDataConverter): + """Dataset preprocessing class for converting Argoverse 2 data to NCore V4. + + Sensor assumptions (sourced from the AV2 devkit and User Guide): + + - Cameras: 9 cameras (7 ring + 2 stereo). Imagery is shipped already + undistorted, so a pinhole model with zero distortion is exact. Because the + released imagery is already undistorted (a single capture timestamp per + image, no rolling-shutter metadata), global shutter is assumed -> + ``ShutterType.GLOBAL``. The original lens radial-distortion coefficients + ``(k1, k2, k3)`` are not applied but are preserved per camera in the camera + component ``generic_meta_data`` under ``av2_original_distortion``. + - Lidar: two stacked Velodyne VLP-32C units (``up_lidar`` / ``down_lidar``, + 10 Hz). The source sweep is egomotion-compensated to the sweep reference + timestamp and stored in the egovehicle frame. Real per-point timestamps are + provided (``offset_ns``). We split points per unit by ``laser_number``, + transform into each unit's sensor frame, and decompensate using the real + per-point timestamps so NCore stores raw per-point-time directions. A + structured VLP-32C model is stored per unit (with per-point ``model_element``) + by reconstructing the firing pattern from ``offset_ns`` + ``laser_number``; + the two units spin oppositely in their own frames (detected from data). Pass + ``--lidar-model-source none`` to skip the model and store raw ray bundles only. + - Radar: AV2 has no radar. + - Cuboid annotations: native to the egovehicle frame at the sweep reference + time. Stored in the ``rig`` frame at that timestamp with no ego pose baked + in, so the egovehicle motion stays swappable downstream (a V4 feature); the + pose graph places the cuboids using the active ego trajectory. + + The first ego pose's ``city_SE3_egovehicle`` is stored as the static + ``world -> world_global`` anchor, so ``world_global`` is the AV2 city frame. + This keeps absolute coordinates recoverable for later HD-map alignment. + """ + + def __init__(self, config: Argoverse2Converter4Config) -> None: + super().__init__(config) + + self.component_group_profile = config.component_group_profile + self.store_type = config.store_type + self.store_sequence_meta = config.store_sequence_meta + + self._split = config.split + self._log_id = config.log_id + self._lidar_model_source = config.lidar_model_source + + self.logger = logging.getLogger(__name__) + + @property + def _split_dir(self) -> UPath: + return self.root_dir / self._split + + @staticmethod + def get_sequence_ids(config: Argoverse2Converter4Config) -> List[str]: + """Discover log IDs to convert.""" + if config.log_id is not None: + return [config.log_id] + split_dir = UPath(config.root_dir) / config.split + return list_log_ids(split_dir) + + @staticmethod + def from_config(config: Argoverse2Converter4Config) -> Argoverse2Converter4: + return Argoverse2Converter4(config) + + def convert_sequence(self, sequence_id: str) -> None: + """Convert a single Argoverse 2 log to NCore V4 format.""" + log_id = sequence_id + log_dir = self._split_dir / log_id + + self.logger.info(f"Converting log {log_id} (split={self._split})") + + # --- Ego poses (egovehicle -> city) ----------------------------------- + pose_timestamps_ns, T_ego_city_all = read_city_se3_ego(log_dir) + n_poses = len(pose_timestamps_ns) + assert n_poses >= 2, f"Log has fewer than 2 ego poses: {n_poses}" + + pose_timestamps_us = np.array([_ns_to_us(t) for t in pose_timestamps_ns], dtype=np.uint64) + + # AV2 ego poses are dense (some only nanoseconds apart), so the ns -> us + # truncation can produce duplicate microsecond timestamps. The pose writer + # requires strictly increasing timestamps, so keep the first pose for each + # unique microsecond timestamp. + pose_timestamps_us, unique_idx = np.unique(pose_timestamps_us, return_index=True) + T_ego_city_all = T_ego_city_all[unique_idx] + n_poses = len(pose_timestamps_us) + assert n_poses >= 2, f"Log has fewer than 2 unique-microsecond ego poses: {n_poses}" + + # Anchor the first pose as world_global (the AV2 city frame); store all + # poses relative to it so the relative poses are float32-safe. + T_world_world_global = T_ego_city_all[0].copy() # float64 for global accuracy + T_world_global_inv = se3_inverse(T_world_world_global) + T_rig_world_relative = (T_world_global_inv @ T_ego_city_all).astype(np.float32) + + # --- Static sensor extrinsics (sensor -> ego) ------------------------- + ego_se3_sensor = read_ego_se3_sensor(log_dir) + + # --- Determine active sensors ----------------------------------------- + camera_ids = self.get_active_camera_ids(list(CAMERA_NAMES)) + lidar_ids = self.get_active_lidar_ids(list(LIDAR_NAMES)) + radar_ids = self.get_active_radar_ids([]) # AV2 has no radar + + # --- Sequence time interval ------------------------------------------- + all_ts_us: List[int] = [int(pose_timestamps_us[0]), int(pose_timestamps_us[-1])] + lidar_ts_ns = list_sensor_timestamps(log_dir, "lidar") + if lidar_ts_ns: + all_ts_us += [_ns_to_us(lidar_ts_ns[0]), _ns_to_us(lidar_ts_ns[-1])] + for cam_id in camera_ids: + cam_ts_ns = list_sensor_timestamps(log_dir, "cameras", cam_id) + if cam_ts_ns: + all_ts_us += [_ns_to_us(cam_ts_ns[0]), _ns_to_us(cam_ts_ns[-1])] + + seq_start_us = min(all_ts_us) + seq_end_us = max(all_ts_us) + + # Extend pose timeline to cover the sequence interval, extrapolating with a + # constant-velocity assumption so lidar decompensation near the boundaries + # has real motion to invert (mirrors the nuScenes converter). + T_rig_world_relative, pose_timestamps_us = self._extend_pose_timeline( + T_rig_world_relative, pose_timestamps_us, seq_start_us, seq_end_us + ) + + sequence_timestamp_interval_us = HalfClosedInterval.from_start_end(seq_start_us, seq_end_us) + + # --- Component group assignments -------------------------------------- + component_groups = ComponentGroupAssignments.create( + camera_ids=camera_ids, + lidar_ids=lidar_ids, + radar_ids=radar_ids, + point_clouds_ids=[], + camera_labels_ids=[], + profile=self.component_group_profile, + ) + + # --- Create writer ---------------------------------------------------- + store_writer = SequenceComponentGroupsWriter( + output_dir_path=self.output_dir / log_id, + store_base_name=log_id, + sequence_id=log_id, + sequence_timestamp_interval_us=sequence_timestamp_interval_us, + store_type=self.store_type, + generic_meta_data={ + "source_dataset": "argoverse2", + "argoverse2_split": self._split, + "argoverse2_log_id": log_id, + }, + ) + + poses_writer = store_writer.register_component_writer( + PosesComponent.Writer, + component_instance_name="default", + group_name=component_groups.poses_component_group, + generic_meta_data={ + "calibration_type": "argoverse2:egovehicle_SE3_sensor", + "egomotion_type": "argoverse2:city_SE3_egovehicle", + }, + ) + intrinsics_writer = store_writer.register_component_writer( + IntrinsicsComponent.Writer, + component_instance_name="default", + group_name=component_groups.intrinsics_component_group, + ) + masks_writer = store_writer.register_component_writer( + MasksComponent.Writer, + component_instance_name="default", + group_name=component_groups.masks_component_group, + ) + + # --- Store ego poses -------------------------------------------------- + poses_writer.store_dynamic_pose( + source_frame_id="rig", + target_frame_id="world", + poses=T_rig_world_relative, + timestamps_us=pose_timestamps_us, + ) + poses_writer.store_static_pose( + source_frame_id="world", + target_frame_id="world_global", + pose=T_world_world_global, + ) + + # --- Decode sensors --------------------------------------------------- + if lidar_ids: + self._decode_lidars( + log_dir=log_dir, + store_writer=store_writer, + poses_writer=poses_writer, + intrinsics_writer=intrinsics_writer, + component_groups=component_groups, + lidar_ids=lidar_ids, + ego_se3_sensor=ego_se3_sensor, + T_rig_world_relative=T_rig_world_relative, + pose_timestamps_us=pose_timestamps_us, + ) + + self._decode_cameras( + log_dir=log_dir, + store_writer=store_writer, + poses_writer=poses_writer, + intrinsics_writer=intrinsics_writer, + masks_writer=masks_writer, + component_groups=component_groups, + camera_ids=camera_ids, + ego_se3_sensor=ego_se3_sensor, + ) + + self._decode_cuboids( + log_dir=log_dir, + store_writer=store_writer, + component_groups=component_groups, + ) + + # --- Finalize --------------------------------------------------------- + ncore_4_paths = store_writer.finalize() + + if self.store_sequence_meta: + sequence_component_reader = SequenceComponentGroupsReader(ncore_4_paths) + sequence_meta_path = self.output_dir / log_id / f"{sequence_component_reader.sequence_id}.json" + with sequence_meta_path.open("w") as f: + json.dump(sequence_component_reader.get_sequence_meta().to_dict(), f, indent=2) + self.logger.info(f"Wrote sequence meta data {str(sequence_meta_path)}") + + # ------------------------------------------------------------------------- + # Pose timeline + # ------------------------------------------------------------------------- + + @staticmethod + def _extend_pose_timeline( + T_rig_world_relative: np.ndarray, + pose_timestamps_us: np.ndarray, + seq_start_us: int, + seq_end_us: int, + ) -> tuple[np.ndarray, np.ndarray]: + """Extend the relative pose timeline to cover [seq_start, seq_end]. + + Boundaries are extrapolated with a constant-velocity assumption so the + first/last lidar sweeps have real ego motion to decompensate against. + """ + if seq_start_us < int(pose_timestamps_us[0]): + T_0 = T_rig_world_relative[0] + T_1 = T_rig_world_relative[1] + T_delta_inv = se3_inverse(T_1) @ T_0 + T_boundary = (T_0 @ T_delta_inv).astype(np.float32) + T_rig_world_relative = np.concatenate([T_boundary[np.newaxis], T_rig_world_relative], axis=0) + pose_timestamps_us = np.concatenate([np.array([seq_start_us], dtype=np.uint64), pose_timestamps_us]) + + if seq_end_us > int(pose_timestamps_us[-1]): + T_n1 = T_rig_world_relative[-2] + T_n = T_rig_world_relative[-1] + T_delta = se3_inverse(T_n1) @ T_n + T_boundary = (T_n @ T_delta).astype(np.float32) + T_rig_world_relative = np.concatenate([T_rig_world_relative, T_boundary[np.newaxis]], axis=0) + pose_timestamps_us = np.concatenate([pose_timestamps_us, np.array([seq_end_us], dtype=np.uint64)]) + + return T_rig_world_relative, pose_timestamps_us + + # ------------------------------------------------------------------------- + # Lidar + # ------------------------------------------------------------------------- + + def _decode_lidars( + self, + log_dir: UPath, + store_writer: SequenceComponentGroupsWriter, + poses_writer: PosesComponent.Writer, + intrinsics_writer: IntrinsicsComponent.Writer, + component_groups: ComponentGroupAssignments, + lidar_ids: List[str], + ego_se3_sensor: Dict[str, np.ndarray], + T_rig_world_relative: np.ndarray, + pose_timestamps_us: np.ndarray, + ) -> None: + """Decode and store the two stacked VLP-32C lidars individually. + + AV2 lidar points are egomotion-compensated to the sweep reference time + (the sweep start) and provided in the egovehicle frame. For each unit we + map points into the unit's own sensor frame (via the static extrinsic) and + decompensate using the real per-point timestamps -- referenced to the sweep + start -- to recover raw per-point-time directions. + + Because the sensor extrinsic is static, the decompensation commutes with + the ego->sensor transform, so the result is independent of whether AV2 + applied ego-motion before or after the sensor transform. + """ + sweep_ts_ns = list_sensor_timestamps(log_dir, "lidar") + if not sweep_ts_ns: + self.logger.warning("No lidar sweeps found") + return + + # Static extrinsics + per-unit writers + per-unit motion compensators. + T_unit_rig: Dict[str, np.ndarray] = {} + lidar_writers: Dict[str, LidarSensorComponent.Writer] = {} + compensators: Dict[str, MotionCompensator] = {} + + for unit_id in lidar_ids: + T_unit_rig[unit_id] = ego_se3_sensor[unit_id].astype(np.float32) + poses_writer.store_static_pose(source_frame_id=unit_id, target_frame_id="rig", pose=T_unit_rig[unit_id]) + lidar_writers[unit_id] = store_writer.register_component_writer( + LidarSensorComponent.Writer, + component_instance_name=unit_id, + group_name=component_groups.lidar_component_groups.get(unit_id), + generic_meta_data={"sensor_model": "Velodyne VLP-32C"}, + ) + compensators[unit_id] = MotionCompensator.from_sensor_rig( + sensor_id=unit_id, + T_sensor_rig=T_unit_rig[unit_id], + T_rig_worlds=T_rig_world_relative, + T_rig_worlds_timestamps_us=pose_timestamps_us, + ) + + # Determine the laser_number -> unit labelling once from the first sweep. + # The split is a fixed physical property of the two stacked sensors, so we + # resolve it once (robustly, from extrinsic geometry) and reuse it for every + # sweep rather than re-deciding per frame. + T_up = ego_se3_sensor["up_lidar"] if "up_lidar" in ego_se3_sensor else np.eye(4) + T_down = ego_se3_sensor["down_lidar"] if "down_lidar" in ego_se3_sensor else np.eye(4) + first_sweep = read_lidar_sweep(log_dir / "sensors" / "lidar" / f"{sweep_ts_ns[0]}.feather") + first_masks = assign_lidar_units(first_sweep.laser_number, first_sweep.xyz, T_up, T_down) + + # Map the decision to a laser_number threshold test (lo half == laser_number < 32). + lo_is_up = bool(first_masks["up_lidar"][first_sweep.laser_number < 4].all()) + unit_for_lo = "up_lidar" if lo_is_up else "down_lidar" + unit_for_hi = "down_lidar" if lo_is_up else "up_lidar" + self.logger.info(f"Lidar unit split: laser_number<32 -> {unit_for_lo}, >=32 -> {unit_for_hi}") + + # Build a structured VLP-32C model per unit from the first sweep. AV2 has no + # native firing-column index, but offset_ns + laser_number reconstruct it (one + # firing column per VLP-32C revolution; laser_number selects the beam/row). The + # firing geometry (elevations, laser->row map, per-column azimuths, per-row + # azimuth offsets, spin direction) is derived empirically per log from the + # DECOMPENSATED reference sweep -- decompensation is essential, since the raw + # motion-compensated azimuths are smeared by ego motion (~0.5 deg) which would + # otherwise dominate the model error. + unit_geometry: Dict[str, Vlp32cGeometry] = {} + unit_model: Dict[str, RowOffsetStructuredSpinningLidarModelParameters] = {} + if self._lidar_model_source == "empirical": + first_ref_ts_us = _ns_to_us(int(first_sweep.timestamp_ns)) + first_pt_ts_us = ((first_sweep.timestamp_ns + first_sweep.offset_ns) // NS_PER_US).astype(np.uint64) + first_start_us = min(first_ref_ts_us, int(first_pt_ts_us.min())) + first_end_us = max(int(first_pt_ts_us.max()), first_start_us + 1) + for unit_id, mask_first in ( + (unit_for_lo, first_sweep.laser_number < 32), + (unit_for_hi, first_sweep.laser_number >= 32), + ): + if unit_id not in lidar_ids or not mask_first.any(): + continue + laser_in_unit = (first_sweep.laser_number[mask_first] % VLP32C_N_BEAMS).astype(np.int64) + offset_in_unit = first_sweep.offset_ns[mask_first] + + # rig -> unit sensor frame, then decompensate to per-point time so the + # firing geometry is clean (un-smeared by ego motion). + T_rig_unit = se3_inverse(T_unit_rig[unit_id]) + xyz_sensor_first = (T_rig_unit[:3, :3] @ first_sweep.xyz[mask_first].T).T + T_rig_unit[:3, 3] + xyz_decomp_first = compensators[unit_id].motion_decompensate_points( + sensor_id=unit_id, + xyz_reftime=xyz_sensor_first, + timestamp_us=first_pt_ts_us[mask_first], + frame_start_timestamp_us=first_start_us, + frame_end_timestamp_us=first_end_us, + reference_timestamp_us=first_ref_ts_us, + ) + + geometry = derive_vlp32c_geometry( + xyz_decompensated=xyz_decomp_first.astype(np.float64), + laser_number_in_unit=laser_in_unit, + offset_ns=offset_in_unit, + ) + model = build_vlp32c_model(geometry) + unit_geometry[unit_id] = geometry + unit_model[unit_id] = model + self.logger.info( + f"Derived VLP-32C model for {unit_id}: {geometry.n_columns} columns, " + f"spin {geometry.spinning_direction}, " + f"column period {geometry.column_period_ns / 1000.0:.2f} us" + ) + + for ts_ns in tqdm.tqdm(sweep_ts_ns, desc="Process lidar"): + sweep = read_lidar_sweep(log_dir / "sensors" / "lidar" / f"{ts_ns}.feather") + + # Per-point absolute time = sweep_ts + offset; convert to microseconds. + # The sweep reference timestamp (the filename) is the frame start and + # the egomotion-compensation reference for all points in the sweep. + reference_ts_us = _ns_to_us(int(sweep.timestamp_ns)) + point_ts_us = ((sweep.timestamp_ns + sweep.offset_ns) // NS_PER_US).astype(np.uint64) + + frame_start_us = min(reference_ts_us, int(point_ts_us.min())) + frame_end_us = int(point_ts_us.max()) + if frame_end_us <= frame_start_us: + frame_end_us = frame_start_us + 1 + + lo = sweep.laser_number < 32 + unit_masks = {unit_for_lo: lo, unit_for_hi: ~lo} + + for unit_id in lidar_ids: + mask = unit_masks[unit_id] + if not mask.any(): + continue + + xyz_rig = sweep.xyz[mask] # AV2 egovehicle frame == NCore rig frame + ts_unit = point_ts_us[mask] + intensity_unit = sweep.intensity[mask] + + # rig -> unit sensor frame (points are compensated to the sweep + # reference time, so this is the sensor reference-time frame). + T_rig_unit = se3_inverse(T_unit_rig[unit_id]) + xyz_sensor = (T_rig_unit[:3, :3] @ xyz_rig.T).T + T_rig_unit[:3, 3] + + # Decompensate from the reference-time sensor frame (the sweep start, + # AV2's compensation reference) to each point's own measurement time. + xyz_raw = compensators[unit_id].motion_decompensate_points( + sensor_id=unit_id, + xyz_reftime=xyz_sensor, + timestamp_us=ts_unit, + frame_start_timestamp_us=frame_start_us, + frame_end_timestamp_us=frame_end_us, + reference_timestamp_us=reference_ts_us, + ) + + distance_m = np.linalg.norm(xyz_raw, axis=1) + direction = np.zeros_like(xyz_raw) + nonzero = distance_m > 0 + direction[nonzero] = xyz_raw[nonzero] / distance_m[nonzero, np.newaxis] + + # Per-point structured-model element (row, column) from beam + firing + # time, re-aligned to the model by this frame's azimuth phase. + model_element = None + if unit_id in unit_geometry: + model_element = reconstruct_model_elements( + laser_number_in_unit=(sweep.laser_number[mask] % VLP32C_N_BEAMS).astype(np.int64), + offset_ns=sweep.offset_ns[mask], + geometry=unit_geometry[unit_id], + xyz_decompensated=xyz_raw, + ) + + lidar_writers[unit_id].store_frame( + direction=direction.astype(np.float32), + timestamp_us=ts_unit, + model_element=model_element, + distance_m=distance_m.reshape(1, -1), + intensity=intensity_unit.reshape(1, -1), + frame_timestamps_us=np.array([frame_start_us, frame_end_us], dtype=np.uint64), + generic_data={}, + generic_meta_data={}, + ) + + # Store the structured lidar model as intrinsics for each unit. + for unit_id, model in unit_model.items(): + intrinsics_writer.store_lidar_intrinsics(lidar_id=unit_id, lidar_model_parameters=model) + + # ------------------------------------------------------------------------- + # Cameras + # ------------------------------------------------------------------------- + + def _decode_cameras( + self, + log_dir: UPath, + store_writer: SequenceComponentGroupsWriter, + poses_writer: PosesComponent.Writer, + intrinsics_writer: IntrinsicsComponent.Writer, + masks_writer: MasksComponent.Writer, + component_groups: ComponentGroupAssignments, + camera_ids: List[str], + ego_se3_sensor: Dict[str, np.ndarray], + ) -> None: + """Decode and store all camera frames. + + AV2 imagery is shipped already undistorted, so global shutter is assumed and + the stored model is a pinhole with zero distortion coefficients. The original + lens radial-distortion coefficients ``(k1, k2, k3)`` are preserved per camera + in the camera component ``generic_meta_data`` (``av2_original_distortion``). + """ + intrinsics = read_intrinsics(log_dir) + + for camera_id in camera_ids: + cam_ts_ns = list_sensor_timestamps(log_dir, "cameras", camera_id) + if not cam_ts_ns: + self.logger.warning(f"No data for camera {camera_id}") + continue + + self.logger.info(f"Processing camera {camera_id}") + + # Extrinsic: camera -> rig (ego) + T_cam_rig = ego_se3_sensor[camera_id].astype(np.float32) + poses_writer.store_static_pose(source_frame_id=camera_id, target_frame_id="rig", pose=T_cam_rig) + + camera_intrinsics = intrinsics[camera_id] + intrinsics_writer.store_camera_intrinsics( + camera_id=camera_id, + camera_model_parameters=camera_intrinsics.model, + ) + + masks_writer.store_camera_masks(camera_id=camera_id, mask_images={}) + + # Preserve the original AV2 lens radial-distortion coefficients as + # provenance. The released imagery is already undistorted (so the stored + # model is a distortion-free ideal pinhole), but the raw (k1, k2, k3) + # describe the original lens and would otherwise be lost. + k1, k2, k3 = camera_intrinsics.original_distortion_k1k2k3 + camera_writer = store_writer.register_component_writer( + CameraSensorComponent.Writer, + component_instance_name=camera_id, + group_name=component_groups.camera_component_groups.get(camera_id), + generic_meta_data={ + "av2_original_distortion": {"k1": k1, "k2": k2, "k3": k3}, + }, + ) + + camera_dir = log_dir / "sensors" / "cameras" / camera_id + for ts_ns in tqdm.tqdm(cam_ts_ns, desc=f"Process {camera_id}"): + image_path = camera_dir / f"{ts_ns}.jpg" + with image_path.open("rb") as f: + image_binary = f.read() + + frame_ts = _ns_to_us(ts_ns) + camera_writer.store_frame( + image_binary_data=image_binary, + image_format="jpeg", + frame_timestamps_us=np.array([frame_ts, frame_ts], dtype=np.uint64), + generic_data={}, + generic_meta_data={}, + ) + + self.logger.info(f"Processed {len(camera_ids)} cameras") + + # ------------------------------------------------------------------------- + # Cuboid annotations + # ------------------------------------------------------------------------- + + def _decode_cuboids( + self, + log_dir: UPath, + store_writer: SequenceComponentGroupsWriter, + component_groups: ComponentGroupAssignments, + ) -> None: + """Decode AV2 3D annotations and store as cuboid track observations. + + AV2 cuboids are native to the egovehicle frame at the sweep reference + timestamp. We store them in that native frame -- ``rig`` at the sweep + timestamp -- without baking in any ego pose. This is lossless and, unlike + baking the cuboids into a static world frame, keeps the egovehicle motion + out of the stored coordinates so it remains swappable downstream (a V4 + feature): the pose graph places the cuboids using whatever ego trajectory + is active. + + Lidar points are decompensated to their own per-point time, but a cuboid is + a single object pose at the sweep reference time, so referencing it to + ``rig`` at that timestamp is exactly correct -- the pose graph evaluates the + rig pose at the cuboid timestamp when transforming. + """ + annotations_path = log_dir / "annotations.feather" + if not annotations_path.exists(): + self.logger.info("No annotations.feather found (test split)") + return + + cols = read_annotations(log_dir) + n = len(cols["category"]) + + cuboid_observations: List[CuboidTrackObservation] = [] + for i in tqdm.tqdm(range(n), total=n, desc="Process cuboids"): + category = str(cols["category"][i]) + if category not in AV2_CATEGORY_MAP: + continue + + timestamp_us = _ns_to_us(int(cols["timestamp_ns"][i])) + + # Convert the full AV2 orientation quaternion (scalar-first wxyz) to the + # BBox3 "xyz" intrinsic-Euler convention. Keep the full 3-DOF rotation -- + # AV2 cuboids can carry roll/pitch (e.g. objects on slopes/banked roads), + # so extracting yaw only would silently drop that. + quat_wxyz = (cols["qw"][i], cols["qx"][i], cols["qy"][i], cols["qz"][i]) + rx, ry, rz = R.from_quat(quat_wxyz, scalar_first=True).as_euler("xyz", degrees=False) + + # AV2 cuboids are in the egovehicle (rig) frame at the sweep reference + # time. length_m -> x extent, width_m -> y extent, height_m -> z extent. + bbox3 = BBox3.from_array( + np.array( + [ + cols["tx_m"][i], + cols["ty_m"][i], + cols["tz_m"][i], + cols["length_m"][i], + cols["width_m"][i], + cols["height_m"][i], + rx, + ry, + rz, + ], + dtype=np.float32, + ) + ) + + cuboid_observations.append( + CuboidTrackObservation( + track_id=str(cols["track_uuid"][i]), + class_id=AV2_CATEGORY_MAP[category], + timestamp_us=timestamp_us, + reference_frame_id="rig", + reference_frame_timestamp_us=timestamp_us, + bbox3=bbox3, + source=LabelSource.EXTERNAL, + ) + ) + + if cuboid_observations: + store_writer.register_component_writer( + CuboidsComponent.Writer, + "default", + component_groups.cuboid_track_observations_component_group, + ).store_observations(cuboid_observations) + self.logger.info(f"Stored {len(cuboid_observations)} cuboid observations") + else: + self.logger.info("No mapped cuboid annotations found") + + +# ----------------------------------------------------------------------------- +# CLI +# ----------------------------------------------------------------------------- + + +@cli.command(name="argoverse2-v4") +@click.option( + "--split", + type=str, + default="val", + show_default=True, + help="Argoverse 2 split directory under --root-dir (e.g. train, val, test)", +) +@click.option( + "--log-id", + type=str, + default=None, + help="Convert only the log with this ID (defaults to all logs in the split)", +) +@click.option( + "--store-type", + type=click.Choice(["itar", "directory"], case_sensitive=False), + default="itar", + show_default=True, + help="Output store type", +) +@click.option( + "component_group_profile", + "--profile", + type=click.Choice(["default", "separate-sensors", "separate-all"], case_sensitive=False), + default="separate-sensors", + show_default=True, + help="Output profile for component group assignment", +) +@click.option( + "store_sequence_meta", + "--sequence-meta/--no-sequence-meta", + default=True, + help="Generate sequence meta-data JSON?", +) +@click.option( + "lidar_model_source", + "--lidar-model-source", + type=click.Choice(["empirical", "none"], case_sensitive=False), + default="empirical", + show_default=True, + help="Lidar model: 'empirical' derives a VLP-32C structured model per unit (model " + "elements + intrinsics) from the data; 'none' stores raw ray bundles only.", +) +@click.pass_context +def argoverse2_v4(ctx, split, log_id, **kwargs): + """Argoverse 2 Sensor Dataset conversion (V4 format)""" + + config = Argoverse2Converter4Config(**{**vars(ctx.obj), "split": split, "log_id": log_id, **kwargs}) + + Argoverse2Converter4.convert(config) diff --git a/tools/data_converter/argoverse2/converter_test.py b/tools/data_converter/argoverse2/converter_test.py new file mode 100644 index 00000000..c70bc25e --- /dev/null +++ b/tools/data_converter/argoverse2/converter_test.py @@ -0,0 +1,379 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Integration tests for the Argoverse 2 data converter (V4 format). + +Requires the AV2_DIR environment variable pointing to an Argoverse 2 Sensor +Dataset root directory organised as ``{AV2_DIR}/{split}/{log_id}/...``. + +Set AV2_SPLIT to override the default split (``val``). The first log in the split +is used for testing. +""" + +import os +import tempfile +import unittest + +from typing import Literal, cast + +import numpy as np + +from parameterized import parameterized_class +from upath import UPath + +from ncore.impl.data.types import ( + IdealPinholeCameraModelParameters, + RowOffsetStructuredSpinningLidarModelParameters, + ShutterType, +) +from ncore.impl.data.v4.components import ( + CameraSensorComponent, + CuboidsComponent, + IntrinsicsComponent, + LidarSensorComponent, + PosesComponent, + RadarSensorComponent, + SequenceComponentGroupsReader, +) +from tools.data_converter.argoverse2.converter import Argoverse2Converter4, Argoverse2Converter4Config +from tools.data_converter.argoverse2.utils import CAMERA_NAMES, LIDAR_NAMES, list_log_ids + + +@parameterized_class( + ("store_type",), + [ + ("itar",), + ("directory",), + ], +) +class TestArgoverse2Converter(unittest.TestCase): + """Integration tests for the Argoverse 2 data converter. + + Requires AV2_DIR environment variable pointing to an Argoverse 2 Sensor Dataset + root. Uses the first log in the split for testing. + """ + + store_type: Literal["itar", "directory"] + + @classmethod + def setUpClass(cls): + cls.av2_dir = os.environ.get("AV2_DIR") + if cls.av2_dir is None: + raise unittest.SkipTest("AV2_DIR not set -- skipping Argoverse 2 integration tests") + + cls.split = os.environ.get("AV2_SPLIT", "val") + + log_ids = list_log_ids(UPath(cls.av2_dir) / cls.split) + assert log_ids, f"No logs found under {cls.av2_dir}/{cls.split}" + cls.log_id = log_ids[0] + + cls._tempdir = tempfile.TemporaryDirectory(prefix="argoverse2_test_") + cls.output_dir = cls._tempdir.name + + config = Argoverse2Converter4Config( + root_dir=cls.av2_dir, + output_dir=cls.output_dir, + no_cameras=False, + camera_ids=None, + no_lidars=False, + lidar_ids=None, + no_radars=False, + radar_ids=None, + verbose=False, + debug=False, + debug_port=5678, + split=cls.split, + log_id=cls.log_id, + store_type=cls.store_type, + component_group_profile="separate-sensors", + store_sequence_meta=True, + ) + Argoverse2Converter4.convert(config) + + seq_dirs = [d for d in UPath(cls.output_dir).iterdir() if d.is_dir()] + assert len(seq_dirs) == 1, f"Expected 1 sequence dir, found {len(seq_dirs)}: {seq_dirs}" + cls.seq_dir = seq_dirs[0] + + meta_files = list(cls.seq_dir.glob("*.json")) + assert len(meta_files) == 1, f"Expected 1 meta JSON, found {len(meta_files)}" + cls.reader = SequenceComponentGroupsReader([meta_files[0]]) + + @classmethod + def tearDownClass(cls): + cls._tempdir.cleanup() + + # --- Poses ---------------------------------------------------------------- + + def test_sequence_has_dynamic_rig_to_world_pose(self): + poses_readers = self.reader.open_component_readers(PosesComponent.Reader) + self.assertEqual(len(poses_readers), 1) + poses_reader = list(poses_readers.values())[0] + + poses, timestamps = poses_reader.get_dynamic_pose("rig", "world") + self.assertEqual(poses.shape[1:], (4, 4)) + self.assertGreater(poses.shape[0], 0) + self.assertEqual(timestamps.shape[0], poses.shape[0]) + + def test_sequence_has_static_world_to_world_global(self): + """world_global is the AV2 city frame; verify the static anchor exists.""" + poses_readers = self.reader.open_component_readers(PosesComponent.Reader) + poses_reader = list(poses_readers.values())[0] + + static_poses = dict(poses_reader.get_static_poses()) + self.assertIn(("world", "world_global"), static_poses) + self.assertEqual(static_poses[("world", "world_global")].shape, (4, 4)) + + def test_first_real_pose_near_identity(self): + """The anchored ego pose is stored as relative identity in the trajectory. + + The first pose's city_SE3_egovehicle is the world_global anchor, so its + relative rig -> world pose must be (near) identity. Boundary extrapolation + may prepend an extra pose, so we locate the identity pose rather than + assuming a fixed index. + """ + poses_readers = self.reader.open_component_readers(PosesComponent.Reader) + poses_reader = list(poses_readers.values())[0] + + poses, _ = poses_reader.get_dynamic_pose("rig", "world") + deviations = np.linalg.norm(poses - np.eye(4, dtype=np.float32), axis=(1, 2)) + np.testing.assert_array_almost_equal(poses[int(np.argmin(deviations))], np.eye(4, dtype=np.float32), decimal=3) + + # --- Cameras -------------------------------------------------------------- + + def test_nine_cameras_exist(self): + camera_readers = self.reader.open_component_readers(CameraSensorComponent.Reader) + self.assertEqual(set(camera_readers.keys()), set(CAMERA_NAMES)) + for cam_id, cam_reader in camera_readers.items(): + self.assertGreater(cam_reader.frames_count, 0, f"{cam_id} should have frames") + + def test_original_distortion_coefficients_preserved_in_metadata(self): + # The released imagery is undistorted (so the stored model is distortion-free), + # but the original lens k1/k2/k3 are preserved per camera as provenance. + camera_readers = self.reader.open_component_readers(CameraSensorComponent.Reader) + for cam_id, cam_reader in camera_readers.items(): + meta = cam_reader.generic_meta_data + self.assertIn("av2_original_distortion", meta, f"{cam_id} missing distortion provenance") + distortion = meta["av2_original_distortion"] + self.assertEqual(set(distortion), {"k1", "k2", "k3"}, f"{cam_id} distortion keys") + for key, value in distortion.items(): + self.assertIsInstance(value, float, f"{cam_id} {key} should be a float") + + def test_camera_intrinsics_ideal_pinhole_global_shutter(self): + intrinsics_readers = self.reader.open_component_readers(IntrinsicsComponent.Reader) + self.assertEqual(len(intrinsics_readers), 1) + intrinsics_reader = list(intrinsics_readers.values())[0] + + for cam_id in CAMERA_NAMES: + params = intrinsics_reader.get_camera_model_parameters(cam_id) + # AV2 imagery is shipped undistorted, so an ideal (distortion-free) + # pinhole is the exact model. + self.assertIsInstance(params, IdealPinholeCameraModelParameters) + params = cast(IdealPinholeCameraModelParameters, params) + self.assertEqual(params.shutter_type, ShutterType.GLOBAL) + self.assertTrue(np.all(params.focal_length > 0)) + + def test_camera_extrinsics_stored_as_static_poses(self): + poses_readers = self.reader.open_component_readers(PosesComponent.Reader) + poses_reader = list(poses_readers.values())[0] + + static_poses = dict(poses_reader.get_static_poses()) + for cam_id in CAMERA_NAMES: + self.assertIn((cam_id, "rig"), static_poses) + + # --- Lidar ---------------------------------------------------------------- + + def test_two_lidar_units_exist(self): + lidar_readers = self.reader.open_component_readers(LidarSensorComponent.Reader) + self.assertEqual(set(lidar_readers.keys()), set(LIDAR_NAMES)) + for lidar_id, lidar_reader in lidar_readers.items(): + self.assertGreater(lidar_reader.frames_count, 0, f"{lidar_id} should have frames") + + def test_lidar_extrinsics_stored_as_static_poses(self): + poses_readers = self.reader.open_component_readers(PosesComponent.Reader) + poses_reader = list(poses_readers.values())[0] + + static_poses = dict(poses_reader.get_static_poses()) + for lidar_id in LIDAR_NAMES: + self.assertIn((lidar_id, "rig"), static_poses) + + def test_lidar_directions_unit_norm(self): + lidar_readers = self.reader.open_component_readers(LidarSensorComponent.Reader) + lidar_reader = lidar_readers["up_lidar"] + ts = int(lidar_reader.frames_timestamps_us[0, 1]) # end-of-frame timestamp key + direction = lidar_reader.get_frame_ray_bundle_data(ts, "direction") + norms = np.linalg.norm(direction, axis=1) + # Zero-distance rays may have zero direction; check the populated ones. + nonzero = norms > 0 + np.testing.assert_allclose(norms[nonzero], 1.0, atol=1e-4) + + def test_lidar_unit_split_recovered_from_geometry(self): + """The two units carry comparable point counts (~half the sweep each). + + Each VLP-32C contributes 32 of the 64 beams, so a correct split yields + roughly balanced point counts per unit (allowing for differing FOV + occupancy). + """ + lidar_readers = self.reader.open_component_readers(LidarSensorComponent.Reader) + counts = {} + for unit in ("up_lidar", "down_lidar"): + reader = lidar_readers[unit] + ts = int(reader.frames_timestamps_us[0, 1]) + counts[unit] = len(reader.get_frame_ray_bundle_data(ts, "direction")) + ratio = min(counts.values()) / max(counts.values()) + self.assertGreater(ratio, 0.5, f"Lidar unit point counts unbalanced: {counts}") + + # --- Lidar structured model ----------------------------------------------- + + def test_lidar_intrinsics_vlp32c_model_per_unit(self): + """Each unit stores a VLP-32C structured model (32 rows) as intrinsics.""" + intrinsics_reader = list(self.reader.open_component_readers(IntrinsicsComponent.Reader).values())[0] + for unit in LIDAR_NAMES: + model = intrinsics_reader.get_lidar_model_parameters(unit) + self.assertIsInstance(model, RowOffsetStructuredSpinningLidarModelParameters) + model = cast(RowOffsetStructuredSpinningLidarModelParameters, model) + self.assertEqual(model.n_rows, 32) + self.assertGreater(model.n_columns, 100) + self.assertIn(model.spinning_direction, ("cw", "ccw")) + + def test_lidar_model_elements_in_bounds(self): + """Stored per-point model elements index valid (row, column) cells.""" + intrinsics_reader = list(self.reader.open_component_readers(IntrinsicsComponent.Reader).values())[0] + lidar_readers = self.reader.open_component_readers(LidarSensorComponent.Reader) + for unit in LIDAR_NAMES: + model = cast( + RowOffsetStructuredSpinningLidarModelParameters, + intrinsics_reader.get_lidar_model_parameters(unit), + ) + reader = lidar_readers[unit] + ts = int(reader.frames_timestamps_us[0, 1]) + elem = reader.get_frame_ray_bundle_data(ts, "model_element") + self.assertEqual(elem.shape[1], 2) + self.assertEqual(elem.dtype, np.uint16) + self.assertTrue(np.all(elem[:, 0] < model.n_rows), "row index out of bounds") + self.assertTrue(np.all(elem[:, 1] < model.n_columns), "column index out of bounds") + + def test_lidar_model_reconstructs_directions(self): + """Model-predicted directions match stored native directions (far-range). + + Validates the firing-pattern reconstruction: the structured model, indexed + by the stored per-point (row, column), should reproduce the stored ray + directions to within a small angular error for far-range returns. + """ + from ncore.impl.sensors.lidar import StructuredLidarModel + + intrinsics_reader = list(self.reader.open_component_readers(IntrinsicsComponent.Reader).values())[0] + lidar_readers = self.reader.open_component_readers(LidarSensorComponent.Reader) + for unit in LIDAR_NAMES: + model_params = cast( + RowOffsetStructuredSpinningLidarModelParameters, + intrinsics_reader.get_lidar_model_parameters(unit), + ) + reader = lidar_readers[unit] + ts = int(reader.frames_timestamps_us[0, 1]) + direction = reader.get_frame_ray_bundle_data(ts, "direction") + elem = reader.get_frame_ray_bundle_data(ts, "model_element") + distance = np.asarray(reader._get_ray_bundle_returns_group(ts)["distance_m"])[0] + + far = np.isfinite(distance) & (distance > 20.0) & (np.linalg.norm(direction, axis=1) > 0) + self.assertGreater(int(far.sum()), 100, f"{unit}: too few far returns to validate") + + model = StructuredLidarModel.maybe_from_parameters(model_params, device="cpu") + assert model is not None + predicted = model.elements_to_sensor_points(elem[far], np.ones(int(far.sum()), dtype=np.float32)) + predicted = predicted.cpu().numpy() + predicted /= np.linalg.norm(predicted, axis=1, keepdims=True) + cos = np.clip(np.sum(predicted * direction[far], axis=1), -1.0, 1.0) + median_err_deg = float(np.degrees(np.median(np.arccos(cos)))) + # The structured model is reconstructed from offset_ns + laser_number on the + # decompensated cloud, with empirical per-row azimuth offsets. This yields + # sub-0.1 deg far-range reconstruction (on par with native-column sensors). + self.assertLess(median_err_deg, 0.2, f"{unit}: model direction error {median_err_deg:.3f} deg too high") + + # --- No radar ------------------------------------------------------------- + + def test_no_radar(self): + radar_readers = self.reader.open_component_readers(RadarSensorComponent.Reader) + self.assertEqual(len(radar_readers), 0) + + # --- Cuboids -------------------------------------------------------------- + + def test_cuboids_in_rig_frame(self): + """Cuboids are stored in the native ``rig`` frame (no ego pose baked in).""" + cuboid_readers = self.reader.open_component_readers(CuboidsComponent.Reader) + if not cuboid_readers: + self.skipTest("No cuboids (test split)") + cuboid_reader = list(cuboid_readers.values())[0] + observations = list(cuboid_reader.get_observations()) + self.assertGreater(len(observations), 0) + for obs in observations[:50]: + self.assertEqual(obs.reference_frame_id, "rig") + + def test_cuboids_align_with_lidar(self): + """A reasonable fraction of lidar points fall inside annotated cuboids. + + This is the regression guard for the lidar decompensation reference bug: if + the points were decompensated against the wrong reference (or the cuboids + mis-referenced), almost no points would land inside the boxes. We transform + the stored (decompensated) first-frame lidar points to ``world`` via their + own per-point rig pose, transform the active cuboids from ``rig`` at the + sweep timestamp to ``world``, and count points inside. + """ + from ncore.impl.common.transformations import is_within_3d_bboxes, transform_bbox + + cuboid_readers = self.reader.open_component_readers(CuboidsComponent.Reader) + if not cuboid_readers: + self.skipTest("No cuboids (test split)") + + poses_reader = list(self.reader.open_component_readers(PosesComponent.Reader).values())[0] + lidar_reader = self.reader.open_component_readers(LidarSensorComponent.Reader)["up_lidar"] + frame_start_us, frame_end_us = (int(v) for v in lidar_reader.frames_timestamps_us[0]) + ts = frame_end_us # reader frame key is the end-of-frame timestamp + + # The cuboid reference timestamp is the AV2 sweep reference time, which is + # the start of the point window (offset_ns runs forward from it). + cuboid_ts = frame_start_us + + # Each lidar point is in its own per-point-time sensor frame; transform via + # sensor -> rig (static) and rig -> world at the point's own timestamp. + static = dict(poses_reader.get_static_poses()) + T_up_rig = static[("up_lidar", "rig")] + rig_poses, pose_ts = poses_reader.get_dynamic_pose("rig", "world") + + direction = lidar_reader.get_frame_ray_bundle_data(ts, "direction") + distance = np.asarray(lidar_reader._get_ray_bundle_returns_group(ts)["distance_m"])[0] + point_ts = lidar_reader.get_frame_ray_bundle_data(ts, "timestamp_us").astype(np.int64) + valid = np.isfinite(distance) & (distance > 0) + pts_sensor = direction[valid] * distance[valid, None] + pts_rig = (T_up_rig[:3, :3] @ pts_sensor.T).T + T_up_rig[:3, 3] + # Per-point rig -> world using the nearest stored pose (poses are dense). + nearest = np.searchsorted(pose_ts.astype(np.int64), point_ts[valid]).clip(0, len(rig_poses) - 1) + T_pts = rig_poses[nearest] + pts_world = np.einsum("nij,nj->ni", T_pts[:, :3, :3], pts_rig) + T_pts[:, :3, 3] + + observations = list(list(cuboid_readers.values())[0].get_observations()) + # cuboids active at this sweep (cuboid ref ts == sweep start) + active = [o for o in observations if abs(o.reference_frame_timestamp_us - cuboid_ts) < 2000] + self.assertGreater(len(active), 0, "no cuboids active at first lidar frame") + + # Cuboids are in rig at the sweep timestamp; bring them to world via the + # rig pose at that timestamp. + cuboid_pose_idx = int(np.argmin(np.abs(pose_ts.astype(np.int64) - cuboid_ts))) + T_rig_world_cuboid = rig_poses[cuboid_pose_idx] + boxes = np.stack([transform_bbox(o.bbox3.to_array().astype(np.float64), T_rig_world_cuboid) for o in active]) + inside = is_within_3d_bboxes(pts_world.astype(np.float64), boxes.astype(np.float64)) + n_inside = int(inside.any(axis=1).sum()) + self.assertGreater( + n_inside, 50, f"only {n_inside} lidar points inside any cuboid -- likely a frame/timestamp shift" + ) diff --git a/tools/data_converter/argoverse2/main.py b/tools/data_converter/argoverse2/main.py new file mode 100644 index 00000000..aa60f7c4 --- /dev/null +++ b/tools/data_converter/argoverse2/main.py @@ -0,0 +1,23 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Argoverse 2 converter CLI entry point.""" + +from tools.data_converter.argoverse2.converter import argoverse2_v4 # noqa: F401 -- registers CLI command +from tools.data_converter.cli import cli + + +if __name__ == "__main__": + cli(show_default=True) diff --git a/tools/data_converter/argoverse2/utils.py b/tools/data_converter/argoverse2/utils.py new file mode 100644 index 00000000..c8b7f93d --- /dev/null +++ b/tools/data_converter/argoverse2/utils.py @@ -0,0 +1,661 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Argoverse 2 specific utilities for the NCore V4 converter. + +This module reads the Argoverse 2 Sensor Dataset directly from its on-disk +Apache Feather files using ``pyarrow`` only, deliberately avoiding the heavy +``av2`` devkit (which pulls in torch, kornia, numba, polars and PyAV). Quaternions +are converted with ``scipy.spatial.transform.Rotation`` (already an ncore +dependency), so no extra runtime dependency is introduced. + +Reference (sourced from github.com/argoverse/av2-api and the AV2 User Guide): + +- Lidar sweeps are *egomotion-compensated* to the sweep reference timestamp and + stored in the **egovehicle** frame (not the individual sensor frame). The + feather columns are ``x, y, z, intensity, laser_number, offset_ns``. + Per-point absolute time is ``sweep_timestamp_ns + offset_ns``. +- The released imagery is **already undistorted**, so a pinhole model with zero + distortion is exact and **global shutter is assumed** on that basis. The + original lens radial-distortion coefficients ``(k1, k2, k3)`` are returned by + :func:`read_intrinsics` for provenance but are never applied. +- All quaternions are scalar-first ``(qw, qx, qy, qz)``. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Dict, List, Literal, Optional + +import numpy as np +import pyarrow.feather as feather + +from scipy.spatial.transform import Rotation as R +from upath import UPath + +from ncore.impl.common.transformations import se3_inverse +from ncore.impl.data.types import ( + IdealPinholeCameraModelParameters, + RowOffsetStructuredSpinningLidarModelParameters, + ShutterType, +) +from tools.data_converter.structured_lidar_model import enforce_spinning_monotonic + + +# --- Feather reading (no pandas) ----------------------------------------------- +# We read Arrow tables directly and pull columns out as numpy arrays. This avoids +# pulling pandas into the dependency closure (pyarrow.read_feather would default to +# a pandas DataFrame). + + +def _read_columns(path: UPath) -> Dict[str, np.ndarray]: + """Read a feather file into a ``column_name -> numpy array`` mapping.""" + table = feather.read_table(str(path)) + return {name: table.column(name).to_numpy(zero_copy_only=False) for name in table.column_names} + + +# --- Sensor ID mappings -------------------------------------------------------- +# Argoverse 2 sensor names are already descriptive; we keep them verbatim as the +# NCore sensor IDs so that any downstream alignment with AV2 map / metadata stays +# unambiguous. + +# All nine global-shutter cameras (7 ring + 2 stereo). +CAMERA_NAMES: List[str] = [ + "ring_front_center", + "ring_front_left", + "ring_front_right", + "ring_side_left", + "ring_side_right", + "ring_rear_left", + "ring_rear_right", + "stereo_front_left", + "stereo_front_right", +] + +# The two stacked Velodyne VLP-32C units. +LIDAR_NAMES: List[str] = ["up_lidar", "down_lidar"] + +# Number of beams per VLP-32C unit. laser_number spans [0, 63] across both units. +VLP32C_N_BEAMS: int = 32 + +# VLP-32C spins at 10 Hz (~100 ms per revolution). Both stacked units share this. +VLP32C_SCAN_DURATION_US: int = 100_000 +VLP32C_SPINNING_FREQUENCY_HZ: float = 10.0 +# Note: the apparent spin direction in a unit's own frame is detected per unit +# (the two stacked units fire in opposite phase), not assumed. + +# AV2 ships no radar. + +# --- Annotation taxonomy ------------------------------------------------------- +# Argoverse 2 3D cuboid categories (the 30-class `AnnotationCategories` taxonomy) +# mapped to NCore class IDs. AV2 category strings are upper snake-case. +AV2_CATEGORY_MAP: Dict[str, str] = { + "REGULAR_VEHICLE": "car", + "LARGE_VEHICLE": "truck", + "BOX_TRUCK": "truck", + "TRUCK": "truck", + "TRUCK_CAB": "truck", + "VEHICULAR_TRAILER": "trailer", + "SCHOOL_BUS": "bus", + "ARTICULATED_BUS": "bus", + "BUS": "bus", + "MESSAGE_BOARD_TRAILER": "trailer", + "RAILED_VEHICLE": "vehicle", + "MOTORCYCLE": "motorcycle", + "MOTORCYCLIST": "motorcyclist", + "BICYCLE": "bicycle", + "BICYCLIST": "bicyclist", + "WHEELED_DEVICE": "wheeled_device", + "WHEELED_RIDER": "wheeled_rider", + "PEDESTRIAN": "pedestrian", + "OFFICIAL_SIGNALER": "pedestrian", + "STROLLER": "stroller", + "WHEELCHAIR": "wheelchair", + "DOG": "animal", + "ANIMAL": "animal", + "CONSTRUCTION_CONE": "traffic_cone", + "CONSTRUCTION_BARREL": "barrier", + "STOP_SIGN": "stop_sign", + "BOLLARD": "bollard", + "SIGN": "sign", + "MOBILE_PEDESTRIAN_CROSSING_SIGN": "sign", + "TRAFFIC_LIGHT_TRAILER": "trailer", +} + + +# --- Pose / quaternion helpers ------------------------------------------------- + + +def se3_from_qwxyz_t(qw: float, qx: float, qy: float, qz: float, tx: float, ty: float, tz: float) -> np.ndarray: + """Build a 4x4 SE(3) matrix from a scalar-first quaternion and translation. + + Argoverse 2 stores all rotations as ``(qw, qx, qy, qz)``. + """ + T = np.eye(4, dtype=np.float64) + T[:3, :3] = R.from_quat((qw, qx, qy, qz), scalar_first=True).as_matrix() + T[:3, 3] = (tx, ty, tz) + return T + + +# --- Dataset layout / feather readers ------------------------------------------ + + +def list_log_ids(split_dir: UPath) -> List[str]: + """Return the sorted log IDs (sub-directory names) under a split directory.""" + return sorted(p.name for p in split_dir.iterdir() if p.is_dir()) + + +def read_city_se3_ego(log_dir: UPath) -> tuple[np.ndarray, np.ndarray]: + """Read ``city_SE3_egovehicle.feather`` (at the log root). + + Returns: + timestamps_ns: [N] uint64 sweep/pose timestamps (sorted ascending). + T_ego_city: [N, 4, 4] float64 poses (egovehicle -> city/global frame). + """ + cols = _read_columns(log_dir / "city_SE3_egovehicle.feather") + order = np.argsort(cols["timestamp_ns"]) + timestamps_ns = cols["timestamp_ns"][order].astype(np.uint64) + poses = np.stack( + [ + se3_from_qwxyz_t( + cols["qw"][i], + cols["qx"][i], + cols["qy"][i], + cols["qz"][i], + cols["tx_m"][i], + cols["ty_m"][i], + cols["tz_m"][i], + ) + for i in order + ] + ) + return timestamps_ns, poses + + +def read_ego_se3_sensor(log_dir: UPath) -> Dict[str, np.ndarray]: + """Read ``calibration/egovehicle_SE3_sensor.feather``. + + Returns a mapping ``sensor_name -> T_sensor_ego`` (4x4, sensor-frame point -> + egovehicle frame). + """ + cols = _read_columns(log_dir / "calibration" / "egovehicle_SE3_sensor.feather") + result: Dict[str, np.ndarray] = {} + for i, name in enumerate(cols["sensor_name"]): + result[str(name)] = se3_from_qwxyz_t( + cols["qw"][i], + cols["qx"][i], + cols["qy"][i], + cols["qz"][i], + cols["tx_m"][i], + cols["ty_m"][i], + cols["tz_m"][i], + ) + return result + + +@dataclass(frozen=True) +class CameraIntrinsics: + """An AV2 camera's ideal-pinhole model plus its original distortion provenance. + + AV2 ships already-undistorted imagery, so the converted ``model`` is a + distortion-free ideal pinhole. The original lens radial-distortion coefficients + ``(k1, k2, k3)`` from ``intrinsics.feather`` are kept here purely as provenance + (they describe the *raw* lens and are never applied to the released images). + """ + + model: IdealPinholeCameraModelParameters + original_distortion_k1k2k3: tuple[float, float, float] + + +def read_intrinsics( + log_dir: UPath, +) -> Dict[str, CameraIntrinsics]: + """Read ``calibration/intrinsics.feather`` into ideal-pinhole camera models. + + AV2 imagery is shipped already undistorted: the official av2 devkit projects + with the intrinsic matrix K only (``PinholeCamera.project_ego_to_img``) and its + ``Intrinsics`` dataclass does not even load the ``k1, k2, k3`` columns present + in the file. Those coefficients describe the *original* lens (for re-distorting + into the raw frame) and must not be applied to the released images, so an ideal + (distortion-free) pinhole is the exact model. Because the released imagery is + already undistorted, the cameras are modelled as global shutter. + + The raw ``k1, k2, k3`` coefficients are returned alongside the pinhole model so + callers can preserve them as provenance metadata (they are *not* applied to the + undistorted images). + """ + cols = _read_columns(log_dir / "calibration" / "intrinsics.feather") + result: Dict[str, CameraIntrinsics] = {} + for i, name in enumerate(cols["sensor_name"]): + model = IdealPinholeCameraModelParameters( + resolution=np.array([int(cols["width_px"][i]), int(cols["height_px"][i])], dtype=np.uint64), + shutter_type=ShutterType.GLOBAL, + external_distortion_parameters=None, + principal_point=np.array([cols["cx_px"][i], cols["cy_px"][i]], dtype=np.float32), + focal_length=np.array([cols["fx_px"][i], cols["fy_px"][i]], dtype=np.float32), + ) + result[str(name)] = CameraIntrinsics( + model=model, + original_distortion_k1k2k3=( + float(cols["k1"][i]), + float(cols["k2"][i]), + float(cols["k3"][i]), + ), + ) + return result + + +@dataclass(frozen=True) +class LidarSweep: + """A single AV2 lidar sweep, in the egovehicle frame. + + xyz are egomotion-compensated to ``timestamp_ns``; per-point absolute time is + ``timestamp_ns + offset_ns``. + """ + + xyz: np.ndarray # [N, 3] float32, egovehicle frame + intensity: np.ndarray # [N] float32 in [0, 1] + laser_number: np.ndarray # [N] uint8 in [0, 63] + offset_ns: np.ndarray # [N] int64, offset from sweep start + timestamp_ns: int # sweep reference timestamp (filename) + + +def read_lidar_sweep(path: UPath) -> LidarSweep: + """Read a single lidar sweep feather file (filename is the sweep timestamp).""" + cols = _read_columns(path) + timestamp_ns = int(UPath(path).stem) + return LidarSweep( + xyz=np.stack( + [ + cols["x"].astype(np.float32), + cols["y"].astype(np.float32), + cols["z"].astype(np.float32), + ], + axis=1, + ), + intensity=(cols["intensity"].astype(np.float32) / 255.0), + laser_number=cols["laser_number"].astype(np.uint8), + offset_ns=cols["offset_ns"].astype(np.int64), + timestamp_ns=timestamp_ns, + ) + + +def read_annotations(log_dir: UPath) -> Dict[str, np.ndarray]: + """Read ``annotations.feather`` into a column -> numpy array mapping.""" + return _read_columns(log_dir / "annotations.feather") + + +def list_sensor_timestamps(log_dir: UPath, sensor_kind: str, sensor_name: Optional[str] = None) -> List[int]: + """List the sorted nanosecond timestamps available for a sensor stream. + Args: + sensor_kind: ``"lidar"`` or ``"cameras"``. + sensor_name: camera name (required for ``"cameras"``; ignored for lidar). + """ + if sensor_kind == "lidar": + sensor_dir = log_dir / "sensors" / "lidar" + suffix = ".feather" + elif sensor_kind == "cameras": + assert sensor_name is not None, "sensor_name required for cameras" + sensor_dir = log_dir / "sensors" / "cameras" / sensor_name + suffix = ".jpg" + else: + raise ValueError(f"Unknown sensor_kind: {sensor_kind}") + + if not sensor_dir.exists(): + return [] + + return sorted(int(p.stem) for p in sensor_dir.iterdir() if p.name.endswith(suffix)) + + +def assign_lidar_units( + laser_number: np.ndarray, + xyz_ego: np.ndarray, + T_up_ego: np.ndarray, + T_down_ego: np.ndarray, +) -> Dict[str, np.ndarray]: + """Assign each point to ``up_lidar`` or ``down_lidar``. + + Argoverse 2 distributes a single aggregated sweep from two stacked Velodyne + VLP-32C units whose 64 beams share one ``laser_number`` range ``[0, 63]``. The + boundary that separates the two units is not documented in the AV2 devkit, so + we recover it from the geometry of the calibrated extrinsics. + + The two units are split into the two laser-number halves (``< 32`` and + ``>= 32``); empirically these are the two physical sensors (at any shared + ``offset_ns`` they point ~180 deg apart in the ego frame). To decide *which* + half is ``up_lidar`` vs ``down_lidar`` we use per-beam elevation flatness: a + single laser ring traces a cone of (nearly) constant elevation only in its own + sensor frame. Mapping a half into the wrong unit's extrinsic tilts that cone + (the two units differ in pitch/roll), inflating the per-ring elevation spread. + We pick the labelling that minimises the summed per-ring elevation spread, + which separates the two assignments by a wide, stable margin (~2-10x). + + Returns a mapping ``unit_name -> boolean point mask``. + """ + lo_mask = laser_number < VLP32C_N_BEAMS + hi_mask = ~lo_mask + + # Cost of assigning lo->up_unit and hi->down_unit (assignment A) vs swapped (B). + cost_a = _ring_elevation_spread( + laser_number, xyz_ego, np.arange(VLP32C_N_BEAMS), T_up_ego + ) + _ring_elevation_spread(laser_number, xyz_ego, np.arange(VLP32C_N_BEAMS, 2 * VLP32C_N_BEAMS), T_down_ego) + cost_b = _ring_elevation_spread( + laser_number, xyz_ego, np.arange(VLP32C_N_BEAMS), T_down_ego + ) + _ring_elevation_spread(laser_number, xyz_ego, np.arange(VLP32C_N_BEAMS, 2 * VLP32C_N_BEAMS), T_up_ego) + + if cost_b < cost_a: + return {"up_lidar": hi_mask, "down_lidar": lo_mask} + return {"up_lidar": lo_mask, "down_lidar": hi_mask} + + +def _ring_elevation_spread( + laser_number: np.ndarray, + xyz_ego: np.ndarray, + beams: np.ndarray, + T_unit_ego: np.ndarray, + min_valid_distance_m: float = 2.0, + min_ring_points: int = 10, +) -> float: + """Mean per-beam elevation standard deviation when ``beams`` are mapped to a unit. + + In the correct sensor frame each laser ring has near-constant elevation across + azimuth, so a tight per-ring elevation distribution indicates the correct + extrinsic. Returns the mean per-ring elevation std in degrees. + + Args: + min_valid_distance_m: ignore near-range returns when estimating elevation. + min_ring_points: minimum returns for a ring to contribute to the estimate. + """ + T_ego_unit = se3_inverse(T_unit_ego) + pts = (T_ego_unit[:3, :3] @ xyz_ego.T).T + T_ego_unit[:3, 3] + dist = np.linalg.norm(pts, axis=1) + + spreads: List[float] = [] + for beam in beams: + ring = (laser_number == beam) & (dist > min_valid_distance_m) + if int(ring.sum()) < min_ring_points: + continue + elev = np.degrees(np.arcsin(np.clip(pts[ring, 2] / dist[ring], -1.0, 1.0))) + spreads.append(float(np.std(elev))) + + return float(np.mean(spreads)) if spreads else float("inf") + + +# --- Structured VLP-32C lidar model -------------------------------------------- +# AV2 provides no native firing-column index, only per-point ``laser_number`` +# (0..31 within a unit) and ``offset_ns``. The two are a faithful proxy for the +# firing pattern: ``offset_ns`` quantizes into firing columns (one VLP-32C +# revolution at 10 Hz), and ``laser_number`` selects the beam (row). This lets us +# reconstruct the rows x columns structure required for a structured spinning +# lidar model and reuse the generic ``structured_lidar_model`` library. + + +@dataclass(frozen=True) +class Vlp32cGeometry: + """Per-unit VLP-32C geometry recovered from a reference sweep. + + Derived empirically (and stably) from the data rather than a hard-coded spec + table, so it self-corrects to the dataset's actual calibration. + """ + + elevations_rad: np.ndarray # [32] float32, model row order (elevation high -> low) + laser_to_row: np.ndarray # [32] int, maps laser_number (0..31) -> model row index + column_period_ns: float # firing-column period (one beam refire interval) + n_columns: int # upsampled columns per revolution (native_columns * resolution_factor) + spinning_direction: Literal["cw", "ccw"] # apparent spin in this unit's own frame + column_azimuths_rad: np.ndarray # [n_columns] float32, per-(upsampled-)column azimuth + row_azimuth_offsets_rad: np.ndarray # [32] float32, per-row azimuth offset from the column + resolution_factor: int # column upsampling factor (1 = native firing resolution) + + +def derive_vlp32c_geometry( + xyz_decompensated: np.ndarray, + laser_number_in_unit: np.ndarray, + offset_ns: np.ndarray, + min_valid_distance_m: float = 2.0, + far_range_m: float = 5.0, + n_refine_iterations: int = 5, + resolution_factor: int = 4, +) -> Vlp32cGeometry: + """Recover the VLP-32C firing geometry from one *decompensated* sweep. + + The input must be the decompensated point cloud (each point in the sensor frame + at its own measurement time). On the raw motion-compensated cloud the azimuths + are smeared by ego motion (~0.5 deg), which would dominate the model error; on + the decompensated cloud the firing geometry is clean (~0.05 deg). + + The model is ``azimuth(point) = column_azimuth[col] + row_azimuth_offset[row]``. + The 32 beams of a firing column are *not* co-azimuthal -- VLP-32C fires them + across a wide span -- so the per-row offset is essential (it is the dominant + structure, several degrees) and is fit empirically rather than assumed. + + Args: + xyz_decompensated: decompensated points in the unit's sensor frame, [N, 3]. + laser_number_in_unit: per-point beam index within the unit (0..31), [N]. + offset_ns: per-point nanosecond offset from the sweep start, [N]. + min_valid_distance_m: ignore near-range returns when estimating elevation. + far_range_m: distance threshold for azimuth fitting (near returns are noisy). + n_refine_iterations: alternating column/row-offset refinement passes. + """ + dist = np.linalg.norm(xyz_decompensated, axis=1) + valid = dist > min_valid_distance_m + elev = np.arcsin(np.clip(xyz_decompensated[valid, 2] / dist[valid], -1.0, 1.0)) + lasers = laser_number_in_unit[valid] + + # Median elevation per laser, then sort lasers by elevation (high -> low). + median_elev = np.full(VLP32C_N_BEAMS, np.nan, dtype=np.float64) + for laser in range(VLP32C_N_BEAMS): + sel = lasers == laser + if sel.any(): + median_elev[laser] = float(np.median(elev[sel])) + if np.isnan(median_elev).any(): + good = ~np.isnan(median_elev) + median_elev[~good] = np.interp(np.flatnonzero(~good), np.flatnonzero(good), median_elev[good]) + + laser_order_high_to_low = np.argsort(-median_elev) # laser indices, highest elevation first + elevations_rad = median_elev[laser_order_high_to_low].astype(np.float32) + laser_to_row = np.empty(VLP32C_N_BEAMS, dtype=np.int64) + laser_to_row[laser_order_high_to_low] = np.arange(VLP32C_N_BEAMS) + row = laser_to_row[laser_number_in_unit] + + column_period_ns, n_columns = _estimate_column_timing(laser_number_in_unit, offset_ns) + + o = offset_ns.astype(np.int64) + # Wrap modulo one revolution: the sweep slightly exceeds one revolution, and the + # overlap folds back onto early columns (same physical azimuth). + col = np.round((o - o.min()) / column_period_ns).astype(np.int64) % n_columns + az = np.arctan2(xyz_decompensated[:, 1], xyz_decompensated[:, 0]) + far = dist > far_range_m + + # Detect spin direction from the sign of azimuth-vs-column (the two stacked units + # fire in opposite phase, so they spin oppositely in their own frames). + az_unwrapped = np.unwrap(az[far][np.argsort(col[far])]) + slope = float(np.polyfit(np.arange(len(az_unwrapped)), az_unwrapped, 1)[0]) if len(az_unwrapped) > 1 else -1.0 + spinning_direction: Literal["cw", "ccw"] = "cw" if slope < 0 else "ccw" + + # Jointly fit per-column azimuths and per-row azimuth offsets by alternating + # circular medians. The 32 beams of a column span several degrees of azimuth, + # captured by the per-row offset; averaging it into the column azimuth (offset=0) + # would leave multi-degree per-row error. + def circmean(a: np.ndarray) -> float: + return float(np.arctan2(np.mean(np.sin(a)), np.mean(np.cos(a)))) + + # Steep downward beams (e.g. the lowest VLP-32C laser at ~-25 deg) only ever hit + # nearby ground, so they have no far-range returns. Fit their azimuth offset from + # whatever valid returns they do have (near-range) rather than leaving it at 0. + valid = dist > min_valid_distance_m + row_has_far = np.zeros(VLP32C_N_BEAMS, dtype=bool) + for r in range(VLP32C_N_BEAMS): + row_has_far[r] = bool(((row == r) & far).any()) + + row_offsets = np.zeros(VLP32C_N_BEAMS, dtype=np.float64) + col_az = np.zeros(n_columns, dtype=np.float64) + for _ in range(max(n_refine_iterations, 1)): + # column azimuth = circular median of (az - row_offset) over far points in the + # column (far returns give the cleanest, least range-dependent azimuth). + adj = np.angle(np.exp(1j * (az - row_offsets[row]))) + col_az[:] = np.nan + for c in np.unique(col[far]): + sel = (col == c) & far + col_az[c] = np.arctan2(np.median(np.sin(adj[sel])), np.median(np.cos(adj[sel]))) + good = ~np.isnan(col_az) + if not good.any(): + break + col_az = np.interp(np.arange(n_columns), np.flatnonzero(good), np.unwrap(col_az[good])) + # row offset = circular mean of (az - column azimuth). Use far returns where a + # row has them; otherwise fall back to all valid (near-range) returns so every + # row gets a real offset estimate. + for r in range(VLP32C_N_BEAMS): + sel = (row == r) & (far if row_has_far[r] else valid) + if sel.any(): + row_offsets[r] = circmean(np.angle(np.exp(1j * (az[sel] - col_az[col[sel]])))) + + column_azimuths_rad = enforce_spinning_monotonic(col_az, n_columns, spinning_direction) + row_azimuth_offsets_rad = np.angle(np.exp(1j * row_offsets)).astype(np.float32) + + # Upsample the column-azimuth grid. The native column step is ~0.2 deg, so the + # per-frame integer column shift (see reconstruct_model_elements) would quantize + # alignment to ~0.1 deg. Upsampling shrinks the step by resolution_factor, + # removing that quantization (4x -> ~0.025 deg). + factor = max(int(resolution_factor), 1) + if factor > 1: + column_azimuths_rad = _upsample_azimuths(column_azimuths_rad, factor, spinning_direction) + + return Vlp32cGeometry( + elevations_rad=elevations_rad, + laser_to_row=laser_to_row, + column_period_ns=column_period_ns, + n_columns=n_columns * factor, + spinning_direction=spinning_direction, + column_azimuths_rad=column_azimuths_rad, + row_azimuth_offsets_rad=row_azimuth_offsets_rad, + resolution_factor=factor, + ) + + +def _upsample_azimuths( + column_azimuths_rad: np.ndarray, factor: int, spinning_direction: Literal["cw", "ccw"] +) -> np.ndarray: + """Interpolate column azimuths to ``len * factor`` entries, preserving monotonicity.""" + n = len(column_azimuths_rad) + src_idx = np.arange(n) * factor + dst_idx = np.arange(n * factor) + unwrapped = np.unwrap(column_azimuths_rad.astype(np.float64)) + upsampled = np.interp(dst_idx, src_idx, unwrapped) + return enforce_spinning_monotonic(upsampled, n * factor, spinning_direction) + + +def build_vlp32c_model(geometry: Vlp32cGeometry) -> RowOffsetStructuredSpinningLidarModelParameters: + """Build a structured VLP-32C model from recovered geometry. + + Uses the empirically measured per-column azimuths, per-row azimuth offsets and + elevation table. The per-row offsets capture the (several-degree) intra-column + firing spread and are essential for sub-degree reconstruction. The spin + direction is the one detected for this unit (the two stacked units fire in + opposite phase, so they spin oppositely in their own frames). + """ + return RowOffsetStructuredSpinningLidarModelParameters( + spinning_frequency_hz=VLP32C_SPINNING_FREQUENCY_HZ, + spinning_direction=geometry.spinning_direction, + n_rows=VLP32C_N_BEAMS, + n_columns=geometry.n_columns, + row_elevations_rad=geometry.elevations_rad, + column_azimuths_rad=geometry.column_azimuths_rad, + row_azimuth_offsets_rad=geometry.row_azimuth_offsets_rad, + ) + + +def _estimate_column_timing(laser_number_in_unit: np.ndarray, offset_ns: np.ndarray) -> tuple[float, int]: + """Estimate the firing-column period and column count from beam-0 refire timing. + + The column count spans exactly one revolution. An AV2 sweep covers slightly + more than one revolution (~1.02 rev over ~102 ms at 10 Hz), so columns are + wrapped modulo this count (see :func:`reconstruct_model_elements`): the few + degrees of overlap fold back onto the early columns, which represent the same + physical azimuth. Sizing to one revolution keeps the column-azimuth ramp below + 2*pi, as the structured spinning-lidar model requires. + """ + o = offset_ns.astype(np.int64) + beam0_times = np.sort(o[laser_number_in_unit == 0]) + if len(beam0_times) >= 2: + gaps = np.diff(beam0_times) + # Use the median of the small, regular gaps (drop large no-return stretches). + column_period_ns = float(np.median(gaps[gaps <= np.median(gaps) * 1.5])) + else: + # Fallback: one revolution divided by a nominal VLP-32C column count. + column_period_ns = VLP32C_SCAN_DURATION_US * 1000.0 / 1800.0 + revolution_ns = VLP32C_SCAN_DURATION_US * 1000.0 + n_columns = max(int(round(revolution_ns / column_period_ns)), 2) + return column_period_ns, n_columns + + +def reconstruct_model_elements( + laser_number_in_unit: np.ndarray, + offset_ns: np.ndarray, + geometry: Vlp32cGeometry, + xyz_decompensated: np.ndarray, + min_valid_distance_m: float = 5.0, +) -> np.ndarray: + """Build per-point ``model_element`` = (row, column) for one frame. + + Row comes from the laser->row map (elevation order). The column comes from + quantizing ``offset_ns`` by the firing-column period (wrapped modulo one + revolution), then applying a single per-frame column shift. + + The per-frame shift is essential: the static model fixes one mapping from + ``offset_ns`` to azimuth, but the sensor's spin phase at a given ``offset_ns`` + drifts a degree or so between sweeps (and ``offset_ns`` is referenced to a + per-sweep start). Without re-aligning, frames other than the one the model was + derived from are systematically rotated by up to ~1.2 deg. We estimate the + frame's rigid azimuth offset from the model (circular mean of the residual + between measured and model-predicted azimuth over far returns) and convert it + to an integer column shift, which restores sub-0.1 deg accuracy on every frame. + + Args: + xyz_decompensated: per-point decompensated points in the sensor frame [N, 3], + used to measure this frame's azimuth phase relative to the model. + min_valid_distance_m: far-range threshold for the phase estimate. + + Returns a [N, 2] uint16 array (row, column). + """ + row = geometry.laser_to_row[laser_number_in_unit] + o = offset_ns.astype(np.int64) + # Native firing column from the firing timing, scaled onto the (upsampled) grid. + native_col = (o - o.min()) / geometry.column_period_ns + col = (np.round(native_col).astype(np.int64) * geometry.resolution_factor) % geometry.n_columns + + # Re-align this frame to the model. The static model fixes one mapping from + # offset_ns to azimuth, but between sweeps the spin phase drifts (a rigid + # rotation) AND the spin rate varies slightly within a sweep (a drift that grows + # with the column index). We fit the residual azimuth as an affine function of + # the native column, residual ~= a + b * native_col, and fold it back into the + # column index. The constant term handles the phase, the linear term the + # intra-sweep rate drift; without the linear term some scenes retain ~0.25 deg. + dist = np.linalg.norm(xyz_decompensated, axis=1) + far = dist > min_valid_distance_m + if far.any(): + az = np.arctan2(xyz_decompensated[far, 1], xyz_decompensated[far, 0]) + predicted = geometry.column_azimuths_rad[col[far]] + geometry.row_azimuth_offsets_rad[row[far]] + residual = np.angle(np.exp(1j * (az - predicted))) + column_step_rad = 2.0 * np.pi / geometry.n_columns + sign = -1.0 if geometry.spinning_direction == "cw" else 1.0 + # Affine fit residual_columns ~= a + b * native_col (least squares). + residual_cols = sign * residual / column_step_rad + nc_far = native_col[far] + coeffs = np.polyfit(nc_far, residual_cols, 1) + col_shift = np.round(np.polyval(coeffs, native_col)).astype(np.int64) + col = (col + col_shift) % geometry.n_columns + + return np.stack([row, col], axis=1).astype(np.uint16) diff --git a/tools/data_converter/argoverse2/utils_test.py b/tools/data_converter/argoverse2/utils_test.py new file mode 100644 index 00000000..bcc0ea4b --- /dev/null +++ b/tools/data_converter/argoverse2/utils_test.py @@ -0,0 +1,272 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Data-free unit tests for the Argoverse 2 VLP-32C lidar-model derivation. + +These run in CI (no external dataset needed): a synthetic spinning-lidar sweep is +generated with a known firing geometry, the model is derived from it, and the +reconstruction accuracy is asserted -- the same property the eval tool measures. +This guards the firing-pattern reconstruction (and in particular the +more-than-one-revolution column wrap) against regressions. +""" + +from __future__ import annotations + +import tempfile +import unittest + +from pathlib import Path + +import numpy as np +import pyarrow as pa +import pyarrow.feather as feather + +from upath import UPath + +from ncore.impl.data.types import IdealPinholeCameraModelParameters, ShutterType +from ncore.impl.sensors.lidar import StructuredLidarModel +from tools.data_converter.argoverse2.utils import ( + VLP32C_N_BEAMS, + VLP32C_SCAN_DURATION_US, + build_vlp32c_model, + derive_vlp32c_geometry, + read_intrinsics, + reconstruct_model_elements, +) + + +# A VLP-32C-like, deliberately non-uniform elevation table (degrees), and a +# laser-number -> physical order that is NOT elevation-sorted, so the test +# exercises the laser->row recovery. +_TRUE_ELEVATIONS_DEG = np.linspace(15.0, -25.0, VLP32C_N_BEAMS) +_LASER_PERMUTATION = np.array( + [(i * 7) % VLP32C_N_BEAMS for i in range(VLP32C_N_BEAMS)], dtype=np.int64 +) # laser_number -> index into the (elevation-sorted) beam list + + +def _synthesize_sweep( + spinning_direction: str, + revolutions: float, + column_period_ns: float = 55296.0, + seed: int = 0, + start_az_rad: float | None = None, + az_rate_factor: float = 1.0, +): + """Build a synthetic decompensated VLP-32C sweep with a known geometry. + + Returns (xyz_decompensated [N,3], laser_number_in_unit [N], offset_ns [N], + true_elevations_rad [32] in laser order, true_row_offsets_rad [32] in laser order). + """ + rng = np.random.default_rng(seed) + sign = -1.0 if spinning_direction == "cw" else 1.0 + + # Per-laser true elevation (in laser-number order) and a per-laser azimuth + # offset spanning several degrees (VLP-32C beams in a column are not co-azimuthal). + elev_sorted_rad = np.radians(_TRUE_ELEVATIONS_DEG) # highest..lowest + true_elev_by_laser = np.empty(VLP32C_N_BEAMS) + true_elev_by_laser[np.arange(VLP32C_N_BEAMS)] = elev_sorted_rad[_LASER_PERMUTATION] + true_offset_by_laser = np.radians(np.linspace(-4.0, 4.0, VLP32C_N_BEAMS))[_LASER_PERMUTATION] + + revolution_ns = VLP32C_SCAN_DURATION_US * 1000.0 + n_cols = int(round(revolution_ns / column_period_ns)) + total_cols = int(round(n_cols * revolutions)) + + laser_list = [] + offset_list = [] + xyz_list = [] + base_az = rng.uniform(-np.pi, np.pi) if start_az_rad is None else start_az_rad + for c in range(total_cols): + # column reference azimuth advances with the spin. az_rate_factor != 1 + # models an intra-sweep spin-rate drift relative to the offset_ns timing + # (the azimuth advances slightly faster/slower than the nominal column rate). + col_az = base_az + sign * 2.0 * np.pi * c * az_rate_factor / n_cols + for laser in range(VLP32C_N_BEAMS): + # drop ~10% of returns to mimic sparsity; keep enough far points + if rng.random() < 0.1: + continue + az = col_az + true_offset_by_laser[laser] + el = true_elev_by_laser[laser] + rng_m = rng.uniform(25.0, 60.0) # far-range so azimuth fitting is clean + x = rng_m * np.cos(el) * np.cos(az) + y = rng_m * np.cos(el) * np.sin(az) + z = rng_m * np.sin(el) + xyz_list.append((x, y, z)) + laser_list.append(laser) + offset_list.append(int(round(c * column_period_ns))) + + xyz = np.array(xyz_list, dtype=np.float64) + laser = np.array(laser_list, dtype=np.int64) + offset = np.array(offset_list, dtype=np.int64) + return xyz, laser, offset, true_elev_by_laser, true_offset_by_laser + + +class TestVlp32cModelDerivation(unittest.TestCase): + def _check(self, spinning_direction: str, revolutions: float) -> None: + xyz, laser, offset, _, _ = _synthesize_sweep(spinning_direction, revolutions) + + geometry = derive_vlp32c_geometry(xyz, laser, offset) + self.assertEqual(geometry.spinning_direction, spinning_direction) + # Columns are upsampled for sub-column alignment precision. + self.assertGreater(geometry.resolution_factor, 1) + self.assertEqual(geometry.n_columns, len(geometry.column_azimuths_rad)) + + model = build_vlp32c_model(geometry) + self.assertEqual(model.n_rows, VLP32C_N_BEAMS) + # The column-azimuth ramp must stay strictly below one revolution; the model + # constructor enforces this, so building it at all proves the >1-rev wrap held. + self.assertLess(abs(float(model.column_azimuths_rad[-1] - model.column_azimuths_rad[0])), 2.0 * np.pi) + + elem = reconstruct_model_elements(laser, offset, geometry, xyz) + self.assertEqual(elem.dtype, np.uint16) + self.assertTrue(np.all(elem[:, 0] < model.n_rows)) + self.assertTrue(np.all(elem[:, 1] < model.n_columns)) + + sm = StructuredLidarModel.maybe_from_parameters(model, device="cpu") + assert sm is not None + predicted = sm.elements_to_sensor_points(elem, np.ones(len(elem), dtype=np.float32)).cpu().numpy() + predicted /= np.linalg.norm(predicted, axis=1, keepdims=True) + direction = xyz / np.linalg.norm(xyz, axis=1, keepdims=True) + cos = np.clip(np.sum(predicted * direction, axis=1), -1.0, 1.0) + err_deg = np.degrees(np.arccos(cos)) + median_err = float(np.median(err_deg)) + self.assertLess( + median_err, + 0.1, + f"{spinning_direction} {revolutions}rev: reconstruction error {median_err:.4f} deg too high", + ) + + def test_cw_single_revolution(self) -> None: + self._check("cw", revolutions=1.0) + + def test_cw_more_than_one_revolution(self) -> None: + # The regression case: an AV2 sweep covers ~1.02 revolutions, so the column + # ramp would exceed 2*pi unless columns are wrapped modulo one revolution. + self._check("cw", revolutions=1.05) + + def test_ccw_more_than_one_revolution(self) -> None: + # The two stacked units spin oppositely in their own frames; cover the ccw case. + self._check("ccw", revolutions=1.05) + + def test_model_generalizes_across_phase_shifted_frames(self) -> None: + """The model derived from one sweep must reconstruct other sweeps accurately. + + The sensor's spin phase at a given ``offset_ns`` drifts between sweeps, so a + sweep other than the one the model was derived from is rigidly rotated in + azimuth. ``reconstruct_model_elements`` must re-align per frame; without it, + these frames are off by ~1 deg (the multi-scene failure this guards against). + """ + # Derive the model from a reference sweep at one phase. + ref_xyz, ref_laser, ref_offset, _, _ = _synthesize_sweep("cw", revolutions=1.05, start_az_rad=0.3) + geometry = derive_vlp32c_geometry(ref_xyz, ref_laser, ref_offset) + model = build_vlp32c_model(geometry) + sm = StructuredLidarModel.maybe_from_parameters(model, device="cpu") + assert sm is not None + + # Reconstruct several sweeps captured at different spin phases. + for shift_deg in (-30.0, -1.0, 1.0, 30.0): + xyz, laser, offset, _, _ = _synthesize_sweep( + "cw", revolutions=1.05, seed=1, start_az_rad=0.3 + np.radians(shift_deg) + ) + elem = reconstruct_model_elements(laser, offset, geometry, xyz) + self.assertTrue(np.all(elem[:, 1] < model.n_columns)) + predicted = sm.elements_to_sensor_points(elem, np.ones(len(elem), dtype=np.float32)).cpu().numpy() + predicted /= np.linalg.norm(predicted, axis=1, keepdims=True) + direction = xyz / np.linalg.norm(xyz, axis=1, keepdims=True) + cos = np.clip(np.sum(predicted * direction, axis=1), -1.0, 1.0) + median_err = float(np.degrees(np.median(np.arccos(cos)))) + self.assertLess( + median_err, + 0.1, + f"phase shift {shift_deg} deg: reconstruction error {median_err:.4f} deg too high", + ) + + def test_model_handles_intra_sweep_rate_drift(self) -> None: + """A frame whose spin rate drifts vs offset_ns is reconstructed accurately. + + On some scenes the azimuth advances slightly faster/slower than the nominal + ``offset_ns`` column rate within a sweep. A single rigid phase shift cannot + correct this (it leaves ~0.25 deg); the affine (phase + linear) per-frame + alignment in ``reconstruct_model_elements`` does. This guards that fix. + """ + ref_xyz, ref_laser, ref_offset, _, _ = _synthesize_sweep("cw", revolutions=1.05, start_az_rad=0.2) + geometry = derive_vlp32c_geometry(ref_xyz, ref_laser, ref_offset) + model = build_vlp32c_model(geometry) + sm = StructuredLidarModel.maybe_from_parameters(model, device="cpu") + assert sm is not None + + # A sweep whose azimuth advances 0.3% faster than the nominal column rate + # (~1 deg of drift accumulated over the revolution) plus a phase offset. + xyz, laser, offset, _, _ = _synthesize_sweep( + "cw", revolutions=1.05, seed=2, start_az_rad=0.2 + np.radians(5.0), az_rate_factor=1.003 + ) + elem = reconstruct_model_elements(laser, offset, geometry, xyz) + predicted = sm.elements_to_sensor_points(elem, np.ones(len(elem), dtype=np.float32)).cpu().numpy() + predicted /= np.linalg.norm(predicted, axis=1, keepdims=True) + direction = xyz / np.linalg.norm(xyz, axis=1, keepdims=True) + cos = np.clip(np.sum(predicted * direction, axis=1), -1.0, 1.0) + median_err = float(np.degrees(np.median(np.arccos(cos)))) + self.assertLess(median_err, 0.1, f"intra-sweep drift: reconstruction error {median_err:.4f} deg too high") + + +class TestReadIntrinsics(unittest.TestCase): + """``read_intrinsics`` builds an ideal pinhole and preserves k1/k2/k3.""" + + def _write_intrinsics(self, log_dir: Path) -> None: + (log_dir / "calibration").mkdir(parents=True, exist_ok=True) + table = pa.table( + { + "sensor_name": ["ring_front_center", "ring_rear_left"], + "fx_px": [1685.0, 1683.0], + "fy_px": [1685.5, 1683.5], + "cx_px": [775.0, 773.0], + "cy_px": [1023.0, 1021.0], + "k1": [-0.27, -0.26], + "k2": [0.11, 0.10], + "k3": [-0.018, -0.017], + "height_px": [2048, 1550], + "width_px": [1550, 2048], + } + ) + feather.write_feather(table, str(log_dir / "calibration" / "intrinsics.feather")) + + def test_model_is_ideal_pinhole_global_shutter(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + log_dir = Path(tmp) + self._write_intrinsics(log_dir) + intrinsics = read_intrinsics(UPath(log_dir)) + + self.assertEqual(set(intrinsics), {"ring_front_center", "ring_rear_left"}) + cam = intrinsics["ring_front_center"] + self.assertIsInstance(cam.model, IdealPinholeCameraModelParameters) + # Global shutter is assumed because the released imagery is already undistorted. + self.assertEqual(cam.model.shutter_type, ShutterType.GLOBAL) + self.assertIsNone(cam.model.external_distortion_parameters) + np.testing.assert_array_equal(cam.model.resolution, np.array([1550, 2048], dtype=np.uint64)) + np.testing.assert_allclose(cam.model.focal_length, np.array([1685.0, 1685.5], dtype=np.float32)) + np.testing.assert_allclose(cam.model.principal_point, np.array([775.0, 1023.0], dtype=np.float32)) + + def test_original_distortion_coefficients_preserved(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + log_dir = Path(tmp) + self._write_intrinsics(log_dir) + intrinsics = read_intrinsics(UPath(log_dir)) + + # The raw lens coefficients are kept verbatim (not applied to the images). + self.assertEqual(intrinsics["ring_front_center"].original_distortion_k1k2k3, (-0.27, 0.11, -0.018)) + self.assertEqual(intrinsics["ring_rear_left"].original_distortion_k1k2k3, (-0.26, 0.10, -0.017)) + + +if __name__ == "__main__": + unittest.main()