diff --git a/MODULE.bazel.lock b/MODULE.bazel.lock
index bf06b782..badf4c28 100644
--- a/MODULE.bazel.lock
+++ b/MODULE.bazel.lock
@@ -1502,7 +1502,7 @@
         "bzlTransitiveDigest": "WyF2tLBpR8cyete55VmU+AwLC/mhDbnIlEdbVymbm04=",
         "usagesDigest": "DXk0VBJUK7HdLXJE59BVPG4lCTTPnHGNQg/SH9dTuVE=",
         "recordedFileInputs": {
-          "@@//deps/pip/requirements_3_11.txt": "01687dd0690641cecfe0cfa3a5a8ba2186f65112c34b4ab7b64caa55a416ae56",
+          "@@//deps/pip/requirements_3_11.txt": "845ddefb3a79c099a05f39880752241d3c30cefb3774c0611f253d5bff3f774f",
           "@@//deps/pip/requirements_3_8.txt": "94c254d645dc1d0555f48e66fa499aa2efecfc5e547500cd4ed409aee28e8880",
           "@@rules_fuzzing+//fuzzing/requirements.txt": "ab04664be026b632a0d2a2446c4f65982b7654f5b6851d2f9d399a19b7242a5b",
           "@@rules_python+//tools/publish/requirements_darwin.txt": "095d4a4f3d639dce831cd493367631cd51b53665292ab20194bac2c0c6458fa8",
diff --git a/deps/pip/BUILD.bazel b/deps/pip/BUILD.bazel
index 1e9c3e3d..becbd300 100644
--- a/deps/pip/BUILD.bazel
+++ b/deps/pip/BUILD.bazel
@@ -34,6 +34,7 @@ pip_compile(
     size = "medium",
     args = PIP_COMPILE_ARGS,
     data = [
+        ":requirements_argoverse2.in",
         ":requirements_colmap.in",
         ":requirements_docs.in",
         ":requirements_ncore.in",
diff --git a/deps/pip/requirements_3_11.in b/deps/pip/requirements_3_11.in
index e7c68a14..b31bf737 100644
--- a/deps/pip/requirements_3_11.in
+++ b/deps/pip/requirements_3_11.in
@@ -21,6 +21,7 @@
 -r requirements_waymo.in
 -r requirements_colmap.in
 -r requirements_nuscenes.in
+-r requirements_argoverse2.in
 -r requirements_pai.in
 
 # Public API restrictions for 3.11
diff --git a/deps/pip/requirements_3_11.txt b/deps/pip/requirements_3_11.txt
index 37fe4126..d620e757 100644
--- a/deps/pip/requirements_3_11.txt
+++ b/deps/pip/requirements_3_11.txt
@@ -1853,7 +1853,9 @@ pyarrow==23.0.1 \
     --hash=sha256:f4b0dbfa124c0bb161f8b5ebb40f1a680b70279aa0c9901d44a2b5a20806039f \
     --hash=sha256:fa8e51cb04b9f8c9c5ace6bab63af9a1f88d35c0d6cbf53e8c17c098552285e1 \
     --hash=sha256:fed7020203e9ef273360b9e45be52a2a47d3103caf156a30ace5247ffb51bdbd
-    # via -r deps/pip/requirements_pai.in
+    # via
+    #   -r deps/pip/requirements_argoverse2.in
+    #   -r deps/pip/requirements_pai.in
 pycocotools==2.0.11 \
     --hash=sha256:04480330df5013f6edd94891a0ee8294274185f1b5093d1b0f23d51778f0c0e9 \
     --hash=sha256:08c79789fd79e801ae4ecfcfeec32b31e36254e7a2b4019af28c104975d5e730 \
diff --git a/deps/pip/requirements_argoverse2.in b/deps/pip/requirements_argoverse2.in
new file mode 100644
index 00000000..da09d455
--- /dev/null
+++ b/deps/pip/requirements_argoverse2.in
@@ -0,0 +1,20 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Argoverse 2 converter dependencies.
+# The converter reads the AV2 Feather files directly and intentionally avoids the
+# heavy `av2` devkit (torch, kornia, numba, polars, PyAV). Quaternion handling uses
+# scipy (already an ncore dependency), so pyarrow is the only requirement listed here.
+pyarrow
diff --git a/docs/conversions/argoverse2/argoverse2.rst b/docs/conversions/argoverse2/argoverse2.rst
new file mode 100644
index 00000000..0900181a
--- /dev/null
+++ b/docs/conversions/argoverse2/argoverse2.rst
@@ -0,0 +1,135 @@
+.. SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+.. SPDX-License-Identifier: Apache-2.0
+
+Argoverse 2 Dataset
+===================
+
+The NCore Argoverse 2 tool converts data from the
+`Argoverse 2 <https://www.argoverse.org/av2.html>`_ Sensor Dataset into NCore
+V4 format. The converter reads the Argoverse 2 on-disk Apache Feather files
+directly with ``pyarrow`` and deliberately avoids the heavy ``av2`` devkit
+(which pulls in torch, kornia, numba, polars and PyAV). Quaternion handling uses
+``scipy`` (already an ncore dependency), so no extra dependency is introduced.
+
+.. _argoverse2_data_conventions:
+
+Conventions
+-----------
+
+Argoverse 2 provides data from 9 cameras and 2 lidars; it has no radar. The
+converter handles all sensor modalities and 3D cuboid annotations.
+
+Camera Sensors
+^^^^^^^^^^^^^^
+    1. **ring_front_center** -- 2048x1550 (height x width; portrait)
+    2. **ring_front_left** -- 1550x2048
+    3. **ring_front_right** -- 1550x2048
+    4. **ring_side_left** -- 1550x2048
+    5. **ring_side_right** -- 1550x2048
+    6. **ring_rear_left** -- 1550x2048
+    7. **ring_rear_right** -- 1550x2048
+    8. **stereo_front_left** -- 1550x2048
+    9. **stereo_front_right** -- 1550x2048
+
+The released imagery for all nine cameras is already undistorted -- the official
+av2 devkit projects with the intrinsic matrix ``K`` only and does not load the
+distortion columns -- so camera intrinsics are stored using
+:class:`~ncore.data.IdealPinholeCameraModelParameters`. The released images carry a
+single capture timestamp and no rolling-shutter metadata, so **global shutter is assumed** (``ShutterType.GLOBAL``). The
+``k1, k2, k3`` coefficients present in ``intrinsics.feather`` describe the
+original lens (for re-distorting into the raw frame) and are intentionally not
+applied to the released images; they are preserved per camera in the camera
+component ``generic_meta_data`` under ``av2_original_distortion`` so the original
+calibration is not lost.
+
+LiDAR Sensors
+^^^^^^^^^^^^^
+    1. **up_lidar** -- Velodyne VLP-32C, 32 beams, 10 Hz
+    2. **down_lidar** -- Velodyne VLP-32C, 32 beams, 10 Hz
+
+Argoverse 2 sweeps are egomotion-compensated to the sweep reference timestamp
+and provided in the egovehicle frame, with real per-point timestamps
+(``offset_ns``). The two stacked VLP-32C units are stored separately, each with
+its own static extrinsic. Points are split per unit by ``laser_number``,
+mapped into the unit's own sensor frame, and decompensated using the real
+per-point timestamps so that NCore stores raw per-point-time ray directions.
+Because the sensor extrinsic is static, this decompensation is independent of
+whether the source data applied ego-motion before or after the sensor
+transform.
+
+A structured VLP-32C model is stored per unit as lidar intrinsics, with per-point
+``model_element`` (row, column). Argoverse 2 provides no native firing-column
+index, so the firing pattern is reconstructed from ``offset_ns`` (firing columns --
+one VLP-32C revolution at 10 Hz) and ``laser_number`` (the beam, mapped to an
+elevation-sorted row). The geometry is derived per log from the *decompensated*
+reference sweep: elevations, the laser-to-row map, column timing, per-column
+azimuths, and per-row azimuth offsets (the 32 beams of a firing column span several
+degrees of azimuth, so the per-row offset is fit empirically). The two stacked
+units fire in opposite phase, so they spin oppositely in their own frames (one
+``cw``, one ``ccw``), which is detected from the data. The column grid is upsampled
+4x so per-frame alignment is not column-quantized, and each sweep is re-aligned to
+the model by a per-frame affine column remap -- a constant phase (the spin phase at
+a given ``offset_ns`` drifts ~1 deg between sweeps) plus a linear term (the spin
+rate drifts slightly within a sweep on some scenes). Steep downward beams that only
+return at near range (no far data) have their azimuth offset fit from near-range
+returns. Deriving from the decompensated cloud (not the ego-motion-smeared
+compensated one) plus these steps gives ~0.03 deg median far-range reconstruction
+across scenes (validated on 38 val logs / 76 units, all sub-0.08 deg median with no
+systematic azimuth or elevation bias), on par with native-column sensors. Pass
+``--lidar-model-source none`` to store raw ray bundles only.
+
+The ``laser_number`` to up/down unit split is not documented by Argoverse 2. The
+two units occupy the two laser-number halves (``< 32`` and ``>= 32``); the unit
+*label* is recovered from extrinsic geometry by per-beam elevation flatness -- a
+laser ring traces a constant-elevation cone only in its own sensor frame, so the
+wrong extrinsic tilts the cone and inflates the per-ring elevation spread. The
+decision is made once per log and is stable with a wide (~2-10x) margin.
+
+Annotations
+^^^^^^^^^^^
+
+3D cuboid annotations are native to the egovehicle frame at the sweep reference
+time. They are stored in the ``rig`` frame at that timestamp with no ego pose
+baked in, so the egovehicle motion stays out of the stored coordinates and
+remains swappable downstream (a V4 feature); the pose graph places the cuboids
+using the active ego trajectory. The full 3-DOF box orientation is preserved (the
+AV2 quaternion is converted to the ``BBox3`` ``xyz``-Euler convention, not reduced
+to yaw). The ``track_uuid`` is used as the track ID.
+
+Coordinate Frames
+^^^^^^^^^^^^^^^^^
+
+The first ego pose's ``city_SE3_egovehicle`` is stored as the static
+``world -> world_global`` pose, so ``world_global`` is the Argoverse 2 city
+frame. All absolute city coordinates remain recoverable for later alignment
+with the Argoverse 2 HD map (which the converter does not export).
+
+Usage
+-----
+
+.. code-block:: bash
+
+    bazel run //tools/data_converter/argoverse2 -- \
+        --root-dir /path/to/argoverse2/sensor \
+        --output-dir /path/to/output \
+        argoverse2-v4 \
+        --split val
+
+Convert a single log:
+
+.. code-block:: bash
+
+    bazel run //tools/data_converter/argoverse2 -- \
+        --root-dir /path/to/argoverse2/sensor \
+        --output-dir /path/to/output \
+        argoverse2-v4 \
+        --split val \
+        --log-id 02678d04-cc9f-3148-9f95-1ba66347dff9
+
+Testing
+-------
+
+.. code-block:: bash
+
+    AV2_DIR=/path/to/argoverse2/sensor AV2_SPLIT=val \
+        bazel test //tools/data_converter/argoverse2:pytest_converter
diff --git a/docs/conversions/index.rst b/docs/conversions/index.rst
index be50bee1..9bc84d1d 100644
--- a/docs/conversions/index.rst
+++ b/docs/conversions/index.rst
@@ -6,13 +6,14 @@ Data Conversions
 
 NCore provides conversion tools for importing 3rd-party dataset formats into
 the NCore V4 component-based format. Supported formats include KITTI, nuScenes,
-Waymo, COLMAP (including ScanNet++), and PAI.
+Argoverse 2, Waymo, COLMAP (including ScanNet++), and PAI.
 
 .. toctree::
    :maxdepth: 1
 
    kitti/kitti
    nuscenes/nuscenes
+   argoverse2/argoverse2
    waymo/waymo
    colmap/colmap
    pai/pai
diff --git a/tools/data_converter/argoverse2/BUILD.bazel b/tools/data_converter/argoverse2/BUILD.bazel
new file mode 100644
index 00000000..a645eb51
--- /dev/null
+++ b/tools/data_converter/argoverse2/BUILD.bazel
@@ -0,0 +1,97 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+load("@ncore_pip_deps//:requirements.bzl", "requirement")
+load("@rules_python//python:defs.bzl", "py_binary", "py_library")
+load("//bazel/pytest:defs.bzl", "pytest_test")
+
+# Argoverse 2 specific utilities: feather readers, sensor maps, lidar unit split
+py_library(
+    name = "pylib_utils",
+    srcs = ["utils.py"],
+    deps = [
+        requirement("numpy"),
+        requirement("pyarrow"),
+        requirement("scipy"),
+        requirement("universal_pathlib"),
+        "//ncore:pylib",
+        "//tools/data_converter:pylib_structured_lidar_model",
+    ],
+)
+
+# Converter library (config, converter class, CLI registration)
+py_library(
+    name = "pylib",
+    srcs = [
+        "converter.py",
+    ],
+    deps = [
+        ":pylib_utils",
+        requirement("click"),
+        requirement("numpy"),
+        requirement("pyarrow"),
+        requirement("scipy"),
+        requirement("tqdm"),
+        requirement("universal_pathlib"),
+        "//ncore:pylib",
+        "//tools/data_converter:pylib_cli",
+        "//tools/data_converter:pylib_structured_lidar_model",
+    ],
+)
+
+# Standalone CLI binary
+py_binary(
+    name = "convert",
+    srcs = ["main.py"],
+    main = "main.py",
+    deps = [":pylib"],
+)
+
+alias(
+    name = "argoverse2",
+    actual = ":convert",
+)
+
+# Integration test for the Argoverse 2 converter (requires AV2_DIR env var)
+pytest_test(
+    name = "pytest_converter",
+    srcs = ["converter_test.py"],
+    python_versions = ["3.11"],
+    tags = ["manual"],  # Only run when explicitly requested (needs external data)
+    deps = [
+        ":pylib",
+        requirement("numpy"),
+        requirement("parameterized"),
+        requirement("pyarrow"),
+        requirement("torch"),
+        requirement("universal_pathlib"),
+        "//ncore:pylib",
+    ],
+)
+
+# Data-free unit test for the VLP-32C lidar-model derivation (runs in CI).
+pytest_test(
+    name = "pytest_utils",
+    srcs = ["utils_test.py"],
+    python_versions = ["3.11"],
+    deps = [
+        ":pylib_utils",
+        requirement("numpy"),
+        requirement("pyarrow"),
+        requirement("torch"),
+        requirement("universal_pathlib"),
+        "//ncore:pylib",
+    ],
+)
diff --git a/tools/data_converter/argoverse2/NOTICE b/tools/data_converter/argoverse2/NOTICE
new file mode 100644
index 00000000..e16f1b37
--- /dev/null
+++ b/tools/data_converter/argoverse2/NOTICE
@@ -0,0 +1,12 @@
+Argoverse 2 Dataset
+Copyright (c) 2021 Argo AI, LLC
+
+This converter processes data from the Argoverse 2 (AV2) Sensor Dataset.
+The Argoverse 2 dataset is released under the Creative Commons
+Attribution-NonCommercial-ShareAlike 4.0 International License (CC BY-NC-SA 4.0).
+
+Users must agree to the Argoverse Terms of Use before downloading or using the dataset:
+https://www.argoverse.org/about.html#terms-of-use
+
+This converter reads the Argoverse 2 on-disk Feather files directly and does not
+depend on the av2 devkit. The av2 devkit is released under the MIT License.
diff --git a/tools/data_converter/argoverse2/README.md b/tools/data_converter/argoverse2/README.md
new file mode 100644
index 00000000..033d7952
--- /dev/null
+++ b/tools/data_converter/argoverse2/README.md
@@ -0,0 +1,117 @@
+# Argoverse 2 to NCore V4 Converter
+
+Converts [Argoverse 2](https://www.argoverse.org/av2.html) Sensor Dataset logs to
+NCore V4 format.
+
+The converter reads the Argoverse 2 on-disk Apache Feather files directly with
+`pyarrow`, deliberately avoiding the heavy `av2` devkit (which pulls in torch,
+kornia, numba, polars and PyAV). Quaternion handling uses `scipy` (already an
+ncore dependency), so no extra dependency is introduced.
+
+## Requirements
+
+- Argoverse 2 Sensor Dataset downloaded locally, organised as
+  `{root}/{split}/{log_id}/...`
+- Python packages: `pyarrow` (plus `scipy`, already an ncore dependency)
+
+## Usage
+
+```bash
+bazel run //tools/data_converter/argoverse2 -- \
+    --root-dir /path/to/argoverse2/sensor \
+    --output-dir /path/to/output \
+    argoverse2-v4 \
+    --split val
+```
+
+### Convert a single log
+
+```bash
+bazel run //tools/data_converter/argoverse2 -- \
+    --root-dir /path/to/argoverse2/sensor \
+    --output-dir /path/to/output \
+    argoverse2-v4 \
+    --split val \
+    --log-id 02678d04-cc9f-3148-9f95-1ba66347dff9
+```
+
+## Options
+
+| Option | Default | Description |
+|--------|---------|-------------|
+| `--split` | val | Split directory under `--root-dir` (train, val, test) |
+| `--log-id` | None | Filter to a single log by ID |
+| `--store-type` | itar | Output store format (itar or directory) |
+| `--profile` | separate-sensors | Component group assignment profile |
+| `--sequence-meta/--no-sequence-meta` | enabled | Generate sequence meta JSON |
+
+## Sensor Assumptions
+
+- **Cameras**: 9 cameras (7 ring + 2 stereo). AV2 imagery is shipped already
+  undistorted -- the official av2 devkit projects with the intrinsic matrix `K`
+  only and does not load the `k1, k2, k3` columns -- so the stored model is an
+  ideal (distortion-free) pinhole (`IdealPinholeCameraModelParameters`). Each image
+  has a single capture timestamp and no rolling-shutter metadata, so global shutter
+  is assumed (`ShutterType.GLOBAL`). The `k1, k2, k3` coefficients in `intrinsics.feather`
+  describe the original lens (for re-distorting into the raw frame) and must not be
+  applied to the released images, so they are not used for projection -- but they
+  are preserved per camera in the camera component `generic_meta_data` under
+  `av2_original_distortion` so the original calibration is not lost.
+- **Lidar**: two stacked Velodyne VLP-32C units (`up_lidar` / `down_lidar`, 10 Hz).
+  The source sweep is egomotion-compensated to the sweep reference timestamp and
+  expressed in the egovehicle frame. Real per-point timestamps are available via
+  `offset_ns`. Each unit is stored separately with its own extrinsic. Points are
+  mapped into each unit's sensor frame and decompensated using the real per-point
+  timestamps so the stored directions are raw per-point-time measurements. Because
+  the extrinsic is static, this is independent of whether AV2 applied ego-motion
+  before or after the sensor transform.
+  - A structured VLP-32C model is stored per unit as lidar intrinsics, with
+    per-point `model_element` (row, column). AV2 provides no native firing-column
+    index, so the firing pattern is reconstructed from `offset_ns` (firing columns --
+    one VLP-32C revolution at 10 Hz) and `laser_number` (the beam, mapped to an
+    elevation-sorted row). The geometry is derived per log from the *decompensated*
+    reference sweep: elevations, the laser->row map, column timing, per-column
+    azimuths, and per-row azimuth offsets (the 32 beams of a firing column span
+    several degrees of azimuth, so the per-row offset is fit empirically rather than
+    assumed). The two stacked units fire in opposite phase, so they spin oppositely
+    in their own frames (one `cw`, one `ccw`); this is detected from the data. The
+    column grid is upsampled 4x so the per-frame alignment is not column-quantized.
+    Each sweep is re-aligned to the model by a per-frame affine column remap (a
+    constant phase plus a linear term): the spin phase at a given `offset_ns` drifts
+    ~1 deg between sweeps (the constant), and the spin rate drifts slightly within a
+    sweep on some scenes (the linear term). A fixed mapping, or a phase-only shift,
+    would leave some frames off by up to ~1 deg / ~0.25 deg respectively.
+    Steep downward beams that only return at near range (no far data, e.g. the
+    lowest laser at ~-25 deg) have their azimuth offset fit from near-range returns.
+    Deriving from the decompensated cloud (not the ego-motion-smeared compensated
+    one) plus these steps gives ~0.03-0.05 deg median far-range reconstruction across
+    scenes, on par with native-column sensors. Pass `--lidar-model-source none` to
+    store raw ray bundles only.
+  - The `laser_number` to up/down unit split is not documented by AV2. The two
+    units occupy the two laser-number halves (`< 32` and `>= 32`); the unit *label*
+    is recovered from extrinsic geometry by per-beam elevation flatness (a laser
+    ring traces a constant-elevation cone only in its own sensor frame, so the
+    wrong extrinsic tilts the cone and inflates the per-ring elevation spread). The
+    decision is made once per log and is stable with a wide (~2-10x) margin.
+- **Radar**: AV2 has no radar.
+- **Cuboid annotations**: native to the egovehicle frame at the sweep reference
+  time, stored in the `rig` frame at that timestamp with no ego pose baked in. This
+  keeps the egovehicle motion out of the stored coordinates so it stays swappable
+  downstream (a V4 feature); the pose graph places the cuboids using the active ego
+  trajectory. The full 3-DOF box orientation is preserved (the AV2 quaternion is
+  converted to the BBox3 `xyz`-Euler convention, not reduced to yaw). `track_uuid`
+  is used as track ID.
+
+## Coordinate frames
+
+The first ego pose's `city_SE3_egovehicle` is stored as the static
+`world -> world_global` pose, so `world_global` is the AV2 city frame. All absolute
+city coordinates remain recoverable for later alignment with the AV2 HD map (which
+the converter does not export).
+
+## Testing
+
+```bash
+AV2_DIR=/path/to/argoverse2/sensor AV2_SPLIT=val \
+    bazel test //tools/data_converter/argoverse2:pytest_converter
+```
diff --git a/tools/data_converter/argoverse2/converter.py b/tools/data_converter/argoverse2/converter.py
new file mode 100644
index 00000000..7c999e1a
--- /dev/null
+++ b/tools/data_converter/argoverse2/converter.py
@@ -0,0 +1,776 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Argoverse 2 Sensor Dataset to NCore V4 converter."""
+
+from __future__ import annotations
+
+import json
+import logging
+
+from dataclasses import dataclass
+from typing import Dict, List, Literal, Optional
+
+import click
+import numpy as np
+import tqdm
+
+from scipy.spatial.transform import Rotation as R
+from upath import UPath
+
+from ncore.impl.common.transformations import (
+    HalfClosedInterval,
+    MotionCompensator,
+    se3_inverse,
+)
+from ncore.impl.data.types import (
+    BBox3,
+    CuboidTrackObservation,
+    LabelSource,
+    RowOffsetStructuredSpinningLidarModelParameters,
+)
+from ncore.impl.data.v4.components import (
+    CameraSensorComponent,
+    CuboidsComponent,
+    IntrinsicsComponent,
+    LidarSensorComponent,
+    MasksComponent,
+    PosesComponent,
+    RadarSensorComponent,  # noqa: F401 -- imported for parity; AV2 has no radar
+    SequenceComponentGroupsReader,
+    SequenceComponentGroupsWriter,
+)
+from ncore.impl.data.v4.types import ComponentGroupAssignments
+from ncore.impl.data_converter.base import FileBasedDataConverter, FileBasedDataConverterConfig
+from tools.data_converter.argoverse2.utils import (
+    AV2_CATEGORY_MAP,
+    CAMERA_NAMES,
+    LIDAR_NAMES,
+    VLP32C_N_BEAMS,
+    Vlp32cGeometry,
+    assign_lidar_units,
+    build_vlp32c_model,
+    derive_vlp32c_geometry,
+    list_log_ids,
+    list_sensor_timestamps,
+    read_annotations,
+    read_city_se3_ego,
+    read_ego_se3_sensor,
+    read_intrinsics,
+    read_lidar_sweep,
+    reconstruct_model_elements,
+)
+from tools.data_converter.cli import cli
+
+
+# Argoverse 2 timestamps are nanoseconds; NCore V4 uses microseconds.
+NS_PER_US = 1000
+
+
+def _ns_to_us(value_ns: int) -> int:
+    return int(value_ns) // NS_PER_US
+
+
+# -----------------------------------------------------------------------------
+# Config
+# -----------------------------------------------------------------------------
+
+
+@dataclass(kw_only=True, slots=True)
+class Argoverse2Converter4Config(FileBasedDataConverterConfig):
+    """Keyword-only settings for one Argoverse 2 -> NCore V4 conversion run."""
+
+    split: str = "val"  # sub-directory of root_dir that holds the logs to convert
+    log_id: Optional[str] = None  # when set, only this log is converted
+    store_type: Literal["itar", "directory"] = "itar"  # forwarded to the sequence writer
+    component_group_profile: Literal["default", "separate-sensors", "separate-all"] = "separate-sensors"
+    store_sequence_meta: bool = True
+    # Lidar model source: "empirical" derives a structured VLP-32C model per lidar unit and
+    # stores per-point model elements + intrinsics; "none" stores raw ray bundles only.
+    lidar_model_source: Literal["empirical", "none"] = "empirical"
+
+
+# -----------------------------------------------------------------------------
+# Converter
+# -----------------------------------------------------------------------------
+
+
+class Argoverse2Converter4(FileBasedDataConverter):
+    """Dataset preprocessing class for converting Argoverse 2 data to NCore V4.
+
+    Sensor assumptions (sourced from the AV2 devkit and User Guide):
+
+    - Cameras: 9 cameras (7 ring + 2 stereo). Imagery is shipped already
+      undistorted, so a pinhole model with zero distortion is exact. Because the
+      released imagery is already undistorted (a single capture timestamp per
+      image, no rolling-shutter metadata), global shutter is assumed ->
+      ``ShutterType.GLOBAL``. The original lens radial-distortion coefficients
+      ``(k1, k2, k3)`` are not applied but are preserved per camera in the camera
+      component ``generic_meta_data`` under ``av2_original_distortion``.
+    - Lidar: two stacked Velodyne VLP-32C units (``up_lidar`` / ``down_lidar``,
+      10 Hz). The source sweep is egomotion-compensated to the sweep reference
+      timestamp and stored in the egovehicle frame. Real per-point timestamps are
+      provided (``offset_ns``). We split points per unit by ``laser_number``,
+      transform into each unit's sensor frame, and decompensate using the real
+      per-point timestamps so NCore stores raw per-point-time directions. A
+      structured VLP-32C model is stored per unit (with per-point ``model_element``)
+      by reconstructing the firing pattern from ``offset_ns`` + ``laser_number``;
+      the two units spin oppositely in their own frames (detected from data). Pass
+      ``--lidar-model-source none`` to skip the model and store raw ray bundles only.
+    - Radar: AV2 has no radar.
+    - Cuboid annotations: native to the egovehicle frame at the sweep reference
+      time. Stored in the ``rig`` frame at that timestamp with no ego pose baked
+      in, so the egovehicle motion stays swappable downstream (a V4 feature); the
+      pose graph places the cuboids using the active ego trajectory.
+
+    The first ego pose's ``city_SE3_egovehicle`` is stored as the static
+    ``world -> world_global`` anchor, so ``world_global`` is the AV2 city frame.
+    This keeps absolute coordinates recoverable for later HD-map alignment.
+    """
+
+    def __init__(self, config: Argoverse2Converter4Config) -> None:
+        super().__init__(config)
+
+        self.component_group_profile = config.component_group_profile
+        self.store_type = config.store_type
+        self.store_sequence_meta = config.store_sequence_meta
+
+        self._split = config.split
+        self._log_id = config.log_id
+        self._lidar_model_source = config.lidar_model_source
+
+        self.logger = logging.getLogger(__name__)
+
+    @property
+    def _split_dir(self) -> UPath:
+        return self.root_dir / self._split  # directory of the configured split, directly under root_dir
+
+    @staticmethod
+    def get_sequence_ids(config: Argoverse2Converter4Config) -> List[str]:
+        """Discover log IDs to convert."""
+        if config.log_id is not None:
+            return [config.log_id]
+        split_dir = UPath(config.root_dir) / config.split
+        return list_log_ids(split_dir)
+
+    @staticmethod
+    def from_config(config: Argoverse2Converter4Config) -> Argoverse2Converter4:
+        return Argoverse2Converter4(config)  # plain factory: equivalent to calling the constructor
+
+    def convert_sequence(self, sequence_id: str) -> None:
+        """Convert a single Argoverse 2 log to NCore V4 format."""
+        log_id = sequence_id
+        log_dir = self._split_dir / log_id
+
+        self.logger.info(f"Converting log {log_id} (split={self._split})")
+
+        # --- Ego poses (egovehicle -> city) -----------------------------------
+        pose_timestamps_ns, T_ego_city_all = read_city_se3_ego(log_dir)
+        n_poses = len(pose_timestamps_ns)
+        assert n_poses >= 2, f"Log has fewer than 2 ego poses: {n_poses}"
+
+        pose_timestamps_us = np.array([_ns_to_us(t) for t in pose_timestamps_ns], dtype=np.uint64)
+
+        # AV2 ego poses are dense (some only nanoseconds apart), so the ns -> us
+        # truncation can produce duplicate microsecond timestamps. The pose writer
+        # requires strictly increasing timestamps, so keep the first pose for each
+        # unique microsecond timestamp.
+        pose_timestamps_us, unique_idx = np.unique(pose_timestamps_us, return_index=True)
+        T_ego_city_all = T_ego_city_all[unique_idx]
+        n_poses = len(pose_timestamps_us)
+        assert n_poses >= 2, f"Log has fewer than 2 unique-microsecond ego poses: {n_poses}"
+
+        # Anchor the first pose as world_global (the AV2 city frame); store all
+        # poses relative to it so the relative poses are float32-safe.
+        T_world_world_global = T_ego_city_all[0].copy()  # float64 for global accuracy
+        T_world_global_inv = se3_inverse(T_world_world_global)
+        T_rig_world_relative = (T_world_global_inv @ T_ego_city_all).astype(np.float32)
+
+        # --- Static sensor extrinsics (sensor -> ego) -------------------------
+        ego_se3_sensor = read_ego_se3_sensor(log_dir)
+
+        # --- Determine active sensors -----------------------------------------
+        camera_ids = self.get_active_camera_ids(list(CAMERA_NAMES))
+        lidar_ids = self.get_active_lidar_ids(list(LIDAR_NAMES))
+        radar_ids = self.get_active_radar_ids([])  # AV2 has no radar
+
+        # --- Sequence time interval -------------------------------------------
+        all_ts_us: List[int] = [int(pose_timestamps_us[0]), int(pose_timestamps_us[-1])]
+        lidar_ts_ns = list_sensor_timestamps(log_dir, "lidar")
+        if lidar_ts_ns:
+            all_ts_us += [_ns_to_us(lidar_ts_ns[0]), _ns_to_us(lidar_ts_ns[-1])]
+        for cam_id in camera_ids:
+            cam_ts_ns = list_sensor_timestamps(log_dir, "cameras", cam_id)
+            if cam_ts_ns:
+                all_ts_us += [_ns_to_us(cam_ts_ns[0]), _ns_to_us(cam_ts_ns[-1])]
+
+        seq_start_us = min(all_ts_us)
+        seq_end_us = max(all_ts_us)
+
+        # Extend pose timeline to cover the sequence interval, extrapolating with a
+        # constant-velocity assumption so lidar decompensation near the boundaries
+        # has real motion to invert (mirrors the nuScenes converter).
+        T_rig_world_relative, pose_timestamps_us = self._extend_pose_timeline(
+            T_rig_world_relative, pose_timestamps_us, seq_start_us, seq_end_us
+        )
+
+        sequence_timestamp_interval_us = HalfClosedInterval.from_start_end(seq_start_us, seq_end_us)
+
+        # --- Component group assignments --------------------------------------
+        component_groups = ComponentGroupAssignments.create(
+            camera_ids=camera_ids,
+            lidar_ids=lidar_ids,
+            radar_ids=radar_ids,
+            point_clouds_ids=[],
+            camera_labels_ids=[],
+            profile=self.component_group_profile,
+        )
+
+        # --- Create writer ----------------------------------------------------
+        store_writer = SequenceComponentGroupsWriter(
+            output_dir_path=self.output_dir / log_id,
+            store_base_name=log_id,
+            sequence_id=log_id,
+            sequence_timestamp_interval_us=sequence_timestamp_interval_us,
+            store_type=self.store_type,
+            generic_meta_data={
+                "source_dataset": "argoverse2",
+                "argoverse2_split": self._split,
+                "argoverse2_log_id": log_id,
+            },
+        )
+
+        poses_writer = store_writer.register_component_writer(
+            PosesComponent.Writer,
+            component_instance_name="default",
+            group_name=component_groups.poses_component_group,
+            generic_meta_data={
+                "calibration_type": "argoverse2:egovehicle_SE3_sensor",
+                "egomotion_type": "argoverse2:city_SE3_egovehicle",
+            },
+        )
+        intrinsics_writer = store_writer.register_component_writer(
+            IntrinsicsComponent.Writer,
+            component_instance_name="default",
+            group_name=component_groups.intrinsics_component_group,
+        )
+        masks_writer = store_writer.register_component_writer(
+            MasksComponent.Writer,
+            component_instance_name="default",
+            group_name=component_groups.masks_component_group,
+        )
+
+        # --- Store ego poses --------------------------------------------------
+        poses_writer.store_dynamic_pose(
+            source_frame_id="rig",
+            target_frame_id="world",
+            poses=T_rig_world_relative,
+            timestamps_us=pose_timestamps_us,
+        )
+        poses_writer.store_static_pose(
+            source_frame_id="world",
+            target_frame_id="world_global",
+            pose=T_world_world_global,
+        )
+
+        # --- Decode sensors ---------------------------------------------------
+        if lidar_ids:
+            self._decode_lidars(
+                log_dir=log_dir,
+                store_writer=store_writer,
+                poses_writer=poses_writer,
+                intrinsics_writer=intrinsics_writer,
+                component_groups=component_groups,
+                lidar_ids=lidar_ids,
+                ego_se3_sensor=ego_se3_sensor,
+                T_rig_world_relative=T_rig_world_relative,
+                pose_timestamps_us=pose_timestamps_us,
+            )
+
+        self._decode_cameras(
+            log_dir=log_dir,
+            store_writer=store_writer,
+            poses_writer=poses_writer,
+            intrinsics_writer=intrinsics_writer,
+            masks_writer=masks_writer,
+            component_groups=component_groups,
+            camera_ids=camera_ids,
+            ego_se3_sensor=ego_se3_sensor,
+        )
+
+        self._decode_cuboids(
+            log_dir=log_dir,
+            store_writer=store_writer,
+            component_groups=component_groups,
+        )
+
+        # --- Finalize ---------------------------------------------------------
+        ncore_4_paths = store_writer.finalize()
+
+        if self.store_sequence_meta:
+            sequence_component_reader = SequenceComponentGroupsReader(ncore_4_paths)
+            sequence_meta_path = self.output_dir / log_id / f"{sequence_component_reader.sequence_id}.json"
+            with sequence_meta_path.open("w") as f:
+                json.dump(sequence_component_reader.get_sequence_meta().to_dict(), f, indent=2)
+            self.logger.info(f"Wrote sequence meta data {str(sequence_meta_path)}")
+
+    # -------------------------------------------------------------------------
+    # Pose timeline
+    # -------------------------------------------------------------------------
+
+    @staticmethod
+    def _extend_pose_timeline(
+        T_rig_world_relative: np.ndarray,
+        pose_timestamps_us: np.ndarray,
+        seq_start_us: int,
+        seq_end_us: int,
+    ) -> tuple[np.ndarray, np.ndarray]:
+        """Pad the rig->world trajectory so it spans ``[seq_start_us, seq_end_us]``.
+
+        A boundary pose is added by repeating the adjacent inter-pose motion once
+        (one full step, not scaled by the time gap); indexing needs >= 2 poses.
+        """
+        poses, stamps_us = T_rig_world_relative, pose_timestamps_us
+
+        if int(stamps_us[0]) > seq_start_us:
+            # Step backwards from the first pose: T_0 @ (inv(T_1) @ T_0).
+            first, second = poses[0], poses[1]
+            head = (first @ (se3_inverse(second) @ first)).astype(np.float32)
+            poses = np.concatenate([head[np.newaxis], poses], axis=0)
+            stamps_us = np.concatenate([np.array([seq_start_us], dtype=np.uint64), stamps_us])
+
+        if int(stamps_us[-1]) < seq_end_us:
+            # Step forwards from the last pose: T_n @ (inv(T_{n-1}) @ T_n).
+            before_last, last = poses[-2], poses[-1]
+            tail = (last @ (se3_inverse(before_last) @ last)).astype(np.float32)
+            poses = np.concatenate([poses, tail[np.newaxis]], axis=0)
+            stamps_us = np.concatenate([stamps_us, np.array([seq_end_us], dtype=np.uint64)])
+
+        return poses, stamps_us
+
+    # -------------------------------------------------------------------------
+    # Lidar
+    # -------------------------------------------------------------------------
+
+    def _decode_lidars(
+        self,
+        log_dir: UPath,
+        store_writer: SequenceComponentGroupsWriter,
+        poses_writer: PosesComponent.Writer,
+        intrinsics_writer: IntrinsicsComponent.Writer,
+        component_groups: ComponentGroupAssignments,
+        lidar_ids: List[str],
+        ego_se3_sensor: Dict[str, np.ndarray],
+        T_rig_world_relative: np.ndarray,
+        pose_timestamps_us: np.ndarray,
+    ) -> None:
+        """Decode and store the two stacked VLP-32C lidars individually.
+
+        AV2 lidar points are egomotion-compensated to the sweep reference time
+        (the sweep start) and provided in the egovehicle frame. For each unit we
+        map points into the unit's own sensor frame (via the static extrinsic) and
+        decompensate using the real per-point timestamps -- referenced to the sweep
+        start -- to recover raw per-point-time directions.
+
+        Because the sensor extrinsic is static, the decompensation commutes with
+        the ego->sensor transform, so the result is independent of whether AV2
+        applied ego-motion before or after the sensor transform.
+        """
+        sweep_ts_ns = list_sensor_timestamps(log_dir, "lidar")
+        if not sweep_ts_ns:
+            self.logger.warning("No lidar sweeps found")
+            return
+
+        # Static extrinsics + per-unit writers + per-unit motion compensators.
+        T_unit_rig: Dict[str, np.ndarray] = {}
+        lidar_writers: Dict[str, LidarSensorComponent.Writer] = {}
+        compensators: Dict[str, MotionCompensator] = {}
+
+        for unit_id in lidar_ids:
+            T_unit_rig[unit_id] = ego_se3_sensor[unit_id].astype(np.float32)
+            poses_writer.store_static_pose(source_frame_id=unit_id, target_frame_id="rig", pose=T_unit_rig[unit_id])
+            lidar_writers[unit_id] = store_writer.register_component_writer(
+                LidarSensorComponent.Writer,
+                component_instance_name=unit_id,
+                group_name=component_groups.lidar_component_groups.get(unit_id),
+                generic_meta_data={"sensor_model": "Velodyne VLP-32C"},
+            )
+            compensators[unit_id] = MotionCompensator.from_sensor_rig(
+                sensor_id=unit_id,
+                T_sensor_rig=T_unit_rig[unit_id],
+                T_rig_worlds=T_rig_world_relative,
+                T_rig_worlds_timestamps_us=pose_timestamps_us,
+            )
+
+        # Determine the laser_number -> unit labelling once from the first sweep.
+        # The split is a fixed physical property of the two stacked sensors, so we
+        # resolve it once (robustly, from extrinsic geometry) and reuse it for every
+        # sweep rather than re-deciding per frame.
+        T_up = ego_se3_sensor.get("up_lidar", np.eye(4))
+        T_down = ego_se3_sensor.get("down_lidar", np.eye(4))
+        first_sweep = read_lidar_sweep(log_dir / "sensors" / "lidar" / f"{sweep_ts_ns[0]}.feather")
+        first_masks = assign_lidar_units(first_sweep.laser_number, first_sweep.xyz, T_up, T_down)
+
+        # Map the decision to a laser_number threshold test (lo half == laser_number < 32).
+        # The check must span the whole lo half: a narrower selection is more likely to be
+        # empty, and ``.all()`` of an empty selection is True (silently picking "up_lidar").
+        lo_is_up = bool(first_masks["up_lidar"][first_sweep.laser_number < 32].all())
+        unit_for_lo = "up_lidar" if lo_is_up else "down_lidar"
+        unit_for_hi = "down_lidar" if lo_is_up else "up_lidar"
+        self.logger.info(f"Lidar unit split: laser_number<32 -> {unit_for_lo}, >=32 -> {unit_for_hi}")
+
+        # Build a structured VLP-32C model per unit from the first sweep. AV2 has no
+        # native firing-column index, but offset_ns + laser_number reconstruct it (one
+        # firing column per VLP-32C revolution; laser_number selects the beam/row). The
+        # firing geometry (elevations, laser->row map, per-column azimuths, per-row
+        # azimuth offsets, spin direction) is derived empirically per log from the
+        # DECOMPENSATED reference sweep -- decompensation is essential, since the raw
+        # motion-compensated azimuths are smeared by ego motion (~0.5 deg) which would
+        # otherwise dominate the model error.
+        unit_geometry: Dict[str, Vlp32cGeometry] = {}
+        unit_model: Dict[str, RowOffsetStructuredSpinningLidarModelParameters] = {}
+        if self._lidar_model_source == "empirical":
+            first_ref_ts_us = _ns_to_us(int(first_sweep.timestamp_ns))
+            first_pt_ts_us = ((first_sweep.timestamp_ns + first_sweep.offset_ns) // NS_PER_US).astype(np.uint64)
+            first_start_us = min(first_ref_ts_us, int(first_pt_ts_us.min()))
+            first_end_us = max(int(first_pt_ts_us.max()), first_start_us + 1)
+            for unit_id, mask_first in (
+                (unit_for_lo, first_sweep.laser_number < 32),
+                (unit_for_hi, first_sweep.laser_number >= 32),
+            ):
+                if unit_id not in lidar_ids or not mask_first.any():
+                    continue
+                laser_in_unit = (first_sweep.laser_number[mask_first] % VLP32C_N_BEAMS).astype(np.int64)
+                offset_in_unit = first_sweep.offset_ns[mask_first]
+
+                # rig -> unit sensor frame, then decompensate to per-point time so the
+                # firing geometry is clean (un-smeared by ego motion).
+                T_rig_unit = se3_inverse(T_unit_rig[unit_id])
+                xyz_sensor_first = (T_rig_unit[:3, :3] @ first_sweep.xyz[mask_first].T).T + T_rig_unit[:3, 3]
+                xyz_decomp_first = compensators[unit_id].motion_decompensate_points(
+                    sensor_id=unit_id,
+                    xyz_reftime=xyz_sensor_first,
+                    timestamp_us=first_pt_ts_us[mask_first],
+                    frame_start_timestamp_us=first_start_us,
+                    frame_end_timestamp_us=first_end_us,
+                    reference_timestamp_us=first_ref_ts_us,
+                )
+
+                geometry = derive_vlp32c_geometry(
+                    xyz_decompensated=xyz_decomp_first.astype(np.float64),
+                    laser_number_in_unit=laser_in_unit,
+                    offset_ns=offset_in_unit,
+                )
+                model = build_vlp32c_model(geometry)
+                unit_geometry[unit_id] = geometry
+                unit_model[unit_id] = model
+                self.logger.info(
+                    f"Derived VLP-32C model for {unit_id}: {geometry.n_columns} columns, "
+                    f"spin {geometry.spinning_direction}, "
+                    f"column period {geometry.column_period_ns / 1000.0:.2f} us"
+                )
+
+        for ts_ns in tqdm.tqdm(sweep_ts_ns, desc="Process lidar"):
+            sweep = read_lidar_sweep(log_dir / "sensors" / "lidar" / f"{ts_ns}.feather")
+
+            # Per-point absolute time = sweep_ts + offset; convert to microseconds.
+            # The sweep reference timestamp (the filename) is the frame start and
+            # the egomotion-compensation reference for all points in the sweep.
+            reference_ts_us = _ns_to_us(int(sweep.timestamp_ns))
+            point_ts_us = ((sweep.timestamp_ns + sweep.offset_ns) // NS_PER_US).astype(np.uint64)
+
+            frame_start_us = min(reference_ts_us, int(point_ts_us.min()))
+            frame_end_us = max(int(point_ts_us.max()), frame_start_us + 1)
+
+            lo = sweep.laser_number < 32
+            unit_masks = {unit_for_lo: lo, unit_for_hi: ~lo}
+
+            for unit_id in lidar_ids:
+                mask = unit_masks[unit_id]
+                if not mask.any():
+                    continue
+
+                xyz_rig = sweep.xyz[mask]  # AV2 egovehicle frame == NCore rig frame
+                ts_unit = point_ts_us[mask]
+                intensity_unit = sweep.intensity[mask]
+
+                # rig -> unit sensor frame (points are compensated to the sweep
+                # reference time, so this is the sensor reference-time frame).
+                T_rig_unit = se3_inverse(T_unit_rig[unit_id])
+                xyz_sensor = (T_rig_unit[:3, :3] @ xyz_rig.T).T + T_rig_unit[:3, 3]
+
+                # Decompensate from the reference-time sensor frame (the sweep start,
+                # AV2's compensation reference) to each point's own measurement time.
+                xyz_raw = compensators[unit_id].motion_decompensate_points(
+                    sensor_id=unit_id,
+                    xyz_reftime=xyz_sensor,
+                    timestamp_us=ts_unit,
+                    frame_start_timestamp_us=frame_start_us,
+                    frame_end_timestamp_us=frame_end_us,
+                    reference_timestamp_us=reference_ts_us,
+                )
+
+                distance_m = np.linalg.norm(xyz_raw, axis=1)
+                direction = np.zeros_like(xyz_raw)
+                nonzero = distance_m > 0
+                direction[nonzero] = xyz_raw[nonzero] / distance_m[nonzero, np.newaxis]
+
+                # Per-point structured-model element (row, column) from beam + firing
+                # time, re-aligned to the model by this frame's azimuth phase.
+                model_element = None
+                if unit_id in unit_geometry:
+                    model_element = reconstruct_model_elements(
+                        laser_number_in_unit=(sweep.laser_number[mask] % VLP32C_N_BEAMS).astype(np.int64),
+                        offset_ns=sweep.offset_ns[mask],
+                        geometry=unit_geometry[unit_id],
+                        xyz_decompensated=xyz_raw,
+                    )
+
+                lidar_writers[unit_id].store_frame(
+                    direction=direction.astype(np.float32),
+                    timestamp_us=ts_unit,
+                    model_element=model_element,
+                    distance_m=distance_m.reshape(1, -1),
+                    intensity=intensity_unit.reshape(1, -1),
+                    frame_timestamps_us=np.array([frame_start_us, frame_end_us], dtype=np.uint64),
+                    generic_data={},
+                    generic_meta_data={},
+                )
+
+        # Store the structured lidar model as intrinsics for each unit.
+        for unit_id, model in unit_model.items():
+            intrinsics_writer.store_lidar_intrinsics(lidar_id=unit_id, lidar_model_parameters=model)
+
+    # -------------------------------------------------------------------------
+    # Cameras
+    # -------------------------------------------------------------------------
+
+    def _decode_cameras(
+        self,
+        log_dir: UPath,
+        store_writer: SequenceComponentGroupsWriter,
+        poses_writer: PosesComponent.Writer,
+        intrinsics_writer: IntrinsicsComponent.Writer,
+        masks_writer: MasksComponent.Writer,
+        component_groups: ComponentGroupAssignments,
+        camera_ids: List[str],
+        ego_se3_sensor: Dict[str, np.ndarray],
+    ) -> None:
+        """Write extrinsics, intrinsics, masks and JPEG frames for each camera.
+
+        AV2 imagery is shipped already undistorted, so global shutter is assumed and
+        every frame gets identical start/end timestamps. Cameras with no frames on
+        disk are skipped with a warning. The original lens ``(k1, k2, k3)`` are kept
+        as provenance in ``generic_meta_data["av2_original_distortion"]``.
+        """
+        per_camera_intrinsics = read_intrinsics(log_dir)
+
+        for camera_id in camera_ids:
+            frame_stamps_ns = list_sensor_timestamps(log_dir, "cameras", camera_id)
+            if not frame_stamps_ns:
+                self.logger.warning(f"No data for camera {camera_id}")
+                continue
+
+            self.logger.info(f"Processing camera {camera_id}")
+
+            # Static camera -> rig (ego) extrinsic.
+            poses_writer.store_static_pose(
+                source_frame_id=camera_id,
+                target_frame_id="rig",
+                pose=ego_se3_sensor[camera_id].astype(np.float32),
+            )
+
+            calib = per_camera_intrinsics[camera_id]
+            intrinsics_writer.store_camera_intrinsics(camera_id=camera_id, camera_model_parameters=calib.model)
+
+            # An empty mask set is registered for every camera.
+            masks_writer.store_camera_masks(camera_id=camera_id, mask_images={})
+
+            # The stored model carries no distortion (the released images are already
+            # undistorted), so the raw lens coefficients are recorded on the camera
+            # component instead of being dropped.
+            k1, k2, k3 = calib.original_distortion_k1k2k3
+            distortion_meta = {"av2_original_distortion": {"k1": k1, "k2": k2, "k3": k3}}
+            frame_writer = store_writer.register_component_writer(
+                CameraSensorComponent.Writer,
+                component_instance_name=camera_id,
+                group_name=component_groups.camera_component_groups.get(camera_id),
+                generic_meta_data=distortion_meta,
+            )
+
+            # Frames are stored as the JPEG bytes read from disk, one file per timestamp.
+            frames_dir = log_dir / "sensors" / "cameras" / camera_id
+            for ts_ns in tqdm.tqdm(frame_stamps_ns, desc=f"Process {camera_id}"):
+                with (frames_dir / f"{ts_ns}.jpg").open("rb") as image_file:
+                    jpeg_bytes = image_file.read()
+
+                stamp_us = _ns_to_us(ts_ns)
+                frame_writer.store_frame(
+                    image_binary_data=jpeg_bytes,
+                    image_format="jpeg",
+                    frame_timestamps_us=np.array([stamp_us, stamp_us], dtype=np.uint64),
+                    generic_data={},
+                    generic_meta_data={},
+                )
+
+        # The count below is the number of requested cameras, including skipped ones.
+        self.logger.info(f"Processed {len(camera_ids)} cameras")
+
+    # -------------------------------------------------------------------------
+    # Cuboid annotations
+    # -------------------------------------------------------------------------
+
+    def _decode_cuboids(
+        self,
+        log_dir: UPath,
+        store_writer: SequenceComponentGroupsWriter,
+        component_groups: ComponentGroupAssignments,
+    ) -> None:
+        """Convert ``annotations.feather`` rows into cuboid track observations.
+
+        Each annotation is stored in the ``rig`` frame at its own timestamp, i.e. in
+        the frame AV2 provides it in (egovehicle at the sweep reference time), with
+        no ego pose baked in. Keeping ego motion out of the stored coordinates lets
+        the pose graph place the cuboids with whichever ego trajectory is active
+        downstream (a V4 feature). Unlike lidar points, a cuboid is a single object
+        pose at the sweep reference time, so referencing it to ``rig`` at that
+        timestamp needs no per-point decompensation.
+
+        Rows whose category is missing from ``AV2_CATEGORY_MAP`` are dropped. When
+        ``annotations.feather`` is absent, or no row is mapped, the method only
+        logs that fact and no cuboids component is registered.
+        """
+        # Logs without an annotation file have nothing to store.
+        if not (log_dir / "annotations.feather").exists():
+            self.logger.info("No annotations.feather found (test split)")
+            return
+
+        table = read_annotations(log_dir)
+        row_count = len(table["category"])
+
+        # Column names in the order they are consumed below.
+        quat_keys = ("qw", "qx", "qy", "qz")
+        box_keys = ("tx_m", "ty_m", "tz_m", "length_m", "width_m", "height_m")
+
+        observations: List[CuboidTrackObservation] = []
+        for row in tqdm.tqdm(range(row_count), total=row_count, desc="Process cuboids"):
+            category = str(table["category"][row])
+            # Skip categories that have no entry in the class mapping.
+            if category not in AV2_CATEGORY_MAP:
+                continue
+
+            # Used both as the observation time and as the reference-frame time.
+            stamp_us = _ns_to_us(int(table["timestamp_ns"][row]))
+
+            # AV2 quaternions are scalar-first (wxyz). The full rotation is converted
+            # to "xyz" Euler angles for BBox3 rather than yaw alone, because AV2
+            # cuboids can carry roll/pitch (e.g. objects on slopes/banked roads) and
+            # a yaw-only extraction would silently drop that.
+            quat_wxyz = tuple(table[key][row] for key in quat_keys)
+            rx, ry, rz = R.from_quat(quat_wxyz, scalar_first=True).as_euler("xyz", degrees=False)
+
+            # BBox3 layout: centre (x, y, z), extents (length_m -> x, width_m -> y,
+            # height_m -> z), then the three Euler angles.
+            bbox3 = BBox3.from_array(
+                np.array(
+                    [*(table[key][row] for key in box_keys), rx, ry, rz],
+                    dtype=np.float32,
+                )
+            )
+
+            observations.append(
+                CuboidTrackObservation(
+                    track_id=str(table["track_uuid"][row]),
+                    class_id=AV2_CATEGORY_MAP[category],
+                    timestamp_us=stamp_us,
+                    reference_frame_id="rig",
+                    reference_frame_timestamp_us=stamp_us,
+                    bbox3=bbox3,
+                    source=LabelSource.EXTERNAL,
+                )
+            )
+
+        # Nothing mapped: leave the cuboids component unregistered.
+        if not observations:
+            self.logger.info("No mapped cuboid annotations found")
+            return
+
+        # Register the writer only once there is something to store.
+        cuboids_writer = store_writer.register_component_writer(
+            CuboidsComponent.Writer,
+            "default",
+            component_groups.cuboid_track_observations_component_group,
+        )
+        cuboids_writer.store_observations(observations)
+        self.logger.info(f"Stored {len(observations)} cuboid observations")
+
+
+# -----------------------------------------------------------------------------
+# CLI
+# -----------------------------------------------------------------------------
+
+
+@cli.command(name="argoverse2-v4")
+@click.option(
+    "--split",
+    type=str,
+    default="val",
+    show_default=True,
+    help="Argoverse 2 split directory under --root-dir (e.g. train, val, test)",
+)
+@click.option(
+    "--log-id",
+    type=str,
+    default=None,
+    help="Convert only the log with this ID (defaults to all logs in the split)",
+)
+@click.option(
+    "--store-type",
+    type=click.Choice(["itar", "directory"], case_sensitive=False),
+    default="itar",
+    show_default=True,
+    help="Output store type",
+)
+@click.option(
+    "component_group_profile",
+    "--profile",
+    type=click.Choice(["default", "separate-sensors", "separate-all"], case_sensitive=False),
+    default="separate-sensors",
+    show_default=True,
+    help="Output profile for component group assignment",
+)
+@click.option(
+    "store_sequence_meta",
+    "--sequence-meta/--no-sequence-meta",
+    default=True,
+    help="Generate sequence meta-data JSON?",
+)
+@click.option(
+    "lidar_model_source",
+    "--lidar-model-source",
+    type=click.Choice(["empirical", "none"], case_sensitive=False),
+    default="empirical",
+    show_default=True,
+    help="Lidar model: 'empirical' derives a VLP-32C structured model per unit (model "
+    "elements + intrinsics) from the data; 'none' stores raw ray bundles only.",
+)
+@click.pass_context
+def argoverse2_v4(ctx, split, log_id, **kwargs):
+    """Argoverse 2 Sensor Dataset conversion (V4 format)"""
+    # Attributes of ctx.obj come first; this command's own options override them.
+    settings = {**vars(ctx.obj), "split": split, "log_id": log_id, **kwargs}
+    config = Argoverse2Converter4Config(**settings)
+    Argoverse2Converter4.convert(config)
diff --git a/tools/data_converter/argoverse2/converter_test.py b/tools/data_converter/argoverse2/converter_test.py
new file mode 100644
index 00000000..c70bc25e
--- /dev/null
+++ b/tools/data_converter/argoverse2/converter_test.py
@@ -0,0 +1,379 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Integration tests for the Argoverse 2 data converter (V4 format).
+
+Requires the AV2_DIR environment variable pointing to an Argoverse 2 Sensor
+Dataset root directory organised as ``{AV2_DIR}/{split}/{log_id}/...``.
+
+Set AV2_SPLIT to override the default split (``val``). The first log in the split
+is used for testing.
+"""
+
+import os
+import tempfile
+import unittest
+
+from typing import Literal, cast
+
+import numpy as np
+
+from parameterized import parameterized_class
+from upath import UPath
+
+from ncore.impl.data.types import (
+    IdealPinholeCameraModelParameters,
+    RowOffsetStructuredSpinningLidarModelParameters,
+    ShutterType,
+)
+from ncore.impl.data.v4.components import (
+    CameraSensorComponent,
+    CuboidsComponent,
+    IntrinsicsComponent,
+    LidarSensorComponent,
+    PosesComponent,
+    RadarSensorComponent,
+    SequenceComponentGroupsReader,
+)
+from tools.data_converter.argoverse2.converter import Argoverse2Converter4, Argoverse2Converter4Config
+from tools.data_converter.argoverse2.utils import CAMERA_NAMES, LIDAR_NAMES, list_log_ids
+
+
+@parameterized_class(
+    ("store_type",),
+    [
+        ("itar",),
+        ("directory",),
+    ],
+)
+class TestArgoverse2Converter(unittest.TestCase):
+    """Integration tests for the Argoverse 2 data converter.
+
+    Requires AV2_DIR environment variable pointing to an Argoverse 2 Sensor Dataset
+    root. Uses the first log in the split for testing.
+    """
+
+    store_type: Literal["itar", "directory"]
+
+    @classmethod
+    def setUpClass(cls):
+        """Convert the first log of the split once; skip the class when AV2_DIR is unset."""
+        cls.av2_dir = os.environ.get("AV2_DIR")
+        if cls.av2_dir is None:
+            raise unittest.SkipTest("AV2_DIR not set -- skipping Argoverse 2 integration tests")
+
+        cls.split = os.environ.get("AV2_SPLIT", "val")
+        log_ids = list_log_ids(UPath(cls.av2_dir) / cls.split)
+        assert log_ids, f"No logs found under {cls.av2_dir}/{cls.split}"
+        cls.log_id = log_ids[0]
+
+        cls._tempdir = tempfile.TemporaryDirectory(prefix="argoverse2_test_")
+        # tearDownClass is not run when setUpClass raises, so register the cleanup up front.
+        cls.addClassCleanup(cls._tempdir.cleanup)
+        cls.output_dir = cls._tempdir.name
+
+        config = Argoverse2Converter4Config(
+            root_dir=cls.av2_dir,
+            output_dir=cls.output_dir,
+            no_cameras=False,
+            camera_ids=None,
+            no_lidars=False,
+            lidar_ids=None,
+            no_radars=False,
+            radar_ids=None,
+            verbose=False,
+            debug=False,
+            debug_port=5678,
+            split=cls.split,
+            log_id=cls.log_id,
+            store_type=cls.store_type,
+            component_group_profile="separate-sensors",
+            store_sequence_meta=True,
+        )
+        Argoverse2Converter4.convert(config)
+        seq_dirs = [d for d in UPath(cls.output_dir).iterdir() if d.is_dir()]
+        assert len(seq_dirs) == 1, f"Expected 1 sequence dir, found {len(seq_dirs)}: {seq_dirs}"
+        cls.seq_dir = seq_dirs[0]
+        meta_files = list(cls.seq_dir.glob("*.json"))
+        assert len(meta_files) == 1, f"Expected 1 meta JSON, found {len(meta_files)}"
+        cls.reader = SequenceComponentGroupsReader([meta_files[0]])
+
+    @classmethod
+    def tearDownClass(cls):
+        cls._tempdir.cleanup()  # delete the temporary output directory created in setUpClass
+
+    # --- Poses ----------------------------------------------------------------
+
+    def test_sequence_has_dynamic_rig_to_world_pose(self):
+        """Exactly one poses component exists and it holds a non-empty rig -> world track."""
+        readers = self.reader.open_component_readers(PosesComponent.Reader)
+        self.assertEqual(len(readers), 1)
+        track, stamps = list(readers.values())[0].get_dynamic_pose("rig", "world")
+        pose_count = track.shape[0]
+        self.assertEqual(track.shape[1:], (4, 4))
+        self.assertGreater(pose_count, 0)
+        self.assertEqual(stamps.shape[0], pose_count)
+
+    def test_sequence_has_static_world_to_world_global(self):
+        """The static world -> world_global anchor (AV2 city frame) is stored as a 4x4 pose."""
+        poses_reader = list(self.reader.open_component_readers(PosesComponent.Reader).values())[0]
+        stored_edges = dict(poses_reader.get_static_poses())
+        anchor_key = ("world", "world_global")
+
+        self.assertIn(anchor_key, stored_edges)
+        self.assertEqual(stored_edges[anchor_key].shape, (4, 4))
+
+    def test_first_real_pose_near_identity(self):
+        """Some pose of the stored rig -> world trajectory is (near) identity.
+
+        The first real pose's city_SE3_egovehicle is the world_global anchor, so
+        its relative rig -> world pose must be (near) identity. Because boundary
+        extrapolation may prepend an extra pose, the pose closest to identity is
+        checked instead of a fixed index.
+        """
+        poses_reader = list(self.reader.open_component_readers(PosesComponent.Reader).values())[0]
+        poses, _ = poses_reader.get_dynamic_pose("rig", "world")
+
+        identity = np.eye(4, dtype=np.float32)
+        closest = int(np.argmin(np.linalg.norm(poses - identity, axis=(1, 2))))
+        np.testing.assert_array_almost_equal(poses[closest], identity, decimal=3)
+
+    # --- Cameras --------------------------------------------------------------
+
+    def test_nine_cameras_exist(self):
+        readers = self.reader.open_component_readers(CameraSensorComponent.Reader)
+        self.assertEqual(set(readers.keys()), set(CAMERA_NAMES))
+        for name, reader in readers.items():
+            self.assertGreater(reader.frames_count, 0, f"{name} should have frames")
+
+    def test_original_distortion_coefficients_preserved_in_metadata(self):
+        # Images are undistorted (distortion-free stored model); the original lens
+        # k1/k2/k3 must nevertheless survive as per-camera provenance metadata.
+        expected_keys = {"k1", "k2", "k3"}
+        for cam_id, cam_reader in self.reader.open_component_readers(CameraSensorComponent.Reader).items():
+            meta = cam_reader.generic_meta_data
+            self.assertIn("av2_original_distortion", meta, f"{cam_id} missing distortion provenance")
+            coefficients = meta["av2_original_distortion"]
+            self.assertEqual(set(coefficients), expected_keys, f"{cam_id} distortion keys")
+            for name, coefficient in coefficients.items():
+                self.assertIsInstance(coefficient, float, f"{cam_id} {name} should be a float")
+
+    def test_camera_intrinsics_ideal_pinhole_global_shutter(self):
+        """Every camera is an ideal pinhole with global shutter and positive focal lengths."""
+        readers = self.reader.open_component_readers(IntrinsicsComponent.Reader)
+        self.assertEqual(len(readers), 1)
+        (intrinsics_reader,) = readers.values()
+
+        for camera_id in CAMERA_NAMES:
+            model = intrinsics_reader.get_camera_model_parameters(camera_id)
+            # AV2 imagery is shipped undistorted, so a distortion-free pinhole is exact.
+            self.assertIsInstance(model, IdealPinholeCameraModelParameters)
+            pinhole = cast(IdealPinholeCameraModelParameters, model)
+            self.assertEqual(pinhole.shutter_type, ShutterType.GLOBAL)
+            self.assertTrue(np.all(pinhole.focal_length > 0))
+
+    def test_camera_extrinsics_stored_as_static_poses(self):
+        """Every camera has a static ``(<camera>, "rig")`` pose entry."""
+        poses_reader = list(self.reader.open_component_readers(PosesComponent.Reader).values())[0]
+        static_poses = dict(poses_reader.get_static_poses())
+
+        for camera_id in CAMERA_NAMES:
+            self.assertIn((camera_id, "rig"), static_poses)
+
+    # --- Lidar ----------------------------------------------------------------
+
+    def test_two_lidar_units_exist(self):
+        readers_by_lidar = self.reader.open_component_readers(LidarSensorComponent.Reader)
+        self.assertEqual(set(readers_by_lidar.keys()), set(LIDAR_NAMES))  # exactly the expected units
+        for unit_id, unit_reader in readers_by_lidar.items():
+            self.assertGreater(unit_reader.frames_count, 0, f"{unit_id} should have frames")
+
+    def test_lidar_extrinsics_stored_as_static_poses(self):
+        """Every lidar unit has a static ``(<lidar>, "rig")`` pose entry."""
+        poses_reader = list(self.reader.open_component_readers(PosesComponent.Reader).values())[0]
+        static_poses = dict(poses_reader.get_static_poses())
+
+        for unit_id in LIDAR_NAMES:
+            self.assertIn((unit_id, "rig"), static_poses)
+
+    def test_lidar_directions_unit_norm(self):
+        """Populated ``up_lidar`` ray directions of the first frame have unit length."""
+        up_reader = self.reader.open_component_readers(LidarSensorComponent.Reader)["up_lidar"]
+        frame_end_us = int(up_reader.frames_timestamps_us[0, 1])  # frames are keyed by end-of-frame time
+        lengths = np.linalg.norm(up_reader.get_frame_ray_bundle_data(frame_end_us, "direction"), axis=1)
+
+        # Zero-distance rays may have zero direction; check the populated ones.
+        populated = lengths[lengths > 0]
+        np.testing.assert_allclose(populated, 1.0, atol=1e-4)
+
+    def test_lidar_unit_split_recovered_from_geometry(self):
+        """Both units hold comparable first-frame point counts (~half the sweep each).
+
+        Each VLP-32C contributes 32 of the 64 beams, so a correct split yields
+        roughly balanced per-unit point counts (allowing for differing FOV
+        occupancy).
+        """
+        lidar_readers = self.reader.open_component_readers(LidarSensorComponent.Reader)
+        counts = {}
+        for unit in ("up_lidar", "down_lidar"):
+            unit_reader = lidar_readers[unit]
+            frame_end_us = int(unit_reader.frames_timestamps_us[0, 1])
+            counts[unit] = len(unit_reader.get_frame_ray_bundle_data(frame_end_us, "direction"))
+        fewest, most = min(counts.values()), max(counts.values())
+        self.assertGreater(fewest / most, 0.5, f"Lidar unit point counts unbalanced: {counts}")
+
+    # --- Lidar structured model -----------------------------------------------
+
+    def test_lidar_intrinsics_vlp32c_model_per_unit(self):
+        """Each unit's intrinsics are a 32-row VLP-32C structured spinning-lidar model."""
+        intrinsics_reader = list(self.reader.open_component_readers(IntrinsicsComponent.Reader).values())[0]
+        for lidar_id in LIDAR_NAMES:
+            params = intrinsics_reader.get_lidar_model_parameters(lidar_id)
+            self.assertIsInstance(params, RowOffsetStructuredSpinningLidarModelParameters)
+            structured = cast(RowOffsetStructuredSpinningLidarModelParameters, params)
+            self.assertEqual(structured.n_rows, 32)
+            self.assertGreater(structured.n_columns, 100)
+            self.assertIn(structured.spinning_direction, ("cw", "ccw"))
+
+    def test_lidar_model_elements_in_bounds(self):
+        """Per-point ``model_element`` pairs index valid (row, column) cells of the model."""
+        intrinsics_reader = list(self.reader.open_component_readers(IntrinsicsComponent.Reader).values())[0]
+        lidar_readers = self.reader.open_component_readers(LidarSensorComponent.Reader)
+        for lidar_id in LIDAR_NAMES:
+            params = cast(
+                RowOffsetStructuredSpinningLidarModelParameters,
+                intrinsics_reader.get_lidar_model_parameters(lidar_id),
+            )
+            unit_reader = lidar_readers[lidar_id]
+            frame_end_us = int(unit_reader.frames_timestamps_us[0, 1])
+            elements = unit_reader.get_frame_ray_bundle_data(frame_end_us, "model_element")
+            self.assertEqual(elements.shape[1], 2)
+            self.assertEqual(elements.dtype, np.uint16)
+            self.assertTrue(np.all(elements[:, 0] < params.n_rows), "row index out of bounds")
+            self.assertTrue(np.all(elements[:, 1] < params.n_columns), "column index out of bounds")
+
+    def test_lidar_model_reconstructs_directions(self):
+        """Model-predicted directions match the stored native directions (far range).
+
+        Validates the firing-pattern reconstruction: the structured model, indexed
+        by the stored per-point (row, column), should reproduce the stored ray
+        directions to within a small angular error for far-range returns.
+        """
+        from ncore.impl.sensors.lidar import StructuredLidarModel
+
+        intrinsics_reader = list(self.reader.open_component_readers(IntrinsicsComponent.Reader).values())[0]
+        lidar_readers = self.reader.open_component_readers(LidarSensorComponent.Reader)
+        for unit in LIDAR_NAMES:
+            params = cast(
+                RowOffsetStructuredSpinningLidarModelParameters,
+                intrinsics_reader.get_lidar_model_parameters(unit),
+            )
+            unit_reader = lidar_readers[unit]
+            frame_end_us = int(unit_reader.frames_timestamps_us[0, 1])
+            stored_dirs = unit_reader.get_frame_ray_bundle_data(frame_end_us, "direction")
+            elements = unit_reader.get_frame_ray_bundle_data(frame_end_us, "model_element")
+            distance_m = np.asarray(unit_reader._get_ray_bundle_returns_group(frame_end_us)["distance_m"])[0]
+
+            is_far = np.isfinite(distance_m) & (distance_m > 20.0) & (np.linalg.norm(stored_dirs, axis=1) > 0)
+            n_far = int(is_far.sum())
+            self.assertGreater(n_far, 100, f"{unit}: too few far returns to validate")
+
+            model = StructuredLidarModel.maybe_from_parameters(params, device="cpu")
+            assert model is not None
+            predicted = model.elements_to_sensor_points(elements[is_far], np.ones(n_far, dtype=np.float32))
+            predicted_dirs = predicted.cpu().numpy()
+            predicted_dirs = predicted_dirs / np.linalg.norm(predicted_dirs, axis=1, keepdims=True)
+            cos = np.clip(np.sum(predicted_dirs * stored_dirs[is_far], axis=1), -1.0, 1.0)
+            median_err_deg = float(np.degrees(np.median(np.arccos(cos))))
+            # The model is rebuilt from offset_ns + laser_number on the decompensated cloud with
+            # empirical per-row azimuth offsets, which yields sub-0.1 deg far-range reconstruction.
+            self.assertLess(median_err_deg, 0.2, f"{unit}: model direction error {median_err_deg:.3f} deg too high")
+
+    # --- No radar -------------------------------------------------------------
+
+    def test_no_radar(self):
+        """AV2 ships no radar, so no radar component reader may exist."""
+        self.assertEqual(len(self.reader.open_component_readers(RadarSensorComponent.Reader)), 0)
+
+    # --- Cuboids --------------------------------------------------------------
+
+    def test_cuboids_in_rig_frame(self):
+        """Cuboid observations reference the native ``rig`` frame (no ego pose baked in)."""
+        readers = self.reader.open_component_readers(CuboidsComponent.Reader)
+        if not readers:
+            self.skipTest("No cuboids (test split)")
+        observations = list(list(readers.values())[0].get_observations())
+        self.assertGreater(len(observations), 0)
+        # Spot-check the first 50 observations only.
+        for observation in observations[:50]:
+            self.assertEqual(observation.reference_frame_id, "rig")
+
+    def test_cuboids_align_with_lidar(self):
+        """A reasonable fraction of lidar points fall inside annotated cuboids.
+
+        This is the regression guard for the lidar decompensation reference bug: if
+        the points were decompensated against the wrong reference (or the cuboids
+        mis-referenced), almost no points would land inside the boxes. We transform
+        the stored (decompensated) first-frame lidar points to ``world`` via their
+        own per-point rig pose, transform the active cuboids from ``rig`` at the
+        sweep timestamp to ``world``, and count points inside.
+        """
+        from ncore.impl.common.transformations import is_within_3d_bboxes, transform_bbox
+
+        cuboid_readers = self.reader.open_component_readers(CuboidsComponent.Reader)
+        if not cuboid_readers:
+            self.skipTest("No cuboids (test split)")
+
+        poses_reader = list(self.reader.open_component_readers(PosesComponent.Reader).values())[0]
+        lidar_reader = self.reader.open_component_readers(LidarSensorComponent.Reader)["up_lidar"]
+        # Reader frames are keyed by the end-of-frame timestamp. The cuboid reference
+        # timestamp is the AV2 sweep reference time, i.e. the start of the point window
+        # (offset_ns runs forward from it).
+        cuboid_ts, frame_end_us = (int(v) for v in lidar_reader.frames_timestamps_us[0])
+
+        # Each lidar point is in its own per-point-time sensor frame; transform via
+        # sensor -> rig (static) and rig -> world at the point's own timestamp.
+        T_up_rig = dict(poses_reader.get_static_poses())[("up_lidar", "rig")]
+        rig_poses, pose_ts = poses_reader.get_dynamic_pose("rig", "world")
+        pose_ts = pose_ts.astype(np.int64)  # signed, so timestamp differences cannot wrap
+
+        direction = lidar_reader.get_frame_ray_bundle_data(frame_end_us, "direction")
+        distance = np.asarray(lidar_reader._get_ray_bundle_returns_group(frame_end_us)["distance_m"])[0]
+        valid = np.isfinite(distance) & (distance > 0)
+        point_ts = lidar_reader.get_frame_ray_bundle_data(frame_end_us, "timestamp_us").astype(np.int64)[valid]
+        pts_sensor = direction[valid] * distance[valid, None]
+        pts_rig = (T_up_rig[:3, :3] @ pts_sensor.T).T + T_up_rig[:3, 3]
+        # Per-point rig -> world using the nearest stored pose (poses are dense): pick
+        # whichever bracketing pose (just before / just after the point time) is closer.
+        after = np.searchsorted(pose_ts, point_ts).clip(1, len(pose_ts) - 1)
+        before = after - 1
+        nearest = np.where(point_ts - pose_ts[before] <= pose_ts[after] - point_ts, before, after)
+        T_pts = rig_poses[nearest]
+        pts_world = np.einsum("nij,nj->ni", T_pts[:, :3, :3], pts_rig) + T_pts[:, :3, 3]
+
+        observations = list(list(cuboid_readers.values())[0].get_observations())
+        # cuboids active at this sweep (cuboid ref ts == sweep start)
+        active = [o for o in observations if abs(o.reference_frame_timestamp_us - cuboid_ts) < 2000]
+        self.assertGreater(len(active), 0, "no cuboids active at first lidar frame")
+
+        # Cuboids are in rig at the sweep timestamp; bring them to world via the
+        # rig pose closest to that timestamp.
+        T_rig_world_cuboid = rig_poses[int(np.argmin(np.abs(pose_ts - cuboid_ts)))]
+        boxes = np.stack([transform_bbox(o.bbox3.to_array().astype(np.float64), T_rig_world_cuboid) for o in active])
+        inside = is_within_3d_bboxes(pts_world.astype(np.float64), boxes.astype(np.float64))
+        n_inside = int(inside.any(axis=1).sum())
+        self.assertGreater(
+            n_inside, 50, f"only {n_inside} lidar points inside any cuboid -- likely a frame/timestamp shift"
+        )
diff --git a/tools/data_converter/argoverse2/main.py b/tools/data_converter/argoverse2/main.py
new file mode 100644
index 00000000..aa60f7c4
--- /dev/null
+++ b/tools/data_converter/argoverse2/main.py
@@ -0,0 +1,23 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Argoverse 2 converter CLI entry point."""
+
+from tools.data_converter.argoverse2.converter import argoverse2_v4  # noqa: F401 -- registers CLI command
+from tools.data_converter.cli import cli
+
+
+if __name__ == "__main__":
+    cli(show_default=True)
diff --git a/tools/data_converter/argoverse2/utils.py b/tools/data_converter/argoverse2/utils.py
new file mode 100644
index 00000000..c8b7f93d
--- /dev/null
+++ b/tools/data_converter/argoverse2/utils.py
@@ -0,0 +1,661 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Argoverse 2 specific utilities for the NCore V4 converter.
+
+This module reads the Argoverse 2 Sensor Dataset directly from its on-disk
+Apache Feather files using ``pyarrow`` only, deliberately avoiding the heavy
+``av2`` devkit (which pulls in torch, kornia, numba, polars and PyAV). Quaternions
+are converted with ``scipy.spatial.transform.Rotation`` (already an ncore
+dependency), so no extra runtime dependency is introduced.
+
+Reference (sourced from github.com/argoverse/av2-api and the AV2 User Guide):
+
+- Lidar sweeps are *egomotion-compensated* to the sweep reference timestamp and
+  stored in the **egovehicle** frame (not the individual sensor frame). The
+  feather columns are ``x, y, z, intensity, laser_number, offset_ns``.
+  Per-point absolute time is ``sweep_timestamp_ns + offset_ns``.
+- The released imagery is **already undistorted**, so a pinhole model with zero
+  distortion is exact and **global shutter is assumed** on that basis. The
+  original lens radial-distortion coefficients ``(k1, k2, k3)`` are returned by
+  :func:`read_intrinsics` for provenance but are never applied.
+- All quaternions are scalar-first ``(qw, qx, qy, qz)``.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Dict, List, Literal, Optional
+
+import numpy as np
+import pyarrow.feather as feather
+
+from scipy.spatial.transform import Rotation as R
+from upath import UPath
+
+from ncore.impl.common.transformations import se3_inverse
+from ncore.impl.data.types import (
+    IdealPinholeCameraModelParameters,
+    RowOffsetStructuredSpinningLidarModelParameters,
+    ShutterType,
+)
+from tools.data_converter.structured_lidar_model import enforce_spinning_monotonic
+
+
+# --- Feather reading (no pandas) -----------------------------------------------
+# We read Arrow tables directly and pull columns out as numpy arrays. This avoids
+# pulling pandas into the dependency closure (pyarrow.read_feather would default to
+# a pandas DataFrame).
+
+
+def _read_columns(path: UPath) -> Dict[str, np.ndarray]:
+    """Load every column of a feather file as a ``column_name -> numpy array`` mapping."""
+    arrow_table = feather.read_table(str(path))
+    return {col: arrow_table.column(col).to_numpy(zero_copy_only=False) for col in arrow_table.column_names}
+
+
+# --- Sensor ID mappings --------------------------------------------------------
+# Argoverse 2 sensor names are already descriptive; we keep them verbatim as the
+# NCore sensor IDs so that any downstream alignment with AV2 map / metadata stays
+# unambiguous.
+
+# All nine global-shutter cameras (7 ring + 2 stereo).
+CAMERA_NAMES: List[str] = [
+    "ring_front_center",
+    "ring_front_left",
+    "ring_front_right",
+    "ring_side_left",
+    "ring_side_right",
+    "ring_rear_left",
+    "ring_rear_right",
+    "stereo_front_left",
+    "stereo_front_right",
+]
+
+# The two stacked Velodyne VLP-32C units.
+LIDAR_NAMES: List[str] = ["up_lidar", "down_lidar"]
+
+# Number of beams per VLP-32C unit. laser_number spans [0, 63] across both units.
+VLP32C_N_BEAMS: int = 32
+
+# VLP-32C spins at 10 Hz (~100 ms per revolution). Both stacked units share this.
+VLP32C_SCAN_DURATION_US: int = 100_000
+VLP32C_SPINNING_FREQUENCY_HZ: float = 10.0
+# Note: the apparent spin direction in a unit's own frame is detected per unit
+# (the two stacked units fire in opposite phase), not assumed.
+
+# AV2 ships no radar.
+
+# --- Annotation taxonomy -------------------------------------------------------
+# Argoverse 2 3D cuboid categories (the 30-class `AnnotationCategories` taxonomy)
+# mapped to NCore class IDs. AV2 category strings are upper snake-case.
+AV2_CATEGORY_MAP: Dict[str, str] = {
+    "REGULAR_VEHICLE": "car",
+    "LARGE_VEHICLE": "truck",
+    "BOX_TRUCK": "truck",
+    "TRUCK": "truck",
+    "TRUCK_CAB": "truck",
+    "VEHICULAR_TRAILER": "trailer",
+    "SCHOOL_BUS": "bus",
+    "ARTICULATED_BUS": "bus",
+    "BUS": "bus",
+    "MESSAGE_BOARD_TRAILER": "trailer",
+    "RAILED_VEHICLE": "vehicle",
+    "MOTORCYCLE": "motorcycle",
+    "MOTORCYCLIST": "motorcyclist",
+    "BICYCLE": "bicycle",
+    "BICYCLIST": "bicyclist",
+    "WHEELED_DEVICE": "wheeled_device",
+    "WHEELED_RIDER": "wheeled_rider",
+    "PEDESTRIAN": "pedestrian",
+    "OFFICIAL_SIGNALER": "pedestrian",
+    "STROLLER": "stroller",
+    "WHEELCHAIR": "wheelchair",
+    "DOG": "animal",
+    "ANIMAL": "animal",
+    "CONSTRUCTION_CONE": "traffic_cone",
+    "CONSTRUCTION_BARREL": "barrier",
+    "STOP_SIGN": "stop_sign",
+    "BOLLARD": "bollard",
+    "SIGN": "sign",
+    "MOBILE_PEDESTRIAN_CROSSING_SIGN": "sign",
+    "TRAFFIC_LIGHT_TRAILER": "trailer",
+}
+
+
+# --- Pose / quaternion helpers -------------------------------------------------
+
+
+def se3_from_qwxyz_t(qw: float, qx: float, qy: float, qz: float, tx: float, ty: float, tz: float) -> np.ndarray:
+    """Assemble a 4x4 float64 SE(3) matrix from a quaternion and a translation.
+
+    The quaternion is scalar-first ``(qw, qx, qy, qz)``, which is how Argoverse 2 stores rotations.
+    """
+    rotation = R.from_quat([qw, qx, qy, qz], scalar_first=True)
+    transform = np.eye(4, dtype=np.float64)
+    transform[:3, :3], transform[:3, 3] = rotation.as_matrix(), (tx, ty, tz)
+    return transform
+
+
+# --- Dataset layout / feather readers ------------------------------------------
+
+
+def list_log_ids(split_dir: UPath) -> List[str]:
+    """Return the sorted names of the sub-directories (one per log ID) of a split directory."""
+    return sorted(entry.name for entry in split_dir.iterdir() if entry.is_dir())
+
+
+def read_city_se3_ego(log_dir: UPath) -> tuple[np.ndarray, np.ndarray]:
+    """Load the ego trajectory from ``city_SE3_egovehicle.feather`` (at the log root).
+
+    Rows are re-ordered by ascending ``timestamp_ns`` before conversion, so the two
+    returned arrays are aligned with each other and time-sorted.
+
+    Returns:
+        timestamps_ns: [N] uint64 sweep/pose timestamps (sorted ascending).
+        T_ego_city: [N, 4, 4] float64 poses (egovehicle -> city/global frame).
+    """
+    pose_fields = ("qw", "qx", "qy", "qz", "tx_m", "ty_m", "tz_m")
+    cols = _read_columns(log_dir / "city_SE3_egovehicle.feather")
+    order = np.argsort(cols["timestamp_ns"])
+    timestamps_ns = cols["timestamp_ns"][order].astype(np.uint64)
+
+    # One 4x4 transform per row, built in timestamp order.
+    sorted_pose_cols = [cols[field][order] for field in pose_fields]
+    T_ego_city = np.stack(
+        [
+            se3_from_qwxyz_t(qw, qx, qy, qz, tx, ty, tz)
+            for qw, qx, qy, qz, tx, ty, tz in zip(*sorted_pose_cols)
+        ]
+    )
+
+    return timestamps_ns, T_ego_city
+
+
+def read_ego_se3_sensor(log_dir: UPath) -> Dict[str, np.ndarray]:
+    """Load the sensor extrinsics from ``calibration/egovehicle_SE3_sensor.feather``.
+
+    Each row holds a sensor name plus a scalar-first quaternion and a translation.
+
+    Returns:
+        A mapping ``sensor_name -> T_sensor_ego`` (4x4, sensor-frame point ->
+        egovehicle frame). A repeated sensor name keeps its last row.
+    """
+    cols = _read_columns(log_dir / "calibration" / "egovehicle_SE3_sensor.feather")
+    sensor_names = cols["sensor_name"]
+    pose_fields = ("qw", "qx", "qy", "qz", "tx_m", "ty_m", "tz_m")
+    pose_columns = [cols[field] for field in pose_fields]
+
+    # Walk the table row-wise: one name and one 7-tuple of pose values per sensor.
+    return {
+        str(name): se3_from_qwxyz_t(*pose_values)
+        for name, pose_values in zip(sensor_names, zip(*pose_columns))
+    }
+
+
+@dataclass(frozen=True)
+class CameraIntrinsics:
+    """An AV2 camera's ideal-pinhole model plus its original distortion provenance.
+
+    AV2 ships already-undistorted imagery, so the converted ``model`` is a
+    distortion-free ideal pinhole. The original lens radial-distortion coefficients
+    ``(k1, k2, k3)`` from ``intrinsics.feather`` are kept here purely as provenance
+    (they describe the *raw* lens and are never applied to the released images).
+    """
+
+    model: IdealPinholeCameraModelParameters  # distortion-free pinhole for the released images
+    original_distortion_k1k2k3: tuple[float, float, float]  # raw-lens (k1, k2, k3); provenance only
+
+
+def read_intrinsics(
+    log_dir: UPath,
+) -> Dict[str, CameraIntrinsics]:
+    """Load ``calibration/intrinsics.feather`` as one ideal-pinhole model per camera.
+
+    AV2 imagery is shipped already undistorted: the official av2 devkit projects
+    with the intrinsic matrix K only (``PinholeCamera.project_ego_to_img``) and its
+    ``Intrinsics`` dataclass does not even load the ``k1, k2, k3`` columns of the
+    file. Those coefficients describe the *original* lens (for re-distorting into
+    the raw frame) and must not be applied to the released images, so an ideal
+    (distortion-free) pinhole is the exact model. On the same basis the cameras
+    are modelled as global shutter.
+
+    Returns:
+        A mapping ``sensor_name -> CameraIntrinsics`` pairing the pinhole model with
+        the raw ``(k1, k2, k3)`` coefficients, kept purely as provenance metadata.
+    """
+    cols = _read_columns(log_dir / "calibration" / "intrinsics.feather")
+    intrinsics_by_camera: Dict[str, CameraIntrinsics] = {}
+    for row, sensor_name in enumerate(cols["sensor_name"]):
+        resolution_wh = [int(cols["width_px"][row]), int(cols["height_px"][row])]
+        pinhole = IdealPinholeCameraModelParameters(
+            resolution=np.array(resolution_wh, dtype=np.uint64),
+            shutter_type=ShutterType.GLOBAL,
+            external_distortion_parameters=None,
+            principal_point=np.array([cols["cx_px"][row], cols["cy_px"][row]], dtype=np.float32),
+            focal_length=np.array([cols["fx_px"][row], cols["fy_px"][row]], dtype=np.float32),
+        )
+        # Raw lens coefficients: stored for provenance, never applied.
+        k1, k2, k3 = (float(cols[key][row]) for key in ("k1", "k2", "k3"))
+        intrinsics_by_camera[str(sensor_name)] = CameraIntrinsics(
+            model=pinhole,
+            original_distortion_k1k2k3=(k1, k2, k3),
+        )
+
+    return intrinsics_by_camera
+
+
+@dataclass(frozen=True)
+class LidarSweep:
+    """A single AV2 lidar sweep, in the egovehicle frame.
+
+    xyz are egomotion-compensated to ``timestamp_ns``; per-point absolute time is
+    ``timestamp_ns + offset_ns``.
+    """
+
+    xyz: np.ndarray  # [N, 3] float32 point coordinates, egovehicle frame
+    intensity: np.ndarray  # [N] float32 in [0, 1] (raw intensity / 255)
+    laser_number: np.ndarray  # [N] uint8 in [0, 63], shared across both lidar units
+    offset_ns: np.ndarray  # [N] int64, per-point offset from ``timestamp_ns``
+    timestamp_ns: int  # sweep reference timestamp (parsed from the file name)
+
+
+def read_lidar_sweep(path: UPath) -> LidarSweep:
+    """Load one lidar sweep feather file into a :class:`LidarSweep`.
+
+    The sweep reference timestamp (ns) is parsed from the file stem. Coordinates
+    and intensity are cast to float32, with intensity divided by 255; laser
+    numbers become uint8 and per-point offsets int64.
+    """
+    cols = _read_columns(path)
+    sweep_timestamp_ns = int(UPath(path).stem)
+    xyz = np.stack([cols[axis].astype(np.float32) for axis in ("x", "y", "z")], axis=1)
+    intensity = cols["intensity"].astype(np.float32) / 255.0
+    return LidarSweep(
+        xyz=xyz,
+        intensity=intensity,
+        laser_number=cols["laser_number"].astype(np.uint8),
+        offset_ns=cols["offset_ns"].astype(np.int64),
+        timestamp_ns=sweep_timestamp_ns,
+    )
+
+
+def read_annotations(log_dir: UPath) -> Dict[str, np.ndarray]:
+    """Read the log's ``annotations.feather`` into a ``column_name -> numpy array`` mapping."""
+    return _read_columns(log_dir / "annotations.feather")
+
+
+def list_sensor_timestamps(log_dir: UPath, sensor_kind: str, sensor_name: Optional[str] = None) -> List[int]:
+    """List the sorted nanosecond timestamps available for a sensor stream.
+
+    Args:
+        sensor_kind: ``"lidar"`` or ``"cameras"``; anything else raises ``ValueError``.
+        sensor_name: camera name (ignored for lidar); ``ValueError`` if missing for ``"cameras"``.
+    """
+    if sensor_kind == "lidar":
+        sensor_dir, suffix = log_dir / "sensors" / "lidar", ".feather"
+    elif sensor_kind == "cameras":
+        if sensor_name is None:  # explicit check: a bare ``assert`` is stripped under ``python -O``
+            raise ValueError("sensor_name required for cameras")
+        sensor_dir, suffix = log_dir / "sensors" / "cameras" / sensor_name, ".jpg"
+    else:
+        raise ValueError(f"Unknown sensor_kind: {sensor_kind}")
+
+    if not sensor_dir.exists():  # a stream whose directory is absent has no timestamps
+        return []
+
+    return sorted(int(p.stem) for p in sensor_dir.iterdir() if p.name.endswith(suffix))
+
+
+def assign_lidar_units(
+    laser_number: np.ndarray,
+    xyz_ego: np.ndarray,
+    T_up_ego: np.ndarray,
+    T_down_ego: np.ndarray,
+) -> Dict[str, np.ndarray]:
+    """Split an aggregated AV2 sweep into ``up_lidar`` and ``down_lidar`` point masks.
+
+    Argoverse 2 distributes one aggregated sweep from two stacked Velodyne VLP-32C
+    units whose 64 beams share a single ``laser_number`` range ``[0, 63]``. Which
+    half belongs to which unit is not documented in the AV2 devkit, so it is
+    recovered from the geometry of the calibrated extrinsics.
+
+    Points are first split into the two laser-number halves (``< 32`` and
+    ``>= 32``); empirically these are the two physical sensors (at any shared
+    ``offset_ns`` they point ~180 deg apart in the ego frame). The halves are then
+    labelled via per-beam elevation flatness: a single laser ring traces a cone of
+    (nearly) constant elevation only in its own sensor frame, so mapping a half
+    into the wrong unit's extrinsic tilts that cone (the units differ in
+    pitch/roll) and inflates the per-ring elevation spread. The labelling with the
+    smaller summed per-ring elevation spread wins; the two candidates are separated
+    by a wide, stable margin (~2-10x). Ties keep the low half as ``up_lidar``.
+
+    Returns:
+        A mapping ``unit_name -> boolean point mask``.
+    """
+    lo_beams = np.arange(VLP32C_N_BEAMS)
+    hi_beams = np.arange(VLP32C_N_BEAMS, 2 * VLP32C_N_BEAMS)
+
+    def spread(beams: np.ndarray, T_unit_ego: np.ndarray) -> float:
+        return _ring_elevation_spread(laser_number, xyz_ego, beams, T_unit_ego)
+
+    # Labelling A maps lo -> up_lidar and hi -> down_lidar; labelling B is the swap.
+    cost_a = spread(lo_beams, T_up_ego) + spread(hi_beams, T_down_ego)
+    cost_b = spread(lo_beams, T_down_ego) + spread(hi_beams, T_up_ego)
+
+    lo_mask = laser_number < VLP32C_N_BEAMS
+    up_mask, down_mask = (~lo_mask, lo_mask) if cost_b < cost_a else (lo_mask, ~lo_mask)
+    return {"up_lidar": up_mask, "down_lidar": down_mask}
+
+
+def _ring_elevation_spread(
+    laser_number: np.ndarray,
+    xyz_ego: np.ndarray,
+    beams: np.ndarray,
+    T_unit_ego: np.ndarray,
+    min_valid_distance_m: float = 2.0,
+    min_ring_points: int = 10,
+) -> float:
+    """Score how flat each laser ring is when ``beams`` are viewed from one unit's frame.
+
+    Points are moved from the ego frame into the unit frame (inverse of ``T_unit_ego``).
+    In the correct sensor frame a ring has near-constant elevation across azimuth, so
+    a small score indicates the correct extrinsic. The score is the mean per-ring
+    elevation standard deviation in degrees, or ``inf`` if no ring qualifies.
+
+    Args:
+        min_valid_distance_m: ignore near-range returns when estimating elevation.
+        min_ring_points: minimum returns for a ring to contribute to the estimate.
+    """
+    T_ego_unit = se3_inverse(T_unit_ego)
+    pts_unit = (T_ego_unit[:3, :3] @ xyz_ego.T).T + T_ego_unit[:3, 3]
+    ranges = np.linalg.norm(pts_unit, axis=1)
+    beyond_min_range = ranges > min_valid_distance_m
+    ring_std_deg: List[float] = []
+    for beam in beams:
+        in_ring = (laser_number == beam) & beyond_min_range
+        if int(in_ring.sum()) >= min_ring_points:
+            sin_elev = np.clip(pts_unit[in_ring, 2] / ranges[in_ring], -1.0, 1.0)
+            ring_std_deg.append(float(np.std(np.degrees(np.arcsin(sin_elev)))))
+
+    return float(np.mean(ring_std_deg)) if ring_std_deg else float("inf")
+
+
+# --- Structured VLP-32C lidar model --------------------------------------------
+# AV2 provides no native firing-column index, only per-point ``laser_number``
+# (0..31 within a unit) and ``offset_ns``. The two are a faithful proxy for the
+# firing pattern: ``offset_ns`` quantizes into firing columns (one VLP-32C
+# revolution at 10 Hz), and ``laser_number`` selects the beam (row). This lets us
+# reconstruct the rows x columns structure required for a structured spinning
+# lidar model and reuse the generic ``structured_lidar_model`` library.
+
+
+@dataclass(frozen=True)
+class Vlp32cGeometry:
+    """Per-unit VLP-32C geometry recovered from a reference sweep.
+
+    Derived empirically (and stably) from the data rather than a hard-coded spec
+    table, so it self-corrects to the dataset's actual calibration.
+    """
+
+    elevations_rad: np.ndarray  # [32] float32 per-row elevation, model row order (high -> low)
+    laser_to_row: np.ndarray  # [32] int, maps laser_number (0..31) -> model row index
+    column_period_ns: float  # firing-column period (one beam refire interval)
+    n_columns: int  # upsampled columns per revolution (native_columns * resolution_factor)
+    spinning_direction: Literal["cw", "ccw"]  # apparent spin in this unit's own frame
+    column_azimuths_rad: np.ndarray  # [n_columns] float32, per-(upsampled-)column azimuth
+    row_azimuth_offsets_rad: np.ndarray  # [32] float32, per-row azimuth offset from the column
+    resolution_factor: int  # column upsampling factor (1 = native firing resolution)
+
+
+def derive_vlp32c_geometry(
+    xyz_decompensated: np.ndarray,
+    laser_number_in_unit: np.ndarray,
+    offset_ns: np.ndarray,
+    min_valid_distance_m: float = 2.0,
+    far_range_m: float = 5.0,
+    n_refine_iterations: int = 5,
+    resolution_factor: int = 4,
+) -> Vlp32cGeometry:
+    """Recover the VLP-32C firing geometry from one *decompensated* sweep.
+
+    The input must be the decompensated point cloud (each point in the sensor frame at its own measurement time).
+    On the raw motion-compensated cloud the azimuths are smeared by ego motion (~0.5 deg), which would dominate the
+    model error; on the decompensated cloud the firing geometry is clean (~0.05 deg).
+
+    The model is ``azimuth(point) = column_azimuth[col] + row_azimuth_offset[row]``. The 32 beams of a firing column
+    are *not* co-azimuthal -- VLP-32C fires them across a wide span -- so the per-row offset is essential (it is the
+    dominant structure, several degrees) and is fit empirically rather than assumed.
+
+    Args:
+        xyz_decompensated: decompensated points in the unit's sensor frame, [N, 3].
+        laser_number_in_unit: per-point beam index within the unit (0..31), [N].
+        offset_ns: per-point nanosecond offset from the sweep start, [N].
+        min_valid_distance_m: ignore near-range returns when estimating elevation.
+        far_range_m: distance threshold for azimuth fitting (near returns are noisy).
+        n_refine_iterations: alternating column/row-offset refinement passes.
+        resolution_factor: column upsampling factor for the column-azimuth grid; values below 1 are treated as 1.
+
+    Raises:
+        ValueError: if no point lies beyond ``min_valid_distance_m``.
+    """
+    dist = np.linalg.norm(xyz_decompensated, axis=1)
+    valid = dist > min_valid_distance_m
+    if not valid.any():
+        raise ValueError("no returns beyond min_valid_distance_m; cannot derive the VLP-32C geometry")
+    elev = np.arcsin(np.clip(xyz_decompensated[valid, 2] / dist[valid], -1.0, 1.0))
+    lasers = laser_number_in_unit[valid]
+
+    # Median elevation per laser (gaps interpolated over laser number), then sort lasers by elevation (high -> low).
+    median_elev = np.full(VLP32C_N_BEAMS, np.nan, dtype=np.float64)
+    for laser in range(VLP32C_N_BEAMS):
+        sel = lasers == laser
+        if sel.any():
+            median_elev[laser] = float(np.median(elev[sel]))
+    if np.isnan(median_elev).any():
+        good = ~np.isnan(median_elev)
+        median_elev[~good] = np.interp(np.flatnonzero(~good), np.flatnonzero(good), median_elev[good])
+
+    laser_order_high_to_low = np.argsort(-median_elev)  # laser indices, highest elevation first
+    elevations_rad = median_elev[laser_order_high_to_low].astype(np.float32)
+    laser_to_row = np.empty(VLP32C_N_BEAMS, dtype=np.int64)
+    laser_to_row[laser_order_high_to_low] = np.arange(VLP32C_N_BEAMS)
+    row = laser_to_row[laser_number_in_unit]
+
+    column_period_ns, n_columns = _estimate_column_timing(laser_number_in_unit, offset_ns)
+
+    o = offset_ns.astype(np.int64)
+    # Wrap modulo one revolution: the sweep slightly exceeds one revolution, and the
+    # overlap folds back onto early columns (same physical azimuth).
+    col = np.round((o - o.min()) / column_period_ns).astype(np.int64) % n_columns
+    az = np.arctan2(xyz_decompensated[:, 1], xyz_decompensated[:, 0])
+    far = dist > far_range_m
+
+    # Detect spin direction from the sign of azimuth-vs-column (the two stacked units
+    # fire in opposite phase, so they spin oppositely in their own frames).
+    az_unwrapped = np.unwrap(az[far][np.argsort(col[far])])
+    slope = float(np.polyfit(np.arange(len(az_unwrapped)), az_unwrapped, 1)[0]) if len(az_unwrapped) > 1 else -1.0
+    spinning_direction: Literal["cw", "ccw"] = "cw" if slope < 0 else "ccw"
+
+    # Jointly fit per-column azimuths (circular median) and per-row azimuth offsets (circular mean) by alternating
+    # between them. The 32 beams of a column span several degrees of azimuth, captured by the per-row offset;
+    # averaging it into the column azimuth (offset=0) would leave multi-degree per-row error.
+    def circmean(a: np.ndarray) -> float:
+        return float(np.arctan2(np.mean(np.sin(a)), np.mean(np.cos(a))))
+
+    # Steep downward beams (e.g. the lowest VLP-32C laser at ~-25 deg) only ever hit
+    # nearby ground, so they have no far-range returns. Fit their azimuth offset from
+    # whatever valid returns they do have (near-range) rather than leaving it at 0.
+    row_has_far = np.bincount(row[far], minlength=VLP32C_N_BEAMS) > 0
+
+    row_offsets = np.zeros(VLP32C_N_BEAMS, dtype=np.float64)
+    col_az = np.zeros(n_columns, dtype=np.float64)
+    for _ in range(max(n_refine_iterations, 1)):
+        # column azimuth = circular median of (az - row_offset) over far points in the
+        # column (far returns give the cleanest, least range-dependent azimuth).
+        adj = np.angle(np.exp(1j * (az - row_offsets[row])))
+        col_az[:] = np.nan
+        for c in np.unique(col[far]):
+            sel = (col == c) & far
+            col_az[c] = np.arctan2(np.median(np.sin(adj[sel])), np.median(np.cos(adj[sel])))
+        good = ~np.isnan(col_az)
+        if not good.any():
+            break
+        col_az = np.interp(np.arange(n_columns), np.flatnonzero(good), np.unwrap(col_az[good]))
+        # row offset = circular mean of (az - column azimuth). Use far returns where a
+        # row has them; otherwise fall back to all valid (near-range) returns so every
+        # row gets a real offset estimate.
+        for r in range(VLP32C_N_BEAMS):
+            sel = (row == r) & (far if row_has_far[r] else valid)
+            if sel.any():
+                row_offsets[r] = circmean(np.angle(np.exp(1j * (az[sel] - col_az[col[sel]]))))
+
+    column_azimuths_rad = enforce_spinning_monotonic(col_az, n_columns, spinning_direction)
+    row_azimuth_offsets_rad = np.angle(np.exp(1j * row_offsets)).astype(np.float32)
+
+    # Upsample the column-azimuth grid. The native column step is ~0.2 deg, so the
+    # per-frame integer column shift (see reconstruct_model_elements) would quantize
+    # alignment to ~0.1 deg. Upsampling shrinks the step by resolution_factor,
+    # removing that quantization (4x -> ~0.025 deg).
+    factor = max(int(resolution_factor), 1)
+    if factor > 1:
+        column_azimuths_rad = _upsample_azimuths(column_azimuths_rad, factor, spinning_direction)
+
+    return Vlp32cGeometry(
+        elevations_rad=elevations_rad,
+        laser_to_row=laser_to_row,
+        column_period_ns=column_period_ns,
+        n_columns=n_columns * factor,
+        spinning_direction=spinning_direction,
+        column_azimuths_rad=column_azimuths_rad,
+        row_azimuth_offsets_rad=row_azimuth_offsets_rad,
+        resolution_factor=factor,
+    )
+
+
+def _upsample_azimuths(
+    column_azimuths_rad: np.ndarray, factor: int, spinning_direction: Literal["cw", "ccw"]
+) -> np.ndarray:
+    """Interpolate column azimuths onto a ``factor``-times denser grid that wraps after one revolution."""
+    n = len(column_azimuths_rad)
+    unwrapped = np.unwrap(column_azimuths_rad.astype(np.float64))
+    # Knot n is column 0 one revolution later; without it np.interp clamps the last factor-1 entries flat.
+    closure = unwrapped[0] + (-2.0 if spinning_direction == "cw" else 2.0) * np.pi
+    upsampled = np.interp(np.arange(n * factor), np.arange(n + 1) * factor, np.append(unwrapped, closure))
+    return enforce_spinning_monotonic(upsampled, n * factor, spinning_direction)
+
+
+def build_vlp32c_model(geometry: Vlp32cGeometry) -> RowOffsetStructuredSpinningLidarModelParameters:
+    """Turn a recovered :class:`Vlp32cGeometry` into structured spinning-lidar model parameters.
+
+    The elevation table, per-column azimuths and per-row azimuth offsets are passed
+    through exactly as measured; the per-row offsets carry the several-degree
+    intra-column firing spread. The spin direction is the one detected for this unit,
+    and the spinning frequency is the nominal ``VLP32C_SPINNING_FREQUENCY_HZ``.
+    """
+    g = geometry
+    return RowOffsetStructuredSpinningLidarModelParameters(
+        n_rows=VLP32C_N_BEAMS,
+        n_columns=g.n_columns,
+        spinning_frequency_hz=VLP32C_SPINNING_FREQUENCY_HZ,
+        spinning_direction=g.spinning_direction,
+        row_elevations_rad=g.elevations_rad,
+        row_azimuth_offsets_rad=g.row_azimuth_offsets_rad,
+        column_azimuths_rad=g.column_azimuths_rad,
+    )
+
+
+def _estimate_column_timing(laser_number_in_unit: np.ndarray, offset_ns: np.ndarray) -> tuple[float, int]:
+    """Estimate the firing-column period and column count from beam-0 refire timing.
+
+    The column count spans exactly one revolution. An AV2 sweep covers slightly more than one revolution
+    (~1.02 rev over ~102 ms at 10 Hz), so columns are wrapped modulo this count (see
+    :func:`reconstruct_model_elements`): the few degrees of overlap fold back onto the early columns, which are the
+    same physical azimuth. This keeps the column-azimuth ramp below 2*pi, as the structured lidar model requires.
+
+    Returns ``(column_period_ns, n_columns)``; a nominal 1800-column period is used when beam 0 has no usable gaps.
+    """
+    o = offset_ns.astype(np.int64)
+    gaps = np.diff(np.sort(o[laser_number_in_unit == 0]))
+    gaps = gaps[gaps > 0]  # duplicate beam-0 timestamps carry no timing information (and would zero the period)
+    if gaps.size:
+        # Use the median of the small, regular gaps (drop large no-return stretches).
+        column_period_ns = float(np.median(gaps[gaps <= np.median(gaps) * 1.5]))
+    else:
+        # Fallback: one revolution divided by a nominal VLP-32C column count.
+        column_period_ns = VLP32C_SCAN_DURATION_US * 1000.0 / 1800.0
+    revolution_ns = VLP32C_SCAN_DURATION_US * 1000.0
+    n_columns = max(int(round(revolution_ns / column_period_ns)), 2)
+    return column_period_ns, n_columns
+
+
+def reconstruct_model_elements(
+    laser_number_in_unit: np.ndarray,
+    offset_ns: np.ndarray,
+    geometry: Vlp32cGeometry,
+    xyz_decompensated: np.ndarray,
+    min_valid_distance_m: float = 5.0,
+) -> np.ndarray:
+    """Build per-point ``model_element`` = (row, column) for one frame.
+
+    Row comes from the laser->row map (elevation order). The column comes from
+    quantizing ``offset_ns`` by the firing-column period (scaled onto the upsampled
+    grid and wrapped modulo one revolution), then applying a per-frame re-alignment.
+
+    The re-alignment is essential: the static model fixes one mapping from
+    ``offset_ns`` to azimuth, but the sensor's spin phase at a given ``offset_ns``
+    drifts between sweeps and the spin rate varies slightly within a sweep. The
+    residual between measured and model-predicted azimuth over far returns is fit
+    as an affine function of the native column (phase + linear rate drift) and
+    folded back into the column index as a rounded per-point column shift.
+
+    Args:
+        laser_number_in_unit: per-point beam index within the unit (0..31), [N].
+        offset_ns: per-point nanosecond offset from the sweep start, [N].
+        geometry: geometry recovered by :func:`derive_vlp32c_geometry`.
+        xyz_decompensated: per-point decompensated points in the sensor frame [N, 3],
+            used to measure this frame's azimuth phase relative to the model.
+        min_valid_distance_m: far-range threshold for the phase estimate.
+
+    Returns a [N, 2] uint16 array (row, column).
+    """
+    row = geometry.laser_to_row[laser_number_in_unit]
+    o = offset_ns.astype(np.int64)
+    # Native firing column from the firing timing, scaled onto the (upsampled) grid.
+    native_col = (o - o.min()) / geometry.column_period_ns
+    col = (np.round(native_col).astype(np.int64) * geometry.resolution_factor) % geometry.n_columns
+
+    # Re-align this frame to the model: fit the azimuth residual (in column units) as
+    # residual ~= a + b * native_col over far returns and fold it back into the column
+    # index. The constant term handles the per-sweep phase, the linear term the
+    # intra-sweep rate drift; without the linear term some scenes retain ~0.25 deg.
+    dist = np.linalg.norm(xyz_decompensated, axis=1)
+    far = dist > min_valid_distance_m
+    if far.any():
+        az = np.arctan2(xyz_decompensated[far, 1], xyz_decompensated[far, 0])
+        predicted = geometry.column_azimuths_rad[col[far]] + geometry.row_azimuth_offsets_rad[row[far]]
+        residual = np.angle(np.exp(1j * (az - predicted)))
+        column_step_rad = 2.0 * np.pi / geometry.n_columns
+        sign = -1.0 if geometry.spinning_direction == "cw" else 1.0
+        # Affine fit (least squares). A slope is only identifiable when the far returns span at least two
+        # distinct native columns; otherwise polyfit is rank-deficient, so fit the constant term alone.
+        residual_cols = sign * residual / column_step_rad
+        nc_far = native_col[far]
+        has_slope = np.ptp(nc_far) > 0
+        coeffs = np.polyfit(nc_far, residual_cols, 1) if has_slope else np.array([0.0, residual_cols.mean()])
+        col_shift = np.round(np.polyval(coeffs, native_col)).astype(np.int64)
+        col = (col + col_shift) % geometry.n_columns
+
+    return np.stack([row, col], axis=1).astype(np.uint16)
diff --git a/tools/data_converter/argoverse2/utils_test.py b/tools/data_converter/argoverse2/utils_test.py
new file mode 100644
index 00000000..bcc0ea4b
--- /dev/null
+++ b/tools/data_converter/argoverse2/utils_test.py
@@ -0,0 +1,272 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Data-free unit tests for the Argoverse 2 VLP-32C lidar-model derivation.
+
+These run in CI (no external dataset needed): a synthetic spinning-lidar sweep is
+generated with a known firing geometry, the model is derived from it, and the
+reconstruction accuracy is asserted -- the same property the eval tool measures.
+This guards the firing-pattern reconstruction (and in particular the
+more-than-one-revolution column wrap) against regressions.
+"""
+
+from __future__ import annotations
+
+import tempfile
+import unittest
+
+from pathlib import Path
+
+import numpy as np
+import pyarrow as pa
+import pyarrow.feather as feather
+
+from upath import UPath
+
+from ncore.impl.data.types import IdealPinholeCameraModelParameters, ShutterType
+from ncore.impl.sensors.lidar import StructuredLidarModel
+from tools.data_converter.argoverse2.utils import (
+    VLP32C_N_BEAMS,
+    VLP32C_SCAN_DURATION_US,
+    build_vlp32c_model,
+    derive_vlp32c_geometry,
+    read_intrinsics,
+    reconstruct_model_elements,
+)
+
+
+# A VLP-32C-like elevation table (degrees, highest first; uniformly spaced for
+# simplicity), and a laser-number -> physical order that is NOT elevation-sorted,
+# so the test exercises the laser->row recovery.
+_TRUE_ELEVATIONS_DEG = np.linspace(15.0, -25.0, VLP32C_N_BEAMS)
+_LASER_PERMUTATION = np.array(
+    [(i * 7) % VLP32C_N_BEAMS for i in range(VLP32C_N_BEAMS)], dtype=np.int64
+)  # laser_number -> index into the (elevation-sorted) beam list
+
+
+def _synthesize_sweep(
+    spinning_direction: str,
+    revolutions: float,
+    column_period_ns: float = 55296.0,
+    seed: int = 0,
+    start_az_rad: float | None = None,
+    az_rate_factor: float = 1.0,
+):
+    """Generate one synthetic, decompensated VLP-32C sweep whose geometry is known.
+
+    Every firing column emits all 32 lasers; each return is dropped with probability
+    0.1 and otherwise placed at a uniformly random range in [25, 60) m.
+
+    Returns:
+        ``(xyz [N, 3], laser_number [N], offset_ns [N], elevation_by_laser [32],
+        azimuth_offset_by_laser [32])`` -- the last two are the ground truth in radians,
+        indexed by laser number.
+    """
+    generator = np.random.default_rng(seed)
+    direction = 1.0 if spinning_direction != "cw" else -1.0
+
+    # Ground truth per laser number: elevations from the (highest..lowest) table and
+    # azimuth offsets spread over +-4 deg, both reordered by the laser permutation.
+    elevation_by_laser = np.radians(_TRUE_ELEVATIONS_DEG)[_LASER_PERMUTATION]
+    azimuth_offset_by_laser = np.radians(np.linspace(-4.0, 4.0, VLP32C_N_BEAMS))[_LASER_PERMUTATION]
+
+    columns_per_rev = int(round(VLP32C_SCAN_DURATION_US * 1000.0 / column_period_ns))
+    n_fired_columns = int(round(columns_per_rev * revolutions))
+    # Draw the start azimuth only when the caller did not pin it (keeps the RNG stream unchanged).
+    first_az = generator.uniform(-np.pi, np.pi) if start_az_rad is None else start_az_rad
+
+    points: list[tuple[float, float, float]] = []
+    lasers: list[int] = []
+    offsets: list[int] = []
+    for column in range(n_fired_columns):
+        # The column azimuth advances with the spin; az_rate_factor != 1 models an
+        # intra-sweep spin-rate drift relative to the nominal offset_ns column rate.
+        column_az = first_az + direction * 2.0 * np.pi * column * az_rate_factor / columns_per_rev
+        timestamp_ns = int(round(column * column_period_ns))
+        for laser_number in range(VLP32C_N_BEAMS):
+            if generator.random() < 0.1:  # ~10% dropout mimics sparsity
+                continue
+            azimuth = column_az + azimuth_offset_by_laser[laser_number]
+            elevation = elevation_by_laser[laser_number]
+            range_m = generator.uniform(25.0, 60.0)  # far range keeps the azimuth fit clean
+            horizontal = range_m * np.cos(elevation)
+            points.append((horizontal * np.cos(azimuth), horizontal * np.sin(azimuth), range_m * np.sin(elevation)))
+            lasers.append(laser_number)
+            offsets.append(timestamp_ns)
+
+    xyz = np.array(points, dtype=np.float64)
+    laser = np.array(lasers, dtype=np.int64)
+    offset = np.array(offsets, dtype=np.int64)
+    return xyz, laser, offset, elevation_by_laser, azimuth_offset_by_laser
+
+
+class TestVlp32cModelDerivation(unittest.TestCase):
+    """Data-free checks of the VLP-32C model derivation on synthetic sweeps.
+
+    Each scenario derives the model from a synthetic reference sweep, reconstructs
+    per-point (row, column) elements and bounds the median angular error by 0.1 deg.
+    """
+
+    def _derive(self, xyz: np.ndarray, laser: np.ndarray, offset: np.ndarray):
+        """Derive (geometry, model parameters, evaluable lidar model) from one sweep."""
+        geometry = derive_vlp32c_geometry(xyz, laser, offset)
+        model = build_vlp32c_model(geometry)
+        sm = StructuredLidarModel.maybe_from_parameters(model, device="cpu")
+        # Fail with a test failure (not an AttributeError later) if no model could be built.
+        self.assertIsNotNone(sm)
+        return geometry, model, sm
+
+    def _assert_reconstruction_accurate(self, sm, elem: np.ndarray, xyz: np.ndarray, label: str) -> None:
+        """Assert that the model reproduces the true ray directions.
+
+        Compares the unit directions predicted for ``elem`` with the directions of
+        ``xyz`` and requires their median angular difference to be below 0.1 deg.
+        ``label`` prefixes the failure message to identify the scenario.
+        """
+        predicted = sm.elements_to_sensor_points(elem, np.ones(len(elem), dtype=np.float32)).cpu().numpy()
+        predicted /= np.linalg.norm(predicted, axis=1, keepdims=True)
+        direction = xyz / np.linalg.norm(xyz, axis=1, keepdims=True)
+        cos = np.clip(np.sum(predicted * direction, axis=1), -1.0, 1.0)
+        median_err = float(np.degrees(np.median(np.arccos(cos))))
+        self.assertLess(median_err, 0.1, f"{label}: reconstruction error {median_err:.4f} deg too high")
+
+    def _check(self, spinning_direction: str, revolutions: float) -> None:
+        """Derive the model from one sweep and reconstruct that same sweep."""
+        xyz, laser, offset, _, _ = _synthesize_sweep(spinning_direction, revolutions)
+        geometry, model, sm = self._derive(xyz, laser, offset)
+        self.assertEqual(geometry.spinning_direction, spinning_direction)
+        # Columns are upsampled for sub-column alignment precision.
+        self.assertGreater(geometry.resolution_factor, 1)
+        self.assertEqual(geometry.n_columns, len(geometry.column_azimuths_rad))
+
+        self.assertEqual(model.n_rows, VLP32C_N_BEAMS)
+        # The column-azimuth ramp must stay strictly below one revolution; the model
+        # constructor enforces this, so building it at all proves the >1-rev wrap held.
+        self.assertLess(abs(float(model.column_azimuths_rad[-1] - model.column_azimuths_rad[0])), 2.0 * np.pi)
+
+        # Reconstruct the very sweep the model was derived from.
+        elem = reconstruct_model_elements(laser, offset, geometry, xyz)
+        self.assertEqual(elem.dtype, np.uint16)
+        self.assertTrue(np.all(elem[:, 0] < model.n_rows))
+        self.assertTrue(np.all(elem[:, 1] < model.n_columns))
+        self._assert_reconstruction_accurate(sm, elem, xyz, f"{spinning_direction} {revolutions}rev")
+
+    def test_cw_single_revolution(self) -> None:
+        """Baseline: a sweep covering exactly one revolution."""
+        self._check("cw", revolutions=1.0)
+
+    def test_cw_more_than_one_revolution(self) -> None:
+        # The regression case: an AV2 sweep covers ~1.02 revolutions, so the column
+        # ramp would exceed 2*pi unless columns are wrapped modulo one revolution.
+        self._check("cw", revolutions=1.05)
+
+    def test_ccw_more_than_one_revolution(self) -> None:
+        # The two stacked units spin oppositely in their own frames; cover the ccw case.
+        self._check("ccw", revolutions=1.05)
+
+    def test_model_generalizes_across_phase_shifted_frames(self) -> None:
+        """The model derived from one sweep must reconstruct other sweeps accurately.
+
+        The sensor's spin phase at a given ``offset_ns`` drifts between sweeps, so a
+        sweep other than the one the model was derived from is rigidly rotated in
+        azimuth. ``reconstruct_model_elements`` must re-align per frame; without it,
+        these frames are off by ~1 deg (the multi-scene failure this guards against).
+        """
+        # Derive the model from a reference sweep at one phase.
+        ref_xyz, ref_laser, ref_offset, _, _ = _synthesize_sweep("cw", revolutions=1.05, start_az_rad=0.3)
+        geometry, model, sm = self._derive(ref_xyz, ref_laser, ref_offset)
+
+        # Reconstruct several sweeps captured at different spin phases.
+        for shift_deg in (-30.0, -1.0, 1.0, 30.0):
+            with self.subTest(shift_deg=shift_deg):
+                # seed=1 draws different dropouts/ranges than the (seed=0) reference sweep.
+                xyz, laser, offset, _, _ = _synthesize_sweep(
+                    "cw", revolutions=1.05, seed=1, start_az_rad=0.3 + np.radians(shift_deg)
+                )
+                elem = reconstruct_model_elements(laser, offset, geometry, xyz)
+                self.assertTrue(np.all(elem[:, 1] < model.n_columns))
+                self._assert_reconstruction_accurate(sm, elem, xyz, f"phase shift {shift_deg} deg")
+
+    def test_model_handles_intra_sweep_rate_drift(self) -> None:
+        """A frame whose spin rate drifts vs offset_ns is reconstructed accurately.
+
+        On some scenes the azimuth advances slightly faster/slower than the nominal
+        ``offset_ns`` column rate within a sweep. A single rigid phase shift cannot
+        correct this (it leaves ~0.25 deg); the affine (phase + linear) per-frame
+        alignment in ``reconstruct_model_elements`` does. This guards that fix.
+        """
+        # Derive the model from a nominal-rate reference sweep.
+        ref_xyz, ref_laser, ref_offset, _, _ = _synthesize_sweep("cw", revolutions=1.05, start_az_rad=0.2)
+        geometry, _, sm = self._derive(ref_xyz, ref_laser, ref_offset)
+
+        # A sweep whose azimuth advances 0.3% faster than the nominal column rate
+        # (~1 deg of drift accumulated over the revolution) plus a phase offset.
+        xyz, laser, offset, _, _ = _synthesize_sweep(
+            "cw", revolutions=1.05, seed=2, start_az_rad=0.2 + np.radians(5.0), az_rate_factor=1.003
+        )
+        elem = reconstruct_model_elements(laser, offset, geometry, xyz)
+        self._assert_reconstruction_accurate(sm, elem, xyz, "intra-sweep drift")
+
+
+class TestReadIntrinsics(unittest.TestCase):
+    """``read_intrinsics`` yields an ideal pinhole per camera and keeps the raw k1/k2/k3."""
+
+    def _write_intrinsics(self, log_dir: Path) -> None:
+        """Write a two-camera ``calibration/intrinsics.feather`` below ``log_dir``."""
+        calibration_dir = log_dir / "calibration"
+        calibration_dir.mkdir(parents=True, exist_ok=True)
+        columns = {
+            "sensor_name": ["ring_front_center", "ring_rear_left"],
+            "fx_px": [1685.0, 1683.0],
+            "fy_px": [1685.5, 1683.5],
+            "cx_px": [775.0, 773.0],
+            "cy_px": [1023.0, 1021.0],
+            "k1": [-0.27, -0.26],
+            "k2": [0.11, 0.10],
+            "k3": [-0.018, -0.017],
+            "height_px": [2048, 1550],
+            "width_px": [1550, 2048],
+        }
+        feather.write_feather(pa.table(columns), str(calibration_dir / "intrinsics.feather"))
+
+    def _read_synthetic_intrinsics(self):
+        """Round-trip the synthetic calibration through ``read_intrinsics`` in a temp dir."""
+        with tempfile.TemporaryDirectory() as tmp:
+            log_dir = Path(tmp)
+            self._write_intrinsics(log_dir)
+            return read_intrinsics(UPath(log_dir))
+
+    def test_model_is_ideal_pinhole_global_shutter(self) -> None:
+        intrinsics = self._read_synthetic_intrinsics()
+        self.assertEqual(set(intrinsics), {"ring_front_center", "ring_rear_left"})
+        model = intrinsics["ring_front_center"].model
+        self.assertIsInstance(model, IdealPinholeCameraModelParameters)
+        # Global shutter is assumed because the released imagery is already undistorted.
+        self.assertEqual(model.shutter_type, ShutterType.GLOBAL)
+        self.assertIsNone(model.external_distortion_parameters)
+        np.testing.assert_array_equal(model.resolution, np.array([1550, 2048], dtype=np.uint64))
+        np.testing.assert_allclose(model.focal_length, np.array([1685.0, 1685.5], dtype=np.float32))
+        np.testing.assert_allclose(model.principal_point, np.array([775.0, 1023.0], dtype=np.float32))
+
+    def test_original_distortion_coefficients_preserved(self) -> None:
+        intrinsics = self._read_synthetic_intrinsics()
+        # The raw lens coefficients are kept verbatim (not applied to the images).
+        expected = {"ring_front_center": (-0.27, 0.11, -0.018), "ring_rear_left": (-0.26, 0.10, -0.017)}
+        for camera, k1k2k3 in expected.items():
+            self.assertEqual(intrinsics[camera].original_distortion_k1k2k3, k1k2k3)
+
+
+if __name__ == "__main__":  # allow running this test module directly as a script
+    unittest.main()