From 900c047fb226946279a6f721b3f915950b4f3146 Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Sat, 6 Jun 2026 08:42:36 -0700 Subject: [PATCH 1/7] feat: modernize parser bindings and release workflow --- .github/workflows/release.yml | 93 +++- .github/workflows/rust.yaml | 22 +- .gitignore | 4 +- AGENTS.md | 83 +++ BUILD.md | 68 ++- CHANGELOG.md | 19 + Cargo.toml | 15 +- README.md | 107 +++- UPDATE_DESIGN.md | 421 +++++++++++++++ benches/parse_bench.rs | 36 ++ pyproject.toml | 19 + src/lib.rs | 947 ++++++++++++++++++++++++++++++++-- tests/benchmark.py | 153 ++++++ tests/test_api.py | 116 +++++ 14 files changed, 2020 insertions(+), 83 deletions(-) create mode 100644 AGENTS.md create mode 100644 UPDATE_DESIGN.md create mode 100644 benches/parse_bench.rs create mode 100644 tests/benchmark.py create mode 100644 tests/test_api.py diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index e3b277b..6dc937e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -6,25 +6,100 @@ permissions: on: push: tags: - - v[0-9]+.* + - "v*" + workflow_dispatch: + inputs: + publish: + description: "Publish artifacts to PyPI (manual runs default to build-only)" + required: true + type: boolean + default: false + +env: + CARGO_TERM_COLOR: always jobs: - # make sure release content has correct format and README is up-to-date - format-check: + checks: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Run format check run: cargo fmt --check + - name: Run clippy + run: cargo clippy -- -D warnings + + build-sdist: + needs: checks + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: PyO3/maturin-action@v1 + with: + command: sdist + args: --out dist + - uses: actions/upload-artifact@v4 + with: + name: sdist + path: dist/*.tar.gz + + build-wheels: + needs: checks + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + include: + - os: ubuntu-latest + target: x86_64 + - os: ubuntu-latest + 
target: aarch64 + - os: macos-15-intel + target: x86_64 + - os: macos-14 + target: aarch64 + - os: windows-latest + target: x64 + steps: + - uses: actions/checkout@v4 + - uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.target }} + args: --release --out dist + - uses: actions/upload-artifact@v4 + with: + name: wheels-${{ matrix.os }}-${{ matrix.target }} + path: dist/*.whl + + publish-pypi: + needs: [build-sdist, build-wheels] + runs-on: ubuntu-latest + if: github.event_name == 'push' || inputs.publish == true + environment: pypi + steps: + - uses: actions/download-artifact@v4 + with: + path: dist + merge-multiple: true + - name: Publish to PyPI + uses: PyO3/maturin-action@v1 + with: + command: upload + args: --non-interactive --skip-existing dist/* + env: + MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }} create-release: - needs: format-check + needs: [build-sdist, build-wheels, publish-pypi] + runs-on: ubuntu-latest + if: github.event_name == 'push' steps: - uses: actions/checkout@v4 - - uses: taiki-e/create-gh-release-action@v1 + - uses: actions/download-artifact@v4 + with: + path: dist + merge-multiple: true + - name: Create GitHub Release + uses: softprops/action-gh-release@v2 with: - # (optional) Path to changelog. - changelog: CHANGELOG.md - # (required) GitHub token for creating GitHub Releases. 
- token: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file + files: dist/* + body_path: CHANGELOG.md diff --git a/.github/workflows/rust.yaml b/.github/workflows/rust.yaml index ad64d6d..33dacbc 100644 --- a/.github/workflows/rust.yaml +++ b/.github/workflows/rust.yaml @@ -10,15 +10,25 @@ env: CARGO_TERM_COLOR: always jobs: - build: - + rust-checks: runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - + - uses: actions/checkout@v4 - name: Run format check run: cargo fmt --check - - name: Run clippy run: cargo clippy -- -D warnings + + python-api: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Install test tools + run: python -m pip install --upgrade pip maturin pytest pytest-benchmark + - name: Build and install extension + run: python -m pip install . + - name: Run Python API tests + run: pytest tests/test_api.py diff --git a/.gitignore b/.gitignore index 9ed12d6..fe499f0 100644 --- a/.gitignore +++ b/.gitignore @@ -18,4 +18,6 @@ cache .DS_Store -.pypirc \ No newline at end of file +.pypirc +__pycache__/ +*.py[cod] diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..0cb0db5 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,83 @@ +# PROJECT KNOWLEDGE BASE + +## OVERVIEW + +Python binding for `bgpkit-parser` (Rust MRT/BGP parser). Exposes a single `Parser` class and `Elem` dataclass via PyO3, built with maturin. 
+ +## STRUCTURE + +``` +├── src/lib.rs # Entire Python extension: Parser + Elem PyO3 classes +├── examples/ # Python usage examples +├── build.sh # Maturin build for multiple Python versions +├── Dockerfile # Ubuntu builder for cross-platform Linux wheels +├── Cargo.toml # Rust crate: pybgpkit-parser, depends on bgpkit-parser +├── pyproject.toml # Maturin build-system config +├── build.rs # PyO3 extension module linker setup +└── .github/workflows/ # Rust fmt/clippy CI + tag-based release +``` + +## WHERE TO LOOK + +| Task | Location | +|------|----------| +| Change exposed Python API | `src/lib.rs` | +| Update underlying parser logic | `Cargo.toml` → bump `bgpkit-parser` version | +| Add Python version support | `build.sh` + `Dockerfile` + `README.md` | +| Build/test locally | `maturin develop` (see README.md) | +| Build wheels for release | GitHub Actions `release.yml` / `maturin build --release` locally | +| Publish to PyPI | Push `v*` tag; CI publishes with `PYPI_API_TOKEN` | + +## CODE MAP + +- **`Elem`** — PyO3 class wrapping a parsed BGP element. Has `#[pyo3(get, set)]` fields and `to_dict()` / `__str__` / `__getstate__` methods. +- **`Parser`** — PyO3 class wrapping `bgpkit_parser::BgpkitParser`. Constructor takes `url`, optional `filters` (HashMap), and optional `cache_dir`. Implements `__iter__`/`__next__` for Python iteration. +- **`convert_elem`** — Internal fn mapping `BgpElem` → `Elem` (Rust type → PyO3 type). 
+ +## CONVENTIONS + +- Rust fmt/clippy enforced in CI (`cargo fmt --check`, `cargo clippy -- -D warnings`) +- `PyValueError` used for filter errors propagated to Python +- `unsafe impl Send + Sync for Parser` — required because `ElemIterator>` is not auto-Send +- `#[pyo3(name = "__str__")]` used for JSON string representation of `Elem` +- `atomic` field returns `"AG"`/`"NAG"` strings (not bool) +- `elem_type` field returns `"A"` (announce) or `"W"` (withdraw) + +## ANTI-PATTERNS + +- **Do NOT** change PyO3/maturin versions without updating both `Cargo.toml` and `build.rs` (`pyo3-build-config` must match) +- **Do NOT** test release publishing with a beta tag unless the package version is also beta; use `workflow_dispatch` with `publish=false` for build-only checks +- **Do NOT** add `unsafe Send/Sync` for new types without verifying thread safety with the underlying Rust iterator +- **Do NOT** use `.unwrap()` on user inputs (URL/filters); already handled in `BgpkitParser::new` but be careful with new additions +- **Do NOT** make `Elem` fields write-only or remove getters without noting in CHANGELOG as breaking (v0.6.0 was a breaking change) + +## COMMANDS + +```bash +# Local dev build (installs to active venv) +maturin develop + +# Build wheel locally +maturin build --release + +# Build and publish release via CI +git tag v0.7.0 +git push origin v0.7.0 + +# Manual fallback only +bash build.sh + +# Format + lint +cargo fmt --check +cargo clippy -- -D warnings + +# Publish (after building on all platforms) +twine upload --skip-existing target/wheels/* +``` + +## NOTES + +- `bgpkit-parser` crate version bump is the primary release trigger (see CHANGELOG for version history) +- Release workflow: `rust.yaml` runs Rust + Python API checks on PR/push; `release.yml` builds ABI3 wheels and publishes on `v*` tag push +- Supports Python 3.9+ via ABI3 wheels +- Python API tests live in `tests/test_api.py` (run by the `python-api` CI job); examples in `examples/` serve as additional smoke tests diff --git a/BUILD.md b/BUILD.md index 
9a5551c..eb2b649 100644 --- a/BUILD.md +++ b/BUILD.md @@ -1,31 +1,65 @@ # Build and Publish Guide -## Pre-requisites +## Automated Release (Recommended) -- `maturin` -- `docker` - - run `docker build . -t bgpkit-builder:latest` to build the builder image +Release builds are handled by GitHub Actions via `.github/workflows/release.yml`. -## Build and Upload Checklist +Push a version tag to build and publish: -1. run [`build.sh`](./build.sh) on Apple Silicon Mac -2. run [`build.sh`](./build.sh) inside docker on Apple Silicon Mac -3. run [`build.sh`](./build.sh) on Intel Mac -4. run [`build.sh`](./build.sh) inside docker on Intel Mac - -Then run +```bash +git tag v0.7.0 +git push origin v0.7.0 ``` -twine upload --skip-existing target/wheels/* + +The release workflow will: + +1. Run `cargo fmt --check` and `cargo clippy -- -D warnings` +2. Build the source distribution (`sdist`) +3. Build ABI3 wheels for: + - Linux x86_64 + - Linux aarch64 + - macOS x86_64 + - macOS arm64 + - Windows x86_64 +4. Publish artifacts to PyPI using `PYPI_API_TOKEN` +5. Create a GitHub Release and attach the built artifacts + +Manual workflow runs (`workflow_dispatch`) are build-only by default. They only publish when the `publish` input is explicitly enabled. + +## Required GitHub Secret + +Create a project-scoped PyPI API token and save it as a GitHub Actions secret: + +```text +PYPI_API_TOKEN ``` -## Build Linux packages in Docker +Recommended token scope: only the `pybgpkit-parser` project. + +## Local Development Build -Build image using the [Dockerfile](./Dockerfile) provided +```bash +maturin develop ``` -docker build -t bgpkit-builder:latest . + +This builds the extension and installs it into the active Python environment. + +## Local Wheel Build + +```bash +maturin build --release ``` -Run `docker run --rm -it bgpkit-builder:latest bash` to open a shell in the container +Built wheels are written under `target/wheels/`. 
+ +## Manual Publish Fallback + +If CI is unavailable, build locally and upload with `twine`: + ```bash -bash build.sh +python -m pip install --upgrade maturin twine +maturin build --release --sdist +twine upload --skip-existing target/wheels/* ``` + +The historical `build.sh` and `Dockerfile` are kept as fallback tools for reproducing older manual builds, but CI is the preferred release path. diff --git a/CHANGELOG.md b/CHANGELOG.md index fce43b7..f933bb7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,25 @@ All notable changes to this project will be documented in this file. +## 0.7.0 - TBD + +### Highlights + +* Update `bgpkit-parser` to v0.17.0. +* Update PyO3 to v0.28 and enable ABI3 wheels for Python 3.9+. +* Add `peer_bgp_id` and `only_to_customer` fields to `Elem`. +* Add reusable `Filter` class and `Parser.from_filters(...)` constructor. +* Add Rust-like `Elem` utility methods: `is_announcement`, `is_withdrawal`, `get_origin_asn`, `get_origin_asns`, `has_as_path`, `as_dict`, `to_json`, `to_psv`, and `get_psv_header`. +* Add `Elem.origin_asn` property and module constants `ELEM_TYPE_ANNOUNCE`, `ELEM_TYPE_WITHDRAW`, and `PSV_HEADER`. +* Add Python-native filter helper constructors: `Filter.peer_ip`, `Filter.peer_ips`, `Filter.origin_asn`, `Filter.prefix`, and `Filter.elem_type`. +* Add stream-consuming `Parser.count()` and `Parser.iter_batches(batch_size)` helpers. +* Add `RouteElem` and `RouteParser` for upstream route-level parsing (`BgpRouteElem`) and faster route identity scans. +* Add high-performance projected tuple iteration: `iter_tuples(fields)` and `iter_tuple_batches(fields, batch_size)` for `Parser` and `RouteParser`. +* Add field presets `BASIC_FIELDS`, `ROUTE_FIELDS`, and `NEXT_HOP_FIELDS`. +* Optimize `Parser.parse_all()` and batch iteration by parsing while detached from the Python interpreter before converting results into Python objects. +* Add Rust and Python benchmark scaffolding. 
+* Automate wheel builds and PyPI publishing via GitHub Actions. + ## 0.6.2 - 2025-06-06 ### Fix regressions diff --git a/Cargo.toml b/Cargo.toml index 0b56ac5..555a506 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ description = "BGPKIT Parser Python Binding" keywords = ["bgp", "mrt", "parser"] repository = "https://github.com/bgpkit/bgpkit-parser-py" documentation = "https://docs.rs/bgpkit-parser-py" -version = "0.6.2" +version = "0.7.0" authors = ["Mingwei Zhang "] edition = "2021" license = "MIT" @@ -15,10 +15,17 @@ name = "pybgpkit_parser" crate-type = ["cdylib", "rlib"] [dependencies] -bgpkit-parser = "0.11.1" -pyo3 = { version = "0.25", features = ["extension-module"] } +bgpkit-parser = "0.17.0" +pyo3 = { version = "0.28", features = ["extension-module", "abi3-py39"] } serde = { version = "1.0", features = ["derive"] } serde_json = "1" [build-dependencies] -pyo3-build-config = "0.25" +pyo3-build-config = "0.28" + +[dev-dependencies] +criterion = "0.5" + +[[bench]] +name = "parse_bench" +harness = false diff --git a/README.md b/README.md index bbe240d..60044d4 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,34 @@ for elem in parser: break ``` +## Filters + +The original dictionary-based filter API is still supported: + +```python +parser = Parser(url, filters={"peer_ips": "185.1.8.65,2001:7f8:73:0:3:fa4:0:1"}) +``` + +Reusable Rust-backed filters are also available: + +```python +from pybgpkit_parser import Filter, Parser + +filters = [ + Filter.peer_ips(["185.1.8.65", "2001:7f8:73:0:3:fa4:0:1"]), + Filter.elem_type("a"), +] +parser = Parser.from_filters(url, filters) +``` + +Available helper constructors: + +- `Filter.peer_ip(...)` +- `Filter.peer_ips([...])` +- `Filter.origin_asn(...)` +- `Filter.prefix(...)` +- `Filter.elem_type(...)` + ## Available fields for `Elem` ```rust @@ -53,6 +81,8 @@ for elem in parser: #[pyo3(get, set)] pub peer_asn: u32, #[pyo3(get, set)] + pub peer_bgp_id: Option, + #[pyo3(get, set)] pub prefix: String, 
#[pyo3(get, set)] pub next_hop: Option, @@ -74,6 +104,8 @@ for elem in parser: pub aggr_asn: Option, #[pyo3(get, set)] pub aggr_ip: Option, + #[pyo3(get, set)] + pub only_to_customer: Option, } ``` @@ -95,6 +127,79 @@ python3 -m pip install pybgpkit-parser `maturin develop` builds local python module and add to the venv. +## High-performance projected iteration + +For best performance, prefer projected tuple iteration when you only need a subset of fields. This avoids creating full `Elem` objects and skips conversion for unused fields. + +```python +from pybgpkit_parser import Parser, ROUTE_FIELDS + +# Fast: only converts requested fields +for timestamp, prefix, as_path in Parser(url).iter_tuples(["timestamp", "prefix", "as_path"]): + pass + +# Faster for large files: batch Python boundary crossings +fields = ["timestamp", "prefix", "as_path"] +for batch in Parser(url).iter_tuple_batches(fields, batch_size=10_000): + for timestamp, prefix, as_path in batch: + pass +``` + +Available field presets: + +- `BASIC_FIELDS`: `timestamp`, `elem_type`, `peer_ip`, `peer_asn`, `prefix` +- `ROUTE_FIELDS`: `BASIC_FIELDS` + `as_path` +- `NEXT_HOP_FIELDS`: `BASIC_FIELDS` + `next_hop` + +You can also pass your own field list, e.g. `Parser(url).iter_tuples(["peer_asn", "prefix"])`. 
+ +## Utility methods + +`Elem` exposes Rust-like helper methods: + +- `is_announcement()` +- `is_withdrawal()` +- `get_origin_asn()` +- `get_origin_asns()` +- `has_as_path()` +- `to_dict()` / `as_dict()` +- `origin_asn` property +- `to_json()` +- `to_psv()` +- `Elem.get_psv_header()` +- module constants: `ELEM_TYPE_ANNOUNCE`, `ELEM_TYPE_WITHDRAW`, `PSV_HEADER` + +`Parser` also provides stream-consuming helpers: + +- `count()` +- `iter_batches(batch_size)` +- `iter_tuples(fields)` — recommended for high-performance subset-field scans +- `iter_tuple_batches(fields, batch_size)` — recommended for large-file scans + +## Route-level parsing + +`RouteParser` exposes upstream `BgpRouteElem` iteration for faster scans when you only need route identity fields: + +```python +from pybgpkit_parser import RouteParser + +for route in RouteParser(url): + print(route.timestamp, route.peer_ip, route.peer_asn, route.prefix, route.as_path) +``` + +`RouteElem` fields: + +- `timestamp` +- `elem_type` +- `peer_ip` +- `peer_asn` +- `prefix` +- `as_path` + +`RouteParser` supports the same constructor style, `from_filters(...)`, `parse_all()`, `parse_next()`, `count()`, `iter_batches(batch_size)`, `iter_tuples(fields)`, and `iter_tuple_batches(fields, batch_size)`. + +For route scans, this is the fastest object-based API; for maximum throughput use `RouteParser.iter_tuples(ROUTE_FIELDS)` or `RouteParser.iter_tuple_batches(ROUTE_FIELDS, batch_size)`. + ## Build and publish -See [BUILD.md](./BUILD.md) for more details. \ No newline at end of file +See [BUILD.md](./BUILD.md) for automated GitHub Actions release details. \ No newline at end of file diff --git a/UPDATE_DESIGN.md b/UPDATE_DESIGN.md new file mode 100644 index 0000000..50fdef7 --- /dev/null +++ b/UPDATE_DESIGN.md @@ -0,0 +1,421 @@ +# bgpkit-parser-py Update Plan — Single PR + +## Overview + +This plan covers a comprehensive update of `bgpkit-parser-py` in **one pull request**. 
The PR modernizes dependencies, expands the Python API surface, adds performance benchmarks, and replaces the manual release process with fully automated CI/CD. + +## Work Streams (4 in 1 PR) + +| # | Stream | Scope | Risk | +|---|--------|-------|------| +| 1 | **Bump `bgpkit-parser`** | `0.11.1` → `0.17.0` | Medium — API changes, new fields | +| 2 | **Bump PyO3** | `0.25` → `0.28.3` | Medium — `Bound` API migration | +| 3 | **Expand Python API** | New fields, `RouteParser`, projected tuple iteration, `Filter` helpers, `__repr__`, `from_filters`, `count`, `iter_batches`, constants | Low — additive changes | +| 4 | **Benchmarks + CI/CD** | Python benchmark script, `criterion`, `maturin-action` workflow | Low — new files, no Rust API changes | + +## Design Decisions + +### 1. PyO3 Version: 0.28.3 (not 0.29) + +**Decision:** Target PyO3 0.28.3. + +**Rationale:** Latest stable release. 0.29 has breaking `pyo3-build-config` changes that require a direct dependency on `pyo3` or `pyo3-ffi`. 0.28.3 gives us free-threaded Python support and the mature `Bound` API without extra migration risk. + +### 2. `Bound` API Migration + +**Decision:** Migrate Python-object helpers to the `Bound<'py, T>` API where appropriate (e.g., `to_dict`), but keep `Py` for returned element objects where it remains the clean PyO3 return type. + +**Rationale:** PyO3 0.28 deprecates the old GIL-ref API. The `Bound` API is now primary. However, `Py::new()` still returns `Py` which works fine. We migrate incrementally — what compiles cleanly with `Bound` gets converted; what doesn't stays on `Py`. + +### 3. `Elem` Field Access: Keep `#[pyo3(get, set)]` + +**Decision:** Retain direct field access on `Elem`. + +**Rationale:** v0.6.0 was a breaking change that removed getters and caused user pain. The current `get, set` approach is ergonomic for Python users. No change. + +### 4. New Fields: `peer_bgp_id` and `only_to_customer` + +**Decision:** Add both to the Python `Elem` class. 
+ +**Rationale:** +- `peer_bgp_id`: PEER_INDEX_TABLE in TableDumpV2/RIB records. `Option`. +- `only_to_customer`: RFC 9234 OTC attribute. `Option`. `None` for withdrawals (fixed upstream in v0.16.0). + +### 5. Iterator Strategy: Expose Elem and Route Iterators, Defer Fallible/Record/Update + +**Decision:** Keep `Parser` for `BgpElem` iteration and add `RouteParser` for upstream `BgpRouteElem` route-level iteration. Defer fallible, record, raw-record, and update iterators. + +**Rationale:** `BgpRouteElem` is a compact route identity type and maps cleanly to Python (`RouteElem`). It is the best performance-oriented iterator to expose now. Fallible/record/update iterators return different result/enum/nested record types and should be added in follow-up PRs with more API design. + +### 6. Filter API: Add `Filter` Class + `from_filters` Constructor + +**Decision:** Keep the existing `HashMap` filter constructor. Add a new `Filter` PyO3 class and a `from_filters` classmethod on `Parser`. + +**Rationale:** The string-based API is backward-compatible. The new `Filter` class exposes the upstream `Filter::new()` constructor, and `from_filters` lets users pass pre-built `Filter` objects (enabling reuse and avoiding string parsing overhead). + +### 7. Performance Benchmark: `pytest-benchmark` + `criterion` + +**Decision:** Add a Python-side benchmark script and Rust-side (`criterion`) benchmarks. + +**Rationale:** We need to quantify the "Python tax" per element. The benchmark compares: +- Rust native iteration (baseline) +- Python `for elem in parser` iteration (GIL crossing + object allocation) +- Python `parse_all` (bulk collection) +- Python `to_dict()` overhead (serialization) + +Also optimize `parse_all()` by parsing while detached from the Python interpreter (`py.detach(...)`) before converting the collected Rust `Elem` values into Python objects. + +### 8. 
CI/CD: `maturin-action` + PyPI Token + +**Decision:** Use `PyO3/maturin-action` for all wheel builds, publish via `MATURIN_PYPI_TOKEN`, and use ABI3 (`abi3-py39`) to build one wheel per platform instead of per Python version. + +**Rationale:** +- `maturin-action` handles cross-compilation, manylinux, and all platforms automatically +- Replaces the manual process (2 Macs + Docker + `twine upload`) +- Produces ABI3 wheels for macOS x86_64/arm64, Linux x86_64/aarch64, Windows x86_64 +- PyPI token is simpler than OIDC (Trusted Publishing) for immediate setup +- Manual `workflow_dispatch` runs are build-only by default to avoid accidental PyPI publication + +### 9. `unsafe impl Send/Sync` for `Parser` + +**Decision:** Keep and verify after the `bgpkit-parser` bump. + +**Rationale:** The `ElemIterator` type may have changed in v0.17.0. If `BgpkitParser::into_iter()` no longer returns `Send`, we switch to a different approach (e.g., `into_elem_iter()` or `into_fallible_elem_iter()`). + +## Data Structures + +### Python `Elem` (updated) + +```rust +#[pyclass] +#[derive(Clone, PartialEq, Serialize)] +pub struct Elem { + #[pyo3(get, set)] pub timestamp: f64, + #[pyo3(get, set)] pub elem_type: String, + #[pyo3(get, set)] pub peer_ip: String, + #[pyo3(get, set)] pub peer_asn: u32, + #[pyo3(get, set)] pub prefix: String, + #[pyo3(get, set)] pub next_hop: Option, + #[pyo3(get, set)] pub as_path: Option, + #[pyo3(get, set)] pub origin_asns: Option>, + #[pyo3(get, set)] pub origin: Option, + #[pyo3(get, set)] pub local_pref: Option, + #[pyo3(get, set)] pub med: Option, + #[pyo3(get, set)] pub communities: Option>, + #[pyo3(get, set)] pub atomic: Option, + #[pyo3(get, set)] pub aggr_asn: Option, + #[pyo3(get, set)] pub aggr_ip: Option, + // NEW in v0.17 + #[pyo3(get, set)] pub peer_bgp_id: Option, + #[pyo3(get, set)] pub only_to_customer: Option, +} +``` + +### Python `RouteElem` (new) + +```rust +#[pyclass] +pub struct RouteElem { + #[pyo3(get, set)] pub timestamp: f64, + 
#[pyo3(get, set)] pub elem_type: String, + #[pyo3(get, set)] pub peer_ip: String, + #[pyo3(get, set)] pub peer_asn: u32, + #[pyo3(get, set)] pub prefix: String, + #[pyo3(get, set)] pub as_path: Option, +} +``` + +### Python `Filter` (new) + +```rust +#[pyclass] +pub struct Filter { + inner: bgpkit_parser::parser::Filter, +} + +#[pymethods] +impl Filter { + #[new] + #[pyo3(signature = (filter_type, filter_value))] + fn new(filter_type: String, filter_value: String) -> PyResult { + let inner = bgpkit_parser::parser::Filter::new(filter_type.as_str(), filter_value.as_str()) + .map_err(|e| PyValueError::new_err(e.to_string()))?; + Ok(Filter { inner }) + } +} +``` + +## Single-PR Implementation Plan + +### Phase 1: Dependency Bump + Compile + +**1. `Cargo.toml`** +```toml +[package] +name = "pybgpkit-parser" +version = "0.7.0" +# ... rest unchanged ... + +[dependencies] +bgpkit-parser = "0.17.0" +pyo3 = { version = "0.28", features = ["extension-module", "abi3-py39"] } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1" + +[build-dependencies] +pyo3-build-config = "0.28" +``` + +**2. `build.rs`** +```rust +fn main() { + pyo3_build_config::add_extension_module_link_args(); +} +``` +No changes — `add_extension_module_link_args()` is stable in 0.28. + +**3. `pyproject.toml`** +```toml +[build-system] +requires = ["maturin>=1.0,<2.0"] +build-backend = "maturin" + +[project] +name = "pybgpkit-parser" +version = "0.7.0" +description = "Python binding for bgpkit-parser" +readme = "README.md" +license = { text = "MIT" } +requires-python = ">=3.9" +classifiers = [ + "Programming Language :: Rust", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", +] +``` + +**4. 
Run `cargo check`** +- Fix compile errors iteratively +- Check if `ElemIterator` type changed in v0.17.0 +- Verify `unsafe impl Send` still compiles + +### Phase 2: `src/lib.rs` Rewrite + +**5. `convert_elem` update** +- Map new fields: `peer_bgp_id`, `only_to_customer` +- Keep all existing field mappings + +**6. `Elem` struct update** +- Add `peer_bgp_id: Option` +- Add `only_to_customer: Option` +- Keep all existing `#[pyo3(get, set)]` fields + +**7. `Elem::to_dict` update** +- Add `"peer_bgp_id"` and `"only_to_customer"` entries + +**8. `Elem` utility methods (new)** +- `is_announcement()` +- `is_withdrawal()` +- `get_origin_asn()` +- `get_origin_asns()` +- `has_as_path()` +- `as_dict()` +- `origin_asn` property +- `to_json()` +- `to_psv()` +- `get_psv_header()` + +**9. `Elem::__repr__` (new)** +```rust +#[pyo3(name = "__repr__")] +fn repr(&self) -> PyResult { + Ok(format!("", self.prefix, self.peer_ip, self.elem_type)) +} +``` + +**10. `Filter` class (new)** +- `#[pyclass]` wrapper around `bgpkit_parser::parser::Filter` +- `#[new]` constructor: `Filter(filter_type, filter_value)` +- Python-native helper constructors: `peer_ip`, `peer_ips`, `origin_asn`, `prefix`, `elem_type` +- Expose to the module + +**11. `Parser` struct update** +- Keep `elem_iter: ElemIterator>` +- Verify `Send` bound after v0.17.0 bump + +**12. `Parser::new` (keep signature)** +```rust +#[pyo3(signature = (url, filters=None, cache_dir=None))] +fn new( + url: String, + filters: Option>, + cache_dir: Option, +) -> PyResult +``` +Keep backward-compatible `HashMap` filter API. + +**13. `Parser::from_filters` (new, classmethod)** +```rust +#[staticmethod] +#[pyo3(signature = (url, filters, cache_dir=None))] +fn from_filters( + url: String, + filters: Vec>, + cache_dir: Option, +) -> PyResult { ... } +``` +- Construct `BgpkitParser` from URL +- Add filters via `with_filters` or `add_filters` using the `.inner` of each `Filter` +- Return `Parser` with the iterator + +**14. 
`Parser::parse_all` (optimize)** +- Keep `Vec>` return type +- Use `py.detach(...)` to parse and convert `BgpElem` → Rust `Elem` outside the Python interpreter, then reacquire Python only to allocate Python objects + +**15. `Parser::parse_next` (error-safe allocation)** +- Return `PyResult>>` +- Avoid `.unwrap()` on Python allocation + +**16. `Parser::__next__` (error-safe allocation)** +- Return `PyResult>>` +- Avoid `.unwrap()` on Python allocation + +**17. Parser and RouteParser utility methods + module registration** +- Add `Parser.count()` (stream-consuming) +- Add `Parser.iter_batches(batch_size)` returning `BatchIterator` +- Add `RouteElem` and `RouteParser` for upstream `into_route_iter()` +- Add `RouteParser.count()`, `RouteParser.parse_all()`, `RouteParser.parse_next()`, and `RouteParser.iter_batches(batch_size)` +- Add projected tuple iteration: `iter_tuples(fields)` and `iter_tuple_batches(fields, batch_size)` for `Parser` and `RouteParser` +- Register `Elem`, `RouteElem`, `Filter`, `Parser`, `BatchIterator`, tuple iterators, `RouteParser`, and `RouteBatchIterator` +- Add module constants: `ELEM_TYPE_ANNOUNCE`, `ELEM_TYPE_WITHDRAW`, `PSV_HEADER`, `BASIC_FIELDS`, `ROUTE_FIELDS`, `NEXT_HOP_FIELDS` + +### Phase 3: Benchmarks + +**17. `benches/parse_bench.rs` (new)** +- `criterion` benchmark for Rust native iteration +- `[[bench]]` entry in `Cargo.toml` + +**18. `tests/benchmark.py` (new)** +- `time.perf_counter` benchmark +- Compares: `parse_all`, `for elem in parser`, `iter_batches`, projected tuple iteration, `RouteParser` equivalents, `to_dict()` +- Uses `https://spaces.bgpkit.org/parser/update-example` as test data + +**19. `tests/test_api.py` (new)** +- Test `Filter` construction +- Test `Parser.from_filters` +- Test new `Elem` fields (`peer_bgp_id`, `only_to_customer`) +- Test `__repr__` and `__str__` +- Test `to_dict()` contains all fields + +### Phase 4: CI/CD + +**20. 
`.github/workflows/release.yml` (full rewrite)** +- Replace the current simple format-check + create-release workflow +- Add: `build-sdist` job, `build-wheels` matrix job, `publish-pypi` job +- Use `PyO3/maturin-action@v1` +- Platform matrix: macOS x86_64, macOS arm64, Linux x86_64, Linux aarch64, Windows x86_64 +- ABI3 Python compatibility: 3.9–3.13 from one wheel per platform +- Trigger: `v*` tags + `workflow_dispatch` +- PyPI publish via `MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }}` +- GitHub Release creation via `softprops/action-gh-release@v2` + +**21. `.github/workflows/rust.yaml`** +- Keep format + clippy on PR/push; add a `python-api` job that builds the extension and runs `tests/test_api.py` + +**22. `BUILD.md` (rewrite)** +- Document the automated release process (push tag → CI builds + publishes) +- Keep the manual Docker/`build.sh` process as fallback documentation + +**23. `CHANGELOG.md` (add entry)** +- v0.7.0: dependency bump, new fields, `Filter` class, benchmarks, CI/CD automation + +**24. `README.md` (update)** +- Update `Elem` field list to show new fields +- Document `Filter` class and `from_filters` constructor +- Update installation notes + +### Phase 5: Test + Verify + +**25. Local testing** +- `maturin develop` +- Run `examples/filter_count_print.py` +- Run `tests/benchmark.py` +- Run `tests/test_api.py` + +**26. CI testing** +- Open PR — `rust.yaml` runs format + clippy and the Python API tests +- Verify no regressions + +**27. 
Post-merge release test** +- Run `workflow_dispatch` with `publish=false` to verify build-only release workflow +- Verify all artifacts build successfully +- Push the real `v0.7.0` tag only when ready to publish to PyPI + +## Changes to Existing Files (Summary) + +| File | Change | +|------|--------| +| `Cargo.toml` | Bump `bgpkit-parser` → `0.17.0`, `pyo3` → `0.28` with `abi3-py39`, `pyo3-build-config` → `0.28`, add `[[bench]]` | +| `src/lib.rs` | Full rewrite: new fields, `RouteParser`, projected tuple iteration, `Filter` helpers, `from_filters`, `__repr__`, `count`, `iter_batches`, constants, `Bound` API migration | +| `pyproject.toml` | Add `project` metadata, `requires-python`, classifiers | +| `BUILD.md` | Rewrite to document CI workflow; keep manual fallback | +| `CHANGELOG.md` | Add v0.7.0 entry | +| `README.md` | Update API docs, new fields, `Filter` class | +| `.github/workflows/release.yml` | Full rewrite: add build matrix + PyPI publish + GitHub Release | + +## New Files (Summary) + +| File | Purpose | +|------|---------| +| `benches/parse_bench.rs` | Rust `criterion` benchmark for native iteration | +| `tests/benchmark.py` | Python benchmark comparing `parse_all` vs iteration vs `to_dict` | +| `tests/test_api.py` | Python tests for new API (`Filter`, new fields, `from_filters`) | + +## Pre-PR Checklist + +- [ ] `PYPI_API_TOKEN` secret added to GitHub repo +- [ ] `cargo check` passes after dependency bump +- [ ] `maturin develop` builds successfully +- [ ] `examples/filter_count_print.py` runs without error +- [ ] `tests/test_api.py` passes +- [ ] `tests/benchmark.py` runs and produces meaningful numbers +- [ ] `benches/parse_bench.rs` compiles and runs +- [ ] `cargo fmt` passes +- [ ] `cargo clippy -- -D warnings` passes + +## Post-Merge Checklist + +- [ ] Run release workflow manually with `publish=false` +- [ ] Verify all artifacts are built but not published +- [ ] Verify GitHub Release is created with changelog +- [ ] If beta works, delete tag and 
push `v0.7.0` +- [ ] Update `AGENTS.md` with new anti-patterns (CI token handling, etc.) + +## Open Questions + +1. **Should we expose `BgpRouteElem` / `into_route_iter`?** + - **Decision:** Yes. Exposed as `RouteElem` and `RouteParser` because it maps cleanly and directly supports performance comparisons. + +2. **Should we expose `MrtRecord` / `into_record_iter`?** + - **Default:** No. Adds complexity to the `Parser` struct (different iterator types). Follow-up PR. + +3. **Should we add Windows to the CI matrix?** + - **Decision:** Yes. `maturin-action` supports it out of the box. No platform-specific Rust code. + +4. **Should we build `universal2` macOS wheels instead of separate x86_64/arm64?** + - **Default:** No. Separate wheels are smaller. Revisit if users complain. + +5. **Should we migrate to Trusted Publishing (OIDC) instead of API token?** + - **Default:** No. API token works now. OIDC is a follow-up security improvement. + +## Notes + +- `bgpkit-parser` 0.17.0 requires Rust 1.87.0 (MSRV). We have 1.91.1. ✓ +- PyO3 0.28.3 requires Rust 1.83.0. We have 1.91.1. ✓ +- The `unsafe impl Send/Sync` for `Parser` must be verified after the bump. If the v0.17.0 `ElemIterator` is no longer `Send`, we use a different approach. +- `maturin-action` builds `manylinux` wheels automatically. No custom `Dockerfile` needed for CI. +- Keep `Dockerfile` and `build.sh` as manual fallbacks. Remove in a future cleanup PR. +- The CI workflow will produce **5 ABI3 wheels** (one per supported platform) + **1 sdist** per release. 
diff --git a/benches/parse_bench.rs b/benches/parse_bench.rs new file mode 100644 index 0000000..b082bbe --- /dev/null +++ b/benches/parse_bench.rs @@ -0,0 +1,36 @@ +use bgpkit_parser::BgpkitParser; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; + +fn bench_input() -> String { + std::env::var("BGP_TEST_FILE") + .unwrap_or_else(|_| "https://spaces.bgpkit.org/parser/update-example".to_string()) +} + +fn bench_rust_native_iteration(c: &mut Criterion) { + let input = bench_input(); + c.bench_function("rust native elem iteration", |b| { + b.iter(|| { + let parser = BgpkitParser::new(input.as_str()).expect("create parser"); + let count = parser.into_elem_iter().count(); + black_box(count); + }); + }); +} + +fn bench_rust_native_route_iteration(c: &mut Criterion) { + let input = bench_input(); + c.bench_function("rust native route iteration", |b| { + b.iter(|| { + let parser = BgpkitParser::new(input.as_str()).expect("create parser"); + let count = parser.into_route_iter().count(); + black_box(count); + }); + }); +} + +criterion_group!( + benches, + bench_rust_native_iteration, + bench_rust_native_route_iteration +); +criterion_main!(benches); diff --git a/pyproject.toml b/pyproject.toml index 07dd28c..149af72 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,22 @@ [build-system] requires = ["maturin>=1.0,<2.0"] build-backend = "maturin" + +[project] +name = "pybgpkit-parser" +dynamic = ["version"] +description = "Python binding for bgpkit-parser" +readme = "README.md" +requires-python = ">=3.9" +license = "MIT" +classifiers = [ + "Programming Language :: Rust", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", +] + +[project.optional-dependencies] +dev = ["pytest", "pytest-benchmark"] diff --git a/src/lib.rs b/src/lib.rs index ec4747d..601e875 100644 --- a/src/lib.rs 
+++ b/src/lib.rs @@ -1,7 +1,10 @@ use bgpkit_parser::models::*; +use bgpkit_parser::parser::Filter as BgpkitFilter; use bgpkit_parser::*; +use pyo3::conversion::IntoPyObjectExt; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; +use pyo3::types::PyTuple; use serde::Serialize; use std::collections::HashMap; use std::io::Read; @@ -17,6 +20,7 @@ fn pybgpkit_parser(_py: Python, m: &Bound) -> PyResult<()> { }, peer_ip: elem.peer_ip.to_string(), peer_asn: elem.peer_asn.to_u32(), + peer_bgp_id: elem.peer_bgp_id.map(|v| v.to_string()), prefix: elem.prefix.to_string(), next_hop: elem.next_hop.map(|v| v.to_string()), as_path: elem.as_path.map(|v| v.to_string()), @@ -35,10 +39,219 @@ fn pybgpkit_parser(_py: Python, m: &Bound) -> PyResult<()> { }, aggr_asn: elem.aggr_asn.map(|v| v.to_u32()), aggr_ip: elem.aggr_ip.map(|v| v.to_string()), + only_to_customer: elem.only_to_customer.map(|v| v.to_u32()), } } - #[pyclass] + fn convert_route_elem(elem: BgpRouteElem) -> RouteElem { + RouteElem { + timestamp: elem.timestamp, + elem_type: match elem.elem_type { + ElemType::ANNOUNCE => "A".to_string(), + ElemType::WITHDRAW => "W".to_string(), + }, + peer_ip: elem.peer_ip.to_string(), + peer_asn: elem.peer_asn.to_u32(), + prefix: elem.prefix.to_string(), + as_path: elem.as_path.map(|v| v.to_string()), + } + } + + fn new_parser( + url: &str, + cache_dir: Option<&str>, + ) -> PyResult>> { + match cache_dir { + None => BgpkitParser::new(url).map_err(|e| PyValueError::new_err(e.to_string())), + Some(dir) => { + BgpkitParser::new_cached(url, dir).map_err(|e| PyValueError::new_err(e.to_string())) + } + } + } + + fn option_to_string(v: &Option) -> String { + v.as_ref().map(|x| x.to_string()).unwrap_or_default() + } + + fn option_vec_to_string(v: &Option>) -> String { + v.as_ref() + .map(|items| { + items + .iter() + .map(|x| x.to_string()) + .collect::>() + .join(",") + }) + .unwrap_or_default() + } + + #[derive(Clone, Copy)] + enum ElemField { + Timestamp, + ElemType, + PeerIp, + 
PeerAsn, + PeerBgpId, + Prefix, + NextHop, + AsPath, + OriginAsns, + OriginAsn, + Origin, + LocalPref, + Med, + Communities, + Atomic, + AggrAsn, + AggrIp, + OnlyToCustomer, + } + + #[derive(Clone, Copy)] + enum RouteField { + Timestamp, + ElemType, + PeerIp, + PeerAsn, + Prefix, + AsPath, + } + + fn parse_elem_field(field: &str) -> PyResult { + match field { + "timestamp" => Ok(ElemField::Timestamp), + "elem_type" | "type" => Ok(ElemField::ElemType), + "peer_ip" => Ok(ElemField::PeerIp), + "peer_asn" => Ok(ElemField::PeerAsn), + "peer_bgp_id" => Ok(ElemField::PeerBgpId), + "prefix" => Ok(ElemField::Prefix), + "next_hop" => Ok(ElemField::NextHop), + "as_path" => Ok(ElemField::AsPath), + "origin_asns" => Ok(ElemField::OriginAsns), + "origin_asn" => Ok(ElemField::OriginAsn), + "origin" => Ok(ElemField::Origin), + "local_pref" => Ok(ElemField::LocalPref), + "med" => Ok(ElemField::Med), + "communities" => Ok(ElemField::Communities), + "atomic" => Ok(ElemField::Atomic), + "aggr_asn" => Ok(ElemField::AggrAsn), + "aggr_ip" => Ok(ElemField::AggrIp), + "only_to_customer" => Ok(ElemField::OnlyToCustomer), + _ => Err(PyValueError::new_err(format!("unknown field: {field}"))), + } + } + + fn parse_route_field(field: &str) -> PyResult { + match field { + "timestamp" => Ok(RouteField::Timestamp), + "elem_type" | "type" => Ok(RouteField::ElemType), + "peer_ip" => Ok(RouteField::PeerIp), + "peer_asn" => Ok(RouteField::PeerAsn), + "prefix" => Ok(RouteField::Prefix), + "as_path" => Ok(RouteField::AsPath), + _ => Err(PyValueError::new_err(format!( + "unknown route field: {field}" + ))), + } + } + + fn parse_elem_fields(fields: Vec) -> PyResult> { + if fields.is_empty() { + return Err(PyValueError::new_err("fields must not be empty")); + } + fields + .iter() + .map(|f| parse_elem_field(f.as_str())) + .collect() + } + + fn parse_route_fields(fields: Vec) -> PyResult> { + if fields.is_empty() { + return Err(PyValueError::new_err("fields must not be empty")); + } + fields + .iter() + 
.map(|f| parse_route_field(f.as_str())) + .collect() + } + + fn elem_field_to_py(py: Python, elem: &BgpElem, field: ElemField) -> PyResult> { + match field { + ElemField::Timestamp => elem.timestamp.into_py_any(py), + ElemField::ElemType => match elem.elem_type { + ElemType::ANNOUNCE => "A".into_py_any(py), + ElemType::WITHDRAW => "W".into_py_any(py), + }, + ElemField::PeerIp => elem.peer_ip.to_string().into_py_any(py), + ElemField::PeerAsn => elem.peer_asn.to_u32().into_py_any(py), + ElemField::PeerBgpId => elem.peer_bgp_id.map(|v| v.to_string()).into_py_any(py), + ElemField::Prefix => elem.prefix.to_string().into_py_any(py), + ElemField::NextHop => elem.next_hop.map(|v| v.to_string()).into_py_any(py), + ElemField::AsPath => elem.as_path.as_ref().map(|v| v.to_string()).into_py_any(py), + ElemField::OriginAsns => elem + .origin_asns + .as_ref() + .map(|v| v.iter().map(|x| x.to_u32()).collect::>()) + .into_py_any(py), + ElemField::OriginAsn => elem + .origin_asns + .as_ref() + .and_then(|origin_asns| (origin_asns.len() == 1).then_some(origin_asns[0].to_u32())) + .into_py_any(py), + ElemField::Origin => elem.origin.map(|v| v.to_string()).into_py_any(py), + ElemField::LocalPref => elem.local_pref.into_py_any(py), + ElemField::Med => elem.med.into_py_any(py), + ElemField::Communities => elem + .communities + .as_ref() + .map(|v| v.iter().map(|x| x.to_string()).collect::>()) + .into_py_any(py), + ElemField::Atomic => elem.atomic.into_py_any(py), + ElemField::AggrAsn => elem.aggr_asn.map(|v| v.to_u32()).into_py_any(py), + ElemField::AggrIp => elem.aggr_ip.map(|v| v.to_string()).into_py_any(py), + ElemField::OnlyToCustomer => elem.only_to_customer.map(|v| v.to_u32()).into_py_any(py), + } + } + + fn route_field_to_py( + py: Python, + elem: &BgpRouteElem, + field: RouteField, + ) -> PyResult> { + match field { + RouteField::Timestamp => elem.timestamp.into_py_any(py), + RouteField::ElemType => match elem.elem_type { + ElemType::ANNOUNCE => "A".into_py_any(py), + 
ElemType::WITHDRAW => "W".into_py_any(py), + }, + RouteField::PeerIp => elem.peer_ip.to_string().into_py_any(py), + RouteField::PeerAsn => elem.peer_asn.to_u32().into_py_any(py), + RouteField::Prefix => elem.prefix.to_string().into_py_any(py), + RouteField::AsPath => elem.as_path.as_ref().map(|v| v.to_string()).into_py_any(py), + } + } + + fn elem_to_tuple(py: Python, elem: BgpElem, fields: &[ElemField]) -> PyResult> { + let values = fields + .iter() + .map(|field| elem_field_to_py(py, &elem, *field)) + .collect::>>()?; + Ok(PyTuple::new(py, values)?.unbind()) + } + + fn route_to_tuple( + py: Python, + elem: BgpRouteElem, + fields: &[RouteField], + ) -> PyResult> { + let values = fields + .iter() + .map(|field| route_field_to_py(py, &elem, *field)) + .collect::>>()?; + Ok(PyTuple::new(py, values)?.unbind()) + } + + #[pyclass(skip_from_py_object)] #[derive(Clone, PartialEq, Serialize)] pub struct Elem { #[pyo3(get, set)] @@ -50,6 +263,8 @@ fn pybgpkit_parser(_py: Python, m: &Bound) -> PyResult<()> { #[pyo3(get, set)] pub peer_asn: u32, #[pyo3(get, set)] + pub peer_bgp_id: Option, + #[pyo3(get, set)] pub prefix: String, #[pyo3(get, set)] pub next_hop: Option, @@ -71,50 +286,315 @@ fn pybgpkit_parser(_py: Python, m: &Bound) -> PyResult<()> { pub aggr_asn: Option, #[pyo3(get, set)] pub aggr_ip: Option, + #[pyo3(get, set)] + pub only_to_customer: Option, } #[pymethods] impl Elem { - pub fn to_dict(&self, py: Python) -> PyObject { + pub fn to_dict<'py>(&self, py: Python<'py>) -> PyResult> { + use pyo3::types::PyDict; + let dict = PyDict::new(py); + dict.set_item("timestamp", self.timestamp)?; + dict.set_item("elem_type", self.elem_type.clone())?; + dict.set_item("peer_ip", self.peer_ip.clone())?; + dict.set_item("peer_asn", self.peer_asn)?; + dict.set_item("peer_bgp_id", self.peer_bgp_id.clone())?; + dict.set_item("prefix", self.prefix.clone())?; + dict.set_item("next_hop", self.next_hop.clone())?; + dict.set_item("as_path", self.as_path.clone())?; + 
dict.set_item("origin_asns", self.origin_asns.clone())?; + dict.set_item("origin", self.origin.clone())?; + dict.set_item("local_pref", self.local_pref)?; + dict.set_item("med", self.med)?; + dict.set_item("communities", self.communities.clone())?; + dict.set_item("atomic", self.atomic.clone())?; + dict.set_item("aggr_asn", self.aggr_asn)?; + dict.set_item("aggr_ip", self.aggr_ip.clone())?; + dict.set_item("only_to_customer", self.only_to_customer)?; + Ok(dict) + } + + pub fn as_dict<'py>(&self, py: Python<'py>) -> PyResult> { + self.to_dict(py) + } + + #[getter(origin_asn)] + fn origin_asn_value(&self) -> Option { + self.get_origin_asn() + } + + fn __getstate__<'py>(&self, py: Python<'py>) -> PyResult> { + self.to_dict(py) + } + + #[pyo3(name = "__str__")] + fn str_repr(&self) -> PyResult { + self.to_json() + } + + #[pyo3(name = "__repr__")] + fn repr(&self) -> PyResult { + Ok(format!( + "", + self.prefix, self.peer_ip, self.elem_type + )) + } + + pub fn is_announcement(&self) -> bool { + self.elem_type.eq_ignore_ascii_case("A") + } + + pub fn is_withdrawal(&self) -> bool { + self.elem_type.eq_ignore_ascii_case("W") + } + + pub fn get_origin_asn(&self) -> Option { + self.origin_asns + .as_ref() + .and_then(|origin_asns| (origin_asns.len() == 1).then_some(origin_asns[0])) + } + + pub fn get_origin_asns(&self) -> Option> { + self.origin_asns.clone() + } + + pub fn has_as_path(&self) -> bool { + self.as_path.is_some() + } + + pub fn to_json(&self) -> PyResult { + serde_json::to_string(self).map_err(|e| PyValueError::new_err(e.to_string())) + } + + #[staticmethod] + pub fn get_psv_header() -> String { + [ + "type", + "timestamp", + "peer_ip", + "peer_asn", + "prefix", + "as_path", + "origin_asns", + "origin", + "next_hop", + "local_pref", + "med", + "communities", + "atomic", + "aggr_asn", + "aggr_ip", + "only_to_customer", + ] + .join("|") + } + + pub fn to_psv(&self) -> String { + format!( + "{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}", + self.elem_type, + 
self.timestamp, + self.peer_ip, + self.peer_asn, + self.prefix, + option_to_string(&self.as_path), + option_vec_to_string(&self.origin_asns), + option_to_string(&self.origin), + option_to_string(&self.next_hop), + option_to_string(&self.local_pref), + option_to_string(&self.med), + option_vec_to_string(&self.communities), + option_to_string(&self.atomic), + option_to_string(&self.aggr_asn), + option_to_string(&self.aggr_ip), + option_to_string(&self.only_to_customer), + ) + } + } + + #[pyclass(skip_from_py_object)] + #[derive(Clone, PartialEq, Serialize)] + pub struct RouteElem { + #[pyo3(get, set)] + pub timestamp: f64, + #[pyo3(get, set)] + pub elem_type: String, + #[pyo3(get, set)] + pub peer_ip: String, + #[pyo3(get, set)] + pub peer_asn: u32, + #[pyo3(get, set)] + pub prefix: String, + #[pyo3(get, set)] + pub as_path: Option, + } + + #[pymethods] + impl RouteElem { + pub fn to_dict<'py>(&self, py: Python<'py>) -> PyResult> { use pyo3::types::PyDict; let dict = PyDict::new(py); - dict.set_item("timestamp", self.timestamp).unwrap(); - dict.set_item("elem_type", self.elem_type.clone()).unwrap(); - dict.set_item("peer_ip", self.peer_ip.clone()).unwrap(); - dict.set_item("peer_asn", self.peer_asn).unwrap(); - dict.set_item("prefix", self.prefix.clone()).unwrap(); - dict.set_item("next_hop", self.next_hop.clone()).unwrap(); - dict.set_item("as_path", self.as_path.clone()).unwrap(); - dict.set_item("origin_asns", self.origin_asns.clone()) - .unwrap(); - dict.set_item("origin", self.origin.clone()).unwrap(); - dict.set_item("local_pref", self.local_pref).unwrap(); - dict.set_item("med", self.med).unwrap(); - dict.set_item("communities", self.communities.clone()) - .unwrap(); - dict.set_item("atomic", self.atomic.clone()).unwrap(); - dict.set_item("aggr_asn", self.aggr_asn).unwrap(); - dict.set_item("aggr_ip", self.aggr_ip.clone()).unwrap(); - dict.into() - } - - fn __getstate__(&self, py: Python) -> PyObject { + dict.set_item("timestamp", self.timestamp)?; + 
dict.set_item("elem_type", self.elem_type.clone())?; + dict.set_item("peer_ip", self.peer_ip.clone())?; + dict.set_item("peer_asn", self.peer_asn)?; + dict.set_item("prefix", self.prefix.clone())?; + dict.set_item("as_path", self.as_path.clone())?; + Ok(dict) + } + + pub fn as_dict<'py>(&self, py: Python<'py>) -> PyResult> { self.to_dict(py) } + pub fn is_announcement(&self) -> bool { + self.elem_type.eq_ignore_ascii_case("A") + } + + pub fn is_withdrawal(&self) -> bool { + self.elem_type.eq_ignore_ascii_case("W") + } + + pub fn has_as_path(&self) -> bool { + self.as_path.is_some() + } + + pub fn to_json(&self) -> PyResult { + serde_json::to_string(self).map_err(|e| PyValueError::new_err(e.to_string())) + } + #[pyo3(name = "__str__")] fn str_repr(&self) -> PyResult { - Ok(serde_json::to_string(self).unwrap().to_string()) + self.to_json() + } + + #[pyo3(name = "__repr__")] + fn repr(&self) -> PyResult { + Ok(format!( + "", + self.prefix, self.peer_ip, self.elem_type + )) + } + } + + #[pyclass(name = "Filter", skip_from_py_object)] + #[derive(Clone)] + struct PyFilter { + inner: BgpkitFilter, + } + + #[pymethods] + impl PyFilter { + #[new] + #[pyo3(signature = (filter_type, filter_value))] + fn new(filter_type: String, filter_value: String) -> PyResult { + Self::from_parts(filter_type.as_str(), filter_value.as_str()) + } + + #[staticmethod] + fn peer_ip(peer_ip: String) -> PyResult { + Self::from_parts("peer_ip", peer_ip.as_str()) + } + + #[staticmethod] + fn peer_ips(peer_ips: Vec) -> PyResult { + Self::from_parts("peer_ips", peer_ips.join(",").as_str()) + } + + #[staticmethod] + fn origin_asn(origin_asn: u32) -> PyResult { + Self::from_parts("origin_asn", origin_asn.to_string().as_str()) + } + + #[staticmethod] + fn prefix(prefix: String) -> PyResult { + Self::from_parts("prefix", prefix.as_str()) + } + + #[staticmethod] + fn elem_type(elem_type: String) -> PyResult { + Self::from_parts("type", elem_type.as_str()) + } + + #[pyo3(name = "__repr__")] + fn repr(&self) 
-> PyResult { + Ok(format!("", self.inner)) + } + } + + impl PyFilter { + fn from_parts(filter_type: &str, filter_value: &str) -> PyResult { + let inner = BgpkitFilter::new(filter_type, filter_value) + .map_err(|e| PyValueError::new_err(e.to_string()))?; + Ok(PyFilter { inner }) } } #[pyclass] struct Parser { - elem_iter: ElemIterator>, + elem_iter: Option>>, + } + + #[pyclass] + struct BatchIterator { + elem_iter: Option>>, + batch_size: usize, + } + + #[pyclass] + struct RouteParser { + route_iter: Option>>, + } + + #[pyclass] + struct RouteBatchIterator { + route_iter: Option>>, + batch_size: usize, + } + + #[pyclass] + struct TupleIterator { + elem_iter: Option>>, + fields: Vec, + } + + #[pyclass] + struct TupleBatchIterator { + elem_iter: Option>>, + fields: Vec, + batch_size: usize, + } + + #[pyclass] + struct RouteTupleIterator { + route_iter: Option>>, + fields: Vec, + } + + #[pyclass] + struct RouteTupleBatchIterator { + route_iter: Option>>, + fields: Vec, + batch_size: usize, } unsafe impl Send for Parser {} unsafe impl Sync for Parser {} + unsafe impl Send for BatchIterator {} + unsafe impl Sync for BatchIterator {} + unsafe impl Send for RouteParser {} + unsafe impl Sync for RouteParser {} + unsafe impl Send for RouteBatchIterator {} + unsafe impl Sync for RouteBatchIterator {} + unsafe impl Send for TupleIterator {} + unsafe impl Sync for TupleIterator {} + unsafe impl Send for TupleBatchIterator {} + unsafe impl Sync for TupleBatchIterator {} + unsafe impl Send for RouteTupleIterator {} + unsafe impl Sync for RouteTupleIterator {} + unsafe impl Send for RouteTupleBatchIterator {} + unsafe impl Sync for RouteTupleBatchIterator {} #[pymethods] impl Parser { @@ -125,50 +605,427 @@ fn pybgpkit_parser(_py: Python, m: &Bound) -> PyResult<()> { filters: Option>, cache_dir: Option, ) -> PyResult { - let mut parser = match cache_dir { - None => BgpkitParser::new(url.as_str()).unwrap(), - Some(dir) => BgpkitParser::new_cached(url.as_str(), 
dir.as_str()).unwrap(), - }; + let mut parser = new_parser(url.as_str(), cache_dir.as_deref())?; if let Some(filters) = filters { for (k, v) in filters { - parser = match parser.add_filter(k.as_str(), v.as_str()) { - Ok(p) => p, - Err(e) => return Err(PyValueError::new_err(e.to_string())), - } + parser = parser + .add_filter(k.as_str(), v.as_str()) + .map_err(|e| PyValueError::new_err(e.to_string()))?; } } - let elem_iter = parser.into_iter(); + let elem_iter = Some(parser.into_elem_iter()); + Ok(Parser { elem_iter }) + } + + #[staticmethod] + #[pyo3(signature = (url, filters, cache_dir=None))] + fn from_filters( + url: String, + filters: Vec>, + cache_dir: Option, + ) -> PyResult { + let parser = new_parser(url.as_str(), cache_dir.as_deref())?; + let filters = filters + .iter() + .map(|f| f.inner.clone()) + .collect::>(); + let elem_iter = Some(parser.with_filters(&filters).into_elem_iter()); Ok(Parser { elem_iter }) } fn parse_all(&mut self, py: Python) -> PyResult>> { - let mut elems = vec![]; - for e in self.elem_iter.by_ref() { - elems.push(Py::new(py, convert_elem(e))?); - } - Ok(elems) + let Some(mut elem_iter) = self.elem_iter.take() else { + return Ok(Vec::new()); + }; + let elems = py.detach(|| elem_iter.by_ref().map(convert_elem).collect::>()); + elems.into_iter().map(|e| Py::new(py, e)).collect() } fn parse_next(&mut self, py: Python) -> PyResult>> { - Ok(self - .elem_iter + let Some(elem_iter) = self.elem_iter.as_mut() else { + return Ok(None); + }; + elem_iter .next() - .map(|e| Py::new(py, convert_elem(e)).unwrap())) + .map(|e| Py::new(py, convert_elem(e))) + .transpose() + } + + fn count(&mut self, py: Python) -> usize { + let Some(elem_iter) = self.elem_iter.take() else { + return 0; + }; + py.detach(|| elem_iter.count()) + } + + fn iter_batches(&mut self, batch_size: usize) -> PyResult { + if batch_size == 0 { + return Err(PyValueError::new_err("batch_size must be greater than 0")); + } + Ok(BatchIterator { + elem_iter: self.elem_iter.take(), + 
batch_size, + }) + } + + fn iter_tuples(&mut self, fields: Vec) -> PyResult { + Ok(TupleIterator { + elem_iter: self.elem_iter.take(), + fields: parse_elem_fields(fields)?, + }) + } + + fn iter_tuple_batches( + &mut self, + fields: Vec, + batch_size: usize, + ) -> PyResult { + if batch_size == 0 { + return Err(PyValueError::new_err("batch_size must be greater than 0")); + } + Ok(TupleBatchIterator { + elem_iter: self.elem_iter.take(), + fields: parse_elem_fields(fields)?, + batch_size, + }) } fn __iter__(slf: PyRef) -> PyRef { slf } - fn __next__(mut slf: PyRefMut, py: Python) -> Option> { - slf.elem_iter + fn __next__(mut slf: PyRefMut, py: Python) -> PyResult>> { + let Some(elem_iter) = slf.elem_iter.as_mut() else { + return Ok(None); + }; + elem_iter .next() - .map(|e| Py::new(py, convert_elem(e)).unwrap()) + .map(|e| Py::new(py, convert_elem(e))) + .transpose() + } + } + + #[pymethods] + impl BatchIterator { + fn __iter__(slf: PyRef) -> PyRef { + slf + } + + fn __next__(mut slf: PyRefMut, py: Python) -> PyResult>>> { + let batch_size = slf.batch_size; + let Some(elem_iter) = slf.elem_iter.as_mut() else { + return Ok(None); + }; + + let elems = py.detach(|| { + elem_iter + .by_ref() + .take(batch_size) + .map(convert_elem) + .collect::>() + }); + + if elems.is_empty() { + slf.elem_iter = None; + return Ok(None); + } + + elems + .into_iter() + .map(|e| Py::new(py, e)) + .collect::>>() + .map(Some) + } + } + + #[pymethods] + impl TupleIterator { + fn __iter__(slf: PyRef) -> PyRef { + slf + } + + fn __next__(mut slf: PyRefMut, py: Python) -> PyResult>> { + let fields = slf.fields.clone(); + let Some(elem_iter) = slf.elem_iter.as_mut() else { + return Ok(None); + }; + elem_iter + .next() + .map(|elem| elem_to_tuple(py, elem, &fields)) + .transpose() + } + } + + #[pymethods] + impl TupleBatchIterator { + fn __iter__(slf: PyRef) -> PyRef { + slf + } + + fn __next__(mut slf: PyRefMut, py: Python) -> PyResult>>> { + let fields = slf.fields.clone(); + let batch_size = 
slf.batch_size; + let Some(elem_iter) = slf.elem_iter.as_mut() else { + return Ok(None); + }; + + let elems = py.detach(|| { + elem_iter + .by_ref() + .take(batch_size) + .collect::>() + }); + if elems.is_empty() { + slf.elem_iter = None; + return Ok(None); + } + + elems + .into_iter() + .map(|elem| elem_to_tuple(py, elem, &fields)) + .collect::>>() + .map(Some) + } + } + + #[pymethods] + impl RouteParser { + #[new] + #[pyo3(signature = (url, filters=None, cache_dir=None))] + fn new( + url: String, + filters: Option>, + cache_dir: Option, + ) -> PyResult { + let mut parser = new_parser(url.as_str(), cache_dir.as_deref())?; + + if let Some(filters) = filters { + for (k, v) in filters { + parser = parser + .add_filter(k.as_str(), v.as_str()) + .map_err(|e| PyValueError::new_err(e.to_string()))?; + } + } + let route_iter = Some(parser.into_route_iter()); + Ok(RouteParser { route_iter }) + } + + #[staticmethod] + #[pyo3(signature = (url, filters, cache_dir=None))] + fn from_filters( + url: String, + filters: Vec>, + cache_dir: Option, + ) -> PyResult { + let parser = new_parser(url.as_str(), cache_dir.as_deref())?; + let filters = filters + .iter() + .map(|f| f.inner.clone()) + .collect::>(); + let route_iter = Some(parser.with_filters(&filters).into_route_iter()); + Ok(RouteParser { route_iter }) + } + + fn parse_all(&mut self, py: Python) -> PyResult>> { + let Some(mut route_iter) = self.route_iter.take() else { + return Ok(Vec::new()); + }; + let routes = py.detach(|| { + route_iter + .by_ref() + .map(convert_route_elem) + .collect::>() + }); + routes.into_iter().map(|e| Py::new(py, e)).collect() + } + + fn parse_next(&mut self, py: Python) -> PyResult>> { + let Some(route_iter) = self.route_iter.as_mut() else { + return Ok(None); + }; + route_iter + .next() + .map(|e| Py::new(py, convert_route_elem(e))) + .transpose() + } + + fn count(&mut self, py: Python) -> usize { + let Some(route_iter) = self.route_iter.take() else { + return 0; + }; + py.detach(|| 
route_iter.count()) + } + + fn iter_batches(&mut self, batch_size: usize) -> PyResult { + if batch_size == 0 { + return Err(PyValueError::new_err("batch_size must be greater than 0")); + } + Ok(RouteBatchIterator { + route_iter: self.route_iter.take(), + batch_size, + }) + } + + fn iter_tuples(&mut self, fields: Vec) -> PyResult { + Ok(RouteTupleIterator { + route_iter: self.route_iter.take(), + fields: parse_route_fields(fields)?, + }) + } + + fn iter_tuple_batches( + &mut self, + fields: Vec, + batch_size: usize, + ) -> PyResult { + if batch_size == 0 { + return Err(PyValueError::new_err("batch_size must be greater than 0")); + } + Ok(RouteTupleBatchIterator { + route_iter: self.route_iter.take(), + fields: parse_route_fields(fields)?, + batch_size, + }) + } + + fn __iter__(slf: PyRef) -> PyRef { + slf + } + + fn __next__(mut slf: PyRefMut, py: Python) -> PyResult>> { + let Some(route_iter) = slf.route_iter.as_mut() else { + return Ok(None); + }; + route_iter + .next() + .map(|e| Py::new(py, convert_route_elem(e))) + .transpose() + } + } + + #[pymethods] + impl RouteTupleIterator { + fn __iter__(slf: PyRef) -> PyRef { + slf + } + + fn __next__(mut slf: PyRefMut, py: Python) -> PyResult>> { + let fields = slf.fields.clone(); + let Some(route_iter) = slf.route_iter.as_mut() else { + return Ok(None); + }; + route_iter + .next() + .map(|route| route_to_tuple(py, route, &fields)) + .transpose() + } + } + + #[pymethods] + impl RouteTupleBatchIterator { + fn __iter__(slf: PyRef) -> PyRef { + slf + } + + fn __next__(mut slf: PyRefMut, py: Python) -> PyResult>>> { + let fields = slf.fields.clone(); + let batch_size = slf.batch_size; + let Some(route_iter) = slf.route_iter.as_mut() else { + return Ok(None); + }; + + let routes = py.detach(|| { + route_iter + .by_ref() + .take(batch_size) + .collect::>() + }); + if routes.is_empty() { + slf.route_iter = None; + return Ok(None); + } + + routes + .into_iter() + .map(|route| route_to_tuple(py, route, &fields)) + .collect::>>() 
+ .map(Some) + } + } + + #[pymethods] + impl RouteBatchIterator { + fn __iter__(slf: PyRef) -> PyRef { + slf + } + + fn __next__(mut slf: PyRefMut, py: Python) -> PyResult>>> { + let batch_size = slf.batch_size; + let Some(route_iter) = slf.route_iter.as_mut() else { + return Ok(None); + }; + + let routes = py.detach(|| { + route_iter + .by_ref() + .take(batch_size) + .map(convert_route_elem) + .collect::>() + }); + + if routes.is_empty() { + slf.route_iter = None; + return Ok(None); + } + + routes + .into_iter() + .map(|e| Py::new(py, e)) + .collect::>>() + .map(Some) } } m.add_class::()?; + m.add_class::()?; + m.add_class::()?; m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add("ELEM_TYPE_ANNOUNCE", "A")?; + m.add("ELEM_TYPE_WITHDRAW", "W")?; + m.add("PSV_HEADER", Elem::get_psv_header())?; + m.add( + "BASIC_FIELDS", + vec!["timestamp", "elem_type", "peer_ip", "peer_asn", "prefix"], + )?; + m.add( + "ROUTE_FIELDS", + vec![ + "timestamp", + "elem_type", + "peer_ip", + "peer_asn", + "prefix", + "as_path", + ], + )?; + m.add( + "NEXT_HOP_FIELDS", + vec![ + "timestamp", + "elem_type", + "peer_ip", + "peer_asn", + "prefix", + "next_hop", + ], + )?; Ok(()) } diff --git a/tests/benchmark.py b/tests/benchmark.py new file mode 100644 index 0000000..1aa9936 --- /dev/null +++ b/tests/benchmark.py @@ -0,0 +1,153 @@ +"""Python-side performance benchmark for pybgpkit-parser. + +Run after a release build for meaningful numbers: + + maturin develop --release + python tests/benchmark.py [path-or-url] + +This script intentionally uses a remote BGPKIT sample file. It is meant for +manual performance checks, not deterministic CI assertions. 
+""" + +import os +import sys +import time + +from pybgpkit_parser import Parser, RouteParser + +URL = sys.argv[1] if len(sys.argv) > 1 else os.environ.get( + "PYBGPKIT_BENCH_INPUT", + "https://spaces.bgpkit.org/parser/update-example", +) + + +def bench_iteration(): + parser = Parser(URL) + start = time.perf_counter() + count = 0 + for _elem in parser: + count += 1 + elapsed = time.perf_counter() - start + return "iteration", count, elapsed + + +def bench_parse_all(): + parser = Parser(URL) + start = time.perf_counter() + elems = parser.parse_all() + elapsed = time.perf_counter() - start + return "parse_all", len(elems), elapsed + + +def bench_iter_batches(batch_size=1000): + parser = Parser(URL) + start = time.perf_counter() + count = 0 + for batch in parser.iter_batches(batch_size): + count += len(batch) + elapsed = time.perf_counter() - start + return f"iter_batches({batch_size})", count, elapsed + + +def bench_iter_tuples(): + parser = Parser(URL) + fields = ["timestamp", "prefix", "as_path"] + start = time.perf_counter() + count = 0 + for _row in parser.iter_tuples(fields): + count += 1 + elapsed = time.perf_counter() - start + return "iter_tuples(timestamp,prefix,as_path)", count, elapsed + + +def bench_iter_tuple_batches(batch_size=1000): + parser = Parser(URL) + fields = ["timestamp", "prefix", "as_path"] + start = time.perf_counter() + count = 0 + for batch in parser.iter_tuple_batches(fields, batch_size): + count += len(batch) + elapsed = time.perf_counter() - start + return f"iter_tuple_batches({batch_size})", count, elapsed + + +def bench_route_iteration(): + parser = RouteParser(URL) + start = time.perf_counter() + count = 0 + for _route in parser: + count += 1 + elapsed = time.perf_counter() - start + return "route_iteration", count, elapsed + + +def bench_route_iter_batches(batch_size=1000): + parser = RouteParser(URL) + start = time.perf_counter() + count = 0 + for batch in parser.iter_batches(batch_size): + count += len(batch) + elapsed = 
time.perf_counter() - start + return f"route_iter_batches({batch_size})", count, elapsed + + +def bench_route_iter_tuples(): + parser = RouteParser(URL) + fields = ["timestamp", "prefix", "as_path"] + start = time.perf_counter() + count = 0 + for _row in parser.iter_tuples(fields): + count += 1 + elapsed = time.perf_counter() - start + return "route_iter_tuples(timestamp,prefix,as_path)", count, elapsed + + +def bench_route_iter_tuple_batches(batch_size=1000): + parser = RouteParser(URL) + fields = ["timestamp", "prefix", "as_path"] + start = time.perf_counter() + count = 0 + for batch in parser.iter_tuple_batches(fields, batch_size): + count += len(batch) + elapsed = time.perf_counter() - start + return f"route_iter_tuple_batches({batch_size})", count, elapsed + + +def bench_route_parse_all(): + parser = RouteParser(URL) + start = time.perf_counter() + routes = parser.parse_all() + elapsed = time.perf_counter() - start + return "route_parse_all", len(routes), elapsed + + +def bench_to_dict(): + parser = Parser(URL) + elems = parser.parse_all() + start = time.perf_counter() + for elem in elems: + elem.to_dict() + elapsed = time.perf_counter() - start + return "to_dict", len(elems), elapsed + + +def main(): + for name, count, elapsed in [ + bench_iteration(), + bench_iter_batches(), + bench_parse_all(), + bench_iter_tuples(), + bench_iter_tuple_batches(), + bench_route_iteration(), + bench_route_iter_batches(), + bench_route_parse_all(), + bench_route_iter_tuples(), + bench_route_iter_tuple_batches(), + bench_to_dict(), + ]: + rate = count / elapsed if elapsed else 0 + print(f"{name}: {count:,} elems in {elapsed:.3f}s ({rate:,.0f} elems/s)") + + +if __name__ == "__main__": + main() diff --git a/tests/test_api.py b/tests/test_api.py new file mode 100644 index 0000000..33cb4f2 --- /dev/null +++ b/tests/test_api.py @@ -0,0 +1,116 @@ +import os +import pytest + +from pybgpkit_parser import ( + ELEM_TYPE_ANNOUNCE, + ELEM_TYPE_WITHDRAW, + BASIC_FIELDS, + NEXT_HOP_FIELDS, 
+ ROUTE_FIELDS, + Filter, + PSV_HEADER, + Parser, + RouteParser, +) + +URL = "https://spaces.bgpkit.org/parser/update-example" + + +def test_filter_repr_and_helpers(): + filt = Filter("peer_ips", "185.1.8.65,2001:7f8:73:0:3:fa4:0:1") + assert "Filter" in repr(filt) + assert "Filter" in repr(Filter.peer_ip("185.1.8.65")) + assert "Filter" in repr(Filter.peer_ips(["185.1.8.65", "2001:7f8:73:0:3:fa4:0:1"])) + assert "Filter" in repr(Filter.origin_asn(13335)) + assert "Filter" in repr(Filter.prefix("1.1.1.0/24")) + assert "Filter" in repr(Filter.elem_type("a")) + + +def test_module_constants(): + assert ELEM_TYPE_ANNOUNCE == "A" + assert ELEM_TYPE_WITHDRAW == "W" + assert PSV_HEADER.startswith("type|timestamp") + assert BASIC_FIELDS == ["timestamp", "elem_type", "peer_ip", "peer_asn", "prefix"] + assert ROUTE_FIELDS[-1] == "as_path" + assert NEXT_HOP_FIELDS[-1] == "next_hop" + + +def test_invalid_filter_raises_value_error(): + with pytest.raises(ValueError): + Filter("peer_ips", "not-an-ip") + + +@pytest.mark.skipif( + os.environ.get("PYBGPKIT_RUN_NETWORK_TESTS") != "1", + reason="network smoke test; set PYBGPKIT_RUN_NETWORK_TESTS=1 to enable", +) +def test_parser_iteration_and_elem_api_network(): + parser = Parser.from_filters(URL, [Filter("peer_ips", "185.1.8.65")]) + elem = parser.parse_next() + assert elem is not None + + data = elem.to_dict() + assert elem.as_dict() == data + assert "peer_bgp_id" in data + assert "only_to_customer" in data + assert elem.elem_type in {"A", "W"} + assert elem.is_announcement() or elem.is_withdrawal() + assert elem.has_as_path() == (elem.as_path is not None) + assert elem.origin_asn == elem.get_origin_asn() + assert isinstance(elem.to_json(), str) + assert isinstance(elem.to_psv(), str) + assert "Elem" in repr(elem) + + +def test_parser_count_and_batches_network(): + if os.environ.get("PYBGPKIT_RUN_NETWORK_TESTS") != "1": + pytest.skip("network smoke test; set PYBGPKIT_RUN_NETWORK_TESTS=1 to enable") + + parser = Parser(URL, 
filters={"peer_ips": "185.1.8.65"}) + assert parser.count() > 0 + assert parser.count() == 0 + + parser = Parser(URL, filters={"peer_ips": "185.1.8.65"}) + batches = parser.iter_batches(1000) + first = next(batches) + assert first + assert len(first) <= 1000 + + parser = Parser(URL, filters={"peer_ips": "185.1.8.65"}) + row = next(parser.iter_tuples(["peer_ip", "peer_asn", "prefix"])) + assert len(row) == 3 + + parser = Parser(URL, filters={"peer_ips": "185.1.8.65"}) + batch = next(parser.iter_tuple_batches(["peer_ip", "prefix"], 1000)) + assert batch + assert len(batch[0]) == 2 + + +def test_route_parser_network(): + if os.environ.get("PYBGPKIT_RUN_NETWORK_TESTS") != "1": + pytest.skip("network smoke test; set PYBGPKIT_RUN_NETWORK_TESTS=1 to enable") + + parser = RouteParser.from_filters(URL, [Filter.peer_ip("185.1.8.65")]) + route = parser.parse_next() + assert route is not None + assert route.elem_type in {"A", "W"} + assert route.as_dict() == route.to_dict() + assert route.has_as_path() == (route.as_path is not None) + assert "RouteElem" in repr(route) + + parser = RouteParser(URL, filters={"peer_ips": "185.1.8.65"}) + assert parser.count() > 0 + + parser = RouteParser(URL, filters={"peer_ips": "185.1.8.65"}) + first = next(parser.iter_batches(1000)) + assert first + assert len(first) <= 1000 + + parser = RouteParser(URL, filters={"peer_ips": "185.1.8.65"}) + row = next(parser.iter_tuples(["peer_ip", "peer_asn", "prefix"])) + assert len(row) == 3 + + parser = RouteParser(URL, filters={"peer_ips": "185.1.8.65"}) + batch = next(parser.iter_tuple_batches(["peer_ip", "prefix"], 1000)) + assert batch + assert len(batch[0]) == 2 From 786fe2ed6c237b10c56377b10a7d9554511538a0 Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Mon, 8 Jun 2026 18:59:30 -0700 Subject: [PATCH 2/7] fix: install built wheel in Python CI --- .github/workflows/rust.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/rust.yaml 
b/.github/workflows/rust.yaml index 33dacbc..0012c57 100644 --- a/.github/workflows/rust.yaml +++ b/.github/workflows/rust.yaml @@ -28,7 +28,9 @@ jobs: python-version: "3.11" - name: Install test tools run: python -m pip install --upgrade pip maturin pytest pytest-benchmark - - name: Build extension in-place - run: maturin develop + - name: Build wheel + run: maturin build --release --out dist + - name: Install built wheel + run: python -m pip install --force-reinstall dist/*.whl - name: Run Python API tests run: pytest tests/test_api.py From d90e513e802d3846dd675d044e2efb9e203f0769 Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Mon, 8 Jun 2026 19:08:55 -0700 Subject: [PATCH 3/7] ci: use trusted publishing for PyPI releases --- .github/workflows/release.yml | 9 ++++----- AGENTS.md | 5 +++-- BUILD.md | 17 ++++++++++------- UPDATE_DESIGN.md | 12 ++++++------ 4 files changed, 23 insertions(+), 20 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 6dc937e..7927bb0 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -2,6 +2,7 @@ name: Release permissions: contents: write + id-token: write on: push: @@ -81,12 +82,10 @@ jobs: path: dist merge-multiple: true - name: Publish to PyPI - uses: PyO3/maturin-action@v1 + uses: pypa/gh-action-pypi-publish@release/v1 with: - command: upload - args: --non-interactive --skip-existing dist/* - env: - MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }} + packages-dir: dist + skip-existing: true create-release: needs: [build-sdist, build-wheels, publish-pypi] diff --git a/AGENTS.md b/AGENTS.md index 0cb0db5..5e39276 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -26,7 +26,7 @@ Python binding for `bgpkit-parser` (Rust MRT/BGP parser). 
Exposes a single `Pars | Add Python version support | `build.sh` + `Dockerfile` + `README.md` | | Build/test locally | `maturin develop` (see README.md) | | Build wheels for release | GitHub Actions `release.yml` / `maturin build --release` locally | -| Publish to PyPI | Push `v*` tag; CI publishes with `PYPI_API_TOKEN` | +| Publish to PyPI | Push `v*` tag; CI publishes via PyPI Trusted Publishing (OIDC) | ## CODE MAP @@ -47,6 +47,7 @@ Python binding for `bgpkit-parser` (Rust MRT/BGP parser). Exposes a single `Pars - **Do NOT** change PyO3/maturin versions without updating both `Cargo.toml` and `build.rs` (`pyo3-build-config` must match) - **Do NOT** test release publishing with a beta tag unless the package version is also beta; use `workflow_dispatch` with `publish=false` for build-only checks +- **Do NOT** add long-lived PyPI API tokens; use PyPI Trusted Publishing with GitHub OIDC (`environment: pypi`) - **Do NOT** add `unsafe Send/Sync` for new types without verifying thread safety with the underlying Rust iterator - **Do NOT** use `.unwrap()` on user inputs (URL/filters); already handled in `BgpkitParser::new` but be careful with new additions - **Do NOT** make `Elem` fields write-only or remove getters without noting in CHANGELOG as breaking (v0.6.0 was a breaking change) @@ -78,6 +79,6 @@ twine upload --skip-existing target/wheels/* ## NOTES - `bgpkit-parser` crate version bump is the primary release trigger (see CHANGELOG for version history) -- Release workflow: `rust.yaml` runs Rust + Python API checks on PR/push; `release.yml` builds ABI3 wheels and publishes on `v*` tag push +- Release workflow: `rust.yaml` runs Rust + Python API checks on PR/push; `release.yml` builds ABI3 wheels and publishes on `v*` tag push via Trusted Publishing - Supports Python 3.9+ via ABI3 wheels - No Python tests in-repo; examples in `examples/` serve as smoke tests diff --git a/BUILD.md b/BUILD.md index eb2b649..d0285b6 100644 --- a/BUILD.md +++ b/BUILD.md @@ -21,20 +21,23 
@@ The release workflow will: - macOS x86_64 - macOS arm64 - Windows x86_64 -4. Publish artifacts to PyPI using `PYPI_API_TOKEN` +4. Publish artifacts to PyPI using PyPI Trusted Publishing (OIDC) 5. Create a GitHub Release and attach the built artifacts Manual workflow runs (`workflow_dispatch`) are build-only by default. They only publish when the `publish` input is explicitly enabled. -## Required GitHub Secret +## PyPI Trusted Publishing Setup -Create a project-scoped PyPI API token and save it as a GitHub Actions secret: +Configure a trusted publisher for the existing `pybgpkit-parser` PyPI project: -```text -PYPI_API_TOKEN -``` +| Field | Value | +|------|-------| +| Owner | `bgpkit` | +| Repository name | `bgpkit-parser-py` | +| Workflow name | `release.yml` | +| Environment name | `pypi` | -Recommended token scope: only the `pybgpkit-parser` project. +The workflow uses GitHub Actions OIDC (`id-token: write`) and does not require a long-lived PyPI API token. ## Local Development Build diff --git a/UPDATE_DESIGN.md b/UPDATE_DESIGN.md index 50fdef7..a0c4387 100644 --- a/UPDATE_DESIGN.md +++ b/UPDATE_DESIGN.md @@ -65,15 +65,15 @@ This plan covers a comprehensive update of `bgpkit-parser-py` in **one pull requ Also optimize `parse_all()` by parsing while detached from the Python interpreter (`py.detach(...)`) before converting the collected Rust `Elem` values into Python objects. -### 8. CI/CD: `maturin-action` + PyPI Token +### 8. CI/CD: `maturin-action` + PyPI Trusted Publishing -**Decision:** Use `PyO3/maturin-action` for all wheel builds, publish via `MATURIN_PYPI_TOKEN`, and use ABI3 (`abi3-py39`) to build one wheel per platform instead of per Python version. +**Decision:** Use `PyO3/maturin-action` for all wheel builds, publish via PyPI Trusted Publishing (OIDC), and use ABI3 (`abi3-py39`) to build one wheel per platform instead of per Python version. 
**Rationale:** - `maturin-action` handles cross-compilation, manylinux, and all platforms automatically - Replaces the manual process (2 Macs + Docker + `twine upload`) - Produces ABI3 wheels for macOS x86_64/arm64, Linux x86_64/aarch64, Windows x86_64 -- PyPI token is simpler than OIDC (Trusted Publishing) for immediate setup +- Trusted Publishing avoids long-lived PyPI API tokens - Manual `workflow_dispatch` runs are build-only by default to avoid accidental PyPI publication ### 9. `unsafe impl Send/Sync` for `Parser` @@ -319,7 +319,7 @@ fn from_filters( - Platform matrix: macOS x86_64, macOS arm64, Linux x86_64, Linux aarch64, Windows x86_64 - ABI3 Python compatibility: 3.9–3.13 from one wheel per platform - Trigger: `v[0-9]+.*` tags + `workflow_dispatch` -- PyPI publish via `MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }}` +- PyPI publish via PyPI Trusted Publishing / GitHub OIDC (`id-token: write`) - GitHub Release creation via `taiki-e/create-gh-release-action@v1` **21. `.github/workflows/rust.yaml`** @@ -376,7 +376,7 @@ fn from_filters( ## Pre-PR Checklist -- [ ] `PYPI_API_TOKEN` secret added to GitHub repo +- [ ] PyPI Trusted Publisher configured for `bgpkit/bgpkit-parser-py`, workflow `release.yml`, environment `pypi` - [ ] `cargo check` passes after dependency bump - [ ] `maturin develop` builds successfully - [ ] `examples/filter_count_print.py` runs without error @@ -409,7 +409,7 @@ fn from_filters( - **Default:** No. Separate wheels are smaller. Revisit if users complain. 5. **Should we migrate to Trusted Publishing (OIDC) instead of API token?** - - **Default:** No. API token works now. OIDC is a follow-up security improvement. + - **Decision:** Yes. Release publishing uses `pypa/gh-action-pypi-publish` with GitHub OIDC and `environment: pypi`. 
## Notes From 3a6c66e1662effe052a76d6658f51585e3b2fe5e Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Mon, 8 Jun 2026 19:18:46 -0700 Subject: [PATCH 4/7] fix: address PR review comments --- .github/workflows/release.yml | 2 +- .github/workflows/rust.yaml | 2 ++ AGENTS.md | 2 +- pyproject.toml | 2 +- src/lib.rs | 53 +++++++++++++---------------------- 5 files changed, 25 insertions(+), 36 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 7927bb0..f234c26 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -7,7 +7,7 @@ permissions: on: push: tags: - - "v*" + - "v[0-9]*.[0-9]*.[0-9]*" workflow_dispatch: inputs: publish: diff --git a/.github/workflows/rust.yaml b/.github/workflows/rust.yaml index 0012c57..256e234 100644 --- a/.github/workflows/rust.yaml +++ b/.github/workflows/rust.yaml @@ -34,3 +34,5 @@ jobs: run: python -m pip install --force-reinstall dist/*.whl - name: Run Python API tests run: pytest tests/test_api.py + env: + PYBGPKIT_RUN_NETWORK_TESTS: "1" diff --git a/AGENTS.md b/AGENTS.md index 5e39276..fe4665c 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -81,4 +81,4 @@ twine upload --skip-existing target/wheels/* - `bgpkit-parser` crate version bump is the primary release trigger (see CHANGELOG for version history) - Release workflow: `rust.yaml` runs Rust + Python API checks on PR/push; `release.yml` builds ABI3 wheels and publishes on `v*` tag push via Trusted Publishing - Supports Python 3.9+ via ABI3 wheels -- No Python tests in-repo; examples in `examples/` serve as smoke tests +- Python API tests live in `tests/test_api.py`; network smoke coverage is gated by `PYBGPKIT_RUN_NETWORK_TESTS=1` diff --git a/pyproject.toml b/pyproject.toml index 149af72..0812d5a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ dynamic = ["version"] description = "Python binding for bgpkit-parser" readme = "README.md" requires-python = ">=3.9" -license = "MIT" +license = { text = "MIT" } 
classifiers = [ "Programming Language :: Rust", "Programming Language :: Python :: 3.9", diff --git a/src/lib.rs b/src/lib.rs index 601e875..faeb62d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -531,71 +531,54 @@ fn pybgpkit_parser(_py: Python, m: &Bound) -> PyResult<()> { } } - #[pyclass] + #[pyclass(unsendable)] struct Parser { elem_iter: Option>>, } - #[pyclass] + #[pyclass(unsendable)] struct BatchIterator { elem_iter: Option>>, batch_size: usize, } - #[pyclass] + #[pyclass(unsendable)] struct RouteParser { route_iter: Option>>, } - #[pyclass] + #[pyclass(unsendable)] struct RouteBatchIterator { route_iter: Option>>, batch_size: usize, } - #[pyclass] + #[pyclass(unsendable)] struct TupleIterator { elem_iter: Option>>, fields: Vec, } - #[pyclass] + #[pyclass(unsendable)] struct TupleBatchIterator { elem_iter: Option>>, fields: Vec, batch_size: usize, } - #[pyclass] + #[pyclass(unsendable)] struct RouteTupleIterator { route_iter: Option>>, fields: Vec, } - #[pyclass] + #[pyclass(unsendable)] struct RouteTupleBatchIterator { route_iter: Option>>, fields: Vec, batch_size: usize, } - unsafe impl Send for Parser {} - unsafe impl Sync for Parser {} - unsafe impl Send for BatchIterator {} - unsafe impl Sync for BatchIterator {} - unsafe impl Send for RouteParser {} - unsafe impl Sync for RouteParser {} - unsafe impl Send for RouteBatchIterator {} - unsafe impl Sync for RouteBatchIterator {} - unsafe impl Send for TupleIterator {} - unsafe impl Sync for TupleIterator {} - unsafe impl Send for TupleBatchIterator {} - unsafe impl Sync for TupleBatchIterator {} - unsafe impl Send for RouteTupleIterator {} - unsafe impl Sync for RouteTupleIterator {} - unsafe impl Send for RouteTupleBatchIterator {} - unsafe impl Sync for RouteTupleBatchIterator {} - #[pymethods] impl Parser { #[new] @@ -746,13 +729,14 @@ fn pybgpkit_parser(_py: Python, m: &Bound) -> PyResult<()> { } fn __next__(mut slf: PyRefMut, py: Python) -> PyResult>> { - let fields = slf.fields.clone(); + let slf = 
&mut *slf; + let fields = slf.fields.as_slice(); let Some(elem_iter) = slf.elem_iter.as_mut() else { return Ok(None); }; elem_iter .next() - .map(|elem| elem_to_tuple(py, elem, &fields)) + .map(|elem| elem_to_tuple(py, elem, fields)) .transpose() } } @@ -764,7 +748,8 @@ fn pybgpkit_parser(_py: Python, m: &Bound) -> PyResult<()> { } fn __next__(mut slf: PyRefMut, py: Python) -> PyResult>>> { - let fields = slf.fields.clone(); + let slf = &mut *slf; + let fields = slf.fields.as_slice(); let batch_size = slf.batch_size; let Some(elem_iter) = slf.elem_iter.as_mut() else { return Ok(None); @@ -783,7 +768,7 @@ fn pybgpkit_parser(_py: Python, m: &Bound) -> PyResult<()> { elems .into_iter() - .map(|elem| elem_to_tuple(py, elem, &fields)) + .map(|elem| elem_to_tuple(py, elem, fields)) .collect::>>() .map(Some) } @@ -911,13 +896,14 @@ fn pybgpkit_parser(_py: Python, m: &Bound) -> PyResult<()> { } fn __next__(mut slf: PyRefMut, py: Python) -> PyResult>> { - let fields = slf.fields.clone(); + let slf = &mut *slf; + let fields = slf.fields.as_slice(); let Some(route_iter) = slf.route_iter.as_mut() else { return Ok(None); }; route_iter .next() - .map(|route| route_to_tuple(py, route, &fields)) + .map(|route| route_to_tuple(py, route, fields)) .transpose() } } @@ -929,7 +915,8 @@ fn pybgpkit_parser(_py: Python, m: &Bound) -> PyResult<()> { } fn __next__(mut slf: PyRefMut, py: Python) -> PyResult>>> { - let fields = slf.fields.clone(); + let slf = &mut *slf; + let fields = slf.fields.as_slice(); let batch_size = slf.batch_size; let Some(route_iter) = slf.route_iter.as_mut() else { return Ok(None); @@ -948,7 +935,7 @@ fn pybgpkit_parser(_py: Python, m: &Bound) -> PyResult<()> { routes .into_iter() - .map(|route| route_to_tuple(py, route, &fields)) + .map(|route| route_to_tuple(py, route, fields)) .collect::>>() .map(Some) } From 145f077b5a317eef5ce70271728aa2d200f5616b Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Mon, 8 Jun 2026 19:34:50 -0700 Subject: [PATCH 5/7] fix: 
address second round of PR review comments --- .github/workflows/release.yml | 2 +- .github/workflows/rust.yaml | 6 ++---- AGENTS.md | 11 +++++++---- UPDATE_DESIGN.md | 6 +++--- src/lib.rs | 4 +++- 5 files changed, 16 insertions(+), 13 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f234c26..aac4338 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -7,7 +7,7 @@ permissions: on: push: tags: - - "v[0-9]*.[0-9]*.[0-9]*" + - "v[0-9]+.[0-9]+.[0-9]+" workflow_dispatch: inputs: publish: diff --git a/.github/workflows/rust.yaml b/.github/workflows/rust.yaml index 256e234..b3a6e69 100644 --- a/.github/workflows/rust.yaml +++ b/.github/workflows/rust.yaml @@ -32,7 +32,5 @@ jobs: run: maturin build --release --out dist - name: Install built wheel run: python -m pip install --force-reinstall dist/*.whl - - name: Run Python API tests - run: pytest tests/test_api.py - env: - PYBGPKIT_RUN_NETWORK_TESTS: "1" + - name: Run Python API tests (unit) + run: pytest tests/test_api.py -v diff --git a/AGENTS.md b/AGENTS.md index fe4665c..55ead6e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -2,7 +2,7 @@ ## OVERVIEW -Python binding for `bgpkit-parser` (Rust MRT/BGP parser). Exposes a single `Parser` class and `Elem` dataclass via PyO3, built with maturin. +Python binding for `bgpkit-parser` (Rust MRT/BGP parser). Exposes `Parser` (full elems), `RouteParser` (route-level scans), `Filter` helpers, and projected tuple iteration via PyO3, built with maturin. ## STRUCTURE @@ -31,14 +31,17 @@ Python binding for `bgpkit-parser` (Rust MRT/BGP parser). Exposes a single `Pars ## CODE MAP - **`Elem`** — PyO3 class wrapping a parsed BGP element. Has `#[pyo3(get, set)]` fields and `to_dict()` / `__str__` / `__getstate__` methods. -- **`Parser`** — PyO3 class wrapping `bgpkit_parser::BgpkitParser`. Constructor takes `url`, optional `filters` (HashMap), and optional `cache_dir`. Implements `__iter__`/`__next__` for Python iteration. 
+- **`Parser`** — PyO3 class wrapping `bgpkit_parser::BgpkitParser`. Constructor takes `url`, optional `filters` (HashMap), and optional `cache_dir`. Implements `__iter__`/`__next__`, `count`, `iter_batches`, `iter_tuples`, and `iter_tuple_batches`. +- **`RouteParser`** — PyO3 class wrapping `BgpkitParser::into_route_iter()`. Returns lightweight `RouteElem` values. Same iteration/helper surface as `Parser`. +- **`Filter`** — PyO3 class wrapping `bgpkit_parser::parser::Filter`. Constructors: `__init__`, `peer_ip`, `peer_ips`, `origin_asn`, `prefix`, `elem_type`. +- **`TupleIterator` / `TupleBatchIterator`** — High-performance projected tuple iteration for `Parser` and `RouteParser`. - **`convert_elem`** — Internal fn mapping `BgpElem` → `Elem` (Rust type → PyO3 type). ## CONVENTIONS - Rust fmt/clippy enforced in CI (`cargo fmt --check`, `cargo clippy -- -D warnings`) - `PyValueError` used for filter errors propagated to Python -- `unsafe impl Send + Sync for Parser` — required because `ElemIterator>` is not auto-Send +- Iterator-backed pyclasses use `#[pyclass(unsendable)]`; no `unsafe impl Send/Sync` - `#[pyo3(name = "__str__")]` used for JSON string representation of `Elem` - `atomic` field returns `"AG"`/`"NAG"` strings (not bool) - `elem_type` field returns `"A"` (announce) or `"W"` (withdraw) @@ -48,7 +51,7 @@ Python binding for `bgpkit-parser` (Rust MRT/BGP parser). 
Exposes a single `Pars - **Do NOT** change PyO3/maturin versions without updating both `Cargo.toml` and `build.rs` (`pyo3-build-config` must match) - **Do NOT** test release publishing with a beta tag unless the package version is also beta; use `workflow_dispatch` with `publish=false` for build-only checks - **Do NOT** add long-lived PyPI API tokens; use PyPI Trusted Publishing with GitHub OIDC (`environment: pypi`) -- **Do NOT** add `unsafe Send/Sync` for new types without verifying thread safety with the underlying Rust iterator +- **Do NOT** add `unsafe impl Send/Sync` to `#[pyclass]` types; use `#[pyclass(unsendable)]` instead - **Do NOT** use `.unwrap()` on user inputs (URL/filters); already handled in `BgpkitParser::new` but be careful with new additions - **Do NOT** make `Elem` fields write-only or remove getters without noting in CHANGELOG as breaking (v0.6.0 was a breaking change) diff --git a/UPDATE_DESIGN.md b/UPDATE_DESIGN.md index a0c4387..a73d911 100644 --- a/UPDATE_DESIGN.md +++ b/UPDATE_DESIGN.md @@ -76,11 +76,11 @@ Also optimize `parse_all()` by parsing while detached from the Python interprete - Trusted Publishing avoids long-lived PyPI API tokens - Manual `workflow_dispatch` runs are build-only by default to avoid accidental PyPI publication -### 9. `unsafe impl Send/Sync` for `Parser` +### 9. Thread safety for `pyclass` iterator wrappers -**Decision:** Keep and verify after the `bgpkit-parser` bump. +**Decision:** Use `#[pyclass(unsendable)]` for all iterator-backed PyO3 wrapper types rather than `unsafe impl Send/Sync`. -**Rationale:** The `ElemIterator` type may have changed in v0.17.0. If `BgpkitParser::into_iter()` no longer returns `Send`, we switch to a different approach (e.g., `into_elem_iter()` or `into_fallible_elem_iter()`). +**Rationale:** The underlying upstream iterator types (`ElemIterator`, `RouteIterator`) are not guaranteed thread-safe, and Python can share/transfer objects across threads. 
Marking them `unsendable` is the safe, idiomatic PyO3 approach. ## Data Structures diff --git a/src/lib.rs b/src/lib.rs index faeb62d..8a06b6f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -386,13 +386,14 @@ fn pybgpkit_parser(_py: Python, m: &Bound) -> PyResult<()> { "aggr_asn", "aggr_ip", "only_to_customer", + "peer_bgp_id", ] .join("|") } pub fn to_psv(&self) -> String { format!( - "{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}", + "{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}", self.elem_type, self.timestamp, self.peer_ip, @@ -409,6 +410,7 @@ fn pybgpkit_parser(_py: Python, m: &Bound) -> PyResult<()> { option_to_string(&self.aggr_asn), option_to_string(&self.aggr_ip), option_to_string(&self.only_to_customer), + option_to_string(&self.peer_bgp_id), ) } } From ba1902544ddfe556bba21b6edf462375e84a975f Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Mon, 8 Jun 2026 19:36:50 -0700 Subject: [PATCH 6/7] fix: update stale unsafe Send/Sync note in UPDATE_DESIGN.md --- UPDATE_DESIGN.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/UPDATE_DESIGN.md b/UPDATE_DESIGN.md index a73d911..4e0f262 100644 --- a/UPDATE_DESIGN.md +++ b/UPDATE_DESIGN.md @@ -415,7 +415,7 @@ fn from_filters( - `bgpkit-parser` 0.17.0 requires Rust 1.87.0 (MSRV). We have 1.91.1. ✓ - PyO3 0.28.3 requires Rust 1.83.0. We have 1.91.1. ✓ -- The `unsafe impl Send/Sync` for `Parser` must be verified after the bump. If the v0.17.0 `ElemIterator` is no longer `Send`, we use a different approach. +- All PyO3 wrapper types use `#[pyclass(unsendable)]` instead of `unsafe impl Send/Sync`. - `maturin-action` builds `manylinux` wheels automatically. No custom `Dockerfile` needed for CI. -- Keep `Dockerfile` and `build.sh` as manual fallbacks. Remove in a future cleanup PR. +- Keep `Dockerfile` and `build.sh` as manual fallbacks. `build.sh` references per-version Python builds which are superseded by ABI3; replace or remove in a future cleanup PR. 
- The CI workflow will produce **5 ABI3 wheels** (one per supported platform) + **1 sdist** per release. From 52407721a298aae139b5c4016a7c079c964aa7f3 Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Mon, 8 Jun 2026 19:40:40 -0700 Subject: [PATCH 7/7] chore: remove obsolete build.sh, Dockerfile, UPDATE_DESIGN.md --- AGENTS.md | 12 +- BUILD.md | 1 - Dockerfile | 33 ---- UPDATE_DESIGN.md | 421 ----------------------------------------------- build.sh | 19 --- 5 files changed, 4 insertions(+), 482 deletions(-) delete mode 100644 Dockerfile delete mode 100644 UPDATE_DESIGN.md delete mode 100644 build.sh diff --git a/AGENTS.md b/AGENTS.md index 55ead6e..7329031 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -7,13 +7,13 @@ Python binding for `bgpkit-parser` (Rust MRT/BGP parser). Exposes `Parser` (full ## STRUCTURE ``` -├── src/lib.rs # Entire Python extension: Parser + Elem PyO3 classes +├── src/lib.rs # Entire Python extension: Parser/elem/route PyO3 classes ├── examples/ # Python usage examples -├── build.sh # Maturin build for multiple Python versions -├── Dockerfile # Ubuntu builder for cross-platform Linux wheels ├── Cargo.toml # Rust crate: pybgpkit-parser, depends on bgpkit-parser ├── pyproject.toml # Maturin build-system config ├── build.rs # PyO3 extension module linker setup +├── benches/ # Rust criterion benchmarks +├── tests/ # Python API tests and benchmark └── .github/workflows/ # Rust fmt/clippy CI + tag-based release ``` @@ -23,9 +23,8 @@ Python binding for `bgpkit-parser` (Rust MRT/BGP parser). 
Exposes `Parser` (full |------|----------| | Change exposed Python API | `src/lib.rs` | | Update underlying parser logic | `Cargo.toml` → bump `bgpkit-parser` version | -| Add Python version support | `build.sh` + `Dockerfile` + `README.md` | | Build/test locally | `maturin develop` (see README.md) | -| Build wheels for release | GitHub Actions `release.yml` / `maturin build --release` locally | +| Build wheels for release | GitHub Actions `release.yml` (push `v*` tag) | | Publish to PyPI | Push `v*` tag; CI publishes via PyPI Trusted Publishing (OIDC) | ## CODE MAP @@ -68,9 +67,6 @@ maturin build --release git tag v0.7.0 git push origin v0.7.0 -# Manual fallback only -bash build.sh - # Format + lint cargo fmt --check cargo clippy -- -D warnings diff --git a/BUILD.md b/BUILD.md index d0285b6..376a088 100644 --- a/BUILD.md +++ b/BUILD.md @@ -65,4 +65,3 @@ maturin build --release --sdist twine upload --skip-existing target/wheels/* ``` -The historical `build.sh` and `Dockerfile` are kept as fallback tools for reproducing older manual builds, but CI is the preferred release path. 
diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 2d5c9a1..0000000 --- a/Dockerfile +++ /dev/null @@ -1,33 +0,0 @@ -FROM ubuntu:noble - -RUN apt update && DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt install -y curl libssl-dev pkg-config build-essential software-properties-common tzdata git vim cmake - -# install different versions of Python -RUN add-apt-repository ppa:deadsnakes/ppa -y && apt update - -RUN apt install -y python3.9 python3.9-distutils -RUN apt install -y python3.10 python3.10-distutils -RUN apt install -y python3.11 python3.11-distutils -RUN apt install -y python3.12 -RUN apt install -y python3.13 - -RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.9 -RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 -RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11 -RUN curl -sS https://bootstrap.pypa.io/get-pip.py -o get-pip.py && python3.12 get-pip.py --break-system-packages && rm get-pip.py -RUN curl -sS https://bootstrap.pypa.io/get-pip.py -o get-pip.py && python3.13 get-pip.py --break-system-packages && rm get-pip.py - -# install maturin -RUN python3.13 -m pip install maturin patchelf twine -# install Rust -RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y - -WORKDIR /io/bgpkit-parser-py -COPY ./src ./src -COPY ./build.rs . -COPY ./build.sh . -COPY ./Cargo.toml . -COPY ./README.md . -COPY ./pyproject.toml . - -COPY ./.pypirc /root/.pypirc \ No newline at end of file diff --git a/UPDATE_DESIGN.md b/UPDATE_DESIGN.md deleted file mode 100644 index 4e0f262..0000000 --- a/UPDATE_DESIGN.md +++ /dev/null @@ -1,421 +0,0 @@ -# bgpkit-parser-py Update Plan — Single PR - -## Overview - -This plan covers a comprehensive update of `bgpkit-parser-py` in **one pull request**. The PR modernizes dependencies, expands the Python API surface, adds performance benchmarks, and replaces the manual release process with fully automated CI/CD. 
- -## Work Streams (4 in 1 PR) - -| # | Stream | Scope | Risk | -|---|--------|-------|------| -| 1 | **Bump `bgpkit-parser`** | `0.11.1` → `0.17.0` | Medium — API changes, new fields | -| 2 | **Bump PyO3** | `0.25` → `0.28.3` | Medium — `Bound` API migration | -| 3 | **Expand Python API** | New fields, `RouteParser`, projected tuple iteration, `Filter` helpers, `__repr__`, `from_filters`, `count`, `iter_batches`, constants | Low — additive changes | -| 4 | **Benchmarks + CI/CD** | Python benchmark script, `criterion`, `maturin-action` workflow | Low — new files, no Rust API changes | - -## Design Decisions - -### 1. PyO3 Version: 0.28.3 (not 0.29) - -**Decision:** Target PyO3 0.28.3. - -**Rationale:** Latest stable release. 0.29 has breaking `pyo3-build-config` changes that require a direct dependency on `pyo3` or `pyo3-ffi`. 0.28.3 gives us free-threaded Python support and the mature `Bound` API without extra migration risk. - -### 2. `Bound` API Migration - -**Decision:** Migrate Python-object helpers to the `Bound<'py, T>` API where appropriate (e.g., `to_dict`), but keep `Py` for returned element objects where it remains the clean PyO3 return type. - -**Rationale:** PyO3 0.28 deprecates the old GIL-ref API. The `Bound` API is now primary. However, `Py::new()` still returns `Py` which works fine. We migrate incrementally — what compiles cleanly with `Bound` gets converted; what doesn't stays on `Py`. - -### 3. `Elem` Field Access: Keep `#[pyo3(get, set)]` - -**Decision:** Retain direct field access on `Elem`. - -**Rationale:** v0.6.0 was a breaking change that removed getters and caused user pain. The current `get, set` approach is ergonomic for Python users. No change. - -### 4. New Fields: `peer_bgp_id` and `only_to_customer` - -**Decision:** Add both to the Python `Elem` class. - -**Rationale:** -- `peer_bgp_id`: PEER_INDEX_TABLE in TableDumpV2/RIB records. `Option`. -- `only_to_customer`: RFC 9234 OTC attribute. `Option`. 
`None` for withdrawals (fixed upstream in v0.16.0). - -### 5. Iterator Strategy: Expose Elem and Route Iterators, Defer Fallible/Record/Update - -**Decision:** Keep `Parser` for `BgpElem` iteration and add `RouteParser` for upstream `BgpRouteElem` route-level iteration. Defer fallible, record, raw-record, and update iterators. - -**Rationale:** `BgpRouteElem` is a compact route identity type and maps cleanly to Python (`RouteElem`). It is the best performance-oriented iterator to expose now. Fallible/record/update iterators return different result/enum/nested record types and should be added in follow-up PRs with more API design. - -### 6. Filter API: Add `Filter` Class + `from_filters` Constructor - -**Decision:** Keep the existing `HashMap` filter constructor. Add a new `Filter` PyO3 class and a `from_filters` classmethod on `Parser`. - -**Rationale:** The string-based API is backward-compatible. The new `Filter` class exposes the upstream `Filter::new()` constructor, and `from_filters` lets users pass pre-built `Filter` objects (enabling reuse and avoiding string parsing overhead). - -### 7. Performance Benchmark: `pytest-benchmark` + `criterion` - -**Decision:** Add a Python-side benchmark script and Rust-side (`criterion`) benchmarks. - -**Rationale:** We need to quantify the "Python tax" per element. The benchmark compares: -- Rust native iteration (baseline) -- Python `for elem in parser` iteration (GIL crossing + object allocation) -- Python `parse_all` (bulk collection) -- Python `to_dict()` overhead (serialization) - -Also optimize `parse_all()` by parsing while detached from the Python interpreter (`py.detach(...)`) before converting the collected Rust `Elem` values into Python objects. - -### 8. CI/CD: `maturin-action` + PyPI Trusted Publishing - -**Decision:** Use `PyO3/maturin-action` for all wheel builds, publish via PyPI Trusted Publishing (OIDC), and use ABI3 (`abi3-py39`) to build one wheel per platform instead of per Python version. 
- -**Rationale:** -- `maturin-action` handles cross-compilation, manylinux, and all platforms automatically -- Replaces the manual process (2 Macs + Docker + `twine upload`) -- Produces ABI3 wheels for macOS x86_64/arm64, Linux x86_64/aarch64, Windows x86_64 -- Trusted Publishing avoids long-lived PyPI API tokens -- Manual `workflow_dispatch` runs are build-only by default to avoid accidental PyPI publication - -### 9. Thread safety for `pyclass` iterator wrappers - -**Decision:** Use `#[pyclass(unsendable)]` for all iterator-backed PyO3 wrapper types rather than `unsafe impl Send/Sync`. - -**Rationale:** The underlying upstream iterator types (`ElemIterator`, `RouteIterator`) are not guaranteed thread-safe, and Python can share/transfer objects across threads. Marking them `unsendable` is the safe, idiomatic PyO3 approach. - -## Data Structures - -### Python `Elem` (updated) - -```rust -#[pyclass] -#[derive(Clone, PartialEq, Serialize)] -pub struct Elem { - #[pyo3(get, set)] pub timestamp: f64, - #[pyo3(get, set)] pub elem_type: String, - #[pyo3(get, set)] pub peer_ip: String, - #[pyo3(get, set)] pub peer_asn: u32, - #[pyo3(get, set)] pub prefix: String, - #[pyo3(get, set)] pub next_hop: Option, - #[pyo3(get, set)] pub as_path: Option, - #[pyo3(get, set)] pub origin_asns: Option>, - #[pyo3(get, set)] pub origin: Option, - #[pyo3(get, set)] pub local_pref: Option, - #[pyo3(get, set)] pub med: Option, - #[pyo3(get, set)] pub communities: Option>, - #[pyo3(get, set)] pub atomic: Option, - #[pyo3(get, set)] pub aggr_asn: Option, - #[pyo3(get, set)] pub aggr_ip: Option, - // NEW in v0.17 - #[pyo3(get, set)] pub peer_bgp_id: Option, - #[pyo3(get, set)] pub only_to_customer: Option, -} -``` - -### Python `RouteElem` (new) - -```rust -#[pyclass] -pub struct RouteElem { - #[pyo3(get, set)] pub timestamp: f64, - #[pyo3(get, set)] pub elem_type: String, - #[pyo3(get, set)] pub peer_ip: String, - #[pyo3(get, set)] pub peer_asn: u32, - #[pyo3(get, set)] pub prefix: String, - 
#[pyo3(get, set)] pub as_path: Option, -} -``` - -### Python `Filter` (new) - -```rust -#[pyclass] -pub struct Filter { - inner: bgpkit_parser::parser::Filter, -} - -#[pymethods] -impl Filter { - #[new] - #[pyo3(signature = (filter_type, filter_value))] - fn new(filter_type: String, filter_value: String) -> PyResult { - let inner = bgpkit_parser::parser::Filter::new(filter_type.as_str(), filter_value.as_str()) - .map_err(|e| PyValueError::new_err(e.to_string()))?; - Ok(Filter { inner }) - } -} -``` - -## Single-PR Implementation Plan - -### Phase 1: Dependency Bump + Compile - -**1. `Cargo.toml`** -```toml -[package] -name = "pybgpkit-parser" -version = "0.7.0" -# ... rest unchanged ... - -[dependencies] -bgpkit-parser = "0.17.0" -pyo3 = { version = "0.28", features = ["extension-module", "abi3-py39"] } -serde = { version = "1.0", features = ["derive"] } -serde_json = "1" - -[build-dependencies] -pyo3-build-config = "0.28" -``` - -**2. `build.rs`** -```rust -fn main() { - pyo3_build_config::add_extension_module_link_args(); -} -``` -No changes — `add_extension_module_link_args()` is stable in 0.28. - -**3. `pyproject.toml`** -```toml -[build-system] -requires = ["maturin>=1.0,<2.0"] -build-backend = "maturin" - -[project] -name = "pybgpkit-parser" -version = "0.7.0" -description = "Python binding for bgpkit-parser" -readme = "README.md" -license = { text = "MIT" } -requires-python = ">=3.9" -classifiers = [ - "Programming Language :: Rust", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", -] -``` - -**4. Run `cargo check`** -- Fix compile errors iteratively -- Check if `ElemIterator` type changed in v0.17.0 -- Verify `unsafe impl Send` still compiles - -### Phase 2: `src/lib.rs` Rewrite - -**5. 
`convert_elem` update** -- Map new fields: `peer_bgp_id`, `only_to_customer` -- Keep all existing field mappings - -**6. `Elem` struct update** -- Add `peer_bgp_id: Option` -- Add `only_to_customer: Option` -- Keep all existing `#[pyo3(get, set)]` fields - -**7. `Elem::to_dict` update** -- Add `"peer_bgp_id"` and `"only_to_customer"` entries - -**8. `Elem` utility methods (new)** -- `is_announcement()` -- `is_withdrawal()` -- `get_origin_asn()` -- `get_origin_asns()` -- `has_as_path()` -- `as_dict()` -- `origin_asn` property -- `to_json()` -- `to_psv()` -- `get_psv_header()` - -**9. `Elem::__repr__` (new)** -```rust -#[pyo3(name = "__repr__")] -fn repr(&self) -> PyResult { - Ok(format!("", self.prefix, self.peer_ip, self.elem_type)) -} -``` - -**10. `Filter` class (new)** -- `#[pyclass]` wrapper around `bgpkit_parser::parser::Filter` -- `#[new]` constructor: `Filter(filter_type, filter_value)` -- Python-native helper constructors: `peer_ip`, `peer_ips`, `origin_asn`, `prefix`, `elem_type` -- Expose to the module - -**11. `Parser` struct update** -- Keep `elem_iter: ElemIterator>` -- Verify `Send` bound after v0.17.0 bump - -**12. `Parser::new` (keep signature)** -```rust -#[pyo3(signature = (url, filters=None, cache_dir=None))] -fn new( - url: String, - filters: Option>, - cache_dir: Option, -) -> PyResult -``` -Keep backward-compatible `HashMap` filter API. - -**13. `Parser::from_filters` (new, classmethod)** -```rust -#[staticmethod] -#[pyo3(signature = (url, filters, cache_dir=None))] -fn from_filters( - url: String, - filters: Vec>, - cache_dir: Option, -) -> PyResult { ... } -``` -- Construct `BgpkitParser` from URL -- Add filters via `with_filters` or `add_filters` using the `.inner` of each `Filter` -- Return `Parser` with the iterator - -**14. 
`Parser::parse_all` (optimize)** -- Keep `Vec>` return type -- Use `py.detach(...)` to parse and convert `BgpElem` → Rust `Elem` outside the Python interpreter, then reacquire Python only to allocate Python objects - -**15. `Parser::parse_next` (error-safe allocation)** -- Return `PyResult>>` -- Avoid `.unwrap()` on Python allocation - -**16. `Parser::__next__` (error-safe allocation)** -- Return `PyResult>>` -- Avoid `.unwrap()` on Python allocation - -**17. Parser and RouteParser utility methods + module registration** -- Add `Parser.count()` (stream-consuming) -- Add `Parser.iter_batches(batch_size)` returning `BatchIterator` -- Add `RouteElem` and `RouteParser` for upstream `into_route_iter()` -- Add `RouteParser.count()`, `RouteParser.parse_all()`, `RouteParser.parse_next()`, and `RouteParser.iter_batches(batch_size)` -- Add projected tuple iteration: `iter_tuples(fields)` and `iter_tuple_batches(fields, batch_size)` for `Parser` and `RouteParser` -- Register `Elem`, `RouteElem`, `Filter`, `Parser`, `BatchIterator`, tuple iterators, `RouteParser`, and `RouteBatchIterator` -- Add module constants: `ELEM_TYPE_ANNOUNCE`, `ELEM_TYPE_WITHDRAW`, `PSV_HEADER`, `BASIC_FIELDS`, `ROUTE_FIELDS`, `NEXT_HOP_FIELDS` - -### Phase 3: Benchmarks - -**17. `benches/parse_bench.rs` (new)** -- `criterion` benchmark for Rust native iteration -- `[[bench]]` entry in `Cargo.toml` - -**18. `tests/benchmark.py` (new)** -- `time.perf_counter` benchmark -- Compares: `parse_all`, `for elem in parser`, `iter_batches`, projected tuple iteration, `RouteParser` equivalents, `to_dict()` -- Uses `https://spaces.bgpkit.org/parser/update-example` as test data - -**19. `tests/test_api.py` (new)** -- Test `Filter` construction -- Test `Parser.from_filters` -- Test new `Elem` fields (`peer_bgp_id`, `only_to_customer`) -- Test `__repr__` and `__str__` -- Test `to_dict()` contains all fields - -### Phase 4: CI/CD - -**20. 
`.github/workflows/release.yml` (full rewrite)** -- Replace the current simple format-check + create-release workflow -- Add: `build-sdist` job, `build-wheels` matrix job, `publish-pypi` job -- Use `PyO3/maturin-action@v1` -- Platform matrix: macOS x86_64, macOS arm64, Linux x86_64, Linux aarch64, Windows x86_64 -- ABI3 Python compatibility: 3.9–3.13 from one wheel per platform -- Trigger: `v[0-9]+.*` tags + `workflow_dispatch` -- PyPI publish via PyPI Trusted Publishing / GitHub OIDC (`id-token: write`) -- GitHub Release creation via `taiki-e/create-gh-release-action@v1` - -**21. `.github/workflows/rust.yaml`** -- Keep as-is (format + clippy on PR/push) - -**22. `BUILD.md` (rewrite)** -- Document the automated release process (push tag → CI builds + publishes) -- Keep the manual Docker/`build.sh` process as fallback documentation - -**23. `CHANGELOG.md` (add entry)** -- v0.7.0: dependency bump, new fields, `Filter` class, benchmarks, CI/CD automation - -**24. `README.md` (update)** -- Update `Elem` field list to show new fields -- Document `Filter` class and `from_filters` constructor -- Update installation notes - -### Phase 5: Test + Verify - -**25. Local testing** -- `maturin develop` -- Run `examples/filter_count_print.py` -- Run `tests/benchmark.py` -- Run `tests/test_api.py` - -**26. CI testing** -- Open PR — `rust.yaml` runs format + clippy -- Verify no regressions - -**27. 
Post-merge release test** -- Run `workflow_dispatch` with `publish=false` to verify build-only release workflow -- Verify all artifacts build successfully -- Push the real `v0.7.0` tag only when ready to publish to PyPI - -## Changes to Existing Files (Summary) - -| File | Change | -|------|--------| -| `Cargo.toml` | Bump `bgpkit-parser` → `0.17.0`, `pyo3` → `0.28` with `abi3-py39`, `pyo3-build-config` → `0.28`, add `[[bench]]` | -| `src/lib.rs` | Full rewrite: new fields, `RouteParser`, projected tuple iteration, `Filter` helpers, `from_filters`, `__repr__`, `count`, `iter_batches`, constants, `Bound` API migration | -| `pyproject.toml` | Add `project` metadata, `requires-python`, classifiers | -| `BUILD.md` | Rewrite to document CI workflow; keep manual fallback | -| `CHANGELOG.md` | Add v0.7.0 entry | -| `README.md` | Update API docs, new fields, `Filter` class | -| `.github/workflows/release.yml` | Full rewrite: add build matrix + PyPI publish + GitHub Release | - -## New Files (Summary) - -| File | Purpose | -|------|---------| -| `benches/parse_bench.rs` | Rust `criterion` benchmark for native iteration | -| `tests/benchmark.py` | Python benchmark comparing `parse_all` vs iteration vs `to_dict` | -| `tests/test_api.py` | Python tests for new API (`Filter`, new fields, `from_filters`) | - -## Pre-PR Checklist - -- [ ] PyPI Trusted Publisher configured for `bgpkit/bgpkit-parser-py`, workflow `release.yml`, environment `pypi` -- [ ] `cargo check` passes after dependency bump -- [ ] `maturin develop` builds successfully -- [ ] `examples/filter_count_print.py` runs without error -- [ ] `tests/test_api.py` passes -- [ ] `tests/benchmark.py` runs and produces meaningful numbers -- [ ] `benches/parse_bench.rs` compiles and runs -- [ ] `cargo fmt` passes -- [ ] `cargo clippy -- -D warnings` passes - -## Post-Merge Checklist - -- [ ] Run release workflow manually with `publish=false` -- [ ] Verify all artifacts are built but not published -- [ ] Verify GitHub Release 
is created with changelog -- [ ] If beta works, delete tag and push `v0.7.0` -- [ ] Update `AGENTS.md` with new anti-patterns (CI token handling, etc.) - -## Open Questions - -1. **Should we expose `BgpRouteElem` / `into_route_iter`?** - - **Decision:** Yes. Exposed as `RouteElem` and `RouteParser` because it maps cleanly and directly supports performance comparisons. - -2. **Should we expose `MrtRecord` / `into_record_iter`?** - - **Default:** No. Adds complexity to the `Parser` struct (different iterator types). Follow-up PR. - -3. **Should we add Windows to the CI matrix?** - - **Decision:** Yes. `maturin-action` supports it out of the box. No platform-specific Rust code. - -4. **Should we build `universal2` macOS wheels instead of separate x86_64/arm64?** - - **Default:** No. Separate wheels are smaller. Revisit if users complain. - -5. **Should we migrate to Trusted Publishing (OIDC) instead of API token?** - - **Decision:** Yes. Release publishing uses `pypa/gh-action-pypi-publish` with GitHub OIDC and `environment: pypi`. - -## Notes - -- `bgpkit-parser` 0.17.0 requires Rust 1.87.0 (MSRV). We have 1.91.1. ✓ -- PyO3 0.28.3 requires Rust 1.83.0. We have 1.91.1. ✓ -- All PyO3 wrapper types use `#[pyclass(unsendable)]` instead of `unsafe impl Send/Sync`. -- `maturin-action` builds `manylinux` wheels automatically. No custom `Dockerfile` needed for CI. -- Keep `Dockerfile` and `build.sh` as manual fallbacks. `build.sh` references per-version Python builds which are superseded by ABI3; replace or remove in a future cleanup PR. -- The CI workflow will produce **5 ABI3 wheels** (one per supported platform) + **1 sdist** per release. diff --git a/build.sh b/build.sh deleted file mode 100644 index 6987208..0000000 --- a/build.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env bash -set -e - -# Path to the .pypirc file -PYPICRC_FILE="$HOME/.pypirc" - -# Check if .pypirc file exists -if [ ! 
-f "$PYPICRC_FILE" ]; then - echo "Error: .pypirc file does not exist" - exit 1 -fi - -rm -f target/wheels/* - -maturin build --sdist --interpreter python3.9 -maturin build --sdist --interpreter python3.10 -maturin build --sdist --interpreter python3.11 -maturin build --sdist --interpreter python3.12 -maturin build --sdist --interpreter python3.13