diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index e3b277b..aac4338 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -2,29 +2,103 @@ name: Release permissions: contents: write + id-token: write on: push: tags: - - v[0-9]+.* + - "v[0-9]+.[0-9]+.[0-9]+" + workflow_dispatch: + inputs: + publish: + description: "Publish artifacts to PyPI (manual runs default to build-only)" + required: true + type: boolean + default: false + +env: + CARGO_TERM_COLOR: always jobs: - # make sure release content has correct format and README is up-to-date - format-check: + checks: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Run format check run: cargo fmt --check + - name: Run clippy + run: cargo clippy -- -D warnings + + build-sdist: + needs: checks + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: PyO3/maturin-action@v1 + with: + command: sdist + args: --out dist + - uses: actions/upload-artifact@v4 + with: + name: sdist + path: dist/*.tar.gz + + build-wheels: + needs: checks + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + include: + - os: ubuntu-latest + target: x86_64 + - os: ubuntu-latest + target: aarch64 + - os: macos-13 + target: x86_64 + - os: macos-14 + target: aarch64 + - os: windows-latest + target: x64 + steps: + - uses: actions/checkout@v4 + - uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.target }} + args: --release --out dist + - uses: actions/upload-artifact@v4 + with: + name: wheels-${{ matrix.os }}-${{ matrix.target }} + path: dist/*.whl + + publish-pypi: + needs: [build-sdist, build-wheels] + runs-on: ubuntu-latest + if: github.event_name == 'push' || inputs.publish == true + environment: pypi + steps: + - uses: actions/download-artifact@v4 + with: + path: dist + merge-multiple: true + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + packages-dir: dist + skip-existing: true create-release: - needs: format-check + needs: [build-sdist, build-wheels, publish-pypi] runs-on: ubuntu-latest + if: github.event_name == 'push' steps: - uses: actions/checkout@v4 - - uses: taiki-e/create-gh-release-action@v1 + - uses: actions/download-artifact@v4 + with: + path: dist + merge-multiple: true + - name: Create GitHub Release + uses: softprops/action-gh-release@v2 with: - # (optional) Path to changelog. - changelog: CHANGELOG.md - # (required) GitHub token for creating GitHub Releases. - token: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file + files: dist/* + body_path: CHANGELOG.md diff --git a/.github/workflows/rust.yaml b/.github/workflows/rust.yaml index ad64d6d..b3a6e69 100644 --- a/.github/workflows/rust.yaml +++ b/.github/workflows/rust.yaml @@ -10,15 +10,27 @@ env: CARGO_TERM_COLOR: always jobs: - build: - + rust-checks: runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - + - uses: actions/checkout@v4 - name: Run format check run: cargo fmt --check - - name: Run clippy run: cargo clippy -- -D warnings + + python-api: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Install test tools + run: python -m pip install --upgrade pip maturin pytest pytest-benchmark + - name: Build wheel + run: maturin build --release --out dist + - name: Install built wheel + run: python -m pip install --force-reinstall dist/*.whl + - name: Run Python API tests (unit) + run: pytest tests/test_api.py -v diff --git a/.gitignore b/.gitignore index 9ed12d6..fe499f0 100644 --- a/.gitignore +++ b/.gitignore @@ -18,4 +18,6 @@ cache .DS_Store -.pypirc \ No newline at end of file +.pypirc +__pycache__/ +*.py[cod] diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..7329031 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,83 @@ +# PROJECT KNOWLEDGE BASE + +## OVERVIEW + +Python binding for `bgpkit-parser` (Rust MRT/BGP parser). Exposes `Parser` (full elems), `RouteParser` (route-level scans), `Filter` helpers, and projected tuple iteration via PyO3, built with maturin. + +## STRUCTURE + +``` +├── src/lib.rs # Entire Python extension: Parser/elem/route PyO3 classes +├── examples/ # Python usage examples +├── Cargo.toml # Rust crate: pybgpkit-parser, depends on bgpkit-parser +├── pyproject.toml # Maturin build-system config +├── build.rs # PyO3 extension module linker setup +├── benches/ # Rust criterion benchmarks +├── tests/ # Python API tests and benchmark +└── .github/workflows/ # Rust fmt/clippy CI + tag-based release +``` + +## WHERE TO LOOK + +| Task | Location | +|------|----------| +| Change exposed Python API | `src/lib.rs` | +| Update underlying parser logic | `Cargo.toml` → bump `bgpkit-parser` version | +| Build/test locally | `maturin develop` (see README.md) | +| Build wheels for release | GitHub Actions `release.yml` (push `v*` tag) | +| Publish to PyPI | Push `v*` tag; CI publishes via PyPI Trusted Publishing (OIDC) | + +## CODE MAP + +- **`Elem`** — PyO3 class wrapping a parsed BGP element. Has `#[pyo3(get, set)]` fields and `to_dict()` / `__str__` / `__getstate__` methods. +- **`Parser`** — PyO3 class wrapping `bgpkit_parser::BgpkitParser`. Constructor takes `url`, optional `filters` (HashMap), and optional `cache_dir`. Implements `__iter__`/`__next__`, `count`, `iter_batches`, `iter_tuples`, and `iter_tuple_batches`. +- **`RouteParser`** — PyO3 class wrapping `BgpkitParser::into_route_iter()`. Returns lightweight `RouteElem` values. Same iteration/helper surface as `Parser`. +- **`Filter`** — PyO3 class wrapping `bgpkit_parser::parser::Filter`. Constructors: `__init__`, `peer_ip`, `peer_ips`, `origin_asn`, `prefix`, `elem_type`. +- **`TupleIterator` / `TupleBatchIterator`** — High-performance projected tuple iteration for `Parser` and `RouteParser`. +- **`convert_elem`** — Internal fn mapping `BgpElem` → `Elem` (Rust type → PyO3 type). + +## CONVENTIONS + +- Rust fmt/clippy enforced in CI (`cargo fmt --check`, `cargo clippy -- -D warnings`) +- `PyValueError` used for filter errors propagated to Python +- Iterator-backed pyclasses use `#[pyclass(unsendable)]`; no `unsafe impl Send/Sync` +- `#[pyo3(name = "__str__")]` used for JSON string representation of `Elem` +- `atomic` field returns `"AG"`/`"NAG"` strings (not bool) +- `elem_type` field returns `"A"` (announce) or `"W"` (withdraw) + +## ANTI-PATTERNS + +- **Do NOT** change PyO3/maturin versions without updating both `Cargo.toml` and `build.rs` (`pyo3-build-config` must match) +- **Do NOT** test release publishing with a beta tag unless the package version is also beta; use `workflow_dispatch` with `publish=false` for build-only checks +- **Do NOT** add long-lived PyPI API tokens; use PyPI Trusted Publishing with GitHub OIDC (`environment: pypi`) +- **Do NOT** add `unsafe impl Send/Sync` to `#[pyclass]` types; use `#[pyclass(unsendable)]` instead +- **Do NOT** use `.unwrap()` on user inputs (URL/filters); already handled in `BgpkitParser::new` but be careful with new additions +- **Do NOT** make `Elem` fields write-only or remove getters without noting in CHANGELOG as breaking (v0.6.0 was a breaking change) + +## COMMANDS + +```bash +# Local dev build (installs to active venv) +maturin develop + +# Build wheel locally +maturin build --release + +# Build and publish release via CI +git tag v0.7.0 +git push origin v0.7.0 + +# Format + lint +cargo fmt --check +cargo clippy -- -D warnings + +# Publish (after building on all platforms) +twine upload --skip-existing target/wheels/* +``` + +## NOTES + +- `bgpkit-parser` crate version bump is the primary release trigger (see CHANGELOG for version history) +- Release workflow: `rust.yaml` runs Rust + Python API checks on PR/push; `release.yml` builds ABI3 wheels and publishes on `v*` tag push via Trusted Publishing +- Supports Python 3.9+ via ABI3 wheels +- Python API tests live in `tests/test_api.py`; network smoke coverage is gated by `PYBGPKIT_RUN_NETWORK_TESTS=1` diff --git a/BUILD.md b/BUILD.md index 9a5551c..376a088 100644 --- a/BUILD.md +++ b/BUILD.md @@ -1,31 +1,67 @@ # Build and Publish Guide -## Pre-requisites +## Automated Release (Recommended) -- `maturin` -- `docker` - - run `docker build . -t bgpkit-builder:latest` to build the builder image +Release builds are handled by GitHub Actions via `.github/workflows/release.yml`. -## Build and Upload Checklist +Push a version tag to build and publish: -1. run [`build.sh`](./build.sh) on Apple Silicon Mac -2. run [`build.sh`](./build.sh) inside docker on Apple Silicon Mac -3. run [`build.sh`](./build.sh) on Intel Mac -4. run [`build.sh`](./build.sh) inside docker on Intel Mac - -Then run -``` -twine upload --skip-existing target/wheels/* +```bash +git tag v0.7.0 +git push origin v0.7.0 ``` -## Build Linux packages in Docker +The release workflow will: + +1. Run `cargo fmt --check` and `cargo clippy -- -D warnings` +2. Build the source distribution (`sdist`) +3. Build ABI3 wheels for: + - Linux x86_64 + - Linux aarch64 + - macOS x86_64 + - macOS arm64 + - Windows x86_64 +4. Publish artifacts to PyPI using PyPI Trusted Publishing (OIDC) +5. Create a GitHub Release and attach the built artifacts + +Manual workflow runs (`workflow_dispatch`) are build-only by default. They only publish when the `publish` input is explicitly enabled. + +## PyPI Trusted Publishing Setup + +Configure a trusted publisher for the existing `pybgpkit-parser` PyPI project: + +| Field | Value | +|------|-------| +| Owner | `bgpkit` | +| Repository name | `bgpkit-parser-py` | +| Workflow name | `release.yml` | +| Environment name | `pypi` | + +The workflow uses GitHub Actions OIDC (`id-token: write`) and does not require a long-lived PyPI API token. + +## Local Development Build -Build image using the [Dockerfile](./Dockerfile) provided +```bash +maturin develop ``` -docker build -t bgpkit-builder:latest . + +This builds the extension and installs it into the active Python environment. + +## Local Wheel Build + +```bash +maturin build --release ``` -Run `docker run --rm -it bgpkit-builder:latest bash` to open a shell in the container +Built wheels are written under `target/wheels/`. + +## Manual Publish Fallback + +If CI is unavailable, build locally and upload with `twine`: + ```bash -bash build.sh +python -m pip install --upgrade maturin twine +maturin build --release --sdist +twine upload --skip-existing target/wheels/* ``` + diff --git a/CHANGELOG.md b/CHANGELOG.md index fce43b7..f933bb7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,25 @@ All notable changes to this project will be documented in this file. +## 0.7.0 - TBD + +### Highlights + +* Update `bgpkit-parser` to v0.17.0. +* Update PyO3 to v0.28 and enable ABI3 wheels for Python 3.9+. +* Add `peer_bgp_id` and `only_to_customer` fields to `Elem`. +* Add reusable `Filter` class and `Parser.from_filters(...)` constructor. +* Add Rust-like `Elem` utility methods: `is_announcement`, `is_withdrawal`, `get_origin_asn`, `get_origin_asns`, `has_as_path`, `as_dict`, `to_json`, `to_psv`, and `get_psv_header`. +* Add `Elem.origin_asn` property and module constants `ELEM_TYPE_ANNOUNCE`, `ELEM_TYPE_WITHDRAW`, and `PSV_HEADER`. +* Add Python-native filter helper constructors: `Filter.peer_ip`, `Filter.peer_ips`, `Filter.origin_asn`, `Filter.prefix`, and `Filter.elem_type`. +* Add stream-consuming `Parser.count()` and `Parser.iter_batches(batch_size)` helpers. +* Add `RouteElem` and `RouteParser` for upstream route-level parsing (`BgpRouteElem`) and faster route identity scans. +* Add high-performance projected tuple iteration: `iter_tuples(fields)` and `iter_tuple_batches(fields, batch_size)` for `Parser` and `RouteParser`. +* Add field presets `BASIC_FIELDS`, `ROUTE_FIELDS`, and `NEXT_HOP_FIELDS`. +* Optimize `Parser.parse_all()` and batch iteration by parsing while detached from the Python interpreter before converting results into Python objects. +* Add Rust and Python benchmark scaffolding. +* Automate wheel builds and PyPI publishing via GitHub Actions. + ## 0.6.2 - 2025-06-06 ### Fix regressions diff --git a/Cargo.toml b/Cargo.toml index 0b56ac5..555a506 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ description = "BGPKIT Parser Python Binding" keywords = ["bgp", "mrt", "parser"] repository = "https://github.com/bgpkit/bgpkit-parser-py" documentation = "https://docs.rs/bgpkit-parser-py" -version = "0.6.2" +version = "0.7.0" authors = ["Mingwei Zhang "] edition = "2021" license = "MIT" @@ -15,10 +15,17 @@ name = "pybgpkit_parser" crate-type = ["cdylib", "rlib"] [dependencies] -bgpkit-parser = "0.11.1" -pyo3 = { version = "0.25", features = ["extension-module"] } +bgpkit-parser = "0.17.0" +pyo3 = { version = "0.28", features = ["extension-module", "abi3-py39"] } serde = { version = "1.0", features = ["derive"] } serde_json = "1" [build-dependencies] -pyo3-build-config = "0.25" +pyo3-build-config = "0.28" + +[dev-dependencies] +criterion = "0.5" + +[[bench]] +name = "parse_bench" +harness = false diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 2d5c9a1..0000000 --- a/Dockerfile +++ /dev/null @@ -1,33 +0,0 @@ -FROM ubuntu:noble - -RUN apt update && DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt install -y curl libssl-dev pkg-config build-essential software-properties-common tzdata git vim cmake - -# install different versions of Python -RUN add-apt-repository ppa:deadsnakes/ppa -y && apt update - -RUN apt install -y python3.9 python3.9-distutils -RUN apt install -y python3.10 python3.10-distutils -RUN apt install -y python3.11 python3.11-distutils -RUN apt install -y python3.12 -RUN apt install -y python3.13 - -RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.9 -RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 -RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11 -RUN curl -sS https://bootstrap.pypa.io/get-pip.py -o get-pip.py && python3.12 get-pip.py --break-system-packages && rm get-pip.py -RUN curl -sS https://bootstrap.pypa.io/get-pip.py -o get-pip.py && python3.13 get-pip.py --break-system-packages && rm get-pip.py - -# install maturin -RUN python3.13 -m pip install maturin patchelf twine -# install Rust -RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y - -WORKDIR /io/bgpkit-parser-py -COPY ./src ./src -COPY ./build.rs . -COPY ./build.sh . -COPY ./Cargo.toml . -COPY ./README.md . -COPY ./pyproject.toml . - -COPY ./.pypirc /root/.pypirc \ No newline at end of file diff --git a/README.md b/README.md index bbe240d..60044d4 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,34 @@ for elem in parser: break ``` +## Filters + +The original dictionary-based filter API is still supported: + +```python +parser = Parser(url, filters={"peer_ips": "185.1.8.65,2001:7f8:73:0:3:fa4:0:1"}) +``` + +Reusable Rust-backed filters are also available: + +```python +from pybgpkit_parser import Filter, Parser + +filters = [ + Filter.peer_ips(["185.1.8.65", "2001:7f8:73:0:3:fa4:0:1"]), + Filter.elem_type("a"), +] +parser = Parser.from_filters(url, filters) +``` + +Available helper constructors: + +- `Filter.peer_ip(...)` +- `Filter.peer_ips([...])` +- `Filter.origin_asn(...)` +- `Filter.prefix(...)` +- `Filter.elem_type(...)` + ## Available fields for `Elem` ```rust @@ -53,6 +81,8 @@ for elem in parser: #[pyo3(get, set)] pub peer_asn: u32, #[pyo3(get, set)] + pub peer_bgp_id: Option, + #[pyo3(get, set)] pub prefix: String, #[pyo3(get, set)] pub next_hop: Option, @@ -74,6 +104,8 @@ for elem in parser: pub aggr_asn: Option, #[pyo3(get, set)] pub aggr_ip: Option, + #[pyo3(get, set)] + pub only_to_customer: Option, } ``` @@ -95,6 +127,79 @@ python3 -m pip install pybgpkit-parser `maturin develop` builds local python module and add to the venv. +## High-performance projected iteration + +For best performance, prefer projected tuple iteration when you only need a subset of fields. This avoids creating full `Elem` objects and skips conversion for unused fields. + +```python +from pybgpkit_parser import Parser, ROUTE_FIELDS + +# Fast: only converts requested fields +for timestamp, prefix, as_path in Parser(url).iter_tuples(["timestamp", "prefix", "as_path"]): + pass + +# Faster for large files: batch Python boundary crossings +fields = ["timestamp", "prefix", "as_path"] +for batch in Parser(url).iter_tuple_batches(fields, batch_size=10_000): + for timestamp, prefix, as_path in batch: + pass +``` + +Available field presets: + +- `BASIC_FIELDS`: `timestamp`, `elem_type`, `peer_ip`, `peer_asn`, `prefix` +- `ROUTE_FIELDS`: `BASIC_FIELDS` + `as_path` +- `NEXT_HOP_FIELDS`: `BASIC_FIELDS` + `next_hop` + +You can also pass your own field list, e.g. `Parser(url).iter_tuples(["peer_asn", "prefix"])`. + +## Utility methods + +`Elem` exposes Rust-like helper methods: + +- `is_announcement()` +- `is_withdrawal()` +- `get_origin_asn()` +- `get_origin_asns()` +- `has_as_path()` +- `to_dict()` / `as_dict()` +- `origin_asn` property +- `to_json()` +- `to_psv()` +- `Elem.get_psv_header()` +- module constants: `ELEM_TYPE_ANNOUNCE`, `ELEM_TYPE_WITHDRAW`, `PSV_HEADER` + +`Parser` also provides stream-consuming helpers: + +- `count()` +- `iter_batches(batch_size)` +- `iter_tuples(fields)` — recommended for high-performance subset-field scans +- `iter_tuple_batches(fields, batch_size)` — recommended for large-file scans + +## Route-level parsing + +`RouteParser` exposes upstream `BgpRouteElem` iteration for faster scans when you only need route identity fields: + +```python +from pybgpkit_parser import RouteParser + +for route in RouteParser(url): + print(route.timestamp, route.peer_ip, route.peer_asn, route.prefix, route.as_path) +``` + +`RouteElem` fields: + +- `timestamp` +- `elem_type` +- `peer_ip` +- `peer_asn` +- `prefix` +- `as_path` + +`RouteParser` supports the same constructor style, `from_filters(...)`, `parse_all()`, `parse_next()`, `count()`, `iter_batches(batch_size)`, `iter_tuples(fields)`, and `iter_tuple_batches(fields, batch_size)`. + +For route scans, this is the fastest object-based API; for maximum throughput use `RouteParser.iter_tuples(ROUTE_FIELDS)` or `RouteParser.iter_tuple_batches(ROUTE_FIELDS, batch_size)`. + ## Build and publish -See [BUILD.md](./BUILD.md) for more details. \ No newline at end of file +See [BUILD.md](./BUILD.md) for automated GitHub Actions release details. \ No newline at end of file diff --git a/benches/parse_bench.rs b/benches/parse_bench.rs new file mode 100644 index 0000000..b082bbe --- /dev/null +++ b/benches/parse_bench.rs @@ -0,0 +1,36 @@ +use bgpkit_parser::BgpkitParser; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; + +fn bench_input() -> String { + std::env::var("BGP_TEST_FILE") + .unwrap_or_else(|_| "https://spaces.bgpkit.org/parser/update-example".to_string()) +} + +fn bench_rust_native_iteration(c: &mut Criterion) { + let input = bench_input(); + c.bench_function("rust native elem iteration", |b| { + b.iter(|| { + let parser = BgpkitParser::new(input.as_str()).expect("create parser"); + let count = parser.into_elem_iter().count(); + black_box(count); + }); + }); +} + +fn bench_rust_native_route_iteration(c: &mut Criterion) { + let input = bench_input(); + c.bench_function("rust native route iteration", |b| { + b.iter(|| { + let parser = BgpkitParser::new(input.as_str()).expect("create parser"); + let count = parser.into_route_iter().count(); + black_box(count); + }); + }); +} + +criterion_group!( + benches, + bench_rust_native_iteration, + bench_rust_native_route_iteration +); +criterion_main!(benches); diff --git a/build.sh b/build.sh deleted file mode 100644 index 6987208..0000000 --- a/build.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env bash -set -e - -# Path to the .pypirc file -PYPICRC_FILE="$HOME/.pypirc" - -# Check if .pypirc file exists -if [ ! -f "$PYPICRC_FILE" ]; then - echo "Error: .pypirc file does not exist" - exit 1 -fi - -rm -f target/wheels/* - -maturin build --sdist --interpreter python3.9 -maturin build --sdist --interpreter python3.10 -maturin build --sdist --interpreter python3.11 -maturin build --sdist --interpreter python3.12 -maturin build --sdist --interpreter python3.13 diff --git a/pyproject.toml b/pyproject.toml index 07dd28c..0812d5a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,22 @@ [build-system] requires = ["maturin>=1.0,<2.0"] build-backend = "maturin" + +[project] +name = "pybgpkit-parser" +dynamic = ["version"] +description = "Python binding for bgpkit-parser" +readme = "README.md" +requires-python = ">=3.9" +license = { text = "MIT" } +classifiers = [ + "Programming Language :: Rust", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", +] + +[project.optional-dependencies] +dev = ["pytest", "pytest-benchmark"] diff --git a/src/lib.rs b/src/lib.rs index ec4747d..8a06b6f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,10 @@ use bgpkit_parser::models::*; +use bgpkit_parser::parser::Filter as BgpkitFilter; use bgpkit_parser::*; +use pyo3::conversion::IntoPyObjectExt; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; +use pyo3::types::PyTuple; use serde::Serialize; use std::collections::HashMap; use std::io::Read; @@ -17,6 +20,7 @@ fn pybgpkit_parser(_py: Python, m: &Bound) -> PyResult<()> { }, peer_ip: elem.peer_ip.to_string(), peer_asn: elem.peer_asn.to_u32(), + peer_bgp_id: elem.peer_bgp_id.map(|v| v.to_string()), prefix: elem.prefix.to_string(), next_hop: elem.next_hop.map(|v| v.to_string()), as_path: elem.as_path.map(|v| v.to_string()), @@ -35,10 +39,219 @@ fn pybgpkit_parser(_py: Python, m: &Bound) -> PyResult<()> { }, aggr_asn: elem.aggr_asn.map(|v| v.to_u32()), aggr_ip: elem.aggr_ip.map(|v| v.to_string()), + only_to_customer: elem.only_to_customer.map(|v| v.to_u32()), } } - #[pyclass] + fn convert_route_elem(elem: BgpRouteElem) -> RouteElem { + RouteElem { + timestamp: elem.timestamp, + elem_type: match elem.elem_type { + ElemType::ANNOUNCE => "A".to_string(), + ElemType::WITHDRAW => "W".to_string(), + }, + peer_ip: elem.peer_ip.to_string(), + peer_asn: elem.peer_asn.to_u32(), + prefix: elem.prefix.to_string(), + as_path: elem.as_path.map(|v| v.to_string()), + } + } + + fn new_parser( + url: &str, + cache_dir: Option<&str>, + ) -> PyResult>> { + match cache_dir { + None => BgpkitParser::new(url).map_err(|e| PyValueError::new_err(e.to_string())), + Some(dir) => { + BgpkitParser::new_cached(url, dir).map_err(|e| PyValueError::new_err(e.to_string())) + } + } + } + + fn option_to_string(v: &Option) -> String { + v.as_ref().map(|x| x.to_string()).unwrap_or_default() + } + + fn option_vec_to_string(v: &Option>) -> String { + v.as_ref() + .map(|items| { + items + .iter() + .map(|x| x.to_string()) + .collect::>() + .join(",") + }) + .unwrap_or_default() + } + + #[derive(Clone, Copy)] + enum ElemField { + Timestamp, + ElemType, + PeerIp, + PeerAsn, + PeerBgpId, + Prefix, + NextHop, + AsPath, + OriginAsns, + OriginAsn, + Origin, + LocalPref, + Med, + Communities, + Atomic, + AggrAsn, + AggrIp, + OnlyToCustomer, + } + + #[derive(Clone, Copy)] + enum RouteField { + Timestamp, + ElemType, + PeerIp, + PeerAsn, + Prefix, + AsPath, + } + + fn parse_elem_field(field: &str) -> PyResult { + match field { + "timestamp" => Ok(ElemField::Timestamp), + "elem_type" | "type" => Ok(ElemField::ElemType), + "peer_ip" => Ok(ElemField::PeerIp), + "peer_asn" => Ok(ElemField::PeerAsn), + "peer_bgp_id" => Ok(ElemField::PeerBgpId), + "prefix" => Ok(ElemField::Prefix), + "next_hop" => Ok(ElemField::NextHop), + "as_path" => Ok(ElemField::AsPath), + "origin_asns" => Ok(ElemField::OriginAsns), + "origin_asn" => Ok(ElemField::OriginAsn), + "origin" => Ok(ElemField::Origin), + "local_pref" => Ok(ElemField::LocalPref), + "med" => Ok(ElemField::Med), + "communities" => Ok(ElemField::Communities), + "atomic" => Ok(ElemField::Atomic), + "aggr_asn" => Ok(ElemField::AggrAsn), + "aggr_ip" => Ok(ElemField::AggrIp), + "only_to_customer" => Ok(ElemField::OnlyToCustomer), + _ => Err(PyValueError::new_err(format!("unknown field: {field}"))), + } + } + + fn parse_route_field(field: &str) -> PyResult { + match field { + "timestamp" => Ok(RouteField::Timestamp), + "elem_type" | "type" => Ok(RouteField::ElemType), + "peer_ip" => Ok(RouteField::PeerIp), + "peer_asn" => Ok(RouteField::PeerAsn), + "prefix" => Ok(RouteField::Prefix), + "as_path" => Ok(RouteField::AsPath), + _ => Err(PyValueError::new_err(format!( + "unknown route field: {field}" + ))), + } + } + + fn parse_elem_fields(fields: Vec) -> PyResult> { + if fields.is_empty() { + return Err(PyValueError::new_err("fields must not be empty")); + } + fields + .iter() + .map(|f| parse_elem_field(f.as_str())) + .collect() + } + + fn parse_route_fields(fields: Vec) -> PyResult> { + if fields.is_empty() { + return Err(PyValueError::new_err("fields must not be empty")); + } + fields + .iter() + .map(|f| parse_route_field(f.as_str())) + .collect() + } + + fn elem_field_to_py(py: Python, elem: &BgpElem, field: ElemField) -> PyResult> { + match field { + ElemField::Timestamp => elem.timestamp.into_py_any(py), + ElemField::ElemType => match elem.elem_type { + ElemType::ANNOUNCE => "A".into_py_any(py), + ElemType::WITHDRAW => "W".into_py_any(py), + }, + ElemField::PeerIp => elem.peer_ip.to_string().into_py_any(py), + ElemField::PeerAsn => elem.peer_asn.to_u32().into_py_any(py), + ElemField::PeerBgpId => elem.peer_bgp_id.map(|v| v.to_string()).into_py_any(py), + ElemField::Prefix => elem.prefix.to_string().into_py_any(py), + ElemField::NextHop => elem.next_hop.map(|v| v.to_string()).into_py_any(py), + ElemField::AsPath => elem.as_path.as_ref().map(|v| v.to_string()).into_py_any(py), + ElemField::OriginAsns => elem + .origin_asns + .as_ref() + .map(|v| v.iter().map(|x| x.to_u32()).collect::>()) + .into_py_any(py), + ElemField::OriginAsn => elem + .origin_asns + .as_ref() + .and_then(|origin_asns| (origin_asns.len() == 1).then_some(origin_asns[0].to_u32())) + .into_py_any(py), + ElemField::Origin => elem.origin.map(|v| v.to_string()).into_py_any(py), + ElemField::LocalPref => elem.local_pref.into_py_any(py), + ElemField::Med => elem.med.into_py_any(py), + ElemField::Communities => elem + .communities + .as_ref() + .map(|v| v.iter().map(|x| x.to_string()).collect::>()) + .into_py_any(py), + ElemField::Atomic => elem.atomic.into_py_any(py), + ElemField::AggrAsn => elem.aggr_asn.map(|v| v.to_u32()).into_py_any(py), + ElemField::AggrIp => elem.aggr_ip.map(|v| v.to_string()).into_py_any(py), + ElemField::OnlyToCustomer => elem.only_to_customer.map(|v| v.to_u32()).into_py_any(py), + } + } + + fn route_field_to_py( + py: Python, + elem: &BgpRouteElem, + field: RouteField, + ) -> PyResult> { + match field { + RouteField::Timestamp => elem.timestamp.into_py_any(py), + RouteField::ElemType => match elem.elem_type { + ElemType::ANNOUNCE => "A".into_py_any(py), + ElemType::WITHDRAW => "W".into_py_any(py), + }, + RouteField::PeerIp => elem.peer_ip.to_string().into_py_any(py), + RouteField::PeerAsn => elem.peer_asn.to_u32().into_py_any(py), + RouteField::Prefix => elem.prefix.to_string().into_py_any(py), + RouteField::AsPath => elem.as_path.as_ref().map(|v| v.to_string()).into_py_any(py), + } + } + + fn elem_to_tuple(py: Python, elem: BgpElem, fields: &[ElemField]) -> PyResult> { + let values = fields + .iter() + .map(|field| elem_field_to_py(py, &elem, *field)) + .collect::>>()?; + Ok(PyTuple::new(py, values)?.unbind()) + } + + fn route_to_tuple( + py: Python, + elem: BgpRouteElem, + fields: &[RouteField], + ) -> PyResult> { + let values = fields + .iter() + .map(|field| route_field_to_py(py, &elem, *field)) + .collect::>>()?; + Ok(PyTuple::new(py, values)?.unbind()) + } + + #[pyclass(skip_from_py_object)] #[derive(Clone, PartialEq, Serialize)] pub struct Elem { #[pyo3(get, set)] @@ -50,6 +263,8 @@ fn pybgpkit_parser(_py: Python, m: &Bound) -> PyResult<()> { #[pyo3(get, set)] pub peer_asn: u32, #[pyo3(get, set)] + pub peer_bgp_id: Option, + #[pyo3(get, set)] pub prefix: String, #[pyo3(get, set)] pub next_hop: Option, @@ -71,50 +286,300 @@ fn pybgpkit_parser(_py: Python, m: &Bound) -> PyResult<()> { pub aggr_asn: Option, #[pyo3(get, set)] pub aggr_ip: Option, + #[pyo3(get, set)] + pub only_to_customer: Option, } #[pymethods] impl Elem { - pub fn to_dict(&self, py: Python) -> PyObject { + pub fn to_dict<'py>(&self, py: Python<'py>) -> PyResult> { use pyo3::types::PyDict; let dict = PyDict::new(py); - dict.set_item("timestamp", self.timestamp).unwrap(); - dict.set_item("elem_type", self.elem_type.clone()).unwrap(); - dict.set_item("peer_ip", self.peer_ip.clone()).unwrap(); - dict.set_item("peer_asn", self.peer_asn).unwrap(); - dict.set_item("prefix", self.prefix.clone()).unwrap(); - dict.set_item("next_hop", self.next_hop.clone()).unwrap(); - dict.set_item("as_path", self.as_path.clone()).unwrap(); - dict.set_item("origin_asns", self.origin_asns.clone()) - .unwrap(); - dict.set_item("origin", self.origin.clone()).unwrap(); - dict.set_item("local_pref", self.local_pref).unwrap(); - dict.set_item("med", self.med).unwrap(); - dict.set_item("communities", self.communities.clone()) - .unwrap(); - dict.set_item("atomic", self.atomic.clone()).unwrap(); - dict.set_item("aggr_asn", self.aggr_asn).unwrap(); - dict.set_item("aggr_ip", self.aggr_ip.clone()).unwrap(); - dict.into() - } - - fn __getstate__(&self, py: Python) -> PyObject { + dict.set_item("timestamp", self.timestamp)?; + dict.set_item("elem_type", self.elem_type.clone())?; + dict.set_item("peer_ip", self.peer_ip.clone())?; + dict.set_item("peer_asn", self.peer_asn)?; + dict.set_item("peer_bgp_id", self.peer_bgp_id.clone())?; + dict.set_item("prefix", self.prefix.clone())?; + dict.set_item("next_hop", self.next_hop.clone())?; + dict.set_item("as_path", self.as_path.clone())?; + dict.set_item("origin_asns", self.origin_asns.clone())?; + dict.set_item("origin", self.origin.clone())?; + dict.set_item("local_pref", self.local_pref)?; + dict.set_item("med", self.med)?; + dict.set_item("communities", self.communities.clone())?; + dict.set_item("atomic", self.atomic.clone())?; + dict.set_item("aggr_asn", self.aggr_asn)?; + dict.set_item("aggr_ip", self.aggr_ip.clone())?; + dict.set_item("only_to_customer", self.only_to_customer)?; + Ok(dict) + } + + pub fn as_dict<'py>(&self, py: Python<'py>) -> PyResult> { + self.to_dict(py) + } + + #[getter(origin_asn)] + fn origin_asn_value(&self) -> Option { + self.get_origin_asn() + } + + fn __getstate__<'py>(&self, py: Python<'py>) -> PyResult> { self.to_dict(py) } #[pyo3(name = "__str__")] fn str_repr(&self) -> PyResult { - Ok(serde_json::to_string(self).unwrap().to_string()) + self.to_json() + } + + #[pyo3(name = "__repr__")] + fn repr(&self) -> PyResult { + Ok(format!( + "", + self.prefix, self.peer_ip, self.elem_type + )) + } + + pub fn is_announcement(&self) -> bool { + self.elem_type.eq_ignore_ascii_case("A") + } + + pub fn is_withdrawal(&self) -> bool { + self.elem_type.eq_ignore_ascii_case("W") + } + + pub fn get_origin_asn(&self) -> Option { + self.origin_asns + .as_ref() + .and_then(|origin_asns| (origin_asns.len() == 1).then_some(origin_asns[0])) + } + + pub fn get_origin_asns(&self) -> Option> { + self.origin_asns.clone() + } + + pub fn has_as_path(&self) -> bool { + self.as_path.is_some() + } + + pub fn to_json(&self) -> PyResult { + serde_json::to_string(self).map_err(|e| PyValueError::new_err(e.to_string())) + } + + #[staticmethod] + pub fn get_psv_header() -> String { + [ + "type", + "timestamp", + "peer_ip", + "peer_asn", + "prefix", + "as_path", + "origin_asns", + "origin", + "next_hop", + "local_pref", + "med", + "communities", + "atomic", + "aggr_asn", + "aggr_ip", + "only_to_customer", + "peer_bgp_id", + ] + .join("|") + } + + pub fn to_psv(&self) -> String { + format!( + "{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}", + self.elem_type, + self.timestamp, + self.peer_ip, + self.peer_asn, + self.prefix, + option_to_string(&self.as_path), + option_vec_to_string(&self.origin_asns), + option_to_string(&self.origin), + option_to_string(&self.next_hop), + option_to_string(&self.local_pref), + option_to_string(&self.med), + option_vec_to_string(&self.communities), + option_to_string(&self.atomic), + option_to_string(&self.aggr_asn), + option_to_string(&self.aggr_ip), + option_to_string(&self.only_to_customer), + option_to_string(&self.peer_bgp_id), + ) + } + } + + #[pyclass(skip_from_py_object)] + #[derive(Clone, PartialEq, Serialize)] + pub struct RouteElem { + #[pyo3(get, set)] + pub timestamp: f64, + #[pyo3(get, set)] + pub elem_type: String, + #[pyo3(get, set)] + pub peer_ip: String, + #[pyo3(get, set)] + pub peer_asn: u32, + #[pyo3(get, set)] + pub prefix: String, + #[pyo3(get, set)] + pub as_path: Option, + } + + #[pymethods] + impl RouteElem { + pub fn to_dict<'py>(&self, py: Python<'py>) -> PyResult> { + use pyo3::types::PyDict; + let dict = PyDict::new(py); + dict.set_item("timestamp", self.timestamp)?; + dict.set_item("elem_type", self.elem_type.clone())?; + dict.set_item("peer_ip", self.peer_ip.clone())?; + dict.set_item("peer_asn", self.peer_asn)?; + dict.set_item("prefix", self.prefix.clone())?; + dict.set_item("as_path", self.as_path.clone())?; + Ok(dict) + } + + pub fn as_dict<'py>(&self, py: Python<'py>) -> PyResult> { + self.to_dict(py) + } + + pub fn is_announcement(&self) -> bool { + self.elem_type.eq_ignore_ascii_case("A") + } + + pub fn is_withdrawal(&self) -> bool { + self.elem_type.eq_ignore_ascii_case("W") + } + + pub fn has_as_path(&self) -> bool { + self.as_path.is_some() + } + + pub fn to_json(&self) -> PyResult { + serde_json::to_string(self).map_err(|e| PyValueError::new_err(e.to_string())) + } + + #[pyo3(name = "__str__")] + fn str_repr(&self) -> PyResult { + self.to_json() + } + + #[pyo3(name = "__repr__")] + fn repr(&self) -> PyResult { + Ok(format!( + "", + self.prefix, self.peer_ip, self.elem_type + )) + } + } + + #[pyclass(name = "Filter", skip_from_py_object)] + #[derive(Clone)] + struct PyFilter { + inner: BgpkitFilter, + } + + #[pymethods] + impl PyFilter { + #[new] + #[pyo3(signature = (filter_type, filter_value))] + fn new(filter_type: String, filter_value: String) -> PyResult { + Self::from_parts(filter_type.as_str(), filter_value.as_str()) + } + + #[staticmethod] + fn peer_ip(peer_ip: String) -> PyResult { + Self::from_parts("peer_ip", peer_ip.as_str()) + } + + #[staticmethod] + fn peer_ips(peer_ips: Vec) -> PyResult { + Self::from_parts("peer_ips", peer_ips.join(",").as_str()) + } + + #[staticmethod] + fn origin_asn(origin_asn: u32) -> PyResult { + Self::from_parts("origin_asn", origin_asn.to_string().as_str()) + } + + #[staticmethod] + fn prefix(prefix: String) -> PyResult { + Self::from_parts("prefix", prefix.as_str()) + } + + #[staticmethod] + fn elem_type(elem_type: String) -> PyResult { + Self::from_parts("type", elem_type.as_str()) + } + + #[pyo3(name = "__repr__")] + fn repr(&self) -> PyResult { + Ok(format!("", self.inner)) + } + } + + impl PyFilter { + fn from_parts(filter_type: &str, filter_value: &str) -> PyResult { + let inner = BgpkitFilter::new(filter_type, filter_value) + .map_err(|e| PyValueError::new_err(e.to_string()))?; + Ok(PyFilter { inner }) } } - #[pyclass] + #[pyclass(unsendable)] struct Parser { - elem_iter: ElemIterator>, + elem_iter: Option>>, + } + + #[pyclass(unsendable)] + struct BatchIterator { + elem_iter: Option>>, + batch_size: usize, + } + + #[pyclass(unsendable)] + struct RouteParser { + route_iter: Option>>, + } + + #[pyclass(unsendable)] + struct RouteBatchIterator { + route_iter: Option>>, + batch_size: usize, + } + + #[pyclass(unsendable)] + struct TupleIterator { + elem_iter: Option>>, + fields: Vec, + } + + #[pyclass(unsendable)] + struct TupleBatchIterator { + elem_iter: Option>>, + fields: Vec, + batch_size: usize, + } + + #[pyclass(unsendable)] + struct RouteTupleIterator { + route_iter: Option>>, + fields: Vec, } - unsafe impl Send for Parser {} - unsafe impl Sync for Parser {} + #[pyclass(unsendable)] + struct RouteTupleBatchIterator { + route_iter: Option>>, + fields: Vec, + batch_size: usize, + } #[pymethods] impl Parser { @@ -125,50 +590,431 @@ fn pybgpkit_parser(_py: Python, m: &Bound) -> PyResult<()> { filters: Option>, cache_dir: Option, ) -> PyResult { - let mut parser = match cache_dir { - None => BgpkitParser::new(url.as_str()).unwrap(), - Some(dir) => BgpkitParser::new_cached(url.as_str(), dir.as_str()).unwrap(), - }; + let mut parser = new_parser(url.as_str(), cache_dir.as_deref())?; if let Some(filters) = filters { for (k, v) in filters { - parser = match parser.add_filter(k.as_str(), v.as_str()) { - Ok(p) => p, - Err(e) => return Err(PyValueError::new_err(e.to_string())), - } + parser = parser + .add_filter(k.as_str(), v.as_str()) + .map_err(|e| PyValueError::new_err(e.to_string()))?; } } - let elem_iter = parser.into_iter(); + let elem_iter = Some(parser.into_elem_iter()); + Ok(Parser { elem_iter }) + } + + #[staticmethod] + #[pyo3(signature = (url, filters, cache_dir=None))] + fn from_filters( + url: String, + filters: Vec>, + cache_dir: Option, + ) -> PyResult { + let parser = new_parser(url.as_str(), cache_dir.as_deref())?; + let filters = filters + .iter() + .map(|f| f.inner.clone()) + .collect::>(); + let elem_iter = Some(parser.with_filters(&filters).into_elem_iter()); Ok(Parser { elem_iter }) } fn parse_all(&mut self, py: Python) -> PyResult>> { - let mut elems = vec![]; - for e in self.elem_iter.by_ref() { - elems.push(Py::new(py, convert_elem(e))?); - } - Ok(elems) + let Some(mut elem_iter) = self.elem_iter.take() else { + return Ok(Vec::new()); + }; + let elems = py.detach(|| elem_iter.by_ref().map(convert_elem).collect::>()); + elems.into_iter().map(|e| Py::new(py, e)).collect() } fn parse_next(&mut self, py: Python) -> PyResult>> { - Ok(self - .elem_iter + let Some(elem_iter) = self.elem_iter.as_mut() else { + return Ok(None); + }; + elem_iter .next() - .map(|e| Py::new(py, convert_elem(e)).unwrap())) + .map(|e| Py::new(py, convert_elem(e))) + .transpose() + } + + fn count(&mut self, py: Python) -> usize { + let Some(elem_iter) = self.elem_iter.take() else { + return 0; + }; + py.detach(|| elem_iter.count()) + } + + fn iter_batches(&mut self, batch_size: usize) -> PyResult { + if batch_size == 0 { + return Err(PyValueError::new_err("batch_size must be greater than 0")); + } + Ok(BatchIterator { + elem_iter: self.elem_iter.take(), + batch_size, + }) + } + + fn iter_tuples(&mut self, fields: Vec) -> PyResult { + Ok(TupleIterator { + elem_iter: self.elem_iter.take(), + fields: parse_elem_fields(fields)?, + }) + } + + fn iter_tuple_batches( + &mut self, + fields: Vec, + batch_size: usize, + ) -> PyResult { + if batch_size == 0 { + return Err(PyValueError::new_err("batch_size must be greater than 0")); + } + Ok(TupleBatchIterator { + elem_iter: self.elem_iter.take(), + fields: parse_elem_fields(fields)?, + batch_size, + }) } fn __iter__(slf: PyRef) -> PyRef { slf } - fn __next__(mut slf: PyRefMut, py: Python) -> Option> { - slf.elem_iter + fn __next__(mut slf: PyRefMut, py: Python) -> PyResult>> { + let Some(elem_iter) = slf.elem_iter.as_mut() else { + return Ok(None); + }; + elem_iter .next() - .map(|e| Py::new(py, convert_elem(e)).unwrap()) + .map(|e| Py::new(py, convert_elem(e))) + .transpose() + } + } + + #[pymethods] + impl BatchIterator { + fn __iter__(slf: PyRef) -> PyRef { + slf + } + + fn __next__(mut slf: PyRefMut, py: Python) -> PyResult>>> { + let batch_size = slf.batch_size; + let Some(elem_iter) = slf.elem_iter.as_mut() else { + return Ok(None); + }; + + let elems = py.detach(|| { + elem_iter + .by_ref() + .take(batch_size) + .map(convert_elem) + .collect::>() + }); + + if elems.is_empty() { + slf.elem_iter = None; + return Ok(None); + } + + elems + .into_iter() + .map(|e| Py::new(py, e)) + .collect::>>() + .map(Some) + } + } + + #[pymethods] + impl TupleIterator { + fn __iter__(slf: PyRef) -> PyRef { + slf + } + + fn __next__(mut slf: PyRefMut, py: Python) -> PyResult>> { + let slf = &mut *slf; + let fields = slf.fields.as_slice(); + let Some(elem_iter) = slf.elem_iter.as_mut() else { + return Ok(None); + }; + elem_iter + .next() + .map(|elem| elem_to_tuple(py, elem, fields)) + .transpose() + } + } + + #[pymethods] + impl TupleBatchIterator { + fn __iter__(slf: PyRef) -> PyRef { + slf + } + + fn __next__(mut slf: PyRefMut, py: Python) -> PyResult>>> { + let slf = &mut *slf; + let fields = slf.fields.as_slice(); + let batch_size = slf.batch_size; + let Some(elem_iter) = slf.elem_iter.as_mut() else { + return Ok(None); + }; + + let elems = py.detach(|| { + elem_iter + .by_ref() + .take(batch_size) + .collect::>() + }); + if elems.is_empty() { + slf.elem_iter = None; + return Ok(None); + } + + elems + .into_iter() + .map(|elem| elem_to_tuple(py, elem, fields)) + .collect::>>() + .map(Some) + } + } + + #[pymethods] + impl RouteParser { + #[new] + #[pyo3(signature = (url, filters=None, cache_dir=None))] + fn new( + url: String, + filters: Option>, + cache_dir: Option, + ) -> PyResult { + let mut parser = new_parser(url.as_str(), cache_dir.as_deref())?; + + if let Some(filters) = filters { + for (k, v) in filters { + parser = parser + .add_filter(k.as_str(), v.as_str()) + .map_err(|e| PyValueError::new_err(e.to_string()))?; + } + } + let route_iter = Some(parser.into_route_iter()); + Ok(RouteParser { route_iter }) + } + + #[staticmethod] + #[pyo3(signature = (url, filters, cache_dir=None))] + fn from_filters( + url: String, + filters: Vec>, + cache_dir: Option, + ) -> PyResult { + let parser = new_parser(url.as_str(), cache_dir.as_deref())?; + let filters = filters + .iter() + .map(|f| f.inner.clone()) + .collect::>(); + let route_iter = Some(parser.with_filters(&filters).into_route_iter()); + Ok(RouteParser { route_iter }) + } + + fn parse_all(&mut self, py: Python) -> PyResult>> { + let Some(mut route_iter) = self.route_iter.take() else { + return Ok(Vec::new()); + }; + let routes = py.detach(|| { + route_iter + .by_ref() + .map(convert_route_elem) + .collect::>() + }); + routes.into_iter().map(|e| Py::new(py, e)).collect() + } + + fn parse_next(&mut self, py: Python) -> PyResult>> { + let Some(route_iter) = self.route_iter.as_mut() else { + return Ok(None); + }; + route_iter + .next() + .map(|e| Py::new(py, convert_route_elem(e))) + .transpose() + } + + fn count(&mut self, py: Python) -> usize { + let Some(route_iter) = self.route_iter.take() else { + return 0; + }; + py.detach(|| route_iter.count()) + } + + fn iter_batches(&mut self, batch_size: usize) -> PyResult { + if batch_size == 0 { + return Err(PyValueError::new_err("batch_size must be greater than 0")); + } + Ok(RouteBatchIterator { + route_iter: self.route_iter.take(), + batch_size, + }) + } + + fn iter_tuples(&mut self, fields: Vec) -> PyResult { + Ok(RouteTupleIterator { + route_iter: self.route_iter.take(), + fields: parse_route_fields(fields)?, + }) + } + + fn iter_tuple_batches( + &mut self, + fields: Vec, + batch_size: usize, + ) -> PyResult { + if batch_size == 0 { + return Err(PyValueError::new_err("batch_size must be greater than 0")); + } + Ok(RouteTupleBatchIterator { + route_iter: self.route_iter.take(), + fields: parse_route_fields(fields)?, + batch_size, + }) + } + + fn __iter__(slf: PyRef) -> PyRef { + slf + } + + fn __next__(mut slf: PyRefMut, py: Python) -> PyResult>> { + let Some(route_iter) = slf.route_iter.as_mut() else { + return Ok(None); + }; + route_iter + .next() + .map(|e| Py::new(py, convert_route_elem(e))) + .transpose() + } + } + + #[pymethods] + impl RouteTupleIterator { + fn __iter__(slf: PyRef) -> PyRef { + slf + } + + fn __next__(mut slf: PyRefMut, py: Python) -> PyResult>> { + let slf = &mut *slf; + let fields = slf.fields.as_slice(); + let Some(route_iter) = slf.route_iter.as_mut() else { + return Ok(None); + }; + route_iter + .next() + .map(|route| route_to_tuple(py, route, fields)) + .transpose() + } + } + + #[pymethods] + impl RouteTupleBatchIterator { + fn __iter__(slf: PyRef) -> PyRef { + slf + } + + fn __next__(mut slf: PyRefMut, py: Python) -> PyResult>>> { + let slf = &mut *slf; + let fields = slf.fields.as_slice(); + let batch_size = slf.batch_size; + let Some(route_iter) = slf.route_iter.as_mut() else { + return Ok(None); + }; + + let routes = py.detach(|| { + route_iter + .by_ref() + .take(batch_size) + .collect::>() + }); + if routes.is_empty() { + slf.route_iter = None; + return Ok(None); + } + + routes + .into_iter() + .map(|route| route_to_tuple(py, route, fields)) + .collect::>>() + .map(Some) + } + } + + #[pymethods] + impl RouteBatchIterator { + fn __iter__(slf: PyRef) -> PyRef { + slf + } + + fn __next__(mut slf: PyRefMut, py: Python) -> PyResult>>> { + let batch_size = slf.batch_size; + let Some(route_iter) = slf.route_iter.as_mut() else { + return Ok(None); + }; + + let routes = py.detach(|| { + route_iter + .by_ref() + .take(batch_size) + .map(convert_route_elem) + .collect::>() + }); + + if routes.is_empty() { + slf.route_iter = None; + return Ok(None); + } + + routes + .into_iter() + .map(|e| Py::new(py, e)) + .collect::>>() + .map(Some) } } m.add_class::()?; + m.add_class::()?; + m.add_class::()?; m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add("ELEM_TYPE_ANNOUNCE", "A")?; + m.add("ELEM_TYPE_WITHDRAW", "W")?; + m.add("PSV_HEADER", Elem::get_psv_header())?; + m.add( + "BASIC_FIELDS", + vec!["timestamp", "elem_type", "peer_ip", "peer_asn", "prefix"], + )?; + m.add( + "ROUTE_FIELDS", + vec![ + "timestamp", + "elem_type", + "peer_ip", + "peer_asn", + "prefix", + "as_path", + ], + )?; + m.add( + "NEXT_HOP_FIELDS", + vec![ + "timestamp", + "elem_type", + "peer_ip", + "peer_asn", + "prefix", + "next_hop", + ], + )?; Ok(()) } diff --git a/tests/benchmark.py b/tests/benchmark.py new file mode 100644 index 0000000..1aa9936 --- /dev/null +++ b/tests/benchmark.py @@ -0,0 +1,153 @@ +"""Python-side performance benchmark for pybgpkit-parser. + +Run after a release build for meaningful numbers: + + maturin develop --release + python tests/benchmark.py [path-or-url] + +This script intentionally uses a remote BGPKIT sample file. It is meant for +manual performance checks, not deterministic CI assertions. +""" + +import os +import sys +import time + +from pybgpkit_parser import Parser, RouteParser + +URL = sys.argv[1] if len(sys.argv) > 1 else os.environ.get( + "PYBGPKIT_BENCH_INPUT", + "https://spaces.bgpkit.org/parser/update-example", +) + + +def bench_iteration(): + parser = Parser(URL) + start = time.perf_counter() + count = 0 + for _elem in parser: + count += 1 + elapsed = time.perf_counter() - start + return "iteration", count, elapsed + + +def bench_parse_all(): + parser = Parser(URL) + start = time.perf_counter() + elems = parser.parse_all() + elapsed = time.perf_counter() - start + return "parse_all", len(elems), elapsed + + +def bench_iter_batches(batch_size=1000): + parser = Parser(URL) + start = time.perf_counter() + count = 0 + for batch in parser.iter_batches(batch_size): + count += len(batch) + elapsed = time.perf_counter() - start + return f"iter_batches({batch_size})", count, elapsed + + +def bench_iter_tuples(): + parser = Parser(URL) + fields = ["timestamp", "prefix", "as_path"] + start = time.perf_counter() + count = 0 + for _row in parser.iter_tuples(fields): + count += 1 + elapsed = time.perf_counter() - start + return "iter_tuples(timestamp,prefix,as_path)", count, elapsed + + +def bench_iter_tuple_batches(batch_size=1000): + parser = Parser(URL) + fields = ["timestamp", "prefix", "as_path"] + start = time.perf_counter() + count = 0 + for batch in parser.iter_tuple_batches(fields, batch_size): + count += len(batch) + elapsed = time.perf_counter() - start + return f"iter_tuple_batches({batch_size})", count, elapsed + + +def bench_route_iteration(): + parser = RouteParser(URL) + start = time.perf_counter() + count = 0 + for _route in parser: + count += 1 + elapsed = time.perf_counter() - start + return "route_iteration", count, elapsed + + +def bench_route_iter_batches(batch_size=1000): + parser = RouteParser(URL) + start = time.perf_counter() + count = 0 + for batch in parser.iter_batches(batch_size): + count += len(batch) + elapsed = time.perf_counter() - start + return f"route_iter_batches({batch_size})", count, elapsed + + +def bench_route_iter_tuples(): + parser = RouteParser(URL) + fields = ["timestamp", "prefix", "as_path"] + start = time.perf_counter() + count = 0 + for _row in parser.iter_tuples(fields): + count += 1 + elapsed = time.perf_counter() - start + return "route_iter_tuples(timestamp,prefix,as_path)", count, elapsed + + +def bench_route_iter_tuple_batches(batch_size=1000): + parser = RouteParser(URL) + fields = ["timestamp", "prefix", "as_path"] + start = time.perf_counter() + count = 0 + for batch in parser.iter_tuple_batches(fields, batch_size): + count += len(batch) + elapsed = time.perf_counter() - start + return f"route_iter_tuple_batches({batch_size})", count, elapsed + + +def bench_route_parse_all(): + parser = RouteParser(URL) + start = time.perf_counter() + routes = parser.parse_all() + elapsed = time.perf_counter() - start + return "route_parse_all", len(routes), elapsed + + +def bench_to_dict(): + parser = Parser(URL) + elems = parser.parse_all() + start = time.perf_counter() + for elem in elems: + elem.to_dict() + elapsed = time.perf_counter() - start + return "to_dict", len(elems), elapsed + + +def main(): + for name, count, elapsed in [ + bench_iteration(), + bench_iter_batches(), + bench_parse_all(), + bench_iter_tuples(), + bench_iter_tuple_batches(), + bench_route_iteration(), + bench_route_iter_batches(), + bench_route_parse_all(), + bench_route_iter_tuples(), + bench_route_iter_tuple_batches(), + bench_to_dict(), + ]: + rate = count / elapsed if elapsed else 0 + print(f"{name}: {count:,} elems in {elapsed:.3f}s ({rate:,.0f} elems/s)") + + +if __name__ == "__main__": + main() diff --git a/tests/test_api.py b/tests/test_api.py new file mode 100644 index 0000000..33cb4f2 --- /dev/null +++ b/tests/test_api.py @@ -0,0 +1,116 @@ +import os +import pytest + +from pybgpkit_parser import ( + ELEM_TYPE_ANNOUNCE, + ELEM_TYPE_WITHDRAW, + BASIC_FIELDS, + NEXT_HOP_FIELDS, + ROUTE_FIELDS, + Filter, + PSV_HEADER, + Parser, + RouteParser, +) + +URL = "https://spaces.bgpkit.org/parser/update-example" + + +def test_filter_repr_and_helpers(): + filt = Filter("peer_ips", "185.1.8.65,2001:7f8:73:0:3:fa4:0:1") + assert "Filter" in repr(filt) + assert "Filter" in repr(Filter.peer_ip("185.1.8.65")) + assert "Filter" in repr(Filter.peer_ips(["185.1.8.65", "2001:7f8:73:0:3:fa4:0:1"])) + assert "Filter" in repr(Filter.origin_asn(13335)) + assert "Filter" in repr(Filter.prefix("1.1.1.0/24")) + assert "Filter" in repr(Filter.elem_type("a")) + + +def test_module_constants(): + assert ELEM_TYPE_ANNOUNCE == "A" + assert ELEM_TYPE_WITHDRAW == "W" + assert PSV_HEADER.startswith("type|timestamp") + assert BASIC_FIELDS == ["timestamp", "elem_type", "peer_ip", "peer_asn", "prefix"] + assert ROUTE_FIELDS[-1] == "as_path" + assert NEXT_HOP_FIELDS[-1] == "next_hop" + + +def test_invalid_filter_raises_value_error(): + with pytest.raises(ValueError): + Filter("peer_ips", "not-an-ip") + + +@pytest.mark.skipif( + os.environ.get("PYBGPKIT_RUN_NETWORK_TESTS") != "1", + reason="network smoke test; set PYBGPKIT_RUN_NETWORK_TESTS=1 to enable", +) +def test_parser_iteration_and_elem_api_network(): + parser = Parser.from_filters(URL, [Filter("peer_ips", "185.1.8.65")]) + elem = parser.parse_next() + assert elem is not None + + data = elem.to_dict() + assert elem.as_dict() == data + assert "peer_bgp_id" in data + assert "only_to_customer" in data + assert elem.elem_type in {"A", "W"} + assert elem.is_announcement() or elem.is_withdrawal() + assert elem.has_as_path() == (elem.as_path is not None) + assert elem.origin_asn == elem.get_origin_asn() + assert isinstance(elem.to_json(), str) + assert isinstance(elem.to_psv(), str) + assert "Elem" in repr(elem) + + +def test_parser_count_and_batches_network(): + if os.environ.get("PYBGPKIT_RUN_NETWORK_TESTS") != "1": + pytest.skip("network smoke test; set PYBGPKIT_RUN_NETWORK_TESTS=1 to enable") + + parser = Parser(URL, filters={"peer_ips": "185.1.8.65"}) + assert parser.count() > 0 + assert parser.count() == 0 + + parser = Parser(URL, filters={"peer_ips": "185.1.8.65"}) + batches = parser.iter_batches(1000) + first = next(batches) + assert first + assert len(first) <= 1000 + + parser = Parser(URL, filters={"peer_ips": "185.1.8.65"}) + row = next(parser.iter_tuples(["peer_ip", "peer_asn", "prefix"])) + assert len(row) == 3 + + parser = Parser(URL, filters={"peer_ips": "185.1.8.65"}) + batch = next(parser.iter_tuple_batches(["peer_ip", "prefix"], 1000)) + assert batch + assert len(batch[0]) == 2 + + +def test_route_parser_network(): + if os.environ.get("PYBGPKIT_RUN_NETWORK_TESTS") != "1": + pytest.skip("network smoke test; set PYBGPKIT_RUN_NETWORK_TESTS=1 to enable") + + parser = RouteParser.from_filters(URL, [Filter.peer_ip("185.1.8.65")]) + route = parser.parse_next() + assert route is not None + assert route.elem_type in {"A", "W"} + assert route.as_dict() == route.to_dict() + assert route.has_as_path() == (route.as_path is not None) + assert "RouteElem" in repr(route) + + parser = RouteParser(URL, filters={"peer_ips": "185.1.8.65"}) + assert parser.count() > 0 + + parser = RouteParser(URL, filters={"peer_ips": "185.1.8.65"}) + first = next(parser.iter_batches(1000)) + assert first + assert len(first) <= 1000 + + parser = RouteParser(URL, filters={"peer_ips": "185.1.8.65"}) + row = next(parser.iter_tuples(["peer_ip", "peer_asn", "prefix"])) + assert len(row) == 3 + + parser = RouteParser(URL, filters={"peer_ips": "185.1.8.65"}) + batch = next(parser.iter_tuple_batches(["peer_ip", "prefix"], 1000)) + assert batch + assert len(batch[0]) == 2