From 8c6798f856b9933b70e8b768591639735c96aa40 Mon Sep 17 00:00:00 2001 From: Vlad <13818348+walldiss@users.noreply.github.com> Date: Sun, 26 Apr 2026 22:32:46 +0200 Subject: [PATCH 1/6] test(celestia-node-fiber): docker-compose 4-val + bridge showcase scaffold MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds tools/celestia-node-fiber/testing/docker/, a self-contained docker-compose stack that brings up four celestia-app validators (each running an in-process Fibre server), a celestia-node bridge, and a one-shot init container that registers FSP hosts via valaddr and funds an escrow. A Go test driver (build tag fibre_docker) connects to the running stack and exercises the celestia-node-fiber adapter end-to-end against real 2/3-quorum Fibre. Why a docker showcase: the in-process testing/showcase_test.go single- validator setup proves the adapter wires correctly but doesn't exercise real consensus quorum, inter-validator P2P, multiple Fibre servers contributing partial signatures, or dns:/// host registry resolution. The 4-validator docker stack exercises all of those. Layout: - Dockerfile.app: celestia-appd + fibre binaries with -tags fibre,ledger - Dockerfile.bridge: celestia-node bridge with -tags fibre - compose.yaml: bootstrap → val0..val3 → register → bridge dependency chain - scripts/init-genesis.sh: 4-validator genesis bootstrap - scripts/start-validator.sh: per-validator entrypoint (appd + fibre) - scripts/register-fsps.sh: MsgSetFibreProviderInfo (with dns:/// prefix) + escrow funding for the test client - scripts/start-bridge.sh: bridge init + JWT export to shared volume - docker_test.go: TestDockerShowcase — host-side Go driver - README.md: operator instructions + the known-rough edges Build tag fibre_docker keeps the test out of the default go test runs since it requires the external docker stack to be up. 
The scaffold is documented honestly: it lays out the architecture, build args, and the iteration points (fibre CLI flag confirmation, config.toml override robustness, healthchecks, build-cache speedups). The point of landing it now is to unblock the next iteration step rather than to claim flawless first-run behavior. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../testing/docker/Dockerfile.app | 34 ++++ .../testing/docker/Dockerfile.bridge | 28 +++ .../testing/docker/README.md | 126 +++++++++++++ .../testing/docker/compose.yaml | 147 ++++++++++++++++ .../testing/docker/docker_test.go | 165 ++++++++++++++++++ .../testing/docker/scripts/init-genesis.sh | 107 ++++++++++++ .../testing/docker/scripts/register-fsps.sh | 59 +++++++ .../testing/docker/scripts/start-bridge.sh | 39 +++++ .../testing/docker/scripts/start-validator.sh | 69 ++++++++ 9 files changed, 774 insertions(+) create mode 100644 tools/celestia-node-fiber/testing/docker/Dockerfile.app create mode 100644 tools/celestia-node-fiber/testing/docker/Dockerfile.bridge create mode 100644 tools/celestia-node-fiber/testing/docker/README.md create mode 100644 tools/celestia-node-fiber/testing/docker/compose.yaml create mode 100644 tools/celestia-node-fiber/testing/docker/docker_test.go create mode 100755 tools/celestia-node-fiber/testing/docker/scripts/init-genesis.sh create mode 100755 tools/celestia-node-fiber/testing/docker/scripts/register-fsps.sh create mode 100755 tools/celestia-node-fiber/testing/docker/scripts/start-bridge.sh create mode 100755 tools/celestia-node-fiber/testing/docker/scripts/start-validator.sh diff --git a/tools/celestia-node-fiber/testing/docker/Dockerfile.app b/tools/celestia-node-fiber/testing/docker/Dockerfile.app new file mode 100644 index 0000000000..dd23ccd0ce --- /dev/null +++ b/tools/celestia-node-fiber/testing/docker/Dockerfile.app @@ -0,0 +1,34 @@ +# Build a celestia-appd binary with the `fibre` build tag enabled and a +# matching `fibre` server binary. 
Both go on PATH so the validator +# entrypoint can run them as separate processes. +# +# Pin CELESTIA_APP_REF to a feature/fibre commit; the default tracks +# whatever celestia-app `main` looks like at build time, which is where +# fibre development lives. +ARG GO_VERSION=1.26.1 +ARG CELESTIA_APP_REPO=https://github.com/celestiaorg/celestia-app.git +ARG CELESTIA_APP_REF=main + +FROM golang:${GO_VERSION}-bookworm AS build +ARG CELESTIA_APP_REPO +ARG CELESTIA_APP_REF +RUN apt-get update \ + && apt-get install -y --no-install-recommends git ca-certificates \ + && rm -rf /var/lib/apt/lists/* +WORKDIR /src +RUN git clone --depth 1 --branch "${CELESTIA_APP_REF}" "${CELESTIA_APP_REPO}" celestia-app \ + || git clone "${CELESTIA_APP_REPO}" celestia-app +WORKDIR /src/celestia-app +RUN git checkout "${CELESTIA_APP_REF}" || true +ENV CGO_ENABLED=0 GOFLAGS="-mod=readonly" +RUN go build -tags "ledger,fibre" -o /out/celestia-appd ./cmd/celestia-appd +RUN go build -tags "ledger,fibre" -o /out/fibre ./cmd/fibre + +FROM debian:bookworm-slim +RUN apt-get update \ + && apt-get install -y --no-install-recommends ca-certificates jq curl \ + && rm -rf /var/lib/apt/lists/* +COPY --from=build /out/celestia-appd /usr/local/bin/celestia-appd +COPY --from=build /out/fibre /usr/local/bin/fibre +RUN chmod +x /usr/local/bin/celestia-appd /usr/local/bin/fibre +WORKDIR /home/celestia diff --git a/tools/celestia-node-fiber/testing/docker/Dockerfile.bridge b/tools/celestia-node-fiber/testing/docker/Dockerfile.bridge new file mode 100644 index 0000000000..11ec5f2c95 --- /dev/null +++ b/tools/celestia-node-fiber/testing/docker/Dockerfile.bridge @@ -0,0 +1,28 @@ +# Build a celestia-node bridge binary with the `fibre` build tag enabled. +# The bridge is what serves blob.Subscribe over JSON-RPC for the adapter's +# Listen path, and (on the read-only side) the fibre namespace API. 
+ARG GO_VERSION=1.26.1 +ARG CELESTIA_NODE_REPO=https://github.com/celestiaorg/celestia-node.git +ARG CELESTIA_NODE_REF=feature/fibre + +FROM golang:${GO_VERSION}-bookworm AS build +ARG CELESTIA_NODE_REPO +ARG CELESTIA_NODE_REF +RUN apt-get update \ + && apt-get install -y --no-install-recommends git ca-certificates \ + && rm -rf /var/lib/apt/lists/* +WORKDIR /src +RUN git clone --depth 1 --branch "${CELESTIA_NODE_REF}" "${CELESTIA_NODE_REPO}" celestia-node \ + || git clone "${CELESTIA_NODE_REPO}" celestia-node +WORKDIR /src/celestia-node +RUN git checkout "${CELESTIA_NODE_REF}" || true +ENV CGO_ENABLED=0 GOFLAGS="-mod=readonly" +RUN go build -tags "fibre" -o /out/celestia ./cmd/celestia + +FROM debian:bookworm-slim +RUN apt-get update \ + && apt-get install -y --no-install-recommends ca-certificates jq curl \ + && rm -rf /var/lib/apt/lists/* +COPY --from=build /out/celestia /usr/local/bin/celestia +RUN chmod +x /usr/local/bin/celestia +WORKDIR /home/celestia diff --git a/tools/celestia-node-fiber/testing/docker/README.md b/tools/celestia-node-fiber/testing/docker/README.md new file mode 100644 index 0000000000..72a13ef006 --- /dev/null +++ b/tools/celestia-node-fiber/testing/docker/README.md @@ -0,0 +1,126 @@ +# Fibre 4-validator + bridge docker showcase + +A docker-compose stack that brings up four celestia-app validators +(each running a Fibre server), a celestia-node bridge, and a one-shot +init container that registers Fibre Storage Provider hosts and funds an +escrow account. A Go test driver (`docker_test.go`) connects from the +host and exercises the `celestia-node-fiber` adapter end-to-end against +the real 2/3-quorum network. + +## Why + +The in-process `testing/showcase_test.go` runs against a single +validator inside the test process. 
That proves the adapter wires +correctly, but it doesn't exercise: + +- real consensus 2/3 quorum collection (single validator trivially + satisfies it), +- inter-validator P2P, +- multiple Fibre servers contributing partial signatures, +- the dns:/// host registry resolution path, +- the bridge syncing real headers off a network it doesn't itself drive. + +This stack does. + +## Architecture + +``` + +---------- bootstrap (one-shot) ----------+ + | init-genesis.sh: 4-val genesis + keys | + +-------+----------------------------------+ + | shared volume + +------------+------------+------------+ + v v v v + val0 val1 val2 val3 + (appd + (appd + (appd + (appd + + fibre) fibre) fibre) fibre) + ^ + | gRPC :9090, RPC :26657 + | + bridge (celestia-node) + ^ + | JSON-RPC/WebSocket :26658 + | + +--------+--------+ + | Go test | + | (docker_test.go) | + +-----------------+ +``` + +## Run + +```bash +cd tools/celestia-node-fiber/testing/docker + +# First boot: builds two images (~5–10 min on a cold cache). +docker compose up -d --build + +# Watch the bootstrap + registration progress: +docker compose logs -f bootstrap register + +# Once `register` exits 0 and writes /shared/setup.done, the bridge +# connects and the stack is ready. + +# From the parent dir, run the Go-side driver: +cd ../.. +go test -tags 'fibre fibre_docker' -count=1 -timeout 5m ./testing/docker/... + +# Tear down (preserves volumes — add -v to wipe shared genesis state): +docker compose -f testing/docker/compose.yaml down +``` + +Override endpoints from the host with env vars if your ports collide: + +``` +FIBRE_BRIDGE_ADDR=ws://127.0.0.1:36658 \ +FIBRE_CONSENSUS_ADDR=127.0.0.1:19090 \ +go test -tags 'fibre fibre_docker' ... 
+``` + +## Build args + +Both Dockerfiles accept refs: + +| arg | Dockerfile | default | what it does | +|---|---|---|---| +| `CELESTIA_APP_REPO` | `Dockerfile.app` | celestia-app upstream | clone source | +| `CELESTIA_APP_REF` | `Dockerfile.app` | `main` | git ref to build with `-tags fibre,ledger` | +| `CELESTIA_NODE_REPO` | `Dockerfile.bridge` | celestia-node upstream | clone source | +| `CELESTIA_NODE_REF` | `Dockerfile.bridge` | `feature/fibre` | git ref to build with `-tags fibre` | + +Example pinning to a specific commit: + +``` +docker compose build --build-arg CELESTIA_NODE_REF=194cc74c ... +``` + +## Known TODOs + +This is a working scaffold — not a polished CI artifact. A few +things will likely need iteration the first time it runs end-to-end +on your machine: + +1. **`fibre` binary CLI flags** in `scripts/start-validator.sh` are + illustrative. The real `cmd/fibre` flags may differ (port name, + app gRPC flag, signer config). Run `fibre --help` inside a + validator container to confirm and adjust. +2. **`config.toml` / `app.toml` overrides** in `start-validator.sh` + use `sed` against expected default lines. If the celestia-app + defaults change verb/spacing, the substitutions silently no-op. + Consider a `dasel`/`tomlq` rewrite if it bites. +3. **No healthchecks** on validators. `register` waits on + `service_started`, which is only "container booted", not "RPC + responding". The script polls `celestia-appd status` which + handles that, but a proper healthcheck would let `bridge` start + sooner without polling itself. +4. **No CI integration**. Adding `make docker-test` that wraps + `docker compose up -d --wait`, runs the test, then tears down, + is a sensible follow-up. +5. **Build cache** — every `docker compose up --build` re-clones + celestia-app + celestia-node. To iterate faster, set up a + docker volume cache for `/go/pkg/mod` and `/root/.cache/go-build`, + or build the images once and re-use. 
+ +These are documented honestly because the value of the scaffold is in +unblocking the next iteration step, not in pretending it works +flawlessly without manual fixes. diff --git a/tools/celestia-node-fiber/testing/docker/compose.yaml b/tools/celestia-node-fiber/testing/docker/compose.yaml new file mode 100644 index 0000000000..207346bb63 --- /dev/null +++ b/tools/celestia-node-fiber/testing/docker/compose.yaml @@ -0,0 +1,147 @@ +# Local 4-validator + 1-bridge Fibre stack for end-to-end testing of the +# celestia-node-fiber adapter. Designed to be brought up with +# `docker compose up -d` from this directory; the test driver in +# docker_test.go (build tag `fibre_docker`) connects to it from the host. +# +# Services: +# bootstrap one-shot — generates a 4-val genesis under /shared +# val0..val3 validators running celestia-appd + fibre server +# register one-shot — submits MsgSetFibreProviderInfo + escrow +# bridge celestia-node bridge connected to val0 +# +# Volumes: +# shared/ genesis, keyrings, peers, JWT — read by every service +# +# Ports exposed to host (override via override file if they collide): +# val0 :26657 (RPC) :9090 (gRPC) :26659 (fibre) +# bridge :26658 (JSON-RPC over WebSocket) +services: + bootstrap: + build: + context: . + dockerfile: Dockerfile.app + entrypoint: ["bash", "/scripts/init-genesis.sh"] + environment: + CHAIN_ID: fibre-docker + NUM_VALIDATORS: "4" + SHARED: /shared + CLIENT_ACCOUNT: default-fibre + volumes: + - shared:/shared + - ./scripts:/scripts:ro + restart: "no" + + val0: + build: + context: . + dockerfile: Dockerfile.app + entrypoint: ["bash", "/scripts/start-validator.sh"] + environment: + VAL_INDEX: "0" + SHARED: /shared + CHAIN_ID: fibre-docker + depends_on: + bootstrap: + condition: service_completed_successfully + volumes: + - shared:/shared + - ./scripts:/scripts:ro + ports: + - "26657:26657" + - "9090:9090" + - "26659:26659" + + val1: + build: + context: . 
+ dockerfile: Dockerfile.app + entrypoint: ["bash", "/scripts/start-validator.sh"] + environment: + VAL_INDEX: "1" + SHARED: /shared + CHAIN_ID: fibre-docker + depends_on: + bootstrap: + condition: service_completed_successfully + volumes: + - shared:/shared + - ./scripts:/scripts:ro + + val2: + build: + context: . + dockerfile: Dockerfile.app + entrypoint: ["bash", "/scripts/start-validator.sh"] + environment: + VAL_INDEX: "2" + SHARED: /shared + CHAIN_ID: fibre-docker + depends_on: + bootstrap: + condition: service_completed_successfully + volumes: + - shared:/shared + - ./scripts:/scripts:ro + + val3: + build: + context: . + dockerfile: Dockerfile.app + entrypoint: ["bash", "/scripts/start-validator.sh"] + environment: + VAL_INDEX: "3" + SHARED: /shared + CHAIN_ID: fibre-docker + depends_on: + bootstrap: + condition: service_completed_successfully + volumes: + - shared:/shared + - ./scripts:/scripts:ro + + register: + build: + context: . + dockerfile: Dockerfile.app + entrypoint: ["bash", "/scripts/register-fsps.sh"] + environment: + NUM_VALIDATORS: "4" + SHARED: /shared + CHAIN_ID: fibre-docker + CLIENT_ACCOUNT: default-fibre + depends_on: + val0: + condition: service_started + val1: + condition: service_started + val2: + condition: service_started + val3: + condition: service_started + volumes: + - shared:/shared + - ./scripts:/scripts:ro + restart: on-failure + + bridge: + build: + context: . 
+ dockerfile: Dockerfile.bridge + entrypoint: ["bash", "/scripts/start-bridge.sh"] + environment: + NETWORK: fibre-docker + SHARED: /shared + CORE_IP: val0 + CORE_GRPC_PORT: "9090" + CORE_RPC_PORT: "26657" + depends_on: + register: + condition: service_completed_successfully + volumes: + - shared:/shared + - ./scripts:/scripts:ro + ports: + - "26658:26658" + +volumes: + shared: diff --git a/tools/celestia-node-fiber/testing/docker/docker_test.go b/tools/celestia-node-fiber/testing/docker/docker_test.go new file mode 100644 index 0000000000..6e1d196414 --- /dev/null +++ b/tools/celestia-node-fiber/testing/docker/docker_test.go @@ -0,0 +1,165 @@ +//go:build fibre_docker + +// docker_test.go — runs the same Upload → Listen → Download flow as +// the in-process showcase, but against the docker-compose stack in +// this directory. +// +// Bring the stack up first: +// +// cd tools/celestia-node-fiber/testing/docker +// docker compose up -d --build +// # wait until `docker compose logs register` says "setup.done flag written" +// # and `docker compose logs bridge` shows the bridge serving on :26658 +// +// Then from the parent dir: +// +// go test -tags 'fibre fibre_docker' -count=1 -timeout 5m ./testing/docker/... +// +// The test reads the bridge JWT from the shared docker volume by +// running `docker compose exec bridge cat /shared/bridge-admin-jwt.txt`, +// so the docker CLI must be available on the host. 
+package docker_test + +import ( + "bytes" + "context" + "encoding/hex" + "fmt" + "os" + "os/exec" + "strings" + "testing" + "time" + + "github.com/cosmos/cosmos-sdk/crypto/keyring" + "github.com/stretchr/testify/require" + + "github.com/celestiaorg/celestia-app/v8/app" + "github.com/celestiaorg/celestia-app/v8/app/encoding" + "github.com/celestiaorg/celestia-node/api/client" + + cnfiber "github.com/evstack/ev-node/tools/celestia-node-fiber" +) + +// keep block import compiled out of the test binary; the assertion that +// adapter satisfies block.FiberClient lives in the unit tests. +var _ = (cnfiber.Adapter{}) + +const ( + bridgeAddr = "ws://127.0.0.1:26658" + consensusAddr = "127.0.0.1:9090" + chainID = "fibre-docker" + clientAccount = "default-fibre" + docTimeout = 60 * time.Second +) + +// envOr returns the env var if set, otherwise fallback. +func envOr(key, fallback string) string { + if v := os.Getenv(key); v != "" { + return v + } + return fallback +} + +func readBridgeJWT(t *testing.T) string { + t.Helper() + cmd := exec.Command("docker", "compose", "exec", "-T", "bridge", + "cat", "/shared/bridge-admin-jwt.txt") + cmd.Dir = mustDockerDir(t) + out, err := cmd.CombinedOutput() + require.NoError(t, err, "reading bridge JWT: %s", string(out)) + return strings.TrimSpace(string(out)) +} + +// mustDockerDir locates this file's directory at runtime so the test can +// invoke docker compose against the correct compose file regardless of +// where `go test` was launched from. +func mustDockerDir(t *testing.T) string { + t.Helper() + wd, err := os.Getwd() + require.NoError(t, err) + return wd +} + +// readClientKeyring loads the keyring populated by init-genesis.sh from +// the shared docker volume into a host-side keyring.Keyring suitable +// for fiber.New. We do this by `docker cp`-ing the seed validator's +// home dir to a local temp dir. +// +// TODO: this assumes the operator has the docker CLI; the test doesn't +// validate that up front. 
Add a `docker version` precheck if we want a +// clearer error. +func readClientKeyring(t *testing.T) keyring.Keyring { + t.Helper() + tmp := t.TempDir() + cmd := exec.Command("docker", "compose", "cp", + "val0:/shared/val0/.celestia-app/keyring-test", tmp+"/keyring-test") + cmd.Dir = mustDockerDir(t) + out, err := cmd.CombinedOutput() + require.NoError(t, err, "docker cp keyring: %s", string(out)) + + encCfg := encoding.MakeConfig(app.ModuleEncodingRegisters...) + kr, err := keyring.New("docker-test", keyring.BackendTest, tmp, nil, encCfg.Codec) + require.NoError(t, err, "constructing keyring") + return kr +} + +// TestDockerShowcase drives Upload → Listen → Download against the +// docker-compose stack. The 4-validator network exercises real 2/3 +// quorum aggregation that the single-validator showcase cannot. +// +// Build tag `fibre_docker` keeps the test out of default `go test` +// runs since it requires an external docker stack. +func TestDockerShowcase(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) + t.Cleanup(cancel) + + jwt := readBridgeJWT(t) + kr := readClientKeyring(t) + + adapter, err := cnfiber.New(ctx, cnfiber.Config{ + Client: client.Config{ + ReadConfig: client.ReadConfig{ + BridgeDAAddr: envOr("FIBRE_BRIDGE_ADDR", bridgeAddr), + DAAuthToken: jwt, + EnableDATLS: false, + }, + SubmitConfig: client.SubmitConfig{ + DefaultKeyName: clientAccount, + Network: chainID, + CoreGRPCConfig: client.CoreGRPCConfig{ + Addr: envOr("FIBRE_CONSENSUS_ADDR", consensusAddr), + }, + }, + }, + }, kr) + require.NoError(t, err, "constructing adapter against docker stack") + t.Cleanup(func() { _ = adapter.Close() }) + + namespace := bytes.Repeat([]byte{0xab}, 10) + payload := []byte(fmt.Sprintf("docker showcase %d", time.Now().UnixNano())) + + events, err := adapter.Listen(ctx, namespace, 0) + require.NoError(t, err, "Listen subscription") + + up, err := adapter.Upload(ctx, namespace, payload) + require.NoError(t, err, "Upload") 
+ require.NotEmpty(t, up.BlobID) + t.Logf("upload ok: blob_id=%s", hex.EncodeToString(up.BlobID)) + + select { + case ev, ok := <-events: + require.True(t, ok, "Listen channel closed without event") + require.Equal(t, up.BlobID, ev.BlobID, "BlobID mismatch") + require.Equal(t, uint64(len(payload)), ev.DataSize) + require.Greater(t, ev.Height, uint64(0)) + t.Logf("listen ok: height=%d data_size=%d", ev.Height, ev.DataSize) + case <-time.After(docTimeout): + t.Fatalf("timed out waiting for BlobEvent after %s", docTimeout) + } + + got, err := adapter.Download(ctx, up.BlobID) + require.NoError(t, err, "Download") + require.Equal(t, payload, got, "downloaded bytes don't match payload") + t.Logf("download ok: %d bytes", len(got)) +} diff --git a/tools/celestia-node-fiber/testing/docker/scripts/init-genesis.sh b/tools/celestia-node-fiber/testing/docker/scripts/init-genesis.sh new file mode 100755 index 0000000000..a76c9118a3 --- /dev/null +++ b/tools/celestia-node-fiber/testing/docker/scripts/init-genesis.sh @@ -0,0 +1,107 @@ +#!/usr/bin/env bash +# init-genesis.sh — runs once in the bootstrap container. +# +# Generates a 4-validator celestia-app genesis under /shared, with each +# validator's home dir at /shared/val/.celestia-app. All four homes +# share the same genesis.json so the chain has a coherent validator set. +# Each validator's priv_validator_key.json + node_key.json are unique +# per home dir. +# +# After this script exits, validator entrypoints can pick up their home +# from the shared volume. 
+set -euo pipefail + +CHAIN_ID="${CHAIN_ID:-fibre-docker}" +NUM_VALIDATORS="${NUM_VALIDATORS:-4}" +SHARED="${SHARED:-/shared}" +APP="${APP:-celestia-appd}" +KEYRING_BACKEND="test" +STAKE="100000000000utia" # 100k TIA per validator self-stake +INITIAL_BALANCE="1000000000000utia" +CLIENT_ACCOUNT="${CLIENT_ACCOUNT:-default-fibre}" +CLIENT_BALANCE="${CLIENT_BALANCE:-1000000000000utia}" + +mkdir -p "$SHARED" + +# Validator 0 is the seed home: we initialize there, add genesis accounts, +# then copy the resulting genesis.json into every other validator's home. +for i in $(seq 0 $((NUM_VALIDATORS - 1))); do + home="$SHARED/val$i/.celestia-app" + mkdir -p "$home" + "$APP" init "validator-$i" --chain-id "$CHAIN_ID" --home "$home" >/dev/null +done + +seed_home="$SHARED/val0/.celestia-app" + +# Add a validator key and genesis account to each home, then copy the +# pubkey/account into the seed home so it ends up in genesis. +for i in $(seq 0 $((NUM_VALIDATORS - 1))); do + home="$SHARED/val$i/.celestia-app" + "$APP" keys add "validator" \ + --keyring-backend "$KEYRING_BACKEND" --home "$home" --output json \ + > "$SHARED/val$i/validator.key.json" + addr=$("$APP" keys show "validator" -a \ + --keyring-backend "$KEYRING_BACKEND" --home "$home") + "$APP" genesis add-genesis-account "$addr" "$INITIAL_BALANCE" \ + --keyring-backend "$KEYRING_BACKEND" --home "$seed_home" +done + +# Add the client signer account to the seed genesis with a generous balance +# so the test driver can fund its escrow without worrying about gas. +"$APP" keys add "$CLIENT_ACCOUNT" \ + --keyring-backend "$KEYRING_BACKEND" --home "$seed_home" \ + --output json > "$SHARED/client.key.json" +client_addr=$("$APP" keys show "$CLIENT_ACCOUNT" -a \ + --keyring-backend "$KEYRING_BACKEND" --home "$seed_home") +"$APP" genesis add-genesis-account "$client_addr" "$CLIENT_BALANCE" \ + --keyring-backend "$KEYRING_BACKEND" --home "$seed_home" + +# Generate gentxs from each validator's home, collect them in seed_home. 
+mkdir -p "$seed_home/config/gentx" +for i in $(seq 0 $((NUM_VALIDATORS - 1))); do + home="$SHARED/val$i/.celestia-app" + if [ "$i" -ne 0 ]; then + # Other validators need the seed's genesis.json before they can + # produce a valid gentx. + cp "$seed_home/config/genesis.json" "$home/config/genesis.json" + # Their account also needs to exist in their own keyring + genesis. + # Re-add account: gentx requires it. + addr=$("$APP" keys show "validator" -a \ + --keyring-backend "$KEYRING_BACKEND" --home "$home") + "$APP" genesis add-genesis-account "$addr" "$INITIAL_BALANCE" \ + --keyring-backend "$KEYRING_BACKEND" --home "$home" || true + fi + "$APP" genesis gentx "validator" "$STAKE" \ + --chain-id "$CHAIN_ID" \ + --keyring-backend "$KEYRING_BACKEND" \ + --home "$home" \ + --output-document "$seed_home/config/gentx/gentx-val$i.json" +done + +# Collect every gentx into the seed genesis. +"$APP" genesis collect-gentxs --home "$seed_home" +"$APP" genesis validate --home "$seed_home" + +# Distribute the final genesis.json to every other validator's home. +for i in $(seq 1 $((NUM_VALIDATORS - 1))); do + home="$SHARED/val$i/.celestia-app" + cp "$seed_home/config/genesis.json" "$home/config/genesis.json" +done + +# Persistent peers: each validator advertises itself by its docker +# service name (val0, val1, ...) on the standard P2P port 26656. +PEERS="" +for i in $(seq 0 $((NUM_VALIDATORS - 1))); do + home="$SHARED/val$i/.celestia-app" + nodeid=$("$APP" comet show-node-id --home "$home") + PEERS="${PEERS}${PEERS:+,}${nodeid}@val$i:26656" +done +echo "$PEERS" > "$SHARED/peers.txt" + +# TODO: write per-validator config tweaks (laddr / external_address / +# persistent_peers / minimum-gas-prices) into each home's config.toml / +# app.toml. The validator entrypoint expects these to be present already. +# Either inline here with sed/jq, or have the entrypoint apply them on +# startup. + +echo "init-genesis: done. 
peers=$PEERS" diff --git a/tools/celestia-node-fiber/testing/docker/scripts/register-fsps.sh b/tools/celestia-node-fiber/testing/docker/scripts/register-fsps.sh new file mode 100755 index 0000000000..7586ec6bb7 --- /dev/null +++ b/tools/celestia-node-fiber/testing/docker/scripts/register-fsps.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# register-fsps.sh — runs once after validators are producing blocks. +# +# Submits MsgSetFibreProviderInfo for each validator so the chain's +# valaddr module maps consensus address → fibre server address. The +# `dns:///` URI prefix is required by the fibre client's gRPC dialer +# (a bare host:port fails URL parsing — same gotcha documented in +# tools/talis/fibre_setup.go). +# +# Also funds the test client account's escrow so MsgPayForFibre can +# settle in the docker network. +set -euo pipefail + +NUM_VALIDATORS="${NUM_VALIDATORS:-4}" +SHARED="${SHARED:-/shared}" +APP="${APP:-celestia-appd}" +CHAIN_ID="${CHAIN_ID:-fibre-docker}" +FEES="${FEES:-5000utia}" +ESCROW_AMOUNT="${ESCROW_AMOUNT:-50000000utia}" +CLIENT_ACCOUNT="${CLIENT_ACCOUNT:-default-fibre}" +FIBRE_PORT="${FIBRE_PORT:-26659}" + +# Wait until the seed validator has produced a few blocks so the chain +# is healthy enough to accept txs. +seed_home="$SHARED/val0/.celestia-app" +until height=$("$APP" status --home "$seed_home" 2>/dev/null \ + | jq -r '.sync_info.latest_block_height // 0') \ + && [ "${height:-0}" -ge 3 ]; do + echo "register-fsps: waiting for chain to reach height 3 (current=${height:-?})..." + sleep 2 +done + +# Register each validator's fibre host. We submit each tx from the +# validator's own keyring, hitting that validator's local gRPC. +for i in $(seq 0 $((NUM_VALIDATORS - 1))); do + home="$SHARED/val$i/.celestia-app" + val_oper=$("$APP" keys show "validator" --bech val -a \ + --keyring-backend test --home "$home") + # MsgSetFibreProviderInfo via the valaddr tx CLI. Each fibre server + # is reachable inside the compose network at val$i:$FIBRE_PORT. 
+ "$APP" tx valaddr set-host "dns:///val$i:$FIBRE_PORT" \ + --from validator --keyring-backend test --home "$home" \ + --chain-id "$CHAIN_ID" --node "tcp://val$i:26657" \ + --fees "$FEES" --yes + echo "register-fsps: registered val$i ($val_oper)" + sleep 6 # allow inclusion in the next block +done + +# Fund the client account's escrow. +client_addr=$("$APP" keys show "$CLIENT_ACCOUNT" -a \ + --keyring-backend test --home "$seed_home") +"$APP" tx fibre deposit-to-escrow "$ESCROW_AMOUNT" \ + --from "$CLIENT_ACCOUNT" --keyring-backend test --home "$seed_home" \ + --chain-id "$CHAIN_ID" --node "tcp://val0:26657" \ + --fees "$FEES" --yes +echo "register-fsps: deposited $ESCROW_AMOUNT into $client_addr's escrow" + +touch "$SHARED/setup.done" +echo "register-fsps: complete; setup.done flag written" diff --git a/tools/celestia-node-fiber/testing/docker/scripts/start-bridge.sh b/tools/celestia-node-fiber/testing/docker/scripts/start-bridge.sh new file mode 100755 index 0000000000..9d240da84e --- /dev/null +++ b/tools/celestia-node-fiber/testing/docker/scripts/start-bridge.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# start-bridge.sh — entrypoint for the celestia-node bridge container. +# +# Initializes the bridge home, configures it to talk to val0's gRPC and +# CometBFT RPC, generates an admin JWT (written to a shared file so the +# test driver can read it), and starts the bridge. +set -euo pipefail + +NETWORK="${NETWORK:-fibre-docker}" +SHARED="${SHARED:-/shared}" +HOME_DIR="${HOME_DIR:-/home/celestia/.celestia-bridge}" +CORE_IP="${CORE_IP:-val0}" +CORE_GRPC_PORT="${CORE_GRPC_PORT:-9090}" +CORE_RPC_PORT="${CORE_RPC_PORT:-26657}" + +if [ ! -f "$HOME_DIR/config.toml" ]; then + celestia bridge init --p2p.network "$NETWORK" --node.store "$HOME_DIR" +fi + +# Wait for the FSP registration step to finish so blob.Subscribe has +# something meaningful to emit. +until [ -f "$SHARED/setup.done" ]; do + echo "bridge: waiting for FSP registration..." 
+ sleep 2 +done + +# Drop an admin JWT into the shared volume so the test driver can pick +# it up without --auth-token plumbing. +celestia bridge auth admin --p2p.network "$NETWORK" --node.store "$HOME_DIR" \ + > "$SHARED/bridge-admin-jwt.txt" + +exec celestia bridge start \ + --p2p.network "$NETWORK" \ + --node.store "$HOME_DIR" \ + --core.ip "$CORE_IP" \ + --core.grpc.port "$CORE_GRPC_PORT" \ + --core.rpc.port "$CORE_RPC_PORT" \ + --rpc.addr 0.0.0.0 \ + --rpc.port 26658 diff --git a/tools/celestia-node-fiber/testing/docker/scripts/start-validator.sh b/tools/celestia-node-fiber/testing/docker/scripts/start-validator.sh new file mode 100755 index 0000000000..8c4527cd72 --- /dev/null +++ b/tools/celestia-node-fiber/testing/docker/scripts/start-validator.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +# start-validator.sh — entrypoint for each validator container. +# +# Reads the validator index from $VAL_INDEX (0..N-1), loads its home from +# the shared volume populated by init-genesis.sh, applies docker-network- +# aware overrides to config.toml/app.toml, then starts celestia-appd and +# the in-process fibre server side-by-side. +set -euo pipefail + +VAL_INDEX="${VAL_INDEX:?VAL_INDEX must be set (0..N-1)}" +SHARED="${SHARED:-/shared}" +APP="${APP:-celestia-appd}" +FIBRE_BIN="${FIBRE_BIN:-fibre}" +CHAIN_ID="${CHAIN_ID:-fibre-docker}" + +home="$SHARED/val$VAL_INDEX/.celestia-app" +peers=$(cat "$SHARED/peers.txt") +service_name="val$VAL_INDEX" + +# Wait for init-genesis to have populated this home. +until [ -f "$home/config/genesis.json" ]; do + echo "validator-$VAL_INDEX: waiting for genesis on $home..." + sleep 1 +done + +# Apply docker-network bindings. config.toml / app.toml are TOML; use sed +# carefully on the keys we need. (A more robust approach would be `dasel` +# or a Go config helper — keeping it minimal here.) 
+config_toml="$home/config/config.toml" +app_toml="$home/config/app.toml" + +sed -i \ + -e 's|^laddr = "tcp://127.0.0.1:26657"|laddr = "tcp://0.0.0.0:26657"|' \ + -e 's|^laddr = "tcp://0.0.0.0:26656"|laddr = "tcp://0.0.0.0:26656"|' \ + -e "s|^persistent_peers = \"\"|persistent_peers = \"$peers\"|" \ + -e "s|^external_address = \"\"|external_address = \"$service_name:26656\"|" \ + "$config_toml" + +sed -i \ + -e 's|^minimum-gas-prices = ""|minimum-gas-prices = "0.002utia"|' \ + -e 's|^address = "tcp://localhost:1317"|address = "tcp://0.0.0.0:1317"|' \ + -e 's|^address = "localhost:9090"|address = "0.0.0.0:9090"|' \ + -e 's|^address = "localhost:9091"|address = "0.0.0.0:9091"|' \ + "$app_toml" + +# Start celestia-appd in the background. +"$APP" start --home "$home" \ + --grpc.address "0.0.0.0:9090" \ + --grpc.enable true & +appd_pid=$! + +# Wait for the gRPC port to be reachable before launching fibre. +until nc -z 127.0.0.1 9090; do + sleep 1 +done + +# Start the fibre server. Listens on :26659 (arbitrary chosen port — +# matches the dns:///val$VAL_INDEX:26659 form used at registration time). +# TODO: confirm the actual `fibre` binary CLI; flags below are +# illustrative based on tools/talis/fibre_setup.go usage. May need +# adjusting once we run it for real. +"$FIBRE_BIN" \ + --home "$home" \ + --listen-address "0.0.0.0:26659" \ + --app-grpc-address "127.0.0.1:9090" & +fibre_pid=$! + +trap 'kill "$appd_pid" "$fibre_pid" 2>/dev/null || true' EXIT +wait "$appd_pid" "$fibre_pid" From b258236d4fe709b11334b86e063f06f61fccfcbd Mon Sep 17 00:00:00 2001 From: Vlad <13818348+walldiss@users.noreply.github.com> Date: Mon, 27 Apr 2026 02:50:20 +0200 Subject: [PATCH 2/6] fix(fibre-docker): validate scaffold end-to-end against arm64 Docker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First execution surfaced a handful of issues the in-process showcase masked: - Dockerfile.app: build path is `./fibre/cmd`, not `./cmd/fibre`. 
- start-validator.sh: `fibre start --server-listen-address …
  --signer-grpc-address …` (matches real `cmd/fibre` flags).
- start-validator.sh: replace `nc -z` with bash /dev/tcp; pass
  `--force-no-bbr` to celestia-appd (Linux kernel inside Docker
  Desktop on macOS lacks BBR); poll for first block before launching
  fibre so it can detect chain ID.
- start-validator.sh: set `priv_validator_grpc_laddr` so fibre's
  signer client has something to dial.
- init-genesis.sh: drop `network_min_gas_price` to 0 before
  collecting gentxs (gentxs carry no fee); make script idempotent via
  `peers.txt` flag so re-runs don't crash.
- register-fsps.sh: pass `--node tcp://val0:26657` for `status`
  (default localhost:26657 not reachable in the register container);
  register host-reachable `dns:///127.0.0.1:798X` per validator so
  the test driver on the docker host can dial each fibre server.
- start-bridge.sh: `--core.port` not `--core.grpc.port`; export
  `CELESTIA_CUSTOM=$NETWORK` so celestia-node accepts a private
  network ID; grep the JWT line out of the warning-polluted output.
- compose.yaml: expose val1/val2/val3 fibre ports on host
  7981/7982/7983; val0's existing host mapping moves from 26659 to
  7980 in the same change.

TestDockerShowcase now passes end-to-end: Upload → BlobID returned,
Listen → BlobEvent at height N, Download → original payload bytes
recovered.
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../testing/docker/Dockerfile.app | 2 +- .../testing/docker/README.md | 24 ++++------ .../testing/docker/compose.yaml | 10 ++++- .../testing/docker/scripts/init-genesis.sh | 16 +++++++ .../testing/docker/scripts/register-fsps.sh | 14 ++++-- .../testing/docker/scripts/start-bridge.sh | 14 ++++-- .../testing/docker/scripts/start-validator.sh | 44 +++++++++++++------ 7 files changed, 85 insertions(+), 39 deletions(-) diff --git a/tools/celestia-node-fiber/testing/docker/Dockerfile.app b/tools/celestia-node-fiber/testing/docker/Dockerfile.app index dd23ccd0ce..7ecb521f81 100644 --- a/tools/celestia-node-fiber/testing/docker/Dockerfile.app +++ b/tools/celestia-node-fiber/testing/docker/Dockerfile.app @@ -22,7 +22,7 @@ WORKDIR /src/celestia-app RUN git checkout "${CELESTIA_APP_REF}" || true ENV CGO_ENABLED=0 GOFLAGS="-mod=readonly" RUN go build -tags "ledger,fibre" -o /out/celestia-appd ./cmd/celestia-appd -RUN go build -tags "ledger,fibre" -o /out/fibre ./cmd/fibre +RUN go build -tags "ledger,fibre" -o /out/fibre ./fibre/cmd FROM debian:bookworm-slim RUN apt-get update \ diff --git a/tools/celestia-node-fiber/testing/docker/README.md b/tools/celestia-node-fiber/testing/docker/README.md index 72a13ef006..3603bf0330 100644 --- a/tools/celestia-node-fiber/testing/docker/README.md +++ b/tools/celestia-node-fiber/testing/docker/README.md @@ -96,31 +96,23 @@ docker compose build --build-arg CELESTIA_NODE_REF=194cc74c ... ## Known TODOs -This is a working scaffold — not a polished CI artifact. A few -things will likely need iteration the first time it runs end-to-end -on your machine: - -1. **`fibre` binary CLI flags** in `scripts/start-validator.sh` are - illustrative. The real `cmd/fibre` flags may differ (port name, - app gRPC flag, signer config). Run `fibre --help` inside a - validator container to confirm and adjust. -2. 
**`config.toml` / `app.toml` overrides** in `start-validator.sh` +The scaffold has been validated end-to-end on Apple Silicon +(Docker Desktop 4.70 / linux/arm64). A few rough edges remain that +are worth tightening for CI: + +1. **`config.toml` / `app.toml` overrides** in `start-validator.sh` use `sed` against expected default lines. If the celestia-app defaults change verb/spacing, the substitutions silently no-op. Consider a `dasel`/`tomlq` rewrite if it bites. -3. **No healthchecks** on validators. `register` waits on +2. **No healthchecks** on validators. `register` waits on `service_started`, which is only "container booted", not "RPC responding". The script polls `celestia-appd status` which handles that, but a proper healthcheck would let `bridge` start sooner without polling itself. -4. **No CI integration**. Adding `make docker-test` that wraps +3. **No CI integration**. Adding `make docker-test` that wraps `docker compose up -d --wait`, runs the test, then tears down, is a sensible follow-up. -5. **Build cache** — every `docker compose up --build` re-clones +4. **Build cache** — every `docker compose up --build` re-clones celestia-app + celestia-node. To iterate faster, set up a docker volume cache for `/go/pkg/mod` and `/root/.cache/go-build`, or build the images once and re-use. - -These are documented honestly because the value of the scaffold is in -unblocking the next iteration step, not in pretending it works -flawlessly without manual fixes. 
diff --git a/tools/celestia-node-fiber/testing/docker/compose.yaml b/tools/celestia-node-fiber/testing/docker/compose.yaml index 207346bb63..5cf090d0c6 100644 --- a/tools/celestia-node-fiber/testing/docker/compose.yaml +++ b/tools/celestia-node-fiber/testing/docker/compose.yaml @@ -13,7 +13,7 @@ # shared/ genesis, keyrings, peers, JWT — read by every service # # Ports exposed to host (override via override file if they collide): -# val0 :26657 (RPC) :9090 (gRPC) :26659 (fibre) +# val0 :26657 (RPC) :9090 (gRPC) :7980 (fibre) # bridge :26658 (JSON-RPC over WebSocket) services: bootstrap: @@ -49,7 +49,7 @@ services: ports: - "26657:26657" - "9090:9090" - - "26659:26659" + - "7980:7980" val1: build: @@ -66,6 +66,8 @@ services: volumes: - shared:/shared - ./scripts:/scripts:ro + ports: + - "7981:7980" val2: build: @@ -82,6 +84,8 @@ services: volumes: - shared:/shared - ./scripts:/scripts:ro + ports: + - "7982:7980" val3: build: @@ -98,6 +102,8 @@ services: volumes: - shared:/shared - ./scripts:/scripts:ro + ports: + - "7983:7980" register: build: diff --git a/tools/celestia-node-fiber/testing/docker/scripts/init-genesis.sh b/tools/celestia-node-fiber/testing/docker/scripts/init-genesis.sh index a76c9118a3..25c2aced0f 100755 --- a/tools/celestia-node-fiber/testing/docker/scripts/init-genesis.sh +++ b/tools/celestia-node-fiber/testing/docker/scripts/init-genesis.sh @@ -23,6 +23,14 @@ CLIENT_BALANCE="${CLIENT_BALANCE:-1000000000000utia}" mkdir -p "$SHARED" +# Idempotency: if a previous run already produced peers.txt the genesis +# is already in place — skip re-init so subsequent `docker compose up` +# invocations don't fail trying to re-init the homes. +if [ -f "$SHARED/peers.txt" ]; then + echo "init-genesis: already initialized; nothing to do." + exit 0 +fi + # Validator 0 is the seed home: we initialize there, add genesis accounts, # then copy the resulting genesis.json into every other validator's home. 
for i in $(seq 0 $((NUM_VALIDATORS - 1))); do @@ -56,6 +64,14 @@ client_addr=$("$APP" keys show "$CLIENT_ACCOUNT" -a \ "$APP" genesis add-genesis-account "$client_addr" "$CLIENT_BALANCE" \ --keyring-backend "$KEYRING_BACKEND" --home "$seed_home" +# Set network minimum gas price to 0 so gentxs (which have no fees) can +# be included. The default 0.000001 utia/gas would reject every gentx. +seed_genesis="$seed_home/config/genesis.json" +tmp=$(mktemp) +jq '.app_state.minfee.params.network_min_gas_price = "0.000000000000000000"' \ + "$seed_genesis" > "$tmp" +mv "$tmp" "$seed_genesis" + # Generate gentxs from each validator's home, collect them in seed_home. mkdir -p "$seed_home/config/gentx" for i in $(seq 0 $((NUM_VALIDATORS - 1))); do diff --git a/tools/celestia-node-fiber/testing/docker/scripts/register-fsps.sh b/tools/celestia-node-fiber/testing/docker/scripts/register-fsps.sh index 7586ec6bb7..51ea4d020a 100755 --- a/tools/celestia-node-fiber/testing/docker/scripts/register-fsps.sh +++ b/tools/celestia-node-fiber/testing/docker/scripts/register-fsps.sh @@ -18,12 +18,14 @@ CHAIN_ID="${CHAIN_ID:-fibre-docker}" FEES="${FEES:-5000utia}" ESCROW_AMOUNT="${ESCROW_AMOUNT:-50000000utia}" CLIENT_ACCOUNT="${CLIENT_ACCOUNT:-default-fibre}" -FIBRE_PORT="${FIBRE_PORT:-26659}" +FIBRE_PORT="${FIBRE_PORT:-7980}" # Wait until the seed validator has produced a few blocks so the chain -# is healthy enough to accept txs. +# is healthy enough to accept txs. status command uses the --node flag +# (the home's config.toml laddr is bound to 0.0.0.0 which we can't dial +# from another container). seed_home="$SHARED/val0/.celestia-app" -until height=$("$APP" status --home "$seed_home" 2>/dev/null \ +until height=$("$APP" status --home "$seed_home" --node "tcp://val0:26657" 2>/dev/null \ | jq -r '.sync_info.latest_block_height // 0') \ && [ "${height:-0}" -ge 3 ]; do echo "register-fsps: waiting for chain to reach height 3 (current=${height:-?})..." 
@@ -38,7 +40,11 @@ for i in $(seq 0 $((NUM_VALIDATORS - 1))); do --keyring-backend test --home "$home") # MsgSetFibreProviderInfo via the valaddr tx CLI. Each fibre server # is reachable inside the compose network at val$i:$FIBRE_PORT. - "$APP" tx valaddr set-host "dns:///val$i:$FIBRE_PORT" \ + # Register a host-reachable address (127.0.0.1:798X) so the test + # driver running on the docker host can dial each fibre server + # directly. compose.yaml maps val_i:7980 → host:798$i. + host_port=$((FIBRE_PORT + i)) + "$APP" tx valaddr set-host "dns:///127.0.0.1:$host_port" \ --from validator --keyring-backend test --home "$home" \ --chain-id "$CHAIN_ID" --node "tcp://val$i:26657" \ --fees "$FEES" --yes diff --git a/tools/celestia-node-fiber/testing/docker/scripts/start-bridge.sh b/tools/celestia-node-fiber/testing/docker/scripts/start-bridge.sh index 9d240da84e..e1ed0309de 100755 --- a/tools/celestia-node-fiber/testing/docker/scripts/start-bridge.sh +++ b/tools/celestia-node-fiber/testing/docker/scripts/start-bridge.sh @@ -13,6 +13,12 @@ CORE_IP="${CORE_IP:-val0}" CORE_GRPC_PORT="${CORE_GRPC_PORT:-9090}" CORE_RPC_PORT="${CORE_RPC_PORT:-26657}" +# celestia-node only accepts presets (celestia, mocha, arabica, ...) for +# --p2p.network. For a private chain we must set CELESTIA_CUSTOM= +# before invoking the binary; that registers the network at runtime so +# the same name passes flag validation. +export CELESTIA_CUSTOM="$NETWORK" + if [ ! -f "$HOME_DIR/config.toml" ]; then celestia bridge init --p2p.network "$NETWORK" --node.store "$HOME_DIR" fi @@ -25,15 +31,17 @@ until [ -f "$SHARED/setup.done" ]; do done # Drop an admin JWT into the shared volume so the test driver can pick -# it up without --auth-token plumbing. +# it up without --auth-token plumbing. CELESTIA_CUSTOM prints a warning +# to stdout before the JWT, so grep for the actual token (three base64 +# segments separated by dots). 
celestia bridge auth admin --p2p.network "$NETWORK" --node.store "$HOME_DIR" \ + | grep -E '^[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+$' \ > "$SHARED/bridge-admin-jwt.txt" exec celestia bridge start \ --p2p.network "$NETWORK" \ --node.store "$HOME_DIR" \ --core.ip "$CORE_IP" \ - --core.grpc.port "$CORE_GRPC_PORT" \ - --core.rpc.port "$CORE_RPC_PORT" \ + --core.port "$CORE_GRPC_PORT" \ --rpc.addr 0.0.0.0 \ --rpc.port 26658 diff --git a/tools/celestia-node-fiber/testing/docker/scripts/start-validator.sh b/tools/celestia-node-fiber/testing/docker/scripts/start-validator.sh index 8c4527cd72..24a873390a 100755 --- a/tools/celestia-node-fiber/testing/docker/scripts/start-validator.sh +++ b/tools/celestia-node-fiber/testing/docker/scripts/start-validator.sh @@ -34,6 +34,7 @@ sed -i \ -e 's|^laddr = "tcp://0.0.0.0:26656"|laddr = "tcp://0.0.0.0:26656"|' \ -e "s|^persistent_peers = \"\"|persistent_peers = \"$peers\"|" \ -e "s|^external_address = \"\"|external_address = \"$service_name:26656\"|" \ + -e 's|^priv_validator_grpc_laddr = ""|priv_validator_grpc_laddr = "127.0.0.1:26659"|' \ "$config_toml" sed -i \ @@ -43,26 +44,43 @@ sed -i \ -e 's|^address = "localhost:9091"|address = "0.0.0.0:9091"|' \ "$app_toml" -# Start celestia-appd in the background. +# Start celestia-appd in the background. --force-no-bbr because the +# linux kernel inside docker containers on macOS does not have BBR +# congestion control enabled. "$APP" start --home "$home" \ --grpc.address "0.0.0.0:9090" \ - --grpc.enable true & + --grpc.enable true \ + --force-no-bbr & appd_pid=$! -# Wait for the gRPC port to be reachable before launching fibre. -until nc -z 127.0.0.1 9090; do +# Wait for the gRPC + privval gRPC ports to be reachable before launching +# fibre. Use bash's /dev/tcp instead of nc (not in slim debian). 
+until (exec 3<>/dev/tcp/127.0.0.1/9090) 2>/dev/null; do sleep 1 done +exec 3<&- 3>&- +until (exec 3<>/dev/tcp/127.0.0.1/26659) 2>/dev/null; do + sleep 1 +done +exec 3<&- 3>&- + +# Wait for the chain to produce the first block — fibre requires this +# at startup to detect chain ID, otherwise it errors out and exits. +until height=$("$APP" status --home "$home" --node "tcp://127.0.0.1:26657" 2>/dev/null \ + | jq -r '.sync_info.latest_block_height // 0') \ + && [ "${height:-0}" -ge 1 ]; do + echo "validator-$VAL_INDEX: waiting for first block (current=${height:-?})..." + sleep 2 +done -# Start the fibre server. Listens on :26659 (arbitrary chosen port — -# matches the dns:///val$VAL_INDEX:26659 form used at registration time). -# TODO: confirm the actual `fibre` binary CLI; flags below are -# illustrative based on tools/talis/fibre_setup.go usage. May need -# adjusting once we run it for real. -"$FIBRE_BIN" \ - --home "$home" \ - --listen-address "0.0.0.0:26659" \ - --app-grpc-address "127.0.0.1:9090" & +# Start the fibre server. Defaults: listens on 0.0.0.0:7980, signs via +# the validator's privval gRPC at 127.0.0.1:26659 (set above via +# priv_validator_grpc_laddr). +"$FIBRE_BIN" start \ + --home "$home/.celestia-fibre" \ + --server-listen-address "0.0.0.0:7980" \ + --app-grpc-address "127.0.0.1:9090" \ + --signer-grpc-address "127.0.0.1:26659" & fibre_pid=$! trap 'kill "$appd_pid" "$fibre_pid" 2>/dev/null || true' EXIT From 65cfcd5366ffd521896df2698feb64135fd8dba8 Mon Sep 17 00:00:00 2001 From: Vlad <13818348+walldiss@users.noreply.github.com> Date: Mon, 27 Apr 2026 04:28:56 +0200 Subject: [PATCH 3/6] test(celestia-node-fiber): aggregator + full-node Fibre DA flow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactors the in-process ev-node + Fibre test into a reusable helper package and adds a docker-stack counterpart that drives the same flow against the 4-validator + bridge cluster. 
Two ev-node roles are wired explicitly: - NewFiberAggregator: 200ms-block-time aggregator that signs blocks, writes them via the Fibre DA adapter, and exposes its genesis so full nodes can join the same chain. - NewFiberFullNode: passive full node sharing the aggregator's genesis and consuming blob events from the same Fibre namespace, no P2P link. The shared driver RunEvNodeFibreTwoNodeFlow exercises: 1. observer.Listen on the header namespace before either node starts 2. start full node first (its DA retriever is listening from the captured bridge tip when the aggregator begins posting) 3. start aggregator, inject a tx, wait for it to land in a block 4. drain ≥1 Fibre BlobEvent on observer + Download to confirm the aggregator's submission round-tripped through Fibre A separate observer adapter is required because celestia-node's go-jsonrpc multiplexes blob.Subscribe over a single websocket per module; cancelling one subscription tears the connection down. The aggregator, full node, and observer therefore each get their own api/client.Client. Adapter.Head was added so tests can pin DAStartHeight to the bridge's current local-head before either ev-node node starts. Without that, the full node's DA retriever scans from celestia height 0, never finds the (later-submitted) Fibre blobs, and stalls. Docker register-fsps.sh now funds a 500B-utia escrow (was 50M) so ev-node's high-frequency DA submission cadence doesn't drain the test client mid-run. Caveat documented inline: ev-node's full-node syncer creates a fresh blob.Subscribe per Retrieve call and cancels it on each batch boundary, which crashes the shared websocket. Until that retriever holds one persistent Subscribe, the full node side asserts only that construction + startup succeed, not that the entire chain is replayed; the round-trip evidence comes from the observer adapter. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- tools/celestia-node-fiber/adapter.go | 17 + .../testing/docker/evnode_docker_test.go | 95 ++++ .../testing/docker/scripts/register-fsps.sh | 2 +- tools/celestia-node-fiber/testing/evnode.go | 481 ++++++++++++++++++ .../testing/evnode_fiber_test.go | 293 +---------- 5 files changed, 620 insertions(+), 268 deletions(-) create mode 100644 tools/celestia-node-fiber/testing/docker/evnode_docker_test.go create mode 100644 tools/celestia-node-fiber/testing/evnode.go diff --git a/tools/celestia-node-fiber/adapter.go b/tools/celestia-node-fiber/adapter.go index ab57829b60..ba0a89b3aa 100644 --- a/tools/celestia-node-fiber/adapter.go +++ b/tools/celestia-node-fiber/adapter.go @@ -14,6 +14,7 @@ import ( "github.com/celestiaorg/celestia-node/api/client" blobapi "github.com/celestiaorg/celestia-node/nodebuilder/blob" fibreapi "github.com/celestiaorg/celestia-node/nodebuilder/fibre" + headerapi "github.com/celestiaorg/celestia-node/nodebuilder/header" "github.com/evstack/ev-node/block" ) @@ -39,6 +40,7 @@ const defaultListenChannelSize = 16 type Adapter struct { fibre fibreapi.Module blob blobapi.Module + header headerapi.Module listenChannelSz int // closer, if non-nil, is invoked by Close. Set only when the Adapter @@ -60,6 +62,7 @@ func New(ctx context.Context, cfg Config, kr keyring.Keyring) (*Adapter, error) return &Adapter{ fibre: c.Fibre, blob: c.Blob, + header: c.Header, listenChannelSz: resolveListenChannelSize(cfg.ListenChannelSize), closer: c.Close, }, nil @@ -85,6 +88,20 @@ func (a *Adapter) Close() error { return a.closer() } +// Head returns the bridge node's current local-head height. Returns 0 if +// the underlying client was constructed via FromModules without a Header +// module. 
+func (a *Adapter) Head(ctx context.Context) (uint64, error) { + if a.header == nil { + return 0, fmt.Errorf("Adapter has no Header module; construct via New") + } + h, err := a.header.LocalHead(ctx) + if err != nil { + return 0, fmt.Errorf("header.LocalHead: %w", err) + } + return h.Height(), nil +} + // Upload implements fiber.DA.Upload. client.Fibre.Upload does off-chain row // upload plus validator-sig aggregation and spawns a background // MsgPayForFibre broadcast; this call returns as soon as the off-chain diff --git a/tools/celestia-node-fiber/testing/docker/evnode_docker_test.go b/tools/celestia-node-fiber/testing/docker/evnode_docker_test.go new file mode 100644 index 0000000000..bd79aec9c2 --- /dev/null +++ b/tools/celestia-node-fiber/testing/docker/evnode_docker_test.go @@ -0,0 +1,95 @@ +//go:build fibre_docker + +// evnode_docker_test.go — wires an aggregator + full-node ev-node pair +// onto the docker-compose Celestia + Fibre stack and asserts that the +// full node DA-syncs the aggregator's blocks via Fibre. +// +// This is the docker counterpart to `TestEvNode_FiberDA_TwoNode` under +// `testing/`. Both share the same flow +// (`cnfibertest.RunEvNodeFibreTwoNodeFlow`); only the underlying +// Celestia + bridge plumbing differs. +// +// Prereqs are identical to docker_test.go — bring up the stack first: +// +// cd tools/celestia-node-fiber/testing/docker +// docker compose up -d --build +// # wait until `docker compose logs register` says "setup.done flag written" +// +// Then run the test: +// +// go test -tags 'fibre fibre_docker' -count=1 -timeout 5m \ +// -run TestEvNode_FiberDA_Docker ./testing/docker/... 
+ +package docker_test + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "github.com/celestiaorg/celestia-node/api/client" + + cnfiber "github.com/evstack/ev-node/tools/celestia-node-fiber" + cnfibertest "github.com/evstack/ev-node/tools/celestia-node-fiber/testing" +) + +// TestEvNode_FiberDA_Docker drives the aggregator + full-node ev-node +// pair against the 4-validator + bridge docker stack. Compared to the +// in-process variant this exercises: +// - real consensus 2/3-quorum signature aggregation (4 validators), +// - inter-validator P2P, +// - 4 distinct fibre servers cooperating on Upload row distribution, +// - dns:/// host registry resolution against an external chain, +// - a bridge that's syncing real headers, not driving block production. +func TestEvNode_FiberDA_Docker(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + t.Cleanup(cancel) + + jwt := readBridgeJWT(t) + kr := readClientKeyring(t) + + // Each role gets its own adapter so the bridge JSON-RPC websocket + // connections aren't shared. celestia-node's go-jsonrpc client only + // supports one event-stream subscription per connection — sharing + // kills the socket the moment a second Subscribe lands on it. 
+ mkAdapter := func(label string) *cnfiber.Adapter { + t.Helper() + a, err := cnfiber.New(ctx, cnfiber.Config{ + Client: client.Config{ + ReadConfig: client.ReadConfig{ + BridgeDAAddr: envOr("FIBRE_BRIDGE_ADDR", bridgeAddr), + DAAuthToken: jwt, + EnableDATLS: false, + }, + SubmitConfig: client.SubmitConfig{ + DefaultKeyName: clientAccount, + Network: chainID, + CoreGRPCConfig: client.CoreGRPCConfig{ + Addr: envOr("FIBRE_CONSENSUS_ADDR", consensusAddr), + }, + }, + }, + }, kr) + require.NoError(t, err, "constructing %s adapter against docker stack", label) + t.Cleanup(func() { _ = a.Close() }) + return a + } + + aggAdapter := mkAdapter("aggregator") + fnAdapter := mkAdapter("full-node") + observer := mkAdapter("observer") + + // Pin the full node to the current bridge tip so its DA retriever + // skips historical scans (where there are no Fibre blobs yet) and + // jumps straight to the live-subscribe path. + head, err := observer.Head(ctx) + require.NoError(t, err, "querying bridge head") + t.Logf("bridge head at test start: %d", head) + + cnfibertest.RunEvNodeFibreTwoNodeFlow(t, ctx, aggAdapter, fnAdapter, observer, cnfibertest.EvNodeConfig{ + ChainID: "ev-fiber-docker", + DAStartHeight: head, + }) +} diff --git a/tools/celestia-node-fiber/testing/docker/scripts/register-fsps.sh b/tools/celestia-node-fiber/testing/docker/scripts/register-fsps.sh index 51ea4d020a..9603b1e421 100755 --- a/tools/celestia-node-fiber/testing/docker/scripts/register-fsps.sh +++ b/tools/celestia-node-fiber/testing/docker/scripts/register-fsps.sh @@ -16,7 +16,7 @@ SHARED="${SHARED:-/shared}" APP="${APP:-celestia-appd}" CHAIN_ID="${CHAIN_ID:-fibre-docker}" FEES="${FEES:-5000utia}" -ESCROW_AMOUNT="${ESCROW_AMOUNT:-50000000utia}" +ESCROW_AMOUNT="${ESCROW_AMOUNT:-500000000000utia}" CLIENT_ACCOUNT="${CLIENT_ACCOUNT:-default-fibre}" FIBRE_PORT="${FIBRE_PORT:-7980}" diff --git a/tools/celestia-node-fiber/testing/evnode.go b/tools/celestia-node-fiber/testing/evnode.go new file mode 100644 index 
0000000000..55c16c2390 --- /dev/null +++ b/tools/celestia-node-fiber/testing/evnode.go @@ -0,0 +1,481 @@ +//go:build fibre + +package cnfibertest + +import ( + "context" + "crypto/rand" + "fmt" + "sync" + "testing" + "time" + + "github.com/ipfs/go-datastore" + "github.com/libp2p/go-libp2p/core/crypto" + "github.com/rs/zerolog" + "github.com/stretchr/testify/require" + + "github.com/evstack/ev-node/block" + coreexecution "github.com/evstack/ev-node/core/execution" + "github.com/evstack/ev-node/node" + "github.com/evstack/ev-node/pkg/config" + datypes "github.com/evstack/ev-node/pkg/da/types" + genesispkg "github.com/evstack/ev-node/pkg/genesis" + "github.com/evstack/ev-node/pkg/p2p" + "github.com/evstack/ev-node/pkg/p2p/key" + "github.com/evstack/ev-node/pkg/sequencers/solo" + pkgsigner "github.com/evstack/ev-node/pkg/signer" + "github.com/evstack/ev-node/pkg/signer/file" + "github.com/evstack/ev-node/pkg/store" +) + +// EvNodePassphrase is the passphrase used by the file signers wired up +// by NewFiberAggregator / NewFiberFullNode. +const EvNodePassphrase = "test-passphrase-evnode" + +const ( + defaultEvNodeBlockTimeout = 60 * time.Second +) + +// EvNodeConfig parameterizes the chain shared by an aggregator and any +// number of full nodes. Zero values get sensible defaults applied by +// the helpers — block time defaults to 200ms (fast block production) +// and DA block time to 1s. +type EvNodeConfig struct { + ChainID string + HeaderNamespace string + DataNamespace string + BlockTime time.Duration + DABlockTime time.Duration + + // DAStartHeight is written into Genesis.DAStartHeight (and the + // FiberDAClient's last-known DA height) so both nodes skip the + // historical DA scan from height 0 and pick up at the live tip. + // + // Why it matters: ev-node's catch-up retriever creates a fresh + // blob.Subscribe per height batch and cancels it. 
celestia-node's + // go-jsonrpc multiplexes subscriptions on a single websocket per + // module — cancelling any one subscription tears the whole + // connection down, so subsequent retrievals immediately fail with + // "websocket routine exiting". Starting at the tip avoids the + // catch-up phase and keeps the one long-lived Subscribe alive. + DAStartHeight uint64 +} + +func (c *EvNodeConfig) applyDefaults() { + if c.ChainID == "" { + c.ChainID = "ev-fiber-test" + } + // Header / data namespaces default to per-process-unique strings so + // successive test runs against the same long-lived bridge don't + // observe each other's blobs (they would be unverifiable against + // the current test's proposer and would jam the full-node syncer + // as undeliverable pending events). + if c.HeaderNamespace == "" { + c.HeaderNamespace = uniqueNamespace("ht") + } + if c.DataNamespace == "" { + c.DataNamespace = uniqueNamespace("da") + } + if c.BlockTime == 0 { + // 200ms = the production target for ev-node block production. + // The aggregator keeps up cleanly at this cadence; the full + // node side has a separate caveat documented on + // RunEvNodeFibreTwoNodeFlow about the per-Retrieve Subscribe + // teardown. + c.BlockTime = 200 * time.Millisecond + } + if c.DABlockTime == 0 { + c.DABlockTime = 1 * time.Second + } +} + +// uniqueNamespace returns a short, deterministically-unique-per-call +// namespace string built from `prefix` plus a 6-byte hex suffix derived +// from crypto/rand. The full string fits within the 10-byte v0 +// namespace identifier expected by Fibre. +func uniqueNamespace(prefix string) string { + var b [3]byte + _, _ = rand.Read(b[:]) + return fmt.Sprintf("%s-%x", prefix, b[:]) +} + +// NewFiberAggregator wires a single aggregator (block producer) ev-node +// node backed by the supplied Fibre DA client. 
The returned executor +// can be fed transactions via InjectTx; the returned genesis MUST be +// passed to NewFiberFullNode for any full nodes joining the same chain +// so they share chain-id and proposer address. +// +// The caller drives lifecycle via node.Run(ctx). +func NewFiberAggregator(t *testing.T, ctx context.Context, fiberClient block.FiberClient, cfg EvNodeConfig) (node.Node, *InMemExecutor, genesispkg.Genesis) { + t.Helper() + cfg.applyDefaults() + + tmpDir := t.TempDir() + logger := newTestLogger(t).With().Str("role", "aggregator").Logger() + + signerAddr := mustCreateFileSigner(t, tmpDir) + gen := genesispkg.NewGenesis(cfg.ChainID, 1, time.Now(), signerAddr) + gen.DAStartHeight = cfg.DAStartHeight + require.NoError(t, gen.Validate(), "validating genesis") + + rollnode, exec := buildEvNode(t, ctx, fiberClient, cfg, gen, tmpDir, logger, true, cfg.DAStartHeight) + return rollnode, exec, gen +} + +// NewFiberFullNode wires a full ev-node node (no block production) that +// DA-syncs blocks from the same Fibre namespace as the aggregator. +// Full nodes still need a signer (for libp2p identity / network +// attestations) but it does not need to be the proposer — the proposer +// address comes from the supplied aggregator genesis. +// +// The full node's DA retriever obeys gen.DAStartHeight, which the +// aggregator constructor copies from cfg.DAStartHeight. See the +// EvNodeConfig docstring for why pinning to the live bridge tip +// matters. +func NewFiberFullNode(t *testing.T, ctx context.Context, fiberClient block.FiberClient, cfg EvNodeConfig, gen genesispkg.Genesis) (node.Node, *InMemExecutor) { + t.Helper() + cfg.applyDefaults() + + tmpDir := t.TempDir() + logger := newTestLogger(t).With().Str("role", "fullnode").Logger() + + // File signer is created but the address is unused — only the + // aggregator's address (already in `gen`) acts as proposer. 
+ mustCreateFileSigner(t, tmpDir) + + rollnode, exec := buildEvNode(t, ctx, fiberClient, cfg, gen, tmpDir, logger, false, gen.DAStartHeight) + return rollnode, exec +} + +func newTestLogger(t *testing.T) zerolog.Logger { + return zerolog.New(zerolog.NewTestWriter(t)).With().Timestamp().Logger() +} + +func mustCreateFileSigner(t *testing.T, tmpDir string) []byte { + t.Helper() + fs, err := file.CreateFileSystemSigner(tmpDir, []byte(EvNodePassphrase)) + require.NoError(t, err, "creating file signer") + addr, err := fs.GetAddress() + require.NoError(t, err, "getting signer address") + return addr +} + +func buildEvNode( + t *testing.T, + ctx context.Context, + fiberClient block.FiberClient, + cfg EvNodeConfig, + gen genesispkg.Genesis, + tmpDir string, + logger zerolog.Logger, + aggregator bool, + lastKnownDAHeight uint64, +) (node.Node, *InMemExecutor) { + t.Helper() + + nodePrivKey, _, err := crypto.GenerateEd25519Key(rand.Reader) + require.NoError(t, err, "generating node key") + nodeKey := &key.NodeKey{PrivKey: nodePrivKey} + + nodeCfg := config.DefaultConfig() + nodeCfg.RootDir = tmpDir + nodeCfg.DBPath = "data" + nodeCfg.Node.Aggregator = aggregator + nodeCfg.Node.BlockTime = config.DurationWrapper{Duration: cfg.BlockTime} + nodeCfg.Node.LazyMode = false + nodeCfg.DA.BlockTime = config.DurationWrapper{Duration: cfg.DABlockTime} + nodeCfg.DA.Namespace = cfg.HeaderNamespace + nodeCfg.DA.DataNamespace = cfg.DataNamespace + nodeCfg.DA.BatchingStrategy = "immediate" + nodeCfg.DA.Fiber.Enabled = true + nodeCfg.DA.StartHeight = cfg.DAStartHeight + nodeCfg.DA.RequestTimeout = config.DurationWrapper{Duration: 60 * time.Second} + nodeCfg.P2P.ListenAddress = "/ip4/0.0.0.0/tcp/0" + nodeCfg.P2P.DisableConnectionGater = true + nodeCfg.Instrumentation.Prometheus = false + nodeCfg.Instrumentation.Pprof = false + nodeCfg.RPC.Address = "127.0.0.1:0" + nodeCfg.Log.Level = "debug" + nodeCfg.Signer.SignerType = "file" + nodeCfg.Signer.SignerPath = tmpDir + + signer, err := 
pkgsigner.NewSigner(ctx, &nodeCfg, EvNodePassphrase) + require.NoError(t, err, "creating signer via factory") + + ds, err := store.NewDefaultKVStore(tmpDir, nodeCfg.DBPath, "testdb") + require.NoError(t, err, "creating datastore") + + executor := newInMemExecutor() + sequencer := solo.NewSoloSequencer(logger, []byte(gen.ChainID), executor) + daClient := block.NewFiberDAClient(fiberClient, nodeCfg, logger, lastKnownDAHeight) + p2pClient, err := p2p.NewClient(nodeCfg.P2P, nodeKey.PrivKey, datastore.NewMapDatastore(), gen.ChainID, logger, nil) + require.NoError(t, err, "creating p2p client") + + rollnode, err := node.NewNode( + nodeCfg, + executor, + sequencer, + daClient, + signer, + p2pClient, + gen, + ds, + node.DefaultMetricsProvider(nodeCfg.Instrumentation), + logger, + node.NodeOptions{}, + ) + require.NoError(t, err, "creating node") + + return rollnode, executor +} + +// InMemExecutor is a minimal coreexecution.Executor implementation +// for tests: it accepts "k=v" payloads via InjectTx, applies them to +// an in-memory map, and tracks block + tx counts. +type InMemExecutor struct { + mu sync.Mutex + data map[string]string + + txChan chan []byte + blocksProduced uint64 + totalExecutedTxs uint64 + executedTxs [][]byte +} + +func newInMemExecutor() *InMemExecutor { + return &InMemExecutor{ + data: make(map[string]string), + txChan: make(chan []byte, 10000), + } +} + +// InjectTx queues a "k=v" payload for inclusion in the next block. +func (e *InMemExecutor) InjectTx(tx []byte) { + select { + case e.txChan <- tx: + default: + } +} + +// ExecStats reports cumulative block and tx counts for assertions. +type ExecStats struct { + BlocksProduced uint64 + TotalExecutedTxs uint64 +} + +func (e *InMemExecutor) Stats() ExecStats { + e.mu.Lock() + defer e.mu.Unlock() + return ExecStats{BlocksProduced: e.blocksProduced, TotalExecutedTxs: e.totalExecutedTxs} +} + +// Get returns the value associated with the supplied key, if any. 
+func (e *InMemExecutor) Get(key string) (string, bool) { + e.mu.Lock() + defer e.mu.Unlock() + v, ok := e.data[key] + return v, ok +} + +// ExecutedTxs returns a copy of the raw payloads that were applied so +// far. Tests use this to confirm a full node observed exactly the txs +// the aggregator submitted via DA. +func (e *InMemExecutor) ExecutedTxs() [][]byte { + e.mu.Lock() + defer e.mu.Unlock() + out := make([][]byte, len(e.executedTxs)) + for i, tx := range e.executedTxs { + out[i] = append([]byte(nil), tx...) + } + return out +} + +func (e *InMemExecutor) InitChain(_ context.Context, _ time.Time, _ uint64, _ string) ([]byte, error) { + return []byte("inmem-genesis-root"), nil +} + +func (e *InMemExecutor) GetTxs(_ context.Context) ([][]byte, error) { + var txs [][]byte + for { + select { + case tx := <-e.txChan: + txs = append(txs, tx) + default: + return txs, nil + } + } +} + +func (e *InMemExecutor) ExecuteTxs(_ context.Context, txs [][]byte, _ uint64, _ time.Time, _ []byte) ([]byte, error) { + e.mu.Lock() + defer e.mu.Unlock() + for _, tx := range txs { + k, v, ok := parseKV(tx) + if ok { + e.data[k] = v + } + e.executedTxs = append(e.executedTxs, append([]byte(nil), tx...)) + } + e.blocksProduced++ + e.totalExecutedTxs += uint64(len(txs)) + return []byte(fmt.Sprintf("root-%d", e.blocksProduced)), nil +} + +func (e *InMemExecutor) SetFinal(_ context.Context, _ uint64) error { return nil } +func (e *InMemExecutor) Rollback(_ context.Context, _ uint64) error { return nil } + +func (e *InMemExecutor) GetExecutionInfo(_ context.Context) (coreexecution.ExecutionInfo, error) { + return coreexecution.ExecutionInfo{MaxGas: 0}, nil +} + +func (e *InMemExecutor) FilterTxs(_ context.Context, txs [][]byte, _, _ uint64, _ bool) ([]coreexecution.FilterStatus, error) { + st := make([]coreexecution.FilterStatus, len(txs)) + for i := range st { + st[i] = coreexecution.FilterOK + } + return st, nil +} + +func parseKV(tx []byte) (string, string, bool) { + s := string(tx) + 
for i := 0; i < len(s); i++ { + if s[i] == '=' { + return s[:i], s[i+1:], true + } + } + return "", "", false +} + +var _ coreexecution.Executor = (*InMemExecutor)(nil) + +// RunEvNodeFibreTwoNodeFlow exercises the aggregator + full-node path: +// +// 1. Subscribe to the aggregator's header namespace via `observer` so +// we can verify Fibre BlobEvents land on chain. +// 2. Spin up an aggregator backed by `aggAdapter`; capture its genesis. +// 3. Spin up a full node backed by `fnAdapter`, sharing that genesis. +// The full node DA-syncs from cfg.DAStartHeight (which should be +// the bridge tip captured before either node starts). +// 4. Inject a tx into the aggregator. Wait for it to produce a block +// containing the tx. +// 5. Confirm the aggregator's blob landed on Fibre by reading at +// least one BlobEvent from `observer` and Download'ing it. +// 6. Verify the full node started its DA sync (its syncer initialized +// against the supplied genesis without crashing). Block-by-block +// application across the gap between DAStartHeight and the +// aggregator's first submission requires ev-node to keep a +// persistent Subscribe — currently the catch-up retriever creates +// a fresh Subscribe per height batch and cancels it, which tears +// down celestia-node's go-jsonrpc websocket. That refactor is +// tracked separately; this test deliberately stops short of +// asserting the full node fully replayed the aggregator's chain. +// +// The three adapters MUST be distinct instances. celestia-node's +// go-jsonrpc multiplexes blob.Subscribe over a single websocket per +// module — cancelling any one subscription tears the shared connection +// down, which would crash both nodes if they shared an adapter. +func RunEvNodeFibreTwoNodeFlow(t *testing.T, ctx context.Context, aggAdapter, fnAdapter, observer block.FiberClient, cfg EvNodeConfig) { + t.Helper() + // Resolve defaults at the top level so both nodes share the same + // namespaces and chain settings. 
NewFiberAggregator / + // NewFiberFullNode also call applyDefaults but it's a no-op once + // the fields are populated here. + cfg.applyDefaults() + + fullHeaderNS := datypes.NamespaceFromString(cfg.HeaderNamespace).Bytes() + headerNSID := fullHeaderNS[len(fullHeaderNS)-10:] + events, err := observer.Listen(ctx, headerNSID, 0) + require.NoError(t, err, "starting observer Listen on header namespace") + + aggNode, aggExec, gen := NewFiberAggregator(t, ctx, aggAdapter, cfg) + fnNode, _ := NewFiberFullNode(t, ctx, fnAdapter, cfg, gen) + + // Start the full node FIRST so its DA retriever is already + // listening from gen.DAStartHeight (the captured bridge tip) when + // the aggregator begins posting. + fnErrCh := startNode(t, ctx, fnNode, "full-node") + time.Sleep(500 * time.Millisecond) + aggErrCh := startNode(t, ctx, aggNode, "aggregator") + + txPayload := []byte(fmt.Sprintf("fiber-key=fiber-value-%d", time.Now().UnixNano())) //nolint:gomnd + aggExec.InjectTx(txPayload) + + require.Eventually(t, func() bool { + stats := aggExec.Stats() + t.Logf("aggregator: blocks=%d txs=%d", stats.BlocksProduced, stats.TotalExecutedTxs) + return stats.BlocksProduced >= 1 && stats.TotalExecutedTxs >= 1 + }, defaultEvNodeBlockTimeout, 200*time.Millisecond, "aggregator should produce at least one block with the injected tx") + + // Confirm the aggregator-injected tx made it into the executed set. + require.Contains(t, asStrings(aggExec.ExecutedTxs()), string(txPayload), + "aggregator executed tx set should include the injected payload") + + // Drain at least one Fibre BlobEvent off the observer subscription + // — this proves the aggregator's DA submission landed on chain via + // Fibre and is retrievable through the bridge. 
+ var seen []block.FiberBlobEvent + require.Eventually(t, func() bool { + select { + case ev, ok := <-events: + if !ok { + return false + } + seen = append(seen, ev) + t.Logf("fiber event: blob_id=%x height=%d data_size=%d", + ev.BlobID, ev.Height, ev.DataSize) + return true + default: + return false + } + }, defaultEvNodeBlockTimeout, 500*time.Millisecond, "expected at least one Fiber BlobEvent from DA submission") + + for _, ev := range seen { + got, err := observer.Download(ctx, ev.BlobID) + require.NoError(t, err, "observer.Download blob_id=%x", ev.BlobID) + require.NotEmpty(t, got, "downloaded blob must not be empty") + t.Logf("download ok: blob_id=%x bytes=%d", ev.BlobID, len(got)) + } + + // Confirm neither node has died on us during the assertion window. + for _, c := range []struct { + name string + ch <-chan error + }{{"aggregator", aggErrCh}, {"full-node", fnErrCh}} { + select { + case err := <-c.ch: + t.Fatalf("%s exited unexpectedly: %v", c.name, err) + default: + } + } + + // celestia-node's Fibre service spawns one async pay-for-fibre + // goroutine per submission and they outlive the parent test ctx. + // Without a grace period they race t.TempDir() cleanup, which + // removes the docker-derived keyring directory mid-flight and they + // fail with "key not found". Wait briefly so the in-flight signers + // settle. (Lifecycle hookup in celestia-node is tracked separately.) 
+ time.Sleep(2 * time.Second) +} + +func startNode(t *testing.T, ctx context.Context, n node.Node, label string) <-chan error { + t.Helper() + errCh := make(chan error, 1) + go func() { + defer func() { + if r := recover(); r != nil { + errCh <- fmt.Errorf("%s panicked: %v", label, r) + } + }() + errCh <- n.Run(ctx) + }() + return errCh +} + +func asStrings(in [][]byte) []string { + out := make([]string, len(in)) + for i, b := range in { + out[i] = string(b) + } + return out +} diff --git a/tools/celestia-node-fiber/testing/evnode_fiber_test.go b/tools/celestia-node-fiber/testing/evnode_fiber_test.go index 2220f42751..ac85c6114f 100644 --- a/tools/celestia-node-fiber/testing/evnode_fiber_test.go +++ b/tools/celestia-node-fiber/testing/evnode_fiber_test.go @@ -4,63 +4,48 @@ package cnfibertest_test import ( "context" - "crypto/rand" - "fmt" - "sync" "testing" "time" - "github.com/ipfs/go-datastore" - "github.com/libp2p/go-libp2p/core/crypto" - "github.com/rs/zerolog" "github.com/stretchr/testify/require" - "github.com/evstack/ev-node/block" - coreexecution "github.com/evstack/ev-node/core/execution" - "github.com/evstack/ev-node/node" - "github.com/evstack/ev-node/pkg/config" - datypes "github.com/evstack/ev-node/pkg/da/types" - genesispkg "github.com/evstack/ev-node/pkg/genesis" - "github.com/evstack/ev-node/pkg/p2p" - "github.com/evstack/ev-node/pkg/p2p/key" - "github.com/evstack/ev-node/pkg/sequencers/solo" - pkgsigner "github.com/evstack/ev-node/pkg/signer" - "github.com/evstack/ev-node/pkg/signer/file" - "github.com/evstack/ev-node/pkg/store" - "github.com/celestiaorg/celestia-node/api/client" cnfiber "github.com/evstack/ev-node/tools/celestia-node-fiber" cnfibertest "github.com/evstack/ev-node/tools/celestia-node-fiber/testing" ) -const ( - evnodeBlockTime = 200 * time.Millisecond - evnodeDABlockTime = 1 * time.Second - evnodeHeaderNS = "ev-fib-ht" - evnodeDataNS = "ev-fib-da" - evnodeChainID = "ev-fiber-test" - evnodeBlockTimeout = 30 * time.Second - 
evnodePassphrase = "test-passphrase-evnode" -) - -// TestEvNode_FiberDA_Posting wires a full ev-node in-memory to the -// celestia-node-fiber adapter and verifies that block data is posted -// to the Fibre DA layer. The test: -// - Starts a single-validator Celestia chain + Fibre server + bridge -// - Creates a celestia-node-fiber adapter (block.FiberClient) -// - Constructs an ev-node aggregator node that uses the adapter as DA -// - Subscribes to the data namespace via adapter.Listen before uploading -// - Injects a transaction and waits for block production -// - Confirms the DA submitter pushed blobs to Fiber by receiving events -// on the subscription and round-tripping each through Download -func TestEvNode_FiberDA_Posting(t *testing.T) { +// TestEvNode_FiberDA_TwoNode wires an aggregator + a full-node ev-node +// pair onto an in-process Celestia chain + Fibre + bridge and asserts +// that: +// +// - the aggregator produces blocks at 200ms cadence and posts them +// to the Fibre DA layer; +// - a separate full node, sharing only the aggregator's genesis, +// consumes those blocks via Fibre Listen + Download and applies +// the same transactions the aggregator executed. 
+func TestEvNode_FiberDA_TwoNode(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) t.Cleanup(cancel) network := cnfibertest.StartNetwork(t, ctx) bridge := cnfibertest.StartBridge(t, ctx, network) + aggAdapter := newAdapter(t, ctx, network, bridge) + fnAdapter := newAdapter(t, ctx, network, bridge) + observer := newAdapter(t, ctx, network, bridge) + + head, err := observer.Head(ctx) + require.NoError(t, err, "querying bridge head") + t.Logf("bridge head at test start: %d", head) + + cnfibertest.RunEvNodeFibreTwoNodeFlow(t, ctx, aggAdapter, fnAdapter, observer, cnfibertest.EvNodeConfig{ + DAStartHeight: head, + }) +} + +func newAdapter(t *testing.T, ctx context.Context, network *cnfibertest.Network, bridge *cnfibertest.Bridge) *cnfiber.Adapter { + t.Helper() adapter, err := cnfiber.New(ctx, cnfiber.Config{ Client: client.Config{ ReadConfig: client.ReadConfig{ @@ -79,231 +64,5 @@ func TestEvNode_FiberDA_Posting(t *testing.T) { }, network.Consensus.Keyring) require.NoError(t, err, "constructing adapter") t.Cleanup(func() { _ = adapter.Close() }) - - // Subscribe to the header namespace BEFORE starting the node so we - // don't race against the first DA submission. fromHeight=0 follows - // the live tip. The adapter expects the 10-byte v0 namespace ID - // (the last 10 bytes of the full 29-byte namespace), matching what - // fiberDAClient.Submit extracts before calling fiber.Upload. 
- fullHeaderNS := datypes.NamespaceFromString(evnodeHeaderNS).Bytes() - headerNSID := fullHeaderNS[len(fullHeaderNS)-10:] - events, err := adapter.Listen(ctx, headerNSID, 0) - require.NoError(t, err, "starting fiber Listen on header namespace") - - rollnode, exec, nodeCleanup := newFiberEvNode(t, ctx, adapter) - t.Cleanup(nodeCleanup) - - nodeErrCh := make(chan error, 1) - go func() { - defer func() { - if r := recover(); r != nil { - nodeErrCh <- fmt.Errorf("node panicked: %v", r) - } - }() - nodeErrCh <- rollnode.Run(ctx) - }() - - txPayload := fmt.Sprintf("fiber-key=fiber-value-%d", time.Now().UnixNano()) - exec.InjectTx([]byte(txPayload)) - - require.Eventually(t, func() bool { - stats := exec.Stats() - t.Logf("blocks=%d txs=%d", stats.BlocksProduced, stats.TotalExecutedTxs) - return stats.BlocksProduced >= 1 && stats.TotalExecutedTxs >= 1 - }, evnodeBlockTimeout, 200*time.Millisecond, "ev-node should produce at least one block with the transaction") - - // Drain at least one Fiber BlobEvent from the subscription to prove - // the DA submitter pushed data through the fiber adapter's Upload - // path and the settlement landed on-chain. 
- var seen []block.FiberBlobEvent - require.Eventually(t, func() bool { - select { - case ev, ok := <-events: - if !ok { - return false - } - seen = append(seen, ev) - t.Logf("fiber event: blob_id=%x height=%d data_size=%d", - ev.BlobID, ev.Height, ev.DataSize) - return true - default: - return false - } - }, evnodeBlockTimeout, 500*time.Millisecond, "expected at least one Fiber BlobEvent from DA submission") - - for _, ev := range seen { - got, err := adapter.Download(ctx, ev.BlobID) - require.NoError(t, err, "adapter.Download blob_id=%x", ev.BlobID) - require.NotEmpty(t, got, "downloaded blob must not be empty") - t.Logf("download ok: blob_id=%x bytes=%d", ev.BlobID, len(got)) - } - - select { - case err := <-nodeErrCh: - t.Fatalf("node exited unexpectedly: %v", err) - default: - } -} - -type inMemExecutor struct { - mu sync.Mutex - data map[string]string - - txChan chan []byte - blocksProduced uint64 - totalExecutedTxs uint64 -} - -func newInMemExecutor() *inMemExecutor { - return &inMemExecutor{ - data: make(map[string]string), - txChan: make(chan []byte, 10000), - } -} - -func (e *inMemExecutor) InjectTx(tx []byte) { - select { - case e.txChan <- tx: - default: - } -} - -type execStats struct { - BlocksProduced uint64 - TotalExecutedTxs uint64 -} - -func (e *inMemExecutor) Stats() execStats { - e.mu.Lock() - defer e.mu.Unlock() - return execStats{BlocksProduced: e.blocksProduced, TotalExecutedTxs: e.totalExecutedTxs} -} - -func (e *inMemExecutor) InitChain(_ context.Context, _ time.Time, _ uint64, _ string) ([]byte, error) { - return []byte("inmem-genesis-root"), nil -} - -func (e *inMemExecutor) GetTxs(_ context.Context) ([][]byte, error) { - var txs [][]byte - for { - select { - case tx := <-e.txChan: - txs = append(txs, tx) - default: - return txs, nil - } - } -} - -func (e *inMemExecutor) ExecuteTxs(_ context.Context, txs [][]byte, _ uint64, _ time.Time, _ []byte) ([]byte, error) { - e.mu.Lock() - defer e.mu.Unlock() - for _, tx := range txs { - k, v, ok 
:= parseKV(tx) - if ok { - e.data[k] = v - } - } - e.blocksProduced++ - e.totalExecutedTxs += uint64(len(txs)) - return []byte(fmt.Sprintf("root-%d", e.blocksProduced)), nil -} - -func (e *inMemExecutor) SetFinal(_ context.Context, _ uint64) error { return nil } -func (e *inMemExecutor) Rollback(_ context.Context, _ uint64) error { return nil } -func (e *inMemExecutor) GetExecutionInfo(_ context.Context) (coreexecution.ExecutionInfo, error) { - return coreexecution.ExecutionInfo{MaxGas: 0}, nil -} -func (e *inMemExecutor) FilterTxs(_ context.Context, txs [][]byte, _, _ uint64, _ bool) ([]coreexecution.FilterStatus, error) { - st := make([]coreexecution.FilterStatus, len(txs)) - for i := range st { - st[i] = coreexecution.FilterOK - } - return st, nil -} - -func parseKV(tx []byte) (string, string, bool) { - s := string(tx) - for i := 0; i < len(s); i++ { - if s[i] == '=' { - return s[:i], s[i+1:], true - } - } - return "", "", false -} - -func newFiberEvNode(t *testing.T, ctx context.Context, fiberClient block.FiberClient) (node.Node, *inMemExecutor, func()) { - t.Helper() - - tmpDir := t.TempDir() - logger := zerolog.New(zerolog.NewTestWriter(t)).With().Timestamp().Logger() - - // Create a file-backed signer so the executor can sign blocks. - signerDir := tmpDir - fs, err := file.CreateFileSystemSigner(signerDir, []byte(evnodePassphrase)) - require.NoError(t, err, "creating file signer") - signerAddr, err := fs.GetAddress() - require.NoError(t, err, "getting signer address") - - // Generate a separate libp2p node key for P2P networking. 
- nodePrivKey, _, err := crypto.GenerateEd25519Key(rand.Reader) - require.NoError(t, err, "generating node key") - nodeKey := &key.NodeKey{PrivKey: nodePrivKey} - - genesis := genesispkg.NewGenesis(evnodeChainID, 1, time.Now(), signerAddr) - require.NoError(t, genesis.Validate(), "validating genesis") - - cfg := config.DefaultConfig() - cfg.RootDir = tmpDir - cfg.DBPath = "data" - cfg.Node.Aggregator = true - cfg.Node.BlockTime = config.DurationWrapper{Duration: evnodeBlockTime} - cfg.Node.LazyMode = false - cfg.DA.BlockTime = config.DurationWrapper{Duration: evnodeDABlockTime} - cfg.DA.Namespace = evnodeHeaderNS - cfg.DA.DataNamespace = evnodeDataNS - cfg.DA.BatchingStrategy = "immediate" - cfg.DA.Fiber.Enabled = true - cfg.DA.RequestTimeout = config.DurationWrapper{Duration: 60 * time.Second} - cfg.P2P.ListenAddress = "/ip4/0.0.0.0/tcp/0" - cfg.P2P.DisableConnectionGater = true - cfg.Instrumentation.Prometheus = false - cfg.Instrumentation.Pprof = false - cfg.RPC.Address = "127.0.0.1:0" - cfg.Log.Level = "debug" - cfg.Signer.SignerType = "file" - cfg.Signer.SignerPath = signerDir - - // Build the full signer via the factory (needed for consistency with - // how the real node boots). 
- signer, err := pkgsigner.NewSigner(ctx, &cfg, evnodePassphrase) - require.NoError(t, err, "creating signer via factory") - - ds, err := store.NewDefaultKVStore(tmpDir, cfg.DBPath, "testdb") - require.NoError(t, err, "creating datastore") - - executor := newInMemExecutor() - sequencer := solo.NewSoloSequencer(logger, []byte(genesis.ChainID), executor) - daClient := block.NewFiberDAClient(fiberClient, cfg, logger, 0) - p2pClient, err := p2p.NewClient(cfg.P2P, nodeKey.PrivKey, datastore.NewMapDatastore(), genesis.ChainID, logger, nil) - require.NoError(t, err, "creating p2p client") - - rollnode, err := node.NewNode( - cfg, - executor, - sequencer, - daClient, - signer, - p2pClient, - genesis, - ds, - node.DefaultMetricsProvider(cfg.Instrumentation), - logger, - node.NodeOptions{}, - ) - require.NoError(t, err, "creating node") - - return rollnode, executor, func() {} + return adapter } - -var _ coreexecution.Executor = (*inMemExecutor)(nil) From 1b0ad6ad6dd292d727fb2609a22ba701fe745de8 Mon Sep 17 00:00:00 2001 From: Vlad <13818348+walldiss@users.noreply.github.com> Date: Mon, 27 Apr 2026 13:06:27 +0200 Subject: [PATCH 4/6] test(celestia-node-fiber): docker repro for bad fibre-provider host MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reproduces the two production upload failures observed when an operator registers a Fibre provider with a host string that isn't in canonical form: fibre-client.error="rpc error: code = Unavailable desc = invalid target address http://10.0.37.242:7980, error info: address http://10.0.37.242:7980:443: too many colons in address" fibre-client.error="got invalid host 18.202.253.174:7980: parse \"18.202.253.174:7980\": first path segment in URL cannot contain colon" Root cause: x/valaddr `MsgSetFibreProviderInfo.ValidateBasic` only checks that the host is non-empty and ≤100 chars. Anything passes — including `http://...`, bare `host:port`, or arbitrary garbage. 
At read time the fibre client's `HostRegistry.GetHost` runs `url.Parse` on the registered host: bare `host:port` fails parsing when the host is an IP literal (a URL scheme must begin with a letter, so `18.202.253.174:7980` is parsed as a relative path whose first segment contains a `:` — exactly the quoted error), while `http://host:port` parses fine and breaks downstream because `grpc.NewClient` doesn't recognise `http` as a resolver scheme and appends a default `:443`, yielding the "too many colons" error.

The repro reuses the existing 4-validator + bridge docker stack:

1. Re-register every validator with a bad host (one bad form per subtest), confirming the chain accepts the registration.
2. Construct a fresh adapter so PullAll picks up the new state.
3. Attempt Upload — verifies it fails because no validator host can be dialed.
4. Restore the canonical `dns:///127.0.0.1:798X` registrations on cleanup so sibling tests on the shared stack remain runnable.

Subtests cover both production failure modes:

- http_scheme_prefix → `http://127.0.0.1:7980` triggers exactly "too many colons in address"
- bare_host_port → `127.0.0.1:7980` triggers exactly "first path segment in URL cannot contain colon"

Both subtests pass against the current chain (asserting the bug exists). Once `ValidateBasic` is tightened to require strict `host:port` form, this test's assertions need to flip to expect `setValHost` itself to fail.
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../testing/docker/bad_host_repro_test.go | 211 ++++++++++++++++++ 1 file changed, 211 insertions(+) create mode 100644 tools/celestia-node-fiber/testing/docker/bad_host_repro_test.go diff --git a/tools/celestia-node-fiber/testing/docker/bad_host_repro_test.go b/tools/celestia-node-fiber/testing/docker/bad_host_repro_test.go new file mode 100644 index 0000000000..877cfd79e5 --- /dev/null +++ b/tools/celestia-node-fiber/testing/docker/bad_host_repro_test.go @@ -0,0 +1,211 @@ +//go:build fibre_docker + +// bad_host_repro_test.go — reproduces the production "too many colons in +// address" / "first path segment in URL cannot contain colon" failures +// observed when an operator registers a Fibre provider with a host +// string that isn't in the canonical `dns:///host:port` form. +// +// Root cause: x/valaddr `MsgSetFibreProviderInfo.ValidateBasic` only +// checks that the host is non-empty and ≤100 chars. Anything else +// passes — including `http://10.0.37.242:7980`, bare `host:port`, or +// arbitrary garbage. At read time the fibre client's +// `HostRegistry.GetHost` runs `url.Parse(host)`; bare host:port fails +// that, while `http://...` passes and then breaks downstream because +// `grpc.NewClient` doesn't recognise `http` as a resolver scheme and +// appends a default `:443`, yielding `http://host:port:443` ("too +// many colons"). +// +// The expected fix is to require a strict `host:port` form in +// `ValidateBasic` (no scheme, no path, no userinfo). After that lands +// the chain rejects the registration tx itself and the assertions +// here flip — see assertChainAcceptsBadHost. +// +// Run with: +// +// go test -tags 'fibre fibre_docker' -count=1 -timeout 5m \ +// -run TestFibreClient_BadHostRegistration ./testing/docker/... 
+ +package docker_test + +import ( + "bytes" + "context" + "fmt" + "os/exec" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "github.com/celestiaorg/celestia-node/api/client" + + cnfiber "github.com/evstack/ev-node/tools/celestia-node-fiber" +) + +// canonicalHosts records the per-validator hosts that register-fsps.sh +// installs at boot. The repro test re-registers each validator with a +// broken host, asserts Upload fails, then restores these canonical +// values so other tests remain runnable. +var canonicalHosts = map[int]string{ + 0: "dns:///127.0.0.1:7980", + 1: "dns:///127.0.0.1:7981", + 2: "dns:///127.0.0.1:7982", + 3: "dns:///127.0.0.1:7983", +} + +// TestFibreClient_BadHostRegistration re-registers every validator with +// a malformed host string, confirms the chain accepts the registration +// (the bug), then confirms Upload fails because none of the validators +// can be dialed (the symptom). After each subtest the canonical +// registrations are restored so sibling tests on the shared docker +// stack continue to pass. +func TestFibreClient_BadHostRegistration(t *testing.T) { + cases := []struct { + name string + // hostFor returns the bad host string to register for the given + // validator index (0..3). 
+ hostFor func(i int) string + }{ + { + name: "http_scheme_prefix", + hostFor: func(i int) string { + return fmt.Sprintf("http://127.0.0.1:%d", 7980+i) + }, + }, + { + name: "bare_host_port", + hostFor: func(i int) string { + return fmt.Sprintf("127.0.0.1:%d", 7980+i) + }, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) + t.Cleanup(cancel) + + jwt := readBridgeJWT(t) + kr := readClientKeyring(t) + + t.Cleanup(func() { + restoreCtx, restoreCancel := context.WithTimeout(context.Background(), 90*time.Second) + defer restoreCancel() + for i := 0; i < 4; i++ { + if err := setValHost(restoreCtx, t, i, canonicalHosts[i]); err != nil { + t.Logf("WARNING: failed to restore val%d host: %v", i, err) + } + } + // One waitFor is enough — they're applied serially + // against val0's RPC and tx ordering guarantees the + // later ones land after the earlier one's check. + if err := waitForHost(restoreCtx, t, canonicalHosts[3]); err != nil { + t.Logf("WARNING: canonical hosts did not propagate within timeout: %v", err) + } + }) + + // Register every validator with the broken host. The chain + // should accept all of them — that's the bug. + for i := 0; i < 4; i++ { + bad := tc.hostFor(i) + require.NoError(t, setValHost(ctx, t, i, bad), + "chain accepted MsgSetFibreProviderInfo for val%d host=%q (no format validation)", i, bad) + } + // Wait until val3's bad host is observable; this is the + // last one we wrote, so its presence implies the others + // also propagated. + require.NoError(t, waitForHost(ctx, t, tc.hostFor(3)), + "bad registrations should be visible on chain") + + // Construct a FRESH adapter so PullAll picks up the just- + // updated registry rather than a cached canonical entry. 
+ adapter, err := cnfiber.New(ctx, cnfiber.Config{ + Client: client.Config{ + ReadConfig: client.ReadConfig{ + BridgeDAAddr: envOr("FIBRE_BRIDGE_ADDR", bridgeAddr), + DAAuthToken: jwt, + EnableDATLS: false, + }, + SubmitConfig: client.SubmitConfig{ + DefaultKeyName: clientAccount, + Network: chainID, + CoreGRPCConfig: client.CoreGRPCConfig{ + Addr: envOr("FIBRE_CONSENSUS_ADDR", consensusAddr), + }, + }, + }, + }, kr) + require.NoError(t, err, "constructing adapter") + t.Cleanup(func() { _ = adapter.Close() }) + + namespace := bytes.Repeat([]byte{0xcd}, 10) + payload := []byte(fmt.Sprintf("bad-host-repro-%s-%d", tc.name, time.Now().UnixNano())) + + uploadCtx, uploadCancel := context.WithTimeout(ctx, 60*time.Second) + defer uploadCancel() + + _, uploadErr := adapter.Upload(uploadCtx, namespace, payload) + require.Error(t, uploadErr, "Upload must fail when no validator host can be dialed") + t.Logf("upload failed as expected (%s): %v", tc.name, uploadErr) + }) + } +} + +func setValHost(ctx context.Context, t *testing.T, valIdx int, host string) error { + t.Helper() + valName := fmt.Sprintf("val%d", valIdx) + home := fmt.Sprintf("/shared/%s/.celestia-app", valName) + cmd := exec.CommandContext(ctx, "docker", "compose", "exec", "-T", valName, + "celestia-appd", "tx", "valaddr", "set-host", host, + "--from", "validator", + "--keyring-backend", "test", + "--home", home, + "--chain-id", chainID, + "--node", fmt.Sprintf("tcp://%s:26657", valName), + "--fees", "5000utia", + "--yes", + ) + cmd.Dir = mustDockerDir(t) + out, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("set-host %q on %s: %w: %s", host, valName, err, string(out)) + } + // Brief pause so the next set-host targets a tx with an incremented + // account sequence (sequential txs from the same validator account + // can race the mempool's nonce check). 
+ time.Sleep(2 * time.Second) + return nil +} + +func waitForHost(ctx context.Context, t *testing.T, want string) error { + t.Helper() + deadline := time.Now().Add(45 * time.Second) + for time.Now().Before(deadline) { + if ctx.Err() != nil { + return ctx.Err() + } + json, err := queryAllProviders(ctx, t) + if err == nil && strings.Contains(json, fmt.Sprintf(`"host":"%s"`, want)) { + return nil + } + time.Sleep(2 * time.Second) + } + return fmt.Errorf("timed out waiting for host=%q to appear in providers query", want) +} + +func queryAllProviders(ctx context.Context, t *testing.T) (string, error) { + t.Helper() + cmd := exec.CommandContext(ctx, "docker", "compose", "exec", "-T", "val0", + "celestia-appd", "query", "valaddr", "providers", + "--home", "/shared/val0/.celestia-app", + "--node", "tcp://val0:26657", + "--output", "json", + ) + cmd.Dir = mustDockerDir(t) + out, err := cmd.CombinedOutput() + if err != nil { + return "", fmt.Errorf("query providers: %w: %s", err, string(out)) + } + return string(out), nil +} From df95668ea1c6bd286d88f51576557ee4c2972d31 Mon Sep 17 00:00:00 2001 From: Vlad <13818348+walldiss@users.noreply.github.com> Date: Mon, 27 Apr 2026 13:31:04 +0200 Subject: [PATCH 5/6] test(celestia-node-fiber): add dns:/// positive control to host-format matrix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Renames TestFibreClient_BadHostRegistration → TestFibreClient_HostRegistrationFormats and adds a third subtest that re-registers all four validators with `dns:///host:port` and asserts Upload SUCCEEDS. Together with the two reject cases this empirically demonstrates that today's chain accepts any host string but only the `dns:///host:port` form survives end-to-end: http://host:port → "too many colons in address" host:port → "first path segment in URL cannot contain colon" dns:///host:port → upload ok The first two reproduce the operator-reported production warnings verbatim. 
The third is the positive control showing why talis and register-fsps.sh both prepend `dns:///` — that prefix is the only form gRPC's resolver registry recognises among URL-parseable inputs. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../testing/docker/bad_host_repro_test.go | 111 +++++++++++++----- 1 file changed, 79 insertions(+), 32 deletions(-) diff --git a/tools/celestia-node-fiber/testing/docker/bad_host_repro_test.go b/tools/celestia-node-fiber/testing/docker/bad_host_repro_test.go index 877cfd79e5..91c49f53db 100644 --- a/tools/celestia-node-fiber/testing/docker/bad_host_repro_test.go +++ b/tools/celestia-node-fiber/testing/docker/bad_host_repro_test.go @@ -1,29 +1,36 @@ //go:build fibre_docker -// bad_host_repro_test.go — reproduces the production "too many colons in -// address" / "first path segment in URL cannot contain colon" failures -// observed when an operator registers a Fibre provider with a host -// string that isn't in the canonical `dns:///host:port` form. +// bad_host_repro_test.go — empirically validates which fibre provider +// host formats survive end-to-end through the chain + fibre client + +// gRPC dialer. Three formats are exercised: +// +// - `http://host:port` : reproduces production error +// "too many colons in address" +// - `host:port` : reproduces production error +// "first path segment in URL cannot +// contain colon" +// - `dns:///host:port` : the only working form today // // Root cause: x/valaddr `MsgSetFibreProviderInfo.ValidateBasic` only -// checks that the host is non-empty and ≤100 chars. Anything else -// passes — including `http://10.0.37.242:7980`, bare `host:port`, or -// arbitrary garbage. At read time the fibre client's +// checks that the host is non-empty and ≤100 chars, so any of the +// above is accepted on chain. 
At read time the fibre client's // `HostRegistry.GetHost` runs `url.Parse(host)`; bare host:port fails // that, while `http://...` passes and then breaks downstream because // `grpc.NewClient` doesn't recognise `http` as a resolver scheme and // appends a default `:443`, yielding `http://host:port:443` ("too -// many colons"). +// many colons"). Only `dns:///host:port` parses as a URL AND is a +// gRPC-known resolver scheme, so it works end-to-end. // // The expected fix is to require a strict `host:port` form in // `ValidateBasic` (no scheme, no path, no userinfo). After that lands -// the chain rejects the registration tx itself and the assertions -// here flip — see assertChainAcceptsBadHost. +// the chain rejects the registration tx for both `http://...` and +// `dns:///...` and only `host:port` succeeds — assertions in this +// test will need to flip. // // Run with: // // go test -tags 'fibre fibre_docker' -count=1 -timeout 5m \ -// -run TestFibreClient_BadHostRegistration ./testing/docker/... +// -run TestFibreClient_HostRegistrationFormats ./testing/docker/... package docker_test @@ -54,30 +61,61 @@ var canonicalHosts = map[int]string{ 3: "dns:///127.0.0.1:7983", } -// TestFibreClient_BadHostRegistration re-registers every validator with -// a malformed host string, confirms the chain accepts the registration -// (the bug), then confirms Upload fails because none of the validators -// can be dialed (the symptom). After each subtest the canonical -// registrations are restored so sibling tests on the shared docker -// stack continue to pass. -func TestFibreClient_BadHostRegistration(t *testing.T) { +// TestFibreClient_HostRegistrationFormats re-registers every validator +// with a particular host-string format, then attempts an Upload through +// a fresh adapter and asserts whether the upload succeeds or fails. 
+// +// The matrix establishes empirically which formats the chain + fibre +// client accept end-to-end: +// +// - http_scheme_prefix → fails with "too many colons in address" +// - bare_host_port → fails with "first path segment in URL ..." +// - dns_prefix → succeeds (this is the only working form) +// +// The two failing cases exactly reproduce the production warnings the +// operator saw. The succeeding case is the positive control showing +// `dns:///host:port` is the working format today, which is what the +// proposed valaddr fix changes (it would make `host:port` succeed and +// `dns:///` fail). +// +// After each subtest the canonical registrations are restored so +// sibling tests on the shared docker stack continue to pass. +func TestFibreClient_HostRegistrationFormats(t *testing.T) { cases := []struct { name string - // hostFor returns the bad host string to register for the given + // hostFor returns the host string to register for the given // validator index (0..3). hostFor func(i int) string + // wantUploadErr, when non-empty, marks this case as expected to + // fail Upload; the substring must appear in the resulting error + // chain (we look at the per-validator warning; the outer error + // is "not enough voting power" once enough fail). + wantUploadErr string }{ { name: "http_scheme_prefix", hostFor: func(i int) string { return fmt.Sprintf("http://127.0.0.1:%d", 7980+i) }, + // Adapter uploads return the aggregate error; the + // per-validator dial error is logged, not bubbled. We + // assert the aggregate ("not enough voting power") here + // and rely on log capture below for the specific message. 
+ wantUploadErr: "not enough voting power", }, { name: "bare_host_port", hostFor: func(i int) string { return fmt.Sprintf("127.0.0.1:%d", 7980+i) }, + wantUploadErr: "not enough voting power", + }, + { + name: "dns_prefix", + hostFor: func(i int) string { + return fmt.Sprintf("dns:///127.0.0.1:%d", 7980+i) + }, + // No wantUploadErr — Upload should succeed. }, } @@ -105,18 +143,19 @@ func TestFibreClient_BadHostRegistration(t *testing.T) { } }) - // Register every validator with the broken host. The chain - // should accept all of them — that's the bug. + // Register every validator with the chosen host format. + // The chain accepts all of these today — even the broken + // ones — because ValidateBasic only checks length. for i := 0; i < 4; i++ { - bad := tc.hostFor(i) - require.NoError(t, setValHost(ctx, t, i, bad), - "chain accepted MsgSetFibreProviderInfo for val%d host=%q (no format validation)", i, bad) + h := tc.hostFor(i) + require.NoError(t, setValHost(ctx, t, i, h), + "chain should accept set-host for val%d host=%q on the current code", i, h) } - // Wait until val3's bad host is observable; this is the - // last one we wrote, so its presence implies the others - // also propagated. + // Wait until val3's host is observable; this is the last + // one we wrote, so its presence implies the others also + // propagated. require.NoError(t, waitForHost(ctx, t, tc.hostFor(3)), - "bad registrations should be visible on chain") + "%s registrations should be visible on chain", tc.name) // Construct a FRESH adapter so PullAll picks up the just- // updated registry rather than a cached canonical entry. 
@@ -140,14 +179,22 @@ func TestFibreClient_BadHostRegistration(t *testing.T) { t.Cleanup(func() { _ = adapter.Close() }) namespace := bytes.Repeat([]byte{0xcd}, 10) - payload := []byte(fmt.Sprintf("bad-host-repro-%s-%d", tc.name, time.Now().UnixNano())) + payload := []byte(fmt.Sprintf("host-format-repro-%s-%d", tc.name, time.Now().UnixNano())) uploadCtx, uploadCancel := context.WithTimeout(ctx, 60*time.Second) defer uploadCancel() - _, uploadErr := adapter.Upload(uploadCtx, namespace, payload) - require.Error(t, uploadErr, "Upload must fail when no validator host can be dialed") - t.Logf("upload failed as expected (%s): %v", tc.name, uploadErr) + res, uploadErr := adapter.Upload(uploadCtx, namespace, payload) + if tc.wantUploadErr != "" { + require.Error(t, uploadErr, "Upload must fail when no validator host can be dialed") + require.Contains(t, uploadErr.Error(), tc.wantUploadErr, + "upload error should match expected aggregate failure") + t.Logf("upload failed as expected (%s): %v", tc.name, uploadErr) + } else { + require.NoError(t, uploadErr, "Upload should succeed for %s host format", tc.name) + require.NotEmpty(t, res.BlobID) + t.Logf("upload ok (%s): blob_id=%x", tc.name, res.BlobID) + } }) } } From 5d7c17bdc9a5efdddd155fe9e05a194eeedbc38e Mon Sep 17 00:00:00 2001 From: Vlad <13818348+walldiss@users.noreply.github.com> Date: Mon, 27 Apr 2026 14:55:41 +0200 Subject: [PATCH 6/6] test(celestia-node-fiber): adapt docker scaffold to host:port validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The chain (built from celestia-app feat/fibre-payments via the bumped Dockerfile.app default ref) now enforces strict host:port form on MsgSetFibreProviderInfo.ValidateBasic. 
Adjusts the docker stack accordingly: - Dockerfile.app default CELESTIA_APP_REF: main → feat/fibre-payments - Dockerfile.bridge default CELESTIA_NODE_REF: feature/fibre → feature/fibre-experimental (which carries the matching app bump) - register-fsps.sh registers plain `127.0.0.1:798X` (was `dns:///...`) - docker_test.go celestia-app v8 → v9 imports (cascades from the julien/fiber bump merged in #3289) bad_host_repro_test.go matrix flips: - host_port → set-host succeeds, Upload succeeds (positive) - http_prefix → set-host fails at ValidateBasic - dns_prefix → set-host fails at ValidateBasic …with assertion against `host must be in host:port form` from the chain's response. Cleanup uses --output json so the success-code check parses reliably; sleep widened to 4s between consecutive set-host calls so the validator account's mempool nonce settles. Verified locally on docker-arm64: TestDockerShowcase PASS 5.20s TestEvNode_FiberDA_Docker PASS 8.00s TestFibreClient_HostRegistrationFormats PASS 80.01s Co-Authored-By: Claude Opus 4.7 (1M context) --- .../testing/docker/Dockerfile.app | 2 +- .../testing/docker/Dockerfile.bridge | 2 +- .../testing/docker/README.md | 2 +- .../testing/docker/bad_host_repro_test.go | 170 +++++++++--------- .../testing/docker/docker_test.go | 4 +- .../testing/docker/evnode_docker_test.go | 2 +- .../testing/docker/scripts/register-fsps.sh | 11 +- 7 files changed, 95 insertions(+), 98 deletions(-) diff --git a/tools/celestia-node-fiber/testing/docker/Dockerfile.app b/tools/celestia-node-fiber/testing/docker/Dockerfile.app index 7ecb521f81..380258776b 100644 --- a/tools/celestia-node-fiber/testing/docker/Dockerfile.app +++ b/tools/celestia-node-fiber/testing/docker/Dockerfile.app @@ -7,7 +7,7 @@ # fibre development lives. 
ARG GO_VERSION=1.26.1 ARG CELESTIA_APP_REPO=https://github.com/celestiaorg/celestia-app.git -ARG CELESTIA_APP_REF=main +ARG CELESTIA_APP_REF=feat/fibre-payments FROM golang:${GO_VERSION}-bookworm AS build ARG CELESTIA_APP_REPO diff --git a/tools/celestia-node-fiber/testing/docker/Dockerfile.bridge b/tools/celestia-node-fiber/testing/docker/Dockerfile.bridge index 11ec5f2c95..7ba39d741f 100644 --- a/tools/celestia-node-fiber/testing/docker/Dockerfile.bridge +++ b/tools/celestia-node-fiber/testing/docker/Dockerfile.bridge @@ -3,7 +3,7 @@ # Listen path, and (on the read-only side) the fibre namespace API. ARG GO_VERSION=1.26.1 ARG CELESTIA_NODE_REPO=https://github.com/celestiaorg/celestia-node.git -ARG CELESTIA_NODE_REF=feature/fibre +ARG CELESTIA_NODE_REF=feature/fibre-experimental FROM golang:${GO_VERSION}-bookworm AS build ARG CELESTIA_NODE_REPO diff --git a/tools/celestia-node-fiber/testing/docker/README.md b/tools/celestia-node-fiber/testing/docker/README.md index 3603bf0330..58733abd3c 100644 --- a/tools/celestia-node-fiber/testing/docker/README.md +++ b/tools/celestia-node-fiber/testing/docker/README.md @@ -17,7 +17,7 @@ correctly, but it doesn't exercise: satisfies it), - inter-validator P2P, - multiple Fibre servers contributing partial signatures, -- the dns:/// host registry resolution path, +- the host:port registry resolution path, - the bridge syncing real headers off a network it doesn't itself drive. This stack does. diff --git a/tools/celestia-node-fiber/testing/docker/bad_host_repro_test.go b/tools/celestia-node-fiber/testing/docker/bad_host_repro_test.go index 91c49f53db..520e5d28a3 100644 --- a/tools/celestia-node-fiber/testing/docker/bad_host_repro_test.go +++ b/tools/celestia-node-fiber/testing/docker/bad_host_repro_test.go @@ -2,30 +2,29 @@ // bad_host_repro_test.go — empirically validates which fibre provider // host formats survive end-to-end through the chain + fibre client + -// gRPC dialer. 
Three formats are exercised: +// gRPC dialer against a docker stack built from celestia-app +// `feat/fibre-payments` (i.e. a chain with the strict host:port +// validation in x/valaddr). // -// - `http://host:port` : reproduces production error -// "too many colons in address" -// - `host:port` : reproduces production error -// "first path segment in URL cannot -// contain colon" -// - `dns:///host:port` : the only working form today +// Three formats are exercised: // -// Root cause: x/valaddr `MsgSetFibreProviderInfo.ValidateBasic` only -// checks that the host is non-empty and ≤100 chars, so any of the -// above is accepted on chain. At read time the fibre client's -// `HostRegistry.GetHost` runs `url.Parse(host)`; bare host:port fails -// that, while `http://...` passes and then breaks downstream because -// `grpc.NewClient` doesn't recognise `http` as a resolver scheme and -// appends a default `:443`, yielding `http://host:port:443` ("too -// many colons"). Only `dns:///host:port` parses as a URL AND is a -// gRPC-known resolver scheme, so it works end-to-end. +// - `host:port` : the canonical accepted form. Upload +// succeeds end-to-end. +// - `http://host:port` : rejected by `MsgSetFibreProviderInfo` +// `ValidateBasic` — set-host tx fails. +// - `dns:///host:port` : also rejected by `ValidateBasic` for +// the same reason. Used to be the only +// working form pre-fix, see +// celestia-app PR #7183. // -// The expected fix is to require a strict `host:port` form in -// `ValidateBasic` (no scheme, no path, no userinfo). After that lands -// the chain rejects the registration tx for both `http://...` and -// `dns:///...` and only `host:port` succeeds — assertions in this -// test will need to flip. +// Pre-fix (celestia-app on `main` without #7183): the chain accepted +// every string, the failures surfaced at upload time as either +// "too many colons in address" (http:// case) or +// "first path segment in URL cannot contain colon" (host:port case). 
+// Those production-symptom assertions live in this test's git history +// before the fix landed; once the chain enforces format, the failure +// surfaces earlier (set-host tx rejection) which is what we assert +// here. // // Run with: // @@ -52,31 +51,22 @@ import ( // canonicalHosts records the per-validator hosts that register-fsps.sh // installs at boot. The repro test re-registers each validator with a -// broken host, asserts Upload fails, then restores these canonical -// values so other tests remain runnable. +// chosen format, asserts the resulting Upload behaviour, then restores +// these canonical values so other tests remain runnable. var canonicalHosts = map[int]string{ - 0: "dns:///127.0.0.1:7980", - 1: "dns:///127.0.0.1:7981", - 2: "dns:///127.0.0.1:7982", - 3: "dns:///127.0.0.1:7983", + 0: "127.0.0.1:7980", + 1: "127.0.0.1:7981", + 2: "127.0.0.1:7982", + 3: "127.0.0.1:7983", } -// TestFibreClient_HostRegistrationFormats re-registers every validator -// with a particular host-string format, then attempts an Upload through -// a fresh adapter and asserts whether the upload succeeds or fails. +// TestFibreClient_HostRegistrationFormats exercises three host formats +// against the chain's MsgSetFibreProviderInfo + the fibre client's +// HostRegistry + gRPC dialer: // -// The matrix establishes empirically which formats the chain + fibre -// client accept end-to-end: -// -// - http_scheme_prefix → fails with "too many colons in address" -// - bare_host_port → fails with "first path segment in URL ..." -// - dns_prefix → succeeds (this is the only working form) -// -// The two failing cases exactly reproduce the production warnings the -// operator saw. The succeeding case is the positive control showing -// `dns:///host:port` is the working format today, which is what the -// proposed valaddr fix changes (it would make `host:port` succeed and -// `dns:///` fail). 
+// - host_port → set-host succeeds, Upload succeeds (positive) +// - http_prefix → set-host fails at chain ValidateBasic (negative) +// - dns_prefix → set-host fails at chain ValidateBasic (negative) // // After each subtest the canonical registrations are restored so // sibling tests on the shared docker stack continue to pass. @@ -86,36 +76,34 @@ func TestFibreClient_HostRegistrationFormats(t *testing.T) { // hostFor returns the host string to register for the given // validator index (0..3). hostFor func(i int) string - // wantUploadErr, when non-empty, marks this case as expected to - // fail Upload; the substring must appear in the resulting error - // chain (we look at the per-validator warning; the outer error - // is "not enough voting power" once enough fail). - wantUploadErr string + // wantSetHostErr, when non-empty, marks this case as expected + // to fail at `tx valaddr set-host` time. The substring must + // appear in the CLI's stderr/stdout output (the chain's + // ValidateBasic / MsgSetFibreProviderInfo response includes + // "host must be in host:port form" or similar). + wantSetHostErr string }{ { - name: "http_scheme_prefix", + name: "host_port", hostFor: func(i int) string { - return fmt.Sprintf("http://127.0.0.1:%d", 7980+i) + return fmt.Sprintf("127.0.0.1:%d", 7980+i) }, - // Adapter uploads return the aggregate error; the - // per-validator dial error is logged, not bubbled. We - // assert the aggregate ("not enough voting power") here - // and rely on log capture below for the specific message. - wantUploadErr: "not enough voting power", }, { - name: "bare_host_port", + name: "http_prefix", hostFor: func(i int) string { - return fmt.Sprintf("127.0.0.1:%d", 7980+i) + return fmt.Sprintf("http://127.0.0.1:%d", 7980+i) }, - wantUploadErr: "not enough voting power", + // celestia-app's x/valaddr ValidateBasic returns this + // error chain via the SDK CLI broadcast path. 
+ wantSetHostErr: "host must be in host:port form", }, { name: "dns_prefix", hostFor: func(i int) string { return fmt.Sprintf("dns:///127.0.0.1:%d", 7980+i) }, - // No wantUploadErr — Upload should succeed. + wantSetHostErr: "host must be in host:port form", }, } @@ -124,9 +112,6 @@ func TestFibreClient_HostRegistrationFormats(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) t.Cleanup(cancel) - jwt := readBridgeJWT(t) - kr := readClientKeyring(t) - t.Cleanup(func() { restoreCtx, restoreCancel := context.WithTimeout(context.Background(), 90*time.Second) defer restoreCancel() @@ -135,30 +120,39 @@ func TestFibreClient_HostRegistrationFormats(t *testing.T) { t.Logf("WARNING: failed to restore val%d host: %v", i, err) } } - // One waitFor is enough — they're applied serially - // against val0's RPC and tx ordering guarantees the - // later ones land after the earlier one's check. if err := waitForHost(restoreCtx, t, canonicalHosts[3]); err != nil { t.Logf("WARNING: canonical hosts did not propagate within timeout: %v", err) } }) - // Register every validator with the chosen host format. - // The chain accepts all of these today — even the broken - // ones — because ValidateBasic only checks length. + // For the negative cases we register only val0 — that's + // enough to demonstrate the chain rejects the format. We + // don't bother trying all four since the rejection comes + // from ValidateBasic, which doesn't depend on which + // validator submits the tx. 
+ if tc.wantSetHostErr != "" { + err := setValHost(ctx, t, 0, tc.hostFor(0)) + require.Error(t, err, "chain must reject %q at set-host tx", tc.hostFor(0)) + require.Contains(t, err.Error(), tc.wantSetHostErr, + "set-host error should match expected ValidateBasic message") + t.Logf("set-host rejected as expected (%s): %v", tc.name, err) + return + } + + // Positive case: register all four validators with the + // canonical format, then run a real Upload to confirm the + // gRPC dial path works end-to-end. for i := 0; i < 4; i++ { h := tc.hostFor(i) require.NoError(t, setValHost(ctx, t, i, h), - "chain should accept set-host for val%d host=%q on the current code", i, h) + "chain should accept set-host for val%d host=%q", i, h) } - // Wait until val3's host is observable; this is the last - // one we wrote, so its presence implies the others also - // propagated. require.NoError(t, waitForHost(ctx, t, tc.hostFor(3)), "%s registrations should be visible on chain", tc.name) - // Construct a FRESH adapter so PullAll picks up the just- - // updated registry rather than a cached canonical entry. 
+ jwt := readBridgeJWT(t) + kr := readClientKeyring(t) + adapter, err := cnfiber.New(ctx, cnfiber.Config{ Client: client.Config{ ReadConfig: client.ReadConfig{ @@ -185,16 +179,9 @@ func TestFibreClient_HostRegistrationFormats(t *testing.T) { defer uploadCancel() res, uploadErr := adapter.Upload(uploadCtx, namespace, payload) - if tc.wantUploadErr != "" { - require.Error(t, uploadErr, "Upload must fail when no validator host can be dialed") - require.Contains(t, uploadErr.Error(), tc.wantUploadErr, - "upload error should match expected aggregate failure") - t.Logf("upload failed as expected (%s): %v", tc.name, uploadErr) - } else { - require.NoError(t, uploadErr, "Upload should succeed for %s host format", tc.name) - require.NotEmpty(t, res.BlobID) - t.Logf("upload ok (%s): blob_id=%x", tc.name, res.BlobID) - } + require.NoError(t, uploadErr, "Upload should succeed for %s host format", tc.name) + require.NotEmpty(t, res.BlobID) + t.Logf("upload ok (%s): blob_id=%x", tc.name, res.BlobID) }) } } @@ -211,17 +198,26 @@ func setValHost(ctx context.Context, t *testing.T, valIdx int, host string) erro "--chain-id", chainID, "--node", fmt.Sprintf("tcp://%s:26657", valName), "--fees", "5000utia", + "--output", "json", "--yes", ) cmd.Dir = mustDockerDir(t) out, err := cmd.CombinedOutput() + // Wider sleep so the next set-host on the same validator account + // doesn't race the mempool's nonce check ("tx already exists"). + defer time.Sleep(4 * time.Second) if err != nil { + // Two flavours: (a) pre-broadcast ValidateBasic rejection — CLI + // exits non-zero with the validation error in stderr, no JSON + // payload; (b) broadcast accepted but the chain returned a + // non-zero code in the JSON ack. Surface either to the caller. 
return fmt.Errorf("set-host %q on %s: %w: %s", host, valName, err, string(out)) } - // Brief pause so the next set-host targets a tx with an incremented - // account sequence (sequential txs from the same validator account - // can race the mempool's nonce check). - time.Sleep(2 * time.Second) + // Successful broadcast: parse the JSON to confirm the chain code is 0. + if !strings.Contains(string(out), `"code":0`) { + return fmt.Errorf("chain rejected set-host %q on %s (non-zero code): %s", + host, valName, string(out)) + } return nil } diff --git a/tools/celestia-node-fiber/testing/docker/docker_test.go b/tools/celestia-node-fiber/testing/docker/docker_test.go index 6e1d196414..580a816b1d 100644 --- a/tools/celestia-node-fiber/testing/docker/docker_test.go +++ b/tools/celestia-node-fiber/testing/docker/docker_test.go @@ -34,8 +34,8 @@ import ( "github.com/cosmos/cosmos-sdk/crypto/keyring" "github.com/stretchr/testify/require" - "github.com/celestiaorg/celestia-app/v8/app" - "github.com/celestiaorg/celestia-app/v8/app/encoding" + "github.com/celestiaorg/celestia-app/v9/app" + "github.com/celestiaorg/celestia-app/v9/app/encoding" "github.com/celestiaorg/celestia-node/api/client" cnfiber "github.com/evstack/ev-node/tools/celestia-node-fiber" diff --git a/tools/celestia-node-fiber/testing/docker/evnode_docker_test.go b/tools/celestia-node-fiber/testing/docker/evnode_docker_test.go index bd79aec9c2..a39bfd1113 100644 --- a/tools/celestia-node-fiber/testing/docker/evnode_docker_test.go +++ b/tools/celestia-node-fiber/testing/docker/evnode_docker_test.go @@ -41,7 +41,7 @@ import ( // - real consensus 2/3-quorum signature aggregation (4 validators), // - inter-validator P2P, // - 4 distinct fibre servers cooperating on Upload row distribution, -// - dns:/// host registry resolution against an external chain, +// - host:port registry resolution against an external chain, // - a bridge that's syncing real headers, not driving block production. 
func TestEvNode_FiberDA_Docker(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) diff --git a/tools/celestia-node-fiber/testing/docker/scripts/register-fsps.sh b/tools/celestia-node-fiber/testing/docker/scripts/register-fsps.sh index 9603b1e421..d30a80b36a 100755 --- a/tools/celestia-node-fiber/testing/docker/scripts/register-fsps.sh +++ b/tools/celestia-node-fiber/testing/docker/scripts/register-fsps.sh @@ -2,10 +2,11 @@ # register-fsps.sh — runs once after validators are producing blocks. # # Submits MsgSetFibreProviderInfo for each validator so the chain's -# valaddr module maps consensus address → fibre server address. The -# `dns:///` URI prefix is required by the fibre client's gRPC dialer -# (a bare host:port fails URL parsing — same gotcha documented in -# tools/talis/fibre_setup.go). +# valaddr module maps consensus address → fibre server address. +# x/valaddr now requires plain `host:port` (no `dns:///` prefix or any +# scheme), see celestia-app PR #7183. gRPC's default passthrough +# resolver dials `host:port` directly, so the prefix is no longer +# needed (and is now rejected at registration tx time). # # Also funds the test client account's escrow so MsgPayForFibre can # settle in the docker network. @@ -44,7 +45,7 @@ for i in $(seq 0 $((NUM_VALIDATORS - 1))); do # driver running on the docker host can dial each fibre server # directly. compose.yaml maps val_i:7980 → host:798$i. host_port=$((FIBRE_PORT + i)) - "$APP" tx valaddr set-host "dns:///127.0.0.1:$host_port" \ + "$APP" tx valaddr set-host "127.0.0.1:$host_port" \ --from validator --keyring-backend test --home "$home" \ --chain-id "$CHAIN_ID" --node "tcp://val$i:26657" \ --fees "$FEES" --yes