From 29f0dfc141b249513afb93c730b0cc1f2bed37d4 Mon Sep 17 00:00:00 2001 From: evsio0n Date: Wed, 27 May 2026 21:41:36 +0800 Subject: [PATCH] feat(core): exit-node forwarding over NyUnicast Adds explicit exit-node forwarding as a new node-to-node packet type on top of polyamide TC, reworking the earlier exit-only encapsulation in response to maintainer review on #121: - binary NodeIdBin (uint16) replaces variable-length node id strings in the on-the-wire header. The mapping is rebuilt deterministically from CentralCfg on every apply so all peers agree. - the dataplane filter reads an atomic ExitFilterSnapshot pointer instead of touching n.LocalCfg / n.CentralCfg / RouterState. The snapshot is rebuilt on the dispatch goroutine on config apply and on every route mutation, and bundles the per-node next-hop table so the filter performs a single atomic.Load per packet. - the wire format is documented in core/nylon_unicast.go as a generic NyUnicast envelope (fixed 6-byte header, subtype/hop_limit/dst/src), leaving room for future node-targeted subtypes (state query, config push) without re-inventing the encoding. LocalCfg gains AdvertiseExitNode and ExitNode; the corresponding NylonOptions overrides are wired through entrypoint.go. ComputeSysRouteTable adds 0.0.0.0/0 when ExitNode is set and excludes the default-local ranges (loopback, link-local, multicast) so they never tunnel. Tests: - state: NodeIdBin map determinism, zero reserved. - core: header round-trip, source-ownership rule, sys route table. - e2e (tagged): three-node client/relay/exit topology with ExitEncap/ExitTransit/ExitDecap traces. 
Refs #121 --- core/entrypoint.go | 6 + core/nylon.go | 14 ++ core/nylon_apply.go | 4 + core/nylon_tc.go | 82 ++++++++ core/nylon_unicast.go | 393 +++++++++++++++++++++++++++++++++++++ core/nylon_unicast_test.go | 47 +++++ core/router.go | 21 +- core/router_exit_test.go | 65 ++++++ core/router_utils.go | 40 ++++ e2e/exit_node_test.go | 100 ++++++++++ state/config.go | 5 + state/default_excludes.go | 39 ++++ state/node_id_bin.go | 103 ++++++++++ state/node_id_bin_test.go | 60 ++++++ state/tunables.go | 8 + state/validation.go | 17 ++ 16 files changed, 1003 insertions(+), 1 deletion(-) create mode 100644 core/nylon_unicast.go create mode 100644 core/nylon_unicast_test.go create mode 100644 core/router_exit_test.go create mode 100644 e2e/exit_node_test.go create mode 100644 state/default_excludes.go create mode 100644 state/node_id_bin.go create mode 100644 state/node_id_bin_test.go diff --git a/core/entrypoint.go b/core/entrypoint.go index 226268b8..e6606fe3 100644 --- a/core/entrypoint.go +++ b/core/entrypoint.go @@ -120,6 +120,12 @@ func Bootstrap(centralPath, nodePath, logPath string, verbose bool, opts state.N if logPath != "" { nodeCfg.LogPath = logPath } + if opts.AdvertiseExitNodeSet { + nodeCfg.AdvertiseExitNode = opts.AdvertiseExitNode + } + if opts.ExitNodeSet { + nodeCfg.ExitNode = opts.ExitNode + } state.ExpandCentralConfig(centralCfg) if err = state.CentralConfigValidator(centralCfg); err != nil { diff --git a/core/nylon.go b/core/nylon.go index aa6f31d7..90a17c2b 100644 --- a/core/nylon.go +++ b/core/nylon.go @@ -40,6 +40,17 @@ type Nylon struct { PingBuf *ttlcache.Cache[uint64, EpPing] PeerMap atomic.Pointer[map[state.NyPublicKey]state.NodeId] + // NodeIdMap maps NodeId<->binary node id, refreshed on every central + // config apply. Read on the dataplane to encode and decode unicast + // packet headers without referencing the live CentralCfg. 
+ NodeIdMap atomic.Pointer[state.NodeIdMap] + + // ExitFilter holds the immutable per-packet state needed by the exit + // filter, snapshotted on every config apply. The filter reads only + // this pointer, never the live CentralCfg or LocalCfg, since access + // to those off the dispatch goroutine would otherwise require locks. + ExitFilter atomic.Pointer[ExitFilterSnapshot] + router struct { LastStarvationRequest time.Time IO map[state.NodeId]*IOPending @@ -176,6 +187,9 @@ func (n *Nylon) Init() error { if err != nil { return err } + if err := n.refreshNodeBindings(); err != nil { + return err + } n.PingBuf = ttlcache.New[uint64, EpPing]( ttlcache.WithTTL[uint64, EpPing](5*time.Second), diff --git a/core/nylon_apply.go b/core/nylon_apply.go index b5ad501b..44dee3e7 100644 --- a/core/nylon_apply.go +++ b/core/nylon_apply.go @@ -41,6 +41,10 @@ func (n *Nylon) ApplyCentralConfig(cfg *state.CentralCfg) (ApplyResult, error) { n.reconcileAdvertisedPrefixes(next) n.CentralCfg = *next + if err := n.refreshNodeBindings(); err != nil { + return ApplyRejected, err + } + if err := n.SyncWireGuard(); err != nil { return ApplyRejected, err } diff --git a/core/nylon_tc.go b/core/nylon_tc.go index cce08160..751207d1 100644 --- a/core/nylon_tc.go +++ b/core/nylon_tc.go @@ -20,6 +20,53 @@ const ( func (n *Nylon) InstallTC() { t := n.Trace + // exit-encap filter: outbound IP packets that fall off our routing + // table get wrapped in a NyUnicast / exit packet bound for the + // configured exit node. Installed before the generic forwarder so + // that locally-originated traffic to "the internet" gets captured + // here first. Reads only the atomic ExitFilter snapshot, never the + // live LocalCfg or CentralCfg. 
+ n.Device.InstallFilter(func(dev *device.Device, packet *device.TCElement) (device.TCAction, error) { + snap := n.ExitFilter.Load() + if snap == nil || snap.ExitNodeBin == state.InvalidNodeIdBin { + return device.TcPass, nil + } + if packet.Incoming() || !packet.Validate() { + return device.TcPass, nil + } + ver := packet.GetIPVersion() + if ver != 4 && ver != 6 { + return device.TcPass, nil + } + dst := packet.GetDst() + if _, ok := n.router.ForwardTable.Load().Lookup(dst); ok { + return device.TcPass, nil // overlay route exists; keep normal routing + } + if state.IsDefaultLocalExcludedAddr(dst) { + return device.TcDrop, nil + } + entry, ok := snap.NodeForward[snap.ExitNodeBin] + if !ok || entry.Peer == nil { + if n.DBG_trace_tc { + t.Submit(fmt.Sprintf("ExitDrop: %v -> %v, exit %s, reason no_route\n", packet.GetSrc(), dst, snap.ExitNode)) + } + return device.TcDrop, nil + } + src := packet.GetSrc() + if err := wrapExitPacket(packet, snap.ExitNodeBin, snap.LocalIdBin); err != nil { + if n.DBG_trace_tc { + t.Submit(fmt.Sprintf("ExitDrop: %v -> %v, exit %s, reason %v\n", src, dst, snap.ExitNode, err)) + } + return device.TcDrop, nil + } + packet.ToPeer = entry.Peer + packet.Priority = device.TcMediumPriority + if n.DBG_trace_tc { + t.Submit(fmt.Sprintf("ExitEncap: %v -> %v, exit %s via %s\n", src, dst, snap.ExitNode, entry.Nh)) + } + return device.TcForward, nil + }) + if n.DBG_trace_tc { n.Device.InstallFilter(func(dev *device.Device, packet *device.TCElement) (device.TCAction, error) { if packet.Validate() { // make sure it's an IP packet @@ -128,6 +175,41 @@ func (n *Nylon) InstallTC() { } return device.TcPass, nil }) + + // handle incoming NyUnicast packets. Installed last so that under + // reverse-installation evaluation it runs first; this ensures + // inbound exit-transit packets get re-forwarded before any of the + // IP-routing filters above ever see them. 
+ n.Device.InstallFilter(func(dev *device.Device, packet *device.TCElement) (device.TCAction, error) { + if !packet.Incoming() || packet.GetIPVersion() != NyUnicastProtoId { + return device.TcPass, nil + } + snap := n.ExitFilter.Load() + if snap == nil { + return device.TcDrop, nil + } + payload := packet.Payload() + h, err := parseNyUnicastHeader(payload) + if err != nil { + if n.DBG_trace_tc { + t.Submit(fmt.Sprintf("ExitDrop: malformed header: %v\n", err)) + } + return device.TcDrop, nil + } + switch h.subtype { + case NyUnicastSubtypeExit: + action, err := n.handleExitPacket(packet, snap, h) + if err != nil && n.DBG_trace_tc { + t.Submit(fmt.Sprintf("ExitDrop: reason %v\n", err)) + } + return action, err + default: + if n.DBG_trace_tc { + t.Submit(fmt.Sprintf("ExitDrop: unknown unicast subtype %d\n", h.subtype)) + } + return device.TcDrop, nil + } + }) } func (n *Nylon) SendNylon(pkt *protocol.Ny, endpoint conn.Endpoint, peer *device.Peer) error { diff --git a/core/nylon_unicast.go b/core/nylon_unicast.go new file mode 100644 index 00000000..514b1933 --- /dev/null +++ b/core/nylon_unicast.go @@ -0,0 +1,393 @@ +package core + +import ( + "encoding/binary" + "errors" + "fmt" + "net" + "net/netip" + + "github.com/encodeous/nylon/polyamide/device" + "github.com/encodeous/nylon/state" +) + +// NyUnicast is a generic node-to-node tunnel packet type carried inside the +// polyamide TC layer. It supersedes the earlier "NyExit" packet by carving +// out a subtype byte so the same encapsulation can be reused for future +// node-targeted features (config push, state query, etc.) without +// re-inventing the wire format each time. 
+// +// Wire format (all integers big-endian): +// +// +---------------------+-----------------------+ +// | poly outer header | NyUnicast payload | +// | (PolyHeaderSize=3) | (NyUnicastHeaderSize) | +// +---------------------+-----------------------+ +// 3 B 6 B +// +// Poly outer header (managed by polyamide): +// +// byte 0 : NyUnicastProtoId << 4 (so the IP-version nibble reads as 9) +// bytes 1-2: payload length (total packet length minus PolyHeaderSize) +// +// NyUnicast payload header (this package): +// +// byte 0 : subtype (see NyUnicastSubtype*) +// byte 1 : hop_limit (decremented at every transit node; 0 = drop) +// bytes 2-3: dst NodeIdBin (final destination node, big-endian uint16) +// bytes 4-5: src NodeIdBin (originating node, big-endian uint16) +// bytes 6+ : subtype-specific payload +// +// For NyUnicastSubtypeExit the subtype payload is the original IPv4/IPv6 +// packet emitted by the origin's TUN. The exit node validates that the +// source address belongs to the claimed origin and that the packet arrived +// from the next-hop peer that should be reachable for that origin, then +// "unwraps" it into the local stack. +const ( + NyUnicastProtoId = 9 + NyUnicastHeaderSize = 6 + NyUnicastDefaultHopLim = 64 + + NyUnicastOffsetSubtype = 0 + NyUnicastOffsetHopLimit = 1 + NyUnicastOffsetDst = 2 + NyUnicastOffsetSrc = 4 +) + +type NyUnicastSubtype byte + +const ( + // NyUnicastSubtypeExit wraps an IPv4/IPv6 packet that should leave the + // nylon mesh at the destination node. The subtype payload is the + // untouched inner packet. + NyUnicastSubtypeExit NyUnicastSubtype = 1 +) + +// ExitFilterSnapshot is an immutable view of all the state the exit filter +// needs to make a decision: local identity, exit configuration, the binary +// node-id mapping, per-node source-address ownership, and the binary-keyed +// next-hop forwarding table. 
+// +// It is rebuilt by the dispatch goroutine on three occasions: at startup, +// after every central config apply, and after every route mutation. The +// filter only ever loads (and never mutates) the pointer. This keeps the +// dataplane path lock-free and entirely free of references into the live +// LocalCfg / CentralCfg / RouterState structures. +type ExitFilterSnapshot struct { + // Local identity. + LocalId state.NodeId + LocalIdBin state.NodeIdBin + + // Local capabilities. + AdvertiseExitNode bool + ExitNode state.NodeId // empty => not configured to use an exit + ExitNodeBin state.NodeIdBin // InvalidNodeIdBin if ExitNode is empty or unmapped + + // Per-node exit source address ownership. Used at the exit node to + // validate that the inner packet source matches the origin claimed in + // the header. Only the node's directly-assigned Addresses count; + // advertised prefixes / anycast addresses are intentionally excluded. + NodeAddrs map[state.NodeIdBin]map[netip.Addr]struct{} + + // Binary -> string lookup, copied out of NodeIdMap for trace/log paths + // that need readable names. + BinNames map[state.NodeIdBin]state.NodeId + + // NodeForward maps a destination node's binary id to the next-hop + // peer (and its NodeId for trace output). Built off the currently + // selected routes — every entry has a finite metric and is reachable + // via a peer that is not the local node. + NodeForward map[state.NodeIdBin]RouteTableEntry +} + +// rebuildExitFilterSnapshot constructs a fresh snapshot from the current +// LocalCfg + CentralCfg + NodeIdMap and the currently selected routes. +// Must be called on the dispatch goroutine. 
+func (n *Nylon) rebuildExitFilterSnapshot(idMap *state.NodeIdMap) *ExitFilterSnapshot { + snap := &ExitFilterSnapshot{ + LocalId: n.LocalCfg.Id, + AdvertiseExitNode: n.LocalCfg.AdvertiseExitNode, + ExitNode: n.LocalCfg.ExitNode, + NodeAddrs: make(map[state.NodeIdBin]map[netip.Addr]struct{}), + BinNames: make(map[state.NodeIdBin]state.NodeId), + NodeForward: make(map[state.NodeIdBin]RouteTableEntry), + } + if bin, ok := idMap.ToBin(n.LocalCfg.Id); ok { + snap.LocalIdBin = bin + } + if snap.ExitNode != "" { + if bin, ok := idMap.ToBin(snap.ExitNode); ok { + snap.ExitNodeBin = bin + } + } + addAddrs := func(id state.NodeId, addrs []netip.Addr) { + bin, ok := idMap.ToBin(id) + if !ok { + return + } + snap.BinNames[bin] = id + if len(addrs) == 0 { + return + } + m := snap.NodeAddrs[bin] + if m == nil { + m = make(map[netip.Addr]struct{}, len(addrs)) + snap.NodeAddrs[bin] = m + } + for _, a := range addrs { + m[a] = struct{}{} + } + } + for _, r := range n.CentralCfg.Routers { + addAddrs(r.Id, r.Addresses) + } + for _, c := range n.CentralCfg.Clients { + addAddrs(c.Id, c.Addresses) + } + + // Build the per-node forwarding table. We look each node's assigned + // addresses up in the prefix-keyed ForwardTable (already atomic, + // already aggregation-aware) — iterating RouterState.Routes by + // NodeId would miss nodes whose /32 has been folded into a Babel + // supernet. ForwardTable is rebuilt before this snapshot, so the + // lookup sees the latest entries. 
+ if ft := n.router.ForwardTable.Load(); ft != nil { + add := func(id state.NodeId, addrs []netip.Addr) { + if id == n.LocalCfg.Id { + return + } + bin, ok := idMap.ToBin(id) + if !ok { + return + } + if _, exists := snap.NodeForward[bin]; exists { + return + } + for _, addr := range addrs { + entry, ok := ft.Lookup(addr) + if !ok || entry.Blackhole || entry.Peer == nil { + continue + } + snap.NodeForward[bin] = entry + break + } + } + for _, r := range n.CentralCfg.Routers { + add(r.Id, r.Addresses) + } + for _, c := range n.CentralCfg.Clients { + add(c.Id, c.Addresses) + } + } + return snap +} + +// refreshNodeBindings recomputes both the NodeIdMap and ExitFilter snapshots +// from the current config and routing state, and stores them atomically. +// Must be called on the dispatch goroutine after any change that could +// affect either the binary-id assignment (CentralCfg) or the per-node +// next-hop (selected routes / local exit settings). +func (n *Nylon) refreshNodeBindings() error { + idMap, err := state.BuildNodeIdMap(&n.CentralCfg) + if err != nil { + return err + } + n.NodeIdMap.Store(idMap) + n.ExitFilter.Store(n.rebuildExitFilterSnapshot(idMap)) + return nil +} + +// refreshExitFilter rebuilds just the ExitFilter snapshot using the current +// NodeIdMap. Cheaper than refreshNodeBindings; appropriate when only the +// routing state has changed. +func (n *Nylon) refreshExitFilter() { + idMap := n.NodeIdMap.Load() + if idMap == nil { + return + } + n.ExitFilter.Store(n.rebuildExitFilterSnapshot(idMap)) +} + +// nyUnicastHeader is a parsed view of a NyUnicast payload header. 
+type nyUnicastHeader struct { + subtype NyUnicastSubtype + hopLimit uint8 + dst state.NodeIdBin + src state.NodeIdBin +} + +func parseNyUnicastHeader(payload []byte) (nyUnicastHeader, error) { + if len(payload) < NyUnicastHeaderSize { + return nyUnicastHeader{}, errors.New("nylon: unicast packet shorter than header") + } + return nyUnicastHeader{ + subtype: NyUnicastSubtype(payload[NyUnicastOffsetSubtype]), + hopLimit: payload[NyUnicastOffsetHopLimit], + dst: state.NodeIdBin(binary.BigEndian.Uint16(payload[NyUnicastOffsetDst : NyUnicastOffsetDst+2])), + src: state.NodeIdBin(binary.BigEndian.Uint16(payload[NyUnicastOffsetSrc : NyUnicastOffsetSrc+2])), + }, nil +} + +// writeNyUnicastHeader writes the fixed 6-byte payload header at the start of +// buf. buf must be at least NyUnicastHeaderSize long. +func writeNyUnicastHeader(buf []byte, h nyUnicastHeader) { + buf[NyUnicastOffsetSubtype] = byte(h.subtype) + buf[NyUnicastOffsetHopLimit] = h.hopLimit + binary.BigEndian.PutUint16(buf[NyUnicastOffsetDst:NyUnicastOffsetDst+2], uint16(h.dst)) + binary.BigEndian.PutUint16(buf[NyUnicastOffsetSrc:NyUnicastOffsetSrc+2], uint16(h.src)) +} + +// wrapExitPacket re-frames the current IP packet in `packet` as a NyUnicast +// exit-encap packet bound for `exit` from `origin`. Mutates packet in place. +func wrapExitPacket(packet *device.TCElement, exit, origin state.NodeIdBin) error { + if exit == state.InvalidNodeIdBin || origin == state.InvalidNodeIdBin { + return errors.New("nylon: invalid node id in exit header") + } + + origLen := len(packet.Packet) + headerLen := device.PolyHeaderSize + NyUnicastHeaderSize + totalLen := headerLen + origLen + if totalLen > len(packet.Buffer)-device.MessageTransportHeaderSize { + return errors.New("nylon: packet too large for exit encapsulation") + } + + // Slide inner IP packet right by headerLen and rebase Packet to point + // at the new outer header. 
We use the message-transport offset so the + // downstream encryptor sees the same Buffer layout as for any other + // packet. + buf := packet.Buffer[device.MessageTransportHeaderSize : device.MessageTransportHeaderSize+totalLen] + copy(buf[headerLen:], packet.Packet) + packet.Packet = buf + + packet.SetIPVersion(NyUnicastProtoId) + packet.SetLength(uint16(totalLen)) + writeNyUnicastHeader(packet.Payload(), nyUnicastHeader{ + subtype: NyUnicastSubtypeExit, + hopLimit: NyUnicastDefaultHopLim, + dst: exit, + src: origin, + }) + return nil +} + +// exitOriginArrivedFromExpectedPeer verifies that an inbound exit packet +// arrived from the next-hop peer that would be used to reach `origin`. This +// prevents another peer from spoofing exit packets on someone else's behalf +// (modulo full-path attestation, which would require per-packet signing). +// Uses only the precomputed snapshot — never touches RouterState directly. +func exitOriginArrivedFromExpectedPeer(snap *ExitFilterSnapshot, origin state.NodeIdBin, fromPeer *device.Peer) bool { + if fromPeer == nil { + return false + } + entry, ok := snap.NodeForward[origin] + if !ok || entry.Peer == nil { + return false + } + return fromPeer.GetPublicKey() == entry.Peer.GetPublicKey() +} + +func packetSrc(packet []byte) (netip.Addr, error) { + return packetAddr(packet, true) +} + +func packetDst(packet []byte) (netip.Addr, error) { + return packetAddr(packet, false) +} + +func packetAddr(packet []byte, src bool) (netip.Addr, error) { + if len(packet) == 0 { + return netip.Addr{}, errors.New("empty inner packet") + } + switch packet[0] >> 4 { + case 4: + offset := device.IPv4offsetDst + if src { + offset = device.IPv4offsetSrc + } + if len(packet) < offset+net.IPv4len { + return netip.Addr{}, errors.New("short IPv4 packet") + } + return netip.AddrFrom4([4]byte(packet[offset : offset+net.IPv4len])), nil + case 6: + offset := device.IPv6offsetDst + if src { + offset = device.IPv6offsetSrc + } + if len(packet) < 
offset+net.IPv6len { + return netip.Addr{}, errors.New("short IPv6 packet") + } + return netip.AddrFrom16([16]byte(packet[offset : offset+net.IPv6len])), nil + default: + return netip.Addr{}, errors.New("inner packet is not IP") + } +} + +// handleExitPacket dispatches a NyUnicast / NyUnicastSubtypeExit packet: +// transit if we are not the destination, decap back to the local stack if +// we are. Reads only the supplied snapshot, never the live config. +func (n *Nylon) handleExitPacket(packet *device.TCElement, snap *ExitFilterSnapshot, h nyUnicastHeader) (device.TCAction, error) { + t := n.Trace + if h.hopLimit == 0 { + return device.TcDrop, errors.New("exit packet hop limit exceeded") + } + + if h.dst != snap.LocalIdBin { + entry, ok := snap.NodeForward[h.dst] + if !ok || entry.Peer == nil { + return device.TcDrop, fmt.Errorf("no route to exit node bin=%d", h.dst) + } + packet.Payload()[NyUnicastOffsetHopLimit]-- + packet.ToPeer = entry.Peer + packet.Priority = device.TcMediumPriority + if n.DBG_trace_tc { + t.Submit(fmt.Sprintf("ExitTransit: origin %s exit %s via %s\n", snap.BinNames[h.src], snap.BinNames[h.dst], entry.Nh)) + } + return device.TcForward, nil + } + + // Terminal: this is the exit node for the packet. 
+ if !snap.AdvertiseExitNode { + return device.TcDrop, errors.New("local node is not advertising exit service") + } + originName, ok := snap.BinNames[h.src] + if !ok { + return device.TcDrop, fmt.Errorf("unknown exit origin bin=%d", h.src) + } + if !exitOriginArrivedFromExpectedPeer(snap, h.src, packet.FromPeer) { + return device.TcDrop, fmt.Errorf("exit packet origin %s did not arrive from expected peer", originName) + } + inner := packet.Payload()[NyUnicastHeaderSize:] + src, err := packetSrc(inner) + if err != nil { + return device.TcDrop, err + } + if !nodeOwnsExitSourceAddr(snap, h.src, src) { + return device.TcDrop, fmt.Errorf("source %s is not owned by origin node %s", src, originName) + } + dst, err := packetDst(inner) + if err != nil { + return device.TcDrop, err + } + if n.DBG_trace_tc { + t.Submit(fmt.Sprintf("ExitDecap: origin %s %s -> %s\n", originName, src, dst)) + } + // Repoint Packet at the inner IP packet so subsequent filters / + // system routing see it as a regular IP packet from this node. + copy(packet.Packet[:len(inner)], inner) + packet.Packet = packet.Packet[:len(inner)] + packet.ParsePacket() + return device.TcBounce, nil +} + +// nodeOwnsExitSourceAddr reports whether `addr` is one of the directly +// assigned addresses of the node identified by `origin`. Advertised prefixes +// and anycast addresses are intentionally excluded — only an address listed +// in the node's Addresses field counts. 
+func nodeOwnsExitSourceAddr(snap *ExitFilterSnapshot, origin state.NodeIdBin, addr netip.Addr) bool { + addrs := snap.NodeAddrs[origin] + if len(addrs) == 0 { + return false + } + _, ok := addrs[addr] + return ok +} diff --git a/core/nylon_unicast_test.go b/core/nylon_unicast_test.go new file mode 100644 index 00000000..7c79abd0 --- /dev/null +++ b/core/nylon_unicast_test.go @@ -0,0 +1,47 @@ +package core + +import ( + "net/netip" + "testing" + + "github.com/encodeous/nylon/state" + "github.com/stretchr/testify/assert" +) + +func TestNyUnicastHeaderRoundTrip(t *testing.T) { + buf := make([]byte, NyUnicastHeaderSize) + writeNyUnicastHeader(buf, nyUnicastHeader{ + subtype: NyUnicastSubtypeExit, + hopLimit: 42, + dst: state.NodeIdBin(0x1234), + src: state.NodeIdBin(0x00FE), + }) + + h, err := parseNyUnicastHeader(buf) + assert.NoError(t, err) + assert.Equal(t, NyUnicastSubtypeExit, h.subtype) + assert.EqualValues(t, 42, h.hopLimit) + assert.EqualValues(t, 0x1234, h.dst) + assert.EqualValues(t, 0x00FE, h.src) +} + +func TestNyUnicastHeaderShortRejected(t *testing.T) { + _, err := parseNyUnicastHeader(make([]byte, NyUnicastHeaderSize-1)) + assert.Error(t, err) +} + +func TestNodeOwnsExitSourceAddrOnlyAllowsAssignedAddresses(t *testing.T) { + originBin := state.NodeIdBin(1) + addr := netip.MustParseAddr("10.0.0.1") + other := netip.MustParseAddr("10.0.0.99") + + snap := &ExitFilterSnapshot{ + NodeAddrs: map[state.NodeIdBin]map[netip.Addr]struct{}{ + originBin: {addr: {}}, + }, + } + + assert.True(t, nodeOwnsExitSourceAddr(snap, originBin, addr)) + assert.False(t, nodeOwnsExitSourceAddr(snap, originBin, other)) + assert.False(t, nodeOwnsExitSourceAddr(snap, state.NodeIdBin(2), addr)) +} diff --git a/core/router.go b/core/router.go index e9ed6139..2fbf29f8 100644 --- a/core/router.go +++ b/core/router.go @@ -109,6 +109,7 @@ func (n *Nylon) TableInsertRoute(prefix netip.Prefix, route state.SelRoute) { ne.Delete(prefix) n.router.ForwardTable.Store(nf) 
+ n.router.ExitTable.Store(ne) + n.refreshExitFilter() return } peer := n.Device.LookupPeer(device.NoisePublicKey(n.GetNode(nh).PubKey)) @@ -126,6 +127,7 @@ func (n *Nylon) TableInsertRoute(prefix netip.Prefix, route state.SelRoute) { } n.router.ForwardTable.Store(nf) n.router.ExitTable.Store(ne) + n.refreshExitFilter() } func (n *Nylon) TableDeleteRoute(prefix netip.Prefix) { @@ -135,6 +137,7 @@ func (n *Nylon) TableDeleteRoute(prefix netip.Prefix) { ne.Delete(prefix) n.router.ForwardTable.Store(nf) n.router.ExitTable.Store(ne) + n.refreshExitFilter() } type IOPending struct { @@ -207,7 +210,19 @@ func (n *Nylon) InitRouter() error { return nil } -// ComputeSysRouteTable computes: computed = prefixes - (((n.CentralCfg.ExcludeIPs U selected self prefixes) - n.LocalCfg.UnexcludeIPs) U n.LocalCfg.ExcludeIPs) +// ComputeSysRouteTable computes the prefixes that should be installed in the +// OS routing table. The formula is roughly: +// +// computed = (prefixes ∪ exitDefaults) +// - (((CentralCfg.ExcludeIPs ∪ selected_self_prefixes ∪ defaultLocalExcludes) +// - LocalCfg.UnexcludeIPs) +// ∪ LocalCfg.ExcludeIPs) +// +// When LocalCfg.ExitNode is set we add 0.0.0.0/0 (IPv4 only; no ::/0 is +// added) so the OS sends unrouted IPv4 traffic into the nylon interface, +// where the exit-encap filter picks it up. Default-local excludes (loopback, +// link-local, multicast) are never advertised through Babel — they are +// purely local capture policy, so they live here rather than in the router. 
func (n *Nylon) ComputeSysRouteTable() []netip.Prefix { prefixes := make([]netip.Prefix, 0) selectedSelf := make([]netip.Prefix, 0) @@ -220,12 +235,16 @@ func (n *Nylon) ComputeSysRouteTable() []netip.Prefix { excludes := netipx.IPSetBuilder{} excludes.AddSet(state.MakeSet(n.CentralCfg.ExcludeIPs)) + excludes.AddSet(state.MakeSet(state.DefaultLocalExcludes())) excludes.AddSet(state.MakeSet(selectedSelf)) excludes.RemoveSet(state.MakeSet(n.LocalCfg.UnexcludeIPs)) excludes.AddSet(state.MakeSet(n.LocalCfg.ExcludeIPs)) final := netipx.IPSetBuilder{} final.AddSet(state.MakeSet(prefixes)) + if n.LocalCfg.ExitNode != "" { + final.AddPrefix(netip.MustParsePrefix("0.0.0.0/0")) + } res, _ := excludes.IPSet() final.RemoveSet(res) diff --git a/core/router_exit_test.go b/core/router_exit_test.go new file mode 100644 index 00000000..f22625b0 --- /dev/null +++ b/core/router_exit_test.go @@ -0,0 +1,65 @@ +package core + +import ( + "net/netip" + "testing" + + "github.com/encodeous/nylon/state" + "github.com/stretchr/testify/assert" +) + +// TestComputeSysRouteTable_ExitNodeAddsDefault verifies that a node +// configured with an exit node installs 0.0.0.0/0 (so the OS hands all +// unrouted traffic to nylon), while still respecting both the central +// excludes and the default-local exclude list (loopback, multicast, +// link-local). +func TestComputeSysRouteTable_ExitNodeAddsDefault(t *testing.T) { + r := &Nylon{ + ConfigState: state.ConfigState{ + LocalCfg: state.LocalCfg{Id: "node-a"}, + }, + RouterState: &state.RouterState{ + Id: "node-a", + Routes: map[netip.Prefix]state.SelRoute{ + netip.MustParsePrefix("10.0.0.2/32"): { + PubRoute: state.PubRoute{ + Source: state.Source{ + NodeId: "node-b", + Prefix: netip.MustParsePrefix("10.0.0.2/32"), + }, + }, + Nh: "node-b", + }, + }, + }, + } + + // without an exit node configured: just the overlay prefix. 
+ assert.ElementsMatch(t, []netip.Prefix{ + netip.MustParsePrefix("10.0.0.2/32"), + }, r.ComputeSysRouteTable()) + + r.LocalCfg.ExitNode = "node-exit" + r.LocalCfg.ExcludeIPs = []netip.Prefix{netip.MustParsePrefix("192.168.0.0/16")} + routes := r.ComputeSysRouteTable() + + assert.True(t, prefixListContainsAddr(routes, netip.MustParseAddr("10.0.0.2"))) + assert.NotContains(t, routes, netip.MustParsePrefix("192.168.0.0/16")) + for _, route := range routes { + assert.False(t, route.Contains(netip.MustParseAddr("192.168.1.1")), route.String()) + assert.False(t, route.Contains(netip.MustParseAddr("224.0.0.251")), route.String()) + assert.False(t, route.Contains(netip.MustParseAddr("169.254.1.1")), route.String()) + } + // must still cover a public address — i.e. the default route is in + // fact present. + assert.True(t, prefixListContainsAddr(routes, netip.MustParseAddr("8.8.8.8"))) +} + +func prefixListContainsAddr(prefixes []netip.Prefix, addr netip.Addr) bool { + for _, prefix := range prefixes { + if prefix.Contains(addr) { + return true + } + } + return false +} diff --git a/core/router_utils.go b/core/router_utils.go index e4b6bd72..c3871698 100644 --- a/core/router_utils.go +++ b/core/router_utils.go @@ -1,6 +1,7 @@ package core import ( + "github.com/encodeous/nylon/polyamide/device" "github.com/encodeous/nylon/state" ) @@ -14,3 +15,42 @@ func NeighContainsFunc(s *state.RouterState, f func(neigh state.NodeId, route st } return false } + +// ForwardEntryToNode looks up the best route to a given node and returns a +// RouteTableEntry pointing at the next-hop peer: (entry, true) if a finite- +// metric route via another node exists, (zero, false) otherwise. For the +// local node it returns an entry with only Nh set (nil Peer) and true. +// +// It scans RouterState.Routes to address a whole node rather than a prefix. +// The exit-encap and exit-transit filters do not call it; they read the +// precomputed ExitFilterSnapshot.NodeForward table instead. 
Must be called from the dispatch +// goroutine — it touches RouterState.Routes which is not atomic. +func (n *Nylon) ForwardEntryToNode(node state.NodeId) (RouteTableEntry, bool) { + if node == n.LocalCfg.Id { + return RouteTableEntry{Nh: n.LocalCfg.Id}, true + } + + var best state.SelRoute + found := false + for _, route := range n.RouterState.Routes { + if route.NodeId != node || route.Nh == n.LocalCfg.Id || route.Metric == state.INF { + continue + } + if !found || route.Metric < best.Metric { + best = route + found = true + } + } + if !found { + return RouteTableEntry{}, false + } + + peer := n.Device.LookupPeer(device.NoisePublicKey(n.GetNode(best.Nh).PubKey)) + if peer == nil { + return RouteTableEntry{}, false + } + return RouteTableEntry{ + Nh: best.Nh, + Peer: peer, + }, true +} diff --git a/e2e/exit_node_test.go b/e2e/exit_node_test.go new file mode 100644 index 00000000..e61b22f5 --- /dev/null +++ b/e2e/exit_node_test.go @@ -0,0 +1,100 @@ +//go:build e2e + +package e2e + +import ( + "net/netip" + "strings" + "testing" + "time" + + "github.com/encodeous/nylon/state" +) + +// TestExplicitExitNodeMultiHop verifies the end-to-end path of an explicit +// exit-node packet: client encaps, relay transits without ever installing a +// default route, exit decaps and replies. It also validates that the +// downstream trace ("ExitEncap" / "ExitTransit" / "ExitDecap") fires the +// way the NyUnicast filter is supposed to log it. 
+func TestExplicitExitNodeMultiHop(t *testing.T) { + h := NewHarness(t) + + clientKey := state.GenerateKey() + relayKey := state.GenerateKey() + exitKey := state.GenerateKey() + + clientIP := GetIP(h.Subnet, 10) + relayIP := GetIP(h.Subnet, 11) + exitIP := GetIP(h.Subnet, 12) + + clientNylonIP := "10.0.0.1" + relayNylonIP := "10.0.0.2" + exitNylonIP := "10.0.0.3" + targetIP := "203.0.113.10" + + configDir := h.SetupTestDir() + central := state.CentralCfg{ + Routers: []state.RouterCfg{ + SimpleRouter("client", clientKey.Pubkey(), clientNylonIP, clientIP), + SimpleRouter("relay", relayKey.Pubkey(), relayNylonIP, relayIP), + SimpleRouter("exit", exitKey.Pubkey(), exitNylonIP, exitIP), + }, + Graph: []string{ + "client, relay", + "relay, exit", + }, + Timestamp: time.Now().UnixNano(), + ExcludeIPs: []netip.Prefix{netip.MustParsePrefix(h.Subnet)}, + } + centralPath := h.WriteConfig(configDir, "central.yaml", central) + + clientCfg := SimpleLocal("client", clientKey) + clientCfg.ExitNode = "exit" + clientPath := h.WriteConfig(configDir, "client.yaml", clientCfg) + + relayCfg := SimpleLocal("relay", relayKey) + relayPath := h.WriteConfig(configDir, "relay.yaml", relayCfg) + + exitCfg := SimpleLocal("exit", exitKey) + exitCfg.AdvertiseExitNode = true + exitCfg.PreUp = append(exitCfg.PreUp, "ip addr add "+targetIP+"/32 dev lo") + exitPath := h.WriteConfig(configDir, "exit.yaml", exitCfg) + + h.StartNodes( + NodeSpec{Name: "client", IP: clientIP, CentralConfigPath: centralPath, NodeConfigPath: clientPath}, + NodeSpec{Name: "relay", IP: relayIP, CentralConfigPath: centralPath, NodeConfigPath: relayPath}, + NodeSpec{Name: "exit", IP: exitIP, CentralConfigPath: centralPath, NodeConfigPath: exitPath}, + ) + + h.WaitForLog("client", "installing new route prefix=10.0.0.2/31") + h.WaitForLog("relay", "installing new route prefix=10.0.0.1/32") + h.WaitForLog("relay", "installing new route prefix=10.0.0.3/32") + + // the relay must not install a default route on nylon0 (inspected below) — it's neither + // 
advertising an exit nor consuming one. + relayRoutes, _, err := h.Exec("relay", []string{"ip", "route", "show", "dev", "nylon0"}) + if err != nil { + t.Fatalf("failed to inspect relay route table: %v", err) + } + if strings.Contains(relayRoutes, "default") || strings.Contains(relayRoutes, "0.0.0.0/0") { + t.Fatalf("relay unexpectedly installed default route:\n%s", relayRoutes) + } + + h.StartTrace("client") + h.StartTrace("relay") + h.StartTrace("exit") + + ping := h.ExecBackground("client", []string{"ping", "-c", "3", targetIP}) + h.WaitForTrace("client", "ExitEncap: 10.0.0.1 -> 203.0.113.10, exit exit via relay") + h.WaitForTrace("relay", "ExitTransit: origin client exit exit via exit") + h.WaitForTrace("exit", "ExitDecap: origin client 10.0.0.1 -> 203.0.113.10") + + stdout, stderr, err := ping.Wait() + if err != nil { + h.PrintLogs("client") + h.PrintLogs("relay") + h.PrintLogs("exit") + t.Fatalf("exit ping failed: %v\nStdout: %s\nStderr: %s", err, stdout, stderr) + } + t.Logf("Ping output:\n%s", stdout) +} diff --git a/state/config.go b/state/config.go index 083493e0..2f9443d0 100644 --- a/state/config.go +++ b/state/config.go @@ -64,6 +64,11 @@ type LocalCfg struct { PreDown []string `yaml:"pre_down,omitempty"` // a list of commands executed in order before the nylon interface is brought down PostUp []string `yaml:"post_up,omitempty"` // a list of commands executed in order after the nylon interface is brought up PostDown []string `yaml:"post_down,omitempty"` // a list of commands executed in order after the nylon interface is brought down + + // Exit-node feature. Mutually exclusive (NodeConfigValidator rejects setting both): a node is either an exit + // itself, a client of an exit, or neither.
+ AdvertiseExitNode bool `yaml:"advertise_exit_node,omitempty"` // accept exit-encap traffic and terminate it on this node + ExitNode NodeId `yaml:"exit_node,omitempty"` // route otherwise-unrouted traffic through this remote node } func (c *CentralCfg) Clone() (error, *CentralCfg) { diff --git a/state/default_excludes.go b/state/default_excludes.go new file mode 100644 index 00000000..ee53d586 --- /dev/null +++ b/state/default_excludes.go @@ -0,0 +1,39 @@ +package state + +import "net/netip" + +// DefaultLocalExcludes returns the prefixes that should never travel through an exit +// node: loopback, link-local, multicast and limited-broadcast ranges that have no +// meaning once they leave the originating link. Multicast covers mDNS/SSDP, which would +// either disrupt local discovery or be silently dropped by the upstream. +// +// Exit-encap filters drop traffic to these destinations before wrapping it +// into a tunnel; iOS / mobile route builders also use the same list to +// populate excluded routes in NEPacketTunnelNetworkSettings.
+func DefaultLocalExcludes() []netip.Prefix { // IPv4 excludes plus IPv6 loopback, link-local and multicast; a fresh slice on every call + result := DefaultLocalIPv4Excludes() + result = append(result, + netip.MustParsePrefix("::1/128"), + netip.MustParsePrefix("fe80::/10"), + netip.MustParsePrefix("ff00::/8"), + ) + return result +} + +func DefaultLocalIPv4Excludes() []netip.Prefix { // IPv4 loopback, link-local, multicast and limited broadcast + return []netip.Prefix{ + netip.MustParsePrefix("127.0.0.0/8"), + netip.MustParsePrefix("169.254.0.0/16"), + netip.MustParsePrefix("224.0.0.0/4"), + netip.MustParsePrefix("255.255.255.255/32"), + } +} + +func IsDefaultLocalExcludedAddr(addr netip.Addr) bool { // reports whether addr falls inside any DefaultLocalExcludes prefix + for _, prefix := range DefaultLocalExcludes() { + if prefix.Contains(addr.Unmap()) { // Unmap first: an IPv4-mapped IPv6 address (::ffff:a.b.c.d) never matches an IPv4 prefix + return true + } + } + return false +} diff --git a/state/node_id_bin.go b/state/node_id_bin.go new file mode 100644 index 00000000..220e30a8 --- /dev/null +++ b/state/node_id_bin.go @@ -0,0 +1,103 @@ +package state + +import ( + "fmt" + "slices" +) + +// NodeIdBin is a compact binary identifier for a node, derived from the +// central config. It is used in dataplane packet headers in place of the +// variable-length NodeId string so the parser can run with a fixed-size +// header on the hot path. +// +// Value 0 is reserved as "invalid / unassigned". Valid binary ids begin at 1 +// and are assigned deterministically (byte-wise sorted order over the union of +// routers and clients in the central config). Both ends of a tunnel must hold +// the same central config to agree on the mapping; this is already a +// prerequisite for routing in nylon. +type NodeIdBin uint16 + +const InvalidNodeIdBin NodeIdBin = 0 + +// MaxNodeIdBin is the largest representable binary node id. The header +// reserves two bytes per id (big-endian uint16), so the upper bound is the +// 16-bit unsigned max. +const MaxNodeIdBin NodeIdBin = 0xFFFF + +// NodeIdMap is an immutable bidirectional NodeId <-> NodeIdBin lookup table. +// Construct via BuildNodeIdMap and treat the returned value as read-only.
+type NodeIdMap struct { + byBin []NodeId // index 0 reserved, valid entries at 1..len-1 + byString map[NodeId]NodeIdBin // reverse lookup: NodeId -> binary id +} + +// BuildNodeIdMap returns a deterministic NodeId <-> NodeIdBin mapping derived +// from the given central config. The same input config produces the same +// mapping on every node, so peers always agree on the encoding. +// +// Ids that appear more than once across Routers and Clients are kept once. Returns an error if more than MaxNodeIdBin distinct ids remain. +func BuildNodeIdMap(c *CentralCfg) (*NodeIdMap, error) { + ids := make([]NodeId, 0, len(c.Routers)+len(c.Clients)) + seen := make(map[NodeId]struct{}, len(c.Routers)+len(c.Clients)) + for _, r := range c.Routers { + if _, ok := seen[r.Id]; ok { + continue + } + seen[r.Id] = struct{}{} + ids = append(ids, r.Id) + } + for _, cl := range c.Clients { + if _, ok := seen[cl.Id]; ok { + continue + } + seen[cl.Id] = struct{}{} + ids = append(ids, cl.Id) + } + slices.Sort(ids) // sorting makes the assignment independent of the order nodes appear in the config + + if uint64(len(ids)) > uint64(MaxNodeIdBin) { + return nil, fmt.Errorf("network has %d nodes, exceeds NodeIdBin capacity of %d", len(ids), MaxNodeIdBin) + } + + m := &NodeIdMap{ + byBin: make([]NodeId, len(ids)+1), // slot 0 reserved + byString: make(map[NodeId]NodeIdBin, len(ids)), + } + for i, id := range ids { + bin := NodeIdBin(i + 1) // binary ids start at 1; 0 is InvalidNodeIdBin + m.byBin[bin] = id + m.byString[id] = bin + } + return m, nil +} + +// ToBin returns the binary id for a NodeId, or (InvalidNodeIdBin, false) if the node +// is not present in the mapping or m is nil. +func (m *NodeIdMap) ToBin(id NodeId) (NodeIdBin, bool) { + if m == nil { + return InvalidNodeIdBin, false + } + b, ok := m.byString[id] + return b, ok +} + +// ToString returns the NodeId for a binary id, or ("", false) if the binary +// id is InvalidNodeIdBin, out of range, unassigned, or m is nil. +func (m *NodeIdMap) ToString(b NodeIdBin) (NodeId, bool) { + if m == nil || b == InvalidNodeIdBin || int(b) >= len(m.byBin) { + return "", false + } + id := m.byBin[b] + if id == "" { // an empty NodeId stored in the table is reported as unassigned + return "", false + } + return id, true +} + +// Len returns the number of nodes in the mapping; a nil map has length 0.
+func (m *NodeIdMap) Len() int { + if m == nil { + return 0 + } + return len(m.byBin) - 1 +} diff --git a/state/node_id_bin_test.go b/state/node_id_bin_test.go new file mode 100644 index 00000000..d630ea87 --- /dev/null +++ b/state/node_id_bin_test.go @@ -0,0 +1,60 @@ +package state + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestBuildNodeIdMap_DeterministicAcrossOrdering(t *testing.T) { + cfgA := &CentralCfg{ + Routers: []RouterCfg{ + {NodeCfg: NodeCfg{Id: "syd-vm"}}, + {NodeCfg: NodeCfg{Id: "ios-phone"}}, + {NodeCfg: NodeCfg{Id: "openstick"}}, + }, + Clients: []ClientCfg{ + {NodeCfg: NodeCfg{Id: "tablet"}}, + }, + } + cfgB := &CentralCfg{ + Routers: []RouterCfg{ + {NodeCfg: NodeCfg{Id: "openstick"}}, + {NodeCfg: NodeCfg{Id: "syd-vm"}}, + {NodeCfg: NodeCfg{Id: "ios-phone"}}, + }, + Clients: []ClientCfg{ + {NodeCfg: NodeCfg{Id: "tablet"}}, + }, + } + mA, err := BuildNodeIdMap(cfgA) + assert.NoError(t, err) + mB, err := BuildNodeIdMap(cfgB) + assert.NoError(t, err) + + for _, id := range []NodeId{"syd-vm", "ios-phone", "openstick", "tablet"} { + a, ok := mA.ToBin(id) + assert.True(t, ok) + b, ok := mB.ToBin(id) + assert.True(t, ok) + assert.Equal(t, a, b, "binary id for %s should be stable across input order", id) + } +} + +func TestBuildNodeIdMap_ReservesZero(t *testing.T) { + cfg := &CentralCfg{ + Routers: []RouterCfg{{NodeCfg: NodeCfg{Id: "a"}}}, + } + m, err := BuildNodeIdMap(cfg) + assert.NoError(t, err) + + _, ok := m.ToString(InvalidNodeIdBin) + assert.False(t, ok, "InvalidNodeIdBin must not resolve to a node") + + bin, ok := m.ToBin("a") + assert.True(t, ok) + assert.NotEqual(t, InvalidNodeIdBin, bin) + + _, ok = m.ToBin("missing") + assert.False(t, ok) +} diff --git a/state/tunables.go b/state/tunables.go index 2c5f7b8a..9bd31bb6 100644 --- a/state/tunables.go +++ b/state/tunables.go @@ -54,6 +54,14 @@ type NylonOptions struct { DBG_debug bool DBG_trace bool DBG_trace_tc bool + + // Exit-node overrides. 
Each value is paired with a *Set boolean so an + // empty value can still represent "explicitly cleared by CLI" without + // being ambiguous with "not specified". + AdvertiseExitNode bool + AdvertiseExitNodeSet bool + ExitNode NodeId + ExitNodeSet bool } func DefaultRouterTunables() RouterTunables { diff --git a/state/validation.go b/state/validation.go index 5c0e7009..ac3a5c9b 100644 --- a/state/validation.go +++ b/state/validation.go @@ -56,6 +56,15 @@ func NodeConfigValidator(central *CentralCfg, node *LocalCfg) error { return fmt.Errorf("invalid prefix %s", p) } } + if node.AdvertiseExitNode && node.ExitNode != "" { + return fmt.Errorf("node %s cannot advertise an exit node and use exit_node %s at the same time", node.Id, node.ExitNode) + } + if node.ExitNode == node.Id { + return fmt.Errorf("node %s cannot use itself as exit_node", node.Id) + } + if central != nil && node.ExitNode != "" && !central.IsNode(node.ExitNode) { + return fmt.Errorf("exit_node %s is not in central config", node.ExitNode) + } // check that node is in central config if central != nil && !central.IsNode(node.Id) { return fmt.Errorf("node %s is not in central config", node.Id) @@ -101,6 +110,14 @@ func CentralConfigValidator(cfg *CentralCfg) error { return err } + // The dataplane unicast header carries a 16-bit binary node id; ensure + // the network is small enough to fit. At 65535 usable ids (0 is reserved) this is a generous + // bound; if it ever needs to grow, widen NodeIdBin in lockstep with the + // header wire format. + if uint64(len(nodes)) > uint64(MaxNodeIdBin) { + return fmt.Errorf("network has %d nodes, exceeds maximum %d supported by NodeIdBin", len(nodes), MaxNodeIdBin) + } + // ensure each node contains unique prefixes (anycast routing allows duplicate prefixes across nodes) for _, router := range cfg.Routers { routerPrefixes := make(map[netip.Prefix]struct{})