From cac59e3caa1f7adcde43ef07204588846e700ed9 Mon Sep 17 00:00:00 2001 From: Muddyblack Date: Wed, 20 May 2026 16:46:57 +0200 Subject: [PATCH 1/4] init multiserver plugin --- docs/plugins.md | 1 + docs/plugins/multiserver.md | 268 ++++++ netsim/extra/multiserver/defaults.yml | 47 + netsim/extra/multiserver/plugin.py | 842 ++++++++++++++++++ tests/topology/expected/multiserver-auto.yml | 358 ++++++++ .../expected/multiserver-explicit.yml | 382 ++++++++ tests/topology/input/multiserver-auto.yml | 45 + tests/topology/input/multiserver-explicit.yml | 54 ++ 8 files changed, 1997 insertions(+) create mode 100644 docs/plugins/multiserver.md create mode 100644 netsim/extra/multiserver/defaults.yml create mode 100644 netsim/extra/multiserver/plugin.py create mode 100644 tests/topology/expected/multiserver-auto.yml create mode 100644 tests/topology/expected/multiserver-explicit.yml create mode 100644 tests/topology/input/multiserver-auto.yml create mode 100644 tests/topology/input/multiserver-explicit.yml diff --git a/docs/plugins.md b/docs/plugins.md index ac3e94ae64..5c3e917dac 100644 --- a/docs/plugins.md +++ b/docs/plugins.md @@ -19,6 +19,7 @@ plugins/kind.md plugins/mlag.vtep.md plugins/multilab.md + plugins/multiserver.md plugins/node.clone.md plugins/ospf.areas.md plugins/vrrp.version.md diff --git a/docs/plugins/multiserver.md b/docs/plugins/multiserver.md new file mode 100644 index 0000000000..8503ae16f0 --- /dev/null +++ b/docs/plugins/multiserver.md @@ -0,0 +1,268 @@ +(plugin-multiserver)= +# Splitting Topologies Across Multiple Servers + +The *multiserver* plugin distributes a single *netlab* topology across multiple physical servers. It assigns nodes to servers, classifies links as local or cross-server, and generates a self-contained containerlab configuration directory for each server with VXLAN-based interconnects. + +```eval_rst +.. 
contents:: Table of Contents + :depth: 2 + :local: + :backlinks: none +``` + +```{warning} +* The *multiserver* plugin requires the **containerlab** provider on all servers. +* Containerlab version >= `0.46` is required for native VXLAN link endpoint support. +* All physical servers must have direct IP reachability (e.g. over a management network or dedicated interconnect). +``` + +## Using the Plugin + +* Add `plugin: [ multiserver ]` to lab topology. +* Define target servers in the **multiserver.servers** list. +* Choose an assignment mode (`explicit` or `auto`) with **multiserver.assignment**. + +The plugin runs during `netlab create` and generates self-contained per-server directories (e.g. `server-1/`, `server-2/`) with tailored `clab.yml` files, node configs, and VXLAN scripts ready for deployment. + +## Configuring Plugin Parameters + +The plugin is configured with the **multiserver** topology-level dictionary that has these parameters: + +| Parameter | Type | Meaning | +|-----------|------|---------| +| **assignment** | string | How to assign nodes to servers: `explicit` (default) or `auto` | +| **servers** | list | List of target physical servers | +| **vxlan** | dictionary | Global settings for VXLAN tunnels | +| **replicate** | list | Nodes or groups that must be duplicated on all servers | +| **output_dir** | string | Template for per-server directory names (default: `server-{server_id}`) | + +(multiserver-servers)= +### Server Parameters + +Each entry in the **multiserver.servers** list supports these parameters: + +| Parameter | Type | Meaning | +|-----------|------|---------| +| **id** | integer | Unique identifier for the server (e.g. 
`1`, `2`) | +| **host** | string | IP address or hostname of the remote server | +| **groups** | list | *netlab* groups whose members are assigned to this server | +| **members** | list | Individual node names assigned to this server | +| **vxlan_dev** | string | Physical interface to bind VXLAN tunnels to on this server | + +(multiserver-vxlan)= +### VXLAN Parameters + +Global VXLAN settings are specified in the **multiserver.vxlan** dictionary: + +| Parameter | Type | Meaning | +|-----------|------|---------| +| **vni_base** | integer | Starting VNI for cross-server links (default: `10000`) | +| **dstport** | integer | UDP destination port for VXLAN traffic (default: `4789`) | +| **dev** | string | Default physical interface to bind VXLAN tunnels (default: `ens33`) | + +(multiserver-assignment)= +## Assignment Modes + +### Explicit Assignment (Default) + +In `explicit` mode, every node must be mapped to a server using the **groups** or **members** attributes of a [server entry](multiserver-servers). Any unassigned node (excluding [replicated nodes](multiserver-replicate)) results in an error. + +```yaml +plugin: [ multiserver ] + +multiserver: + assignment: explicit + servers: + - id: 1 + host: 192.168.168.128 + groups: [ core ] + members: [ edge-node ] + - id: 2 + host: 192.168.168.129 + groups: [ spines, leaves ] +``` + +### Automatic Assignment + +In `auto` mode, nodes that are not explicitly pinned to a server are distributed automatically using a greedy balancing algorithm: + +1. Nodes belonging to a *netlab* group are kept together — the entire group is placed on the server that currently has the fewest nodes. Larger groups are placed first for better balance. +2. Remaining ungrouped nodes are assigned one at a time to the least-loaded server. + +Nodes already pinned via **groups** or **members** attributes count toward server load, so the algorithm balances around any explicit assignments. 
+ +```yaml +plugin: [ multiserver ] + +multiserver: + assignment: auto + servers: + - id: 1 + host: 192.168.168.128 + - id: 2 + host: 192.168.168.129 +``` + +```{tip} +You can pin specific nodes or groups to a server in `auto` mode using **groups** and **members** attributes. Only unassigned nodes are auto-distributed. +``` + +#### Group Granularity + +Because auto mode keeps entire groups together on a single server, the granularity of your groups directly affects how evenly nodes are distributed. Define groups at the smallest unit you want to keep on one server. + +For example, consider a topology with two sites, each containing five nodes: + +```yaml +# BAD: one large group — all 10 nodes land on one server +groups: + sites: + members: [ site1-r1, site1-r2, site1-r3, site1-r4, site1-r5, + site2-r1, site2-r2, site2-r3, site2-r4, site2-r5 ] +``` + +```yaml +# GOOD: per-site groups — one site per server +groups: + site1: + members: [ site1-r1, site1-r2, site1-r3, site1-r4, site1-r5 ] + site2: + members: [ site2-r1, site2-r2, site2-r3, site2-r4, site2-r5 ] + sites: + members: [ site1-r1, site1-r2, site1-r3, site1-r4, site1-r5, + site2-r1, site2-r2, site2-r3, site2-r4, site2-r5 ] +``` + +In the second example the parent `sites` group can still be used for Ansible targeting or shared configuration — it does not affect placement because the child groups (`site1`, `site2`) claim their members first during assignment. + +```{note} +Groups are processed in definition order. Child groups defined **before** a parent group will claim their members first, making the parent group a no-op for assignment. Always define fine-grained groups before aggregate groups in your topology. +``` + +(multiserver-replicate)= +### Replicated Nodes + +Nodes listed in **multiserver.replicate** are instantiated on every server. This is useful for infrastructure services that need local access on each physical host — for example, monitoring collectors, route reflectors, or DNS resolvers. 
+ +Links connecting to replicated nodes are always treated as local, so traffic between a replicated node and its neighbors never crosses the VXLAN overlay. + +```yaml +multiserver: + assignment: auto + servers: + - id: 1 + host: 192.168.168.128 + - id: 2 + host: 192.168.168.129 + replicate: [ prometheus, grafana ] +``` + +## Complete Example + +A minimal two-server topology with explicit assignment: + +```yaml +plugin: [ multiserver ] + +provider: clab + +groups: + spines: + members: [ s1, s2 ] + leaves: + members: [ l1, l2 ] + +nodes: + s1: + device: srlinux + s2: + device: srlinux + l1: + device: srlinux + l2: + device: srlinux + +links: + - s1-l1 + - s1-l2 + - s2-l1 + - s2-l2 + +multiserver: + assignment: explicit + servers: + - id: 1 + host: 192.168.168.128 + groups: [ spines ] + - id: 2 + host: 192.168.168.129 + groups: [ leaves ] + vxlan: + vni_base: 10000 + dev: ens33 +``` + +This places spines on server 1 and leaves on server 2. All four links cross servers and are provisioned as containerlab native VXLAN endpoints. + +## Behind the Scenes + +When the plugin processes the topology, it classifies links into three categories: + +* **Local links** connecting nodes on the same server remain as regular containerlab veth pairs or bridges. +* **Cross-server point-to-point links** are provisioned via containerlab's native VXLAN link endpoints (`type: vxlan` in `clab.yml`). +* **Cross-server multi-access links** use a local Linux bridge on each server, interconnected via host-level VXLAN tunnels configured by generated setup scripts. 
+ +Each per-server directory is self-contained and includes: + +* A tailored `clab.yml` with only the relevant nodes and cross-server VXLAN interfaces +* A filtered `netlab.snapshot.pickle` for use with `netlab up --snapshot` +* Copies of `node_files/`, `host_vars/`, and Ansible config for only the nodes on that server +* `vxlan-setup.sh` and `vxlan-teardown.sh` scripts (when multi-access VXLAN tunnels are needed) + +(multiserver-deployment)= +## Deployment Workflow + +**Step 1: Generate configurations** on your workstation: + +```bash +netlab create topology.yml +``` + +The plugin automatically copies all required files into each server directory — no extra bundling step is needed. + +**Step 2: Copy server directories to remote hosts** (e.g. via rsync): + +```bash +rsync -avz server-1/ user@192.168.168.128:~/lab/server-1/ +rsync -avz server-2/ user@192.168.168.129:~/lab/server-2/ +``` + +**Step 3: Deploy on each server** by running the following on each remote host: + +```bash +sudo netlab up --snapshot -vv +sudo ./vxlan-setup.sh # only if multi-access VXLAN tunnels are present +``` + +```{important} +**Why is `--snapshot` required on remote servers?** + +You must run `sudo netlab up --snapshot` on remote servers to load the topology from the pre-generated snapshot (`netlab.snapshot.pickle`) instead of the original `topology.yml`. + +Running with `topology.yml` directly on remote servers will fail because: +1. **Consistency**: Netlab dynamically allocates IP addresses, interface IDs, and VXLAN VNIs. Independent creation runs on different hosts would result in mismatched allocations. +2. **Recursion**: Running `netlab create` on `topology.yml` on the remote hosts would execute the `multiserver` plugin again, causing it to split the topology recursively and generate nested server subdirectories. 
+``` + +**Teardown** in reverse order: + +```bash +sudo ./vxlan-teardown.sh +sudo clab destroy -t clab.yml +``` + +## Limitations + +* Only the **containerlab** provider is supported. Libvirt and virtualbox topologies cannot be split across servers. +* Cross-server VXLAN tunnels use a flat VNI space starting at **vni_base**. The maximum VNI value is 16777215 (24-bit). Topologies with more than ~16 million cross-server links will fail validation. +* All physical servers must have direct IP reachability — the plugin does not support NAT traversal or relay hosts between servers. diff --git a/netsim/extra/multiserver/defaults.yml b/netsim/extra/multiserver/defaults.yml new file mode 100644 index 0000000000..bd68aaa3b4 --- /dev/null +++ b/netsim/extra/multiserver/defaults.yml @@ -0,0 +1,47 @@ +# multiserver plugin: split a topology across multiple physical servers +# +# Requires containerlab >= 0.46 (VXLAN link support). +# +# Cross-server links become: +# - P2P links -> containerlab native VXLAN endpoints (self-contained in clab.yml) +# - Bridge links -> local bridge + host VXLAN tunnel (via generated setup script) +# +# Assignment modes: +# - explicit: user must assign every node via servers[].groups or .members +# - auto: unassigned nodes distributed across servers, keeping netlab groups together +# +--- +attributes: + global: + multiserver: + servers: + type: list + _subtype: + id: + type: int + _required: True + host: + type: str + _required: True + groups: list + members: list + vxlan_dev: str + vxlan: + vni_base: int + dstport: int + dev: str + assignment: + type: str + valid_values: [explicit, auto] + replicate: list + output_dir: + type: str + +multiserver: + vxlan: + vni_base: 10000 + dstport: 4789 + dev: ens33 + assignment: explicit + replicate: [] + output_dir: "server-{server_id}" diff --git a/netsim/extra/multiserver/plugin.py b/netsim/extra/multiserver/plugin.py new file mode 100644 index 0000000000..888e39d96c --- /dev/null +++ 
b/netsim/extra/multiserver/plugin.py @@ -0,0 +1,842 @@ +""" +multiserver plugin — split a netlab topology across multiple physical servers. + +Generates per-server containerlab topology files with cross-server VXLAN links. +Requires containerlab >= 0.46 for native VXLAN link support. + +Cross-server links: + + * P2P links (2 endpoints) → containerlab native VXLAN (type: vxlan in clab.yml) + * Multi-access links (3+ endpoints, bridge) → local bridge + host-level VXLAN tunnel + created by a generated vxlan-setup.sh script + +Server assignment modes: + + * explicit (default) — user assigns nodes via groups/members, unassigned nodes cause + an error. Best when you need precise control over placement. + * auto — unassigned nodes go to the least-loaded server, groups kept together. Use this for + automatic splitting: just define the servers and let the plugin balance the nodes. + +Group granularity (auto mode): + + Auto mode keeps entire netlab groups together on one server. Define groups at + the smallest unit you want to keep on a single server. Parent/aggregate groups + are fine — child groups defined first will claim their members before the parent + is reached. See docs/plugins/multiserver.md for details and examples.
+ +Explicit assignment example: + + plugin: [ multiserver ] + + multiserver: + servers: + - id: 1 + host: 192.168.168.128 + groups: [ hubs ] + members: [ extra-node ] + - id: 2 + host: 10.0.0.67 + groups: [ spines, leaves ] + assignment: explicit + +Automatic splitting example (no groups/members needed): + + plugin: [ multiserver ] + + multiserver: + servers: + - id: 1 + host: 192.168.168.128 + - id: 2 + host: 10.0.0.67 + assignment: auto + replicate: [ prometheus, grafana ] +""" + +import os +import pickle +import shutil +from pathlib import Path + +import yaml +from box import Box +from packaging import version as _pv + +from netsim.data import append_to_list +from netsim.utils import log + +_execute_after = ["fabric", "node.clone"] + +# --------------------------------------------------------------------------- +# Hook: init — validate config + register output hook +# --------------------------------------------------------------------------- + + +def init(topology: Box) -> None: + ms = topology.get("multiserver", None) + if not ms: + return + + # Merge plugin defaults with user config (user values take priority) + defaults = topology.defaults.get("multiserver", Box({})) + topology.multiserver = defaults + ms + + ms = topology.multiserver + servers = ms.get("servers", []) + + # Currently only containerlab is supported — generating per-server Vagrantfiles + # for libvirt/virtualbox would require reimplementing the Vagrant Ruby DSL + provider = topology.get("provider", "") or topology.defaults.get("provider", "") + if provider and provider != "clab": + log.error( + f'multiserver plugin currently supports only the "clab" provider, not "{provider}"', + log.IncorrectValue, + "multiserver", + more_hints=["libvirt and virtualbox support may be added in a future release"], + ) + return + + # Cross-server P2P links use containerlab native VXLAN endpoints (type: vxlan), + # available since containerlab 0.46. 
netlab already requires >= 0.75 so this + # should always pass, but check explicitly in case the requirement is relaxed. + clab_min = "0.46.0" + clab_ver = str(topology.defaults.providers.clab.get("version", "0.0.0")) + if _pv.Version(clab_ver) < _pv.Version(clab_min): + log.error( + f"multiserver plugin requires containerlab >= {clab_min} for VXLAN links (netlab targets {clab_ver})", + log.IncorrectValue, + "multiserver", + ) + return + + if not servers: + log.error('multiserver plugin requires a "servers" list', log.MissingValue, "multiserver") + return + + if len(servers) < 2: + log.error("multiserver plugin requires at least 2 servers", log.IncorrectValue, "multiserver") + return + + seen_ids: set = set() + for idx, s in enumerate(servers): + if "id" not in s: + log.error(f'Server entry #{idx + 1} missing required "id" field', log.MissingValue, "multiserver") + continue + if "host" not in s: + log.error(f'Server {s.id} missing required "host" field', log.MissingValue, "multiserver") + continue + if s.id in seen_ids: + log.error(f"Duplicate server id {s.id}", log.IncorrectValue, "multiserver") + seen_ids.add(s.id) + + log.exit_on_error() + + # Register the output hook so netlab create calls our output() function + append_to_list(topology.defaults.netlab.create, "plugin", "multiserver") + + +# --------------------------------------------------------------------------- +# Hook: post_transform — resolve server assignments, classify links +# --------------------------------------------------------------------------- + + +def post_transform(topology: Box) -> None: + ms = topology.get("multiserver", None) + if not ms: + return + + servers = ms.servers + server_map = {s.id: s for s in servers} + assignment: dict = {} # node_name -> server_id + # --- Resolve replicated nodes (present on every server) --- + replicated: set = set() + for entry in ms.get("replicate", []): + if entry in topology.nodes: + replicated.add(entry) + elif entry in topology.get("groups", {}): + 
for member in topology.groups[entry].get("members", []): + replicated.add(member) + else: + log.error(f'multiserver.replicate: "{entry}" is not a node or group', log.IncorrectValue, "multiserver") + + # --- Resolve assignments from server groups + members --- + for server in servers: + for gname in server.get("groups", []): + grp = topology.get("groups", {}).get(gname, None) + if grp is None: + log.error(f'Server {server.id} references unknown group "{gname}"', log.IncorrectValue, "multiserver") + continue + for member in grp.get("members", []): + if member in assignment and assignment[member] != server.id: + log.error( + f"Node {member} assigned to both server {assignment[member]} and {server.id}", + log.IncorrectValue, + "multiserver", + ) + assignment[member] = server.id + + for member in server.get("members", []): + if member not in topology.nodes: + log.error(f'Server {server.id} references unknown node "{member}"', log.IncorrectValue, "multiserver") + continue + if member in assignment and assignment[member] != server.id: + log.error( + f"Node {member} assigned to both server {assignment[member]} and {server.id}", + log.IncorrectValue, + "multiserver", + ) + assignment[member] = server.id + + # --- Handle unassigned nodes (replicated nodes are exempt) --- + unassigned = set(n for n in topology.nodes if n not in assignment and n not in replicated) + + mode = ms.get("assignment", "explicit") + if unassigned: + if mode == "explicit": + log.error( + f"Nodes not assigned to any server: {', '.join(sorted(unassigned))}", + log.MissingValue, + "multiserver", + more_hints=[ + "Assign nodes via multiserver.servers[].groups or .members", + "Or set multiserver.assignment: auto for round-robin distribution", + ], + ) + else: + sorted_sids = sorted(server_map.keys()) + + # Distribute by netlab group to keep related nodes on the same server. + # Groups are assigned round-robin by size (largest first) for balance. + # Ungrouped nodes are distributed individually at the end. 
+ group_buckets: list = [] # [(group_name, [members])] + claimed = set() + for gname, gdata in topology.get("groups", {}).items(): + members = [m for m in gdata.get("members", []) if m in unassigned and m not in claimed] + if members: + group_buckets.append((gname, members)) + claimed.update(members) + + # Sort groups largest-first for better balance + group_buckets.sort(key=lambda g: -len(g[1])) + + # Track node counts per server for balanced distribution + counts = {sid: sum(1 for s in assignment.values() if s == sid) for sid in sorted_sids} + + for gname, members in group_buckets: + # Assign entire group to the server with the fewest nodes + target = min(sorted_sids, key=lambda s: counts[s]) + for m in members: + assignment[m] = target + counts[target] += len(members) + + # Remaining ungrouped nodes: round-robin to least-loaded server + ungrouped = sorted(unassigned - claimed) + for name in ungrouped: + target = min(sorted_sids, key=lambda s: counts[s]) + assignment[name] = target + counts[target] += 1 + + log.exit_on_error() + + # --- Classify links: local vs cross-server --- + vni_base = ms.vxlan.get("vni_base", 10000) + vni = vni_base + cross_count = 0 + + for link in topology.links: + link_servers = set() + for intf in link.get("interfaces", []): + if intf.node in replicated: + continue + sid = assignment.get(intf.node) + if sid is not None: + link_servers.add(sid) + + if len(link_servers) > 1: + link._ms = Box({"cross": True, "vni": vni, "servers": sorted(link_servers)}) + vni += 1 + cross_count += 1 + else: + link._ms = Box( + { + "cross": False, + "servers": sorted(link_servers), + } + ) + + if vni > 16777215: + log.error(f"VXLAN VNI overflow: {vni} exceeds 24-bit maximum (16777215)", log.IncorrectValue, "multiserver") + + log.exit_on_error() + + # Store state for output hook + topology._multiserver = Box( + { + "assignment": assignment, + "server_map": server_map, + "replicated": sorted(replicated), + } + ) + + # Summary — show which groups and nodes 
landed on each server + for server in servers: + sid = server.id + server_nodes = sorted(n for n, s in assignment.items() if s == sid) + + # Figure out which netlab groups are fully on this server + server_groups = [] + for gname, gdata in topology.get("groups", {}).items(): + members = gdata.get("members", []) + if not members: + continue + on_this = [m for m in members if assignment.get(m) == sid] + if on_this and len(on_this) == len([m for m in members if m in assignment]): + server_groups.append(gname) + + n = len(server_nodes) + log.info(f"Server {sid} ({server.host}): {n} nodes", module="multiserver") + if server_groups: + preview = server_groups[:8] + suffix = f" ... +{len(server_groups) - 8} more" if len(server_groups) > 8 else "" + log.info(f" groups: {', '.join(preview)}{suffix}", module="multiserver") + if n <= 20: + log.info(f" nodes: {', '.join(server_nodes)}", module="multiserver") + else: + preview = server_nodes[:6] + log.info(f" nodes: {', '.join(preview)} ... +{n - 6} more", module="multiserver") + + if replicated: + log.info(f"Replicated on all servers: {', '.join(sorted(replicated))}", module="multiserver") + if cross_count: + log.info(f"{cross_count} cross-server links (VNI {vni_base}–{vni - 1})", module="multiserver") + + +# --------------------------------------------------------------------------- +# Hook: output — generate per-server clab.yml + VXLAN scripts +# --------------------------------------------------------------------------- + + +def output(topology: Box) -> None: + ms = topology.get("multiserver", None) + ms_data = topology.get("_multiserver", None) + if not ms or not ms_data: + return + + assignment = ms_data.assignment + server_map = ms_data.server_map + vxlan_cfg = ms.vxlan + out_tpl = ms.get("output_dir", "server-{server_id}") + + replicated = set(ms_data.get("replicated", [])) + server_folders = [] + + for server in ms.servers: + sid = server.id + local_nodes = {n for n, s in assignment.items() if s == sid} | replicated + 
if not local_nodes: + continue + + out_dir = out_tpl.format(name=topology.name, server_id=sid) + server_folders.append((out_dir, local_nodes)) + + if Path(out_dir).exists(): + shutil.rmtree(out_dir) + Path(out_dir).mkdir(parents=True, exist_ok=True) + + clab_dict, vxlan_tunnels = _build_server_clab(topology, local_nodes, sid, server_map, vxlan_cfg) + + # Write clab.yml + with open(Path(out_dir) / "clab.yml", "w") as f: + yaml.dump(clab_dict, f, default_flow_style=False, sort_keys=False, indent=2) + + # Write filtered snapshot so 'netlab up --snapshot' works per-server + _write_server_snapshot(topology, local_nodes, out_dir) + + # Generate VXLAN setup/teardown scripts for bridge tunnels + if vxlan_tunnels: + dev = server.get("vxlan_dev", "") or vxlan_cfg.get("dev", "") + if not dev: + log.error( + f"Server {sid} has multi-access cross-server links but no VXLAN device is configured", + log.MissingValue, + "multiserver", + more_hints=["Set multiserver.vxlan.dev or multiserver.servers[].vxlan_dev"], + ) + continue + _write_vxlan_scripts(out_dir, vxlan_tunnels, dev) + + link_count = len(clab_dict.get("topology", {}).get("links", [])) + vx_count = len(vxlan_tunnels) + parts = [f"{len(local_nodes)} nodes", f"{link_count} links"] + if vx_count: + parts.append(f"{vx_count} VXLAN tunnels") + log.info(f"Server {sid}: {out_dir}/ — {', '.join(parts)}", module="multiserver") + + # Register atexit handler to copy node_files, host_vars, etc. into each server + # folder after netlab writes all output files. + if server_folders: + import atexit + atexit.register(_distribute_files_atexit, os.getcwd(), server_folders) + + +def _distribute_files_atexit(lab_folder: str, server_folders: list) -> None: + """Distribute generated files (node_files, host_vars, ansible.cfg, hosts.yml) + to each server folder. Registered via atexit so it runs AFTER netlab has + written all output files. 
+ """ + lab_path = Path(lab_folder) + nf_dir = lab_path / "node_files" + hv_dir = lab_path / "host_vars" + + for sf, local_nodes in server_folders: + sf_path = Path(sf) + if not sf_path.is_dir(): + continue + + # node_files: per-node dirs + shared files (names starting with -) + if nf_dir.is_dir(): + dst_nf = sf_path / "node_files" + dst_nf.mkdir(exist_ok=True) + for item in nf_dir.iterdir(): + if item.name in local_nodes or item.name.startswith("-"): + dst = dst_nf / item.name + if not dst.exists(): + try: + if item.is_dir(): + shutil.copytree(item, dst) + else: + shutil.copy2(item, dst) + except Exception: + pass + + # host_vars: per-node only + if hv_dir.is_dir(): + dst_hv = sf_path / "host_vars" + dst_hv.mkdir(exist_ok=True) + for item in hv_dir.iterdir(): + if item.name in local_nodes: + dst = dst_hv / item.name + if not dst.exists(): + try: + if item.is_dir(): + shutil.copytree(item, dst) + else: + shutil.copy2(item, dst) + except Exception: + pass + + # Copy all other subdirectories (e.g. group_vars, templates, monitoring) + # excluding server folders, node_files, host_vars, and python/git metadata. 
+ server_names = {Path(sf).name for sf, _ in server_folders} + for item in lab_path.iterdir(): + if item.is_dir(): + if item.name in server_names or item.name in ("node_files", "host_vars", "__pycache__", ".git"): + continue + + # Optimization: only copy grafana directory if this server hosts the grafana node + if item.name == "grafana" and "grafana" not in local_nodes: + continue + + dst_dir = sf_path / item.name + if not dst_dir.exists(): + try: + shutil.copytree(item, dst_dir) + except Exception: + pass + + # Ansible inventory and config + for fname in ("ansible.cfg", "hosts.yml"): + src = lab_path / fname + dst = sf_path / fname + if src.exists() and not dst.exists(): + try: + shutil.copy2(src, dst) + except Exception: + pass + + +# =========================================================================== +# Internal helpers +# =========================================================================== + + +def _to_plain(obj: object) -> object: + """Convert Box/BoxList to plain dict/list for clean YAML serialization.""" + if isinstance(obj, Box): + return {k: _to_plain(v) for k, v in obj.items()} + if isinstance(obj, list): + return [_to_plain(v) for v in obj] + return obj + + +def _intf_clab_name(intf: Box) -> str: + """Containerlab interface name for a node interface.""" + return intf.get("clab", {}).get("name", "") or intf.get("ifname", "") + + +def _build_clab_node(nname: str, ndata: Box, topology: Box) -> dict: + """Reconstruct a clab.yml node entry from the transformed topology data.""" + entry: dict = {} + clab = ndata.get("clab", Box({})) + + # Management IPs + nm = clab.get("network-mode", "") + if nm != "none": + if ndata.get("mgmt", {}).get("ipv4"): + entry["mgmt-ipv4"] = str(ndata.mgmt.ipv4) + if ndata.get("mgmt", {}).get("ipv6"): + entry["mgmt-ipv6"] = str(ndata.mgmt.ipv6) + + kind = clab.get("kind", "") or ndata.get("device", "") + entry["kind"] = kind + if kind == "linux" and "restart-policy" not in clab: + entry["restart-policy"] = "no" + + # 
Pass through standard clab node attributes + special = set(topology.defaults.providers.clab.get("node_config_special", [])) + for attr in topology.defaults.providers.clab.get("attributes", {}).get("node", {}).get("_keys", []): + if attr in clab and attr not in special: + entry[attr] = _to_plain(clab[attr]) + + # srl-agents goes under extras: (matches clab.j2 template) + if "srl-agents" in clab: + entry["extras"] = {"srl-agents": _to_plain(clab["srl-agents"])} + + entry["image"] = str(clab.get("image", "") or ndata.get("box", "")) + entry["runtime"] = str(clab.get("runtime", "") or topology.defaults.providers.clab.get("runtime", "docker")) + + # Groups + if "groups" in topology: + groups = [g for g in topology.groups if nname in topology.groups[g].get("members", [])] + if groups: + entry["group"] = ",".join(groups) + + # Binds — keep paths as-is (relative to the server directory). + # The distribute script copies node_files/ into each server dir, + # so paths like node_files/r1/... work when running from there. 
+ if "binds" in clab: + entry["binds"] = [] + for b in clab.binds: + bind_str = f"{b.source}:{b.target}" + if "mode" in b: + bind_str += f":{b.mode}" + entry["binds"].append(bind_str) + + # Startup config + if "startup-config" in clab: + entry["startup-config"] = str(clab["startup-config"]) + + return entry + + +def _build_server_clab(topology: Box, local_nodes: set, sid: int, server_map: dict, vxlan_cfg: Box) -> tuple: + """Build the clab.yml dict and VXLAN tunnel list for one server.""" + dstport = vxlan_cfg.get("dstport", 4789) + multilab_id = topology.defaults.get("multilab", {}).get("id", 0) + assignment = topology._multiserver.assignment + + clab: dict = { + "name": topology.name, + "prefix": str(topology.defaults.providers.clab.get("lab_prefix", "") or ""), + "mgmt": { + "network": str(topology.addressing.mgmt.get("_network", "") or "netlab_mgmt"), + "ipv4-subnet": str(topology.addressing.mgmt.get("ipv4", "172.20.20.0/24")), + }, + "topology": { + "nodes": {}, + "links": [], + }, + } + + mgmt_bridge = topology.addressing.mgmt.get("_bridge", "") + if mgmt_bridge: + clab["mgmt"]["bridge"] = str(mgmt_bridge) + if topology.defaults.addressing.mgmt.get("ipv6"): + clab["mgmt"]["ipv6-subnet"] = str(topology.defaults.addressing.mgmt.ipv6) + + # --- Nodes --- + for nname, ndata in topology.nodes.items(): + if ndata.get("unmanaged", False): + continue + if nname in local_nodes: + clab["topology"]["nodes"][nname] = _build_clab_node(nname, ndata, topology) + + # --- Links --- + bridges_needed: set = set() + vxlan_tunnels: list = [] + + for link in topology.links: + local_intfs = [i for i in link.get("interfaces", []) if i.node in local_nodes] + if not local_intfs: + continue + + is_cross = link.get("_ms", {}).get("cross", False) + node_count = link.get("node_count", len(link.get("interfaces", []))) + + # ---- Uplink (macvlan) ---- + if link.get("clab", {}).get("uplink", False): + for intf in local_intfs: + clab_name = _intf_clab_name(intf) + 
clab["topology"]["links"].append({"endpoints": [f"{intf.node}:{clab_name}", f"macvlan:{link.clab.uplink}"]}) + continue + + # ---- Fully local link ---- + if not is_cross: + _render_local_link(clab, link, local_intfs, node_count, bridges_needed, multilab_id, topology) + continue + + # ---- Cross-server P2P (clab native VXLAN) ---- + if node_count == 2: + _render_p2p_vxlan(clab, link, sid, server_map, local_intfs, assignment, dstport) + continue + + # ---- Cross-server multi-access (bridge + host VXLAN) ---- + _render_bridge_vxlan( + clab, + link, + sid, + server_map, + local_intfs, + assignment, + bridges_needed, + vxlan_tunnels, + dstport, + multilab_id, + topology, + ) + + # --- Bridge nodes --- + bridge_type = str(topology.defaults.providers.clab.get("bridge_type", "bridge")) + for brname in sorted(bridges_needed): + clab["topology"]["nodes"][brname] = {"kind": bridge_type} + + if not clab["topology"]["links"]: + del clab["topology"]["links"] + + return clab, vxlan_tunnels + + +def _render_local_link( + clab: dict, link: Box, local_intfs: list, node_count: int, bridges_needed: set, multilab_id: int, topology: Box +) -> None: + """Render a fully-local link (all endpoints on the same server).""" + + # Stub link + if node_count == 1 and local_intfs: + intf = local_intfs[0] + clab["topology"]["links"].append( + { + "type": "dummy", + "endpoint": {"node": intf.node, "interface": _intf_clab_name(intf)}, + } + ) + return + + # P2P link + if node_count == 2: + endpoints = [f"{i.node}:{_intf_clab_name(i)}" for i in local_intfs] + if len(endpoints) == 2: + clab["topology"]["links"].append({"endpoints": endpoints}) + return + + # Multi-access link (bridge) + if node_count > 2 and link.get("bridge"): + bridge = link.bridge + if not link.get("clab", {}).get("external_bridge", False): + bridges_needed.add(bridge) + for intf in local_intfs: + ndata = topology.nodes[intf.node] + bridge_intf = f"bni{multilab_id}n{ndata.id}i{intf.ifindex}" + clab["topology"]["links"].append( + { 
+ "endpoints": [ + f"{intf.node}:{_intf_clab_name(intf)}", + f"{bridge}:{bridge_intf}", + ] + } + ) + + +def _render_p2p_vxlan( + clab: dict, link: Box, local_sid: int, server_map: dict, local_intfs: list, assignment: dict, dstport: int +) -> None: + """Render a P2P cross-server link as a containerlab native VXLAN endpoint.""" + if not local_intfs: + return + + vni = link._ms.vni + local_intf = local_intfs[0] + + # Find the remote server + remote_sid = None + for intf in link.get("interfaces", []): + s = assignment.get(intf.node) + if s is not None and s != local_sid: + remote_sid = s + break + + if remote_sid is None: + return + + clab_name = _intf_clab_name(local_intf) + clab["topology"]["links"].append( + { + "endpoints": [ + f"{local_intf.node}:{clab_name}", + f"host:vx{vni}", + ], + "type": "vxlan", + "remote": str(server_map[remote_sid].host), + "vni": vni, + "udp-port": dstport, + } + ) + + +def _render_bridge_vxlan( + clab: dict, + link: Box, + local_sid: int, + server_map: dict, + local_intfs: list, + assignment: dict, + bridges_needed: set, + vxlan_tunnels: list, + dstport: int, + multilab_id: int, + topology: Box, +) -> None: + """Render a multi-access cross-server link: local bridge + host VXLAN tunnels.""" + vni = link._ms.vni + bridge = link.get("bridge", f"br{link.linkindex}") + + if not link.get("clab", {}).get("external_bridge", False): + bridges_needed.add(bridge) + + # Local node-to-bridge connections + for intf in local_intfs: + ndata = topology.nodes[intf.node] + bridge_intf = f"bni{multilab_id}n{ndata.id}i{intf.ifindex}" + clab["topology"]["links"].append( + { + "endpoints": [ + f"{intf.node}:{_intf_clab_name(intf)}", + f"{bridge}:{bridge_intf}", + ] + } + ) + + # VXLAN tunnels to each remote server that has endpoints on this link + remote_sids: set = set() + for intf in link.get("interfaces", []): + s = assignment.get(intf.node) + if s is not None and s != local_sid: + remote_sids.add(s) + + for rsid in sorted(remote_sids): + 
vxlan_tunnels.append( + { + "bridge": bridge, + "vni": vni, + "remote": str(server_map[rsid].host), + "dstport": dstport, + "remote_id": rsid, + } + ) + + +# --------------------------------------------------------------------------- +# File operations +# --------------------------------------------------------------------------- + + +def _write_server_snapshot(topology: Box, local_nodes: set, out_dir: str) -> None: + """Write a filtered netlab snapshot containing only this server's nodes. + + This allows 'netlab up --snapshot' to work correctly from a per-server + directory — only local nodes will be targeted for configuration deployment. + + Note: make_paths_absolute() must be called on the copy before pickling so + that the computed f_files / f_tasks / f_dirs keys are present in the + snapshot. The main netlab snapshot (outputs/pickle.py) is written *after* + create.py calls make_paths_absolute(), so it already contains those keys. + Plugin output() hooks run *before* that call, so we have to do it ourselves. + """ + from netsim import __version__ + from netsim.augment.config import make_paths_absolute + from netsim.augment.topology import cleanup_topology + + topo_copy = Box(topology, box_dots=True) + + # Filter nodes to only those on this server + topo_copy.nodes = Box({n: v for n, v in topo_copy.nodes.items() if n in local_nodes}, box_dots=True) + + # Filter links to only those with at least one local endpoint + topo_copy.links = [l for l in topo_copy.links if any(i.node in local_nodes for i in l.get("interfaces", []))] + + # Expand paths (add f_files / f_tasks / f_dirs computed keys). + # create.py calls make_paths_absolute() AFTER plugin output() hooks, so the + # main snapshot has these keys but our per-server copies don't yet. + # netlab initial relies on topology.defaults.paths.t_files.f_files, so we + # must add them before pickling. 
+ make_paths_absolute(topo_copy.defaults.paths) + + # Remove prefix generators and serialize + cleaned = cleanup_topology(topo_copy) + topodict = cleaned.to_dict() + topodict["_netlab_version"] = __version__ + + with open(Path(out_dir) / "netlab.snapshot.pickle", "wb") as f: + pickle.dump(topodict, f) + + +def _write_vxlan_scripts(out_dir: str, tunnels: list, dev: str) -> None: + """Generate bash scripts to create/destroy host-level VXLAN tunnels.""" + + setup = [ + "#!/bin/bash", + "# VXLAN tunnel setup — generated by netlab multiserver plugin", + "# Run AFTER: sudo clab deploy -t clab.yml", + "#", + "# Creates host-level VXLAN tunnels and attaches them to containerlab bridges.", + "# These tunnels carry multi-access (bridged) cross-server traffic.", + "set -e", + "", + ] + + teardown = [ + "#!/bin/bash", + "# VXLAN tunnel teardown — generated by netlab multiserver plugin", + "# Run BEFORE: sudo clab destroy -t clab.yml", + "set -e", + "", + ] + + seen: set = set() + for t in tunnels: + vx_name = f"vxlan{t['vni']}" + key = (vx_name, t["remote"]) + if key in seen: + continue + seen.add(key) + + setup.extend( + [ + f"# VNI {t['vni']} -> {t['remote']} (server {t['remote_id']}) via bridge {t['bridge']}", + f"ip link add {vx_name} type vxlan id {t['vni']} remote {t['remote']} dev {dev} dstport {t['dstport']}", + f"ip link set {vx_name} master {t['bridge']}", + f"ip link set {vx_name} up", + f'echo " {vx_name} -> {t["bridge"]} (remote {t["remote"]})"', + "", + ] + ) + + teardown.append(f'ip link del {vx_name} 2>/dev/null && echo " deleted {vx_name}" || true') + + setup.append('echo "VXLAN setup complete."') + teardown.extend(["", 'echo "VXLAN teardown complete."']) + + for name, lines in [("vxlan-setup.sh", setup), ("vxlan-teardown.sh", teardown)]: + path = Path(out_dir) / name + path.write_text("\n".join(lines) + "\n") + os.chmod(path, 0o755) diff --git a/tests/topology/expected/multiserver-auto.yml b/tests/topology/expected/multiserver-auto.yml new file mode 100644 
index 0000000000..747ec73d67 --- /dev/null +++ b/tests/topology/expected/multiserver-auto.yml @@ -0,0 +1,358 @@ +--- +_multiserver: + assignment: + g1_n1: 1 + g1_n2: 1 + g2_n1: 2 + g2_n2: 2 + replicated: + - mon_srv + server_map: + 1: + host: 192.168.128.1 + id: 1 + 2: + host: 192.168.128.2 + id: 2 +groups: + g1: + members: + - g1_n1 + - g1_n2 + g2: + members: + - g2_n1 + - g2_n2 +input: +- topology/input/multiserver-auto.yml +- package:topology-defaults.yml +links: +- _linkname: links[1] + _ms: + cross: false + servers: + - 1 + interfaces: + - ifindex: 1 + ifname: Ethernet1 + ipv4: 10.1.0.1/30 + node: g1_n1 + - ifindex: 1 + ifname: Ethernet1 + ipv4: 10.1.0.2/30 + node: g1_n2 + linkindex: 1 + node_count: 2 + prefix: + ipv4: 10.1.0.0/30 + type: p2p +- _linkname: links[2] + _ms: + cross: false + servers: + - 2 + interfaces: + - ifindex: 1 + ifname: Ethernet1 + ipv4: 10.1.0.5/30 + node: g2_n1 + - ifindex: 1 + ifname: Ethernet1 + ipv4: 10.1.0.6/30 + node: g2_n2 + linkindex: 2 + node_count: 2 + prefix: + ipv4: 10.1.0.4/30 + type: p2p +- _linkname: links[3] + _ms: + cross: false + servers: + - 1 + interfaces: + - ifindex: 1 + ifname: Ethernet1 + ipv4: 10.1.0.10/30 + node: mon_srv + - ifindex: 2 + ifname: Ethernet2 + ipv4: 10.1.0.9/30 + node: g1_n1 + linkindex: 3 + node_count: 2 + prefix: + ipv4: 10.1.0.8/30 + type: p2p +- _linkname: links[4] + _ms: + cross: false + servers: + - 2 + interfaces: + - ifindex: 2 + ifname: Ethernet2 + ipv4: 10.1.0.14/30 + node: mon_srv + - ifindex: 2 + ifname: Ethernet2 + ipv4: 10.1.0.13/30 + node: g2_n1 + linkindex: 4 + node_count: 2 + prefix: + ipv4: 10.1.0.12/30 + type: p2p +multiserver: + assignment: auto + output_dir: server-{server_id} + replicate: + - mon_srv + servers: + - host: 192.168.128.1 + id: 1 + - host: 192.168.128.2 + id: 2 + vxlan: + dev: ens33 + dstport: 4789 + vni_base: 10000 +name: input +nodes: + g1_n1: + af: + ipv4: true + box: ceos:4.34.2F + clab: + env: + CLAB_MGMT_VRF: management + INTFTYPE: et + kind: ceos + device: 
eos + hostname: clab-input-g1_n1 + id: 1 + interfaces: + - clab: + name: et1 + ifindex: 1 + ifname: Ethernet1 + ipv4: 10.1.0.1/30 + linkindex: 1 + mac_address: caf0.0001.0001 + name: g1_n1 -> g1_n2 + neighbors: + - ifname: Ethernet1 + ipv4: 10.1.0.2/30 + node: g1_n2 + type: p2p + - clab: + name: et2 + ifindex: 2 + ifname: Ethernet2 + ipv4: 10.1.0.9/30 + linkindex: 3 + mac_address: caf0.0001.0002 + name: g1_n1 -> mon_srv + neighbors: + - ifname: Ethernet1 + ipv4: 10.1.0.10/30 + node: mon_srv + type: p2p + loopback: + ifindex: 0 + ifname: Loopback0 + ipv4: 10.0.0.1/32 + neighbors: [] + type: loopback + virtual_interface: true + mgmt: + ifname: Management0 + ipv4: 192.168.121.101 + mac: ca:fe:00:01:00:00 + name: g1_n1 + role: router + g1_n2: + af: + ipv4: true + box: ceos:4.34.2F + clab: + env: + CLAB_MGMT_VRF: management + INTFTYPE: et + kind: ceos + device: eos + hostname: clab-input-g1_n2 + id: 2 + interfaces: + - clab: + name: et1 + ifindex: 1 + ifname: Ethernet1 + ipv4: 10.1.0.2/30 + linkindex: 1 + mac_address: caf0.0002.0001 + name: g1_n2 -> g1_n1 + neighbors: + - ifname: Ethernet1 + ipv4: 10.1.0.1/30 + node: g1_n1 + type: p2p + loopback: + ifindex: 0 + ifname: Loopback0 + ipv4: 10.0.0.2/32 + neighbors: [] + type: loopback + virtual_interface: true + mgmt: + ifname: Management0 + ipv4: 192.168.121.102 + mac: ca:fe:00:02:00:00 + name: g1_n2 + role: router + g2_n1: + af: + ipv4: true + box: ceos:4.34.2F + clab: + env: + CLAB_MGMT_VRF: management + INTFTYPE: et + kind: ceos + device: eos + hostname: clab-input-g2_n1 + id: 3 + interfaces: + - clab: + name: et1 + ifindex: 1 + ifname: Ethernet1 + ipv4: 10.1.0.5/30 + linkindex: 2 + mac_address: caf0.0003.0001 + name: g2_n1 -> g2_n2 + neighbors: + - ifname: Ethernet1 + ipv4: 10.1.0.6/30 + node: g2_n2 + type: p2p + - clab: + name: et2 + ifindex: 2 + ifname: Ethernet2 + ipv4: 10.1.0.13/30 + linkindex: 4 + mac_address: caf0.0003.0002 + name: g2_n1 -> mon_srv + neighbors: + - ifname: Ethernet2 + ipv4: 10.1.0.14/30 + node: 
mon_srv + type: p2p + loopback: + ifindex: 0 + ifname: Loopback0 + ipv4: 10.0.0.3/32 + neighbors: [] + type: loopback + virtual_interface: true + mgmt: + ifname: Management0 + ipv4: 192.168.121.103 + mac: ca:fe:00:03:00:00 + name: g2_n1 + role: router + g2_n2: + af: + ipv4: true + box: ceos:4.34.2F + clab: + env: + CLAB_MGMT_VRF: management + INTFTYPE: et + kind: ceos + device: eos + hostname: clab-input-g2_n2 + id: 4 + interfaces: + - clab: + name: et1 + ifindex: 1 + ifname: Ethernet1 + ipv4: 10.1.0.6/30 + linkindex: 2 + mac_address: caf0.0004.0001 + name: g2_n2 -> g2_n1 + neighbors: + - ifname: Ethernet1 + ipv4: 10.1.0.5/30 + node: g2_n1 + type: p2p + loopback: + ifindex: 0 + ifname: Loopback0 + ipv4: 10.0.0.4/32 + neighbors: [] + type: loopback + virtual_interface: true + mgmt: + ifname: Management0 + ipv4: 192.168.121.104 + mac: ca:fe:00:04:00:00 + name: g2_n2 + role: router + mon_srv: + af: + ipv4: true + box: ceos:4.34.2F + clab: + env: + CLAB_MGMT_VRF: management + INTFTYPE: et + kind: ceos + device: eos + hostname: clab-input-mon_srv + id: 5 + interfaces: + - clab: + name: et1 + ifindex: 1 + ifname: Ethernet1 + ipv4: 10.1.0.10/30 + linkindex: 3 + mac_address: caf0.0005.0001 + name: mon_srv -> g1_n1 + neighbors: + - ifname: Ethernet2 + ipv4: 10.1.0.9/30 + node: g1_n1 + type: p2p + - clab: + name: et2 + ifindex: 2 + ifname: Ethernet2 + ipv4: 10.1.0.14/30 + linkindex: 4 + mac_address: caf0.0005.0002 + name: mon_srv -> g2_n1 + neighbors: + - ifname: Ethernet2 + ipv4: 10.1.0.13/30 + node: g2_n1 + type: p2p + loopback: + ifindex: 0 + ifname: Loopback0 + ipv4: 10.0.0.5/32 + neighbors: [] + type: loopback + virtual_interface: true + mgmt: + ifname: Management0 + ipv4: 192.168.121.105 + mac: ca:fe:00:05:00:00 + name: mon_srv + role: router +plugin: +- multiserver +provider: clab diff --git a/tests/topology/expected/multiserver-explicit.yml b/tests/topology/expected/multiserver-explicit.yml new file mode 100644 index 0000000000..f81b0f51f4 --- /dev/null +++ 
b/tests/topology/expected/multiserver-explicit.yml @@ -0,0 +1,382 @@ +--- +_multiserver: + assignment: + s1_g1: 1 + s1_member: 1 + s2_g1: 2 + s2_member: 2 + replicated: [] + server_map: + 1: + groups: + - server1_nodes + host: 192.168.128.1 + id: 1 + members: + - s1_member + vxlan_dev: eth1 + 2: + groups: + - server2_nodes + host: 192.168.128.2 + id: 2 + members: + - s2_member + vxlan_dev: eth1 +groups: + server1_nodes: + members: + - s1_g1 + server2_nodes: + members: + - s2_g1 +input: +- topology/input/multiserver-explicit.yml +- package:topology-defaults.yml +links: +- _linkname: links[1] + _ms: + cross: false + servers: + - 1 + interfaces: + - ifindex: 1 + ifname: Ethernet1 + ipv4: 10.1.0.1/30 + node: s1_g1 + - ifindex: 1 + ifname: Ethernet1 + ipv4: 10.1.0.2/30 + node: s1_member + linkindex: 1 + node_count: 2 + prefix: + ipv4: 10.1.0.0/30 + type: p2p +- _linkname: links[2] + _ms: + cross: false + servers: + - 2 + interfaces: + - ifindex: 1 + ifname: Ethernet1 + ipv4: 10.1.0.5/30 + node: s2_g1 + - ifindex: 1 + ifname: Ethernet1 + ipv4: 10.1.0.6/30 + node: s2_member + linkindex: 2 + node_count: 2 + prefix: + ipv4: 10.1.0.4/30 + type: p2p +- _linkname: links[3] + _ms: + cross: true + servers: + - 1 + - 2 + vni: 20000 + interfaces: + - ifindex: 2 + ifname: Ethernet2 + ipv4: 10.1.0.9/30 + node: s1_g1 + - ifindex: 2 + ifname: Ethernet2 + ipv4: 10.1.0.10/30 + node: s2_g1 + linkindex: 3 + node_count: 2 + prefix: + ipv4: 10.1.0.8/30 + type: p2p +- _linkname: links[4] + _ms: + cross: true + servers: + - 1 + - 2 + vni: 20001 + bridge: input_4 + interfaces: + - ifindex: 2 + ifname: Ethernet2 + ipv4: 172.16.0.2/24 + node: s1_member + - ifindex: 2 + ifname: Ethernet2 + ipv4: 172.16.0.4/24 + node: s2_member + - ifindex: 3 + ifname: Ethernet3 + ipv4: 172.16.0.1/24 + node: s1_g1 + linkindex: 4 + node_count: 3 + prefix: + ipv4: 172.16.0.0/24 + type: lan +multiserver: + assignment: explicit + output_dir: server-{server_id} + replicate: [] + servers: + - groups: + - server1_nodes + 
host: 192.168.128.1 + id: 1 + members: + - s1_member + vxlan_dev: eth1 + - groups: + - server2_nodes + host: 192.168.128.2 + id: 2 + members: + - s2_member + vxlan_dev: eth1 + vxlan: + dev: eth1 + dstport: 4789 + vni_base: 20000 +name: input +nodes: + s1_g1: + af: + ipv4: true + box: ceos:4.34.2F + clab: + env: + CLAB_MGMT_VRF: management + INTFTYPE: et + kind: ceos + device: eos + hostname: clab-input-s1_g1 + id: 1 + interfaces: + - clab: + name: et1 + ifindex: 1 + ifname: Ethernet1 + ipv4: 10.1.0.1/30 + linkindex: 1 + mac_address: caf0.0001.0001 + name: s1_g1 -> s1_member + neighbors: + - ifname: Ethernet1 + ipv4: 10.1.0.2/30 + node: s1_member + type: p2p + - clab: + name: et2 + ifindex: 2 + ifname: Ethernet2 + ipv4: 10.1.0.9/30 + linkindex: 3 + mac_address: caf0.0001.0002 + name: s1_g1 -> s2_g1 + neighbors: + - ifname: Ethernet2 + ipv4: 10.1.0.10/30 + node: s2_g1 + type: p2p + - bridge: input_4 + clab: + name: et3 + ifindex: 3 + ifname: Ethernet3 + ipv4: 172.16.0.1/24 + linkindex: 4 + mac_address: caf0.0001.0003 + name: s1_g1 -> [s1_member,s2_member] + neighbors: + - ifname: Ethernet2 + ipv4: 172.16.0.2/24 + node: s1_member + - ifname: Ethernet2 + ipv4: 172.16.0.4/24 + node: s2_member + type: lan + loopback: + ifindex: 0 + ifname: Loopback0 + ipv4: 10.0.0.1/32 + neighbors: [] + type: loopback + virtual_interface: true + mgmt: + ifname: Management0 + ipv4: 192.168.121.101 + mac: ca:fe:00:01:00:00 + name: s1_g1 + role: router + s1_member: + af: + ipv4: true + box: ceos:4.34.2F + clab: + env: + CLAB_MGMT_VRF: management + INTFTYPE: et + kind: ceos + device: eos + hostname: clab-input-s1_member + id: 2 + interfaces: + - clab: + name: et1 + ifindex: 1 + ifname: Ethernet1 + ipv4: 10.1.0.2/30 + linkindex: 1 + mac_address: caf0.0002.0001 + name: s1_member -> s1_g1 + neighbors: + - ifname: Ethernet1 + ipv4: 10.1.0.1/30 + node: s1_g1 + type: p2p + - bridge: input_4 + clab: + name: et2 + ifindex: 2 + ifname: Ethernet2 + ipv4: 172.16.0.2/24 + linkindex: 4 + mac_address: 
caf0.0002.0002 + name: s1_member -> [s2_member,s1_g1] + neighbors: + - ifname: Ethernet2 + ipv4: 172.16.0.4/24 + node: s2_member + - ifname: Ethernet3 + ipv4: 172.16.0.1/24 + node: s1_g1 + type: lan + loopback: + ifindex: 0 + ifname: Loopback0 + ipv4: 10.0.0.2/32 + neighbors: [] + type: loopback + virtual_interface: true + mgmt: + ifname: Management0 + ipv4: 192.168.121.102 + mac: ca:fe:00:02:00:00 + name: s1_member + role: router + s2_g1: + af: + ipv4: true + box: ceos:4.34.2F + clab: + env: + CLAB_MGMT_VRF: management + INTFTYPE: et + kind: ceos + device: eos + hostname: clab-input-s2_g1 + id: 3 + interfaces: + - clab: + name: et1 + ifindex: 1 + ifname: Ethernet1 + ipv4: 10.1.0.5/30 + linkindex: 2 + mac_address: caf0.0003.0001 + name: s2_g1 -> s2_member + neighbors: + - ifname: Ethernet1 + ipv4: 10.1.0.6/30 + node: s2_member + type: p2p + - clab: + name: et2 + ifindex: 2 + ifname: Ethernet2 + ipv4: 10.1.0.10/30 + linkindex: 3 + mac_address: caf0.0003.0002 + name: s2_g1 -> s1_g1 + neighbors: + - ifname: Ethernet2 + ipv4: 10.1.0.9/30 + node: s1_g1 + type: p2p + loopback: + ifindex: 0 + ifname: Loopback0 + ipv4: 10.0.0.3/32 + neighbors: [] + type: loopback + virtual_interface: true + mgmt: + ifname: Management0 + ipv4: 192.168.121.103 + mac: ca:fe:00:03:00:00 + name: s2_g1 + role: router + s2_member: + af: + ipv4: true + box: ceos:4.34.2F + clab: + env: + CLAB_MGMT_VRF: management + INTFTYPE: et + kind: ceos + device: eos + hostname: clab-input-s2_member + id: 4 + interfaces: + - clab: + name: et1 + ifindex: 1 + ifname: Ethernet1 + ipv4: 10.1.0.6/30 + linkindex: 2 + mac_address: caf0.0004.0001 + name: s2_member -> s2_g1 + neighbors: + - ifname: Ethernet1 + ipv4: 10.1.0.5/30 + node: s2_g1 + type: p2p + - bridge: input_4 + clab: + name: et2 + ifindex: 2 + ifname: Ethernet2 + ipv4: 172.16.0.4/24 + linkindex: 4 + mac_address: caf0.0004.0002 + name: s2_member -> [s1_member,s1_g1] + neighbors: + - ifname: Ethernet2 + ipv4: 172.16.0.2/24 + node: s1_member + - ifname: 
Ethernet3 + ipv4: 172.16.0.1/24 + node: s1_g1 + type: lan + loopback: + ifindex: 0 + ifname: Loopback0 + ipv4: 10.0.0.4/32 + neighbors: [] + type: loopback + virtual_interface: true + mgmt: + ifname: Management0 + ipv4: 192.168.121.104 + mac: ca:fe:00:04:00:00 + name: s2_member + role: router +plugin: +- multiserver +provider: clab diff --git a/tests/topology/input/multiserver-auto.yml b/tests/topology/input/multiserver-auto.yml new file mode 100644 index 0000000000..a4d8739117 --- /dev/null +++ b/tests/topology/input/multiserver-auto.yml @@ -0,0 +1,45 @@ +--- +# Test automatic assignment and replication in multiserver plugin +# +provider: clab +plugin: [ multiserver ] + +multiserver: + servers: + - id: 1 + host: 192.168.128.1 + - id: 2 + host: 192.168.128.2 + assignment: auto + replicate: [ mon_srv ] + +groups: + g1: + members: [ g1_n1, g1_n2 ] + g2: + members: [ g2_n1, g2_n2 ] + +nodes: + g1_n1: + device: eos + g1_n2: + device: eos + g2_n1: + device: eos + g2_n2: + device: eos + mon_srv: + device: eos + +links: + # Internal links within group 1 +- g1_n1: + g1_n2: + # Internal links within group 2 +- g2_n1: + g2_n2: + # Connected to replicated node +- mon_srv: + g1_n1: +- mon_srv: + g2_n1: diff --git a/tests/topology/input/multiserver-explicit.yml b/tests/topology/input/multiserver-explicit.yml new file mode 100644 index 0000000000..b50830025c --- /dev/null +++ b/tests/topology/input/multiserver-explicit.yml @@ -0,0 +1,54 @@ +--- +# Test explicit assignment in multiserver plugin +# +provider: clab +plugin: [ multiserver ] + +multiserver: + servers: + - id: 1 + host: 192.168.128.1 + groups: [ server1_nodes ] + members: [ s1_member ] + vxlan_dev: eth1 + - id: 2 + host: 192.168.128.2 + groups: [ server2_nodes ] + members: [ s2_member ] + vxlan_dev: eth1 + assignment: explicit + vxlan: + vni_base: 20000 + dstport: 4789 + dev: eth1 + +groups: + server1_nodes: + members: [ s1_g1 ] + server2_nodes: + members: [ s2_g1 ] + +nodes: + s1_g1: + device: eos + s1_member: + 
device: eos + s2_g1: + device: eos + s2_member: + device: eos + +links: + # Local link on server 1 +- s1_g1: + s1_member: + # Local link on server 2 +- s2_g1: + s2_member: + # Cross-server P2P link +- s1_g1: + s2_g1: + # Cross-server multi-access bridge link (3+ endpoints across servers) +- s1_member: + s2_member: + s1_g1: From c0c2a1ad33247171a877fe29f88f3bade2a9efe5 Mon Sep 17 00:00:00 2001 From: Muddyblack Date: Wed, 20 May 2026 18:20:35 +0200 Subject: [PATCH 2/4] explain interface overriding --- docs/plugins/multiserver.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/plugins/multiserver.md b/docs/plugins/multiserver.md index 8503ae16f0..445262701f 100644 --- a/docs/plugins/multiserver.md +++ b/docs/plugins/multiserver.md @@ -60,6 +60,8 @@ Global VXLAN settings are specified in the **multiserver.vxlan** dictionary: | **dstport** | integer | UDP destination port for VXLAN traffic (default: `4789`) | | **dev** | string | Default physical interface to bind VXLAN tunnels (default: `ens33`) | +By default, VXLAN tunnels bind to the global default interface specified in **multiserver.vxlan.dev** (which falls back to `ens33` if not configured). If your physical servers use different interface names, you can override this interface per-server using the **vxlan_dev** parameter under each server in the **multiserver.servers** list. + (multiserver-assignment)= ## Assignment Modes @@ -194,12 +196,14 @@ multiserver: - id: 1 host: 192.168.168.128 groups: [ spines ] + vxlan_dev: ens33 # Override per-server (optional) - id: 2 host: 192.168.168.129 groups: [ leaves ] + vxlan_dev: eth0 # Override per-server (optional) vxlan: vni_base: 10000 - dev: ens33 + dev: ens33 # Global default interface ``` This places spines on server 1 and leaves on server 2. All four links cross servers and are provisioned as containerlab native VXLAN endpoints. 
From cdb71d53f43e29e73af3272b4dca38ff1e0ac39b Mon Sep 17 00:00:00 2001 From: Muddyblack Date: Wed, 20 May 2026 19:14:04 +0200 Subject: [PATCH 3/4] divide and conquer --- netsim/extra/multiserver/plugin.py | 482 ++++++++++++----------------- 1 file changed, 196 insertions(+), 286 deletions(-) diff --git a/netsim/extra/multiserver/plugin.py b/netsim/extra/multiserver/plugin.py index 888e39d96c..e246f87662 100644 --- a/netsim/extra/multiserver/plugin.py +++ b/netsim/extra/multiserver/plugin.py @@ -2,55 +2,7 @@ multiserver plugin — split a netlab topology across multiple physical servers. Generates per-server containerlab topology files with cross-server VXLAN links. -Requires containerlab >= 0.46 for native VXLAN link support. - -Cross-server links: - - * P2P links (2 endpoints) → containerlab native VXLAN (type: vxlan in clab.yml) - * Multi-access links (3+ endpoints, bridge) → local bridge + host-level VXLAN tunnel - created by a generated vxlan-setup.sh script - -Server assignment modes: - - * explicit (default) — user assigns nodes via groups/members, unassigned nodes cause - an error. Best when you need precise control over placement. - * auto — unassigned nodes are distributed round-robin across servers. Use this for - automatic splitting: just define the servers and let the plugin balance the nodes. - -Group granularity (auto mode): - - Auto mode keeps entire netlab groups together on one server. Define groups at - the smallest unit you want to keep on a single server. Parent/aggregate groups - are fine — child groups defined first will claim their members before the parent - is reached. See docs/plugins/multiserver.md for details and examples. 
- -Explicit assignment example: - - plugin: [ multiserver ] - - multiserver: - servers: - - id: 1 - host: 192.168.168.128 - groups: [ hubs ] - members: [ extra-node ] - - id: 2 - host: 10.0.0.67 - groups: [ spines, leaves ] - assignment: explicit - -Automatic splitting example (no groups/members needed): - - plugin: [ multiserver ] - - multiserver: - servers: - - id: 1 - host: 192.168.168.128 - - id: 2 - host: 10.0.0.67 - assignment: auto - replicate: [ prometheus, grafana ] +See docs/plugins/multiserver.md for usage, examples, and configuration reference. """ import os @@ -60,13 +12,13 @@ import yaml from box import Box -from packaging import version as _pv from netsim.data import append_to_list from netsim.utils import log _execute_after = ["fabric", "node.clone"] + # --------------------------------------------------------------------------- # Hook: init — validate config + register output hook # --------------------------------------------------------------------------- @@ -80,12 +32,9 @@ def init(topology: Box) -> None: # Merge plugin defaults with user config (user values take priority) defaults = topology.defaults.get("multiserver", Box({})) topology.multiserver = defaults + ms - ms = topology.multiserver servers = ms.get("servers", []) - # Currently only containerlab is supported — generating per-server Vagrantfiles - # for libvirt/virtualbox would require reimplementing the Vagrant Ruby DSL provider = topology.get("provider", "") or topology.defaults.get("provider", "") if provider and provider != "clab": log.error( @@ -96,19 +45,6 @@ def init(topology: Box) -> None: ) return - # Cross-server P2P links use containerlab native VXLAN endpoints (type: vxlan), - # available since containerlab 0.46. netlab already requires >= 0.75 so this - # should always pass, but check explicitly in case the requirement is relaxed. 
- clab_min = "0.46.0" - clab_ver = str(topology.defaults.providers.clab.get("version", "0.0.0")) - if _pv.Version(clab_ver) < _pv.Version(clab_min): - log.error( - f"multiserver plugin requires containerlab >= {clab_min} for VXLAN links (netlab targets {clab_ver})", - log.IncorrectValue, - "multiserver", - ) - return - if not servers: log.error('multiserver plugin requires a "servers" list', log.MissingValue, "multiserver") return @@ -117,18 +53,7 @@ def init(topology: Box) -> None: log.error("multiserver plugin requires at least 2 servers", log.IncorrectValue, "multiserver") return - seen_ids: set = set() - for idx, s in enumerate(servers): - if "id" not in s: - log.error(f'Server entry #{idx + 1} missing required "id" field', log.MissingValue, "multiserver") - continue - if "host" not in s: - log.error(f'Server {s.id} missing required "host" field', log.MissingValue, "multiserver") - continue - if s.id in seen_ids: - log.error(f"Duplicate server id {s.id}", log.IncorrectValue, "multiserver") - seen_ids.add(s.id) - + _validate_servers(servers) log.exit_on_error() # Register the output hook so netlab create calls our output() function @@ -145,54 +70,13 @@ def post_transform(topology: Box) -> None: if not ms: return - servers = ms.servers - server_map = {s.id: s for s in servers} - assignment: dict = {} # node_name -> server_id - # --- Resolve replicated nodes (present on every server) --- - replicated: set = set() - for entry in ms.get("replicate", []): - if entry in topology.nodes: - replicated.add(entry) - elif entry in topology.get("groups", {}): - for member in topology.groups[entry].get("members", []): - replicated.add(member) - else: - log.error(f'multiserver.replicate: "{entry}" is not a node or group', log.IncorrectValue, "multiserver") - - # --- Resolve assignments from server groups + members --- - for server in servers: - for gname in server.get("groups", []): - grp = topology.get("groups", {}).get(gname, None) - if grp is None: - log.error(f'Server 
{server.id} references unknown group "{gname}"', log.IncorrectValue, "multiserver") - continue - for member in grp.get("members", []): - if member in assignment and assignment[member] != server.id: - log.error( - f"Node {member} assigned to both server {assignment[member]} and {server.id}", - log.IncorrectValue, - "multiserver", - ) - assignment[member] = server.id - - for member in server.get("members", []): - if member not in topology.nodes: - log.error(f'Server {server.id} references unknown node "{member}"', log.IncorrectValue, "multiserver") - continue - if member in assignment and assignment[member] != server.id: - log.error( - f"Node {member} assigned to both server {assignment[member]} and {server.id}", - log.IncorrectValue, - "multiserver", - ) - assignment[member] = server.id - - # --- Handle unassigned nodes (replicated nodes are exempt) --- - unassigned = set(n for n in topology.nodes if n not in assignment and n not in replicated) + server_map = {s.id: s for s in ms.servers} + replicated = _resolve_replicated(ms, topology) + assignment = _resolve_assignments(ms.servers, topology) - mode = ms.get("assignment", "explicit") + unassigned = {n for n in topology.nodes if n not in assignment and n not in replicated} if unassigned: - if mode == "explicit": + if ms.get("assignment", "explicit") == "explicit": log.error( f"Nodes not assigned to any server: {', '.join(sorted(unassigned))}", log.MissingValue, @@ -203,112 +87,21 @@ def post_transform(topology: Box) -> None: ], ) else: - sorted_sids = sorted(server_map.keys()) - - # Distribute by netlab group to keep related nodes on the same server. - # Groups are assigned round-robin by size (largest first) for balance. - # Ungrouped nodes are distributed individually at the end. 
- group_buckets: list = [] # [(group_name, [members])] - claimed = set() - for gname, gdata in topology.get("groups", {}).items(): - members = [m for m in gdata.get("members", []) if m in unassigned and m not in claimed] - if members: - group_buckets.append((gname, members)) - claimed.update(members) - - # Sort groups largest-first for better balance - group_buckets.sort(key=lambda g: -len(g[1])) - - # Track node counts per server for balanced distribution - counts = {sid: sum(1 for s in assignment.values() if s == sid) for sid in sorted_sids} - - for gname, members in group_buckets: - # Assign entire group to the server with the fewest nodes - target = min(sorted_sids, key=lambda s: counts[s]) - for m in members: - assignment[m] = target - counts[target] += len(members) - - # Remaining ungrouped nodes: round-robin to least-loaded server - ungrouped = sorted(unassigned - claimed) - for name in ungrouped: - target = min(sorted_sids, key=lambda s: counts[s]) - assignment[name] = target - counts[target] += 1 + _auto_distribute(unassigned, server_map, assignment, topology) log.exit_on_error() - # --- Classify links: local vs cross-server --- vni_base = ms.vxlan.get("vni_base", 10000) - vni = vni_base - cross_count = 0 - - for link in topology.links: - link_servers = set() - for intf in link.get("interfaces", []): - if intf.node in replicated: - continue - sid = assignment.get(intf.node) - if sid is not None: - link_servers.add(sid) - - if len(link_servers) > 1: - link._ms = Box({"cross": True, "vni": vni, "servers": sorted(link_servers)}) - vni += 1 - cross_count += 1 - else: - link._ms = Box( - { - "cross": False, - "servers": sorted(link_servers), - } - ) - - if vni > 16777215: - log.error(f"VXLAN VNI overflow: {vni} exceeds 24-bit maximum (16777215)", log.IncorrectValue, "multiserver") - + cross_count = _classify_links(topology, assignment, replicated, vni_base) log.exit_on_error() - # Store state for output hook - topology._multiserver = Box( - { - "assignment": 
assignment, - "server_map": server_map, - "replicated": sorted(replicated), - } - ) - - # Summary — show which groups and nodes landed on each server - for server in servers: - sid = server.id - server_nodes = sorted(n for n, s in assignment.items() if s == sid) + topology._multiserver = Box({ + "assignment": assignment, + "server_map": server_map, + "replicated": sorted(replicated), + }) - # Figure out which netlab groups are fully on this server - server_groups = [] - for gname, gdata in topology.get("groups", {}).items(): - members = gdata.get("members", []) - if not members: - continue - on_this = [m for m in members if assignment.get(m) == sid] - if on_this and len(on_this) == len([m for m in members if m in assignment]): - server_groups.append(gname) - - n = len(server_nodes) - log.info(f"Server {sid} ({server.host}): {n} nodes", module="multiserver") - if server_groups: - preview = server_groups[:8] - suffix = f" ... +{len(server_groups) - 8} more" if len(server_groups) > 8 else "" - log.info(f" groups: {', '.join(preview)}{suffix}", module="multiserver") - if n <= 20: - log.info(f" nodes: {', '.join(server_nodes)}", module="multiserver") - else: - preview = server_nodes[:6] - log.info(f" nodes: {', '.join(preview)} ... +{n - 6} more", module="multiserver") - - if replicated: - log.info(f"Replicated on all servers: {', '.join(sorted(replicated))}", module="multiserver") - if cross_count: - log.info(f"{cross_count} cross-server links (VNI {vni_base}–{vni - 1})", module="multiserver") + _log_assignment_summary(ms, assignment, replicated, topology, vni_base, cross_count) # --------------------------------------------------------------------------- @@ -380,13 +173,11 @@ def output(topology: Box) -> None: def _distribute_files_atexit(lab_folder: str, server_folders: list) -> None: - """Distribute generated files (node_files, host_vars, ansible.cfg, hosts.yml) - to each server folder. Registered via atexit so it runs AFTER netlab has - written all output files. 
- """ + """Distribute generated files""" lab_path = Path(lab_folder) nf_dir = lab_path / "node_files" hv_dir = lab_path / "host_vars" + server_names = {Path(sf).name for sf, _ in server_folders} for sf, local_nodes in server_folders: sf_path = Path(sf) @@ -399,15 +190,7 @@ def _distribute_files_atexit(lab_folder: str, server_folders: list) -> None: dst_nf.mkdir(exist_ok=True) for item in nf_dir.iterdir(): if item.name in local_nodes or item.name.startswith("-"): - dst = dst_nf / item.name - if not dst.exists(): - try: - if item.is_dir(): - shutil.copytree(item, dst) - else: - shutil.copy2(item, dst) - except Exception: - pass + _copy_if_missing(item, dst_nf / item.name) # host_vars: per-node only if hv_dir.is_dir(): @@ -415,48 +198,189 @@ def _distribute_files_atexit(lab_folder: str, server_folders: list) -> None: dst_hv.mkdir(exist_ok=True) for item in hv_dir.iterdir(): if item.name in local_nodes: - dst = dst_hv / item.name - if not dst.exists(): - try: - if item.is_dir(): - shutil.copytree(item, dst) - else: - shutil.copy2(item, dst) - except Exception: - pass + _copy_if_missing(item, dst_hv / item.name) # Copy all other subdirectories (e.g. group_vars, templates, monitoring) # excluding server folders, node_files, host_vars, and python/git metadata. 
- server_names = {Path(sf).name for sf, _ in server_folders} + skip = server_names | {"node_files", "host_vars", "__pycache__", ".git"} for item in lab_path.iterdir(): - if item.is_dir(): - if item.name in server_names or item.name in ("node_files", "host_vars", "__pycache__", ".git"): - continue - - # Optimization: only copy grafana directory if this server hosts the grafana node - if item.name == "grafana" and "grafana" not in local_nodes: - continue - - dst_dir = sf_path / item.name - if not dst_dir.exists(): - try: - shutil.copytree(item, dst_dir) - except Exception: - pass + if not item.is_dir() or item.name in skip: + continue + # Only copy grafana directory if this server hosts the grafana node + if item.name == "grafana" and "grafana" not in local_nodes: + continue + _copy_if_missing(item, sf_path / item.name) # Ansible inventory and config for fname in ("ansible.cfg", "hosts.yml"): src = lab_path / fname - dst = sf_path / fname - if src.exists() and not dst.exists(): - try: - shutil.copy2(src, dst) - except Exception: - pass + if src.exists(): + _copy_if_missing(src, sf_path / fname) + + +def _copy_if_missing(src: Path, dst: Path) -> None: + if dst.exists(): + return + try: + if src.is_dir(): + shutil.copytree(src, dst) + else: + shutil.copy2(src, dst) + except Exception: + pass # =========================================================================== -# Internal helpers +# Internal helpers — post_transform +# =========================================================================== + + +def _validate_servers(servers: list) -> None: + seen_ids: set = set() + for idx, s in enumerate(servers): + if "id" not in s: + log.error(f'Server entry #{idx + 1} missing required "id" field', log.MissingValue, "multiserver") + continue + if "host" not in s: + log.error(f'Server {s.id} missing required "host" field', log.MissingValue, "multiserver") + continue + if s.id in seen_ids: + log.error(f"Duplicate server id {s.id}", log.IncorrectValue, "multiserver") + 
seen_ids.add(s.id) + + +def _resolve_replicated(ms: Box, topology: Box) -> set: + replicated: set = set() + for entry in ms.get("replicate", []): + if entry in topology.nodes: + replicated.add(entry) + elif entry in topology.get("groups", {}): + for member in topology.groups[entry].get("members", []): + replicated.add(member) + else: + log.error(f'multiserver.replicate: "{entry}" is not a node or group', log.IncorrectValue, "multiserver") + return replicated + + +def _resolve_assignments(servers: list, topology: Box) -> dict: + assignment: dict = {} + for server in servers: + for gname in server.get("groups", []): + grp = topology.get("groups", {}).get(gname, None) + if grp is None: + log.error(f'Server {server.id} references unknown group "{gname}"', log.IncorrectValue, "multiserver") + continue + for member in grp.get("members", []): + if member in assignment and assignment[member] != server.id: + log.error( + f"Node {member} assigned to both server {assignment[member]} and {server.id}", + log.IncorrectValue, + "multiserver", + ) + assignment[member] = server.id + + for member in server.get("members", []): + if member not in topology.nodes: + log.error(f'Server {server.id} references unknown node "{member}"', log.IncorrectValue, "multiserver") + continue + if member in assignment and assignment[member] != server.id: + log.error( + f"Node {member} assigned to both server {assignment[member]} and {server.id}", + log.IncorrectValue, + "multiserver", + ) + assignment[member] = server.id + + return assignment + + +def _auto_distribute(unassigned: set, server_map: dict, assignment: dict, topology: Box) -> None: + """Distribute unassigned nodes across servers, keeping netlab groups together.""" + sorted_sids = sorted(server_map.keys()) + counts = {sid: sum(1 for s in assignment.values() if s == sid) for sid in sorted_sids} + + # Build group buckets: keep group members together, distribute largest groups first + claimed: set = set() + group_buckets: list = [] + for gdata 
in topology.get("groups", {}).values(): + members = [m for m in gdata.get("members", []) if m in unassigned and m not in claimed] + if members: + group_buckets.append(members) + claimed.update(members) + group_buckets.sort(key=lambda g: -len(g)) + + for members in group_buckets: + target = min(sorted_sids, key=lambda s: counts[s]) + for m in members: + assignment[m] = target + counts[target] += len(members) + + # Remaining ungrouped nodes: one by one to least-loaded server + for name in sorted(unassigned - claimed): + target = min(sorted_sids, key=lambda s: counts[s]) + assignment[name] = target + counts[target] += 1 + + +def _classify_links(topology: Box, assignment: dict, replicated: set, vni_base: int) -> int: + """Assign _ms metadata to each link; return the number of cross-server links.""" + vni = vni_base + for link in topology.links: + link_servers = { + assignment[i.node] + for i in link.get("interfaces", []) + if i.node not in replicated and i.node in assignment + } + if len(link_servers) > 1: + link._ms = Box({"cross": True, "vni": vni, "servers": sorted(link_servers)}) + vni += 1 + else: + link._ms = Box({"cross": False, "servers": sorted(link_servers)}) + + if vni > 16777215: + log.error(f"VXLAN VNI overflow: {vni} exceeds 24-bit maximum (16777215)", log.IncorrectValue, "multiserver") + + return vni - vni_base + + +def _log_assignment_summary( + ms: Box, assignment: dict, replicated: set, topology: Box, vni_base: int, cross_count: int +) -> None: + for server in ms.servers: + sid = server.id + server_nodes = sorted(n for n, s in assignment.items() if s == sid) + n = len(server_nodes) + + server_groups = [] + for gname, gdata in topology.get("groups", {}).items(): + members = gdata.get("members", []) + if not members: + continue + on_this = [m for m in members if assignment.get(m) == sid] + assigned = [m for m in members if m in assignment] + if on_this and len(on_this) == len(assigned): + server_groups.append(gname) + + log.info(f"Server {sid} 
({server.host}): {n} nodes", module="multiserver") + if server_groups: + preview = server_groups[:8] + suffix = f" ... +{len(server_groups) - 8} more" if len(server_groups) > 8 else "" + log.info(f" groups: {', '.join(preview)}{suffix}", module="multiserver") + if n <= 20: + log.info(f" nodes: {', '.join(server_nodes)}", module="multiserver") + else: + log.info(f" nodes: {', '.join(server_nodes[:6])} ... +{n - 6} more", module="multiserver") + + if replicated: + log.info(f"Replicated on all servers: {', '.join(sorted(replicated))}", module="multiserver") + if cross_count: + log.info( + f"{cross_count} cross-server links (VNI {vni_base}–{vni_base + cross_count - 1})", module="multiserver" + ) + + +# =========================================================================== +# Internal helpers — clab.yml generation # =========================================================================== @@ -727,12 +651,7 @@ def _render_bridge_vxlan( ) # VXLAN tunnels to each remote server that has endpoints on this link - remote_sids: set = set() - for intf in link.get("interfaces", []): - s = assignment.get(intf.node) - if s is not None and s != local_sid: - remote_sids.add(s) - + remote_sids = {assignment[i.node] for i in link.get("interfaces", []) if assignment.get(i.node) not in (None, local_sid)} for rsid in sorted(remote_sids): vxlan_tunnels.append( { @@ -751,16 +670,11 @@ def _render_bridge_vxlan( def _write_server_snapshot(topology: Box, local_nodes: set, out_dir: str) -> None: - """Write a filtered netlab snapshot containing only this server's nodes. - - This allows 'netlab up --snapshot' to work correctly from a per-server - directory — only local nodes will be targeted for configuration deployment. + """Write a filtered netlab snapshot for this server's nodes only. - Note: make_paths_absolute() must be called on the copy before pickling so - that the computed f_files / f_tasks / f_dirs keys are present in the - snapshot. 
The main netlab snapshot (outputs/pickle.py) is written *after* - create.py calls make_paths_absolute(), so it already contains those keys. - Plugin output() hooks run *before* that call, so we have to do it ourselves. + Allows 'netlab up --snapshot' to work from a per-server directory. + make_paths_absolute() is called here explicitly because output() hooks run + before create.py does it — without it the snapshot is missing f_files/f_tasks/f_dirs. """ from netsim import __version__ from netsim.augment.config import make_paths_absolute @@ -775,10 +689,6 @@ def _write_server_snapshot(topology: Box, local_nodes: set, out_dir: str) -> Non topo_copy.links = [l for l in topo_copy.links if any(i.node in local_nodes for i in l.get("interfaces", []))] # Expand paths (add f_files / f_tasks / f_dirs computed keys). - # create.py calls make_paths_absolute() AFTER plugin output() hooks, so the - # main snapshot has these keys but our per-server copies don't yet. - # netlab initial relies on topology.defaults.paths.t_files.f_files, so we - # must add them before pickling. 
make_paths_absolute(topo_copy.defaults.paths) # Remove prefix generators and serialize From c9f2a6f4c5491a0b47189252db5c4fc97295ec04 Mon Sep 17 00:00:00 2001 From: Muddyblack Date: Wed, 20 May 2026 19:39:37 +0200 Subject: [PATCH 4/4] rm --- from yml --- tests/topology/expected/multiserver-auto.yml | 1 - tests/topology/expected/multiserver-explicit.yml | 1 - 2 files changed, 2 deletions(-) diff --git a/tests/topology/expected/multiserver-auto.yml b/tests/topology/expected/multiserver-auto.yml index 747ec73d67..85559675e2 100644 --- a/tests/topology/expected/multiserver-auto.yml +++ b/tests/topology/expected/multiserver-auto.yml @@ -1,4 +1,3 @@ ---- _multiserver: assignment: g1_n1: 1 diff --git a/tests/topology/expected/multiserver-explicit.yml b/tests/topology/expected/multiserver-explicit.yml index f81b0f51f4..70fcb7beec 100644 --- a/tests/topology/expected/multiserver-explicit.yml +++ b/tests/topology/expected/multiserver-explicit.yml @@ -1,4 +1,3 @@ ---- _multiserver: assignment: s1_g1: 1