diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml old mode 100644 new mode 100755 index 588666e..48048a5 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -163,3 +163,29 @@ jobs: if: always() working-directory: demos/multi_ecu_aggregation run: docker compose --profile ci down + + build-and-test-manymove-industrial: + runs-on: ubuntu-24.04 + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Build and start manymove_industrial demo (sim + plc + bridge) + working-directory: demos/manymove_industrial + run: docker compose --profile ci up -d --build + + - name: Run smoke tests + run: ./tests/smoke_test_manymove_industrial.sh + + - name: Show container logs on failure + if: failure() + working-directory: demos/manymove_industrial + run: | + docker compose --profile ci logs manymove-sim-ci --tail=200 || true + docker compose --profile ci logs plc-sim --tail=100 || true + docker compose --profile ci logs opcua-bridge --tail=100 || true + + - name: Teardown + if: always() + working-directory: demos/manymove_industrial + run: docker compose --profile ci down diff --git a/demos/manymove_industrial/CMakeLists.txt b/demos/manymove_industrial/CMakeLists.txt new file mode 100644 index 0000000..42ab381 --- /dev/null +++ b/demos/manymove_industrial/CMakeLists.txt @@ -0,0 +1,10 @@ +cmake_minimum_required(VERSION 3.10) +project(manymove_industrial) + +find_package(ament_cmake REQUIRED) + +install(DIRECTORY config launch + DESTINATION share/${PROJECT_NAME} +) + +ament_package() diff --git a/demos/manymove_industrial/Dockerfile b/demos/manymove_industrial/Dockerfile new file mode 100644 index 0000000..744790a --- /dev/null +++ b/demos/manymove_industrial/Dockerfile @@ -0,0 +1,216 @@ +# manymove_industrial demo image: matches the manymove project's own docker +# style (manymove + Groot + xArm ROS 2 driver, all built from source) and +# layers on the ros2_medkit fault reporting stack. 
+# +# The base layers reproduce manymove_bringup/docker/Dockerfile.manymove and +# Dockerfile.manymove_xarm so the resulting image runs xarm7_movegroup_fake +# launches identically to the upstream demos. MANYMOVE_REPO defaults to the +# selfpatch fork on the feat/medkit-integration branch where the BT action +# nodes emit MANYMOVE_* fault codes via ros2_medkit_fault_reporter. + +# syntax=docker/dockerfile:1 +ARG ROS_DISTRO=jazzy +ARG BASE_IMAGE=ros:${ROS_DISTRO} +FROM ${BASE_IMAGE} + +ARG ROS_DISTRO=jazzy +ARG USERNAME=manymove +ARG USER_UID=1000 +ARG USER_GID=1000 +ARG MANYMOVE_COLCON_WORKERS= +ARG MANYMOVE_REPO=https://github.com/selfpatch/manymove.git +ARG MANYMOVE_BRANCH=feat/medkit-integration +ARG MANYMOVE_COMMIT= +ARG GROOT_REPO=https://github.com/pastoriomarco/Groot.git +ARG GROOT_BRANCH=master +ARG GROOT_COMMIT= +ARG XARM_REPO=https://github.com/pastoriomarco/xarm_ros2.git +ARG XARM_BRANCH=${ROS_DISTRO} +ARG XARM_COMMIT= + +ENV DEBIAN_FRONTEND=noninteractive \ + MANYMOVE_WS=/opt/manymove_ws \ + ROS_DISTRO=${ROS_DISTRO} \ + LANG=C.UTF-8 \ + LC_ALL=C.UTF-8 \ + TZ=UTC \ + USERNAME=${USERNAME} \ + USER=${USERNAME} \ + QT_X11_NO_MITSHM=1 \ + LIBGL_ALWAYS_SOFTWARE=1 \ + MANYMOVE_COLCON_WORKERS=${MANYMOVE_COLCON_WORKERS} + +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +# Isaac ROS apt repo for jazzy provides ros-jazzy-topic-based-ros2-control, +# matching what the upstream manymove docker image expects. +RUN set -euo pipefail; \ + if [[ "${ROS_DISTRO}" == "jazzy" ]]; then \ + apt-get update && \ + apt-get install -y curl gnupg && \ + mkdir -p /usr/share/keyrings && \ + curl -fsSL https://isaac.download.nvidia.com/isaac-ros/repos.key \ + | gpg --dearmor -o /usr/share/keyrings/isaac-ros-archive-keyring.gpg && \ + . 
/etc/os-release && \ + printf 'deb [signed-by=/usr/share/keyrings/isaac-ros-archive-keyring.gpg] https://isaac.download.nvidia.com/isaac-ros/release-4 %s main\n' "${VERSION_CODENAME}" \ + > /etc/apt/sources.list.d/isaac-ros.list && \ + apt-get update; \ + fi + +# manymove + Groot build deps, MoveIt 2 stack, BT/ros2_control, plus the +# ros2_medkit deb packages this fork instruments against. +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + cmake build-essential curl git gosu locales sudo \ + python3-colcon-common-extensions python3-vcstool python3-rosdep python3-pip \ + nano jq sqlite3 libsqlite3-dev libsystemd-dev nlohmann-json3-dev libcpp-httplib-dev \ + mesa-utils libgl1-mesa-dri libglu1-mesa libglvnd0 libglx-mesa0 libegl1 \ + libxcb-xinerama0 libxkbcommon-x11-0 \ + libzmq3-dev qtbase5-dev qttools5-dev-tools libqt5svg5-dev libqt5opengl5-dev \ + ros-${ROS_DISTRO}-moveit \ + ros-${ROS_DISTRO}-moveit-msgs \ + ros-${ROS_DISTRO}-moveit-core \ + ros-${ROS_DISTRO}-moveit-ros-planning-interface \ + ros-${ROS_DISTRO}-moveit-ros-visualization \ + ros-${ROS_DISTRO}-moveit-servo \ + ros-${ROS_DISTRO}-moveit-visual-tools \ + ros-${ROS_DISTRO}-moveit-resources-panda-moveit-config \ + ros-${ROS_DISTRO}-controller-manager \ + ros-${ROS_DISTRO}-controller-manager-msgs \ + ros-${ROS_DISTRO}-ros2-controllers \ + ros-${ROS_DISTRO}-gripper-controllers \ + ros-${ROS_DISTRO}-control-toolbox \ + ros-${ROS_DISTRO}-control-msgs \ + ros-${ROS_DISTRO}-geometric-shapes \ + ros-${ROS_DISTRO}-behaviortree-cpp-v3 \ + ros-${ROS_DISTRO}-ur \ + ros-${ROS_DISTRO}-ur-moveit-config \ + ros-${ROS_DISTRO}-robotiq-description \ + ros-${ROS_DISTRO}-robotiq-controllers \ + ros-${ROS_DISTRO}-topic-based-ros2-control \ + ros-${ROS_DISTRO}-simulation-interfaces \ + ros-${ROS_DISTRO}-vision-msgs \ + ros-${ROS_DISTRO}-joint-state-publisher \ + ros-${ROS_DISTRO}-joint-state-publisher-gui \ + ros-${ROS_DISTRO}-joint-state-broadcaster \ + ros-${ROS_DISTRO}-joint-trajectory-controller \ + 
ros-${ROS_DISTRO}-diff-drive-controller \ + ros-${ROS_DISTRO}-tf-transformations \ + ros-${ROS_DISTRO}-xacro \ + ros-${ROS_DISTRO}-image-view \ + ros-${ROS_DISTRO}-find-object-2d \ + ros-${ROS_DISTRO}-joy \ + ros-${ROS_DISTRO}-camera-info-manager \ + ros-${ROS_DISTRO}-hardware-interface \ + ros-${ROS_DISTRO}-ros-gz \ + ros-${ROS_DISTRO}-ros-gz-bridge \ + ros-${ROS_DISTRO}-ros-gz-sim \ + ros-${ROS_DISTRO}-gz-ros2-control \ + ros-${ROS_DISTRO}-rosbag2-storage-mcap \ + ros-${ROS_DISTRO}-foxglove-bridge \ + ros-${ROS_DISTRO}-ros2-medkit-fault-reporter \ + ros-${ROS_DISTRO}-ros2-medkit-msgs \ + ros-${ROS_DISTRO}-ros2-medkit-fault-manager \ + ros-${ROS_DISTRO}-ros2-medkit-gateway \ + ros-${ROS_DISTRO}-ros2-medkit-cmake \ + ros-${ROS_DISTRO}-ros2-medkit-graph-provider \ + ros-${ROS_DISTRO}-ros2-medkit-diagnostic-bridge && \ + rm -rf /var/lib/apt/lists/* + +RUN locale-gen en_US.UTF-8 && update-locale LANG=en_US.UTF-8 + +# Non-root user matching the manymove pattern so X11 forwarding lines up. +RUN set -eux; \ + existing_group="$(getent group ${USER_GID} | cut -d: -f1 || true)"; \ + if [[ -n "${existing_group}" ]]; then \ + if [[ "${existing_group}" != "${USERNAME}" ]]; then \ + groupmod --new-name "${USERNAME}" "${existing_group}"; \ + fi; \ + else \ + groupadd --gid ${USER_GID} "${USERNAME}"; \ + fi; \ + existing_uid_user="$(getent passwd ${USER_UID} | cut -d: -f1 || true)"; \ + if [[ -n "${existing_uid_user}" && "${existing_uid_user}" != "${USERNAME}" ]]; then \ + usermod --login "${USERNAME}" --home "/home/${USERNAME}" --move-home "${existing_uid_user}"; \ + elif getent passwd "${USERNAME}" >/dev/null 2>&1; then \ + usermod --gid ${USER_GID} --home "/home/${USERNAME}" --move-home "${USERNAME}"; \ + else \ + useradd --uid ${USER_UID} --gid ${USER_GID} -m -s /bin/bash "${USERNAME}"; \ + fi; \ + usermod --gid ${USER_GID} --shell /bin/bash "${USERNAME}"; \ + usermod -aG sudo "${USERNAME}"; \ + printf '%s ALL=(ALL) NOPASSWD:ALL\n' "${USERNAME}" > 
"/etc/sudoers.d/${USERNAME}"; \ + chmod 0440 "/etc/sudoers.d/${USERNAME}" + +RUN mkdir -p /tmp/runtime-${USERNAME} && chown ${USERNAME}:${USER_GID} /tmp/runtime-${USERNAME} +ENV XDG_RUNTIME_DIR=/tmp/runtime-${USERNAME} +RUN mkdir -p /var/lib/ros2_medkit/rosbags && chown -R ${USERNAME}:${USER_GID} /var/lib/ros2_medkit +RUN mkdir -p ${MANYMOVE_WS}/src && chown -R ${USERNAME}:${USER_GID} ${MANYMOVE_WS} + +# Clone the selfpatch manymove fork (override via --build-arg if needed). +RUN gosu ${USERNAME} bash -lc "set -euo pipefail; \ + rm -rf ${MANYMOVE_WS}/src/manymove && \ + git clone --branch ${MANYMOVE_BRANCH} --depth 1 ${MANYMOVE_REPO} ${MANYMOVE_WS}/src/manymove && \ + cd ${MANYMOVE_WS}/src/manymove && \ + if [ -n \"${MANYMOVE_COMMIT}\" ]; then \ + git fetch --depth 1 origin ${MANYMOVE_COMMIT} && \ + git checkout --detach ${MANYMOVE_COMMIT}; \ + fi && \ + git rev-parse HEAD > ${MANYMOVE_WS}/.manymove_commit" + +# Groot - BT visualizer, lets viewers see the BT execution live during demos. +RUN gosu ${USERNAME} bash -lc "set -euo pipefail; \ + cd ${MANYMOVE_WS}/src && \ + rm -rf Groot && \ + git clone --recurse-submodules --branch ${GROOT_BRANCH} ${GROOT_REPO} Groot && \ + cd Groot && \ + cmake -S . -B build && \ + cmake --build build --parallel" + +# xArm ROS 2 driver - source clone is what upstream manymove relies on. +RUN gosu ${USERNAME} bash -lc "set -euo pipefail; \ + cd ${MANYMOVE_WS}/src && \ + rm -rf xarm_ros2 && \ + git clone --recursive --branch ${XARM_BRANCH} ${XARM_REPO} xarm_ros2 && \ + cd xarm_ros2 && git submodule update --init --recursive" + +RUN source /opt/ros/${ROS_DISTRO}/setup.bash && \ + cd ${MANYMOVE_WS} && \ + if [ ! 
-f /etc/ros/rosdep/sources.list.d/20-default.list ]; then rosdep init; fi && \ + apt-get update && rosdep update && \ + rosdep install --from-paths src --ignore-src --rosdistro ${ROS_DISTRO} -y -r && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +RUN gosu ${USERNAME} bash -lc "set -eo pipefail; \ + source /opt/ros/${ROS_DISTRO}/setup.bash; \ + cd ${MANYMOVE_WS}; \ + parallel_args=\"\"; \ + if [[ -n \"${MANYMOVE_COLCON_WORKERS}\" && \"${MANYMOVE_COLCON_WORKERS}\" =~ ^[0-9]+$ && \"${MANYMOVE_COLCON_WORKERS}\" -gt 0 ]]; then \ + parallel_args=\"--parallel-workers ${MANYMOVE_COLCON_WORKERS}\"; \ + export CMAKE_BUILD_PARALLEL_LEVEL=${MANYMOVE_COLCON_WORKERS}; \ + fi; \ + colcon build --symlink-install \${parallel_args}" +RUN chown -R ${USERNAME}:${USER_GID} ${MANYMOVE_WS} + +# Demo overlay: manifest + container_scripts + launch files. +COPY package.xml CMakeLists.txt ${MANYMOVE_WS}/src/manymove_industrial/ +COPY config/ ${MANYMOVE_WS}/src/manymove_industrial/config/ +COPY launch/ ${MANYMOVE_WS}/src/manymove_industrial/launch/ +COPY container_scripts/ /var/lib/ros2_medkit/scripts/ +RUN find /var/lib/ros2_medkit/scripts -name '*.bash' -exec chmod +x {} \; && \ + chown -R ${USERNAME}:${USER_GID} ${MANYMOVE_WS}/src/manymove_industrial /var/lib/ros2_medkit +RUN gosu ${USERNAME} bash -lc "set -eo pipefail; \ + source /opt/ros/${ROS_DISTRO}/setup.bash; \ + source ${MANYMOVE_WS}/install/setup.bash; \ + cd ${MANYMOVE_WS}; \ + colcon build --symlink-install --packages-select manymove_industrial" + +WORKDIR ${MANYMOVE_WS} +RUN echo "source /opt/ros/${ROS_DISTRO}/setup.bash" >> /etc/bash.bashrc && \ + echo "source ${MANYMOVE_WS}/install/setup.bash" >> /etc/bash.bashrc + +USER ${USERNAME} +EXPOSE 8080 8765 + +ENTRYPOINT ["/ros_entrypoint.sh"] +CMD ["bash"] diff --git a/demos/manymove_industrial/README.md b/demos/manymove_industrial/README.md new file mode 100644 index 0000000..c8bba7d --- /dev/null +++ b/demos/manymove_industrial/README.md @@ -0,0 +1,115 @@ +# 
manymove_industrial + +manymove BT manipulator pipeline (xArm7 fake hardware) with `ros2_medkit` +fault reporting. The image follows the manymove project's own docker style +(manymove + Groot + xarm_ros2 all built from source) and adds the medkit +fault_manager / gateway / web UI on top. + +## What's in the demo + +- xArm7 cobot under `fake_components` hardware (no real arm needed) +- MoveIt 2 + manymove BT pipeline launched via the upstream + `xarm7_movegroup_fake_cpp_trees.launch.py` +- `bt_client_xarm7` from the [selfpatch fork](https://github.com/selfpatch/manymove) + emits `MANYMOVE_*` fault codes through `ros2_medkit_fault_reporter` on + collision / retry / IO failures +- Groot BT visualizer (built from source, same as the upstream demo) for a + live view of the BT during the run +- manymove Qt HMI for operator-driven blackboard mutation +- medkit fault_manager + gateway + Web UI on `:3000` for the fault timeline + +## Status + +**v1 (this directory):** real BT pipeline + medkit. Inject scripts use the +`update_blackboard` HMI service so faults are produced organically by the BT +nodes (not synthesised on the FaultManager service). + +**v2 (this directory, since `feat/manymove-industrial`):** adds the PLC tier. +A Python OPC UA server (`plc_sim/`) emulates the line PLC (photoeye, +conveyor, e-stop tags) and publishes `AlarmConditionType` events. A ROS 2 +bridge (`opcua_bridge/`) subscribes to those events and forwards them to +the `FaultManager` as `MANYMOVE_PLC_*` faults with `source_id=/plc/sensor_io`. +The PLC sim is swappable with a real OpenPLC v3 + ST program later; the +OPC UA AlarmConditionType surface stays the same. + +This gives the demo a real cross-source narrative: a PLC photoeye flicker +WARN can land in the same dashboard alongside a manymove BT +`MANYMOVE_PLANNER_COLLISION_DETECTED` CRITICAL, on one timeline. 
+ +## Prerequisites + +- Docker Engine + `docker compose` plugin +- `xhost` (any standard X server install) for the GUI tools on the cpu profile +- `curl`, `jq` for the host-side helper scripts + +The first build is heavy (downloads + colcon-builds manymove, Groot and +xarm_ros2 from source). Expect 25-40 minutes the first time; subsequent +builds reuse Docker layer cache. + +## Quick start + +```bash +./run-demo.sh # cpu profile, X11 RViz/Groot/HMI, web UI on :3000 +./check-demo.sh # gateway health + entity discovery + fault list +./stop-demo.sh + +./run-demo.sh --ci # headless, no web UI / no X11 (matches CI smoke job) +``` + +## Fault scenarios + +Container scripts under `container_scripts/manymove-planning/` are mounted +at the well-known gateway scripts dir and runnable from the web UI or via +`scripts-api.sh`: + +- `arm-self-test` - emits `MANYMOVE_SELFTEST` (INFO + immediate PASSED) so + operators can confirm the medkit pipeline is wired without producing a + real fault entry. +- `inject-collision` - flips the BT blackboard `collision_detected` flag. + The next `MoveManipulatorAction::onStart` tick observes it, emits + `MANYMOVE_PLANNER_COLLISION_DETECTED` (ERROR) and returns FAILURE. + This is a real BT fault, not a synthesised report. +- `inject-soft-fault` - drops a thin collision wall in the planning scene + near the pick zone; planning fails ~30% of attempts and the BT emits + `MANYMOVE_PLANNER_RETRY_ATTEMPT` (WARN). The reporter's LocalFilter + (threshold=3, window=10s) throttles those locally and only forwards once + the threshold is crossed - this is the soft-fault narrative. +- `restore-normal` - clears `collision_detected`, `stop_execution` and + triggers `reset` + `start` on the blackboard so the BT picks up cleanly. 
+ +## SOVD manifest + +`config/manymove_industrial_manifest.yaml` declares the BT client, MoveIt +move_group, manymove planner and object_manager action servers, the +ufactory_driver, the HMI service node, and the medkit fault_manager / +gateway. Apps' `ros_binding.namespace + node_name` link the gateway to the +runtime FQNs from the upstream xarm7 launch (the manymove fork renames +each `bt_client_*` executable's ROS node to match its binary). + +## Live BT view (Groot) + +The image builds Groot from source (matching upstream manymove). Once the +demo is up: + +```bash +docker compose exec manymove-sim bash -lc \ + "source install/setup.bash && src/Groot/build/Groot" +``` + +This is the same workflow upstream manymove uses to record demo videos. + +## TODO: PLC tier follow-ups + +Remaining tier-2 correlation work, deferred to a follow-up commit: + +- Custom OpenPLC v3 image (mirror `demos/openplc_tank/openplc/Dockerfile`, + enable OPC UA driver) + ST program (conveyor + cylinder + photoeye + e-stop). +- Extend the v2 `asyncua` bridge (shipped above) to publish PLC tags as ROS + topics, expose `/plc/set_estop`, and emit its own `PLC_ESTOP_ENGAGED` + fault. +- Correlation logic: bridge subscribes to `/fault_manager/events`, observes + the pair (`PLC_ESTOP_ENGAGED` + `MANYMOVE_PLANNER_ESTOP_TRIGGERED`) within + a 2 s window, emits the synthetic `CORRELATED_LINE_ESTOP` fault. Filters + events whose source_id matches its own to avoid feedback. +- `inject-hard-fault.sh` flips the PLC e-stop, demonstrating the tier-2 + narrative end to end. 
diff --git a/demos/manymove_industrial/check-demo.sh b/demos/manymove_industrial/check-demo.sh new file mode 100755 index 0000000..170db37 --- /dev/null +++ b/demos/manymove_industrial/check-demo.sh @@ -0,0 +1,17 @@ +#!/bin/bash +set -euo pipefail + +GATEWAY="${GATEWAY:-http://localhost:8080}" + +echo "Gateway health:" +curl -fsS "${GATEWAY}/api/v1/health" || echo " FAIL" + +echo "" +echo "Apps:" +curl -fsS "${GATEWAY}/api/v1/apps" | jq -c '.items[] | {id, ros_binding}' || true + +echo "" +echo "Active faults (CONFIRMED + PREFAILED):" +curl -fsS -X POST -H "Content-Type: application/json" \ + -d '{"statuses": ["CONFIRMED", "PREFAILED"]}' \ + "${GATEWAY}/api/v1/faults/list" | jq -c '.items[] | {fault_code, severity, status, source: .reporting_sources[0]}' || true diff --git a/demos/manymove_industrial/config/manymove_industrial_manifest.yaml b/demos/manymove_industrial/config/manymove_industrial_manifest.yaml new file mode 100644 index 0000000..f37635e --- /dev/null +++ b/demos/manymove_industrial/config/manymove_industrial_manifest.yaml @@ -0,0 +1,217 @@ +manifest_version: "1.0" +metadata: + name: "manymove-industrial" + description: "manymove BT manipulator pipeline (xArm7 fake hardware) with medkit fault reporting" + version: "0.1.0" + +config: + unmanifested_nodes: "warn" + inherit_runtime_resources: true + +areas: + - id: manipulation + name: "Manipulation" + description: "xArm7 cobot under fake_components hardware" + namespace: / + - id: planning + name: "Planning" + description: "BehaviorTree.CPP orchestration and MoveIt motion planning" + namespace: / + - id: diagnostics + name: "Diagnostics" + description: "ros2_medkit fault management" + namespace: / + - id: line + name: "Conveyor line" + description: "PLC-driven conveyor line: photoeyes, motor, e-stop" + # /plc namespace anchors topic-discovered PLC-side nodes (opcua_bridge + # publishes /plc/heartbeat etc.) into this area in the SOVD entity tree. 
+ namespace: /plc + +components: + - id: xarm7-arm + name: "xArm 7 manipulator" + type: "manipulator" + description: "7-DoF cobot, fake_components hardware" + area: manipulation + - id: ufactory-driver + name: "UFactory ROS 2 driver" + type: "driver" + description: "xarm_ros2 driver layer (fake mode)" + area: manipulation + - id: manymove-bt + name: "manymove BT client" + type: "controller" + description: "BehaviorTree.CPP orchestration of pick/place sequences" + area: planning + - id: manymove-planning + name: "manymove planning subsystem" + type: "controller" + description: "Logical grouping for planning-side container scripts (collision inject, restore, self-test)" + area: planning + - id: move-group + name: "MoveIt move_group" + type: "planner" + description: "OMPL motion planner" + area: planning + - id: object-manager + name: "manymove object_manager" + type: "service" + description: "Planning scene collision objects" + area: planning + - id: action-server + name: "manymove planner action_server" + type: "service" + description: "MoveManipulator / PlanManipulator action servers" + area: planning + - id: gateway + name: "ros2_medkit gateway" + type: "diagnostics" + description: "HTTP REST API for fault inspection" + area: diagnostics + - id: fault-manager + name: "ros2_medkit fault_manager" + type: "diagnostics" + description: "SQLite-backed fault store" + area: diagnostics + - id: openplc + name: "Line PLC (OPC UA)" + type: "controller" + description: "IEC 61131-3 soft PLC exposing photoeye / conveyor / e-stop tags + AlarmConditionType events" + area: line + - id: photoeye-pick + name: "Pick photoeye" + type: "sensor" + description: "Diffuse-mode photoelectric sensor at pick position" + area: line + - id: photoeye-drop + name: "Drop photoeye" + type: "sensor" + description: "Diffuse-mode photoelectric sensor at drop position" + area: line + - id: conveyor-motor + name: "Conveyor motor" + type: "actuator" + description: "Belt drive motor" + area: line + - id: 
opcua-bridge + name: "OPC UA -> medkit bridge" + type: "bridge" + description: "Subscribes to PLC AlarmConditionType events and forwards them as medkit faults" + area: line + - id: conveyor-line + name: "Conveyor line subsystem" + type: "controller" + description: "Logical grouping for line-side container scripts (PLC alarm inject / restore)" + area: line + +apps: + - id: bt-client-xarm7 + name: "BT client (xArm7)" + category: "controller" + is_located_on: manymove-bt + ros_binding: + namespace: / + node_name: bt_client_xarm7 + - id: hmi-service-node + name: "HMI service node" + category: "controller" + is_located_on: manymove-bt + ros_binding: + namespace: / + node_name: hmi_service_node + - id: move-group-app + name: "move_group" + category: "planner" + is_located_on: move-group + ros_binding: + namespace: / + node_name: move_group + - id: action-server-app + name: "action_server_node" + category: "service" + is_located_on: action-server + ros_binding: + namespace: / + node_name: action_server_node + - id: object-manager-app + name: "object_manager_node" + category: "service" + is_located_on: object-manager + ros_binding: + namespace: / + node_name: object_manager_node + - id: ufactory-driver-app + name: "ufactory_driver" + category: "driver" + is_located_on: ufactory-driver + ros_binding: + namespace: / + node_name: ufactory_driver + - id: fault-manager-app + name: "fault_manager" + category: "diagnostics" + is_located_on: fault-manager + ros_binding: + namespace: / + node_name: fault_manager + - id: gateway-app + name: "ros2_medkit_gateway" + category: "diagnostics" + is_located_on: gateway + ros_binding: + namespace: / + node_name: ros2_medkit_gateway + - id: openplc-runtime + name: "OpenPLC runtime" + category: "controller" + is_located_on: openplc + ros_binding: + namespace: /plc + node_name: openplc_runtime + - id: opcua-bridge-app + name: "opcua_bridge" + category: "bridge" + is_located_on: opcua-bridge + ros_binding: + namespace: /plc + node_name: 
sensor_io + +functions: + - id: pick-and-place + name: "Pick and place" + category: "manipulation" + description: "BT-driven manipulation sequence on fake xArm7" + hosted_by: + - bt-client-xarm7 + - move-group-app + - action-server-app + - object-manager-app + - id: hmi-control + name: "Operator control" + category: "interaction" + description: "Qt HMI panel for blackboard mutation and BT reset" + hosted_by: + - hmi-service-node + - bt-client-xarm7 + - id: fault-management + name: "Fault management" + category: "diagnostics" + description: "Capture, persist, and surface BT runtime failures emitted by manymove_cpp_trees" + hosted_by: + - fault-manager-app + - gateway-app + - id: line-control + name: "Line control" + category: "controller" + description: "PLC-driven conveyor sequencing with safety interlocks" + hosted_by: + - openplc-runtime + - id: fault-aggregation + name: "Cross-source fault aggregation" + category: "diagnostics" + description: "Bridge PLC AlarmConditionType events into the same medkit FaultManager that aggregates ROS-side faults" + hosted_by: + - opcua-bridge-app + - fault-manager-app + - gateway-app diff --git a/demos/manymove_industrial/config/medkit_params.yaml b/demos/manymove_industrial/config/medkit_params.yaml new file mode 100644 index 0000000..638a21e --- /dev/null +++ b/demos/manymove_industrial/config/medkit_params.yaml @@ -0,0 +1,51 @@ +diagnostics: + ros2_medkit_gateway: + ros__parameters: + server: + host: "0.0.0.0" + port: 8080 + cors: + allowed_origins: ["*"] + allowed_methods: ["GET", "PUT", "POST", "DELETE", "OPTIONS"] + allowed_headers: ["Content-Type", "Accept"] + allow_credentials: false + max_age_seconds: 86400 + discovery: + manifest_path: "" # set by launch + mode: "hybrid" + manifest_strict_validation: true + plugins: [""] + scripts: + scripts_dir: "/var/lib/ros2_medkit/scripts" + max_concurrent_executions: 3 + +fault_manager: + ros__parameters: + storage_type: "sqlite" + database_path: "/var/lib/ros2_medkit/faults.db" + 
confirmation_threshold: 0 + snapshots: + enabled: true + default_topics: + - "/joint_states" + - "/tf" + - "/tf_static" + rosbag: + enabled: true + duration_sec: 10.0 + duration_after_sec: 2.0 + storage_path: "/var/lib/ros2_medkit/rosbags" + format: "mcap" + max_bag_size_mb: 50 + max_total_storage_mb: 500 + topics: "/joint_states,/tf,/tf_static,/blackboard_status,/planning_scene" + +# Per-node fault_reporter local filtering. +bt_client_xarm7: + ros__parameters: + fault_reporter: + local_filtering: + enabled: true + default_threshold: 3 + default_window_sec: 10.0 + bypass_severity: 2 # ERROR + CRITICAL bypass filtering diff --git a/demos/manymove_industrial/container_scripts/conveyor-line/inject-estop/metadata.json b/demos/manymove_industrial/container_scripts/conveyor-line/inject-estop/metadata.json new file mode 100644 index 0000000..56379f5 --- /dev/null +++ b/demos/manymove_industrial/container_scripts/conveyor-line/inject-estop/metadata.json @@ -0,0 +1,5 @@ +{ + "name": "Inject PLC e-stop (CRITICAL)", + "description": "Raises the EstopEngaged AlarmConditionType on the PLC OPC UA server. The opcua_bridge forwards it as MANYMOVE_PLC_ESTOP_ENGAGED (CRITICAL) into the medkit FaultManager.", + "format": "bash" +} diff --git a/demos/manymove_industrial/container_scripts/conveyor-line/inject-estop/script.bash b/demos/manymove_industrial/container_scripts/conveyor-line/inject-estop/script.bash new file mode 100755 index 0000000..9d25668 --- /dev/null +++ b/demos/manymove_industrial/container_scripts/conveyor-line/inject-estop/script.bash @@ -0,0 +1,13 @@ +#!/bin/bash +# Raise EstopEngaged AlarmConditionType (CRITICAL severity) via the PLC +# admin endpoint. opcua_bridge forwards it to medkit as +# MANYMOVE_PLC_ESTOP_ENGAGED. 
+set -e + +PLC_ADMIN="${PLC_ADMIN_URL:-http://plc-sim:8500}" + +curl -fsS -X POST -H "Content-Type: application/json" \ + -d '{"message":"Line e-stop pressed at station 2"}' \ + "${PLC_ADMIN}/alarm/estop_engaged/raise" >/dev/null + +echo "EstopEngaged alarm raised on ${PLC_ADMIN}" diff --git a/demos/manymove_industrial/container_scripts/conveyor-line/inject-photoeye-flicker/metadata.json b/demos/manymove_industrial/container_scripts/conveyor-line/inject-photoeye-flicker/metadata.json new file mode 100644 index 0000000..d696fa9 --- /dev/null +++ b/demos/manymove_industrial/container_scripts/conveyor-line/inject-photoeye-flicker/metadata.json @@ -0,0 +1,5 @@ +{ + "name": "Inject PLC photoeye flicker (WARN)", + "description": "Raises the PhotoeyeFlicker AlarmConditionType on the PLC OPC UA server. The opcua_bridge subscribes and forwards it as MANYMOVE_PLC_PHOTOEYE_FLICKER (WARN) into the medkit FaultManager.", + "format": "bash" +} diff --git a/demos/manymove_industrial/container_scripts/conveyor-line/inject-photoeye-flicker/script.bash b/demos/manymove_industrial/container_scripts/conveyor-line/inject-photoeye-flicker/script.bash new file mode 100755 index 0000000..4ad82f4 --- /dev/null +++ b/demos/manymove_industrial/container_scripts/conveyor-line/inject-photoeye-flicker/script.bash @@ -0,0 +1,13 @@ +#!/bin/bash +# Raise PhotoeyeFlicker AlarmConditionType via the PLC admin endpoint. +# The opcua_bridge service is subscribed and forwards it as +# MANYMOVE_PLC_PHOTOEYE_FLICKER (WARN) to the medkit FaultManager. 
+set -e + +PLC_ADMIN="${PLC_ADMIN_URL:-http://plc-sim:8500}" + +curl -fsS -X POST -H "Content-Type: application/json" \ + -d '{"message":"Pick photoeye toggled 12 times in 2s, expected <=3"}' \ + "${PLC_ADMIN}/alarm/photoeye_flicker/raise" >/dev/null + +echo "PhotoeyeFlicker alarm raised on ${PLC_ADMIN}" diff --git a/demos/manymove_industrial/container_scripts/conveyor-line/restore-line/metadata.json b/demos/manymove_industrial/container_scripts/conveyor-line/restore-line/metadata.json new file mode 100644 index 0000000..f27a6a7 --- /dev/null +++ b/demos/manymove_industrial/container_scripts/conveyor-line/restore-line/metadata.json @@ -0,0 +1,5 @@ +{ + "name": "Clear PLC alarms", + "description": "Clears all three PLC AlarmConditionType alarms (photoeye_flicker, conveyor_overspeed, estop_engaged). The opcua_bridge forwards PASSED events that heal the corresponding medkit faults.", + "format": "bash" +} diff --git a/demos/manymove_industrial/container_scripts/conveyor-line/restore-line/script.bash b/demos/manymove_industrial/container_scripts/conveyor-line/restore-line/script.bash new file mode 100755 index 0000000..0f87862 --- /dev/null +++ b/demos/manymove_industrial/container_scripts/conveyor-line/restore-line/script.bash @@ -0,0 +1,11 @@ +#!/bin/bash +# Clear every PLC AlarmConditionType. opcua_bridge forwards a PASSED +# event per alarm, healing the corresponding medkit fault. 
+set -e + +PLC_ADMIN="${PLC_ADMIN_URL:-http://plc-sim:8500}" + +for alarm in photoeye_flicker conveyor_overspeed estop_engaged; do + curl -fsS -X POST "${PLC_ADMIN}/alarm/${alarm}/clear" >/dev/null || true + echo "cleared ${alarm}" +done diff --git a/demos/manymove_industrial/container_scripts/manymove-planning/arm-self-test/metadata.json b/demos/manymove_industrial/container_scripts/manymove-planning/arm-self-test/metadata.json new file mode 100644 index 0000000..b2f81d5 --- /dev/null +++ b/demos/manymove_industrial/container_scripts/manymove-planning/arm-self-test/metadata.json @@ -0,0 +1,5 @@ +{ + "name": "Arm self-test (synthetic)", + "description": "Emits an INFO-severity health-check fault (and immediately PASSED) so operators can confirm the medkit pipeline is wired.", + "format": "bash" +} diff --git a/demos/manymove_industrial/container_scripts/manymove-planning/arm-self-test/script.bash b/demos/manymove_industrial/container_scripts/manymove-planning/arm-self-test/script.bash new file mode 100755 index 0000000..dd26448 --- /dev/null +++ b/demos/manymove_industrial/container_scripts/manymove-planning/arm-self-test/script.bash @@ -0,0 +1,21 @@ +#!/bin/bash +# NOTE: `set -u` is deliberately omitted: ROS 2's setup.bash references +# unbound variables when sourced (same convention as inject-collision). +set -e + +# shellcheck source=/dev/null +source /opt/ros/jazzy/setup.bash + +CODE="MANYMOVE_SELFTEST" + +ros2 service call /fault_manager/report_fault \ + ros2_medkit_msgs/srv/ReportFault \ + "{fault_code: '${CODE}', event_type: 0, severity: 0, \ + description: 'arm-self-test: pipeline OK', source_id: '/bt_client_xarm7'}" + +sleep 1 + +ros2 service call /fault_manager/report_fault \ + ros2_medkit_msgs/srv/ReportFault \ + "{fault_code: '${CODE}', event_type: 1, severity: 0, \ + description: 'arm-self-test: cleared', source_id: '/bt_client_xarm7'}" diff --git a/demos/manymove_industrial/container_scripts/manymove-planning/inject-collision/metadata.json b/demos/manymove_industrial/container_scripts/manymove-planning/inject-collision/metadata.json new file mode 100644 index 0000000..621b962 --- /dev/null +++
b/demos/manymove_industrial/container_scripts/manymove-planning/inject-collision/metadata.json @@ -0,0 +1,5 @@ +{ + "name": "Inject planner collision (real BT)", + "description": "Sets the BT blackboard 'collision_detected' flag via update_blackboard. The next MoveManipulatorAction tick observes it, emits MANYMOVE_PLANNER_COLLISION_DETECTED, and the BT returns FAILURE.", + "format": "bash" +} diff --git a/demos/manymove_industrial/container_scripts/manymove-planning/inject-collision/script.bash b/demos/manymove_industrial/container_scripts/manymove-planning/inject-collision/script.bash new file mode 100755 index 0000000..f49bfdb --- /dev/null +++ b/demos/manymove_industrial/container_scripts/manymove-planning/inject-collision/script.bash @@ -0,0 +1,23 @@ +#!/bin/bash +# Trigger a real collision fault by flipping the BT blackboard's +# collision_detected flag. MoveManipulatorAction::onStart will see the flag +# next tick, emit MANYMOVE_PLANNER_COLLISION_DETECTED and return FAILURE. +# +# `set -u` is intentionally avoided: ROS 2's setup.bash references unbound +# variables when sourced from a fresh shell. +set -e + +# shellcheck source=/dev/null +source /opt/ros/jazzy/setup.bash +# shellcheck source=/dev/null +source /opt/manymove_ws/install/setup.bash + +# Default xArm7 prefix in the upstream launch is empty; the BT keys live in +# the global blackboard under the "collision_detected" name. +# update_blackboard expects value_data as quoted strings even for bool/double +# (the service serialises every entry as a string). 
+PREFIX="${ROBOT_PREFIX:-}" + +ros2 service call /update_blackboard \ + manymove_msgs/srv/SetBlackboardValues \ + "{key: ['${PREFIX}collision_detected'], value_type: ['bool'], value_data: ['true']}" diff --git a/demos/manymove_industrial/container_scripts/manymove-planning/inject-soft-fault/metadata.json b/demos/manymove_industrial/container_scripts/manymove-planning/inject-soft-fault/metadata.json new file mode 100644 index 0000000..ca3aefe --- /dev/null +++ b/demos/manymove_industrial/container_scripts/manymove-planning/inject-soft-fault/metadata.json @@ -0,0 +1,5 @@ +{ + "name": "Inject soft fault (collision wall)", + "description": "Adds a thin collision wall in the planning scene near the pick zone. Roughly 30% of pick attempts fail to plan around it, which produces real MANYMOVE_PLANNER_RETRY_ATTEMPT bursts that the LocalFilter throttles before forwarding to the FaultManager.", + "format": "bash" +} diff --git a/demos/manymove_industrial/container_scripts/manymove-planning/inject-soft-fault/script.bash b/demos/manymove_industrial/container_scripts/manymove-planning/inject-soft-fault/script.bash new file mode 100755 index 0000000..224f1e2 --- /dev/null +++ b/demos/manymove_industrial/container_scripts/manymove-planning/inject-soft-fault/script.bash @@ -0,0 +1,21 @@ +#!/bin/bash +# Drop a thin collision wall into the planning scene. MoveManipulatorAction +# will retry pick poses near it, occasionally fail planning, and emit +# MANYMOVE_PLANNER_RETRY_ATTEMPT (WARN) before either succeeding or hitting +# RETRIES_EXHAUSTED. 
+set -e + +# shellcheck source=/dev/null +source /opt/ros/jazzy/setup.bash +# shellcheck source=/dev/null +source /opt/manymove_ws/install/setup.bash + +ros2 action send_goal --feedback /add_collision_object \ + manymove_msgs/action/AddCollisionObject \ + "{object_id: 'soft_fault_wall', \ + primitive_type: 'box', \ + dimensions: [0.4, 0.02, 0.3], \ + pose: {position: {x: 0.35, y: 0.0, z: 0.15}, orientation: {w: 1.0}}, \ + frame_id: 'world', \ + color: {r: 1.0, a: 0.5}}" \ + || echo "(soft-fault: action server not available, skipping)" diff --git a/demos/manymove_industrial/container_scripts/manymove-planning/restore-normal/metadata.json b/demos/manymove_industrial/container_scripts/manymove-planning/restore-normal/metadata.json new file mode 100644 index 0000000..bb324ce --- /dev/null +++ b/demos/manymove_industrial/container_scripts/manymove-planning/restore-normal/metadata.json @@ -0,0 +1,5 @@ +{ + "name": "Restore normal (clear collision + resume BT)", + "description": "Clears collision_detected and stop_execution on the BT blackboard, sets reset+start so the BT picks up cleanly on the next tick.", + "format": "bash" +} diff --git a/demos/manymove_industrial/container_scripts/manymove-planning/restore-normal/script.bash b/demos/manymove_industrial/container_scripts/manymove-planning/restore-normal/script.bash new file mode 100755 index 0000000..4a40b2d --- /dev/null +++ b/demos/manymove_industrial/container_scripts/manymove-planning/restore-normal/script.bash @@ -0,0 +1,18 @@ +#!/bin/bash +# Clear BT blackboard flags that the inject-* scripts may have set, so the +# next BT tick proceeds normally. Also resets the BT execution state so +# operators can rerun without restarting the demo. 
+set -e + +# shellcheck source=/dev/null +source /opt/ros/jazzy/setup.bash +# shellcheck source=/dev/null +source /opt/manymove_ws/install/setup.bash + +PREFIX="${ROBOT_PREFIX:-}" + +ros2 service call /update_blackboard \ + manymove_msgs/srv/SetBlackboardValues \ + "{key: ['${PREFIX}collision_detected', '${PREFIX}stop_execution', '${PREFIX}reset', '${PREFIX}start'], \ + value_type: ['bool', 'bool', 'bool', 'bool'], \ + value_data: ['false', 'false', 'true', 'true']}" diff --git a/demos/manymove_industrial/docker-compose.yml b/demos/manymove_industrial/docker-compose.yml new file mode 100644 index 0000000..5488aeb --- /dev/null +++ b/demos/manymove_industrial/docker-compose.yml @@ -0,0 +1,113 @@ +services: + # cpu profile: full demo with X11 forwarding so RViz / Groot / manymove HMI + # render on the host display. matches the manymove project's own demo style. + manymove-sim: + profiles: ["cpu"] + build: + context: . + dockerfile: Dockerfile + container_name: manymove_industrial_sim + environment: + - DISPLAY=${DISPLAY} + - QT_X11_NO_MITSHM=1 + - LIBGL_ALWAYS_SOFTWARE=1 + - ROS_DOMAIN_ID=42 + - HEADLESS=${HEADLESS:-false} + volumes: + - /tmp/.X11-unix:/tmp/.X11-unix:rw + - manymove_faults:/var/lib/ros2_medkit + ports: + - "8080:8080" # medkit gateway HTTP + - "8765:8765" # foxglove-bridge + networks: + medkit-net: + aliases: [medkit-gateway] + stdin_open: true + tty: true + command: > + bash -c "source /opt/ros/jazzy/setup.bash && + source /opt/manymove_ws/install/setup.bash && + ros2 launch manymove_industrial demo.launch.py headless:=$${HEADLESS}" + + medkit-web-ui: + profiles: ["cpu"] + image: ghcr.io/selfpatch/ros2_medkit_web_ui:latest + container_name: manymove_industrial_web_ui + ports: + - "3000:80" + depends_on: + - manymove-sim + networks: [medkit-net] + + # PLC simulator: OPC UA server emulating a line PLC (photoeye, conveyor, + # e-stop). Publishes AlarmConditionType events the bridge subscribes to. 
+ # Swappable with a real OpenPLC v3 + ST program once the IEC 61131-3 + # build pipeline is in place; the OPC UA surface stays identical. + plc-sim: + profiles: ["cpu", "ci"] + build: + context: ./plc_sim + dockerfile: Dockerfile + # No container_name: leaves the default service-name DNS alias intact + # on the user-defined bridge so other services can resolve `plc-sim`. + hostname: plc-sim + environment: + # asyncua advertises this URL in endpoint discovery; clients reconnect + # to it after the initial bind, so it must be a hostname they can + # resolve (the service name on medkit-net). + - PLC_OPCUA_ENDPOINT=opc.tcp://plc-sim:4840/manymove_plc/server + ports: + - "4840:4840" # OPC UA endpoint + - "8500:8500" # admin (raise/clear alarm via curl) + networks: [medkit-net] + + # OPC UA -> medkit fault bridge. Subscribes to PLC alarm conditions and + # POSTs them to the FaultManager so PLC faults land in the same + # dashboard / SOVD entity tree as the manymove BT-side faults. + opcua-bridge: + profiles: ["cpu", "ci"] + build: + context: ./opcua_bridge + dockerfile: Dockerfile + # No container_name: keep the default service-name DNS alias on the + # bridge so the rest of the compose can resolve `opcua-bridge`. + hostname: opcua-bridge + environment: + - OPCUA_ENDPOINT=opc.tcp://plc-sim:4840/manymove_plc/server + - BRIDGE_SOURCE_ID=/plc/sensor_io + - REPORT_FAULT_SERVICE=/fault_manager/report_fault + - ROS_DOMAIN_ID=42 + - RMW_IMPLEMENTATION=rmw_fastrtps_cpp + depends_on: + - plc-sim + networks: [medkit-net] + + # CI profile: headless, no web UI, no X11. Suitable for the smoke test. + manymove-sim-ci: + profiles: ["ci"] + build: + context: . 
+ dockerfile: Dockerfile + container_name: manymove_industrial_sim_ci + environment: + - ROS_DOMAIN_ID=42 + - HEADLESS=true + ports: + - "8080:8080" + volumes: + - manymove_faults_ci:/var/lib/ros2_medkit + networks: + medkit-net: + aliases: [medkit-gateway] + command: > + bash -c "source /opt/ros/jazzy/setup.bash && + source /opt/manymove_ws/install/setup.bash && + ros2 launch manymove_industrial demo.launch.py headless:=true" + +networks: + medkit-net: + driver: bridge + +volumes: + manymove_faults: + manymove_faults_ci: diff --git a/demos/manymove_industrial/launch/demo.launch.py b/demos/manymove_industrial/launch/demo.launch.py new file mode 100644 index 0000000..5f58a0f --- /dev/null +++ b/demos/manymove_industrial/launch/demo.launch.py @@ -0,0 +1,70 @@ +"""manymove_industrial demo launch. + +Reuses the upstream manymove xarm7_movegroup_fake_cpp_trees launch verbatim +so the BT pipeline behaves exactly as in the manymove project's own demos, +and adds the medkit fault_manager + gateway against the SOVD manifest. + +The bt_client_xarm7 binary inside the manymove fork is the one that +actually emits MANYMOVE_* fault codes through ros2_medkit_fault_reporter +when the BT runs into collision / retry / IO failures. 
+""" + +from launch import LaunchDescription +from launch.actions import DeclareLaunchArgument, IncludeLaunchDescription +from launch.conditions import IfCondition, UnlessCondition +from launch.launch_description_sources import PythonLaunchDescriptionSource +from launch.substitutions import LaunchConfiguration, PathJoinSubstitution +from launch_ros.actions import Node +from launch_ros.substitutions import FindPackageShare + + +def generate_launch_description(): + headless = LaunchConfiguration("headless") + log_level = LaunchConfiguration("log_level") + + demo_pkg = FindPackageShare("manymove_industrial") + bringup_pkg = FindPackageShare("manymove_bringup") + + manifest_path = PathJoinSubstitution([demo_pkg, "config", "manymove_industrial_manifest.yaml"]) + medkit_params = PathJoinSubstitution([demo_pkg, "config", "medkit_params.yaml"]) + + # Upstream manymove xArm7 movegroup + BT pipeline (fake hardware). + manymove_xarm7 = IncludeLaunchDescription( + PythonLaunchDescriptionSource([ + bringup_pkg, "/launch/xarm7_movegroup_fake_cpp_trees.launch.py", + ]), + launch_arguments={"log_level": log_level}.items(), + ) + + fault_manager = Node( + package="ros2_medkit_fault_manager", + executable="fault_manager_node", + name="fault_manager", + output="screen", + parameters=[medkit_params], + ) + + gateway = Node( + package="ros2_medkit_gateway", + executable="gateway_node", + name="ros2_medkit_gateway", + namespace="diagnostics", + output="screen", + parameters=[medkit_params, {"discovery.manifest_path": manifest_path}], + ) + + return LaunchDescription([ + DeclareLaunchArgument( + "headless", + default_value="false", + description="Suppress GUI tools (rviz/groot/HMI) for CI runs.", + ), + DeclareLaunchArgument( + "log_level", + default_value="info", + description="Log level forwarded to the manymove BT client.", + ), + manymove_xarm7, + fault_manager, + gateway, + ]) diff --git a/demos/manymove_industrial/opcua_bridge/Dockerfile 
b/demos/manymove_industrial/opcua_bridge/Dockerfile new file mode 100644 index 0000000..09e8dfe --- /dev/null +++ b/demos/manymove_industrial/opcua_bridge/Dockerfile @@ -0,0 +1,20 @@ +FROM ros:jazzy-ros-base + +ENV DEBIAN_FRONTEND=noninteractive \ + PYTHONUNBUFFERED=1 \ + OPCUA_ENDPOINT=opc.tcp://plc-sim:4840/manymove_plc/server \ + BRIDGE_SOURCE_ID=/plc/sensor_io \ + REPORT_FAULT_SERVICE=/fault_manager/report_fault + +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3-pip \ + ros-jazzy-ros2-medkit-msgs=0.4.0-1* \ + ros-jazzy-ros2-medkit-fault-reporter=0.4.0-1* \ + && rm -rf /var/lib/apt/lists/* \ + && pip install --no-cache-dir --break-system-packages --ignore-installed \ + "asyncua==1.1.5" + +WORKDIR /opt/opcua_bridge +COPY bridge.py ./ + +CMD ["bash", "-c", "source /opt/ros/jazzy/setup.bash && exec python3 -u bridge.py"] diff --git a/demos/manymove_industrial/opcua_bridge/bridge.py b/demos/manymove_industrial/opcua_bridge/bridge.py new file mode 100644 index 0000000..a4892ef --- /dev/null +++ b/demos/manymove_industrial/opcua_bridge/bridge.py @@ -0,0 +1,258 @@ +"""OPC UA -> ros2_medkit fault bridge. + +ROS 2 node that subscribes to AlarmConditionType events on the PLC OPC UA +server and forwards them to the medkit FaultManager via the +`/fault_manager/report_fault` ROS service. Each PLC alarm maps to a +canonical MANYMOVE_PLC_* fault code so the same medkit dashboard that +aggregates manymove BT-side faults also receives the PLC-side faults +with a distinct `source_id`. + +The mapping table is deliberately explicit (not data-driven from the +OPC UA discovery) so the demo SOVD manifest, the documentation and the +bridge stay in lockstep. 
+""" + +from __future__ import annotations + +import asyncio +import logging +import os +import signal +import threading +from dataclasses import dataclass +from typing import Optional + +import rclpy +from rclpy.node import Node +from ros2_medkit_msgs.srv import ReportFault +from std_msgs.msg import String + +from asyncua import Client, ua + +LOGGER = logging.getLogger("opcua_bridge") + +OPCUA_ENDPOINT = os.environ.get( + "OPCUA_ENDPOINT", + "opc.tcp://plc-sim:4840/manymove_plc/server", +) +SOURCE_ID = os.environ.get("BRIDGE_SOURCE_ID", "/plc/sensor_io") +REPORT_FAULT_SERVICE = os.environ.get( + "REPORT_FAULT_SERVICE", "/fault_manager/report_fault", +) + + +@dataclass +class AlarmMapping: + """One PLC alarm -> medkit fault binding.""" + + condition_name: str # OPC UA SourceName / ConditionName + fault_code: str # medkit MANYMOVE_PLC_* code + severity: int # medkit 0=INFO 1=WARN 2=ERROR 3=CRITICAL + description: str # short operator-facing description + + +# Canonical mapping. Keep in sync with: +# - docs/PLC_FAULT_CODES.md +# - demos/manymove_industrial/config/manymove_industrial_manifest.yaml +# - demos/manymove_industrial/plc_sim/plc_server.py +ALARM_MAP: dict[str, AlarmMapping] = { + "PhotoeyeFlicker": AlarmMapping( + condition_name="PhotoeyeFlicker", + fault_code="MANYMOVE_PLC_PHOTOEYE_FLICKER", + severity=1, + description="Pick photoeye toggling above commissioning rate", + ), + "ConveyorOverspeed": AlarmMapping( + condition_name="ConveyorOverspeed", + fault_code="MANYMOVE_PLC_CONVEYOR_OVERSPEED", + severity=2, + description="Conveyor speed beyond commissioning envelope", + ), + "EstopEngaged": AlarmMapping( + condition_name="EstopEngaged", + fault_code="MANYMOVE_PLC_ESTOP_ENGAGED", + severity=3, + description="Line e-stop engaged", + ), +} + + +class BridgeNode(Node): + """ROS 2 node hosting the fault reporting client. 
+ + The async OPC UA work runs in a separate thread with its own event + loop; on each alarm event, it dispatches a fault report via + rclpy's executor by submitting a job onto the ROS thread. + """ + + def __init__(self) -> None: + # Place the node under /plc namespace so the medkit gateway's + # topic-discovery routes it under the conveyor-line area instead + # of the global / namespace. + super().__init__("sensor_io", namespace="/plc") + self.client = self.create_client(ReportFault, REPORT_FAULT_SERVICE) + self.source_id = SOURCE_ID + # Heartbeat publisher: makes the bridge node discoverable in the + # SOVD entity tree (topic-discovery requires at least one topic + # per node) and gives operators a simple liveness signal. + self.heartbeat_pub = self.create_publisher(String, "heartbeat", 10) + self._heartbeat_timer = self.create_timer(1.0, self._publish_heartbeat) + self._await_service() + + def _publish_heartbeat(self) -> None: + msg = String() + msg.data = "opcua_bridge alive" + self.heartbeat_pub.publish(msg) + + def _await_service(self) -> None: + while rclpy.ok() and not self.client.wait_for_service(timeout_sec=2.0): + self.get_logger().info( + f"waiting for service {REPORT_FAULT_SERVICE} ...", + ) + + def report_fault( + self, mapping: AlarmMapping, message: str, event_type: int, + ) -> None: + request = ReportFault.Request() + request.fault_code = mapping.fault_code + request.event_type = event_type # 0=FAILED, 1=PASSED + request.severity = mapping.severity if event_type == 0 else 0 + request.description = message + request.source_id = self.source_id + + future = self.client.call_async(request) + + def _done(fut) -> None: + try: + response = fut.result() + self.get_logger().info( + f"reported {mapping.fault_code} event={event_type} " + f"accepted={getattr(response, 'accepted', '?')}", + ) + except Exception as exc: # noqa: BLE001 + self.get_logger().warning( + f"report_fault call failed: {exc}", + ) + + future.add_done_callback(_done) + + +class 
AlarmEventHandler: + """asyncua SubHandler. Translates AlarmConditionType events into + BridgeNode.report_fault dispatches. Loopback prevention: if the + SourceName begins with our own source_id, drop the event.""" + + def __init__(self, node: BridgeNode) -> None: + self.node = node + + async def event_notification(self, event) -> None: + try: + source_name = getattr(event, "SourceName", None) + severity = getattr(event, "Severity", None) + except Exception: # noqa: BLE001 + LOGGER.warning("event missing standard fields: %s", event) + return + + if source_name is None: + return + if source_name.startswith(self.node.source_id): + LOGGER.debug("ignoring loopback event from %s", source_name) + return + + mapping = ALARM_MAP.get(source_name) + if mapping is None: + LOGGER.debug("no mapping for source_name=%s", source_name) + return + + # Pull message text (LocalizedText.Text). + message_text = getattr(event, "Message", None) + message_str = mapping.description + if message_text is not None: + text = getattr(message_text, "Text", None) + if text: + message_str = text + + # Severity 0 signals cleared/healed in our PLC convention. + event_type = 1 if severity == 0 else 0 + if event_type == 1: + message_str = f"{mapping.description} cleared" + self.node.report_fault(mapping, message_str, event_type) + + +async def opcua_loop(node: BridgeNode, stop_event: asyncio.Event) -> None: + handler = AlarmEventHandler(node) + # One-shot DNS diagnostic so future "name resolution failure" loops are + # debuggable without docker exec into the running container. 
+ try: + import socket + from urllib.parse import urlparse + + parsed = urlparse(OPCUA_ENDPOINT) + host = parsed.hostname or OPCUA_ENDPOINT + infos = socket.getaddrinfo(host, parsed.port or 4840) + LOGGER.info( + "DNS check: %s resolves to %s", + host, + ", ".join(sorted({i[4][0] for i in infos})), + ) + except Exception as dns_exc: # noqa: BLE001 + LOGGER.warning("DNS check FAILED: %s", dns_exc) + + while not stop_event.is_set(): + try: + async with Client(OPCUA_ENDPOINT) as client: + LOGGER.info("connected to %s", OPCUA_ENDPOINT) + sub = await client.create_subscription(500, handler) + event_type = client.get_node(ua.ObjectIds.AlarmConditionType) + await sub.subscribe_events(client.nodes.server, event_type) + LOGGER.info("subscribed to AlarmConditionType events") + while not stop_event.is_set(): + await asyncio.sleep(0.5) + except (OSError, ua.UaError, asyncio.TimeoutError) as exc: + LOGGER.warning("OPC UA session error: %s, retrying in 2s", exc) + await asyncio.sleep(2.0) + + +def main() -> None: + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)s %(name)s %(message)s", + ) + + rclpy.init() + node = BridgeNode() + + stop = asyncio.Event() + + def _signal_handler() -> None: + stop.set() + rclpy.shutdown() + + def _opcua_thread() -> None: + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + loop.run_until_complete(opcua_loop(node, stop)) + finally: + loop.close() + + thread = threading.Thread(target=_opcua_thread, name="opcua-loop", daemon=True) + thread.start() + + # rclpy.spin in main thread so service futures resolve. 
+ try: + for sig in (signal.SIGINT, signal.SIGTERM): + signal.signal(sig, lambda *_: _signal_handler()) + rclpy.spin(node) + except KeyboardInterrupt: + pass + finally: + stop.set() + node.destroy_node() + if rclpy.ok(): + rclpy.shutdown() + thread.join(timeout=3.0) + + +if __name__ == "__main__": + main() diff --git a/demos/manymove_industrial/package.xml b/demos/manymove_industrial/package.xml new file mode 100644 index 0000000..44d8f5a --- /dev/null +++ b/demos/manymove_industrial/package.xml @@ -0,0 +1,25 @@ + + + manymove_industrial + 0.1.0 + + manymove + ros2_medkit integration demo: fault-aware BT manipulator + pipeline with planned OpenPLC + OPC UA tier 2 correlation. + + Selfpatch + BSD-3-Clause + + ament_cmake + + ros2_medkit_fault_manager + ros2_medkit_gateway + ros2_medkit_diagnostic_bridge + ros2_medkit_graph_provider + manymove_cpp_trees + manymove_planner + manymove_object_manager + + + ament_cmake + + diff --git a/demos/manymove_industrial/plc_sim/Dockerfile b/demos/manymove_industrial/plc_sim/Dockerfile new file mode 100644 index 0000000..ee738eb --- /dev/null +++ b/demos/manymove_industrial/plc_sim/Dockerfile @@ -0,0 +1,23 @@ +FROM python:3.12-slim + +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /opt/plc_sim + +COPY requirements.txt ./ +RUN pip install --no-cache-dir -r requirements.txt + +COPY plc_server.py ./ + +ENV PYTHONUNBUFFERED=1 \ + PLC_OPCUA_ENDPOINT=opc.tcp://0.0.0.0:4840/manymove_plc/server \ + PLC_ADMIN_PORT=8500 + +EXPOSE 4840 8500 + +HEALTHCHECK --interval=10s --timeout=3s --start-period=10s --retries=5 \ + CMD curl -fsS http://localhost:8500/health >/dev/null || exit 1 + +CMD ["python", "-u", "plc_server.py"] diff --git a/demos/manymove_industrial/plc_sim/plc_server.py b/demos/manymove_industrial/plc_sim/plc_server.py new file mode 100644 index 0000000..acbc8c7 --- /dev/null +++ b/demos/manymove_industrial/plc_sim/plc_server.py @@ -0,0 +1,230 @@ +"""OPC UA 
server that emulates an industrial PLC for the manymove_industrial demo. + +The control logic, tag namespace and alarm conditions are designed to be +swappable with a real OpenPLC v3 + ST program once the IEC 61131-3 build +pipeline is in place. From a bridge-client perspective the OPC UA surface +is identical: standard AlarmConditionType events on namespace 2. + +Three alarm conditions are exposed: + - photoeye_flicker : WARN - intermittent pick photoeye, precursor fault + - conveyor_overspeed: ERROR - motor speed beyond commissioning envelope + - estop_engaged : CRITICAL - line e-stop pressed + +External tags can be poked via a small admin endpoint so the demo +orchestrator (record_full.sh) can trigger faults at known timestamps +without going through the OPC UA write path. +""" + +from __future__ import annotations + +import asyncio +import logging +import os +import signal +from typing import Optional + +from aiohttp import web +from asyncua import Server, ua +from asyncua.common.events import Event + +LOGGER = logging.getLogger("plc_sim") + +ENDPOINT = os.environ.get("PLC_OPCUA_ENDPOINT", "opc.tcp://0.0.0.0:4840/manymove_plc/server") +ADMIN_PORT = int(os.environ.get("PLC_ADMIN_PORT", "8500")) +APP_URI = "urn:selfpatch:manymove_industrial:plc_sim" + + +class PLCSimulator: + """Holds the OPC UA server, tags, alarm conditions and admin endpoint.""" + + def __init__(self) -> None: + self.server = Server() + self.alarm_nodes: dict[str, "AlarmHandle"] = {} + + async def setup(self) -> None: + await self.server.init() + self.server.set_endpoint(ENDPOINT) + self.server.set_application_uri(APP_URI) + self.server.set_server_name("manymove_industrial PLC sim") + + # Namespaces: 0 = OPC standard, 1 = local URI, 2 = our PLC tags. 
+ self.ns_plc = await self.server.register_namespace(APP_URI) + LOGGER.info("registered PLC namespace idx=%s", self.ns_plc) + + objects = self.server.nodes.objects + plc_folder = await objects.add_folder(self.ns_plc, "PLC") + + # Process tags (analog + digital, similar to ST %IX/%QX/%MW). + self.tag_photoeye_pick = await plc_folder.add_variable( + self.ns_plc, "photoeye_pick", False, ua.VariantType.Boolean, + ) + self.tag_photoeye_drop = await plc_folder.add_variable( + self.ns_plc, "photoeye_drop", False, ua.VariantType.Boolean, + ) + self.tag_conveyor_speed_rpm = await plc_folder.add_variable( + self.ns_plc, "conveyor_speed_rpm", 0.0, ua.VariantType.Double, + ) + self.tag_conveyor_motor = await plc_folder.add_variable( + self.ns_plc, "conveyor_motor", True, ua.VariantType.Boolean, + ) + self.tag_estop = await plc_folder.add_variable( + self.ns_plc, "estop", False, ua.VariantType.Boolean, + ) + for tag in ( + self.tag_photoeye_pick, + self.tag_photoeye_drop, + self.tag_conveyor_speed_rpm, + self.tag_conveyor_motor, + self.tag_estop, + ): + await tag.set_writable() + + # Alarm conditions (OPC UA Part 9 AlarmConditionType). Source MUST + # be an Object node (supports EventNotifier attribute), not a + # Variable. Per OPC UA spec, the Server object is the canonical + # event emitter when no per-equipment Object exists. 
+ server_node = self.server.get_node(ua.ObjectIds.Server) + self.alarm_nodes["photoeye_flicker"] = AlarmHandle( + server=self.server, + source=server_node, + event_type=ua.ObjectIds.AlarmConditionType, + severity=300, # 1-1000, mid range + condition_name="PhotoeyeFlicker", + message="Pick photoeye toggling above acceptance rate", + ) + self.alarm_nodes["conveyor_overspeed"] = AlarmHandle( + server=self.server, + source=server_node, + event_type=ua.ObjectIds.AlarmConditionType, + severity=600, + condition_name="ConveyorOverspeed", + message="Conveyor speed beyond commissioning envelope", + ) + self.alarm_nodes["estop_engaged"] = AlarmHandle( + server=self.server, + source=server_node, + event_type=ua.ObjectIds.AlarmConditionType, + severity=900, + condition_name="EstopEngaged", + message="Line e-stop pressed", + ) + for handle in self.alarm_nodes.values(): + await handle.setup() + + async def run(self) -> None: + async with self.server: + LOGGER.info("OPC UA server up at %s", ENDPOINT) + admin_runner = await self._start_admin_endpoint() + try: + await asyncio.Event().wait() + finally: + await admin_runner.cleanup() + + async def _start_admin_endpoint(self) -> web.AppRunner: + """Tiny HTTP control surface so external orchestrators (record_full.sh, + smoke tests, container_scripts) can trigger alarms without speaking + OPC UA themselves.""" + app = web.Application() + app.router.add_post("/alarm/{name}/raise", self._handle_raise) + app.router.add_post("/alarm/{name}/clear", self._handle_clear) + app.router.add_get("/health", lambda _: web.json_response({"ok": True})) + runner = web.AppRunner(app) + await runner.setup() + site = web.TCPSite(runner, "0.0.0.0", ADMIN_PORT) + await site.start() + LOGGER.info("admin endpoint up at http://0.0.0.0:%s", ADMIN_PORT) + return runner + + async def _handle_raise(self, request: web.Request) -> web.Response: + name = request.match_info["name"] + handle = self.alarm_nodes.get(name) + if handle is None: + return 
web.json_response({"error": f"unknown alarm {name}"}, status=404) + body = await request.json() if request.body_exists else {} + message = body.get("message") + await handle.raise_alarm(message=message) + LOGGER.info("admin raised alarm: %s", name) + return web.json_response({"ok": True, "alarm": name, "state": "active"}) + + async def _handle_clear(self, request: web.Request) -> web.Response: + name = request.match_info["name"] + handle = self.alarm_nodes.get(name) + if handle is None: + return web.json_response({"error": f"unknown alarm {name}"}, status=404) + await handle.clear_alarm() + LOGGER.info("admin cleared alarm: %s", name) + return web.json_response({"ok": True, "alarm": name, "state": "cleared"}) + + +class AlarmHandle: + """Manages one AlarmConditionType lifecycle (active / cleared).""" + + def __init__( + self, + server: Server, + source, + event_type: int, + severity: int, + condition_name: str, + message: str, + ) -> None: + self.server = server + self.source = source + self.event_type = event_type + self.severity = severity + self.condition_name = condition_name + self.default_message = message + self.generator: Optional[Event] = None + self.active = False + + async def setup(self) -> None: + self.generator = await self.server.get_event_generator(self.event_type, self.source) + self.generator.event.SourceName = self.condition_name + self.generator.event.Severity = self.severity + self.generator.event.Message = ua.LocalizedText(self.default_message) + + async def raise_alarm(self, message: Optional[str] = None) -> None: + if self.generator is None: + raise RuntimeError("alarm not initialised") + if message: + self.generator.event.Message = ua.LocalizedText(message) + else: + self.generator.event.Message = ua.LocalizedText(self.default_message) + await self.generator.trigger() + self.active = True + + async def clear_alarm(self) -> None: + if self.generator is None: + return + self.generator.event.Message = ua.LocalizedText( + 
f"{self.default_message} cleared", + ) + # Severity 0 signals cleared/healed in our mapping convention. + prev_severity = self.generator.event.Severity + self.generator.event.Severity = 0 + await self.generator.trigger() + self.generator.event.Severity = prev_severity + self.active = False + + +async def main() -> None: + logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s %(message)s") + sim = PLCSimulator() + await sim.setup() + + stop = asyncio.Event() + loop = asyncio.get_running_loop() + for sig in (signal.SIGINT, signal.SIGTERM): + loop.add_signal_handler(sig, stop.set) + + server_task = asyncio.create_task(sim.run()) + await stop.wait() + server_task.cancel() + try: + await server_task + except asyncio.CancelledError: + pass + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/demos/manymove_industrial/plc_sim/requirements.txt b/demos/manymove_industrial/plc_sim/requirements.txt new file mode 100644 index 0000000..a4b81f3 --- /dev/null +++ b/demos/manymove_industrial/plc_sim/requirements.txt @@ -0,0 +1,2 @@ +asyncua==1.1.5 +aiohttp==3.9.5 diff --git a/demos/manymove_industrial/run-demo.sh b/demos/manymove_industrial/run-demo.sh new file mode 100755 index 0000000..2d913a7 --- /dev/null +++ b/demos/manymove_industrial/run-demo.sh @@ -0,0 +1,63 @@ +#!/bin/bash +set -euo pipefail + +cd "$(dirname "$0")" + +PROFILE="cpu" +NO_CACHE=0 + +while [ $# -gt 0 ]; do + case "$1" in + --ci) PROFILE="ci"; shift ;; + --no-cache) NO_CACHE=1; shift ;; + -h|--help) + cat </dev/null 2>&1; then + echo "docker is required" >&2 + exit 1 +fi + +# Allow X11 access from the container so RViz / Groot / manymove HMI render +# on the host display when running the cpu profile. 
+if [ "$PROFILE" = "cpu" ] && command -v xhost >/dev/null 2>&1; then + xhost +local:root >/dev/null 2>&1 || true +fi + +if [ "$NO_CACHE" = "1" ]; then + docker compose --profile "$PROFILE" build --no-cache +fi + +docker compose --profile "$PROFILE" up -d --build + +cat </dev/null; then + pass "$script script accepted by gateway" + else + fail "gateway rejected $script script execution" + fi +done + +section "Medkit REST -> FaultManager round-trip" +# arm-self-test issues a FAILED + PASSED pair directly via the FaultManager +# service. We only assert the script is accepted by the gateway; the +# fault visibility is intentionally not asserted because the test uses +# severity 0 (INFO) which does not pass the FaultManager debounce +# threshold, and even when it did the FAILED -> PASSED pair clears the +# fault from the active list within the smoke poll window. +# The full REST -> FaultManager round-trip is covered by the PLC bridge +# section below (real fault from a real source, severity 1 WARN). +if curl -fsS -X POST -H "Content-Type: application/json" -d "$EXEC_BODY" \ + "$API_BASE/components/manymove-planning/scripts/arm-self-test/executions" >/dev/null; then + pass "arm-self-test script accepted by gateway" +else + fail "gateway rejected arm-self-test script execution" +fi + +section "PLC bridge: cross-source fault aggregation" +# Conveyor-line scripts trigger PLC AlarmConditionType events on the +# plc-sim OPC UA server; opcua_bridge subscribes and forwards them as +# MANYMOVE_PLC_* faults with source_id=/plc/sensor_io. + +# Manifest must declare the conveyor-line component (the container_scripts +# directory + manifest component id must match for the gateway to expose +# the script endpoints). 
+if api_get "/components"; then + if echo "$RESPONSE" | items_contain_id "openplc"; then + pass "openplc component declared in components" + else + fail "openplc component missing from components list" + fi + if echo "$RESPONSE" | items_contain_id "opcua-bridge"; then + pass "opcua-bridge component declared in components" + else + fail "opcua-bridge component missing from components list" + fi +fi + +# Photoeye flicker injection -> MANYMOVE_PLC_PHOTOEYE_FLICKER (WARN) +if curl -fsS -X POST -H "Content-Type: application/json" -d "$EXEC_BODY" \ + "$API_BASE/components/conveyor-line/scripts/inject-photoeye-flicker/executions" >/dev/null; then + pass "inject-photoeye-flicker script accepted by gateway" +else + fail "gateway rejected inject-photoeye-flicker script execution" +fi + +if poll_until "/faults" \ + '.items[] | select(.fault_code == "MANYMOVE_PLC_PHOTOEYE_FLICKER")' 30; then + pass "PLC photoeye flicker fault arrived via opcua_bridge" +else + fail "PLC photoeye flicker fault did not arrive via opcua_bridge" +fi + +# Clear PLC alarms -> PASSED events -> healed faults +if curl -fsS -X POST -H "Content-Type: application/json" -d "$EXEC_BODY" \ + "$API_BASE/components/conveyor-line/scripts/restore-line/executions" >/dev/null; then + pass "restore-line script accepted by gateway" +else + fail "gateway rejected restore-line script execution" +fi + +if poll_until "/faults?statuses=HEALED,PREPASSED" \ + '.items[] | select(.fault_code == "MANYMOVE_PLC_PHOTOEYE_FLICKER")' 30; then + pass "PLC photoeye flicker fault healed via opcua_bridge" +else + fail "PLC photoeye flicker fault did not heal after restore-line" +fi + +# Final summary +echo "" +if [ "$FAIL_COUNT" -gt 0 ]; then + echo -e "${RED}Failed: $FAIL_COUNT${NC} (passed: $PASS_COUNT)" + echo -e "Failed tests:$FAILED_TESTS" + exit 1 +fi +echo -e "${GREEN}All $PASS_COUNT smoke tests passed${NC}"