diff --git a/Ryzen-AI-CVML-Library/README.md b/Ryzen-AI-CVML-Library/README.md
index 12fd4912..395fcf3f 100644
--- a/Ryzen-AI-CVML-Library/README.md
+++ b/Ryzen-AI-CVML-Library/README.md
@@ -176,7 +176,12 @@
 corresponding, header file under the **include/** folder, where
 `feature-name` is the name of the desired Ryzen AI feature.
 
-For example, the definitions for the Ryzen AI Depth Estimation feature are
+For example, the definitions for the Ryzen AI Body Pose feature are
+available after adding a line similar to the following example:
+
+    #include <cvml-body-pose.h>
+
+Similarly, the definitions for the Ryzen AI Depth Estimation feature are
 available after adding a line similar to the following example:
 
     #include <cvml-depth-estimation.h>
@@ -392,6 +397,7 @@ Date | Revision | Notes
 November 30, 2023 | 1.0 | Initial revision
 April 2, 2024 | 1.1 | Include driver/copyright info
 March 7, 2025 | 1.2 | Minor updates and notes
+June 14, 2026 | 1.3 | Add Body Pose feature
 
 [Back to top](#top)
 
diff --git a/Ryzen-AI-CVML-Library/include/cvml-body-pose.h b/Ryzen-AI-CVML-Library/include/cvml-body-pose.h
new file mode 100644
index 00000000..401a6af2
--- /dev/null
+++ b/Ryzen-AI-CVML-Library/include/cvml-body-pose.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2022-2025 Advanced Micro Devices, Inc. All rights reserved.
+ */
+
+#ifndef EDGEML_FEATURES_BODY_POSE_INCLUDE_CVML_BODY_POSE_H_
+#define EDGEML_FEATURES_BODY_POSE_INCLUDE_CVML_BODY_POSE_H_
+
+#include "cvml-api-common.h"
+#include "cvml-context.h"
+#include "cvml-image.h"
+#include "cvml-types.h"
+
+// NOTE(review): these using-declarations are at global scope, so every file that includes this
+// header also sees Array, Context, Image and Person unqualified - confirm this is intended.
+using amd::cvml::Array;
+using amd::cvml::Context;
+using amd::cvml::Image;
+using amd::cvml::Person;
+
+namespace amd {
+namespace cvml {
+
+/**
+ * Body Pose Detection feature class.
+ *
+ * The body pose feature takes in an image or video stream as input. For each frame,
+ * it returns predictions for up to 6 people in the frame containing:
+ *
+ * - Bounding box (x, y, width, height) in image space
+ * - 17 landmark coordinates (x, y, z) in image space
+ * - Confidence score for each landmark (0.0 to 1.0, or -1 when not visible; see below)
+ * - Confidence score for the overall person (0.0 to 1.0)
+ *
+ * The landmarks correspond to 17 unique joint positions on the human body in accordance with
+ * the COCO-Pose dataset (see \a BodyPose::Keypoint). Any landmark that is occluded or falls outside
+ * of the frame will have its associated confidence score set to -1. Unless 3D detection is enabled,
+ * the z coordinate of each landmark is set to 0.
+ *
+ * If the input streaming mode is not ONE_SHOT, the API may enable additional postprocessing to
+ * smooth detections across frames.
+ *
+ * Example
+ *
+ *     // create Ryzen AI context
+ *     auto context = amd::cvml::CreateContext();
+ *
+ *     // create body pose feature
+ *     amd::cvml::BodyPose feature(context);
+ *
+ *     // iterate over input frames
+ *     for (auto frame ... ) {
+ *       // encapsulate input image
+ *       amd::cvml::Image img( ... );
+ *
+ *       // detect people/poses
+ *       auto output = feature.Generate(img);
+ *     }
+ */
+class CVML_SDK_EXPORT BodyPose {
+  AMD_CVML_INTERFACE(BodyPose);
+
+ public:
+  /**
+   * Constructor
+   *
+   * @param context Pointer to CVML SDK context
+   */
+  explicit BodyPose(Context* context);
+
+  /**
+   * Defines the indices into the \a landmarks_ and \a landmark_scores_ Arrays
+   * of a \a Person object
+   */
+  enum class Keypoint {
+    kNose,           ///< Nose
+    kLeftEye,        ///< Left eye
+    kRightEye,       ///< Right eye
+    kLeftEar,        ///< Left ear
+    kRightEar,       ///< Right ear
+    kLeftShoulder,   ///< Left shoulder
+    kRightShoulder,  ///< Right shoulder
+    kLeftElbow,      ///< Left elbow
+    kRightElbow,     ///< Right elbow
+    kLeftWrist,      ///< Left wrist
+    kRightWrist,     ///< Right wrist
+    kLeftHip,        ///< Left hip
+    kRightHip,       ///< Right hip
+    kLeftKnee,       ///< Left knee
+    kRightKnee,      ///< Right knee
+    kLeftAnkle,      ///< Left ankle
+    kRightAnkle,     ///< Right ankle
+    kNumPoints       ///< Total number of returned landmarks
+  };
+
+  /**
+   * Main feature entry point.
+   *
+   * Applications/clients should call this function once for every
+   * frame in the video or live stream.
+   *
+   * @param img amd::cvml::Image object containing input image
+   * @return the Array of Person structs representing detected people
+   */
+  Array<Person> Generate(const Image& img);
+
+  /**
+   * Set the detection threshold for people within a scene.
+   *
+   * Detected persons under the specified threshold are not returned by
+   * the \a Generate() function.
+   *
+   * @param threshold Person detection threshold, from 0.0 to 1.0
+   */
+  void SetDetectionThreshold(float threshold);
+
+  class Impl;
+
+ protected:
+  Impl* impl_;  ///< Implementation of body pose interface.
+};
+
+}  // namespace cvml
+}  // namespace amd
+
+#endif  // EDGEML_FEATURES_BODY_POSE_INCLUDE_CVML_BODY_POSE_H_
diff --git a/Ryzen-AI-CVML-Library/samples/cvml-sample-body-pose/CMakeLists.txt b/Ryzen-AI-CVML-Library/samples/cvml-sample-body-pose/CMakeLists.txt
new file mode 100644
index 00000000..47982509
--- /dev/null
+++ b/Ryzen-AI-CVML-Library/samples/cvml-sample-body-pose/CMakeLists.txt
@@ -0,0 +1,23 @@
+# Copyright (C) 2022-2025 Advanced Micro Devices, Inc. All rights reserved.
+
+set(FEATURE_NAME cvml-sample-body-pose)
+project(${FEATURE_NAME})
+
+list(APPEND CMAKE_MODULE_PATH ${OPENCV_INSTALL_ROOT} ${AMD_CVML_SDK_ROOT} "${CMAKE_CURRENT_SOURCE_DIR}/../..")
+
+find_package(OpenCV REQUIRED)
+find_package(RyzenAILibrary REQUIRED)
+
+file(GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
+
+include_directories(${OpenCV_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}/../common-sample-utils/include)
+
+add_executable(${PROJECT_NAME} ${SOURCES})
+target_link_libraries(${PROJECT_NAME}
+  ${OpenCV_LIBS}
+  ${RyzenAILibrary_LIBS}
+  common-sample-utils
+)
+
+install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/ DESTINATION ${AMD_CVML_SDK_ROOT}/samples/${FEATURE_NAME})
+install(TARGETS ${PROJECT_NAME} DESTINATION ${AMD_CVML_SDK_ROOT}/samples/${FEATURE_NAME})
diff --git a/Ryzen-AI-CVML-Library/samples/cvml-sample-body-pose/README.md b/Ryzen-AI-CVML-Library/samples/cvml-sample-body-pose/README.md
new file mode 100644
index 00000000..8734aa65
--- /dev/null
+++ b/Ryzen-AI-CVML-Library/samples/cvml-sample-body-pose/README.md
@@ -0,0 +1,28 @@
+# `cvml-sample-body-pose`
+
+This sample demonstrates the use of AMD's Body Pose feature.
+It detects up to 6 people per frame and, for each person, returns a bounding box, 17 landmark coordinates, and confidence scores, using the COCO-Pose keypoint convention.
+Results are visually overlaid on the output image or video.
+
+## Usage
+
+```sh
+cvml-sample-body-pose.exe [-i path_to_image/video] [-o output image/video filename] [-h]
+Options
+-i: Run body pose on the given image or video. (Optional)
+-o: Specify output image or video file name, e.g., .mp4 or .jpg. (Optional)
+-h: Show usage.
+If no arguments are provided, the application attempts to capture input from camera index 0.
+
+Examples
+Run the sample with an image input without output file:
+cvml-sample-body-pose.exe -i my_image.jpg
+
+Run the sample with a video input and save the result to an output video file:
+cvml-sample-body-pose.exe -i my_video.mp4 -o output_video.mp4
+
+Run the sample to capture the camera feed and save the result to a video file:
+cvml-sample-body-pose.exe -o output_video.mp4
+
+Note
+If the user runs the application without any arguments, it will use the camera as an input.
diff --git a/Ryzen-AI-CVML-Library/samples/cvml-sample-body-pose/main.cpp b/Ryzen-AI-CVML-Library/samples/cvml-sample-body-pose/main.cpp
new file mode 100644
index 00000000..3f575e44
--- /dev/null
+++ b/Ryzen-AI-CVML-Library/samples/cvml-sample-body-pose/main.cpp
@@ -0,0 +1,241 @@
+/*
+ * Copyright (C) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
+ */
+// NOTE(review): the include targets were missing from this patch; the header names below are
+// inferred from the symbols this file uses - confirm them against the SDK and sample utils.
+#include <common-sample-utils.h>
+#include <cvml-body-pose.h>
+#include <cvml-context.h>
+
+#include <iostream>
+#include <string>
+
+#include "opencv2/opencv.hpp"
+
+namespace {
+
+/**
+ * Check that an output image can be drawn on.
+ *
+ * @param img image to validate
+ * @return true if \a img is non-null and has pixel data; otherwise logs a message and returns false
+ */
+bool IsDrawable(const cv::Mat* img) {
+  if (img == nullptr || img->data == nullptr) {
+    std::cout << "Invalid output image" << std::endl;
+    return false;
+  }
+  return true;
+}
+
+}  // namespace
+
+/**
+ * Declare local class for sample variables and functions.
+ */
+class BodyPoseSample : public amd::cvml::sample::utils::RunFeatureClass {
+ public:
+  /// Minimum landmark confidence required to draw a landmark or an edge touching it.
+  static constexpr float detection_threshold_ = 0.2f;
+  /// Feature instance used by Feature(); not owned. Feature() returns its input while null.
+  amd::cvml::BodyPose* body_pose_ = nullptr;
+  std::string input_str_{};
+  // cppcheck-suppress duplInheritedMember
+  std::string output_file_{};
+
+  /**
+   * Run Body Pose on single frame
+   *
+   * @param frame_rgb Incoming RGB frame
+   * @return Output RGB frame with detections drawn on it; the input frame is returned as-is
+   *         when it is empty or no feature instance has been set
+   */
+  cv::Mat Feature(const cv::Mat& frame_rgb) override;
+
+  /**
+   * Helper functions to draw person skeleton on input image.
+   *
+   * @param out_img image to draw on
+   * @param bp detected Person struct
+   */
+  void DrawPersonEdges(cv::Mat* out_img, const amd::cvml::Person& bp);
+  void DrawPersonLandmarks(cv::Mat* out_img, const amd::cvml::Person& bp);
+  void DrawPersonBoundingBox(cv::Mat* out_img, const amd::cvml::Person& bp);
+};
+
+cv::Mat BodyPoseSample::Feature(const cv::Mat& frame_rgb) {
+  // Return on invalid input or when no feature instance has been attached
+  if (frame_rgb.empty() || body_pose_ == nullptr) {
+    return frame_rgb;
+  }
+
+  // cv::Mat assignment shares the pixel buffer, so the overlays below are drawn directly into
+  // the caller's frame data rather than into a copy.
+  cv::Mat frame_out = frame_rgb;
+
+  //
+  // Generate body pose results from the received input frame
+  //
+  auto results = body_pose_->Generate(
+      amd::cvml::Image(amd::cvml::Image::Format::kRGB, amd::cvml::Image::DataType::kUint8,
+                       frame_rgb.cols, frame_rgb.rows, frame_rgb.data));
+
+  //
+  // Draw the keypoints, edges, bounding boxes on the output image
+  //
+  for (size_t index = 0; index < results.size(); ++index) {
+    const amd::cvml::Person& bp = results[index];
+
+    // draw edges between landmarks for each person
+    DrawPersonEdges(&frame_out, bp);
+
+    // draw individual landmarks for each person
+    DrawPersonLandmarks(&frame_out, bp);
+
+    // draw bounding box for person instance
+    DrawPersonBoundingBox(&frame_out, bp);
+  }
+
+  return frame_out;
+}
+
+void BodyPoseSample::DrawPersonEdges(cv::Mat* out_img, const amd::cvml::Person& bp) {
+  using Keypoint = amd::cvml::BodyPose::Keypoint;
+
+  // Pairs of landmarks that are joined by a line to form the skeleton.
+  static const struct {
+    Keypoint start;
+    Keypoint end;
+  } edge_list[] = {
+      // left side body
+      {Keypoint::kLeftShoulder, Keypoint::kLeftElbow},
+      {Keypoint::kLeftElbow, Keypoint::kLeftWrist},
+      {Keypoint::kLeftShoulder, Keypoint::kLeftHip},
+      {Keypoint::kLeftHip, Keypoint::kLeftKnee},
+      {Keypoint::kLeftKnee, Keypoint::kLeftAnkle},
+
+      // right side body
+      {Keypoint::kRightShoulder, Keypoint::kRightElbow},
+      {Keypoint::kRightElbow, Keypoint::kRightWrist},
+      {Keypoint::kRightShoulder, Keypoint::kRightHip},
+      {Keypoint::kRightHip, Keypoint::kRightKnee},
+      {Keypoint::kRightKnee, Keypoint::kRightAnkle},
+
+      // center body
+      {Keypoint::kLeftShoulder, Keypoint::kRightShoulder},
+      {Keypoint::kLeftHip, Keypoint::kRightHip},
+      {Keypoint::kNose, Keypoint::kLeftShoulder},
+      {Keypoint::kNose, Keypoint::kRightShoulder},
+
+      // head
+      {Keypoint::kNose, Keypoint::kLeftEye},
+      {Keypoint::kNose, Keypoint::kLeftEar},
+      {Keypoint::kLeftEye, Keypoint::kLeftEar},
+
+      {Keypoint::kNose, Keypoint::kRightEye},
+      {Keypoint::kNose, Keypoint::kRightEar},
+      {Keypoint::kRightEye, Keypoint::kRightEar}};
+
+  const cv::Scalar color = cv::Scalar(255, 165, 0);
+  const int line_thickness = 3;
+
+  if (!IsDrawable(out_img)) {
+    return;
+  }
+
+  for (const auto& edge : edge_list) {
+    const int start_idx = static_cast<int>(edge.start);
+    const int end_idx = static_cast<int>(edge.end);
+
+    // Skip the edge when either endpoint is below the confidence threshold. Occluded or
+    // out-of-frame landmarks are scored -1 and are therefore always skipped here, which is why
+    // no separate clipping step is needed before drawing.
+    if ((bp.landmark_scores_[start_idx] < detection_threshold_) ||
+        (bp.landmark_scores_[end_idx] < detection_threshold_)) {
+      continue;
+    }
+
+    const cv::Point p1(bp.landmarks_[start_idx].x_, bp.landmarks_[start_idx].y_);
+    const cv::Point p2(bp.landmarks_[end_idx].x_, bp.landmarks_[end_idx].y_);
+    cv::line(*out_img, p1, p2, color, line_thickness);
+  }
+}
+
+void BodyPoseSample::DrawPersonLandmarks(cv::Mat* out_img, const amd::cvml::Person& bp) {
+  const cv::Scalar color = cv::Scalar(0, 255, 0);
+  const int radius = 10;  // radius for landmarks
+
+  if (!IsDrawable(out_img)) {
+    return;
+  }
+
+  for (size_t k = 0; k < bp.landmarks_.size(); ++k) {
+    // covers both low-confidence landmarks and the -1 score used for missing ones
+    if (bp.landmark_scores_[k] < detection_threshold_) {
+      continue;
+    }
+    const cv::Point p(bp.landmarks_[k].x_, bp.landmarks_[k].y_);
+
+    cv::circle(*out_img, p, radius, color, -1);  // negative thickness draws a filled circle
+  }
+}
+
+void BodyPoseSample::DrawPersonBoundingBox(cv::Mat* out_img, const amd::cvml::Person& bp) {
+  const cv::Scalar color = cv::Scalar(125, 18, 255);
+  const int thickness = 4;  // line thickness
+
+  if (!IsDrawable(out_img)) {
+    return;
+  }
+  const cv::Rect r(bp.person_.x_, bp.person_.y_, bp.person_.width_, bp.person_.height_);
+
+  cv::rectangle(*out_img, r, color, thickness);
+}
+
+/**
+ * Main entry point of the sample application.
+ *
+ * @param argc Number of command line arguments
+ * @param argv Array of command line arguments
+ * @return 0 on success, -1 if argument parsing, context creation, or the run fails
+ */
+int main(int argc, char** const argv) {
+  BodyPoseSample bp_sample;
+
+  // parse command line arguments
+  if (!amd::cvml::sample::utils::ParseArguments(argc, argv, &bp_sample.input_str_,
+                                                &bp_sample.output_file_)) {
+    return -1;
+  }
+
+  int exit_code = -1;  // only cleared once the feature loop has finished
+  try {
+    // create a CVML context for the feature
+    auto context = amd::cvml::CreateContext();
+    if (!context) {
+      std::cerr << "Failed to create context" << std::endl;
+      return exit_code;
+    }
+    try {
+      // select backend (optional) and set streaming mode based on input file
+      context->SetInferenceBackend(amd::cvml::Context::InferenceBackend::AUTO);
+      bp_sample.SetContextStreamingModeBySrc(context, bp_sample.input_str_);
+
+      // scoped to this block so it is destroyed before the context is released
+      amd::cvml::BodyPose body_pose(context);
+      bp_sample.body_pose_ = &body_pose;
+
+      // run the feature against input frames and local_data
+      bp_sample.RunFeature(bp_sample.input_str_, bp_sample.output_file_, "AMD Body Pose");
+      bp_sample.body_pose_ = nullptr;
+      exit_code = 0;
+    } catch (...) {
+      context->Release();  // do not leak the context when the run throws
+      throw;
+    }
+    context->Release();  // release previously created context
+  } catch (const std::exception& e) {
+    std::cerr << "Sample application error:" << e.what() << std::endl;
+  }
+  return exit_code;
+}
diff --git a/Ryzen-AI-CVML-Library/windows/cvml-body-pose/.gitkeep b/Ryzen-AI-CVML-Library/windows/cvml-body-pose/.gitkeep
new file mode 100644
index 00000000..e69de29b