From 56fe34f929b18c27fdb5109b5d0dc9c62fe25767 Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Fri, 1 May 2026 10:10:38 -0700 Subject: [PATCH 1/9] try `appverif /verify` in addition to `/fsanitize=address` --- .github/workflows/ci.cpu.yml | 342 ++++++++++++++--------------- .github/workflows/ci.gpu.yml | 10 +- .github/workflows/test-windows.ps1 | 10 + 3 files changed, 186 insertions(+), 176 deletions(-) diff --git a/.github/workflows/ci.cpu.yml b/.github/workflows/ci.cpu.yml index 7b353d97f..6f9d34a4a 100644 --- a/.github/workflows/ci.cpu.yml +++ b/.github/workflows/ci.cpu.yml @@ -12,133 +12,133 @@ concurrency: jobs: - build-cpu: - runs-on: ubuntu-latest - name: ${{ matrix.name }} - strategy: - fail-fast: false - matrix: - include: - - { name: "CPU (clang 16, Debug)", build: "Debug", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-stdlib=libc++" } - - { name: "CPU (clang 16, Debug, c++23)", build: "Debug", tag: llvm16-cuda12.9, cxxstd: "23", cxxflags: "-stdlib=libc++" } - - { name: "CPU (clang 16, Debug, TSAN)", build: "Debug", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=thread" } - - { name: "CPU (clang 16, Release)", build: "Release", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-stdlib=libc++" } - - { name: "CPU (clang 16, Release, ASAN)", build: "Release", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-stdlib=libc++ -fsanitize=address -fsanitize-ignorelist=/home/coder/stdexec/sanitizer-ignorelist.txt" } - - { name: "CPU (clang 22, Debug)", build: "Debug", tag: llvm22-cuda13.2, cxxstd: "23", cxxflags: "-stdlib=libc++" } - - { name: "CPU (clang 22, Release)", build: "Release", tag: llvm22-cuda13.2, cxxstd: "23", cxxflags: "-stdlib=libc++" } - - { name: "CPU (gcc 12, Debug)", build: "Debug", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "", } - - { name: "CPU (gcc 12, Release)", build: "Release", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "", } - # With the following config, 2 tests mysteriously time out, but only in CI and not locally. - # - { name: "CPU (gcc 12, Release, ASAN)", build: "Release", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=address" } - - { name: "CPU (gcc 12, Release, TSAN)", build: "Release", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=thread" } - - { name: "CPU (gcc 13, Debug)", build: "Debug", tag: gcc13-cuda12.9, cxxstd: "20", cxxflags: "", } - - { name: "CPU (gcc 14, Debug)", build: "Debug", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "", } - - { name: "CPU (gcc 14, Debug, ASAN)", build: "Debug", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=address" } - - { name: "CPU (gcc 14, Debug, TSAN)", build: "Debug", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=thread" } - - { name: "CPU (gcc 14, Release, ASAN)", build: "Release", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=address" } - - { name: "CPU (gcc 14, Release, LEAK)", build: "Release", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=leak", } - - { name: "CPU (gcc 14, Release, c++23)", build: "Release", tag: gcc14-cuda12.9, cxxstd: "23", cxxflags: "", } - container: - options: -u root - image: rapidsai/devcontainers:26.06-cpp-${{ matrix.tag }} - permissions: - id-token: write # This is required for configure-aws-credentials - contents: read # This is required for actions/checkout - defaults: - run: - shell: su coder {0} - working-directory: /home/coder - steps: - - name: Checkout stdexec - uses: actions/checkout@v4 - with: - path: stdexec - persist-credentials: false - - name: Setup environment - run: | - echo "ARTIFACT_PREFIX=${{runner.os}}-${{matrix.tag}}-amd64" >> "${GITHUB_ENV}" - echo "ARTIFACT_SUFFIX=${{github.run_id}}-${{github.run_attempt}}-$RANDOM" >> "${GITHUB_ENV}" - - if: github.repository_owner == 'NVIDIA' - name: Get AWS credentials for sccache bucket - uses: aws-actions/configure-aws-credentials@v4 - with: - aws-region: us-east-2 - role-duration-seconds: 28800 # 8 hours - role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA - - name: Build and test CPU schedulers - env: - ASAN_OPTIONS: alloc_dealloc_mismatch=0 - NVCC_APPEND_FLAGS: "-t=100" - SCCACHE_BUCKET: "rapids-sccache-devs" - SCCACHE_DIST_REQUEST_TIMEOUT: "7140" - SCCACHE_DIST_URL: "https://amd64.linux.sccache.rapids.nvidia.com" - SCCACHE_IDLE_TIMEOUT: "0" - SCCACHE_REGION: "us-east-2" - SCCACHE_S3_KEY_PREFIX: "nvidia-stdexec-dev" - SCCACHE_S3_PREPROCESSOR_CACHE_KEY_PREFIX: "nvidia-stdexec-dev/preprocessor" - SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE: "true" - SCCACHE_SERVER_LOG: "sccache=debug" - SCCACHE_SERVER_PORT: "4225" - run: | - set -e; - source /etc/profile - set -x; - - devcontainer-utils-install-sccache --version rapids; - - devcontainer-utils-init-sccache-dist \ - --enable-sccache-dist - <<< " \ - --auth-type 'token' \ - --auth-token '$( \ - curl -fsSL -H "Authorization: Bearer $( \ - curl -fsSL -H "Authorization: bearer $ACTIONS_ID_TOKEN_REQUEST_TOKEN" \ - "${ACTIONS_ID_TOKEN_REQUEST_URL}&audience=token.rapids.nvidia.com" \ - | jq -r '.value' \ - )" https://token.rapids.nvidia.com/gh/token/exchange \ - | jq -r '.token')' \ - "; - - # Copy source folder into ~/stdexec - cp -r "${GITHUB_WORKSPACE}"/stdexec ~/; - chown -R coder:coder ~/stdexec; - cd ~/stdexec; - - # Configure - cmake -S . -B build -GNinja \ - -DCMAKE_BUILD_TYPE=${{ matrix.build }} \ - -DCMAKE_CXX_FLAGS="${{ matrix.cxxflags }}" \ - -DSTDEXEC_ENABLE_TBB:BOOL=${{ !contains(matrix.cxxflags, '-fsanitize') }} \ - -DSTDEXEC_ENABLE_ASIO:BOOL=TRUE \ - -DSTDEXEC_ASIO_IMPLEMENTATION:STRING=boost \ - -DCMAKE_CXX_STANDARD:STRING=${{ matrix.cxxstd }} \ - -DCMAKE_CXX_EXTENSIONS:BOOL=OFF \ - -DSTDEXEC_BUILD_TESTS:BOOL=ON \ - ; - - # Compile - cmake --build build -v -j 512; - - # Print sccache stats - sccache -s; - - # Tests - SCCACHE_NO_CACHE=1 SCCACHE_NO_DIST_COMPILE=1 \ - ctest --test-dir build --verbose --output-on-failure --timeout 60; - - if: ${{ !cancelled() }} - name: Upload sccache logs - uses: actions/upload-artifact@v4 - with: - name: sccache-client-logs-${{env.ARTIFACT_PREFIX}}-${{env.ARTIFACT_SUFFIX}} - path: /tmp/sccache*.log - compression-level: 9 - - ci-cpu: - runs-on: ubuntu-latest - name: CI (CPU) - needs: - - build-cpu - steps: - - run: echo "CI (CPU) success" + # build-cpu: + # runs-on: ubuntu-latest + # name: ${{ matrix.name }} + # strategy: + # fail-fast: false + # matrix: + # include: + # - { name: "CPU (clang 16, Debug)", build: "Debug", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-stdlib=libc++" } + # - { name: "CPU (clang 16, Debug, c++23)", build: "Debug", tag: llvm16-cuda12.9, cxxstd: "23", cxxflags: "-stdlib=libc++" } + # - { name: "CPU (clang 16, Debug, TSAN)", build: "Debug", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=thread" } + # - { name: "CPU (clang 16, Release)", build: "Release", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-stdlib=libc++" } + # - { name: "CPU (clang 16, Release, ASAN)", build: "Release", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-stdlib=libc++ -fsanitize=address -fsanitize-ignorelist=/home/coder/stdexec/sanitizer-ignorelist.txt" } + # - { name: "CPU (clang 22, Debug)", build: "Debug", tag: llvm22-cuda13.2, cxxstd: "23", cxxflags: "-stdlib=libc++" } + # - { name: "CPU (clang 22, Release)", build: "Release", tag: llvm22-cuda13.2, cxxstd: "23", cxxflags: "-stdlib=libc++" } + # - { name: "CPU (gcc 12, Debug)", build: "Debug", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "", } + # - { name: "CPU (gcc 12, Release)", build: "Release", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "", } + # # With the following config, 2 tests mysteriously time out, but only in CI and not locally. + # # - { name: "CPU (gcc 12, Release, ASAN)", build: "Release", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=address" } + # - { name: "CPU (gcc 12, Release, TSAN)", build: "Release", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=thread" } + # - { name: "CPU (gcc 13, Debug)", build: "Debug", tag: gcc13-cuda12.9, cxxstd: "20", cxxflags: "", } + # - { name: "CPU (gcc 14, Debug)", build: "Debug", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "", } + # - { name: "CPU (gcc 14, Debug, ASAN)", build: "Debug", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=address" } + # - { name: "CPU (gcc 14, Debug, TSAN)", build: "Debug", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=thread" } + # - { name: "CPU (gcc 14, Release, ASAN)", build: "Release", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=address" } + # - { name: "CPU (gcc 14, Release, LEAK)", build: "Release", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=leak", } + # - { name: "CPU (gcc 14, Release, c++23)", build: "Release", tag: gcc14-cuda12.9, cxxstd: "23", cxxflags: "", } + # container: + # options: -u root + # image: rapidsai/devcontainers:26.06-cpp-${{ matrix.tag }} + # permissions: + # id-token: write # This is required for configure-aws-credentials + # contents: read # This is required for actions/checkout + # defaults: + # run: + # shell: su coder {0} + # working-directory: /home/coder + # steps: + # - name: Checkout stdexec + # uses: actions/checkout@v4 + # with: + # path: stdexec + # persist-credentials: false + # - name: Setup environment + # run: | + # echo "ARTIFACT_PREFIX=${{runner.os}}-${{matrix.tag}}-amd64" >> "${GITHUB_ENV}" + # echo "ARTIFACT_SUFFIX=${{github.run_id}}-${{github.run_attempt}}-$RANDOM" >> "${GITHUB_ENV}" + # - if: github.repository_owner == 'NVIDIA' + # name: Get AWS credentials for sccache bucket + # uses: aws-actions/configure-aws-credentials@v4 + # with: + # aws-region: us-east-2 + # role-duration-seconds: 28800 # 8 hours + # role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA + # - name: Build and test CPU schedulers + # env: + # ASAN_OPTIONS: alloc_dealloc_mismatch=0 + # NVCC_APPEND_FLAGS: "-t=100" + # SCCACHE_BUCKET: "rapids-sccache-devs" + # SCCACHE_DIST_REQUEST_TIMEOUT: "7140" + # SCCACHE_DIST_URL: "https://amd64.linux.sccache.rapids.nvidia.com" + # SCCACHE_IDLE_TIMEOUT: "0" + # SCCACHE_REGION: "us-east-2" + # SCCACHE_S3_KEY_PREFIX: "nvidia-stdexec-dev" + # SCCACHE_S3_PREPROCESSOR_CACHE_KEY_PREFIX: "nvidia-stdexec-dev/preprocessor" + # SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE: "true" + # SCCACHE_SERVER_LOG: "sccache=debug" + # SCCACHE_SERVER_PORT: "4225" + # run: | + # set -e; + # source /etc/profile + # set -x; + + # devcontainer-utils-install-sccache --version rapids; + + # devcontainer-utils-init-sccache-dist \ + # --enable-sccache-dist - <<< " \ + # --auth-type 'token' \ + # --auth-token '$( \ + # curl -fsSL -H "Authorization: Bearer $( \ + # curl -fsSL -H "Authorization: bearer $ACTIONS_ID_TOKEN_REQUEST_TOKEN" \ + # "${ACTIONS_ID_TOKEN_REQUEST_URL}&audience=token.rapids.nvidia.com" \ + # | jq -r '.value' \ + # )" https://token.rapids.nvidia.com/gh/token/exchange \ + # | jq -r '.token')' \ + # "; + + # # Copy source folder into ~/stdexec + # cp -r "${GITHUB_WORKSPACE}"/stdexec ~/; + # chown -R coder:coder ~/stdexec; + # cd ~/stdexec; + + # # Configure + # cmake -S . -B build -GNinja \ + # -DCMAKE_BUILD_TYPE=${{ matrix.build }} \ + # -DCMAKE_CXX_FLAGS="${{ matrix.cxxflags }}" \ + # -DSTDEXEC_ENABLE_TBB:BOOL=${{ !contains(matrix.cxxflags, '-fsanitize') }} \ + # -DSTDEXEC_ENABLE_ASIO:BOOL=TRUE \ + # -DSTDEXEC_ASIO_IMPLEMENTATION:STRING=boost \ + # -DCMAKE_CXX_STANDARD:STRING=${{ matrix.cxxstd }} \ + # -DCMAKE_CXX_EXTENSIONS:BOOL=OFF \ + # -DSTDEXEC_BUILD_TESTS:BOOL=ON \ + # ; + + # # Compile + # cmake --build build -v -j 512; + + # # Print sccache stats + # sccache -s; + + # # Tests + # SCCACHE_NO_CACHE=1 SCCACHE_NO_DIST_COMPILE=1 \ + # ctest --test-dir build --verbose --output-on-failure --timeout 60; + # - if: ${{ !cancelled() }} + # name: Upload sccache logs + # uses: actions/upload-artifact@v4 + # with: + # name: sccache-client-logs-${{env.ARTIFACT_PREFIX}}-${{env.ARTIFACT_SUFFIX}} + # path: /tmp/sccache*.log + # compression-level: 9 + + # ci-cpu: + # runs-on: ubuntu-latest + # name: CI (CPU) + # needs: + # - build-cpu + # steps: + # - run: echo "CI (CPU) success" build-cpu-windows: runs-on: windows-2022 @@ -174,47 +174,47 @@ jobs: steps: - run: echo "CI (CPU) (Windows) success" - build-cpu-macos: - runs-on: macos-26-large - name: macos-${{ matrix.name }} - strategy: - fail-fast: false - matrix: - include: - - { compiler: "clang++", build: "Debug", name: "CPU (MacOS) (clang, Debug)" } - - { compiler: "clang++", build: "Release", name: "CPU (MacOS) (clang, Release)" } - steps: - - name: Checkout stdexec (MacOS) - uses: actions/checkout@v4 - with: - persist-credentials: false - - - name: Install dependencies - run: | - brew update - brew install ninja - shell: bash - - - name: Build and test CPU schedulers (MacOS) - shell: bash - run: | - mkdir build - cmake -S. -Bbuild -GNinja \ - -DCMAKE_BUILD_TYPE=${{ matrix.build }} \ - -DCMAKE_CXX_COMPILER=${{ matrix.compiler }} \ - -DSTDEXEC_ENABLE_ASIO:BOOL=TRUE \ - -DSTDEXEC_ASIO_IMPLEMENTATION:STRING=boost \ - -DCMAKE_CXX_STANDARD:STRING=20 \ - -DSTDEXEC_BUILD_TESTS:BOOL=ON - - cmake --build build/ -v - cd build - ctest --output-on-failure - - ci-cpu-macos: - runs-on: macos-latest-large - name: CI (CPU) (MacOS) - needs: - - build-cpu-macos - steps: - - run: echo "CI (CPU) (MacOS) success" + # build-cpu-macos: + # runs-on: macos-26-large + # name: macos-${{ matrix.name }} + # strategy: + # fail-fast: false + # matrix: + # include: + # - { compiler: "clang++", build: "Debug", name: "CPU (MacOS) (clang, Debug)" } + # - { compiler: "clang++", build: "Release", name: "CPU (MacOS) (clang, Release)" } + # steps: + # - name: Checkout stdexec (MacOS) + # uses: actions/checkout@v4 + # with: + # persist-credentials: false + + # - name: Install dependencies + # run: | + # brew update + # brew install ninja + # shell: bash + + # - name: Build and test CPU schedulers (MacOS) + # shell: bash + # run: | + # mkdir build + # cmake -S. -Bbuild -GNinja \ + # -DCMAKE_BUILD_TYPE=${{ matrix.build }} \ + # -DCMAKE_CXX_COMPILER=${{ matrix.compiler }} \ + # -DSTDEXEC_ENABLE_ASIO:BOOL=TRUE \ + # -DSTDEXEC_ASIO_IMPLEMENTATION:STRING=boost \ + # -DCMAKE_CXX_STANDARD:STRING=20 \ + # -DSTDEXEC_BUILD_TESTS:BOOL=ON + + # cmake --build build/ -v + # cd build + # ctest --output-on-failure + + # ci-cpu-macos: + # runs-on: macos-latest-large + # name: CI (CPU) (MacOS) + # needs: + # - build-cpu-macos + # steps: + # - run: echo "CI (CPU) (MacOS) success" diff --git a/.github/workflows/ci.gpu.yml b/.github/workflows/ci.gpu.yml index 4ca81c304..77107f855 100644 --- a/.github/workflows/ci.gpu.yml +++ b/.github/workflows/ci.gpu.yml @@ -20,11 +20,11 @@ jobs: matrix: include: - { name: "clang 21", cuda: "12.0", cxx: "clang++", build: "Release", tag: "llvm21-cuda12.0", gpu: "v100", sm: "70", driver: "latest", arch: "amd64" } - - { name: "clang 21", cuda: "12.9", cxx: "clang++", build: "Release", tag: "llvm21-cuda12.9", gpu: "v100", sm: "70", driver: "latest", arch: "amd64" } - - { name: "nvc++ 25.9", cuda: "12.9", cxx: "mpic++", build: "Release", tag: "nvhpc25.9", gpu: "l4", sm: "75", driver: "latest", arch: "amd64" } - - { name: "nvc++ 25.9", cuda: "12.9", cxx: "mpic++", build: "Debug", tag: "nvhpc25.9", gpu: "l4", sm: "75", driver: "latest", arch: "amd64" } - - { name: "nvc++ 25.11", cuda: "13.0", cxx: "mpic++", build: "Release", tag: "nvhpc25.11", gpu: "l4", sm: "75", driver: "latest", arch: "amd64" } - - { name: "nvc++ 25.11", cuda: "13.0", cxx: "mpic++", build: "Debug", tag: "nvhpc25.11", gpu: "l4", sm: "75", driver: "latest", arch: "amd64" } + # - { name: "clang 21", cuda: "12.9", cxx: "clang++", build: "Release", tag: "llvm21-cuda12.9", gpu: "v100", sm: "70", driver: "latest", arch: "amd64" } + # - { name: "nvc++ 25.9", cuda: "12.9", cxx: "mpic++", build: "Release", tag: "nvhpc25.9", gpu: "l4", sm: "75", driver: "latest", arch: "amd64" } + # - { name: "nvc++ 25.9", cuda: "12.9", cxx: "mpic++", build: "Debug", tag: "nvhpc25.9", gpu: "l4", sm: "75", driver: "latest", arch: "amd64" } + # - { name: "nvc++ 25.11", cuda: "13.0", cxx: "mpic++", build: "Release", tag: "nvhpc25.11", gpu: "l4", sm: "75", driver: "latest", arch: "amd64" } + # - { name: "nvc++ 25.11", cuda: "13.0", cxx: "mpic++", build: "Debug", tag: "nvhpc25.11", gpu: "l4", sm: "75", driver: "latest", arch: "amd64" } runs-on: linux-${{ matrix.arch }}-gpu-${{ matrix.gpu }}-${{ matrix.driver }}-1 container: options: -u root diff --git a/.github/workflows/test-windows.ps1 b/.github/workflows/test-windows.ps1 index c20ecc521..4b46a402f 100644 --- a/.github/workflows/test-windows.ps1 +++ b/.github/workflows/test-windows.ps1 @@ -28,4 +28,14 @@ Invoke-NativeCommand cmake -B $BuildDirectory -G Ninja ` "-DSTDEXEC_ASIO_IMPLEMENTATION:STRING=boost" ` "-DSTDEXEC_BUILD_TESTS:BOOL=TRUE" . Invoke-NativeCommand cmake --build $BuildDirectory + +# Run the tests under Application Verifier to check for runtime failures like +# use-after-free. +Invoke-NativeCommand appverif /verify test.stdexec.exe +Invoke-NativeCommand appverif /verify test.exec.exe + Invoke-NativeCommand ctest --test-dir $BuildDirectory --output-on-failure --verbose --timeout 60 + +# Reset the Application Verifier settings for the test executables. +Invoke-NativeCommand appverif /n test.stdexec.exe +Invoke-NativeCommand appverif /n test.exec.exe From b594c199b7526bb3d913def3e9741da4d7182b1e Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Fri, 1 May 2026 11:35:28 -0700 Subject: [PATCH 2/9] defer the call to unhandled_stopped() --- include/exec/task.hpp | 7 +-- include/stdexec/__detail/__as_awaitable.hpp | 2 +- include/stdexec/coroutine.hpp | 28 +++++------ test/stdexec/types/test_task.cpp | 52 +++++++++++++++++++-- 4 files changed, 67 insertions(+), 22 deletions(-) diff --git a/include/exec/task.hpp b/include/exec/task.hpp index e0a3f4af3..d800318ca 100644 --- a/include/exec/task.hpp +++ b/include/exec/task.hpp @@ -672,8 +672,7 @@ namespace experimental::execution if constexpr (requires { __coro_.promise().stop_requested() ? 0 : 1; }) { if (__coro_.promise().stop_requested()) - return STDEXEC::__coroutine_destroy_and_continue( - __parent.promise().unhandled_stopped()); + return STDEXEC::__coroutine_unhandled_stopped(__parent); } return __coro_; } @@ -681,7 +680,9 @@ namespace experimental::execution constexpr auto await_resume() -> _Ty { __context_.reset(); - scope_guard __on_exit{[this]() noexcept { std::exchange(__coro_, {}).destroy(); }}; + scope_guard __on_exit{ + [this]() noexcept + { STDEXEC::__coroutine_destroy_nothrow(std::exchange(__coro_, {})); }}; if (__coro_.promise().__data_.index() == 1) std::rethrow_exception(std::move(__var::__get<1>(__coro_.promise().__data_))); diff --git a/include/stdexec/__detail/__as_awaitable.hpp b/include/stdexec/__detail/__as_awaitable.hpp index e489263c6..92a55d9e5 100644 --- a/include/stdexec/__detail/__as_awaitable.hpp +++ b/include/stdexec/__detail/__as_awaitable.hpp @@ -135,7 +135,7 @@ namespace STDEXEC // as normal. if (__result_.__is_valueless()) { - return STDEXEC::__coroutine_destroy_and_continue(__continuation_.unhandled_stopped()); + return STDEXEC::__coroutine_unhandled_stopped(__continuation_); } else { diff --git a/include/stdexec/coroutine.hpp b/include/stdexec/coroutine.hpp index 06d9eda32..a545721d8 100644 --- a/include/stdexec/coroutine.hpp +++ b/include/stdexec/coroutine.hpp @@ -220,27 +220,27 @@ namespace STDEXEC {&__destroy_and_continue_frame::__resume}, {}}; - struct __symmetric_transfer_frame : __detail::__synthetic_coro_frame + struct __unhandled_stopped_frame : __detail::__synthetic_coro_frame { static void __resume(void* __address) noexcept { - // Make a local copy of the promise to ensure we can safely destroy the suspended - // coroutine after resuming the continuation. - auto __promise = static_cast<__symmetric_transfer_frame*>(__address)->__promise_; + // Make a local copy of the promise since it will go away once we call through + // the __unhandled_stopped_fn_ function pointer. + auto __promise = static_cast<__unhandled_stopped_frame*>(__address)->__promise_; STDEXEC_ATTRIBUTE(musttail) - return STDEXEC::__coroutine_resume_nothrow(__promise.__continue_.address()); + return STDEXEC::__coroutine_resume_nothrow(__promise.__coro_.unhandled_stopped().address()); } struct __promise { - __std::coroutine_handle<> __continue_{}; + __coroutine_handle<> __coro_; } __promise_; - static thread_local __symmetric_transfer_frame value; + static thread_local __unhandled_stopped_frame value; }; - inline thread_local __symmetric_transfer_frame __symmetric_transfer_frame::value{ - {&__symmetric_transfer_frame::__resume}, + inline thread_local __unhandled_stopped_frame __unhandled_stopped_frame::value{ + {&__unhandled_stopped_frame::__resume}, {}}; inline auto __coroutine_destroy_and_continue(__std::coroutine_handle<> __destroy, // @@ -252,11 +252,11 @@ namespace STDEXEC return __std::coroutine_handle<>::from_address(&__destroy_and_continue_frame::value); } - inline auto __coroutine_destroy_and_continue(__std::coroutine_handle<> __continue) noexcept // + inline auto __coroutine_unhandled_stopped(__coroutine_handle<> __coro) noexcept // -> __std::coroutine_handle<> { - __symmetric_transfer_frame::value.__promise_.__continue_ = __continue; - return __std::coroutine_handle<>::from_address(&__symmetric_transfer_frame::value); + __unhandled_stopped_frame::value.__promise_.__coro_ = __coro; + return __std::coroutine_handle<>::from_address(&__unhandled_stopped_frame::value); } # else @@ -271,10 +271,10 @@ namespace STDEXEC } STDEXEC_ATTRIBUTE(always_inline) - auto __coroutine_destroy_and_continue(__std::coroutine_handle<> __continue) noexcept // + auto __coroutine_unhandled_stopped(__coroutine_handle<> __coro) noexcept // -> __std::coroutine_handle<> { - return __continue; + return __coro.unhandled_stopped(); } # endif // STDEXEC_MSVC() && STDEXEC_MSVC_VERSION < 1950 diff --git a/test/stdexec/types/test_task.cpp b/test/stdexec/types/test_task.cpp index 85c8aa64a..48accaa08 100644 --- a/test/stdexec/types/test_task.cpp +++ b/test/stdexec/types/test_task.cpp @@ -327,8 +327,6 @@ namespace })); } - // FUTURE TODO: add support so that `co_await sndr` can return a reference. - constinit int global_int = 0; constexpr auto wrap_ref = ex::then([](auto &i) noexcept { return std::ref(i); }); @@ -374,17 +372,28 @@ namespace struct operation { Receiver rcvr_; + bool complete_inline_ = true; void start() & noexcept { - ex::set_stopped(std::move(rcvr_)); + if (complete_inline_) + { + ex::set_stopped(std::move(rcvr_)); + } + else + { + std::thread([rcvr = std::move(rcvr_)]() mutable noexcept + { ex::set_stopped(std::move(rcvr)); }) + .detach(); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } } }; template auto connect(Receiver rcvr) && -> operation { - return {std::move(rcvr)}; + return {std::move(rcvr), complete_inline_}; } struct attrs @@ -402,6 +411,8 @@ namespace { return {}; } + + bool complete_inline_ = true; }; TEST_CASE("task co_awaiting inline|async_affine stopped sender does not deadlock", @@ -445,6 +456,39 @@ namespace static_assert(!ex::sender_to, sink>); static_assert(ex::sender_in, ex::__sync_wait::__env>); + auto await_stopped_sender(bool complete_inline) -> ex::task + { + co_await inline_affine_stopped_sender{complete_inline}; + } + + TEST_CASE("repro for NVIDIA/stdexec#2047", "[types][task]") + { + [[maybe_unused]] + // repeat this test 1000 times because it can expose race conditions + int i = GENERATE(repeat(1000, values({1}))); + auto pool = exec::static_thread_pool(1); + + auto scope = ex::counting_scope(); + ex::spawn(ex::starts_on(pool.get_scheduler(), await_stopped_sender(true)) + | ex::upon_error([](auto) noexcept { std::terminate(); }), + scope.get_token()); + ex::sync_wait(scope.join()); + } + + TEST_CASE("repro for NVIDIA/stdexec#2047 async completion from another thread", "[types][task]") + { + [[maybe_unused]] + // repeat this test 1000 times because it can expose race conditions + int i = GENERATE(repeat(1000, values({1}))); + auto pool = exec::static_thread_pool(1); + + auto scope = ex::counting_scope(); + ex::spawn(ex::starts_on(pool.get_scheduler(), await_stopped_sender(false)) + | ex::upon_error([](auto) noexcept { std::terminate(); }), + scope.get_token()); + ex::sync_wait(scope.join()); + } + // TODO: add tests for stop token support in task } // anonymous namespace From 2bce2bb1b1bb2f4c67ddc7f3dd91dd62738a2793 Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Fri, 1 May 2026 12:18:47 -0700 Subject: [PATCH 3/9] give up on AppVerifier --- .github/workflows/test-windows.ps1 | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/.github/workflows/test-windows.ps1 b/.github/workflows/test-windows.ps1 index 4b46a402f..c20ecc521 100644 --- a/.github/workflows/test-windows.ps1 +++ b/.github/workflows/test-windows.ps1 @@ -28,14 +28,4 @@ Invoke-NativeCommand cmake -B $BuildDirectory -G Ninja ` "-DSTDEXEC_ASIO_IMPLEMENTATION:STRING=boost" ` "-DSTDEXEC_BUILD_TESTS:BOOL=TRUE" . Invoke-NativeCommand cmake --build $BuildDirectory - -# Run the tests under Application Verifier to check for runtime failures like -# use-after-free. -Invoke-NativeCommand appverif /verify test.stdexec.exe -Invoke-NativeCommand appverif /verify test.exec.exe - Invoke-NativeCommand ctest --test-dir $BuildDirectory --output-on-failure --verbose --timeout 60 - -# Reset the Application Verifier settings for the test executables. -Invoke-NativeCommand appverif /n test.stdexec.exe -Invoke-NativeCommand appverif /n test.exec.exe From 6caa6aa045656256c2aa9c8456a696c08985e411 Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Fri, 1 May 2026 16:37:20 -0700 Subject: [PATCH 4/9] maybe if the problematic await_suspend returns void --- include/stdexec/__detail/__as_awaitable.hpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/include/stdexec/__detail/__as_awaitable.hpp b/include/stdexec/__detail/__as_awaitable.hpp index 92a55d9e5..29bc97e02 100644 --- a/include/stdexec/__detail/__as_awaitable.hpp +++ b/include/stdexec/__detail/__as_awaitable.hpp @@ -317,7 +317,7 @@ namespace STDEXEC STDEXEC_CONSTEXPR_CXX23 auto await_suspend([[maybe_unused]] __std::coroutine_handle<> __continuation) noexcept - -> __std::coroutine_handle<> + -> STDEXEC_PP_IIF(STDEXEC_MSVC(), void, __std::coroutine_handle<>) { STDEXEC_ASSERT(this->__continuation_.handle() == __continuation); @@ -331,6 +331,8 @@ namespace STDEXEC // skip the spin entirely std::thread::id const __old_id = this->__thread_id_.exchange(std::thread::id{}, __std::memory_order_release); + +# if !STDEXEC_MSVC() if (__old_id == std::thread::id{}) { // The receiver already cleared __thread_id_, so it completed on the same @@ -338,6 +340,15 @@ namespace STDEXEC return this->__get_continuation(); } return __std::noop_coroutine(); +# else + if (__old_id == std::thread::id{}) + { + // The receiver already cleared __thread_id_, so it completed on the same + // thread. Resume the continuation directly. + STDEXEC_ATTRIBUTE(musttail) + return STDEXEC::__coroutine_resume_nothrow(this->__get_continuation()); + } +# endif } private: From 8c7fecfc37fb5b1cc104bb69cc45869c7fb30205 Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Fri, 1 May 2026 17:29:22 -0700 Subject: [PATCH 5/9] one more time --- include/stdexec/__detail/__as_awaitable.hpp | 29 +++++++-------------- include/stdexec/__detail/__config.hpp | 2 ++ include/stdexec/coroutine.hpp | 23 +++++++++------- 3 files changed, 25 insertions(+), 29 deletions(-) diff --git a/include/stdexec/__detail/__as_awaitable.hpp b/include/stdexec/__detail/__as_awaitable.hpp index 29bc97e02..e1241dcc5 100644 --- a/include/stdexec/__detail/__as_awaitable.hpp +++ b/include/stdexec/__detail/__as_awaitable.hpp @@ -317,7 +317,6 @@ namespace STDEXEC STDEXEC_CONSTEXPR_CXX23 auto await_suspend([[maybe_unused]] __std::coroutine_handle<> __continuation) noexcept - -> STDEXEC_PP_IIF(STDEXEC_MSVC(), void, __std::coroutine_handle<>) { STDEXEC_ASSERT(this->__continuation_.handle() == __continuation); @@ -329,25 +328,17 @@ namespace STDEXEC // proceed to resume(). // - If T2 hasn't run yet, it will see {} from its load in __done() and // skip the spin entirely - std::thread::id const __old_id = this->__thread_id_.exchange(std::thread::id{}, - __std::memory_order_release); - -# if !STDEXEC_MSVC() - if (__old_id == std::thread::id{}) - { - // The receiver already cleared __thread_id_, so it completed on the same - // thread. Resume the continuation directly. - return this->__get_continuation(); - } - return __std::noop_coroutine(); + bool const __done = // + this->__thread_id_.exchange(std::thread::id{}, __std::memory_order_release) + == std::thread::id{}; + + // If the receiver already cleared __thread_id_, it completed on the same thread. + // Resume the continuation directly. +# if !defined(STDEXEC_MSVC_CORO_DESTROY_BUG_WORKAROUND) + return __done ? this->__get_continuation() : __std::noop_coroutine(); # else - if (__old_id == std::thread::id{}) - { - // The receiver already cleared __thread_id_, so it completed on the same - // thread. Resume the continuation directly. - STDEXEC_ATTRIBUTE(musttail) - return STDEXEC::__coroutine_resume_nothrow(this->__get_continuation()); - } + if (__done) + STDEXEC::__coroutine_resume_nothrow(this->__get_continuation()); # endif } diff --git a/include/stdexec/__detail/__config.hpp b/include/stdexec/__detail/__config.hpp index ad3643cad..908d12655 100644 --- a/include/stdexec/__detail/__config.hpp +++ b/include/stdexec/__detail/__config.hpp @@ -379,6 +379,8 @@ namespace STDEXEC::__std #if STDEXEC_MSVC() && !STDEXEC_CLANG_CL() && STDEXEC_MSVC_VERSION >= 1950 # define STDEXEC_ATTR_WHICH_10(_ATTR) [[msvc::musttail]] +#elif STDEXEC_HAS_CPP_ATTRIBUTE(clang::musttail) +# define STDEXEC_ATTR_WHICH_10(_ATTR) [[clang::musttail]] #elif STDEXEC_HAS_CPP_ATTRIBUTE(gnu::musttail) # define STDEXEC_ATTR_WHICH_10(_ATTR) [[gnu::musttail]] #else diff --git a/include/stdexec/coroutine.hpp b/include/stdexec/coroutine.hpp index a545721d8..cf4122a6a 100644 --- a/include/stdexec/coroutine.hpp +++ b/include/stdexec/coroutine.hpp @@ -24,6 +24,10 @@ #if !STDEXEC_NO_STDCPP_COROUTINES() +# if STDEXEC_MSVC() && STDEXEC_MSVC_VERSION < 1950 +# define STDEXEC_MSVC_CORO_DESTROY_BUG_WORKAROUND +# endif + namespace STDEXEC { template _Up> @@ -179,7 +183,7 @@ namespace STDEXEC sizeof(__synthetic_coro_frame)); } // namespace __detail -# if STDEXEC_MSVC() && STDEXEC_MSVC_VERSION < 1950 +# if defined(STDEXEC_MSVC_CORO_DESTROY_BUG_WORKAROUND) // MSVCBUG https://developercommunity.visualstudio.com/t/destroy-coroutine-from-final_suspend-r/10096047 // Prior to Visual Studio 17.9 (Feb, 2024), aka MSVC 19.39, MSVC incorrectly allocates @@ -189,8 +193,8 @@ namespace STDEXEC // implementation when NRVO is in play. // This workaround delays the destruction of the suspended coroutine by wrapping the - // continuation in another "synthetic" coroutine the resumes the continuation and *then* - // destroys the suspended coroutine. + // continuation in another "synthetic" coroutine that resumes the continuation and + // *then* destroys the suspended coroutine. // The wrapping coroutine frame is thread-local and reused within the thread for each // destroy-and-continue sequence. @@ -201,10 +205,10 @@ namespace STDEXEC { // Make a local copy of the promise to ensure we can safely destroy the suspended // coroutine after resuming the continuation. - auto __promise = static_cast<__destroy_and_continue_frame*>(__address)->__promise_; - STDEXEC::__coroutine_resume_nothrow(__promise.__continue_); - STDEXEC_ATTRIBUTE(musttail) - return STDEXEC::__coroutine_destroy_nothrow(__promise.__destroy_.address()); + auto& __self = *static_cast<__destroy_and_continue_frame*>(__address); + auto __destroy = __self.__promise_.__destroy_; + STDEXEC::__coroutine_resume_nothrow(__self.__promise_.__continue_); + STDEXEC::__coroutine_destroy_nothrow(__destroy); } struct __promise @@ -226,9 +230,8 @@ namespace STDEXEC { // Make a local copy of the promise since it will go away once we call through // the __unhandled_stopped_fn_ function pointer. - auto __promise = static_cast<__unhandled_stopped_frame*>(__address)->__promise_; - STDEXEC_ATTRIBUTE(musttail) - return STDEXEC::__coroutine_resume_nothrow(__promise.__coro_.unhandled_stopped().address()); + auto& __self = *static_cast<__unhandled_stopped_frame*>(__address); + STDEXEC::__coroutine_resume_nothrow(__self.__promise_.__coro_.unhandled_stopped()); } struct __promise From 041059304d753971263a58b8a36c05f3790f3a03 Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Fri, 1 May 2026 17:34:48 -0700 Subject: [PATCH 6/9] format --- include/stdexec/coroutine.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/stdexec/coroutine.hpp b/include/stdexec/coroutine.hpp index cf4122a6a..e3d11bbe6 100644 --- a/include/stdexec/coroutine.hpp +++ b/include/stdexec/coroutine.hpp @@ -205,8 +205,8 @@ namespace STDEXEC { // Make a local copy of the promise to ensure we can safely destroy the suspended // coroutine after resuming the continuation. - auto& __self = *static_cast<__destroy_and_continue_frame*>(__address); - auto __destroy = __self.__promise_.__destroy_; + auto& __self = *static_cast<__destroy_and_continue_frame*>(__address); + auto __destroy = __self.__promise_.__destroy_; STDEXEC::__coroutine_resume_nothrow(__self.__promise_.__continue_); STDEXEC::__coroutine_destroy_nothrow(__destroy); } From ee0b220070f67020fd57eb0ca89b5f6e27b1163f Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Fri, 1 May 2026 17:58:38 -0700 Subject: [PATCH 7/9] reenable other CI configurations --- .github/workflows/ci.cpu.yml | 342 +++++++++++++++++------------------ .github/workflows/ci.gpu.yml | 10 +- 2 files changed, 176 insertions(+), 176 deletions(-) diff --git a/.github/workflows/ci.cpu.yml b/.github/workflows/ci.cpu.yml index 6f9d34a4a..7b353d97f 100644 --- a/.github/workflows/ci.cpu.yml +++ b/.github/workflows/ci.cpu.yml @@ -12,133 +12,133 @@ concurrency: jobs: - # build-cpu: - # runs-on: ubuntu-latest - # name: ${{ matrix.name }} - # strategy: - # fail-fast: false - # matrix: - # include: - # - { name: "CPU (clang 16, Debug)", build: "Debug", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-stdlib=libc++" } - # - { name: "CPU (clang 16, Debug, c++23)", build: "Debug", tag: llvm16-cuda12.9, cxxstd: "23", cxxflags: "-stdlib=libc++" } - # - { name: "CPU (clang 16, Debug, TSAN)", build: "Debug", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=thread" } - # - { name: "CPU (clang 16, Release)", build: "Release", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-stdlib=libc++" } - # - { name: "CPU (clang 16, Release, ASAN)", build: "Release", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-stdlib=libc++ -fsanitize=address -fsanitize-ignorelist=/home/coder/stdexec/sanitizer-ignorelist.txt" } - # - { name: "CPU (clang 22, Debug)", build: "Debug", tag: llvm22-cuda13.2, cxxstd: "23", cxxflags: "-stdlib=libc++" } - # - { name: "CPU (clang 22, Release)", build: "Release", tag: llvm22-cuda13.2, cxxstd: "23", cxxflags: "-stdlib=libc++" } - # - { name: "CPU (gcc 12, Debug)", build: "Debug", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "", } - # - { name: "CPU (gcc 12, Release)", build: "Release", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "", } - # # With the following config, 2 tests mysteriously time out, but only in CI and not locally. - # # - { name: "CPU (gcc 12, Release, ASAN)", build: "Release", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=address" } - # - { name: "CPU (gcc 12, Release, TSAN)", build: "Release", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=thread" } - # - { name: "CPU (gcc 13, Debug)", build: "Debug", tag: gcc13-cuda12.9, cxxstd: "20", cxxflags: "", } - # - { name: "CPU (gcc 14, Debug)", build: "Debug", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "", } - # - { name: "CPU (gcc 14, Debug, ASAN)", build: "Debug", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=address" } - # - { name: "CPU (gcc 14, Debug, TSAN)", build: "Debug", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=thread" } - # - { name: "CPU (gcc 14, Release, ASAN)", build: "Release", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=address" } - # - { name: "CPU (gcc 14, Release, LEAK)", build: "Release", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=leak", } - # - { name: "CPU (gcc 14, Release, c++23)", build: "Release", tag: gcc14-cuda12.9, cxxstd: "23", cxxflags: "", } - # container: - # options: -u root - # image: rapidsai/devcontainers:26.06-cpp-${{ matrix.tag }} - # permissions: - # id-token: write # This is required for configure-aws-credentials - # contents: read # This is required for actions/checkout - # defaults: - # run: - # shell: su coder {0} - # working-directory: /home/coder - # steps: - # - name: Checkout stdexec - # uses: actions/checkout@v4 - # with: - # path: stdexec - # persist-credentials: false - # - name: Setup environment - # run: | - # echo "ARTIFACT_PREFIX=${{runner.os}}-${{matrix.tag}}-amd64" >> "${GITHUB_ENV}" - # echo "ARTIFACT_SUFFIX=${{github.run_id}}-${{github.run_attempt}}-$RANDOM" >> "${GITHUB_ENV}" - # - if: github.repository_owner == 'NVIDIA' - # name: Get AWS credentials for sccache bucket - # uses: aws-actions/configure-aws-credentials@v4 - # with: - # aws-region: us-east-2 - # role-duration-seconds: 28800 # 8 hours - # role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA - # - name: Build and test CPU schedulers - # env: - # ASAN_OPTIONS: alloc_dealloc_mismatch=0 - # NVCC_APPEND_FLAGS: "-t=100" - # SCCACHE_BUCKET: "rapids-sccache-devs" - # SCCACHE_DIST_REQUEST_TIMEOUT: "7140" - # SCCACHE_DIST_URL: "https://amd64.linux.sccache.rapids.nvidia.com" - # SCCACHE_IDLE_TIMEOUT: "0" - # SCCACHE_REGION: "us-east-2" - # SCCACHE_S3_KEY_PREFIX: "nvidia-stdexec-dev" - # SCCACHE_S3_PREPROCESSOR_CACHE_KEY_PREFIX: "nvidia-stdexec-dev/preprocessor" - # SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE: "true" - # SCCACHE_SERVER_LOG: "sccache=debug" - # SCCACHE_SERVER_PORT: "4225" - # run: | - # set -e; - # source /etc/profile - # set -x; - - # devcontainer-utils-install-sccache --version rapids; - - # devcontainer-utils-init-sccache-dist \ - # --enable-sccache-dist - <<< " \ - # --auth-type 'token' \ - # --auth-token '$( \ - # curl -fsSL -H "Authorization: Bearer $( \ - # curl -fsSL -H "Authorization: bearer $ACTIONS_ID_TOKEN_REQUEST_TOKEN" \ - # "${ACTIONS_ID_TOKEN_REQUEST_URL}&audience=token.rapids.nvidia.com" \ - # | jq -r '.value' \ - # )" https://token.rapids.nvidia.com/gh/token/exchange \ - # | jq -r '.token')' \ - # "; - - # # Copy source folder into ~/stdexec - # cp -r "${GITHUB_WORKSPACE}"/stdexec ~/; - # chown -R coder:coder ~/stdexec; - # cd ~/stdexec; - - # # Configure - # cmake -S . -B build -GNinja \ - # -DCMAKE_BUILD_TYPE=${{ matrix.build }} \ - # -DCMAKE_CXX_FLAGS="${{ matrix.cxxflags }}" \ - # -DSTDEXEC_ENABLE_TBB:BOOL=${{ !contains(matrix.cxxflags, '-fsanitize') }} \ - # -DSTDEXEC_ENABLE_ASIO:BOOL=TRUE \ - # -DSTDEXEC_ASIO_IMPLEMENTATION:STRING=boost \ - # -DCMAKE_CXX_STANDARD:STRING=${{ matrix.cxxstd }} \ - # -DCMAKE_CXX_EXTENSIONS:BOOL=OFF \ - # -DSTDEXEC_BUILD_TESTS:BOOL=ON \ - # ; - - # # Compile - # cmake --build build -v -j 512; - - # # Print sccache stats - # sccache -s; - - # # Tests - # SCCACHE_NO_CACHE=1 SCCACHE_NO_DIST_COMPILE=1 \ - # ctest --test-dir build --verbose --output-on-failure --timeout 60; - # - if: ${{ !cancelled() }} - # name: Upload sccache logs - # uses: actions/upload-artifact@v4 - # with: - # name: sccache-client-logs-${{env.ARTIFACT_PREFIX}}-${{env.ARTIFACT_SUFFIX}} - # path: /tmp/sccache*.log - # compression-level: 9 - - # ci-cpu: - # runs-on: ubuntu-latest - # name: CI (CPU) - # needs: - # - build-cpu - # steps: - # - run: echo "CI (CPU) success" + build-cpu: + runs-on: ubuntu-latest + name: ${{ matrix.name }} + strategy: + fail-fast: false + matrix: + include: + - { name: "CPU (clang 16, Debug)", build: "Debug", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-stdlib=libc++" } + - { name: "CPU (clang 16, Debug, c++23)", build: "Debug", tag: llvm16-cuda12.9, cxxstd: "23", cxxflags: "-stdlib=libc++" } + - { name: "CPU (clang 16, Debug, TSAN)", build: "Debug", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=thread" } + - { name: "CPU (clang 16, Release)", build: "Release", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-stdlib=libc++" } + - { name: "CPU (clang 16, Release, ASAN)", build: "Release", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-stdlib=libc++ -fsanitize=address -fsanitize-ignorelist=/home/coder/stdexec/sanitizer-ignorelist.txt" } + - { name: "CPU (clang 22, Debug)", build: "Debug", tag: llvm22-cuda13.2, cxxstd: "23", cxxflags: "-stdlib=libc++" } + - { name: "CPU (clang 22, Release)", build: "Release", tag: llvm22-cuda13.2, cxxstd: "23", cxxflags: "-stdlib=libc++" } + - { name: "CPU (gcc 12, Debug)", build: "Debug", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "", } + - { name: "CPU (gcc 12, Release)", build: "Release", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "", } + # With the following config, 2 tests mysteriously time out, but only in CI and not locally. + # - { name: "CPU (gcc 12, Release, ASAN)", build: "Release", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=address" } + - { name: "CPU (gcc 12, Release, TSAN)", build: "Release", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=thread" } + - { name: "CPU (gcc 13, Debug)", build: "Debug", tag: gcc13-cuda12.9, cxxstd: "20", cxxflags: "", } + - { name: "CPU (gcc 14, Debug)", build: "Debug", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "", } + - { name: "CPU (gcc 14, Debug, ASAN)", build: "Debug", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=address" } + - { name: "CPU (gcc 14, Debug, TSAN)", build: "Debug", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=thread" } + - { name: "CPU (gcc 14, Release, ASAN)", build: "Release", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=address" } + - { name: "CPU (gcc 14, Release, LEAK)", build: "Release", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=leak", } + - { name: "CPU (gcc 14, Release, c++23)", build: "Release", tag: gcc14-cuda12.9, cxxstd: "23", cxxflags: "", } + container: + options: -u root + image: rapidsai/devcontainers:26.06-cpp-${{ matrix.tag }} + permissions: + id-token: write # This is required for configure-aws-credentials + contents: read # This is required for actions/checkout + defaults: + run: + shell: su coder {0} + working-directory: /home/coder + steps: + - name: Checkout stdexec + uses: actions/checkout@v4 + with: + path: stdexec + persist-credentials: false + - name: Setup environment + run: | + echo "ARTIFACT_PREFIX=${{runner.os}}-${{matrix.tag}}-amd64" >> "${GITHUB_ENV}" + echo "ARTIFACT_SUFFIX=${{github.run_id}}-${{github.run_attempt}}-$RANDOM" >> "${GITHUB_ENV}" + - if: github.repository_owner == 'NVIDIA' + name: Get AWS credentials for sccache bucket + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: us-east-2 + role-duration-seconds: 28800 # 8 hours + role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA + - name: Build and test CPU schedulers + env: + ASAN_OPTIONS: alloc_dealloc_mismatch=0 + NVCC_APPEND_FLAGS: "-t=100" + SCCACHE_BUCKET: "rapids-sccache-devs" + SCCACHE_DIST_REQUEST_TIMEOUT: "7140" + SCCACHE_DIST_URL: "https://amd64.linux.sccache.rapids.nvidia.com" + SCCACHE_IDLE_TIMEOUT: "0" + SCCACHE_REGION: "us-east-2" + SCCACHE_S3_KEY_PREFIX: "nvidia-stdexec-dev" + SCCACHE_S3_PREPROCESSOR_CACHE_KEY_PREFIX: "nvidia-stdexec-dev/preprocessor" + SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE: "true" + SCCACHE_SERVER_LOG: "sccache=debug" + SCCACHE_SERVER_PORT: "4225" + run: | + set -e; + source /etc/profile + set -x; + + devcontainer-utils-install-sccache --version rapids; + + devcontainer-utils-init-sccache-dist \ + --enable-sccache-dist - <<< " \ + --auth-type 'token' \ + --auth-token '$( \ + curl -fsSL -H "Authorization: Bearer $( \ + curl -fsSL -H "Authorization: bearer $ACTIONS_ID_TOKEN_REQUEST_TOKEN" \ + "${ACTIONS_ID_TOKEN_REQUEST_URL}&audience=token.rapids.nvidia.com" \ + | jq -r '.value' \ + )" https://token.rapids.nvidia.com/gh/token/exchange \ + | jq -r '.token')' \ + "; + + # Copy source folder into ~/stdexec + cp -r "${GITHUB_WORKSPACE}"/stdexec ~/; + chown -R coder:coder ~/stdexec; + cd ~/stdexec; + + # Configure + cmake -S . -B build -GNinja \ + -DCMAKE_BUILD_TYPE=${{ matrix.build }} \ + -DCMAKE_CXX_FLAGS="${{ matrix.cxxflags }}" \ + -DSTDEXEC_ENABLE_TBB:BOOL=${{ !contains(matrix.cxxflags, '-fsanitize') }} \ + -DSTDEXEC_ENABLE_ASIO:BOOL=TRUE \ + -DSTDEXEC_ASIO_IMPLEMENTATION:STRING=boost \ + -DCMAKE_CXX_STANDARD:STRING=${{ matrix.cxxstd }} \ + -DCMAKE_CXX_EXTENSIONS:BOOL=OFF \ + -DSTDEXEC_BUILD_TESTS:BOOL=ON \ + ; + + # Compile + cmake --build build -v -j 512; + + # Print sccache stats + sccache -s; + + # Tests + SCCACHE_NO_CACHE=1 SCCACHE_NO_DIST_COMPILE=1 \ + ctest --test-dir build --verbose --output-on-failure --timeout 60; + - if: ${{ !cancelled() }} + name: Upload sccache logs + uses: actions/upload-artifact@v4 + with: + name: sccache-client-logs-${{env.ARTIFACT_PREFIX}}-${{env.ARTIFACT_SUFFIX}} + path: /tmp/sccache*.log + compression-level: 9 + + ci-cpu: + runs-on: ubuntu-latest + name: CI (CPU) + needs: + - build-cpu + steps: + - run: echo "CI (CPU) success" build-cpu-windows: runs-on: windows-2022 @@ -174,47 +174,47 @@ jobs: steps: - run: echo "CI (CPU) (Windows) success" - # build-cpu-macos: - # runs-on: macos-26-large - # name: macos-${{ matrix.name }} - # strategy: - # fail-fast: false - # matrix: - # include: - # - { compiler: "clang++", build: "Debug", name: "CPU (MacOS) (clang, Debug)" } - # - { compiler: "clang++", build: "Release", name: "CPU (MacOS) (clang, Release)" } - # steps: - # - name: Checkout stdexec (MacOS) - # uses: actions/checkout@v4 - # with: - # persist-credentials: false - - # - name: Install dependencies - # run: | - # brew update - # brew install ninja - # shell: bash - - # - name: Build and test CPU schedulers (MacOS) - # shell: bash - # run: | - # mkdir build - # cmake -S. -Bbuild -GNinja \ - # -DCMAKE_BUILD_TYPE=${{ matrix.build }} \ - # -DCMAKE_CXX_COMPILER=${{ matrix.compiler }} \ - # -DSTDEXEC_ENABLE_ASIO:BOOL=TRUE \ - # -DSTDEXEC_ASIO_IMPLEMENTATION:STRING=boost \ - # -DCMAKE_CXX_STANDARD:STRING=20 \ - # -DSTDEXEC_BUILD_TESTS:BOOL=ON - - # cmake --build build/ -v - # cd build - # ctest --output-on-failure - - # ci-cpu-macos: - # runs-on: macos-latest-large - # name: CI (CPU) (MacOS) - # needs: - # - build-cpu-macos - # steps: - # - run: echo "CI (CPU) (MacOS) success" + build-cpu-macos: + runs-on: macos-26-large + name: macos-${{ matrix.name }} + strategy: + fail-fast: false + matrix: + include: + - { compiler: "clang++", build: "Debug", name: "CPU (MacOS) (clang, Debug)" } + - { compiler: "clang++", build: "Release", name: "CPU (MacOS) (clang, Release)" } + steps: + - name: Checkout stdexec (MacOS) + uses: actions/checkout@v4 + with: + persist-credentials: false + + - name: Install dependencies + run: | + brew update + brew install ninja + shell: bash + + - name: Build and test CPU schedulers (MacOS) + shell: bash + run: | + mkdir build + cmake -S. -Bbuild -GNinja \ + -DCMAKE_BUILD_TYPE=${{ matrix.build }} \ + -DCMAKE_CXX_COMPILER=${{ matrix.compiler }} \ + -DSTDEXEC_ENABLE_ASIO:BOOL=TRUE \ + -DSTDEXEC_ASIO_IMPLEMENTATION:STRING=boost \ + -DCMAKE_CXX_STANDARD:STRING=20 \ + -DSTDEXEC_BUILD_TESTS:BOOL=ON + + cmake --build build/ -v + cd build + ctest --output-on-failure + + ci-cpu-macos: + runs-on: macos-latest-large + name: CI (CPU) (MacOS) + needs: + - build-cpu-macos + steps: + - run: echo "CI (CPU) (MacOS) success" diff --git a/.github/workflows/ci.gpu.yml b/.github/workflows/ci.gpu.yml index 77107f855..4ca81c304 100644 --- a/.github/workflows/ci.gpu.yml +++ b/.github/workflows/ci.gpu.yml @@ -20,11 +20,11 @@ jobs: matrix: include: - { name: "clang 21", cuda: "12.0", cxx: "clang++", build: "Release", tag: "llvm21-cuda12.0", gpu: "v100", sm: "70", driver: "latest", arch: "amd64" } - # - { name: "clang 21", cuda: "12.9", cxx: "clang++", build: "Release", tag: "llvm21-cuda12.9", gpu: "v100", sm: "70", driver: "latest", arch: "amd64" } - # - { name: "nvc++ 25.9", cuda: "12.9", cxx: "mpic++", build: "Release", tag: "nvhpc25.9", gpu: "l4", sm: "75", driver: "latest", arch: "amd64" } - # - { name: "nvc++ 25.9", cuda: "12.9", cxx: "mpic++", build: "Debug", tag: "nvhpc25.9", gpu: "l4", sm: "75", driver: "latest", arch: "amd64" } - # - { name: "nvc++ 25.11", cuda: "13.0", cxx: "mpic++", build: "Release", tag: "nvhpc25.11", gpu: "l4", sm: "75", driver: "latest", arch: "amd64" } - # - { name: "nvc++ 25.11", cuda: "13.0", cxx: "mpic++", build: "Debug", tag: "nvhpc25.11", gpu: "l4", sm: "75", driver: "latest", arch: "amd64" } + - { name: "clang 21", cuda: "12.9", cxx: "clang++", build: "Release", tag: "llvm21-cuda12.9", gpu: "v100", sm: "70", driver: "latest", arch: "amd64" } + - { name: "nvc++ 25.9", cuda: "12.9", cxx: "mpic++", build: "Release", tag: "nvhpc25.9", gpu: "l4", sm: "75", driver: "latest", arch: "amd64" } + - { name: "nvc++ 25.9", cuda: "12.9", cxx: "mpic++", build: "Debug", tag: "nvhpc25.9", gpu: "l4", sm: "75", driver: "latest", arch: "amd64" } + - { name: "nvc++ 25.11", cuda: "13.0", cxx: "mpic++", build: "Release", tag: "nvhpc25.11", gpu: "l4", sm: "75", driver: "latest", arch: "amd64" } + - { name: "nvc++ 25.11", cuda: "13.0", cxx: "mpic++", build: "Debug", tag: "nvhpc25.11", gpu: "l4", sm: "75", driver: "latest", arch: "amd64" } runs-on: linux-${{ matrix.arch }}-gpu-${{ matrix.gpu }}-${{ matrix.driver }}-1 container: options: -u root From 7d82400c7e7b520cb980d32ce1fbff44c5d5be69 Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Fri, 1 May 2026 18:21:22 -0700 Subject: [PATCH 8/9] does `__coroutine_unhandled_stopped` have any purpose? --- include/stdexec/coroutine.hpp | 64 ++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 28 deletions(-) diff --git a/include/stdexec/coroutine.hpp b/include/stdexec/coroutine.hpp index e3d11bbe6..8165d5d55 100644 --- a/include/stdexec/coroutine.hpp +++ b/include/stdexec/coroutine.hpp @@ -224,27 +224,27 @@ namespace STDEXEC {&__destroy_and_continue_frame::__resume}, {}}; - struct __unhandled_stopped_frame : __detail::__synthetic_coro_frame - { - static void __resume(void* __address) noexcept - { - // Make a local copy of the promise since it will go away once we call through - // the __unhandled_stopped_fn_ function pointer. - auto& __self = *static_cast<__unhandled_stopped_frame*>(__address); - STDEXEC::__coroutine_resume_nothrow(__self.__promise_.__coro_.unhandled_stopped()); - } - - struct __promise - { - __coroutine_handle<> __coro_; - } __promise_; - - static thread_local __unhandled_stopped_frame value; - }; - - inline thread_local __unhandled_stopped_frame __unhandled_stopped_frame::value{ - {&__unhandled_stopped_frame::__resume}, - {}}; + // struct __unhandled_stopped_frame : __detail::__synthetic_coro_frame + // { + // static void __resume(void* __address) noexcept + // { + // // Make a local copy of the promise since it will go away once we call through + // // the __unhandled_stopped_fn_ function pointer. + // auto& __self = *static_cast<__unhandled_stopped_frame*>(__address); + // STDEXEC::__coroutine_resume_nothrow(__self.__promise_.__coro_.unhandled_stopped()); + // } + + // struct __promise + // { + // __coroutine_handle<> __coro_; + // } __promise_; + + // static thread_local __unhandled_stopped_frame value; + // }; + + // inline thread_local __unhandled_stopped_frame __unhandled_stopped_frame::value{ + // {&__unhandled_stopped_frame::__resume}, + // {}}; inline auto __coroutine_destroy_and_continue(__std::coroutine_handle<> __destroy, // __std::coroutine_handle<> __continue) noexcept // @@ -255,12 +255,12 @@ namespace STDEXEC return __std::coroutine_handle<>::from_address(&__destroy_and_continue_frame::value); } - inline auto __coroutine_unhandled_stopped(__coroutine_handle<> __coro) noexcept // - -> __std::coroutine_handle<> - { - __unhandled_stopped_frame::value.__promise_.__coro_ = __coro; - return __std::coroutine_handle<>::from_address(&__unhandled_stopped_frame::value); - } + // inline auto __coroutine_unhandled_stopped(__coroutine_handle<> __coro) noexcept // + // -> __std::coroutine_handle<> + // { + // __unhandled_stopped_frame::value.__promise_.__coro_ = __coro; + // return __std::coroutine_handle<>::from_address(&__unhandled_stopped_frame::value); + // } # else @@ -273,6 +273,15 @@ namespace STDEXEC return __continue; } + // STDEXEC_ATTRIBUTE(always_inline) + // auto __coroutine_unhandled_stopped(__coroutine_handle<> __coro) noexcept // + // -> __std::coroutine_handle<> + // { + // return __coro.unhandled_stopped(); + // } + +# endif // !defined(STDEXEC_MSVC_CORO_DESTROY_BUG_WORKAROUND) + STDEXEC_ATTRIBUTE(always_inline) auto __coroutine_unhandled_stopped(__coroutine_handle<> __coro) noexcept // -> __std::coroutine_handle<> @@ -280,7 +289,6 @@ namespace STDEXEC return __coro.unhandled_stopped(); } -# endif // STDEXEC_MSVC() && STDEXEC_MSVC_VERSION < 1950 } // namespace STDEXEC #endif // !STDEXEC_NO_STDCPP_COROUTINES() From f004614013b6e795b48302025d8fe3934e4f121e Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Fri, 1 May 2026 19:37:52 -0700 Subject: [PATCH 9/9] remove unneeded `__coroutine_unhandled_stopped` --- include/exec/task.hpp | 2 +- include/stdexec/__detail/__as_awaitable.hpp | 2 +- include/stdexec/coroutine.hpp | 43 --------------------- 3 files changed, 2 insertions(+), 45 deletions(-) diff --git a/include/exec/task.hpp b/include/exec/task.hpp index d800318ca..e94943203 100644 --- a/include/exec/task.hpp +++ b/include/exec/task.hpp @@ -672,7 +672,7 @@ namespace experimental::execution if constexpr (requires { __coro_.promise().stop_requested() ? 0 : 1; }) { if (__coro_.promise().stop_requested()) - return STDEXEC::__coroutine_unhandled_stopped(__parent); + return __parent.unhandled_stopped(); } return __coro_; } diff --git a/include/stdexec/__detail/__as_awaitable.hpp b/include/stdexec/__detail/__as_awaitable.hpp index e1241dcc5..d82e140a2 100644 --- a/include/stdexec/__detail/__as_awaitable.hpp +++ b/include/stdexec/__detail/__as_awaitable.hpp @@ -135,7 +135,7 @@ namespace STDEXEC // as normal. if (__result_.__is_valueless()) { - return STDEXEC::__coroutine_unhandled_stopped(__continuation_); + return __continuation_.unhandled_stopped(); } else { diff --git a/include/stdexec/coroutine.hpp b/include/stdexec/coroutine.hpp index 8165d5d55..54706cd3c 100644 --- a/include/stdexec/coroutine.hpp +++ b/include/stdexec/coroutine.hpp @@ -224,28 +224,6 @@ namespace STDEXEC {&__destroy_and_continue_frame::__resume}, {}}; - // struct __unhandled_stopped_frame : __detail::__synthetic_coro_frame - // { - // static void __resume(void* __address) noexcept - // { - // // Make a local copy of the promise since it will go away once we call through - // // the __unhandled_stopped_fn_ function pointer. - // auto& __self = *static_cast<__unhandled_stopped_frame*>(__address); - // STDEXEC::__coroutine_resume_nothrow(__self.__promise_.__coro_.unhandled_stopped()); - // } - - // struct __promise - // { - // __coroutine_handle<> __coro_; - // } __promise_; - - // static thread_local __unhandled_stopped_frame value; - // }; - - // inline thread_local __unhandled_stopped_frame __unhandled_stopped_frame::value{ - // {&__unhandled_stopped_frame::__resume}, - // {}}; - inline auto __coroutine_destroy_and_continue(__std::coroutine_handle<> __destroy, // __std::coroutine_handle<> __continue) noexcept // -> __std::coroutine_handle<> @@ -255,13 +233,6 @@ namespace STDEXEC return __std::coroutine_handle<>::from_address(&__destroy_and_continue_frame::value); } - // inline auto __coroutine_unhandled_stopped(__coroutine_handle<> __coro) noexcept // - // -> __std::coroutine_handle<> - // { - // __unhandled_stopped_frame::value.__promise_.__coro_ = __coro; - // return __std::coroutine_handle<>::from_address(&__unhandled_stopped_frame::value); - // } - # else STDEXEC_ATTRIBUTE(always_inline) @@ -273,22 +244,8 @@ namespace STDEXEC return __continue; } - // STDEXEC_ATTRIBUTE(always_inline) - // auto __coroutine_unhandled_stopped(__coroutine_handle<> __coro) noexcept // - // -> __std::coroutine_handle<> - // { - // return __coro.unhandled_stopped(); - // } - # endif // !defined(STDEXEC_MSVC_CORO_DESTROY_BUG_WORKAROUND) - STDEXEC_ATTRIBUTE(always_inline) - auto __coroutine_unhandled_stopped(__coroutine_handle<> __coro) noexcept // - -> __std::coroutine_handle<> - { - return __coro.unhandled_stopped(); - } - } // namespace STDEXEC #endif // !STDEXEC_NO_STDCPP_COROUTINES()