From 8a89d951088af098732f0603ebe00a768aec48bb Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Wed, 29 Apr 2026 17:17:37 -0700 Subject: [PATCH 1/7] try turning on app verifier for msvc --- .github/workflows/test-windows.ps1 | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/test-windows.ps1 b/.github/workflows/test-windows.ps1 index 809542815..4f579700f 100644 --- a/.github/workflows/test-windows.ps1 +++ b/.github/workflows/test-windows.ps1 @@ -27,4 +27,10 @@ Invoke-NativeCommand cmake -B $BuildDirectory -G Ninja ` "-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" ` "-DSTDEXEC_BUILD_TESTS:BOOL=TRUE" . Invoke-NativeCommand cmake --build $BuildDirectory + +# Enable AppVerifier for the test executables +Invoke-NativeCommand appverif -enable Basics -for test.stdexec.exe +Invoke-NativeCommand appverif -enable Basics -for test.exec.exe + Invoke-NativeCommand ctest --test-dir $BuildDirectory --output-on-failure --verbose --timeout 60 + From 39e1d1b9301a71ada84ba54613fa5576ca79f330 Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Wed, 29 Apr 2026 17:41:24 -0700 Subject: [PATCH 2/7] try again --- .github/workflows/ci.cpu.yml | 342 ++++++++++++++--------------- .github/workflows/test-windows.ps1 | 4 +- 2 files changed, 173 insertions(+), 173 deletions(-) diff --git a/.github/workflows/ci.cpu.yml b/.github/workflows/ci.cpu.yml index 7b353d97f..6f9d34a4a 100644 --- a/.github/workflows/ci.cpu.yml +++ b/.github/workflows/ci.cpu.yml @@ -12,133 +12,133 @@ concurrency: jobs: - build-cpu: - runs-on: ubuntu-latest - name: ${{ matrix.name }} - strategy: - fail-fast: false - matrix: - include: - - { name: "CPU (clang 16, Debug)", build: "Debug", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-stdlib=libc++" } - - { name: "CPU (clang 16, Debug, c++23)", build: "Debug", tag: llvm16-cuda12.9, cxxstd: "23", cxxflags: "-stdlib=libc++" } - - { name: "CPU (clang 16, Debug, TSAN)", build: "Debug", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=thread" } - - { name: "CPU (clang 16, Release)", build: "Release", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-stdlib=libc++" } - - { name: "CPU (clang 16, Release, ASAN)", build: "Release", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-stdlib=libc++ -fsanitize=address -fsanitize-ignorelist=/home/coder/stdexec/sanitizer-ignorelist.txt" } - - { name: "CPU (clang 22, Debug)", build: "Debug", tag: llvm22-cuda13.2, cxxstd: "23", cxxflags: "-stdlib=libc++" } - - { name: "CPU (clang 22, Release)", build: "Release", tag: llvm22-cuda13.2, cxxstd: "23", cxxflags: "-stdlib=libc++" } - - { name: "CPU (gcc 12, Debug)", build: "Debug", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "", } - - { name: "CPU (gcc 12, Release)", build: "Release", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "", } - # With the following config, 2 tests mysteriously time out, but only in CI and not locally. - # - { name: "CPU (gcc 12, Release, ASAN)", build: "Release", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=address" } - - { name: "CPU (gcc 12, Release, TSAN)", build: "Release", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=thread" } - - { name: "CPU (gcc 13, Debug)", build: "Debug", tag: gcc13-cuda12.9, cxxstd: "20", cxxflags: "", } - - { name: "CPU (gcc 14, Debug)", build: "Debug", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "", } - - { name: "CPU (gcc 14, Debug, ASAN)", build: "Debug", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=address" } - - { name: "CPU (gcc 14, Debug, TSAN)", build: "Debug", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=thread" } - - { name: "CPU (gcc 14, Release, ASAN)", build: "Release", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=address" } - - { name: "CPU (gcc 14, Release, LEAK)", build: "Release", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=leak", } - - { name: "CPU (gcc 14, Release, c++23)", build: "Release", tag: gcc14-cuda12.9, cxxstd: "23", cxxflags: "", } - container: - options: -u root - image: rapidsai/devcontainers:26.06-cpp-${{ matrix.tag }} - permissions: - id-token: write # This is required for configure-aws-credentials - contents: read # This is required for actions/checkout - defaults: - run: - shell: su coder {0} - working-directory: /home/coder - steps: - - name: Checkout stdexec - uses: actions/checkout@v4 - with: - path: stdexec - persist-credentials: false - - name: Setup environment - run: | - echo "ARTIFACT_PREFIX=${{runner.os}}-${{matrix.tag}}-amd64" >> "${GITHUB_ENV}" - echo "ARTIFACT_SUFFIX=${{github.run_id}}-${{github.run_attempt}}-$RANDOM" >> "${GITHUB_ENV}" - - if: github.repository_owner == 'NVIDIA' - name: Get AWS credentials for sccache bucket - uses: aws-actions/configure-aws-credentials@v4 - with: - aws-region: us-east-2 - role-duration-seconds: 28800 # 8 hours - role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA - - name: Build and test CPU schedulers - env: - ASAN_OPTIONS: alloc_dealloc_mismatch=0 - NVCC_APPEND_FLAGS: "-t=100" - SCCACHE_BUCKET: "rapids-sccache-devs" - SCCACHE_DIST_REQUEST_TIMEOUT: "7140" - SCCACHE_DIST_URL: "https://amd64.linux.sccache.rapids.nvidia.com" - SCCACHE_IDLE_TIMEOUT: "0" - SCCACHE_REGION: "us-east-2" - SCCACHE_S3_KEY_PREFIX: "nvidia-stdexec-dev" - SCCACHE_S3_PREPROCESSOR_CACHE_KEY_PREFIX: "nvidia-stdexec-dev/preprocessor" - SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE: "true" - SCCACHE_SERVER_LOG: "sccache=debug" - SCCACHE_SERVER_PORT: "4225" - run: | - set -e; - source /etc/profile - set -x; - - devcontainer-utils-install-sccache --version rapids; - - devcontainer-utils-init-sccache-dist \ - --enable-sccache-dist - <<< " \ - --auth-type 'token' \ - --auth-token '$( \ - curl -fsSL -H "Authorization: Bearer $( \ - curl -fsSL -H "Authorization: bearer $ACTIONS_ID_TOKEN_REQUEST_TOKEN" \ - "${ACTIONS_ID_TOKEN_REQUEST_URL}&audience=token.rapids.nvidia.com" \ - | jq -r '.value' \ - )" https://token.rapids.nvidia.com/gh/token/exchange \ - | jq -r '.token')' \ - "; - - # Copy source folder into ~/stdexec - cp -r "${GITHUB_WORKSPACE}"/stdexec ~/; - chown -R coder:coder ~/stdexec; - cd ~/stdexec; - - # Configure - cmake -S . -B build -GNinja \ - -DCMAKE_BUILD_TYPE=${{ matrix.build }} \ - -DCMAKE_CXX_FLAGS="${{ matrix.cxxflags }}" \ - -DSTDEXEC_ENABLE_TBB:BOOL=${{ !contains(matrix.cxxflags, '-fsanitize') }} \ - -DSTDEXEC_ENABLE_ASIO:BOOL=TRUE \ - -DSTDEXEC_ASIO_IMPLEMENTATION:STRING=boost \ - -DCMAKE_CXX_STANDARD:STRING=${{ matrix.cxxstd }} \ - -DCMAKE_CXX_EXTENSIONS:BOOL=OFF \ - -DSTDEXEC_BUILD_TESTS:BOOL=ON \ - ; - - # Compile - cmake --build build -v -j 512; - - # Print sccache stats - sccache -s; - - # Tests - SCCACHE_NO_CACHE=1 SCCACHE_NO_DIST_COMPILE=1 \ - ctest --test-dir build --verbose --output-on-failure --timeout 60; - - if: ${{ !cancelled() }} - name: Upload sccache logs - uses: actions/upload-artifact@v4 - with: - name: sccache-client-logs-${{env.ARTIFACT_PREFIX}}-${{env.ARTIFACT_SUFFIX}} - path: /tmp/sccache*.log - compression-level: 9 - - ci-cpu: - runs-on: ubuntu-latest - name: CI (CPU) - needs: - - build-cpu - steps: - - run: echo "CI (CPU) success" + # build-cpu: + # runs-on: ubuntu-latest + # name: ${{ matrix.name }} + # strategy: + # fail-fast: false + # matrix: + # include: + # - { name: "CPU (clang 16, Debug)", build: "Debug", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-stdlib=libc++" } + # - { name: "CPU (clang 16, Debug, c++23)", build: "Debug", tag: llvm16-cuda12.9, cxxstd: "23", cxxflags: "-stdlib=libc++" } + # - { name: "CPU (clang 16, Debug, TSAN)", build: "Debug", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=thread" } + # - { name: "CPU (clang 16, Release)", build: "Release", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-stdlib=libc++" } + # - { name: "CPU (clang 16, Release, ASAN)", build: "Release", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-stdlib=libc++ -fsanitize=address -fsanitize-ignorelist=/home/coder/stdexec/sanitizer-ignorelist.txt" } + # - { name: "CPU (clang 22, Debug)", build: "Debug", tag: llvm22-cuda13.2, cxxstd: "23", cxxflags: "-stdlib=libc++" } + # - { name: "CPU (clang 22, Release)", build: "Release", tag: llvm22-cuda13.2, cxxstd: "23", cxxflags: "-stdlib=libc++" } + # - { name: "CPU (gcc 12, Debug)", build: "Debug", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "", } + # - { name: "CPU (gcc 12, Release)", build: "Release", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "", } + # # With the following config, 2 tests mysteriously time out, but only in CI and not locally. + # # - { name: "CPU (gcc 12, Release, ASAN)", build: "Release", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=address" } + # - { name: "CPU (gcc 12, Release, TSAN)", build: "Release", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=thread" } + # - { name: "CPU (gcc 13, Debug)", build: "Debug", tag: gcc13-cuda12.9, cxxstd: "20", cxxflags: "", } + # - { name: "CPU (gcc 14, Debug)", build: "Debug", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "", } + # - { name: "CPU (gcc 14, Debug, ASAN)", build: "Debug", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=address" } + # - { name: "CPU (gcc 14, Debug, TSAN)", build: "Debug", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=thread" } + # - { name: "CPU (gcc 14, Release, ASAN)", build: "Release", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=address" } + # - { name: "CPU (gcc 14, Release, LEAK)", build: "Release", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=leak", } + # - { name: "CPU (gcc 14, Release, c++23)", build: "Release", tag: gcc14-cuda12.9, cxxstd: "23", cxxflags: "", } + # container: + # options: -u root + # image: rapidsai/devcontainers:26.06-cpp-${{ matrix.tag }} + # permissions: + # id-token: write # This is required for configure-aws-credentials + # contents: read # This is required for actions/checkout + # defaults: + # run: + # shell: su coder {0} + # working-directory: /home/coder + # steps: + # - name: Checkout stdexec + # uses: actions/checkout@v4 + # with: + # path: stdexec + # persist-credentials: false + # - name: Setup environment + # run: | + # echo "ARTIFACT_PREFIX=${{runner.os}}-${{matrix.tag}}-amd64" >> "${GITHUB_ENV}" + # echo "ARTIFACT_SUFFIX=${{github.run_id}}-${{github.run_attempt}}-$RANDOM" >> "${GITHUB_ENV}" + # - if: github.repository_owner == 'NVIDIA' + # name: Get AWS credentials for sccache bucket + # uses: aws-actions/configure-aws-credentials@v4 + # with: + # aws-region: us-east-2 + # role-duration-seconds: 28800 # 8 hours + # role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA + # - name: Build and test CPU schedulers + # env: + # ASAN_OPTIONS: alloc_dealloc_mismatch=0 + # NVCC_APPEND_FLAGS: "-t=100" + # SCCACHE_BUCKET: "rapids-sccache-devs" + # SCCACHE_DIST_REQUEST_TIMEOUT: "7140" + # SCCACHE_DIST_URL: "https://amd64.linux.sccache.rapids.nvidia.com" + # SCCACHE_IDLE_TIMEOUT: "0" + # SCCACHE_REGION: "us-east-2" + # SCCACHE_S3_KEY_PREFIX: "nvidia-stdexec-dev" + # SCCACHE_S3_PREPROCESSOR_CACHE_KEY_PREFIX: "nvidia-stdexec-dev/preprocessor" + # SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE: "true" + # SCCACHE_SERVER_LOG: "sccache=debug" + # SCCACHE_SERVER_PORT: "4225" + # run: | + # set -e; + # source /etc/profile + # set -x; + + # devcontainer-utils-install-sccache --version rapids; + + # devcontainer-utils-init-sccache-dist \ + # --enable-sccache-dist - <<< " \ + # --auth-type 'token' \ + # --auth-token '$( \ + # curl -fsSL -H "Authorization: Bearer $( \ + # curl -fsSL -H "Authorization: bearer $ACTIONS_ID_TOKEN_REQUEST_TOKEN" \ + # "${ACTIONS_ID_TOKEN_REQUEST_URL}&audience=token.rapids.nvidia.com" \ + # | jq -r '.value' \ + # )" https://token.rapids.nvidia.com/gh/token/exchange \ + # | jq -r '.token')' \ + # "; + + # # Copy source folder into ~/stdexec + # cp -r "${GITHUB_WORKSPACE}"/stdexec ~/; + # chown -R coder:coder ~/stdexec; + # cd ~/stdexec; + + # # Configure + # cmake -S . -B build -GNinja \ + # -DCMAKE_BUILD_TYPE=${{ matrix.build }} \ + # -DCMAKE_CXX_FLAGS="${{ matrix.cxxflags }}" \ + # -DSTDEXEC_ENABLE_TBB:BOOL=${{ !contains(matrix.cxxflags, '-fsanitize') }} \ + # -DSTDEXEC_ENABLE_ASIO:BOOL=TRUE \ + # -DSTDEXEC_ASIO_IMPLEMENTATION:STRING=boost \ + # -DCMAKE_CXX_STANDARD:STRING=${{ matrix.cxxstd }} \ + # -DCMAKE_CXX_EXTENSIONS:BOOL=OFF \ + # -DSTDEXEC_BUILD_TESTS:BOOL=ON \ + # ; + + # # Compile + # cmake --build build -v -j 512; + + # # Print sccache stats + # sccache -s; + + # # Tests + # SCCACHE_NO_CACHE=1 SCCACHE_NO_DIST_COMPILE=1 \ + # ctest --test-dir build --verbose --output-on-failure --timeout 60; + # - if: ${{ !cancelled() }} + # name: Upload sccache logs + # uses: actions/upload-artifact@v4 + # with: + # name: sccache-client-logs-${{env.ARTIFACT_PREFIX}}-${{env.ARTIFACT_SUFFIX}} + # path: /tmp/sccache*.log + # compression-level: 9 + + # ci-cpu: + # runs-on: ubuntu-latest + # name: CI (CPU) + # needs: + # - build-cpu + # steps: + # - run: echo "CI (CPU) success" build-cpu-windows: runs-on: windows-2022 @@ -174,47 +174,47 @@ jobs: steps: - run: echo "CI (CPU) (Windows) success" - build-cpu-macos: - runs-on: macos-26-large - name: macos-${{ matrix.name }} - strategy: - fail-fast: false - matrix: - include: - - { compiler: "clang++", build: "Debug", name: "CPU (MacOS) (clang, Debug)" } - - { compiler: "clang++", build: "Release", name: "CPU (MacOS) (clang, Release)" } - steps: - - name: Checkout stdexec (MacOS) - uses: actions/checkout@v4 - with: - persist-credentials: false - - - name: Install dependencies - run: | - brew update - brew install ninja - shell: bash - - - name: Build and test CPU schedulers (MacOS) - shell: bash - run: | - mkdir build - cmake -S. -Bbuild -GNinja \ - -DCMAKE_BUILD_TYPE=${{ matrix.build }} \ - -DCMAKE_CXX_COMPILER=${{ matrix.compiler }} \ - -DSTDEXEC_ENABLE_ASIO:BOOL=TRUE \ - -DSTDEXEC_ASIO_IMPLEMENTATION:STRING=boost \ - -DCMAKE_CXX_STANDARD:STRING=20 \ - -DSTDEXEC_BUILD_TESTS:BOOL=ON - - cmake --build build/ -v - cd build - ctest --output-on-failure - - ci-cpu-macos: - runs-on: macos-latest-large - name: CI (CPU) (MacOS) - needs: - - build-cpu-macos - steps: - - run: echo "CI (CPU) (MacOS) success" + # build-cpu-macos: + # runs-on: macos-26-large + # name: macos-${{ matrix.name }} + # strategy: + # fail-fast: false + # matrix: + # include: + # - { compiler: "clang++", build: "Debug", name: "CPU (MacOS) (clang, Debug)" } + # - { compiler: "clang++", build: "Release", name: "CPU (MacOS) (clang, Release)" } + # steps: + # - name: Checkout stdexec (MacOS) + # uses: actions/checkout@v4 + # with: + # persist-credentials: false + + # - name: Install dependencies + # run: | + # brew update + # brew install ninja + # shell: bash + + # - name: Build and test CPU schedulers (MacOS) + # shell: bash + # run: | + # mkdir build + # cmake -S. -Bbuild -GNinja \ + # -DCMAKE_BUILD_TYPE=${{ matrix.build }} \ + # -DCMAKE_CXX_COMPILER=${{ matrix.compiler }} \ + # -DSTDEXEC_ENABLE_ASIO:BOOL=TRUE \ + # -DSTDEXEC_ASIO_IMPLEMENTATION:STRING=boost \ + # -DCMAKE_CXX_STANDARD:STRING=20 \ + # -DSTDEXEC_BUILD_TESTS:BOOL=ON + + # cmake --build build/ -v + # cd build + # ctest --output-on-failure + + # ci-cpu-macos: + # runs-on: macos-latest-large + # name: CI (CPU) (MacOS) + # needs: + # - build-cpu-macos + # steps: + # - run: echo "CI (CPU) (MacOS) success" diff --git a/.github/workflows/test-windows.ps1 b/.github/workflows/test-windows.ps1 index 4f579700f..e6d0ec313 100644 --- a/.github/workflows/test-windows.ps1 +++ b/.github/workflows/test-windows.ps1 @@ -29,8 +29,8 @@ Invoke-NativeCommand cmake -B $BuildDirectory -G Ninja ` Invoke-NativeCommand cmake --build $BuildDirectory # Enable AppVerifier for the test executables -Invoke-NativeCommand appverif -enable Basics -for test.stdexec.exe -Invoke-NativeCommand appverif -enable Basics -for test.exec.exe +Invoke-NativeCommand appverif /verify test.stdexec.exe +Invoke-NativeCommand appverif /verify test.exec.exe Invoke-NativeCommand ctest --test-dir $BuildDirectory --output-on-failure --verbose --timeout 60 From 49f170416643c9f7fc73bd93e635e55ea4160b8c Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Thu, 30 Apr 2026 16:25:44 -0700 Subject: [PATCH 3/7] wrap unhandled_stopped coroutine handles on msvc --- .github/workflows/test-windows.ps1 | 8 +-- include/exec/at_coroutine_exit.hpp | 4 +- include/exec/on_coro_disposition.hpp | 2 +- include/exec/task.hpp | 7 +- include/stdexec/__detail/__as_awaitable.hpp | 10 ++- include/stdexec/__detail/__config.hpp | 2 +- .../stdexec/__detail/__connect_awaitable.hpp | 24 +++---- include/stdexec/__detail/__task.hpp | 2 +- include/stdexec/coroutine.hpp | 67 +++++++++++++++---- 9 files changed, 84 insertions(+), 42 deletions(-) diff --git a/.github/workflows/test-windows.ps1 b/.github/workflows/test-windows.ps1 index e6d0ec313..5f2774ff5 100644 --- a/.github/workflows/test-windows.ps1 +++ b/.github/workflows/test-windows.ps1 @@ -21,16 +21,12 @@ New-Item -ItemType Directory $BuildDirectory | Out-Null Invoke-NativeCommand cmake -B $BuildDirectory -G Ninja ` "-DCMAKE_BUILD_TYPE=$Config" ` + "-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" ` "-DCMAKE_MSVC_DEBUG_INFORMATION_FORMAT:STRING=Embedded" ` + "-DCMAKE_CXX_FLAGS:STRING=/fsanitize=address" ` "-DSTDEXEC_ENABLE_ASIO:BOOL=TRUE" ` "-DSTDEXEC_ASIO_IMPLEMENTATION:STRING=boost" ` - "-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" ` "-DSTDEXEC_BUILD_TESTS:BOOL=TRUE" . Invoke-NativeCommand cmake --build $BuildDirectory - -# Enable AppVerifier for the test executables -Invoke-NativeCommand appverif /verify test.stdexec.exe -Invoke-NativeCommand appverif /verify test.exec.exe - Invoke-NativeCommand ctest --test-dir $BuildDirectory --output-on-failure --verbose --timeout 60 diff --git a/include/exec/at_coroutine_exit.hpp b/include/exec/at_coroutine_exit.hpp index a54668d63..9a872f67f 100644 --- a/include/exec/at_coroutine_exit.hpp +++ b/include/exec/at_coroutine_exit.hpp @@ -141,7 +141,7 @@ namespace experimental::execution ~__task() { if (__coro_) - __coro_.destroy(); + STDEXEC::__coroutine_destroy_nothrow(__coro_); } [[nodiscard]] @@ -189,7 +189,7 @@ namespace experimental::execution { auto __cont = __h.promise().continuation(); auto __coro = __h.promise().__is_stopped_ ? __cont.unhandled_stopped() : __cont.handle(); - return STDEXEC_CORO_DESTROY_AND_CONTINUE(__h, __coro); + return STDEXEC::__coroutine_destroy_and_continue(__h, __coro); } static constexpr void await_resume() noexcept {} diff --git a/include/exec/on_coro_disposition.hpp b/include/exec/on_coro_disposition.hpp index bfd8385cd..edb8b2ed1 100644 --- a/include/exec/on_coro_disposition.hpp +++ b/include/exec/on_coro_disposition.hpp @@ -121,7 +121,7 @@ namespace experimental::execution { auto __cont = __h.promise().continuation(); auto __coro = __h.promise().__is_stopped_ ? __cont.unhandled_stopped() : __cont.handle(); - return STDEXEC_CORO_DESTROY_AND_CONTINUE(__h, __coro); + return STDEXEC::__coroutine_destroy_and_continue(__h, __coro); } void await_resume() const noexcept {} diff --git a/include/exec/task.hpp b/include/exec/task.hpp index c73746769..e0a3f4af3 100644 --- a/include/exec/task.hpp +++ b/include/exec/task.hpp @@ -531,7 +531,7 @@ namespace experimental::execution constexpr ~basic_task() { if (__coro_) - __coro_.destroy(); + STDEXEC::__coroutine_destroy_nothrow(__coro_); } private: @@ -653,7 +653,7 @@ namespace experimental::execution constexpr ~__task_awaiter() { if (__coro_) - __coro_.destroy(); + STDEXEC::__coroutine_destroy_nothrow(__coro_); } static constexpr auto await_ready() noexcept -> bool @@ -672,7 +672,8 @@ namespace experimental::execution if constexpr (requires { __coro_.promise().stop_requested() ? 0 : 1; }) { if (__coro_.promise().stop_requested()) - return __parent.promise().unhandled_stopped(); + return STDEXEC::__coroutine_destroy_and_continue( + __parent.promise().unhandled_stopped()); } return __coro_; } diff --git a/include/stdexec/__detail/__as_awaitable.hpp b/include/stdexec/__detail/__as_awaitable.hpp index 1d154e29b..e489263c6 100644 --- a/include/stdexec/__detail/__as_awaitable.hpp +++ b/include/stdexec/__detail/__as_awaitable.hpp @@ -133,8 +133,14 @@ namespace STDEXEC // If the operation was stopped (__result_ is valueless), we should use the // unhandled_stopped() continuation. Otherwise, should resume the __continuation_ // as normal. - return __result_.__is_valueless() ? __continuation_.unhandled_stopped() - : __continuation_.handle(); + if (__result_.__is_valueless()) + { + return STDEXEC::__coroutine_destroy_and_continue(__continuation_.unhandled_stopped()); + } + else + { + return __continuation_.handle(); + } } __coroutine_handle<> __continuation_; diff --git a/include/stdexec/__detail/__config.hpp b/include/stdexec/__detail/__config.hpp index 3106b08fa..cf895ed44 100644 --- a/include/stdexec/__detail/__config.hpp +++ b/include/stdexec/__detail/__config.hpp @@ -321,7 +321,7 @@ namespace STDEXEC::__std # define STDEXEC_ATTR_WHICH_4(_ATTR) __forceinline #elif STDEXEC_CLANG() # define STDEXEC_ATTR_WHICH_4(_ATTR) \ - inline //__attribute__((__always_inline__, __artificial__, __nodebug__)) inline + __attribute__((__always_inline__, __artificial__, __nodebug__)) inline #elif STDEXEC_GCC() # define STDEXEC_ATTR_WHICH_4(_ATTR) __attribute__((__always_inline__, __artificial__)) inline #else diff --git a/include/stdexec/__detail/__connect_awaitable.hpp b/include/stdexec/__detail/__connect_awaitable.hpp index efbe443cf..9b24af9ec 100644 --- a/include/stdexec/__detail/__connect_awaitable.hpp +++ b/include/stdexec/__detail/__connect_awaitable.hpp @@ -182,7 +182,7 @@ namespace STDEXEC ~__state() { - // make sure to destroy in the reverse order of construction + // make sure to __destroy in the reverse order of construction __awaiter_.__destroy(); __awaitable_.__destroy(); } @@ -206,12 +206,12 @@ namespace STDEXEC : __source_awaitable_(static_cast<_Awaitable2&&>(__awaitable)) {} - constexpr void construct(__std::coroutine_handle<_Promise> __coro) noexcept(__is_nothrow) + constexpr void __construct(__std::coroutine_handle<_Promise> __coro) noexcept(__is_nothrow) { __awaiter_.__construct(static_cast<_Awaitable&&>(__source_awaitable_), __coro); } - constexpr void destroy() noexcept + constexpr void __destroy() noexcept { __awaiter_.__destroy(); } @@ -263,12 +263,12 @@ namespace STDEXEC : __source_awaitable_(static_cast<_Awaitable2&&>(__awaitable)) {} - constexpr void construct(__std::coroutine_handle<_Promise> __coro) noexcept(__is_nothrow) + constexpr void __construct(__std::coroutine_handle<_Promise> __coro) noexcept(__is_nothrow) { __awaiter_.__construct(static_cast<_Awaitable&&>(__source_awaitable_), __coro); } - constexpr void destroy() noexcept + constexpr void __destroy() noexcept { __awaiter_.__destroy(); } @@ -328,12 +328,12 @@ namespace STDEXEC : __source_awaitable_(static_cast<_Awaitable2&&>(__awaitable)) {} - constexpr void construct(__std::coroutine_handle<_Promise> __coro) noexcept(__is_nothrow) + constexpr void __construct(__std::coroutine_handle<_Promise> __coro) noexcept(__is_nothrow) { __awaiter_.__construct(static_cast<_Awaitable&&>(__source_awaitable_), __coro); } - constexpr void destroy() noexcept + constexpr void __destroy() noexcept { __awaiter_.__destroy(); } @@ -363,12 +363,12 @@ namespace STDEXEC __awaiter_.__destroy(); } - static constexpr void construct(__std::coroutine_handle<_Promise>) noexcept + static constexpr void __construct(__std::coroutine_handle<_Promise>) noexcept { // no-op } - static constexpr void destroy() noexcept + static constexpr void __destroy() noexcept { // no-op } @@ -389,7 +389,7 @@ namespace STDEXEC { if (__started_) { - __awaiter_.destroy(); + __awaiter_.__destroy(); } } @@ -399,7 +399,7 @@ namespace STDEXEC STDEXEC_TRY { - __awaiter_.construct(__coro); + __awaiter_.__construct(__coro); __started_ = true; if (!__awaiter_.await_ready()) @@ -439,7 +439,7 @@ namespace STDEXEC } STDEXEC_CATCH_ALL { - if constexpr (!noexcept(__awaiter_.construct(__coro)) + if constexpr (!noexcept(__awaiter_.__construct(__coro)) || !noexcept(__awaiter_.await_ready()) || !noexcept(__awaiter_.await_suspend(__coro))) { diff --git a/include/stdexec/__detail/__task.hpp b/include/stdexec/__detail/__task.hpp index 9a8120569..a2350be9e 100644 --- a/include/stdexec/__detail/__task.hpp +++ b/include/stdexec/__detail/__task.hpp @@ -287,7 +287,7 @@ namespace STDEXEC constexpr ~task() { if (__coro_) - __coro_.destroy(); + STDEXEC::__coroutine_destroy_nothrow(__coro_); } [[nodiscard]] diff --git a/include/stdexec/coroutine.hpp b/include/stdexec/coroutine.hpp index f26378a97..7bc4680af 100644 --- a/include/stdexec/coroutine.hpp +++ b/include/stdexec/coroutine.hpp @@ -33,30 +33,28 @@ namespace STDEXEC return __std::coroutine_handle<_Tp>::from_address(__h.address()); } - inline void __coroutine_resume_nothrow(__std::coroutine_handle<> __h) noexcept + STDEXEC_ATTRIBUTE(always_inline) + void __coroutine_resume_nothrow(__std::coroutine_handle<> __h) noexcept { STDEXEC_TRY { - STDEXEC_ASSERT(__h); - __h.resume(); + __builtin_coro_resume(__h.address()); } STDEXEC_CATCH_ALL { - STDEXEC_ASSERT(!"Coroutine resume threw an exception!"); __std::unreachable(); } } - inline void __coroutine_destroy_nothrow(__std::coroutine_handle<> __h) noexcept + STDEXEC_ATTRIBUTE(always_inline) + void __coroutine_destroy_nothrow(__std::coroutine_handle<> __h) noexcept { STDEXEC_TRY { - STDEXEC_ASSERT(__h); - __h.destroy(); + __builtin_coro_destroy(__h.address()); } STDEXEC_CATCH_ALL { - STDEXEC_ASSERT(!"Coroutine destroy threw an exception!"); __std::unreachable(); } } @@ -174,7 +172,7 @@ namespace STDEXEC sizeof(__synthetic_coro_frame)); } // namespace __detail -# if STDEXEC_MSVC() && STDEXEC_MSVC_VERSION <= 1939 +# if STDEXEC_MSVC() && STDEXEC_MSVC_VERSION < 1950 // MSVCBUG https://developercommunity.visualstudio.com/t/destroy-coroutine-from-final_suspend-r/10096047 // Prior to Visual Studio 17.9 (Feb, 2024), aka MSVC 19.39, MSVC incorrectly allocates @@ -212,6 +210,26 @@ namespace STDEXEC } __promise_; }; + struct __symmetric_transfer_frame : __detail::__synthetic_coro_frame + { + constexpr __symmetric_transfer_frame() noexcept + : __detail::__synthetic_coro_frame{&__symmetric_transfer_frame::__resume} + {} + + static void __resume(void* __address) noexcept + { + // Make a local copy of the promise to ensure we can safely destroy the suspended + // coroutine after resuming the continuation. + auto __promise = static_cast<__symmetric_transfer_frame*>(__address)->__promise_; + STDEXEC::__coroutine_resume_nothrow(__promise.__continue_); + } + + struct __promise + { + __std::coroutine_handle<> __continue_{}; + } __promise_; + }; + inline auto __coroutine_destroy_and_continue(__std::coroutine_handle<> __destroy, // __std::coroutine_handle<> __continue) noexcept // -> __std::coroutine_handle<> @@ -222,12 +240,33 @@ namespace STDEXEC return __std::coroutine_handle<>::from_address(&__fr); } -# define STDEXEC_CORO_DESTROY_AND_CONTINUE(__destroy, __continue) \ - ::STDEXEC::__coroutine_destroy_and_continue(__destroy, __continue) + inline auto __coroutine_destroy_and_continue(__std::coroutine_handle<> __continue) noexcept // + -> __std::coroutine_handle<> + { + static constinit thread_local __symmetric_transfer_frame __fr; + __fr.__promise_.__continue_ = __continue; + return __std::coroutine_handle<>::from_address(&__fr); + } + # else -# define STDEXEC_CORO_DESTROY_AND_CONTINUE(__destroy, __continue) \ - (__destroy.destroy(), __continue) -# endif + + STDEXEC_ATTRIBUTE(always_inline) + auto __coroutine_destroy_and_continue(__std::coroutine_handle<> __destroy, // + __std::coroutine_handle<> __continue) noexcept // + -> __std::coroutine_handle<> + { + ::STDEXEC::__coroutine_destroy_nothrow(__destroy); + return __continue; + } + + STDEXEC_ATTRIBUTE(always_inline) + auto __coroutine_destroy_and_continue(__std::coroutine_handle<> __continue) noexcept // + -> __std::coroutine_handle<> + { + return __continue; + } + +# endif // STDEXEC_MSVC() && STDEXEC_MSVC_VERSION < 1950 } // namespace STDEXEC #endif // !STDEXEC_NO_STDCPP_COROUTINES() From 9b473a38f9d1b0df87b0344e5d5e892ceb483025 Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Thu, 30 Apr 2026 16:57:33 -0700 Subject: [PATCH 4/7] try to convince msvc to constinit the synthetic coro frames --- include/stdexec/coroutine.hpp | 47 ++++++++++++++++------------------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/include/stdexec/coroutine.hpp b/include/stdexec/coroutine.hpp index 7bc4680af..ba3fbf38b 100644 --- a/include/stdexec/coroutine.hpp +++ b/include/stdexec/coroutine.hpp @@ -155,17 +155,12 @@ namespace STDEXEC { struct __synthetic_coro_frame { - void (*__resume_)(void*) noexcept; - // we never invoke __destroy_ so a no-op implementation is fine; we've chosen the - // address of a no-op function rather than nullptr in case some rogue awaitable - // *does* invoke destroy on the synthesized handle that it receives in its - // await_suspend function - void (*__destroy_)(void*) noexcept = &__noop_destroy; - - static void __noop_destroy(void*) noexcept - { - STDEXEC_ASSERT(!"Attempt to destroy a synthetic coroutine!"); - } + using __callback_fn_t = void(void*) noexcept; + + __callback_fn_t* __resume_ = &__noop_fn; + __callback_fn_t* __destroy_ = &__noop_fn; + + static void __noop_fn(void*) noexcept {} }; static constexpr std::ptrdiff_t __coro_promise_offset = static_cast( @@ -190,10 +185,6 @@ namespace STDEXEC struct __destroy_and_continue_frame : __detail::__synthetic_coro_frame { - constexpr __destroy_and_continue_frame() noexcept - : __detail::__synthetic_coro_frame{&__destroy_and_continue_frame::__resume} - {} - static void __resume(void* __address) noexcept { // Make a local copy of the promise to ensure we can safely destroy the suspended @@ -208,14 +199,15 @@ namespace STDEXEC __std::coroutine_handle<> __destroy_{}; __std::coroutine_handle<> __continue_{}; } __promise_; + + static thread_local __destroy_and_continue_frame value; }; + constinit inline thread_local __destroy_and_continue_frame __destroy_and_continue_frame::value{ + {&__destroy_and_continue_frame::__resume}, {}}; + struct __symmetric_transfer_frame : __detail::__synthetic_coro_frame { - constexpr __symmetric_transfer_frame() noexcept - : __detail::__synthetic_coro_frame{&__symmetric_transfer_frame::__resume} - {} - static void __resume(void* __address) noexcept { // Make a local copy of the promise to ensure we can safely destroy the suspended @@ -228,24 +220,27 @@ namespace STDEXEC { __std::coroutine_handle<> __continue_{}; } __promise_; + + static thread_local __symmetric_transfer_frame value; }; + constinit inline thread_local __symmetric_transfer_frame __symmetric_transfer_frame::value{ + {&__symmetric_transfer_frame::__resume}, {}}; + inline auto __coroutine_destroy_and_continue(__std::coroutine_handle<> __destroy, // __std::coroutine_handle<> __continue) noexcept // -> __std::coroutine_handle<> { - static constinit thread_local __destroy_and_continue_frame __fr; - __fr.__promise_.__destroy_ = __destroy; - __fr.__promise_.__continue_ = __continue; - return __std::coroutine_handle<>::from_address(&__fr); + __destroy_and_continue_frame::value.__promise_.__destroy_ = __destroy; + __destroy_and_continue_frame::value.__promise_.__continue_ = __continue; + return __std::coroutine_handle<>::from_address(&__destroy_and_continue_frame::value); } inline auto __coroutine_destroy_and_continue(__std::coroutine_handle<> __continue) noexcept // -> __std::coroutine_handle<> { - static constinit thread_local __symmetric_transfer_frame __fr; - __fr.__promise_.__continue_ = __continue; - return __std::coroutine_handle<>::from_address(&__fr); + __symmetric_transfer_frame::value.__promise_.__continue_ = __continue; + return __std::coroutine_handle<>::from_address(&__symmetric_transfer_frame::value); } # else From df6213ec24993001dc368dade0a3f0097286e5e7 Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Thu, 30 Apr 2026 17:50:22 -0700 Subject: [PATCH 5/7] try again --- .github/workflows/test-windows.ps1 | 3 +-- include/stdexec/__detail/__config.hpp | 17 +++++++++++++ include/stdexec/coroutine.hpp | 36 +++++++++++++++++++-------- 3 files changed, 44 insertions(+), 12 deletions(-) diff --git a/.github/workflows/test-windows.ps1 b/.github/workflows/test-windows.ps1 index 5f2774ff5..c20ecc521 100644 --- a/.github/workflows/test-windows.ps1 +++ b/.github/workflows/test-windows.ps1 @@ -23,10 +23,9 @@ Invoke-NativeCommand cmake -B $BuildDirectory -G Ninja ` "-DCMAKE_BUILD_TYPE=$Config" ` "-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" ` "-DCMAKE_MSVC_DEBUG_INFORMATION_FORMAT:STRING=Embedded" ` - "-DCMAKE_CXX_FLAGS:STRING=/fsanitize=address" ` + "-DCMAKE_CXX_FLAGS:STRING=/fsanitize=address /EHsc" ` "-DSTDEXEC_ENABLE_ASIO:BOOL=TRUE" ` "-DSTDEXEC_ASIO_IMPLEMENTATION:STRING=boost" ` "-DSTDEXEC_BUILD_TESTS:BOOL=TRUE" . Invoke-NativeCommand cmake --build $BuildDirectory Invoke-NativeCommand ctest --test-dir $BuildDirectory --output-on-failure --verbose --timeout 60 - diff --git a/include/stdexec/__detail/__config.hpp b/include/stdexec/__detail/__config.hpp index cf895ed44..ad3643cad 100644 --- a/include/stdexec/__detail/__config.hpp +++ b/include/stdexec/__detail/__config.hpp @@ -142,6 +142,13 @@ # define STDEXEC_HAS_ATTRIBUTE(...) 0 #endif +//////////////////////////////////////////////////////////////////////////////////////////////////// +#if defined(__has_cpp_attribute) +# define STDEXEC_HAS_CPP_ATTRIBUTE(...) __has_cpp_attribute(__VA_ARGS__) +#else +# define STDEXEC_HAS_CPP_ATTRIBUTE(...) 0 +#endif + //////////////////////////////////////////////////////////////////////////////////////////////////// #if STDEXEC_CLANG() && STDEXEC_CUDA_COMPILATION() # define STDEXEC_HOST_DEVICE_DEDUCTION_GUIDE __host__ __device__ @@ -370,6 +377,16 @@ namespace STDEXEC::__std #define STDEXEC_ATTR_noinline STDEXEC_PP_PROBE(~, 9) #define STDEXEC_ATTR___noinline__ STDEXEC_PP_PROBE(~, 9) +#if STDEXEC_MSVC() && !STDEXEC_CLANG_CL() && STDEXEC_MSVC_VERSION >= 1950 +# define STDEXEC_ATTR_WHICH_10(_ATTR) [[msvc::musttail]] +#elif STDEXEC_HAS_CPP_ATTRIBUTE(gnu::musttail) +# define STDEXEC_ATTR_WHICH_10(_ATTR) [[gnu::musttail]] +#else +# define STDEXEC_ATTR_WHICH_10(_ATTR) /*nothing*/ +#endif +#define STDEXEC_ATTR_musttail STDEXEC_PP_PROBE(~, 10) +#define STDEXEC_ATTR___musttail__ STDEXEC_PP_PROBE(~, 10) + //////////////////////////////////////////////////////////////////////////////////////////////////// // warning push/pop portability macros #if STDEXEC_NVCC() diff --git a/include/stdexec/coroutine.hpp b/include/stdexec/coroutine.hpp index ba3fbf38b..06d9eda32 100644 --- a/include/stdexec/coroutine.hpp +++ b/include/stdexec/coroutine.hpp @@ -34,11 +34,11 @@ namespace STDEXEC } STDEXEC_ATTRIBUTE(always_inline) - void __coroutine_resume_nothrow(__std::coroutine_handle<> __h) noexcept + void __coroutine_resume_nothrow(void* __address) noexcept { STDEXEC_TRY { - __builtin_coro_resume(__h.address()); + __builtin_coro_resume(__address); } STDEXEC_CATCH_ALL { @@ -47,11 +47,17 @@ namespace STDEXEC } STDEXEC_ATTRIBUTE(always_inline) - void __coroutine_destroy_nothrow(__std::coroutine_handle<> __h) noexcept + void __coroutine_resume_nothrow(__std::coroutine_handle<> __h) noexcept + { + STDEXEC::__coroutine_resume_nothrow(__h.address()); + } + + STDEXEC_ATTRIBUTE(always_inline) + void __coroutine_destroy_nothrow(void* __address) noexcept { STDEXEC_TRY { - __builtin_coro_destroy(__h.address()); + __builtin_coro_destroy(__address); } STDEXEC_CATCH_ALL { @@ -59,6 +65,12 @@ namespace STDEXEC } } + STDEXEC_ATTRIBUTE(always_inline) + void __coroutine_destroy_nothrow(__std::coroutine_handle<> __h) noexcept + { + STDEXEC::__coroutine_destroy_nothrow(__h.address()); + } + // A coroutine handle that also supports unhandled_stopped() for propagating stop // signals through co_awaits of senders. template @@ -191,7 +203,8 @@ namespace STDEXEC // coroutine after resuming the continuation. auto __promise = static_cast<__destroy_and_continue_frame*>(__address)->__promise_; STDEXEC::__coroutine_resume_nothrow(__promise.__continue_); - STDEXEC::__coroutine_destroy_nothrow(__promise.__destroy_); + STDEXEC_ATTRIBUTE(musttail) + return STDEXEC::__coroutine_destroy_nothrow(__promise.__destroy_.address()); } struct __promise @@ -203,8 +216,9 @@ namespace STDEXEC static thread_local __destroy_and_continue_frame value; }; - constinit inline thread_local __destroy_and_continue_frame __destroy_and_continue_frame::value{ - {&__destroy_and_continue_frame::__resume}, {}}; + inline thread_local __destroy_and_continue_frame __destroy_and_continue_frame::value{ + {&__destroy_and_continue_frame::__resume}, + {}}; struct __symmetric_transfer_frame : __detail::__synthetic_coro_frame { @@ -213,7 +227,8 @@ namespace STDEXEC // Make a local copy of the promise to ensure we can safely destroy the suspended // coroutine after resuming the continuation. auto __promise = static_cast<__symmetric_transfer_frame*>(__address)->__promise_; - STDEXEC::__coroutine_resume_nothrow(__promise.__continue_); + STDEXEC_ATTRIBUTE(musttail) + return STDEXEC::__coroutine_resume_nothrow(__promise.__continue_.address()); } struct __promise @@ -224,8 +239,9 @@ namespace STDEXEC static thread_local __symmetric_transfer_frame value; }; - constinit inline thread_local __symmetric_transfer_frame __symmetric_transfer_frame::value{ - {&__symmetric_transfer_frame::__resume}, {}}; + inline thread_local __symmetric_transfer_frame __symmetric_transfer_frame::value{ + {&__symmetric_transfer_frame::__resume}, + {}}; inline auto __coroutine_destroy_and_continue(__std::coroutine_handle<> __destroy, // __std::coroutine_handle<> __continue) noexcept // From 43e326d3adca5d4accea6c2d8c96741766eddbb4 Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Thu, 30 Apr 2026 18:08:43 -0700 Subject: [PATCH 6/7] try again --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f603401a6..c574a5341 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -252,7 +252,7 @@ target_compile_options(stdexec INTERFACE # Do you want a preprocessor that works? Picky, picky. target_compile_options(stdexec INTERFACE - $<$:/Zc:__cplusplus /Zc:preprocessor /Zc:externConstexpr> + $<$:/Zc:__cplusplus /Zc:preprocessor /Zc:externConstexpr /bigobj> ) set(STDEXEC_NAMESPACE "stdexec" CACHE STRING "The name of the top-level namespace for stdexec") From e4caf2fdbaffbb0c59296d0ce9d4983b5f1d8b35 Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Thu, 30 Apr 2026 19:21:34 -0700 Subject: [PATCH 7/7] turn other cpu ci targets back on --- .github/workflows/ci.cpu.yml | 342 +++++++++++++++++------------------ 1 file changed, 171 insertions(+), 171 deletions(-) diff --git a/.github/workflows/ci.cpu.yml b/.github/workflows/ci.cpu.yml index 6f9d34a4a..7b353d97f 100644 --- a/.github/workflows/ci.cpu.yml +++ b/.github/workflows/ci.cpu.yml @@ -12,133 +12,133 @@ concurrency: jobs: - # build-cpu: - # runs-on: ubuntu-latest - # name: ${{ matrix.name }} - # strategy: - # fail-fast: false - # matrix: - # include: - # - { name: "CPU (clang 16, Debug)", build: "Debug", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-stdlib=libc++" } - # - { name: "CPU (clang 16, Debug, c++23)", build: "Debug", tag: llvm16-cuda12.9, cxxstd: "23", cxxflags: "-stdlib=libc++" } - # - { name: "CPU (clang 16, Debug, TSAN)", build: "Debug", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=thread" } - # - { name: "CPU (clang 16, Release)", build: "Release", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-stdlib=libc++" } - # - { name: "CPU (clang 16, Release, ASAN)", build: "Release", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-stdlib=libc++ -fsanitize=address -fsanitize-ignorelist=/home/coder/stdexec/sanitizer-ignorelist.txt" } - # - { name: "CPU (clang 22, Debug)", build: "Debug", tag: llvm22-cuda13.2, cxxstd: "23", cxxflags: "-stdlib=libc++" } - # - { name: "CPU (clang 22, Release)", build: "Release", tag: llvm22-cuda13.2, cxxstd: "23", cxxflags: "-stdlib=libc++" } - # - { name: "CPU (gcc 12, Debug)", build: "Debug", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "", } - # - { name: "CPU (gcc 12, Release)", build: "Release", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "", } - # # With the following config, 2 tests mysteriously time out, but only in CI and not locally. - # # - { name: "CPU (gcc 12, Release, ASAN)", build: "Release", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=address" } - # - { name: "CPU (gcc 12, Release, TSAN)", build: "Release", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=thread" } - # - { name: "CPU (gcc 13, Debug)", build: "Debug", tag: gcc13-cuda12.9, cxxstd: "20", cxxflags: "", } - # - { name: "CPU (gcc 14, Debug)", build: "Debug", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "", } - # - { name: "CPU (gcc 14, Debug, ASAN)", build: "Debug", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=address" } - # - { name: "CPU (gcc 14, Debug, TSAN)", build: "Debug", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=thread" } - # - { name: "CPU (gcc 14, Release, ASAN)", build: "Release", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=address" } - # - { name: "CPU (gcc 14, Release, LEAK)", build: "Release", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=leak", } - # - { name: "CPU (gcc 14, Release, c++23)", build: "Release", tag: gcc14-cuda12.9, cxxstd: "23", cxxflags: "", } - # container: - # options: -u root - # image: rapidsai/devcontainers:26.06-cpp-${{ matrix.tag }} - # permissions: - # id-token: write # This is required for configure-aws-credentials - # contents: read # This is required for actions/checkout - # defaults: - # run: - # shell: su coder {0} - # working-directory: /home/coder - # steps: - # - name: Checkout stdexec - # uses: actions/checkout@v4 - # with: - # path: stdexec - # persist-credentials: false - # - name: Setup environment - # run: | - # echo "ARTIFACT_PREFIX=${{runner.os}}-${{matrix.tag}}-amd64" >> "${GITHUB_ENV}" - # echo "ARTIFACT_SUFFIX=${{github.run_id}}-${{github.run_attempt}}-$RANDOM" >> "${GITHUB_ENV}" - # - if: github.repository_owner == 'NVIDIA' - # name: Get AWS credentials for sccache bucket - # uses: aws-actions/configure-aws-credentials@v4 - # with: - # aws-region: us-east-2 - # role-duration-seconds: 28800 # 8 hours - # role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA - # - name: Build and test CPU schedulers - # env: - # ASAN_OPTIONS: alloc_dealloc_mismatch=0 - # NVCC_APPEND_FLAGS: "-t=100" - # SCCACHE_BUCKET: "rapids-sccache-devs" - # SCCACHE_DIST_REQUEST_TIMEOUT: "7140" - # SCCACHE_DIST_URL: "https://amd64.linux.sccache.rapids.nvidia.com" - # SCCACHE_IDLE_TIMEOUT: "0" - # SCCACHE_REGION: "us-east-2" - # SCCACHE_S3_KEY_PREFIX: "nvidia-stdexec-dev" - # SCCACHE_S3_PREPROCESSOR_CACHE_KEY_PREFIX: "nvidia-stdexec-dev/preprocessor" - # SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE: "true" - # SCCACHE_SERVER_LOG: "sccache=debug" - # SCCACHE_SERVER_PORT: "4225" - # run: | - # set -e; - # source /etc/profile - # set -x; - - # devcontainer-utils-install-sccache --version rapids; - - # devcontainer-utils-init-sccache-dist \ - # --enable-sccache-dist - <<< " \ - # --auth-type 'token' \ - # --auth-token '$( \ - # curl -fsSL -H "Authorization: Bearer $( \ - # curl -fsSL -H "Authorization: bearer $ACTIONS_ID_TOKEN_REQUEST_TOKEN" \ - # "${ACTIONS_ID_TOKEN_REQUEST_URL}&audience=token.rapids.nvidia.com" \ - # | jq -r '.value' \ - # )" https://token.rapids.nvidia.com/gh/token/exchange \ - # | jq -r '.token')' \ - # "; - - # # Copy source folder into ~/stdexec - # cp -r "${GITHUB_WORKSPACE}"/stdexec ~/; - # chown -R coder:coder ~/stdexec; - # cd ~/stdexec; - - # # Configure - # cmake -S . -B build -GNinja \ - # -DCMAKE_BUILD_TYPE=${{ matrix.build }} \ - # -DCMAKE_CXX_FLAGS="${{ matrix.cxxflags }}" \ - # -DSTDEXEC_ENABLE_TBB:BOOL=${{ !contains(matrix.cxxflags, '-fsanitize') }} \ - # -DSTDEXEC_ENABLE_ASIO:BOOL=TRUE \ - # -DSTDEXEC_ASIO_IMPLEMENTATION:STRING=boost \ - # -DCMAKE_CXX_STANDARD:STRING=${{ matrix.cxxstd }} \ - # -DCMAKE_CXX_EXTENSIONS:BOOL=OFF \ - # -DSTDEXEC_BUILD_TESTS:BOOL=ON \ - # ; - - # # Compile - # cmake --build build -v -j 512; - - # # Print sccache stats - # sccache -s; - - # # Tests - # SCCACHE_NO_CACHE=1 SCCACHE_NO_DIST_COMPILE=1 \ - # ctest --test-dir build --verbose --output-on-failure --timeout 60; - # - if: ${{ !cancelled() }} - # name: Upload sccache logs - # uses: actions/upload-artifact@v4 - # with: - # name: sccache-client-logs-${{env.ARTIFACT_PREFIX}}-${{env.ARTIFACT_SUFFIX}} - # path: /tmp/sccache*.log - # compression-level: 9 - - # ci-cpu: - # runs-on: ubuntu-latest - # name: CI (CPU) - # needs: - # - build-cpu - # steps: - # - run: echo "CI (CPU) success" + build-cpu: + runs-on: ubuntu-latest + name: ${{ matrix.name }} + strategy: + fail-fast: false + matrix: + include: + - { name: "CPU (clang 16, Debug)", build: "Debug", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-stdlib=libc++" } + - { name: "CPU (clang 16, Debug, c++23)", build: "Debug", tag: llvm16-cuda12.9, cxxstd: "23", cxxflags: "-stdlib=libc++" } + - { name: "CPU (clang 16, Debug, TSAN)", build: "Debug", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=thread" } + - { name: "CPU (clang 16, Release)", build: "Release", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-stdlib=libc++" } + - { name: "CPU (clang 16, Release, ASAN)", build: "Release", tag: llvm16-cuda12.9, cxxstd: "20", cxxflags: "-stdlib=libc++ -fsanitize=address -fsanitize-ignorelist=/home/coder/stdexec/sanitizer-ignorelist.txt" } + - { name: "CPU (clang 22, Debug)", build: "Debug", tag: llvm22-cuda13.2, cxxstd: "23", cxxflags: "-stdlib=libc++" } + - { name: "CPU (clang 22, Release)", build: "Release", tag: llvm22-cuda13.2, cxxstd: "23", cxxflags: "-stdlib=libc++" } + - { name: "CPU (gcc 12, Debug)", build: "Debug", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "", } + - { name: "CPU (gcc 12, Release)", build: "Release", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "", } + # With the following config, 2 tests mysteriously time out, but only in CI and not locally. + # - { name: "CPU (gcc 12, Release, ASAN)", build: "Release", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=address" } + - { name: "CPU (gcc 12, Release, TSAN)", build: "Release", tag: gcc12-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=thread" } + - { name: "CPU (gcc 13, Debug)", build: "Debug", tag: gcc13-cuda12.9, cxxstd: "20", cxxflags: "", } + - { name: "CPU (gcc 14, Debug)", build: "Debug", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "", } + - { name: "CPU (gcc 14, Debug, ASAN)", build: "Debug", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=address" } + - { name: "CPU (gcc 14, Debug, TSAN)", build: "Debug", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=thread" } + - { name: "CPU (gcc 14, Release, ASAN)", build: "Release", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=address" } + - { name: "CPU (gcc 14, Release, LEAK)", build: "Release", tag: gcc14-cuda12.9, cxxstd: "20", cxxflags: "-fsanitize=leak", } + - { name: "CPU (gcc 14, Release, c++23)", build: "Release", tag: gcc14-cuda12.9, cxxstd: "23", cxxflags: "", } + container: + options: -u root + image: rapidsai/devcontainers:26.06-cpp-${{ matrix.tag }} + permissions: + id-token: write # This is required for configure-aws-credentials + contents: read # This is required for actions/checkout + defaults: + run: + shell: su coder {0} + working-directory: /home/coder + steps: + - name: Checkout stdexec + uses: actions/checkout@v4 + with: + path: stdexec + persist-credentials: false + - name: Setup environment + run: | + echo "ARTIFACT_PREFIX=${{runner.os}}-${{matrix.tag}}-amd64" >> "${GITHUB_ENV}" + echo "ARTIFACT_SUFFIX=${{github.run_id}}-${{github.run_attempt}}-$RANDOM" >> "${GITHUB_ENV}" + - if: github.repository_owner == 'NVIDIA' + name: Get AWS credentials for sccache bucket + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: us-east-2 + role-duration-seconds: 28800 # 8 hours + role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA + - name: Build and test CPU schedulers + env: + ASAN_OPTIONS: alloc_dealloc_mismatch=0 + NVCC_APPEND_FLAGS: "-t=100" + SCCACHE_BUCKET: "rapids-sccache-devs" + SCCACHE_DIST_REQUEST_TIMEOUT: "7140" + SCCACHE_DIST_URL: "https://amd64.linux.sccache.rapids.nvidia.com" + SCCACHE_IDLE_TIMEOUT: "0" + SCCACHE_REGION: "us-east-2" + SCCACHE_S3_KEY_PREFIX: "nvidia-stdexec-dev" + SCCACHE_S3_PREPROCESSOR_CACHE_KEY_PREFIX: "nvidia-stdexec-dev/preprocessor" + SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE: "true" + SCCACHE_SERVER_LOG: "sccache=debug" + SCCACHE_SERVER_PORT: "4225" + run: | + set -e; + source /etc/profile + set -x; + + devcontainer-utils-install-sccache --version rapids; + + devcontainer-utils-init-sccache-dist \ + --enable-sccache-dist - <<< " \ + --auth-type 'token' \ + --auth-token '$( \ + curl -fsSL -H "Authorization: Bearer $( \ + curl -fsSL -H "Authorization: bearer $ACTIONS_ID_TOKEN_REQUEST_TOKEN" \ + "${ACTIONS_ID_TOKEN_REQUEST_URL}&audience=token.rapids.nvidia.com" \ + | jq -r '.value' \ + )" https://token.rapids.nvidia.com/gh/token/exchange \ + | jq -r '.token')' \ + "; + + # Copy source folder into ~/stdexec + cp -r "${GITHUB_WORKSPACE}"/stdexec ~/; + chown -R coder:coder ~/stdexec; + cd ~/stdexec; + + # Configure + cmake -S . -B build -GNinja \ + -DCMAKE_BUILD_TYPE=${{ matrix.build }} \ + -DCMAKE_CXX_FLAGS="${{ matrix.cxxflags }}" \ + -DSTDEXEC_ENABLE_TBB:BOOL=${{ !contains(matrix.cxxflags, '-fsanitize') }} \ + -DSTDEXEC_ENABLE_ASIO:BOOL=TRUE \ + -DSTDEXEC_ASIO_IMPLEMENTATION:STRING=boost \ + -DCMAKE_CXX_STANDARD:STRING=${{ matrix.cxxstd }} \ + -DCMAKE_CXX_EXTENSIONS:BOOL=OFF \ + -DSTDEXEC_BUILD_TESTS:BOOL=ON \ + ; + + # Compile + cmake --build build -v -j 512; + + # Print sccache stats + sccache -s; + + # Tests + SCCACHE_NO_CACHE=1 SCCACHE_NO_DIST_COMPILE=1 \ + ctest --test-dir build --verbose --output-on-failure --timeout 60; + - if: ${{ !cancelled() }} + name: Upload sccache logs + uses: actions/upload-artifact@v4 + with: + name: sccache-client-logs-${{env.ARTIFACT_PREFIX}}-${{env.ARTIFACT_SUFFIX}} + path: /tmp/sccache*.log + compression-level: 9 + + ci-cpu: + runs-on: ubuntu-latest + name: CI (CPU) + needs: + - build-cpu + steps: + - run: echo "CI (CPU) success" build-cpu-windows: runs-on: windows-2022 @@ -174,47 +174,47 @@ jobs: steps: - run: echo "CI (CPU) (Windows) success" - # build-cpu-macos: - # runs-on: macos-26-large - # name: macos-${{ matrix.name }} - # strategy: - # fail-fast: false - # matrix: - # include: - # - { compiler: "clang++", build: "Debug", name: "CPU (MacOS) (clang, Debug)" } - # - { compiler: "clang++", build: "Release", name: "CPU (MacOS) (clang, Release)" } - # steps: - # - name: Checkout stdexec (MacOS) - # uses: actions/checkout@v4 - # with: - # persist-credentials: false - - # - name: Install dependencies - # run: | - # brew update - # brew install ninja - # shell: bash - - # - name: Build and test CPU schedulers (MacOS) - # shell: bash - # run: | - # mkdir build - # cmake -S. -Bbuild -GNinja \ - # -DCMAKE_BUILD_TYPE=${{ matrix.build }} \ - # -DCMAKE_CXX_COMPILER=${{ matrix.compiler }} \ - # -DSTDEXEC_ENABLE_ASIO:BOOL=TRUE \ - # -DSTDEXEC_ASIO_IMPLEMENTATION:STRING=boost \ - # -DCMAKE_CXX_STANDARD:STRING=20 \ - # -DSTDEXEC_BUILD_TESTS:BOOL=ON - - # cmake --build build/ -v - # cd build - # ctest --output-on-failure - - # ci-cpu-macos: - # runs-on: macos-latest-large - # name: CI (CPU) (MacOS) - # needs: - # - build-cpu-macos - # steps: - # - run: echo "CI (CPU) (MacOS) success" + build-cpu-macos: + runs-on: macos-26-large + name: macos-${{ matrix.name }} + strategy: + fail-fast: false + matrix: + include: + - { compiler: "clang++", build: "Debug", name: "CPU (MacOS) (clang, Debug)" } + - { compiler: "clang++", build: "Release", name: "CPU (MacOS) (clang, Release)" } + steps: + - name: Checkout stdexec (MacOS) + uses: actions/checkout@v4 + with: + persist-credentials: false + + - name: Install dependencies + run: | + brew update + brew install ninja + shell: bash + + - name: Build and test CPU schedulers (MacOS) + shell: bash + run: | + mkdir build + cmake -S. -Bbuild -GNinja \ + -DCMAKE_BUILD_TYPE=${{ matrix.build }} \ + -DCMAKE_CXX_COMPILER=${{ matrix.compiler }} \ + -DSTDEXEC_ENABLE_ASIO:BOOL=TRUE \ + -DSTDEXEC_ASIO_IMPLEMENTATION:STRING=boost \ + -DCMAKE_CXX_STANDARD:STRING=20 \ + -DSTDEXEC_BUILD_TESTS:BOOL=ON + + cmake --build build/ -v + cd build + ctest --output-on-failure + + ci-cpu-macos: + runs-on: macos-latest-large + name: CI (CPU) (MacOS) + needs: + - build-cpu-macos + steps: + - run: echo "CI (CPU) (MacOS) success"