From f7c0cb0496971039fd8adadf1d5f5512aff519c5 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Mon, 1 Jun 2026 10:27:17 -0400
Subject: [PATCH 01/10] Implement ckd_add

---
 include/boost/int128/utilities.hpp | 99 ++++++++++++++++++++++++++++++
 1 file changed, 99 insertions(+)
diff --git a/include/boost/int128/utilities.hpp b/include/boost/int128/utilities.hpp
index 0464b548..b6e0ce24 100644
--- a/include/boost/int128/utilities.hpp
+++ b/include/boost/int128/utilities.hpp
@@ -12,6 +12,8 @@
 #ifndef BOOST_INT128_BUILD_MODULE
 
 #include <cstdint>
+#include <limits>
+#include <type_traits>
 
 #endif
 
@@ -253,6 +255,103 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t isqrt(const int1
     return static_cast<int128_t>(isqrt(static_cast<uint128_t>(n)));
 }
 
+namespace detail {
+
+template <typename T>
+struct valid_checked_type : std::integral_constant<bool, std::is_integral<T>::value> {};
+
+template <>
+struct valid_checked_type<int128_t> : std::true_type {};
+
+template <>
+struct valid_checked_type<uint128_t> : std::true_type {};
+
+// Widen an integer operand to its 128-bit two's complement bit pattern, returned as a uint128_t
+template <typename T>
+BOOST_INT128_HOST_DEVICE constexpr uint128_t ckd_widen(const T value) noexcept
+{
+    BOOST_INT128_IF_CONSTEXPR (std::numeric_limits<T>::is_signed)
+    {
+        return static_cast<uint128_t>(static_cast<int128_t>(value));
+    }
+    else
+    {
+        return static_cast<uint128_t>(value);
+    }
+}
+
+} // namespace detail
+
+// Checked addition following the C23 <stdckdint.h> ckd_add contract.
+//
+// Computes a + b as if both operands were represented in a signed integer
+// type of infinite range and then converts that exact result to the type
+// pointed to by result. *result always receives the exact result wrapped
+// around to the width of *result. Returns false when *result represents the
+// exact mathematical sum, and true when the sum did not fit and wrap-around
+// occurred.
+BOOST_INT128_EXPORT template <typename T1, typename T2, typename T3>
+BOOST_INT128_HOST_DEVICE constexpr bool ckd_add(T1* result, const T2 a, const T3 b) noexcept
+{
+    static_assert(detail::valid_checked_type<T1>::value &&
+                  detail::valid_checked_type<T2>::value &&
+                  detail::valid_checked_type<T3>::value,
+                  "ckd_add operands must be integer types.");
+
+    // Widen both operands
+    const uint128_t raw_a {detail::ckd_widen(a)};
+    const uint128_t raw_b {detail::ckd_widen(b)};
+
+    *result = static_cast<T1>(raw_a + raw_b);
+
+    const bool a_negative {std::numeric_limits<T2>::is_signed && ((raw_a >> 127) != 0U)};
+    const bool b_negative {std::numeric_limits<T3>::is_signed && ((raw_b >> 127) != 0U)};
+
+    const uint128_t a_magnitude {a_negative ? uint128_t{0} - raw_a : raw_a};
+    const uint128_t b_magnitude {b_negative ? uint128_t{0} - raw_b : raw_b};
+
+    // Combine into the sign, magnitude, and 129th-bit carry of the exact sum.
+    uint128_t sum_magnitude {0};
+    bool sum_negative {false};
+    bool carry {false};
+
+    if (a_negative == b_negative)
+    {
+        // Equal signs: magnitudes add and may overflow into a 129th bit.
+        sum_magnitude = a_magnitude + b_magnitude;
+        carry = sum_magnitude < a_magnitude;
+        sum_negative = a_negative;
+    }
+    else if (a_magnitude >= b_magnitude)
+    {
+        // Opposite signs: magnitudes subtract and never carry.
+        sum_magnitude = a_magnitude - b_magnitude;
+        sum_negative = a_negative;
+    }
+    else
+    {
+        sum_magnitude = b_magnitude - a_magnitude;
+        sum_negative = b_negative;
+    }
+
+    // Bounds of the destination type expressed as unsigned magnitudes.
+    const auto max_magnitude {static_cast<uint128_t>((std::numeric_limits<T1>::max)())};
+    const auto min_magnitude {std::numeric_limits<T1>::is_signed ? max_magnitude + uint128_t{1} : uint128_t{0}};
+
+    if (carry)
+    {
+        // |sum| >= 2^128 cannot be represented by any 128-bit or narrower type.
+        return true;
+    }
+
+    if (sum_negative)
+    {
+        return sum_magnitude > min_magnitude;
+    }
+
+    return sum_magnitude > max_magnitude;
+}
+
 } // namespace int128
 } // namespace boost
 

From 0f26bbf8c13f75e789c6ef11a41bbb0db19b1e92 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Mon, 1 Jun 2026 10:27:28 -0400
Subject: [PATCH 02/10] Test ckd_add

---
 test/Jamfile      |   1 +
 test/test_ckd.cpp | 279 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 280 insertions(+)
 create mode 100644 test/test_ckd.cpp

diff --git a/test/Jamfile b/test/Jamfile
index 25e598d7..77063b6e 100644
--- a/test/Jamfile
+++ b/test/Jamfile
@@ -82,6 +82,7 @@ run test_midpoint.cpp ;
 run test_powm.cpp ;
 run test_ipow.cpp ;
 run test_isqrt.cpp ;
+run test_ckd.cpp ;
 
 run test_format.cpp ;
 run test_fmt_format.cpp ;
diff --git a/test/test_ckd.cpp b/test/test_ckd.cpp
new file mode 100644
index 00000000..cb7ac290
--- /dev/null
+++ b/test/test_ckd.cpp
@@ -0,0 +1,279 @@
+// Copyright 2026 Matt Borland
+// Distributed under the Boost Software License, Version 1.0.
+// https://www.boost.org/LICENSE_1_0.txt
+
+#include <boost/int128.hpp>
+#include <boost/core/lightweight_test.hpp>
+#include <random>
+#include <cstdint>
+#include <limits>
+
+using boost::int128::ckd_add;
+using boost::int128::int128_t;
+using boost::int128::uint128_t;
+
+constexpr std::size_t N {4096};
+static std::mt19937_64 rng {42};
+static std::uniform_int_distribution<std::uint64_t> dist {0, UINT64_MAX};
+
+// Small magnitudes exercise the no-overflow path for narrow targets, where a
+// purely full-range distribution would almost always overflow.
+static std::uniform_int_distribution<int> small_dist {-1000, 1000};
+
+//
+// Oracle-based testing for the standard integer types. __builtin_add_overflow
+// implements exactly the C23 contract (exact sum, wrap into the destination,
+// return true on overflow), so it is an independent reference.
+//
+#if defined(__GNUC__) || defined(__clang__)
+
+template <typename T1, typename T2, typename T3>
+void check(const T2 a, const T3 b)
+{
+    T1 expected {};
+    const bool expected_overflow {__builtin_add_overflow(a, b, &expected)};
+
+    T1 got {};
+    const bool got_overflow {ckd_add(&got, a, b)};
+
+    BOOST_TEST_EQ(got_overflow, expected_overflow);
+    BOOST_TEST(got == expected);
+}
+
+template <typename T1, typename T2, typename T3>
+void fuzz_standard()
+{
+    for (std::size_t i {0}; i < N; ++i)
+    {
+        check<T1, T2, T3>(static_cast<T2>(dist(rng)), static_cast<T3>(dist(rng)));
+        check<T1, T2, T3>(static_cast<T2>(small_dist(rng)), static_cast<T3>(small_dist(rng)));
+        check<T1, T2, T3>(static_cast<T2>(dist(rng)), static_cast<T3>(small_dist(rng)));
+        check<T1, T2, T3>(static_cast<T2>(small_dist(rng)), static_cast<T3>(dist(rng)));
+    }
+}
+
+void test_standard_oracle()
+{
+    fuzz_standard<std::int32_t,  std::int32_t,  std::int32_t>();
+    fuzz_standard<std::uint32_t, std::uint32_t, std::uint32_t>();
+    fuzz_standard<std::int8_t,   std::int32_t,  std::int32_t>();
+    fuzz_standard<std::uint8_t,  std::int32_t,  std::int32_t>();
+    fuzz_standard<std::int16_t,  std::int16_t,  std::uint16_t>();
+    fuzz_standard<std::int64_t,  std::int32_t,  std::uint32_t>();
+    fuzz_standard<std::uint64_t, std::int64_t,  std::int64_t>();
+    fuzz_standard<std::int32_t,  std::int64_t,  std::int64_t>();
+    fuzz_standard<std::uint32_t, std::int8_t,   std::int8_t>();
+    fuzz_standard<std::int64_t,  std::uint64_t, std::uint64_t>();
+    fuzz_standard<std::uint16_t, std::int64_t,  std::int32_t>();
+}
+
+#else
+
+void test_standard_oracle() {}
+
+#endif
+
+//
+// Oracle-based testing at the full 128-bit width using the native compiler
+// type, which again matches the C23 contract exactly.
+//
+#if defined(__SIZEOF_INT128__) && (defined(__GNUC__) || defined(__clang__))
+
+static uint128_t lib_u(const unsigned __int128 v)
+{
+    return uint128_t{static_cast<std::uint64_t>(v >> 64), static_cast<std::uint64_t>(v)};
+}
+
+static int128_t lib_s(const __int128 v)
+{
+    return static_cast<int128_t>(lib_u(static_cast<unsigned __int128>(v)));
+}
+
+static unsigned __int128 rand_native()
+{
+    return (static_cast<unsigned __int128>(dist(rng)) << 64) | static_cast<unsigned __int128>(dist(rng));
+}
+
+void test_native_oracle()
+{
+    for (std::size_t i {0}; i < N; ++i)
+    {
+        const unsigned __int128 ua {rand_native()};
+        const unsigned __int128 ub {rand_native()};
+        const __int128 sa {static_cast<__int128>(ua)};
+        const __int128 sb {static_cast<__int128>(ub)};
+
+        // uint128_t target, unsigned operands
+        {
+            unsigned __int128 ref {};
+            const bool ref_of {__builtin_add_overflow(ua, ub, &ref)};
+            uint128_t got {};
+            const bool got_of {ckd_add(&got, lib_u(ua), lib_u(ub))};
+            BOOST_TEST_EQ(got_of, ref_of);
+            BOOST_TEST(got == lib_u(ref));
+        }
+
+        // int128_t target, signed operands
+        {
+            __int128 ref {};
+            const bool ref_of {__builtin_add_overflow(sa, sb, &ref)};
+            int128_t got {};
+            const bool got_of {ckd_add(&got, lib_s(sa), lib_s(sb))};
+            BOOST_TEST_EQ(got_of, ref_of);
+            BOOST_TEST(got == lib_s(ref));
+        }
+
+        // int128_t target, mixed-sign operands (unsigned + signed)
+        {
+            __int128 ref {};
+            const bool ref_of {__builtin_add_overflow(ua, sb, &ref)};
+            int128_t got {};
+            const bool got_of {ckd_add(&got, lib_u(ua), lib_s(sb))};
+            BOOST_TEST_EQ(got_of, ref_of);
+            BOOST_TEST(got == lib_s(ref));
+        }
+
+        // uint128_t target, mixed-sign operands
+        {
+            unsigned __int128 ref {};
+            const bool ref_of {__builtin_add_overflow(sa, ub, &ref)};
+            uint128_t got {};
+            const bool got_of {ckd_add(&got, lib_s(sa), lib_u(ub))};
+            BOOST_TEST_EQ(got_of, ref_of);
+            BOOST_TEST(got == lib_u(ref));
+        }
+    }
+}
+
+#else
+
+void test_native_oracle() {}
+
+#endif
+
+//
+// Hand-verified edge cases that run on every platform, including those without
+// a native 128-bit type.
+//
+void test_u128_edges()
+{
+    constexpr auto u_max {(std::numeric_limits<uint128_t>::max)()};
+    uint128_t r {0};
+
+    // 2^128 - 1 + 1 == 2^128 -> wraps to 0.
+    BOOST_TEST_EQ(ckd_add(&r, u_max, uint128_t{1}), true);
+    BOOST_TEST(r == uint128_t{0});
+
+    // 2^128 - 1 + 0 fits.
+    BOOST_TEST_EQ(ckd_add(&r, u_max, uint128_t{0}), false);
+    BOOST_TEST(r == u_max);
+
+    // 5 + (-3) == 2 fits.
+    BOOST_TEST_EQ(ckd_add(&r, uint128_t{5}, int128_t{-3}), false);
+    BOOST_TEST(r == uint128_t{2});
+
+    // 3 + (-5) == -2 cannot be represented; wraps to 2^128 - 2.
+    BOOST_TEST_EQ(ckd_add(&r, uint128_t{3}, int128_t{-5}), true);
+    BOOST_TEST(r == u_max - uint128_t{1});
+
+    // (2^128 - 1) + (-1) == 2^128 - 2 fits.
+    BOOST_TEST_EQ(ckd_add(&r, u_max, int128_t{-1}), false);
+    BOOST_TEST(r == u_max - uint128_t{1});
+
+    // (2^128 - 1) + (2^128 - 1) == 2^129 - 2 -> carry, wraps to 2^128 - 2.
+    BOOST_TEST_EQ(ckd_add(&r, u_max, u_max), true);
+    BOOST_TEST(r == u_max - uint128_t{1});
+}
+
+void test_i128_edges()
+{
+    constexpr auto i_max {(std::numeric_limits<int128_t>::max)()};
+    constexpr auto i_min {(std::numeric_limits<int128_t>::min)()};
+    constexpr auto u_max {(std::numeric_limits<uint128_t>::max)()};
+    int128_t r {0};
+
+    // INT128_MAX + 1 overflows, wraps to INT128_MIN.
+    BOOST_TEST_EQ(ckd_add(&r, i_max, int128_t{1}), true);
+    BOOST_TEST(r == i_min);
+
+    // INT128_MAX + 0 fits.
+    BOOST_TEST_EQ(ckd_add(&r, i_max, int128_t{0}), false);
+    BOOST_TEST(r == i_max);
+
+    // INT128_MIN + (-1) overflows, wraps to INT128_MAX.
+    BOOST_TEST_EQ(ckd_add(&r, i_min, int128_t{-1}), true);
+    BOOST_TEST(r == i_max);
+
+    // INT128_MAX + INT128_MIN == -1 fits.
+    BOOST_TEST_EQ(ckd_add(&r, i_max, i_min), false);
+    BOOST_TEST(r == int128_t{-1});
+
+    // (2^128 - 1) [unsigned] + 1 == 2^128 cannot fit a signed target; wraps to 0.
+    BOOST_TEST_EQ(ckd_add(&r, u_max, int128_t{1}), true);
+    BOOST_TEST(r == int128_t{0});
+
+    // (2^128 - 1) [unsigned] + 0 == 2^128 - 1 cannot fit; reinterprets to -1.
+    BOOST_TEST_EQ(ckd_add(&r, u_max, uint128_t{0}), true);
+    BOOST_TEST(r == int128_t{-1});
+}
+
+void test_narrow_target_wide_operands()
+{
+    constexpr auto u_max {(std::numeric_limits<uint128_t>::max)()};
+    constexpr auto i_min {(std::numeric_limits<int128_t>::min)()};
+
+    // Small sum into a narrow signed target fits.
+    std::int32_t r32 {0};
+    BOOST_TEST_EQ(ckd_add(&r32, int128_t{1000}, int128_t{2000}), false);
+    BOOST_TEST_EQ(r32, 3000);
+
+    // (2^128 - 1) + 2 == 2^128 + 1 -> wraps mod 256 to 1 in a uint8_t target.
+    std::uint8_t r8 {0};
+    BOOST_TEST_EQ(ckd_add(&r8, u_max, uint128_t{2}), true);
+    BOOST_TEST_EQ(static_cast<int>(r8), 1);
+
+    // INT128_MIN out of int32_t range; its low 32 bits are zero.
+    BOOST_TEST_EQ(ckd_add(&r32, i_min, int128_t{0}), true);
+    BOOST_TEST_EQ(r32, 0);
+}
+
+//
+// constexpr usability.
+//
+constexpr bool ckd_overflows_int_max()
+{
+    int r {0};
+    return ckd_add(&r, (std::numeric_limits<int>::max)(), 1);
+}
+
+constexpr int ckd_value()
+{
+    int r {0};
+    ckd_add(&r, 2, 3);
+    return r;
+}
+
+constexpr bool ckd_overflows_u128_max()
+{
+    uint128_t r {0};
+    return ckd_add(&r, (std::numeric_limits<uint128_t>::max)(), uint128_t{1});
+}
+
+void test_constexpr()
+{
+    static_assert(ckd_overflows_int_max(), "INT_MAX + 1 overflows int");
+    static_assert(ckd_value() == 5, "2 + 3 == 5");
+    static_assert(ckd_overflows_u128_max(), "UINT128_MAX + 1 overflows uint128_t");
+}
+
+int main()
+{
+    test_standard_oracle();
+    test_native_oracle();
+    test_u128_edges();
+    test_i128_edges();
+    test_narrow_target_wide_operands();
+    test_constexpr();
+
+    return boost::report_errors();
+}

From 133b1e1bb70383885123639d75ad7e55050a9e77 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Mon, 1 Jun 2026 10:31:21 -0400
Subject: [PATCH 03/10] Reject bool and char per C23

---
 include/boost/int128/utilities.hpp | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/include/boost/int128/utilities.hpp b/include/boost/int128/utilities.hpp
index b6e0ce24..5d0d7d71 100644
--- a/include/boost/int128/utilities.hpp
+++ b/include/boost/int128/utilities.hpp
@@ -257,8 +257,12 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t isqrt(const int1
 
 namespace detail {
 
+// The C23 checked integer macros accept any integer type for their operands
+// except bool, plain char, enumerated types, and bit-precise (_BitInt) types.
 template <typename T>
-struct valid_checked_type : std::integral_constant<bool, std::is_integral<T>::value> {};
+struct valid_checked_type : std::integral_constant<bool, std::is_integral<T>::value &&
+                                                         !std::is_same<T, bool>::value &&
+                                                         !std::is_same<T, char>::value> {};
 
 template <>
 struct valid_checked_type<int128_t> : std::true_type {};
@@ -296,7 +300,7 @@ BOOST_INT128_HOST_DEVICE constexpr bool ckd_add(T1* result, const T2 a, const T3
     static_assert(detail::valid_checked_type<T1>::value &&
                   detail::valid_checked_type<T2>::value &&
                   detail::valid_checked_type<T3>::value,
-                  "ckd_add operands must be integer types.");
+                  "ckd_add operands must be integer types other than bool and plain char.");
 
     // Widen both operands
     const uint128_t raw_a {detail::ckd_widen(a)};

From 7706a897e48cd410b09503552282f9cc0708f7da Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Mon, 1 Jun 2026 11:04:46 -0400
Subject: [PATCH 04/10] Add ckd_sub and ckd_mul

---
 include/boost/int128/utilities.hpp | 162 +++++++++++++++++++++--------
 1 file changed, 119 insertions(+), 43 deletions(-)

diff --git a/include/boost/int128/utilities.hpp b/include/boost/int128/utilities.hpp
index 5d0d7d71..ffb29b66 100644
--- a/include/boost/int128/utilities.hpp
+++ b/include/boost/int128/utilities.hpp
@@ -284,6 +284,72 @@ BOOST_INT128_HOST_DEVICE constexpr uint128_t ckd_widen(const T value) noexcept
     }
 }
 
+// Sign and magnitude of an operand together with its 128-bit two's complement
+// image. magnitude is the absolute value; negative records the sign.
+struct ckd_operand
+{
+    uint128_t raw;
+    uint128_t magnitude;
+    bool negative;
+};
+
+template <typename T>
+BOOST_INT128_HOST_DEVICE constexpr ckd_operand ckd_decompose(const T value) noexcept
+{
+    const uint128_t raw {ckd_widen(value)};
+    const bool negative {std::numeric_limits<T>::is_signed && ((raw >> 127) != 0U)};
+    return ckd_operand{raw, negative ? uint128_t{0} - raw : raw, negative};
+}
+
+// Exact signed sum of two operands given as (magnitude, sign). carry marks a
+// 129th bit, which no 128-bit or narrower target can represent.
+struct ckd_sum_result
+{
+    uint128_t magnitude;
+    bool negative;
+    bool carry;
+};
+
+BOOST_INT128_HOST_DEVICE constexpr ckd_sum_result ckd_signed_sum(const uint128_t a_magnitude, const bool a_negative,
+                                                                 const uint128_t b_magnitude, const bool b_negative) noexcept
+{
+    if (a_negative == b_negative)
+    {
+        // Equal signs: magnitudes add and may overflow into a 129th bit.
+        const uint128_t magnitude {a_magnitude + b_magnitude};
+        return ckd_sum_result{magnitude, a_negative, magnitude < a_magnitude};
+    }
+
+    // Opposite signs: the smaller magnitude is subtracted and never carries.
+    if (a_magnitude >= b_magnitude)
+    {
+        return ckd_sum_result{a_magnitude - b_magnitude, a_negative, false};
+    }
+
+    return ckd_sum_result{b_magnitude - a_magnitude, b_negative, false};
+}
+
+// Whether a result of the given sign and magnitude fits in T1. exceeds_width
+// forces overflow when the true magnitude does not even fit in 128 bits.
+template <typename T1>
+BOOST_INT128_HOST_DEVICE constexpr bool ckd_overflows(const uint128_t magnitude, const bool negative, const bool exceeds_width) noexcept
+{
+    if (exceeds_width)
+    {
+        return true;
+    }
+
+    const uint128_t max_magnitude {static_cast<uint128_t>((std::numeric_limits<T1>::max)())};
+
+    if (negative)
+    {
+        const uint128_t min_magnitude {std::numeric_limits<T1>::is_signed ? max_magnitude + uint128_t{1} : uint128_t{0}};
+        return magnitude > min_magnitude;
+    }
+
+    return magnitude > max_magnitude;
+}
+
 } // namespace detail
 
 // Checked addition following the C23 <stdckdint.h> ckd_add contract.
@@ -302,58 +368,68 @@ BOOST_INT128_HOST_DEVICE constexpr bool ckd_add(T1* result, const T2 a, const T3
                   detail::valid_checked_type<T3>::value,
                   "ckd_add operands must be integer types other than bool and plain char.");
 
-    // Widen both operands
-    const uint128_t raw_a {detail::ckd_widen(a)};
-    const uint128_t raw_b {detail::ckd_widen(b)};
+    const auto op_a {detail::ckd_decompose(a)};
+    const auto op_b {detail::ckd_decompose(b)};
 
-    *result = static_cast<T1>(raw_a + raw_b);
+    // The modular sum of the widened images is the exact sum mod 2^128, which
+    // is all the wrapped result needs for any target no wider than 128 bits.
+    *result = static_cast<T1>(op_a.raw + op_b.raw);
 
-    const bool a_negative {std::numeric_limits<T2>::is_signed && ((raw_a >> 127) != 0U)};
-    const bool b_negative {std::numeric_limits<T3>::is_signed && ((raw_b >> 127) != 0U)};
+    const auto sum {detail::ckd_signed_sum(op_a.magnitude, op_a.negative, op_b.magnitude, op_b.negative)};
+    return detail::ckd_overflows<T1>(sum.magnitude, sum.negative, sum.carry);
+}
 
-    const uint128_t a_magnitude {a_negative ? uint128_t{0} - raw_a : raw_a};
-    const uint128_t b_magnitude {b_negative ? uint128_t{0} - raw_b : raw_b};
+// Checked subtraction following the C23 <stdckdint.h> ckd_sub contract.
+//
+// Behaves as ckd_add for a - b: *result receives the exact difference wrapped
+// to its width, and the return value reports whether that difference did not
+// fit.
+BOOST_INT128_EXPORT template <typename T1, typename T2, typename T3>
+BOOST_INT128_HOST_DEVICE constexpr bool ckd_sub(T1* result, const T2 a, const T3 b) noexcept
+{
+    static_assert(detail::valid_checked_type<T1>::value &&
+                  detail::valid_checked_type<T2>::value &&
+                  detail::valid_checked_type<T3>::value,
+                  "ckd_sub operands must be integer types other than bool and plain char.");
 
-    // Combine into the sign, magnitude, and 129th-bit carry of the exact sum.
-    uint128_t sum_magnitude {0};
-    bool sum_negative {false};
-    bool carry {false};
+    const auto op_a {detail::ckd_decompose(a)};
+    const auto op_b {detail::ckd_decompose(b)};
 
-    if (a_negative == b_negative)
-    {
-        // Equal signs: magnitudes add and may overflow into a 129th bit.
-        sum_magnitude = a_magnitude + b_magnitude;
-        carry = sum_magnitude < a_magnitude;
-        sum_negative = a_negative;
-    }
-    else if (a_magnitude >= b_magnitude)
-    {
-        // Opposite signs: magnitudes subtract and never carry.
-        sum_magnitude = a_magnitude - b_magnitude;
-        sum_negative = a_negative;
-    }
-    else
-    {
-        sum_magnitude = b_magnitude - a_magnitude;
-        sum_negative = b_negative;
-    }
+    *result = static_cast<T1>(op_a.raw - op_b.raw);
 
-    // Bounds of the destination type expressed as unsigned magnitudes.
-    const auto max_magnitude {static_cast<uint128_t>((std::numeric_limits<T1>::max)())};
-    const auto min_magnitude {std::numeric_limits<T1>::is_signed ? max_magnitude + uint128_t{1} : uint128_t{0}};
+    // a - b is a + (-b): negating b flips its sign while keeping its magnitude.
+    const auto difference {detail::ckd_signed_sum(op_a.magnitude, op_a.negative, op_b.magnitude, !op_b.negative)};
+    return detail::ckd_overflows<T1>(difference.magnitude, difference.negative, difference.carry);
+}
 
-    if (carry)
-    {
-        // |sum| >= 2^128 cannot be represented by any 128-bit or narrower type.
-        return true;
-    }
+// Checked multiplication following the C23 <stdckdint.h> ckd_mul contract.
+//
+// Computes a * b as if both operands had infinite range, stores the result
+// wrapped to the width of *result, and returns true when the exact product did
+// not fit.
+BOOST_INT128_EXPORT template <typename T1, typename T2, typename T3>
+BOOST_INT128_HOST_DEVICE constexpr bool ckd_mul(T1* result, const T2 a, const T3 b) noexcept
+{
+    static_assert(detail::valid_checked_type<T1>::value &&
+                  detail::valid_checked_type<T2>::value &&
+                  detail::valid_checked_type<T3>::value,
+                  "ckd_mul operands must be integer types other than bool and plain char.");
 
-    if (sum_negative)
-    {
-        return sum_magnitude > min_magnitude;
-    }
+    const auto op_a {detail::ckd_decompose(a)};
+    const auto op_b {detail::ckd_decompose(b)};
+
+    *result = static_cast<T1>(op_a.raw * op_b.raw);
+
+    // The product magnitude needs more than 128 bits exactly when it exceeds
+    // UINT128_MAX. Dividing the maximum by one magnitude tests that without
+    // forming a 256-bit product.
+    const bool exceeds_width {op_a.magnitude != 0U &&
+                              op_b.magnitude > ((std::numeric_limits<uint128_t>::max)() / op_a.magnitude)};
+
+    const uint128_t product_magnitude {op_a.magnitude * op_b.magnitude};
+    const bool product_negative {op_a.negative != op_b.negative};
 
-    return sum_magnitude > max_magnitude;
+    return detail::ckd_overflows<T1>(product_magnitude, product_negative, exceeds_width);
 }
 
 } // namespace int128

From 88d92c23e5753e27a7ee9af1e6f148a896c6c0d0 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Mon, 1 Jun 2026 11:04:53 -0400
Subject: [PATCH 05/10] Test new functions

---
 test/test_ckd.cpp | 302 ++++++++++++++++++++++++++++------------------
 1 file changed, 186 insertions(+), 116 deletions(-)

diff --git a/test/test_ckd.cpp b/test/test_ckd.cpp
index cb7ac290..853f1c59 100644
--- a/test/test_ckd.cpp
+++ b/test/test_ckd.cpp
@@ -9,6 +9,8 @@
 #include <limits>
 
 using boost::int128::ckd_add;
+using boost::int128::ckd_sub;
+using boost::int128::ckd_mul;
 using boost::int128::int128_t;
 using boost::int128::uint128_t;
 
@@ -21,50 +23,66 @@ static std::uniform_int_distribution<std::uint64_t> dist {0, UINT64_MAX};
 static std::uniform_int_distribution<int> small_dist {-1000, 1000};
 
 //
-// Oracle-based testing for the standard integer types. __builtin_add_overflow
-// implements exactly the C23 contract (exact sum, wrap into the destination,
-// return true on overflow), so it is an independent reference.
+// Oracle-based testing for the standard integer types. The __builtin_*_overflow
+// intrinsics implement exactly the C23 contract (exact result, wrap into the
+// destination, return true on overflow), so they are an independent reference.
 //
 #if defined(__GNUC__) || defined(__clang__)
 
-template <typename T1, typename T2, typename T3>
-void check(const T2 a, const T3 b)
+template <typename T1, typename T2, typename T3, typename Ref, typename Ckd>
+void check_op(const T2 a, const T3 b, Ref ref_overflow, Ckd ckd_overflow)
 {
     T1 expected {};
-    const bool expected_overflow {__builtin_add_overflow(a, b, &expected)};
+    const bool expected_overflow {ref_overflow(a, b, &expected)};
 
     T1 got {};
-    const bool got_overflow {ckd_add(&got, a, b)};
+    const bool got_overflow {ckd_overflow(&got, a, b)};
 
     BOOST_TEST_EQ(got_overflow, expected_overflow);
     BOOST_TEST(got == expected);
 }
 
-template <typename T1, typename T2, typename T3>
-void fuzz_standard()
+template <typename T1, typename T2, typename T3, typename Ref, typename Ckd>
+void fuzz_op(Ref ref_overflow, Ckd ckd_overflow)
 {
     for (std::size_t i {0}; i < N; ++i)
     {
-        check<T1, T2, T3>(static_cast<T2>(dist(rng)), static_cast<T3>(dist(rng)));
-        check<T1, T2, T3>(static_cast<T2>(small_dist(rng)), static_cast<T3>(small_dist(rng)));
-        check<T1, T2, T3>(static_cast<T2>(dist(rng)), static_cast<T3>(small_dist(rng)));
-        check<T1, T2, T3>(static_cast<T2>(small_dist(rng)), static_cast<T3>(dist(rng)));
+        check_op<T1, T2, T3>(static_cast<T2>(dist(rng)),       static_cast<T3>(dist(rng)),       ref_overflow, ckd_overflow);
+        check_op<T1, T2, T3>(static_cast<T2>(small_dist(rng)), static_cast<T3>(small_dist(rng)), ref_overflow, ckd_overflow);
+        check_op<T1, T2, T3>(static_cast<T2>(dist(rng)),       static_cast<T3>(small_dist(rng)), ref_overflow, ckd_overflow);
+        check_op<T1, T2, T3>(static_cast<T2>(small_dist(rng)), static_cast<T3>(dist(rng)),       ref_overflow, ckd_overflow);
     }
 }
 
+template <typename Ref, typename Ckd>
+void fuzz_all_triples(Ref ref_overflow, Ckd ckd_overflow)
+{
+    fuzz_op<std::int32_t,  std::int32_t,  std::int32_t >(ref_overflow, ckd_overflow);
+    fuzz_op<std::uint32_t, std::uint32_t, std::uint32_t>(ref_overflow, ckd_overflow);
+    fuzz_op<std::int8_t,   std::int32_t,  std::int32_t >(ref_overflow, ckd_overflow);
+    fuzz_op<std::uint8_t,  std::int32_t,  std::int32_t >(ref_overflow, ckd_overflow);
+    fuzz_op<std::int16_t,  std::int16_t,  std::uint16_t>(ref_overflow, ckd_overflow);
+    fuzz_op<std::int64_t,  std::int32_t,  std::uint32_t>(ref_overflow, ckd_overflow);
+    fuzz_op<std::uint64_t, std::int64_t,  std::int64_t >(ref_overflow, ckd_overflow);
+    fuzz_op<std::int32_t,  std::int64_t,  std::int64_t >(ref_overflow, ckd_overflow);
+    fuzz_op<std::uint32_t, std::int8_t,   std::int8_t  >(ref_overflow, ckd_overflow);
+    fuzz_op<std::int64_t,  std::uint64_t, std::uint64_t>(ref_overflow, ckd_overflow);
+    fuzz_op<std::uint16_t, std::int64_t,  std::int32_t >(ref_overflow, ckd_overflow);
+}
+
 void test_standard_oracle()
 {
-    fuzz_standard<std::int32_t,  std::int32_t,  std::int32_t>();
-    fuzz_standard<std::uint32_t, std::uint32_t, std::uint32_t>();
-    fuzz_standard<std::int8_t,   std::int32_t,  std::int32_t>();
-    fuzz_standard<std::uint8_t,  std::int32_t,  std::int32_t>();
-    fuzz_standard<std::int16_t,  std::int16_t,  std::uint16_t>();
-    fuzz_standard<std::int64_t,  std::int32_t,  std::uint32_t>();
-    fuzz_standard<std::uint64_t, std::int64_t,  std::int64_t>();
-    fuzz_standard<std::int32_t,  std::int64_t,  std::int64_t>();
-    fuzz_standard<std::uint32_t, std::int8_t,   std::int8_t>();
-    fuzz_standard<std::int64_t,  std::uint64_t, std::uint64_t>();
-    fuzz_standard<std::uint16_t, std::int64_t,  std::int32_t>();
+    fuzz_all_triples(
+        [](auto a, auto b, auto* r) { return __builtin_add_overflow(a, b, r); },
+        [](auto* r, auto a, auto b) { return ckd_add(r, a, b); });
+
+    fuzz_all_triples(
+        [](auto a, auto b, auto* r) { return __builtin_sub_overflow(a, b, r); },
+        [](auto* r, auto a, auto b) { return ckd_sub(r, a, b); });
+
+    fuzz_all_triples(
+        [](auto a, auto b, auto* r) { return __builtin_mul_overflow(a, b, r); },
+        [](auto* r, auto a, auto b) { return ckd_mul(r, a, b); });
 }
 
 #else
@@ -75,7 +93,8 @@ void test_standard_oracle() {}
 
 //
 // Oracle-based testing at the full 128-bit width using the native compiler
-// type, which again matches the C23 contract exactly.
+// type, which again matches the C23 contract exactly. This is the only place
+// products genuinely exceed 128 bits, exercising the multiply width check.
 //
 #if defined(__SIZEOF_INT128__) && (defined(__GNUC__) || defined(__clang__))
 
@@ -94,7 +113,8 @@ static unsigned __int128 rand_native()
     return (static_cast<unsigned __int128>(dist(rng)) << 64) | static_cast<unsigned __int128>(dist(rng));
 }
 
-void test_native_oracle()
+template <typename Ref, typename Ckd>
+void native_fuzz(Ref ref_overflow, Ckd ckd_overflow)
 {
     for (std::size_t i {0}; i < N; ++i)
     {
@@ -106,9 +126,9 @@ void test_native_oracle()
         // uint128_t target, unsigned operands
         {
             unsigned __int128 ref {};
-            const bool ref_of {__builtin_add_overflow(ua, ub, &ref)};
+            const bool ref_of {ref_overflow(ua, ub, &ref)};
             uint128_t got {};
-            const bool got_of {ckd_add(&got, lib_u(ua), lib_u(ub))};
+            const bool got_of {ckd_overflow(&got, lib_u(ua), lib_u(ub))};
             BOOST_TEST_EQ(got_of, ref_of);
             BOOST_TEST(got == lib_u(ref));
         }
@@ -116,9 +136,9 @@ void test_native_oracle()
         // int128_t target, signed operands
         {
             __int128 ref {};
-            const bool ref_of {__builtin_add_overflow(sa, sb, &ref)};
+            const bool ref_of {ref_overflow(sa, sb, &ref)};
             int128_t got {};
-            const bool got_of {ckd_add(&got, lib_s(sa), lib_s(sb))};
+            const bool got_of {ckd_overflow(&got, lib_s(sa), lib_s(sb))};
             BOOST_TEST_EQ(got_of, ref_of);
             BOOST_TEST(got == lib_s(ref));
         }
@@ -126,25 +146,40 @@ void test_native_oracle()
         // int128_t target, mixed-sign operands (unsigned + signed)
         {
             __int128 ref {};
-            const bool ref_of {__builtin_add_overflow(ua, sb, &ref)};
+            const bool ref_of {ref_overflow(ua, sb, &ref)};
             int128_t got {};
-            const bool got_of {ckd_add(&got, lib_u(ua), lib_s(sb))};
+            const bool got_of {ckd_overflow(&got, lib_u(ua), lib_s(sb))};
             BOOST_TEST_EQ(got_of, ref_of);
             BOOST_TEST(got == lib_s(ref));
         }
 
-        // uint128_t target, mixed-sign operands
+        // uint128_t target, mixed-sign operands (signed + unsigned)
         {
             unsigned __int128 ref {};
-            const bool ref_of {__builtin_add_overflow(sa, ub, &ref)};
+            const bool ref_of {ref_overflow(sa, ub, &ref)};
             uint128_t got {};
-            const bool got_of {ckd_add(&got, lib_s(sa), lib_u(ub))};
+            const bool got_of {ckd_overflow(&got, lib_s(sa), lib_u(ub))};
             BOOST_TEST_EQ(got_of, ref_of);
             BOOST_TEST(got == lib_u(ref));
         }
     }
 }
 
+void test_native_oracle()
+{
+    native_fuzz(
+        [](auto a, auto b, auto* r) { return __builtin_add_overflow(a, b, r); },
+        [](auto* r, auto a, auto b) { return ckd_add(r, a, b); });
+
+    native_fuzz(
+        [](auto a, auto b, auto* r) { return __builtin_sub_overflow(a, b, r); },
+        [](auto* r, auto a, auto b) { return ckd_sub(r, a, b); });
+
+    native_fuzz(
+        [](auto a, auto b, auto* r) { return __builtin_mul_overflow(a, b, r); },
+        [](auto* r, auto a, auto b) { return ckd_mul(r, a, b); });
+}
+
 #else
 
 void test_native_oracle() {}
@@ -155,124 +190,159 @@ void test_native_oracle() {}
 // Hand-verified edge cases that run on every platform, including those without
 // a native 128-bit type.
 //
-void test_u128_edges()
-{
-    constexpr auto u_max {(std::numeric_limits<uint128_t>::max)()};
-    uint128_t r {0};
-
-    // 2^128 - 1 + 1 == 2^128 -> wraps to 0.
-    BOOST_TEST_EQ(ckd_add(&r, u_max, uint128_t{1}), true);
-    BOOST_TEST(r == uint128_t{0});
-
-    // 2^128 - 1 + 0 fits.
-    BOOST_TEST_EQ(ckd_add(&r, u_max, uint128_t{0}), false);
-    BOOST_TEST(r == u_max);
-
-    // 5 + (-3) == 2 fits.
-    BOOST_TEST_EQ(ckd_add(&r, uint128_t{5}, int128_t{-3}), false);
-    BOOST_TEST(r == uint128_t{2});
+constexpr auto u_max {(std::numeric_limits<uint128_t>::max)()};
+constexpr auto i_max {(std::numeric_limits<int128_t>::max)()};
+constexpr auto i_min {(std::numeric_limits<int128_t>::min)()};
 
-    // 3 + (-5) == -2 cannot be represented; wraps to 2^128 - 2.
-    BOOST_TEST_EQ(ckd_add(&r, uint128_t{3}, int128_t{-5}), true);
-    BOOST_TEST(r == u_max - uint128_t{1});
-
-    // (2^128 - 1) + (-1) == 2^128 - 2 fits.
-    BOOST_TEST_EQ(ckd_add(&r, u_max, int128_t{-1}), false);
-    BOOST_TEST(r == u_max - uint128_t{1});
-
-    // (2^128 - 1) + (2^128 - 1) == 2^129 - 2 -> carry, wraps to 2^128 - 2.
-    BOOST_TEST_EQ(ckd_add(&r, u_max, u_max), true);
-    BOOST_TEST(r == u_max - uint128_t{1});
+void test_add_edges()
+{
+    uint128_t u {0};
+    BOOST_TEST_EQ(ckd_add(&u, u_max, uint128_t{1}), true);   // 2^128 wraps to 0
+    BOOST_TEST(u == uint128_t{0});
+    BOOST_TEST_EQ(ckd_add(&u, uint128_t{5}, int128_t{-3}), false);
+    BOOST_TEST(u == uint128_t{2});
+    BOOST_TEST_EQ(ckd_add(&u, uint128_t{3}, int128_t{-5}), true);   // -2 wraps
+    BOOST_TEST(u == u_max - uint128_t{1});
+    BOOST_TEST_EQ(ckd_add(&u, u_max, u_max), true);          // carry past 2^128
+
+    int128_t i {0};
+    BOOST_TEST_EQ(ckd_add(&i, i_max, int128_t{1}), true);    // INT128_MAX + 1 -> INT128_MIN
+    BOOST_TEST(i == i_min);
+    BOOST_TEST_EQ(ckd_add(&i, i_max, i_min), false);
+    BOOST_TEST(i == int128_t{-1});
+    BOOST_TEST_EQ(ckd_add(&i, u_max, uint128_t{0}), true);   // 2^128 - 1 unfit in signed
+    BOOST_TEST(i == int128_t{-1});
 }
 
-void test_i128_edges()
+void test_sub_edges()
 {
-    constexpr auto i_max {(std::numeric_limits<int128_t>::max)()};
-    constexpr auto i_min {(std::numeric_limits<int128_t>::min)()};
-    constexpr auto u_max {(std::numeric_limits<uint128_t>::max)()};
-    int128_t r {0};
-
-    // INT128_MAX + 1 overflows, wraps to INT128_MIN.
-    BOOST_TEST_EQ(ckd_add(&r, i_max, int128_t{1}), true);
-    BOOST_TEST(r == i_min);
-
-    // INT128_MAX + 0 fits.
-    BOOST_TEST_EQ(ckd_add(&r, i_max, int128_t{0}), false);
-    BOOST_TEST(r == i_max);
-
-    // INT128_MIN + (-1) overflows, wraps to INT128_MAX.
-    BOOST_TEST_EQ(ckd_add(&r, i_min, int128_t{-1}), true);
-    BOOST_TEST(r == i_max);
-
-    // INT128_MAX + INT128_MIN == -1 fits.
-    BOOST_TEST_EQ(ckd_add(&r, i_max, i_min), false);
-    BOOST_TEST(r == int128_t{-1});
-
-    // (2^128 - 1) [unsigned] + 1 == 2^128 cannot fit a signed target; wraps to 0.
-    BOOST_TEST_EQ(ckd_add(&r, u_max, int128_t{1}), true);
-    BOOST_TEST(r == int128_t{0});
+    uint128_t u {0};
+    BOOST_TEST_EQ(ckd_sub(&u, uint128_t{0}, uint128_t{1}), true);   // -1 wraps to 2^128 - 1
+    BOOST_TEST(u == u_max);
+    BOOST_TEST_EQ(ckd_sub(&u, uint128_t{5}, uint128_t{3}), false);
+    BOOST_TEST(u == uint128_t{2});
+    BOOST_TEST_EQ(ckd_sub(&u, u_max, int128_t{-1}), true);          // 2^128 wraps to 0
+    BOOST_TEST(u == uint128_t{0});
+
+    int128_t i {0};
+    BOOST_TEST_EQ(ckd_sub(&i, i_min, int128_t{1}), true);           // INT128_MIN - 1 -> INT128_MAX
+    BOOST_TEST(i == i_max);
+    BOOST_TEST_EQ(ckd_sub(&i, i_max, int128_t{-1}), true);          // -> INT128_MIN
+    BOOST_TEST(i == i_min);
+    BOOST_TEST_EQ(ckd_sub(&i, i_max, i_max), false);
+    BOOST_TEST(i == int128_t{0});
+    BOOST_TEST_EQ(ckd_sub(&i, i_min, i_min), false);
+    BOOST_TEST(i == int128_t{0});
+
+    // Narrow targets.
+    std::int32_t r32 {0};
+    BOOST_TEST_EQ(ckd_sub(&r32, int128_t{1000}, int128_t{2000}), false);
+    BOOST_TEST_EQ(r32, -1000);
 
-    // (2^128 - 1) [unsigned] + 0 == 2^128 - 1 cannot fit; reinterprets to -1.
-    BOOST_TEST_EQ(ckd_add(&r, u_max, uint128_t{0}), true);
-    BOOST_TEST(r == int128_t{-1});
+    std::uint8_t r8 {0};
+    BOOST_TEST_EQ(ckd_sub(&r8, uint128_t{0}, uint128_t{1}), true);
+    BOOST_TEST_EQ(static_cast<int>(r8), 255);
 }
 
-void test_narrow_target_wide_operands()
+void test_mul_edges()
 {
-    constexpr auto u_max {(std::numeric_limits<uint128_t>::max)()};
-    constexpr auto i_min {(std::numeric_limits<int128_t>::min)()};
-
-    // Small sum into a narrow signed target fits.
+    int128_t i {0};
+    BOOST_TEST_EQ(ckd_mul(&i, i_min, int128_t{-1}), true);          // 2^127 wraps to INT128_MIN
+    BOOST_TEST(i == i_min);
+    BOOST_TEST_EQ(ckd_mul(&i, i_max, int128_t{2}), true);           // 2^128 - 2 -> -2
+    BOOST_TEST(i == int128_t{-2});
+    BOOST_TEST_EQ(ckd_mul(&i, i_min, int128_t{1}), false);
+    BOOST_TEST(i == i_min);
+    BOOST_TEST_EQ(ckd_mul(&i, int128_t{-3}, int128_t{4}), false);
+    BOOST_TEST(i == int128_t{-12});
+    BOOST_TEST_EQ(ckd_mul(&i, int128_t{0}, int128_t{-7}), false);
+    BOOST_TEST(i == int128_t{0});
+
+    uint128_t u {0};
+    BOOST_TEST_EQ(ckd_mul(&u, u_max, uint128_t{2}), true);          // 2^129 - 2 wraps
+    BOOST_TEST(u == u_max - uint128_t{1});
+    const uint128_t two_64 {1U, 0U};                                // 2^64
+    BOOST_TEST_EQ(ckd_mul(&u, two_64, two_64), true);               // 2^128 wraps to 0
+    BOOST_TEST(u == uint128_t{0});
+    BOOST_TEST_EQ(ckd_mul(&u, uint128_t{0}, u_max), false);
+    BOOST_TEST(u == uint128_t{0});
+    BOOST_TEST_EQ(ckd_mul(&u, uint128_t{6}, uint128_t{7}), false);
+    BOOST_TEST(u == uint128_t{42});
+    BOOST_TEST_EQ(ckd_mul(&u, u_max, int128_t{-1}), true);          // negative result in unsigned
+    BOOST_TEST(u == uint128_t{1});
+
+    // Narrow targets.
     std::int32_t r32 {0};
-    BOOST_TEST_EQ(ckd_add(&r32, int128_t{1000}, int128_t{2000}), false);
-    BOOST_TEST_EQ(r32, 3000);
+    BOOST_TEST_EQ(ckd_mul(&r32, int128_t{1000}, int128_t{1000}), false);
+    BOOST_TEST_EQ(r32, 1000000);
 
-    // (2^128 - 1) + 2 == 2^128 + 1 -> wraps mod 256 to 1 in a uint8_t target.
     std::uint8_t r8 {0};
-    BOOST_TEST_EQ(ckd_add(&r8, u_max, uint128_t{2}), true);
-    BOOST_TEST_EQ(static_cast<int>(r8), 1);
+    BOOST_TEST_EQ(ckd_mul(&r8, uint128_t{20}, uint128_t{20}), true);    // 400 wraps mod 256
+    BOOST_TEST_EQ(static_cast<int>(r8), 144);
 
-    // INT128_MIN out of int32_t range; its low 32 bits are zero.
-    BOOST_TEST_EQ(ckd_add(&r32, i_min, int128_t{0}), true);
-    BOOST_TEST_EQ(r32, 0);
+    std::int8_t r8s {0};
+    BOOST_TEST_EQ(ckd_mul(&r8s, int128_t{-5}, int128_t{20}), false);
+    BOOST_TEST_EQ(static_cast<int>(r8s), -100);
 }
 
 //
-// constexpr usability.
+// constexpr usability for all three operations.
 //
-constexpr bool ckd_overflows_int_max()
+constexpr bool add_overflows_int_max()
 {
     int r {0};
     return ckd_add(&r, (std::numeric_limits<int>::max)(), 1);
 }
 
-constexpr int ckd_value()
+constexpr bool sub_overflows_int_min()
+{
+    int r {0};
+    return ckd_sub(&r, (std::numeric_limits<int>::min)(), 1);
+}
+
+constexpr bool mul_overflows_int_max()
 {
     int r {0};
-    ckd_add(&r, 2, 3);
+    return ckd_mul(&r, (std::numeric_limits<int>::max)(), 2);
+}
+
+constexpr int sub_value()
+{
+    int r {0};
+    ckd_sub(&r, 5, 3);
+    return r;
+}
+
+constexpr int mul_value()
+{
+    int r {0};
+    ckd_mul(&r, 6, 7);
     return r;
 }
 
-constexpr bool ckd_overflows_u128_max()
+constexpr bool mul_overflows_i128_min()
 {
-    uint128_t r {0};
-    return ckd_add(&r, (std::numeric_limits<uint128_t>::max)(), uint128_t{1});
+    int128_t r {0};
+    return ckd_mul(&r, (std::numeric_limits<int128_t>::min)(), int128_t{-1});
 }
 
 void test_constexpr()
 {
-    static_assert(ckd_overflows_int_max(), "INT_MAX + 1 overflows int");
-    static_assert(ckd_value() == 5, "2 + 3 == 5");
-    static_assert(ckd_overflows_u128_max(), "UINT128_MAX + 1 overflows uint128_t");
+    static_assert(add_overflows_int_max(),  "INT_MAX + 1 overflows int");
+    static_assert(sub_overflows_int_min(),  "INT_MIN - 1 overflows int");
+    static_assert(mul_overflows_int_max(),  "INT_MAX * 2 overflows int");
+    static_assert(sub_value() == 2,         "5 - 3 == 2");
+    static_assert(mul_value() == 42,        "6 * 7 == 42");
+    static_assert(mul_overflows_i128_min(), "INT128_MIN * -1 overflows int128_t");
 }
 
 int main()
 {
     test_standard_oracle();
     test_native_oracle();
-    test_u128_edges();
-    test_i128_edges();
-    test_narrow_target_wide_operands();
+    test_add_edges();
+    test_sub_edges();
+    test_mul_edges();
     test_constexpr();
 
     return boost::report_errors();

From ef2ed4ba862ef621c439a8d2b85177586d57430d Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Mon, 1 Jun 2026 11:17:14 -0400
Subject: [PATCH 06/10] Add checked arithmetic example

---
 examples/checked_arithmetic.cpp | 76 +++++++++++++++++++++++++++++++++
 test/Jamfile                    |  1 +
 2 files changed, 77 insertions(+)
 create mode 100644 examples/checked_arithmetic.cpp

diff --git a/examples/checked_arithmetic.cpp b/examples/checked_arithmetic.cpp
new file mode 100644
index 00000000..55db9834
--- /dev/null
+++ b/examples/checked_arithmetic.cpp
@@ -0,0 +1,76 @@
+// Copyright 2026 Matt Borland
+// Distributed under the Boost Software License, Version 1.0.
+// https://www.boost.org/LICENSE_1_0.txt
+
+// Individual headers
+
+#include <boost/int128/utilities.hpp>
+#include <boost/int128/iostream.hpp>
+
+// Or you can do a single header
+
+// #include <boost/int128.hpp>
+
+#include <cstdint>
+#include <limits>
+#include <iostream>
+
+int main()
+{
+    using boost::int128::uint128_t;
+    using boost::int128::int128_t;
+    using boost::int128::ckd_add;
+    using boost::int128::ckd_sub;
+    using boost::int128::ckd_mul;
+
+    std::cout << std::boolalpha;
+
+    // ckd_add, ckd_sub, and ckd_mul implement the C23 stdckdint.h contract: the
+    // operation is evaluated as if both operands had infinite range, the result
+    // is written to *result wrapped to that type's width, and the function
+    // returns true when the exact result did not fit.
+    constexpr auto u_max {std::numeric_limits<uint128_t>::max()};
+    constexpr auto i_max {std::numeric_limits<int128_t>::max()};
+    constexpr auto i_min {std::numeric_limits<int128_t>::min()};
+
+    // A result that fits returns false and holds the exact value.
+    std::cout << "=== Results That Fit ===" << std::endl;
+    int128_t r {};
+    bool overflow {ckd_add(&r, int128_t{20}, int128_t{22})};
+    std::cout << "ckd_add(20, 22): overflow=" << overflow << ", result=" << r << std::endl;
+
+    // Addition that exceeds the type wraps modulo 2^128 and reports overflow.
+    std::cout << "\n=== Addition Overflow ===" << std::endl;
+    uint128_t u {};
+    overflow = ckd_add(&u, u_max, uint128_t{1});
+    std::cout << "ckd_add(UINT128_MAX, 1): overflow=" << overflow << ", wrapped=" << u << std::endl;
+
+    // Subtracting below zero in an unsigned type wraps to the top of the range.
+    std::cout << "\n=== Subtraction Underflow ===" << std::endl;
+    overflow = ckd_sub(&u, uint128_t{0}, uint128_t{1});
+    std::cout << "ckd_sub(0, 1): overflow=" << overflow << ", wrapped=" << u << std::endl;
+
+    // Multiplication detects overflow that operator* would silently roll over,
+    // including INT128_MIN * -1, whose true result is not representable.
+    std::cout << "\n=== Multiplication Overflow ===" << std::endl;
+    overflow = ckd_mul(&r, i_max, int128_t{2});
+    std::cout << "ckd_mul(INT128_MAX, 2): overflow=" << overflow << ", wrapped=" << r << std::endl;
+    overflow = ckd_mul(&r, i_min, int128_t{-1});
+    std::cout << "ckd_mul(INT128_MIN, -1): overflow=" << overflow << ", wrapped=" << r << std::endl;
+
+    // The result type and the two operand types are independent: they may differ
+    // in width and signedness, and the exact mathematical value is always used.
+    std::cout << "\n=== Mixed Types ===" << std::endl;
+    std::int64_t narrow {};
+    overflow = ckd_add(&narrow, uint128_t{5}, int128_t{-3});
+    std::cout << "ckd_add<int64_t>(uint128_t{5}, int128_t{-3}): overflow=" << overflow
+              << ", result=" << narrow << std::endl;
+
+    // Narrow targets make the wrap-around easy to see (400 modulo 256 is 144).
+    std::uint8_t byte {};
+    overflow = ckd_mul(&byte, std::uint8_t{20}, std::uint8_t{20});
+    std::cout << "ckd_mul<uint8_t>(20, 20): overflow=" << overflow
+              << ", wrapped=" << static_cast<int>(byte) << std::endl;
+
+    return 0;
+}
diff --git a/test/Jamfile b/test/Jamfile
index 77063b6e..cae51584 100644
--- a/test/Jamfile
+++ b/test/Jamfile
@@ -100,6 +100,7 @@ run test_hash.cpp : : : <toolset>msvc:<cxxflags>/wd4324 ;
 run ../examples/construction.cpp ;
 run ../examples/bit.cpp ;
 run ../examples/saturating_arithmetic.cpp ;
+run ../examples/checked_arithmetic.cpp ;
 run ../examples/mixed_type_arithmetic.cpp ;
 run ../examples/stream.cpp ;
 run ../examples/basic_arithmetic.cpp ;

From 51ed17e7fc1657efe560f544aba640321aab785d Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Mon, 1 Jun 2026 11:18:06 -0400
Subject: [PATCH 07/10] Update utilities and api reference

---
 doc/modules/ROOT/nav.adoc                 |   3 +
 doc/modules/ROOT/pages/api_reference.adoc |  15 +++
 doc/modules/ROOT/pages/utilities.adoc     | 138 +++++++++++++++++++++-
 3 files changed, 155 insertions(+), 1 deletion(-)

diff --git a/doc/modules/ROOT/nav.adoc b/doc/modules/ROOT/nav.adoc
index ceeee540..95f36588 100644
--- a/doc/modules/ROOT/nav.adoc
+++ b/doc/modules/ROOT/nav.adoc
@@ -60,6 +60,9 @@
 * xref:string.adoc[]
 * xref:utilities.adoc[]
 ** xref:utilities.adoc#powm[Modular Exponentiation]
+** xref:utilities.adoc#ipow[Integer Power]
+** xref:utilities.adoc#isqrt[Integer Square Root]
+** xref:utilities.adoc#checked[Checked Arithmetic]
 * Benchmarks
 ** xref:u128_benchmarks.adoc[]
 *** xref:u128_benchmarks.adoc#u128_linux[Linux]
diff --git a/doc/modules/ROOT/pages/api_reference.adoc b/doc/modules/ROOT/pages/api_reference.adoc
index 2963d1a4..4a55dc25 100644
--- a/doc/modules/ROOT/pages/api_reference.adoc
+++ b/doc/modules/ROOT/pages/api_reference.adoc
@@ -286,6 +286,21 @@ Listed by analogous STL header.
 
 | xref:utilities.adoc#powm[`powm`]
 | Modular exponentiation `(base ^ exp) mod m`
+
+| xref:utilities.adoc#ipow[`ipow`]
+| Integer power `base ^ exp` (wraps modulo `2^128`)
+
+| xref:utilities.adoc#isqrt[`isqrt`]
+| Integer square root `floor(sqrt(n))`
+
+| xref:utilities.adoc#checked[`ckd_add`]
+| Checked addition (C23 `<stdckdint.h>` contract)
+
+| xref:utilities.adoc#checked[`ckd_sub`]
+| Checked subtraction (C23 `<stdckdint.h>` contract)
+
+| xref:utilities.adoc#checked[`ckd_mul`]
+| Checked multiplication (C23 `<stdckdint.h>` contract)
 |===
 
 [#api_macros]
diff --git a/doc/modules/ROOT/pages/utilities.adoc b/doc/modules/ROOT/pages/utilities.adoc
index c633983d..959c87bb 100644
--- a/doc/modules/ROOT/pages/utilities.adoc
+++ b/doc/modules/ROOT/pages/utilities.adoc
@@ -60,6 +60,142 @@ Negative bases are reduced before exponentiation; `(std::numeric_limits<int128_t
 | `base == 0` and `exp > 0`
 | `0`
 
-| Signed overload with `m <= 0` or `exp < 0`
+| Signed overload with non-positive `m` or negative `exp`
 | `0` (modular exponentiation requires a positive modulus; a negative exponent would require a modular inverse, which this interface does not provide)
 |===
+
+[#ipow]
+== Integer Power
+
+Computes `base ^ exp` by exponentiation by squaring, with a non-negative 64-bit exponent.
+Unlike `powm` there is no modulus: the result is the true power reduced modulo `2^128`, which is the same rollover behavior as the library's `operator*`.
+`ipow(base, exp)` is therefore equivalent to the product of `exp` copies of `base` computed with `operator*` (the empty product, for `exp == 0`, is `1`).
+
+[source, c++]
+----
+namespace boost {
+namespace int128 {
+
+BOOST_INT128_HOST_DEVICE constexpr uint128_t ipow(uint128_t base, std::uint64_t exp) noexcept;
+
+BOOST_INT128_HOST_DEVICE constexpr int128_t ipow(int128_t base, std::uint64_t exp) noexcept;
+
+} // namespace int128
+} // namespace boost
+----
+
+The exponent is unsigned, so negative powers (which are generally not integers) cannot be requested.
+Because the result wraps on overflow rather than saturating or reporting an error, `ipow` is appropriate when rollover semantics are intended.
+
+=== Special Cases
+
+[cols="1,1", options="header"]
+|===
+| Input | Result
+
+| `exp == 0`
+| `1` (including `ipow(0, 0) == 1`, following the conventional definition `0^0 == 1`)
+
+| `base == 0` and `exp > 0`
+| `0`
+
+| `base ^ exp` exceeds 128 bits
+| The low 128 bits of the true power, matching the rollover of `operator*`
+|===
+
+[#isqrt]
+== Integer Square Root
+
+Computes the integer square root `floor(sqrt(n))`: the largest integer `r` whose square does not exceed `n`.
+The computation runs entirely in integer arithmetic using Newton's method, so it is exact (no floating-point rounding) and usable in a `constexpr` context.
+
+[source, c++]
+----
+namespace boost {
+namespace int128 {
+
+BOOST_INT128_HOST_DEVICE constexpr uint128_t isqrt(uint128_t n) noexcept;
+
+BOOST_INT128_HOST_DEVICE constexpr int128_t isqrt(int128_t n) noexcept;
+
+} // namespace int128
+} // namespace boost
+----
+
+=== Special Cases
+
+[cols="1,1", options="header"]
+|===
+| Input | Result
+
+| `n < 0` (signed overload)
+| `0` (a real square root does not exist)
+
+| `n >= 0`
+| `floor(sqrt(n))`, the largest `r` whose square does not exceed `n` (so `isqrt(0) == 0` and `isqrt(1) == 1`)
+|===
+
+[#checked]
+== Checked Arithmetic
+
+`ckd_add`, `ckd_sub`, and `ckd_mul` implement the checked integer arithmetic interface introduced by C23's `<stdckdint.h>`, but without requiring a C23 toolchain; they are available in C++14 and later.
+
+Each function computes `a + b`, `a - b`, or `a * b` respectively, as if both operands were represented in a signed integer type with infinite range, and then converts that mathematical result to the type pointed to by `result`.
+The function returns `false` when `*result` correctly represents the mathematical result of the operation.
+Otherwise it returns `true`, and `*result` is set to the mathematical result wrapped around (reduced modulo `2^N`) to the width `N` of `*result`.
+`*result` is always written, whether or not the operation overflowed.
+
+[source, c++]
+----
+namespace boost {
+namespace int128 {
+
+template <typename T1, typename T2, typename T3>
+BOOST_INT128_HOST_DEVICE constexpr bool ckd_add(T1* result, T2 a, T3 b) noexcept;
+
+template <typename T1, typename T2, typename T3>
+BOOST_INT128_HOST_DEVICE constexpr bool ckd_sub(T1* result, T2 a, T3 b) noexcept;
+
+template <typename T1, typename T2, typename T3>
+BOOST_INT128_HOST_DEVICE constexpr bool ckd_mul(T1* result, T2 a, T3 b) noexcept;
+
+} // namespace int128
+} // namespace boost
+----
+
+The three type parameters are independent: the result type and the two operand types may differ in width and signedness.
+The operation always uses the exact mathematical value of each operand, so a negative signed value added to an unsigned value, or a product that needs up to 256 bits internally, is evaluated correctly.
+
+Following the C23 rules, `T1`, `T2`, and `T3` may be any integer type other than `bool`, plain `char`, an enumerated type, or a bit-precise (`_BitInt`) type.
+In addition to the standard and extended integer types, the library's `uint128_t` and `int128_t` are accepted.
+
+The following example exercises all three operations, including the wrap-around, the `INT128_MIN * -1` case, and the mixed-type behavior described above.
+
+.This https://github.com/cppalliance/int128/blob/develop/examples/checked_arithmetic.cpp[example] demonstrates checked addition, subtraction, and multiplication following the C23 checked-integer contract
+====
+[source, c++]
+----
+include::example$checked_arithmetic.cpp[]
+----
+
+.Expected Output
+[listing]
+----
+=== Results That Fit ===
+ckd_add(20, 22): overflow=false, result=42
+
+=== Addition Overflow ===
+ckd_add(UINT128_MAX, 1): overflow=true, wrapped=0
+
+=== Subtraction Underflow ===
+ckd_sub(0, 1): overflow=true, wrapped=340282366920938463463374607431768211455
+
+=== Multiplication Overflow ===
+ckd_mul(INT128_MAX, 2): overflow=true, wrapped=-2
+ckd_mul(INT128_MIN, -1): overflow=true, wrapped=-170141183460469231731687303715884105728
+
+=== Mixed Types ===
+ckd_add<int64_t>(uint128_t{5}, int128_t{-3}): overflow=false, result=2
+ckd_mul<uint8_t>(20, 20): overflow=true, wrapped=144
+----
+====

From 625d7f208625d8203d0cc8f3b315a0f380d85017 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Mon, 1 Jun 2026 11:21:27 -0400
Subject: [PATCH 08/10] Add example to main examples page

---
 doc/modules/ROOT/nav.adoc            |  1 +
 doc/modules/ROOT/pages/examples.adoc | 32 ++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/doc/modules/ROOT/nav.adoc b/doc/modules/ROOT/nav.adoc
index 95f36588..27492ba9 100644
--- a/doc/modules/ROOT/nav.adoc
+++ b/doc/modules/ROOT/nav.adoc
@@ -7,6 +7,7 @@
 ** xref:examples.adoc#examples_bit[`<bit>` support]
 ** xref:examples.adoc#examples_numeric[`<numeric>` support (Saturating Arithmetic)]
 ** xref:examples.adoc#examples_numeric_algorithms[`<numeric>` support (Numeric Algorithms)]
+** xref:examples.adoc#examples_checked[Checked Arithmetic]
 ** xref:examples.adoc#examples_mixed_sign[Mixed Signedness Arithmetic]
 ** xref:examples.adoc#examples_to_string[String Conversion (to_string)]
 ** xref:examples.adoc#examples_boost_math_random[Boost Math and Random Integration]
diff --git a/doc/modules/ROOT/pages/examples.adoc b/doc/modules/ROOT/pages/examples.adoc
index 8378c30c..18620bbc 100644
--- a/doc/modules/ROOT/pages/examples.adoc
+++ b/doc/modules/ROOT/pages/examples.adoc
@@ -278,6 +278,38 @@ midpoint(-100, -50) = -75
 ----
 ====
 
+[#examples_checked]
+== Checked Arithmetic
+
+.This https://github.com/cppalliance/int128/blob/develop/examples/checked_arithmetic.cpp[example] demonstrates checked addition, subtraction, and multiplication following the C23 checked-integer contract
+====
+[source, c++]
+----
+include::example$checked_arithmetic.cpp[]
+----
+
+.Expected Output
+[listing]
+----
+=== Results That Fit ===
+ckd_add(20, 22): overflow=false, result=42
+
+=== Addition Overflow ===
+ckd_add(UINT128_MAX, 1): overflow=true, wrapped=0
+
+=== Subtraction Underflow ===
+ckd_sub(0, 1): overflow=true, wrapped=340282366920938463463374607431768211455
+
+=== Multiplication Overflow ===
+ckd_mul(INT128_MAX, 2): overflow=true, wrapped=-2
+ckd_mul(INT128_MIN, -1): overflow=true, wrapped=-170141183460469231731687303715884105728
+
+=== Mixed Types ===
+ckd_add<int64_t>(uint128_t{5}, int128_t{-3}): overflow=false, result=2
+ckd_mul<uint8_t>(20, 20): overflow=true, wrapped=144
+----
+====
+
 [#examples_mixed_sign]
 == Mixed Signedness Arithmetic
 

From 094719e68219e8cb9b44557a35d98e84f17d42ef Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Mon, 1 Jun 2026 12:57:52 -0400
Subject: [PATCH 09/10] Attempt fix old compiler problems

---
 test/test_ckd.cpp | 204 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 197 insertions(+), 7 deletions(-)

diff --git a/test/test_ckd.cpp b/test/test_ckd.cpp
index 853f1c59..f1266815 100644
--- a/test/test_ckd.cpp
+++ b/test/test_ckd.cpp
@@ -23,12 +23,104 @@ static std::uniform_int_distribution<std::uint64_t> dist {0, UINT64_MAX};
 static std::uniform_int_distribution<int> small_dist {-1000, 1000};
 
 //
-// Oracle-based testing for the standard integer types. The __builtin_*_overflow
-// intrinsics implement exactly the C23 contract (exact result, wrap into the
-// destination, return true on overflow), so they are an independent reference.
+// Oracle-based testing for the standard integer types. Addition and subtraction
+// are checked against __builtin_add_overflow / __builtin_sub_overflow, which
+// implement the C23 contract exactly (exact result, wrapped into the
+// destination, true on overflow) and so are an independent reference.
+//
+// Multiplication uses a hand-rolled reference instead. __builtin_mul_overflow
+// returns the wrong result for signed operands with an unsigned destination on
+// GCC 7, and on Clang it lowers a 128-bit checked multiply to __muloti4, a
+// compiler-rt symbol that is not always linked. ref_std_mul_overflow forms the
+// exact product from 32-bit limbs (no 128-bit type, no runtime helper) so it is
+// correct and links on every supported toolchain.
 //
 #if defined(__GNUC__) || defined(__clang__)
 
+// 64x64 -> 128 bit unsigned product, returned as hi:lo, built from 32-bit limbs.
+// This needs neither a 128-bit type nor a runtime helper such as __muloti4, so
+// it links on every target including 32-bit ones.
+static void mul_64_to_128(const std::uint64_t a, const std::uint64_t b,
+                          std::uint64_t& hi, std::uint64_t& lo) noexcept
+{
+    const std::uint64_t mask {UINT64_C(0xFFFFFFFF)};
+    const std::uint64_t a0 {a & mask};
+    const std::uint64_t a1 {a >> 32};
+    const std::uint64_t b0 {b & mask};
+    const std::uint64_t b1 {b >> 32};
+
+    const std::uint64_t p00 {a0 * b0};
+    const std::uint64_t p01 {a0 * b1};
+    const std::uint64_t p10 {a1 * b0};
+    const std::uint64_t p11 {a1 * b1};
+
+    const std::uint64_t mid {(p00 >> 32) + (p01 & mask) + (p10 & mask)};
+    lo = (p00 & mask) | (mid << 32);
+    hi = p11 + (p01 >> 32) + (p10 >> 32) + (mid >> 32);
+}
+
+// Signedness trait that covers the standard integer types and, through the
+// specializations in the 128-bit section below, the native extended types.
+template <typename T>
+struct oracle_is_signed : std::is_signed<T> {};
+
+template <typename T, std::enable_if_t<oracle_is_signed<T>::value, int> = 0>
+std::uint64_t std_magnitude(const T value, bool& negative) noexcept
+{
+    negative = value < 0;
+    const std::uint64_t image {static_cast<std::uint64_t>(value)};
+    return negative ? (std::uint64_t{0} - image) : image;
+}
+
+template <typename T, std::enable_if_t<!oracle_is_signed<T>::value, int> = 0>
+std::uint64_t std_magnitude(const T value, bool& negative) noexcept
+{
+    negative = false;
+    return static_cast<std::uint64_t>(value);
+}
+
+template <typename R, std::enable_if_t<!oracle_is_signed<R>::value, int> = 0>
+bool oracle_overflows_std(const std::uint64_t magnitude, const bool negative) noexcept
+{
+    const std::uint64_t r_max {static_cast<std::uint64_t>((std::numeric_limits<R>::max)())};
+    return negative ? (magnitude != 0U) : (magnitude > r_max);
+}
+
+template <typename R, std::enable_if_t<oracle_is_signed<R>::value, int> = 0>
+bool oracle_overflows_std(const std::uint64_t magnitude, const bool negative) noexcept
+{
+    const std::uint64_t r_max {static_cast<std::uint64_t>((std::numeric_limits<R>::max)())};
+    const std::uint64_t min_magnitude {r_max + 1U};
+    return negative ? (magnitude > min_magnitude) : (magnitude > r_max);
+}
+
+// Independent reference for the C23 ckd_mul contract on the standard integer
+// types: forms the exact product, wraps it into *r, and reports whether the
+// destination cannot represent the exact value.
+template <typename A, typename B, typename R>
+bool ref_std_mul_overflow(const A a, const B b, R* r) noexcept
+{
+    bool a_negative {};
+    bool b_negative {};
+    const std::uint64_t a_magnitude {std_magnitude(a, a_negative)};
+    const std::uint64_t b_magnitude {std_magnitude(b, b_negative)};
+
+    std::uint64_t hi {};
+    std::uint64_t lo {};
+    mul_64_to_128(a_magnitude, b_magnitude, hi, lo);
+
+    const bool negative {a_negative != b_negative};
+    const std::uint64_t wrapped {negative ? (std::uint64_t{0} - lo) : lo};
+    *r = static_cast<R>(wrapped);
+
+    if (hi != 0U)
+    {
+        return true;
+    }
+
+    return oracle_overflows_std<R>(lo, negative);
+}
+
 template <typename T1, typename T2, typename T3, typename Ref, typename Ckd>
 void check_op(const T2 a, const T3 b, Ref ref_overflow, Ckd ckd_overflow)
 {
@@ -81,7 +173,7 @@ void test_standard_oracle()
         [](auto* r, auto a, auto b) { return ckd_sub(r, a, b); });
 
     fuzz_all_triples(
-        [](auto a, auto b, auto* r) { return __builtin_mul_overflow(a, b, r); },
+        [](auto a, auto b, auto* r) { return ref_std_mul_overflow(a, b, r); },
         [](auto* r, auto a, auto b) { return ckd_mul(r, a, b); });
 }
 
@@ -93,8 +185,10 @@ void test_standard_oracle() {}
 
 //
 // Oracle-based testing at the full 128-bit width using the native compiler
-// type, which again matches the C23 contract exactly. This is the only place
-// products genuinely exceed 128 bits, exercising the multiply width check.
+// type. Addition and subtraction again use the builtins; multiplication uses
+// ref_native_mul_overflow, which assembles the 256-bit product from 64-bit limb
+// products so that no 128-bit multiply (hence no __muloti4) is emitted. This is
+// the only place products genuinely exceed 128 bits, exercising the width check.
 //
 #if defined(__SIZEOF_INT128__) && (defined(__GNUC__) || defined(__clang__))
 
@@ -113,6 +207,102 @@ static unsigned __int128 rand_native()
     return (static_cast<unsigned __int128>(dist(rng)) << 64) | static_cast<unsigned __int128>(dist(rng));
 }
 
+// std::is_signed is not guaranteed to recognise the extended integer types
+// under a strict -std flag, so their signedness is stated explicitly here.
+template <>
+struct oracle_is_signed<__int128> : std::true_type {};
+
+template <>
+struct oracle_is_signed<unsigned __int128> : std::false_type {};
+
+template <typename T, std::enable_if_t<oracle_is_signed<T>::value, int> = 0> // overload chosen when oracle_is_signed<T> is true
+unsigned __int128 native_magnitude(const T value, bool& negative) noexcept // returns |value| and reports the sign through negative
+{
+    negative = value < 0;
+    const unsigned __int128 image {static_cast<unsigned __int128>(value)}; // value converted modulo 2^128
+    return negative ? (static_cast<unsigned __int128>(0) - image) : image; // unsigned negation wraps, so even the most negative value is handled without signed overflow
+}
+
+template <typename T, std::enable_if_t<!oracle_is_signed<T>::value, int> = 0> // overload chosen when oracle_is_signed<T> is false
+unsigned __int128 native_magnitude(const T value, bool& negative) noexcept // returns value unchanged as its own magnitude
+{
+    negative = false; // this overload never reports a negative operand
+    return static_cast<unsigned __int128>(value);
+}
+
+template <typename R, std::enable_if_t<!oracle_is_signed<R>::value, int> = 0> // overload for targets R where oracle_is_signed<R> is false
+bool oracle_overflows_128(const unsigned __int128 magnitude, const bool negative) noexcept // returns true when the sign/magnitude pair is unrepresentable
+{
+    // A magnitude that fits in 128 bits fits an unsigned 128-bit target exactly;
+    // only a non-zero negative value is unrepresentable.
+    return negative && magnitude != 0U; // a zero magnitude with the negative flag set still counts as representable
+}
+
+template <typename R, std::enable_if_t<oracle_is_signed<R>::value, int> = 0> // overload for targets R where oracle_is_signed<R> is true
+bool oracle_overflows_128(const unsigned __int128 magnitude, const bool negative) noexcept // returns true when the sign/magnitude pair is unrepresentable
+{
+    const unsigned __int128 positive_max {(static_cast<unsigned __int128>(1) << 127) - 1}; // 2^127 - 1: largest magnitude allowed for a non-negative result
+    const unsigned __int128 negative_max {static_cast<unsigned __int128>(1) << 127}; // 2^127: largest magnitude allowed for a negative result
+    return negative ? (magnitude > negative_max) : (magnitude > positive_max); // limits are inclusive: a magnitude equal to the limit still fits
+}
+
+// Independent reference for the C23 ckd_mul contract at the full 128-bit width.
+// The 256-bit product is assembled from 64-bit limb products so that no 128-bit
+// multiply (and therefore no __muloti4) is emitted; this function itself only
+// adds, subtracts, shifts, ORs, and compares unsigned __int128 values.
+template <typename A, typename B, typename R>
+bool ref_native_mul_overflow(const A a, const B b, R* r) noexcept // returns true on overflow; *r receives the wrapped product
+{
+    bool a_negative {};
+    bool b_negative {};
+    const unsigned __int128 a_magnitude {native_magnitude(a, a_negative)}; // |a| plus its sign flag
+    const unsigned __int128 b_magnitude {native_magnitude(b, b_negative)}; // |b| plus its sign flag
+
+    const std::uint64_t a0 {static_cast<std::uint64_t>(a_magnitude)}; // low 64 bits of |a|
+    const std::uint64_t a1 {static_cast<std::uint64_t>(a_magnitude >> 64)}; // high 64 bits of |a|
+    const std::uint64_t b0 {static_cast<std::uint64_t>(b_magnitude)}; // low 64 bits of |b|
+    const std::uint64_t b1 {static_cast<std::uint64_t>(b_magnitude >> 64)}; // high 64 bits of |b|
+
+    std::uint64_t h00 {};
+    std::uint64_t l00 {};
+    std::uint64_t h01 {};
+    std::uint64_t l01 {};
+    std::uint64_t h10 {};
+    std::uint64_t l10 {};
+    std::uint64_t h11 {};
+    std::uint64_t l11 {};
+    mul_64_to_128(a0, b0, h00, l00); // presumably hXY:lXY = high:low halves of aX * bY (mul_64_to_128 is not visible here - confirm)
+    mul_64_to_128(a0, b1, h01, l01);
+    mul_64_to_128(a1, b0, h10, l10);
+    mul_64_to_128(a1, b1, h11, l11);
+
+    const unsigned __int128 p00 {(static_cast<unsigned __int128>(h00) << 64) | l00}; // rejoin each partial product into one 128-bit value
+    const unsigned __int128 p01 {(static_cast<unsigned __int128>(h01) << 64) | l01};
+    const unsigned __int128 p10 {(static_cast<unsigned __int128>(h10) << 64) | l10};
+    const unsigned __int128 p11 {(static_cast<unsigned __int128>(h11) << 64) | l11};
+
+    // product = p11 * 2^128 + (p01 + p10) * 2^64 + p00, split into a low and a
+    // high 128-bit half with the carries tracked explicitly.
+    const unsigned __int128 cross {p01 + p10}; // may wrap past 128 bits; the lost bit is captured on the next line
+    const bool cross_carry {cross < p01}; // an unsigned sum wrapped iff it is smaller than one of its addends
+    const unsigned __int128 low128 {p00 + (cross << 64)}; // only the low 64 bits of cross land in the low half
+    const bool low_carry {low128 < p00}; // carry out of the low half into the high half
+    const unsigned __int128 high128 {p11 + (cross >> 64) + // the high 64 bits of cross land in the high half
+                                     (static_cast<unsigned __int128>(cross_carry) << 64) + // the bit lost from cross weighs 2^64 within the high half
+                                     static_cast<unsigned __int128>(low_carry)};
+
+    const bool negative {a_negative != b_negative}; // true when exactly one operand was flagged negative
+    const unsigned __int128 wrapped {negative ? (static_cast<unsigned __int128>(0) - low128) : low128}; // unsigned negation, i.e. the negated value modulo 2^128
+    *r = static_cast<R>(wrapped); // *r is written unconditionally, including when overflow is reported
+
+    if (high128 != 0U) // any bit in the upper 128 bits is always reported as overflow
+    {
+        return true;
+    }
+
+    return oracle_overflows_128<R>(low128, negative); // magnitude fits in 128 bits: the verdict depends on the target's signedness
+}
+
 template <typename Ref, typename Ckd>
 void native_fuzz(Ref ref_overflow, Ckd ckd_overflow)
 {
@@ -176,7 +366,7 @@ void test_native_oracle()
         [](auto* r, auto a, auto b) { return ckd_sub(r, a, b); });
 
     native_fuzz(
-        [](auto a, auto b, auto* r) { return __builtin_mul_overflow(a, b, r); },
+        [](auto a, auto b, auto* r) { return ref_native_mul_overflow(a, b, r); },
         [](auto* r, auto a, auto b) { return ckd_mul(r, a, b); });
 }
 

From 9c0a7c9ff437d06c367b704fb0da360fe15428eb Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Mon, 1 Jun 2026 13:44:59 -0400
Subject: [PATCH 10/10] Disable constexpr testing with GCC-7

---
 test/test_ckd.cpp | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/test/test_ckd.cpp b/test/test_ckd.cpp
index f1266815..35b2a84c 100644
--- a/test/test_ckd.cpp
+++ b/test/test_ckd.cpp
@@ -478,6 +478,11 @@ void test_mul_edges()
 //
 // constexpr usability for all three operations.
 //
+
+#if defined(__GNUC__) && __GNUC__ == 7 && !defined(__clang__) && !defined(__SIZEOF_INT128__)
+#  define BOOST_INT128_TEST_CKD_NO_CONSTEXPR_128 // when defined, the constexpr int128_t ckd_mul check in this file is compiled out
+#endif // GCC 7 (not clang) without __SIZEOF_INT128__
+
 constexpr bool add_overflows_int_max()
 {
     int r {0};
@@ -510,11 +515,13 @@ constexpr int mul_value()
     return r;
 }
 
+#ifndef BOOST_INT128_TEST_CKD_NO_CONSTEXPR_128
 constexpr bool mul_overflows_i128_min()
 {
     int128_t r {0};
     return ckd_mul(&r, (std::numeric_limits<int128_t>::min)(), int128_t{-1});
 }
+#endif // BOOST_INT128_TEST_CKD_NO_CONSTEXPR_128
 
 void test_constexpr()
 {
@@ -523,7 +530,9 @@ void test_constexpr()
     static_assert(mul_overflows_int_max(),  "INT_MAX * 2 overflows int");
     static_assert(sub_value() == 2,         "5 - 3 == 2");
     static_assert(mul_value() == 42,        "6 * 7 == 42");
+#ifndef BOOST_INT128_TEST_CKD_NO_CONSTEXPR_128
     static_assert(mul_overflows_i128_min(), "INT128_MIN * -1 overflows int128_t");
+#endif
 }
 
 int main()