From b1889c7738543c7e7764f51e1ede18c378367033 Mon Sep 17 00:00:00 2001 From: Trent Nelson Date: Tue, 16 Jun 2026 19:38:49 -0700 Subject: [PATCH] gh-151518: Avoid STW starvation of attaching threads Free-threaded stop-the-world pauses can otherwise starve a thread trying to reattach after it was suspended while detached. A tight manual gc.collect() loop can release and immediately request the next stop-the-world pause, repeatedly parking the detached thread before it can attach and make progress. Add a distinct _Py_THREAD_SUSPENDED_DETACHED state for tstates parked from DETACHED. tstate_wait_attach() marks an attach waiter only after observing that detached-origin suspended state, and park_detached_threads() skips only those active waiters on later stop-the-world passes. The ordinary successful tstate_try_attach() path remains the baseline CAS-only path. Teach the related stop-the-world paths about both suspended states, including start_the_world() and tstate_delete_common(). Keep the new wait flag after the existing hot free-threaded _PyThreadStateImpl fields so their offsets do not move. Add a free-threaded GC regression test that runs a subprocess with a tight gc.collect() worker and verifies the main thread can reattach after sleeping and stop the worker. --- Include/cpython/pystate.h | 3 +- Include/internal/pycore_pystate.h | 21 ++++---- Include/internal/pycore_tstate.h | 12 ++++- Lib/test/test_free_threading/test_gc.py | 48 +++++++++++++++++++ ...-06-16-19-20-00.gh-issue-151518.e6v0Js.rst | 2 + Python/pystate.c | 40 ++++++++++++++-- 6 files changed, 107 insertions(+), 19 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-06-16-19-20-00.gh-issue-151518.e6v0Js.rst diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index f367146e262bfe8..e551f624b6289b4 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -118,8 +118,7 @@ struct _ts { int _whence; - /* Thread state (_Py_THREAD_ATTACHED, _Py_THREAD_DETACHED, _Py_THREAD_SUSPENDED). - See Include/internal/pycore_pystate.h for more details. */ + /* Thread state. See Include/internal/pycore_pystate.h for details. */ int state; int py_recursion_remaining; diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index ca6819d2cd44730..c85ff8a23b1accc 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -21,13 +21,13 @@ extern "C" { // interpreter at the same time. Only the "bound" thread may perform the // transitions between "attached" and "detached" on its own PyThreadState. // -// The "suspended" state is used to implement stop-the-world pauses, such as -// for cyclic garbage collection. It is only used in `--disable-gil` builds. -// The "suspended" state is similar to the "detached" state in that in both -// states the thread is not allowed to call most Python APIs. However, unlike -// the "detached" state, a thread may not transition itself out from the -// "suspended" state. Only the thread performing a stop-the-world pause may -// transition a thread from the "suspended" state back to the "detached" state. +// The "suspended" states are used to implement stop-the-world pauses, such as +// for cyclic garbage collection. They are only used in `--disable-gil` builds. +// They are similar to the "detached" state in that the thread is not allowed +// to call most Python APIs. However, unlike the "detached" state, a thread may +// not transition itself out from a "suspended" state. Only the thread +// performing a stop-the-world pause may transition a thread from a "suspended" +// state back to the "detached" state. // // The "shutting down" state is used when the interpreter is being finalized. // Threads in this state can't do anything other than block the OS thread. @@ -36,9 +36,9 @@ extern "C" { // State transition diagram: // // (bound thread) (stop-the-world thread) -// [attached] <-> [detached] <-> [suspended] +// [attached] <-> [detached] <-> [suspended-detached] // | ^ -// +---------------------------->---------------------------+ +// +----------------------> [suspended] ---------------------+ // (bound thread) // // The (bound thread) and (stop-the-world thread) labels indicate which thread @@ -46,7 +46,8 @@ extern "C" { #define _Py_THREAD_DETACHED 0 #define _Py_THREAD_ATTACHED 1 #define _Py_THREAD_SUSPENDED 2 -#define _Py_THREAD_SHUTTING_DOWN 3 +#define _Py_THREAD_SUSPENDED_DETACHED 3 +#define _Py_THREAD_SHUTTING_DOWN 4 /* Check if the current thread is the main thread. diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h index eb2b0c84acdc7c8..5e9f441ed51373e 100644 --- a/Include/internal/pycore_tstate.h +++ b/Include/internal/pycore_tstate.h @@ -105,8 +105,16 @@ typedef struct _PyThreadStateImpl { #ifdef Py_GIL_DISABLED // gh-144438: Add padding to ensure that the fields above don't share a - // cache line with other allocations. - char __padding[64]; + // cache line with other allocations. Reuse the first bytes of the padding + // for a cold stop-the-world flag without growing the thread state. + union { + struct { + // Set while the thread is waiting to attach after a + // stop-the-world pause suspended it while detached. + int stw_attach_waiting; + }; + char __padding[64]; + }; #endif } _PyThreadStateImpl; diff --git a/Lib/test/test_free_threading/test_gc.py b/Lib/test/test_free_threading/test_gc.py index 399010234509408..e2c91172fd1a45f 100644 --- a/Lib/test/test_free_threading/test_gc.py +++ b/Lib/test/test_free_threading/test_gc.py @@ -1,10 +1,14 @@ import unittest +import subprocess +import sys +import textwrap import threading from threading import Thread from unittest import TestCase import gc +from test import support from test.support import threading_helper @@ -153,6 +157,50 @@ def reader(): with threading_helper.start_threads(threads): pass + @support.requires_subprocess() + def test_tight_gc_loop_does_not_starve_attach(self): + script = textwrap.dedent(""" + import gc + import importlib + import threading + import time + + modules = ( + "abc", "argparse", "collections", "contextlib", + "decimal", "enum", "functools", "heapq", + "importlib", "inspect", "itertools", "json", + "math", "operator", "random", "re", + ) + for name in modules: + importlib.import_module(name) + + stop = threading.Event() + + def collect(): + while not stop.is_set(): + gc.collect() + + thread = threading.Thread(target=collect, daemon=True) + thread.start() + time.sleep(1.0) + stop.set() + thread.join(5.0) + if thread.is_alive(): + raise SystemExit("GC thread did not stop") + """) + proc = subprocess.run( + [sys.executable, "-I", "-X", "faulthandler", "-c", script], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + timeout=support.SHORT_TIMEOUT, + ) + self.assertEqual( + proc.returncode, + 0, + f"stdout:\n{proc.stdout}\nstderr:\n{proc.stderr}", + ) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-06-16-19-20-00.gh-issue-151518.e6v0Js.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-16-19-20-00.gh-issue-151518.e6v0Js.rst new file mode 100644 index 000000000000000..af52eac69c476a5 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-16-19-20-00.gh-issue-151518.e6v0Js.rst @@ -0,0 +1,2 @@ +Fix a free-threaded stop-the-world race that could starve a thread reattaching +after being suspended while detached. diff --git a/Python/pystate.c b/Python/pystate.c index e90642fa882db72..e0db2568dd7397a 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1938,7 +1938,9 @@ tstate_delete_common(PyThreadState *tstate, int release_gil) if (tstate->next) { tstate->next->prev = tstate->prev; } - if (tstate->state != _Py_THREAD_SUSPENDED) { + if (tstate->state != _Py_THREAD_SUSPENDED && + tstate->state != _Py_THREAD_SUSPENDED_DETACHED) + { // Any ongoing stop-the-world request should not wait for us because // our thread is getting deleted. if (interp->stoptheworld.requested) { @@ -2236,9 +2238,22 @@ tstate_set_detached(PyThreadState *tstate, int detached_state) static void tstate_wait_attach(PyThreadState *tstate) { +#ifdef Py_GIL_DISABLED + _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate; + int stw_attach_waiting = 0; +#endif do { int state = _Py_atomic_load_int_relaxed(&tstate->state); - if (state == _Py_THREAD_SUSPENDED) { + if (state == _Py_THREAD_SUSPENDED || + state == _Py_THREAD_SUSPENDED_DETACHED) + { +#ifdef Py_GIL_DISABLED + if (state == _Py_THREAD_SUSPENDED_DETACHED) { + stw_attach_waiting = 1; + _Py_atomic_store_int_relaxed( + &tstate_impl->stw_attach_waiting, 1); + } +#endif // Wait until we're switched out of SUSPENDED to DETACHED. _PyParkingLot_Park(&tstate->state, &state, sizeof(tstate->state), /*timeout=*/-1, NULL, /*detach=*/0); @@ -2252,6 +2267,11 @@ tstate_wait_attach(PyThreadState *tstate) } // Once we're back in DETACHED we can re-attach } while (!tstate_try_attach(tstate)); +#ifdef Py_GIL_DISABLED + if (stw_attach_waiting) { + _Py_atomic_store_int_relaxed(&tstate_impl->stw_attach_waiting, 0); + } +#endif } void @@ -2421,9 +2441,16 @@ park_detached_threads(struct _stoptheworld_state *stw) _Py_FOR_EACH_TSTATE_UNLOCKED(i, t) { int state = _Py_atomic_load_int_relaxed(&t->state); if (state == _Py_THREAD_DETACHED) { + _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)t; + if (_Py_atomic_load_int_relaxed( + &tstate_impl->stw_attach_waiting)) + { + continue; + } // Atomically transition to "suspended" if in "detached" state. if (_Py_atomic_compare_exchange_int( - &t->state, &state, _Py_THREAD_SUSPENDED)) { + &t->state, &state, + _Py_THREAD_SUSPENDED_DETACHED)) { num_parked++; } } @@ -2508,8 +2535,11 @@ start_the_world(struct _stoptheworld_state *stw) _Py_FOR_EACH_STW_INTERP(stw, i) { _Py_FOR_EACH_TSTATE_UNLOCKED(i, t) { if (t != stw->requester) { - assert(_Py_atomic_load_int_relaxed(&t->state) == - _Py_THREAD_SUSPENDED); +#ifndef NDEBUG + int state = _Py_atomic_load_int_relaxed(&t->state); + assert(state == _Py_THREAD_SUSPENDED || + state == _Py_THREAD_SUSPENDED_DETACHED); +#endif _Py_atomic_store_int(&t->state, _Py_THREAD_DETACHED); _PyParkingLot_UnparkAll(&t->state); }