From 4629c2215a9a4b3d1ec4a306cd4dd7d11dcfebb4 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 23 Apr 2026 14:42:57 -0400 Subject: [PATCH 1/2] gh-113956: Make intern_common thread-safe in free-threaded build (gh-148886) Avoid racing with the owning thread's refcount operations when immortalizing an interned string: if we don't own it and its refcount isn't merged, intern a copy we own instead. Use atomic stores in _Py_SetImmortalUntracked so concurrent atomic reads are race-free. --- Lib/test/test_free_threading/test_str.py | 22 +++++++- ...-04-22-14-55-18.gh-issue-113956.0VEXd6.rst | 4 ++ Objects/object.c | 6 +- Objects/unicodeobject.c | 56 ++++++++++++++++--- 4 files changed, 77 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-04-22-14-55-18.gh-issue-113956.0VEXd6.rst diff --git a/Lib/test/test_free_threading/test_str.py b/Lib/test/test_free_threading/test_str.py index 9a1ce3620ac4b2..11e04009956db1 100644 --- a/Lib/test/test_free_threading/test_str.py +++ b/Lib/test/test_free_threading/test_str.py @@ -1,7 +1,9 @@ +import sys +import threading import unittest from itertools import cycle -from threading import Event, Thread +from threading import Barrier, Event, Thread from unittest import TestCase from test.support import threading_helper @@ -69,6 +71,24 @@ def reader_func(): for reader in readers: reader.join() + def test_intern_unowned_string(self): + # Test interning strings owned by various threads. + strings = [f"intern_race_owner_{i}" for i in range(50)] + + NUM_THREADS = 5 + b = Barrier(NUM_THREADS) + + def interner(): + tid = threading.get_ident() + for i in range(20): + strings.append(f"intern_{tid}_{i}") + b.wait() + for s in strings: + r = sys.intern(s) + self.assertTrue(sys._is_interned(r)) + + threading_helper.run_concurrently(interner, nthreads=NUM_THREADS) + def test_maketrans_dict_concurrent_modification(self): for _ in range(5): d = {2000: 'a'} diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-22-14-55-18.gh-issue-113956.0VEXd6.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-22-14-55-18.gh-issue-113956.0VEXd6.rst new file mode 100644 index 00000000000000..54c04bbc28d416 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-22-14-55-18.gh-issue-113956.0VEXd6.rst @@ -0,0 +1,4 @@ +Fix a data race in :func:`sys.intern` in the free-threaded build when +interning a string owned by another thread. An interned copy owned by the +current thread is used instead when it is not safe to immortalize the +original. diff --git a/Objects/object.c b/Objects/object.c index 3166254f6f640b..4fa20470601eb3 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2768,9 +2768,9 @@ _Py_SetImmortalUntracked(PyObject *op) return; } #ifdef Py_GIL_DISABLED - op->ob_tid = _Py_UNOWNED_TID; - op->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL; - op->ob_ref_shared = 0; + _Py_atomic_store_uintptr_relaxed(&op->ob_tid, _Py_UNOWNED_TID); + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, _Py_IMMORTAL_REFCNT_LOCAL); + _Py_atomic_store_ssize_relaxed(&op->ob_ref_shared, 0); _Py_atomic_or_uint8(&op->ob_gc_bits, _PyGC_BITS_DEFERRED); #elif SIZEOF_VOID_P > 4 op->ob_flags = _Py_IMMORTAL_FLAGS; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index d2569132998acc..9aee7120c811de 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -589,6 +589,14 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) { #define CHECK(expr) \ do { if (!(expr)) { _PyObject_ASSERT_FAILED_MSG(op, Py_STRINGIFY(expr)); } } while (0) +#ifdef Py_GIL_DISABLED +# define CHECK_IF_GIL(expr) (void)(expr) +# define CHECK_IF_FT(expr) CHECK(expr) +#else +# define CHECK_IF_GIL(expr) CHECK(expr) +# define CHECK_IF_FT(expr) (void)(expr) +#endif + assert(op != NULL); CHECK(PyUnicode_Check(op)); @@ -669,11 +677,9 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) /* Check interning state */ #ifdef Py_DEBUG - // Note that we do not check `_Py_IsImmortal(op)`, since stable ABI - // extensions can make immortal strings mortal (but with a high enough - // refcount). - // The other way is extremely unlikely (worth a potential failed assertion - // in a debug build), so we do check `!_Py_IsImmortal(op)`. + // Note that we do not check `_Py_IsImmortal(op)` in the GIL-enabled build + // since stable ABI extensions can make immortal strings mortal (but with a + // high enough refcount). switch (PyUnicode_CHECK_INTERNED(op)) { case SSTATE_NOT_INTERNED: if (ascii->state.statically_allocated) { @@ -683,18 +689,20 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) // are static but use SSTATE_NOT_INTERNED } else { - CHECK(!_Py_IsImmortal(op)); + CHECK_IF_GIL(!_Py_IsImmortal(op)); } break; case SSTATE_INTERNED_MORTAL: CHECK(!ascii->state.statically_allocated); - CHECK(!_Py_IsImmortal(op)); + CHECK_IF_GIL(!_Py_IsImmortal(op)); break; case SSTATE_INTERNED_IMMORTAL: CHECK(!ascii->state.statically_allocated); + CHECK_IF_FT(_Py_IsImmortal(op)); break; case SSTATE_INTERNED_IMMORTAL_STATIC: CHECK(ascii->state.statically_allocated); + CHECK_IF_FT(_Py_IsImmortal(op)); break; default: Py_UNREACHABLE(); @@ -14208,6 +14216,18 @@ immortalize_interned(PyObject *s) FT_ATOMIC_STORE_UINT8(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_IMMORTAL); } +#ifdef Py_GIL_DISABLED +static bool +can_immortalize_safely(PyObject *s) +{ + if (_Py_IsOwnedByCurrentThread(s) || _Py_IsImmortal(s)) { + return true; + } + Py_ssize_t shared = _Py_atomic_load_ssize(&s->ob_ref_shared); + return _Py_REF_IS_MERGED(shared); +} +#endif + static /* non-null */ PyObject* intern_common(PyInterpreterState *interp, PyObject *s /* stolen */, bool immortalize) @@ -14236,11 +14256,16 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */, // no, go on break; case SSTATE_INTERNED_MORTAL: +#ifndef Py_GIL_DISABLED // yes but we might need to make it immortal if (immortalize) { immortalize_interned(s); } return s; +#else + // not fully interned yet; fall through to the locking path + break; +#endif default: // all done return s; @@ -14305,6 +14330,23 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */, Py_DECREF(r); } #endif + +#ifdef Py_GIL_DISABLED + // Immortalization writes to the refcount fields non-atomically. That + // races with Py_INCREF / Py_DECREF on the thread that owns `s`. If we + // don't own it (and its refcount hasn't been merged), intern a copy + // we own instead. + if (!can_immortalize_safely(s)) { + PyObject *copy = _PyUnicode_Copy(s); + if (copy == NULL) { + PyErr_Clear(); + return s; + } + Py_DECREF(s); + s = copy; + } +#endif + FT_MUTEX_LOCK(INTERN_MUTEX); PyObject *t; { From 448d7b96c181d13ca7f8977780e85b53b2716294 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20S=C5=82awecki?= Date: Thu, 23 Apr 2026 21:07:28 +0200 Subject: [PATCH 2/2] gh-145239: Accept unary plus literal pattern (#148566) Add '+' alternatives to signed_number and signed_real_number grammar rules, mirroring how unary minus is already handled for pattern matching. Unary plus is a no-op on numbers so the value is returned directly without wrapping in a UnaryOp node. --- Doc/reference/compound_stmts.rst | 2 +- Doc/whatsnew/3.15.rst | 4 + Grammar/python.gram | 3 + Lib/test/.ruff.toml | 2 + Lib/test/test_patma.py | 90 +++++++++++++++++++ ...-04-13-23-21-45.gh-issue-145239.pL8qRt.rst | 3 + Parser/parser.c | 87 +++++++++++++++++- 7 files changed, 187 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-04-13-23-21-45.gh-issue-145239.pL8qRt.rst diff --git a/Doc/reference/compound_stmts.rst b/Doc/reference/compound_stmts.rst index 0cf0a41bfb400c..72e1cad3bbd892 100644 --- a/Doc/reference/compound_stmts.rst +++ b/Doc/reference/compound_stmts.rst @@ -858,7 +858,7 @@ A literal pattern corresponds to most : | "None" : | "True" : | "False" - signed_number: ["-"] NUMBER + signed_number: ["+" | "-"] NUMBER The rule ``strings`` and the token ``NUMBER`` are defined in the :doc:`standard Python grammar <./grammar>`. Triple-quoted strings are diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 500797910edb90..dbdd5de01700a3 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -645,6 +645,10 @@ Other language changes * Allow the *count* argument of :meth:`bytes.replace` to be a keyword. (Contributed by Stan Ulbrych in :gh:`147856`.) +* Unary plus is now accepted in :keyword:`match` literal patterns, mirroring + the existing support for unary minus. + (Contributed by Bartosz Sławecki in :gh:`145239`.) + New modules =========== diff --git a/Grammar/python.gram b/Grammar/python.gram index 3a91d426c36501..9bf3a67939fcf3 100644 --- a/Grammar/python.gram +++ b/Grammar/python.gram @@ -554,10 +554,12 @@ complex_number[expr_ty]: signed_number[expr_ty]: | NUMBER + | '+' number=NUMBER { number } | '-' number=NUMBER { _PyAST_UnaryOp(USub, number, EXTRA) } signed_real_number[expr_ty]: | real_number + | '+' real=real_number { real } | '-' real=real_number { _PyAST_UnaryOp(USub, real, EXTRA) } real_number[expr_ty]: @@ -565,6 +567,7 @@ real_number[expr_ty]: imaginary_number[expr_ty]: | imag=NUMBER { _PyPegen_ensure_imaginary(p, imag) } + | '+' imag=NUMBER { _PyPegen_ensure_imaginary(p, imag) } capture_pattern[pattern_ty]: | target=pattern_capture_target { _PyAST_MatchAs(NULL, target->v.Name.id, EXTRA) } diff --git a/Lib/test/.ruff.toml b/Lib/test/.ruff.toml index a960543f277935..dca74eb6e14bbd 100644 --- a/Lib/test/.ruff.toml +++ b/Lib/test/.ruff.toml @@ -18,6 +18,8 @@ extend-exclude = [ "test_lazy_import/__init__.py", "test_lazy_import/data/*.py", "test_lazy_import/data/**/*.py", + # Unary plus literal pattern is not yet supported by Ruff (GH-145239) + "test_patma.py", ] [lint] diff --git a/Lib/test/test_patma.py b/Lib/test/test_patma.py index 5d0857b059ea23..29cce4ee6d271f 100644 --- a/Lib/test/test_patma.py +++ b/Lib/test/test_patma.py @@ -2762,6 +2762,96 @@ def test_patma_255(self): self.assertEqual(y, 1) self.assertIs(z, x) + def test_patma_256(self): + x = 0 + match x: + case +0: + y = 0 + self.assertEqual(x, 0) + self.assertEqual(y, 0) + + def test_patma_257(self): + x = 0 + match x: + case +0.0: + y = 0 + self.assertEqual(x, 0) + self.assertEqual(y, 0) + + def test_patma_258(self): + x = 0 + match x: + case +0j: + y = 0 + self.assertEqual(x, 0) + self.assertEqual(y, 0) + + def test_patma_259(self): + x = 0 + match x: + case +0.0j: + y = 0 + self.assertEqual(x, 0) + self.assertEqual(y, 0) + + def test_patma_260(self): + x = 1 + match x: + case +1: + y = 0 + self.assertEqual(x, 1) + self.assertEqual(y, 0) + + def test_patma_261(self): + x = 1.5 + match x: + case +1.5: + y = 0 + self.assertEqual(x, 1.5) + self.assertEqual(y, 0) + + def test_patma_262(self): + x = 1j + match x: + case +1j: + y = 0 + self.assertEqual(x, 1j) + self.assertEqual(y, 0) + + def test_patma_263(self): + x = 1.5j + match x: + case +1.5j: + y = 0 + self.assertEqual(x, 1.5j) + self.assertEqual(y, 0) + + def test_patma_264(self): + x = 0.25 + 1.75j + match x: + case +0.25 + 1.75j: + y = 0 + self.assertEqual(x, 0.25 + 1.75j) + self.assertEqual(y, 0) + + def test_patma_265(self): + x = 0.25 - 1.75j + match x: + case 0.25 - +1.75j: + y = 0 + self.assertEqual(x, 0.25 - 1.75j) + self.assertEqual(y, 0) + + def test_patma_266(self): + x = 0 + match x: + case +1e1000: + y = 0 + case 0: + y = 1 + self.assertEqual(x, 0) + self.assertEqual(y, 1) + def test_patma_runtime_checkable_protocol(self): # Runtime-checkable protocol from typing import Protocol, runtime_checkable diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-13-23-21-45.gh-issue-145239.pL8qRt.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-13-23-21-45.gh-issue-145239.pL8qRt.rst new file mode 100644 index 00000000000000..282b99176642bc --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-13-23-21-45.gh-issue-145239.pL8qRt.rst @@ -0,0 +1,3 @@ +Unary plus is now accepted in :keyword:`match` literal patterns, mirroring +the existing support for unary minus. +Patch by Bartosz Sławecki. diff --git a/Parser/parser.c b/Parser/parser.c index f853d309de9180..c55c081dfc3d8e 100644 --- a/Parser/parser.c +++ b/Parser/parser.c @@ -9066,7 +9066,7 @@ complex_number_rule(Parser *p) return _res; } -// signed_number: NUMBER | '-' NUMBER +// signed_number: NUMBER | '+' NUMBER | '-' NUMBER static expr_ty signed_number_rule(Parser *p) { @@ -9107,6 +9107,33 @@ signed_number_rule(Parser *p) D(fprintf(stderr, "%*c%s signed_number[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "NUMBER")); } + { // '+' NUMBER + if (p->error_indicator) { + p->level--; + return NULL; + } + D(fprintf(stderr, "%*c> signed_number[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'+' NUMBER")); + Token * _literal; + expr_ty number; + if ( + (_literal = _PyPegen_expect_token(p, 14)) // token='+' + && + (number = _PyPegen_number_token(p)) // NUMBER + ) + { + D(fprintf(stderr, "%*c+ signed_number[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'+' NUMBER")); + _res = number; + if ((_res == NULL || p->error_indicator) && PyErr_Occurred()) { + p->error_indicator = 1; + p->level--; + return NULL; + } + goto done; + } + p->mark = _mark; + D(fprintf(stderr, "%*c%s signed_number[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'+' NUMBER")); + } { // '-' NUMBER if (p->error_indicator) { p->level--; @@ -9149,7 +9176,7 @@ signed_number_rule(Parser *p) return _res; } -// signed_real_number: real_number | '-' real_number +// signed_real_number: real_number | '+' real_number | '-' real_number static expr_ty signed_real_number_rule(Parser *p) { @@ -9190,6 +9217,33 @@ signed_real_number_rule(Parser *p) D(fprintf(stderr, "%*c%s signed_real_number[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "real_number")); } + { // '+' real_number + if (p->error_indicator) { + p->level--; + return NULL; + } + D(fprintf(stderr, "%*c> signed_real_number[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'+' real_number")); + Token * _literal; + expr_ty real; + if ( + (_literal = _PyPegen_expect_token(p, 14)) // token='+' + && + (real = real_number_rule(p)) // real_number + ) + { + D(fprintf(stderr, "%*c+ signed_real_number[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'+' real_number")); + _res = real; + if ((_res == NULL || p->error_indicator) && PyErr_Occurred()) { + p->error_indicator = 1; + p->level--; + return NULL; + } + goto done; + } + p->mark = _mark; + D(fprintf(stderr, "%*c%s signed_real_number[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'+' real_number")); + } { // '-' real_number if (p->error_indicator) { p->level--; @@ -9275,7 +9329,7 @@ real_number_rule(Parser *p) return _res; } -// imaginary_number: NUMBER +// imaginary_number: NUMBER | '+' NUMBER static expr_ty imaginary_number_rule(Parser *p) { @@ -9312,6 +9366,33 @@ imaginary_number_rule(Parser *p) D(fprintf(stderr, "%*c%s imaginary_number[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "NUMBER")); } + { // '+' NUMBER + if (p->error_indicator) { + p->level--; + return NULL; + } + D(fprintf(stderr, "%*c> imaginary_number[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'+' NUMBER")); + Token * _literal; + expr_ty imag; + if ( + (_literal = _PyPegen_expect_token(p, 14)) // token='+' + && + (imag = _PyPegen_number_token(p)) // NUMBER + ) + { + D(fprintf(stderr, "%*c+ imaginary_number[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'+' NUMBER")); + _res = _PyPegen_ensure_imaginary ( p , imag ); + if ((_res == NULL || p->error_indicator) && PyErr_Occurred()) { + p->error_indicator = 1; + p->level--; + return NULL; + } + goto done; + } + p->mark = _mark; + D(fprintf(stderr, "%*c%s imaginary_number[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'+' NUMBER")); + } _res = NULL; done: p->level--;