https://github.com/python/cpython/commit/4629c2215a9a4b3d1ec4a306cd4dd7d11dcfebb4
commit: 4629c2215a9a4b3d1ec4a306cd4dd7d11dcfebb4
branch: main
author: Sam Gross <[email protected]>
committer: colesbury <[email protected]>
date: 2026-04-23T14:42:57-04:00
summary:
gh-113956: Make intern_common thread-safe in free-threaded build (gh-148886)
Avoid racing with the owning thread's refcount operations when
immortalizing an interned string: if we don't own it and its refcount
isn't merged, intern a copy we own instead. Use atomic stores in
_Py_SetImmortalUntracked so concurrent atomic reads are race-free.
files:
A Misc/NEWS.d/next/Core_and_Builtins/2026-04-22-14-55-18.gh-issue-113956.0VEXd6.rst
M Lib/test/test_free_threading/test_str.py
M Objects/object.c
M Objects/unicodeobject.c
diff --git a/Lib/test/test_free_threading/test_str.py b/Lib/test/test_free_threading/test_str.py
index 9a1ce3620ac4b2..11e04009956db1 100644
--- a/Lib/test/test_free_threading/test_str.py
+++ b/Lib/test/test_free_threading/test_str.py
@@ -1,7 +1,9 @@
+import sys
+import threading
import unittest
from itertools import cycle
-from threading import Event, Thread
+from threading import Barrier, Event, Thread
from unittest import TestCase
from test.support import threading_helper
@@ -69,6 +71,24 @@ def reader_func():
for reader in readers:
reader.join()
+ def test_intern_unowned_string(self):
+ # Test interning strings owned by various threads.
+ strings = [f"intern_race_owner_{i}" for i in range(50)]
+
+ NUM_THREADS = 5
+ b = Barrier(NUM_THREADS)
+
+ def interner():
+ tid = threading.get_ident()
+ for i in range(20):
+ strings.append(f"intern_{tid}_{i}")
+ b.wait()
+ for s in strings:
+ r = sys.intern(s)
+ self.assertTrue(sys._is_interned(r))
+
+ threading_helper.run_concurrently(interner, nthreads=NUM_THREADS)
+
def test_maketrans_dict_concurrent_modification(self):
for _ in range(5):
d = {2000: 'a'}
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-22-14-55-18.gh-issue-113956.0VEXd6.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-22-14-55-18.gh-issue-113956.0VEXd6.rst
new file mode 100644
index 00000000000000..54c04bbc28d416
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-22-14-55-18.gh-issue-113956.0VEXd6.rst
@@ -0,0 +1,4 @@
+Fix a data race in :func:`sys.intern` in the free-threaded build when
+interning a string owned by another thread. An interned copy owned by the
+current thread is used instead when it is not safe to immortalize the
+original.
diff --git a/Objects/object.c b/Objects/object.c
index 3166254f6f640b..4fa20470601eb3 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -2768,9 +2768,9 @@ _Py_SetImmortalUntracked(PyObject *op)
return;
}
#ifdef Py_GIL_DISABLED
- op->ob_tid = _Py_UNOWNED_TID;
- op->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL;
- op->ob_ref_shared = 0;
+ _Py_atomic_store_uintptr_relaxed(&op->ob_tid, _Py_UNOWNED_TID);
+ _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, _Py_IMMORTAL_REFCNT_LOCAL);
+ _Py_atomic_store_ssize_relaxed(&op->ob_ref_shared, 0);
_Py_atomic_or_uint8(&op->ob_gc_bits, _PyGC_BITS_DEFERRED);
#elif SIZEOF_VOID_P > 4
op->ob_flags = _Py_IMMORTAL_FLAGS;
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index d2569132998acc..9aee7120c811de 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -589,6 +589,14 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
{
#define CHECK(expr) \
do { if (!(expr)) { _PyObject_ASSERT_FAILED_MSG(op, Py_STRINGIFY(expr)); } } while (0)
+#ifdef Py_GIL_DISABLED
+# define CHECK_IF_GIL(expr) (void)(expr)
+# define CHECK_IF_FT(expr) CHECK(expr)
+#else
+# define CHECK_IF_GIL(expr) CHECK(expr)
+# define CHECK_IF_FT(expr) (void)(expr)
+#endif
+
assert(op != NULL);
CHECK(PyUnicode_Check(op));
@@ -669,11 +677,9 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
/* Check interning state */
#ifdef Py_DEBUG
- // Note that we do not check `_Py_IsImmortal(op)`, since stable ABI
- // extensions can make immortal strings mortal (but with a high enough
- // refcount).
- // The other way is extremely unlikely (worth a potential failed assertion
- // in a debug build), so we do check `!_Py_IsImmortal(op)`.
+ // Note that we do not check `_Py_IsImmortal(op)` in the GIL-enabled build
+ // since stable ABI extensions can make immortal strings mortal (but with a
+ // high enough refcount).
switch (PyUnicode_CHECK_INTERNED(op)) {
case SSTATE_NOT_INTERNED:
if (ascii->state.statically_allocated) {
@@ -683,18 +689,20 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
// are static but use SSTATE_NOT_INTERNED
}
else {
- CHECK(!_Py_IsImmortal(op));
+ CHECK_IF_GIL(!_Py_IsImmortal(op));
}
break;
case SSTATE_INTERNED_MORTAL:
CHECK(!ascii->state.statically_allocated);
- CHECK(!_Py_IsImmortal(op));
+ CHECK_IF_GIL(!_Py_IsImmortal(op));
break;
case SSTATE_INTERNED_IMMORTAL:
CHECK(!ascii->state.statically_allocated);
+ CHECK_IF_FT(_Py_IsImmortal(op));
break;
case SSTATE_INTERNED_IMMORTAL_STATIC:
CHECK(ascii->state.statically_allocated);
+ CHECK_IF_FT(_Py_IsImmortal(op));
break;
default:
Py_UNREACHABLE();
@@ -14208,6 +14216,18 @@ immortalize_interned(PyObject *s)
FT_ATOMIC_STORE_UINT8(_PyUnicode_STATE(s).interned,
SSTATE_INTERNED_IMMORTAL);
}
+#ifdef Py_GIL_DISABLED
+static bool
+can_immortalize_safely(PyObject *s)
+{
+ if (_Py_IsOwnedByCurrentThread(s) || _Py_IsImmortal(s)) {
+ return true;
+ }
+ Py_ssize_t shared = _Py_atomic_load_ssize(&s->ob_ref_shared);
+ return _Py_REF_IS_MERGED(shared);
+}
+#endif
+
static /* non-null */ PyObject*
intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
bool immortalize)
@@ -14236,11 +14256,16 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
// no, go on
break;
case SSTATE_INTERNED_MORTAL:
+#ifndef Py_GIL_DISABLED
// yes but we might need to make it immortal
if (immortalize) {
immortalize_interned(s);
}
return s;
+#else
+ // not fully interned yet; fall through to the locking path
+ break;
+#endif
default:
// all done
return s;
@@ -14305,6 +14330,23 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
Py_DECREF(r);
}
#endif
+
+#ifdef Py_GIL_DISABLED
+ // Immortalization writes to the refcount fields non-atomically. That
+ // races with Py_INCREF / Py_DECREF on the thread that owns `s`. If we
+ // don't own it (and its refcount hasn't been merged), intern a copy
+ // we own instead.
+ if (!can_immortalize_safely(s)) {
+ PyObject *copy = _PyUnicode_Copy(s);
+ if (copy == NULL) {
+ PyErr_Clear();
+ return s;
+ }
+ Py_DECREF(s);
+ s = copy;
+ }
+#endif
+
FT_MUTEX_LOCK(INTERN_MUTEX);
PyObject *t;
{
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]