From c063bd193ef350f838d40fb676468a985a3ca6cb Mon Sep 17 00:00:00 2001
From: Sam Gross
Date: Wed, 18 Feb 2026 12:47:50 -0500
Subject: [PATCH] gh-144777: Fix data races in IncrementalNewlineDecoder

Add critical sections to methods of IncrementalNewlineDecoder to prevent
concurrent access to shared bitfields (pendingcr, seennl).
---
 Lib/test/test_free_threading/test_io.py       | 55 +++++++++++++++++++
 ...-02-18-13-45-00.gh-issue-144777.R97q0a.rst |  1 +
 Modules/_io/clinic/textio.c.h                 | 22 +++++++-
 Modules/_io/textio.c                          | 12 ++--
 4 files changed, 83 insertions(+), 7 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2026-02-18-13-45-00.gh-issue-144777.R97q0a.rst

diff --git a/Lib/test/test_free_threading/test_io.py b/Lib/test/test_free_threading/test_io.py
index c67aaff31b3f5b..8a0ad30c4bc770 100644
--- a/Lib/test/test_free_threading/test_io.py
+++ b/Lib/test/test_free_threading/test_io.py
@@ -1,11 +1,15 @@
+import codecs
 import io
 import _pyio as pyio
 import threading
 from unittest import TestCase
 from test.support import threading_helper
+from test.support.threading_helper import run_concurrently
 from random import randint
 from sys import getsizeof
 
+threading_helper.requires_working_threading(module=True)
+
 
 class ThreadSafetyMixin:
     # Test pretty much everything that can break under free-threading.
@@ -115,3 +119,54 @@ class CBytesIOTest(ThreadSafetyMixin, TestCase):
 
 class PyBytesIOTest(ThreadSafetyMixin, TestCase):
     ioclass = pyio.BytesIO
+
+
+class IncrementalNewlineDecoderTest(TestCase):
+    def make_decoder(self):
+        utf8_decoder = codecs.getincrementaldecoder('utf-8')()
+        return io.IncrementalNewlineDecoder(utf8_decoder, translate=True)
+
+    def test_concurrent_reset(self):
+        decoder = self.make_decoder()
+
+        def worker():
+            for _ in range(100):
+                decoder.reset()
+
+        run_concurrently(worker_func=worker, nthreads=2)
+
+    def test_concurrent_decode(self):
+        decoder = self.make_decoder()
+
+        def worker():
+            for _ in range(100):
+                decoder.decode(b"line\r\n", final=False)
+
+        run_concurrently(worker_func=worker, nthreads=2)
+
+    def test_concurrent_getstate_setstate(self):
+        decoder = self.make_decoder()
+        state = decoder.getstate()
+
+        def getstate_worker():
+            for _ in range(100):
+                decoder.getstate()
+
+        def setstate_worker():
+            for _ in range(100):
+                decoder.setstate(state)
+
+        run_concurrently([getstate_worker] * 2 + [setstate_worker] * 2)
+
+    def test_concurrent_decode_and_reset(self):
+        decoder = self.make_decoder()
+
+        def decode_worker():
+            for _ in range(100):
+                decoder.decode(b"line\r\n", final=False)
+
+        def reset_worker():
+            for _ in range(100):
+                decoder.reset()
+
+        run_concurrently([decode_worker] * 2 + [reset_worker] * 2)
diff --git a/Misc/NEWS.d/next/Library/2026-02-18-13-45-00.gh-issue-144777.R97q0a.rst b/Misc/NEWS.d/next/Library/2026-02-18-13-45-00.gh-issue-144777.R97q0a.rst
new file mode 100644
index 00000000000000..fd720bfd3f3da6
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-02-18-13-45-00.gh-issue-144777.R97q0a.rst
@@ -0,0 +1 @@
+Fix data races in :class:`io.IncrementalNewlineDecoder` in the :term:`free-threaded build`.
diff --git a/Modules/_io/clinic/textio.c.h b/Modules/_io/clinic/textio.c.h
index 128a5ad1678f26..3898a9c2982436 100644
--- a/Modules/_io/clinic/textio.c.h
+++ b/Modules/_io/clinic/textio.c.h
@@ -430,7 +430,9 @@ _io_IncrementalNewlineDecoder_decode(PyObject *self, PyObject *const *args, Py_s
         goto exit;
     }
 skip_optional_pos:
+    Py_BEGIN_CRITICAL_SECTION(self);
     return_value = _io_IncrementalNewlineDecoder_decode_impl((nldecoder_object *)self, input, final);
+    Py_END_CRITICAL_SECTION();
 
 exit:
     return return_value;
@@ -450,7 +452,13 @@ _io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self);
 static PyObject *
 _io_IncrementalNewlineDecoder_getstate(PyObject *self, PyObject *Py_UNUSED(ignored))
 {
-    return _io_IncrementalNewlineDecoder_getstate_impl((nldecoder_object *)self);
+    PyObject *return_value = NULL;
+
+    Py_BEGIN_CRITICAL_SECTION(self);
+    return_value = _io_IncrementalNewlineDecoder_getstate_impl((nldecoder_object *)self);
+    Py_END_CRITICAL_SECTION();
+
+    return return_value;
 }
 
 PyDoc_STRVAR(_io_IncrementalNewlineDecoder_setstate__doc__,
@@ -470,7 +478,9 @@ _io_IncrementalNewlineDecoder_setstate(PyObject *self, PyObject *state)
 {
     PyObject *return_value = NULL;
 
+    Py_BEGIN_CRITICAL_SECTION(self);
     return_value = _io_IncrementalNewlineDecoder_setstate_impl((nldecoder_object *)self, state);
+    Py_END_CRITICAL_SECTION();
 
     return return_value;
 }
@@ -489,7 +499,13 @@ _io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self);
 static PyObject *
 _io_IncrementalNewlineDecoder_reset(PyObject *self, PyObject *Py_UNUSED(ignored))
 {
-    return _io_IncrementalNewlineDecoder_reset_impl((nldecoder_object *)self);
+    PyObject *return_value = NULL;
+
+    Py_BEGIN_CRITICAL_SECTION(self);
+    return_value = _io_IncrementalNewlineDecoder_reset_impl((nldecoder_object *)self);
+    Py_END_CRITICAL_SECTION();
+
+    return return_value;
 }
 
 PyDoc_STRVAR(_io_TextIOWrapper___init____doc__,
@@ -1312,4 +1328,4 @@ _io_TextIOWrapper__CHUNK_SIZE_set(PyObject *self, PyObject *value, void *Py_UNUS
 
     return return_value;
 }
-/*[clinic end generated code: output=30404271a1151056 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=c38e6cd5ff4b7eea input=a9049054013a1b77]*/
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c
index f9881952561292..347bfe976619e8 100644
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@ -519,6 +519,7 @@ _PyIncrementalNewlineDecoder_decode(PyObject *myself,
 }
 
 /*[clinic input]
+@critical_section
 _io.IncrementalNewlineDecoder.decode
     input: object
     final: bool = False
@@ -527,18 +528,19 @@ _io.IncrementalNewlineDecoder.decode
 static PyObject *
 _io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
                                           PyObject *input, int final)
-/*[clinic end generated code: output=0d486755bb37a66e input=90e223c70322c5cd]*/
+/*[clinic end generated code: output=0d486755bb37a66e input=9475d16a73168504]*/
 {
     return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
 }
 
 /*[clinic input]
+@critical_section
 _io.IncrementalNewlineDecoder.getstate
 [clinic start generated code]*/
 
 static PyObject *
 _io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
-/*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
+/*[clinic end generated code: output=f0d2c9c136f4e0d0 input=dc3e1f27aa850f12]*/
 {
     PyObject *buffer;
     unsigned long long flag;
@@ -576,6 +578,7 @@ _io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
 }
 
 /*[clinic input]
+@critical_section
 _io.IncrementalNewlineDecoder.setstate
     state: object
     /
@@ -584,7 +587,7 @@ _io.IncrementalNewlineDecoder.setstate
 static PyObject *
 _io_IncrementalNewlineDecoder_setstate_impl(nldecoder_object *self,
                                             PyObject *state)
-/*[clinic end generated code: output=09135cb6e78a1dc8 input=c53fb505a76dbbe2]*/
+/*[clinic end generated code: output=09135cb6e78a1dc8 input=275fd3982d2b08cb]*/
 {
     PyObject *buffer;
     unsigned long long flag;
@@ -614,12 +617,13 @@ _io_IncrementalNewlineDecoder_setstate_impl(nldecoder_object *self,
 }
 
 /*[clinic input]
+@critical_section
 _io.IncrementalNewlineDecoder.reset
 [clinic start generated code]*/
 
 static PyObject *
 _io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
-/*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
+/*[clinic end generated code: output=32fa40c7462aa8ff input=31bd8ae4e36cec83]*/
 {
     CHECK_INITIALIZED_DECODER(self);
 