From 42caa08ed68701af7e039caea089312fb8b47341 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Sat, 14 Mar 2026 18:50:02 -0700 Subject: [PATCH 1/5] gh-143008: Safer TextIOWrapper underlying stream access TextIOWrapper keeps its underlying stream in a member called `self->buffer`. That stream can be detached by user code, such as custom `.flush` implementations resulting in `self->buffer` being set to NULL. The implementation often checked at the start of functions if `self->buffer` is in a good state, but did not always recheck after other Python code was called which could modify `self->buffer`. The cases which need to be re-checked are hard to spot so rather than rely on reviewer effort create better safety by making all self->buffer access go through helper functions. Thank you yihong0618 for the test, NEWS and initial implementation in gh-143041. Co-authored-by: yihong0618 --- Lib/test/test_io/test_textio.py | 49 +++++++ ...-12-21-17-56-37.gh-issue-143008.aakErJ.rst | 1 + Modules/_io/textio.c | 127 +++++++++++++----- 3 files changed, 144 insertions(+), 33 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-12-21-17-56-37.gh-issue-143008.aakErJ.rst diff --git a/Lib/test/test_io/test_textio.py b/Lib/test/test_io/test_textio.py index d725f9212ceaae..1ecba967afea93 100644 --- a/Lib/test/test_io/test_textio.py +++ b/Lib/test/test_io/test_textio.py @@ -1560,6 +1560,55 @@ def closed(self): wrapper = self.TextIOWrapper(raw) wrapper.close() # should not crash + def test_reentrant_detach_during_flush(self): + # gh-143008: Reentrant detach() during flush should raise RuntimeError + # instead of crashing. + wrapper = None + wrapper_ref = None + + class EvilBuffer(self.BufferedRandom): + detach_on_write = False + + def flush(self): + wrapper = wrapper_ref() if wrapper_ref is not None else None + if wrapper is not None and not self.detach_on_write: + wrapper.detach() + return super().flush() + + def write(self, b): + wrapper = wrapper_ref() if wrapper_ref is not None else None + if wrapper is not None and self.detach_on_write: + wrapper.detach() + return len(b) + + # Many calls could result in the same null self->buffer crash. + tests = [ + ('truncate', lambda: wrapper.truncate(0)), + ('close', lambda: wrapper.close()), + ('detach', lambda: wrapper.detach()), + ('seek', lambda: wrapper.seek(0)), + ('tell', lambda: wrapper.tell()), + ('reconfigure', lambda: wrapper.reconfigure(line_buffering=True)), + ] + for name, method in tests: + with self.subTest(name): + wrapper = self.TextIOWrapper(EvilBuffer(self.MockRawIO()), encoding='utf-8') + wrapper_ref = weakref.ref(wrapper) + # These used to crash; now either return detached or keep + # running until out of stack. + self.assertRaisesRegex(RuntimeError, "detached|recursion depth exceeded", method) + wrapper_ref = None + del wrapper + + with self.subTest('read via writeflush'): + EvilBuffer.detach_on_write = True + wrapper = self.TextIOWrapper(EvilBuffer(self.MockRawIO()), encoding='utf-8') + wrapper_ref = weakref.ref(wrapper) + wrapper.write('x') + self.assertRaisesRegex(ValueError, "detached", wrapper.read) + wrapper_ref = None + del wrapper + class PyTextIOWrapperTest(TextIOWrapperTest, PyTestCase): shutdown_error = "LookupError: unknown encoding: ascii" diff --git a/Misc/NEWS.d/next/Library/2025-12-21-17-56-37.gh-issue-143008.aakErJ.rst b/Misc/NEWS.d/next/Library/2025-12-21-17-56-37.gh-issue-143008.aakErJ.rst new file mode 100644 index 00000000000000..cac314452eaa4c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-21-17-56-37.gh-issue-143008.aakErJ.rst @@ -0,0 +1 @@ +Fix crash in :class:`io.TextIOWrapper` when reentrant :meth:`io.TextIOBase.detach` called. diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 347bfe976619e8..2d3a62e3eec185 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -672,6 +672,8 @@ struct textio int ok; /* initialized? */ int detached; Py_ssize_t chunk_size; + /* Use helpers like _textiowrapper_buffer_get to access buffer; many + operations can set it to NULL (ref: gh-143008, gh-142594). */ PyObject *buffer; PyObject *encoding; PyObject *encoder; @@ -729,6 +731,55 @@ struct textio #define textio_CAST(op) ((textio *)(op)) +/* Helpers to safely operate on self->buffer. + +self->buffer can be detached (set to NULL) by any user code that is called +leading to NULL pointer dereferences (ex. gh-143008, gh-142594). Protect against +that by using helpers to check self->buffer validity at callsites. */ +static PyObject * +_textiowrapper_buffer_safe(textio *self) { + /* Check self->buffer directly but match errors of CHECK_ATTACHED since this + is called during construction and destruction where self->ok which + CHECK_ATTACHED uses does not imply self->buffer state. */ + if (self->buffer == NULL) { + if (self->ok <= 0) + PyErr_SetString(PyExc_ValueError, + "I/O operation on uninitialized object"); + else + PyErr_SetString(PyExc_ValueError, + "underlying buffer has been detached"); + return NULL; + } + return self->buffer; +} + +static PyObject * +_textiowrapper_buffer_get_attr(textio *self, PyObject *attr_name) { + PyObject *buffer = _textiowrapper_buffer_safe(self); + if (buffer == NULL) { + return NULL; + } + return PyObject_GetAttr(buffer, attr_name); +} + +static PyObject * +_textiowrapper_buffer_callmethod_noargs(textio *self, PyObject *name) { + PyObject *buffer = _textiowrapper_buffer_safe(self); + if (buffer == NULL) { + return NULL; + } + return PyObject_CallMethodNoArgs(buffer, name); +} + +static PyObject * +_textiowrapper_buffer_callmethod_onearg(textio *self, PyObject *name, PyObject *arg) { + PyObject *buffer = _textiowrapper_buffer_safe(self); + if (buffer == NULL) { + return NULL; + } + return PyObject_CallMethodOneArg(buffer, name, arg); +} + static void textiowrapper_set_decoded_chars(textio *self, PyObject *chars); @@ -898,7 +949,7 @@ _textiowrapper_set_decoder(textio *self, PyObject *codec_info, PyObject *res; int r; - res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable)); + res = _textiowrapper_buffer_callmethod_noargs(self, &_Py_ID(readable)); if (res == NULL) return -1; @@ -954,7 +1005,7 @@ _textiowrapper_set_encoder(textio *self, PyObject *codec_info, PyObject *res; int r; - res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable)); + res = _textiowrapper_buffer_callmethod_noargs(self, &_Py_ID(writable)); if (res == NULL) return -1; @@ -1000,8 +1051,8 @@ _textiowrapper_fix_encoder_state(textio *self) self->encoding_start_of_stream = 1; - PyObject *cookieObj = PyObject_CallMethodNoArgs( - self->buffer, &_Py_ID(tell)); + PyObject *cookieObj = _textiowrapper_buffer_callmethod_noargs( + self, &_Py_ID(tell)); if (cookieObj == NULL) { return -1; } @@ -1641,7 +1692,7 @@ _textiowrapper_writeflush(textio *self) PyObject *ret; do { - ret = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(write), b); + ret = _textiowrapper_buffer_callmethod_onearg(self, &_Py_ID(write), b); } while (ret == NULL && _PyIO_trap_eintr()); Py_DECREF(b); // NOTE: We cleared buffer but we don't know how many bytes are actually written @@ -1787,7 +1838,8 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text) } if (needflush) { - if (_PyFile_Flush(self->buffer) < 0) { + PyObject *buffer = _textiowrapper_buffer_safe(self); + if (buffer == NULL || _PyFile_Flush(buffer) < 0) { return NULL; } } @@ -1917,9 +1969,9 @@ textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint) if (chunk_size == NULL) goto fail; - input_chunk = PyObject_CallMethodOneArg(self->buffer, - (self->has_read1 ? &_Py_ID(read1): &_Py_ID(read)), - chunk_size); + input_chunk = _textiowrapper_buffer_callmethod_onearg(self, + (self->has_read1 ? &_Py_ID(read1): &_Py_ID(read)), + chunk_size); Py_DECREF(chunk_size); if (input_chunk == NULL) goto fail; @@ -2003,7 +2055,8 @@ _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n) if (n < 0) { /* Read everything */ - PyObject *bytes = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(read)); + PyObject *bytes = _textiowrapper_buffer_callmethod_noargs(self, + &_Py_ID(read)); PyObject *decoded; if (bytes == NULL) goto fail; @@ -2600,7 +2653,11 @@ _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence) Py_DECREF(res); } - res = _PyObject_CallMethod(self->buffer, &_Py_ID(seek), "ii", 0, 2); + PyObject *buf = _textiowrapper_buffer_safe(self); + if (buf == NULL) { + goto fail; + } + res = _PyObject_CallMethod(buf, &_Py_ID(seek), "ii", 0, 2); Py_CLEAR(cookieObj); if (res == NULL) goto fail; @@ -2648,7 +2705,7 @@ _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence) posobj = PyLong_FromOff_t(cookie.start_pos); if (posobj == NULL) goto fail; - res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(seek), posobj); + res = _textiowrapper_buffer_callmethod_onearg(self, &_Py_ID(seek), posobj); Py_DECREF(posobj); if (res == NULL) goto fail; @@ -2665,8 +2722,14 @@ _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence) if (cookie.chars_to_skip) { /* Just like _read_chunk, feed the decoder and save a snapshot. */ - PyObject *input_chunk = _PyObject_CallMethod(self->buffer, &_Py_ID(read), - "i", cookie.bytes_to_feed); + PyObject *bytes_to_feed = PyLong_FromLong(cookie.bytes_to_feed); + if (bytes_to_feed == NULL) { + goto fail; + } + PyObject *input_chunk = _textiowrapper_buffer_callmethod_onearg(self, + &_Py_ID(read), bytes_to_feed); + Py_DECREF(bytes_to_feed); + PyObject *decoded; if (input_chunk == NULL) @@ -2765,7 +2828,7 @@ _io_TextIOWrapper_tell_impl(textio *self) goto fail; } - posobj = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(tell)); + posobj = _textiowrapper_buffer_callmethod_noargs(self, &_Py_ID(tell)); if (posobj == NULL) goto fail; @@ -2975,7 +3038,7 @@ _io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos) return NULL; } - return PyObject_CallMethodOneArg(self->buffer, &_Py_ID(truncate), pos); + return _textiowrapper_buffer_callmethod_onearg(self, &_Py_ID(truncate), pos); } static PyObject * @@ -3057,8 +3120,7 @@ static PyObject * _io_TextIOWrapper_fileno_impl(textio *self) /*[clinic end generated code: output=21490a4c3da13e6c input=515e1196aceb97ab]*/ { - CHECK_ATTACHED(self); - return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(fileno)); + return _textiowrapper_buffer_callmethod_noargs(self, &_Py_ID(fileno)); } /*[clinic input] @@ -3070,8 +3132,7 @@ static PyObject * _io_TextIOWrapper_seekable_impl(textio *self) /*[clinic end generated code: output=ab223dbbcffc0f00 input=71c4c092736c549b]*/ { - CHECK_ATTACHED(self); - return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(seekable)); + return _textiowrapper_buffer_callmethod_noargs(self, &_Py_ID(seekable)); } /*[clinic input] @@ -3083,8 +3144,7 @@ static PyObject * _io_TextIOWrapper_readable_impl(textio *self) /*[clinic end generated code: output=72ff7ba289a8a91b input=80438d1f01b0a89b]*/ { - CHECK_ATTACHED(self); - return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable)); + return _textiowrapper_buffer_callmethod_noargs(self, &_Py_ID(readable)); } /*[clinic input] @@ -3096,8 +3156,7 @@ static PyObject * _io_TextIOWrapper_writable_impl(textio *self) /*[clinic end generated code: output=a728c71790d03200 input=9d6c22befb0c340a]*/ { - CHECK_ATTACHED(self); - return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable)); + return _textiowrapper_buffer_callmethod_noargs(self, &_Py_ID(writable)); } /*[clinic input] @@ -3109,8 +3168,7 @@ static PyObject * _io_TextIOWrapper_isatty_impl(textio *self) /*[clinic end generated code: output=12be1a35bace882e input=7f83ff04d4d1733d]*/ { - CHECK_ATTACHED(self); - return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(isatty)); + return _textiowrapper_buffer_callmethod_noargs(self, &_Py_ID(isatty)); } /*[clinic input] @@ -3127,7 +3185,7 @@ _io_TextIOWrapper_flush_impl(textio *self) self->telling = self->seekable; if (_textiowrapper_writeflush(self) < 0) return NULL; - return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(flush)); + return _textiowrapper_buffer_callmethod_noargs(self, &_Py_ID(flush)); } /*[clinic input] @@ -3160,8 +3218,8 @@ _io_TextIOWrapper_close_impl(textio *self) else { PyObject *exc = NULL; if (self->finalizing) { - res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(_dealloc_warn), - (PyObject *)self); + res = _textiowrapper_buffer_callmethod_onearg(self, + &_Py_ID(_dealloc_warn), (PyObject *)self); if (res) { Py_DECREF(res); } @@ -3173,7 +3231,7 @@ _io_TextIOWrapper_close_impl(textio *self) exc = PyErr_GetRaisedException(); } - res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(close)); + res = _textiowrapper_buffer_callmethod_noargs(self, &_Py_ID(close)); if (exc != NULL) { _PyErr_ChainExceptions1(exc); Py_CLEAR(res); @@ -3241,8 +3299,7 @@ static PyObject * _io_TextIOWrapper_name_get_impl(textio *self) /*[clinic end generated code: output=8c2f1d6d8756af40 input=26ecec9b39e30e07]*/ { - CHECK_ATTACHED(self); - return PyObject_GetAttr(self->buffer, &_Py_ID(name)); + return _textiowrapper_buffer_get_attr(self, &_Py_ID(name)); } /*[clinic input] @@ -3255,8 +3312,12 @@ static PyObject * _io_TextIOWrapper_closed_get_impl(textio *self) /*[clinic end generated code: output=b49b68f443a85e3c input=7dfcf43f63c7003d]*/ { + /* During destruction self->ok is 0 but self->buffer is non-NULL and this + needs to error in that case which the safe buffer wrapper does not. + + Match original behavior by calling CHECK_ATTACHED explicitly. */ CHECK_ATTACHED(self); - return PyObject_GetAttr(self->buffer, &_Py_ID(closed)); + return _textiowrapper_buffer_get_attr(self, &_Py_ID(closed)); } /*[clinic input] From 551c3eb5f5d57d97374c297a558e6802077edb64 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Sat, 14 Mar 2026 19:12:24 -0700 Subject: [PATCH 2/5] fixup: Refer to existing helpers --- Modules/_io/textio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 2d3a62e3eec185..497412f484def3 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -672,7 +672,7 @@ struct textio int ok; /* initialized? */ int detached; Py_ssize_t chunk_size; - /* Use helpers like _textiowrapper_buffer_get to access buffer; many + /* Use helpers _textiowrapper_buffer_* to access buffer; many operations can set it to NULL (ref: gh-143008, gh-142594). */ PyObject *buffer; PyObject *encoding; From 4f455f4f6dbab848542fb5255530d017a5b48500 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Mon, 16 Mar 2026 22:48:49 -0700 Subject: [PATCH 3/5] fixup: More precise recursion errorcatching --- Lib/test/test_io/test_textio.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_io/test_textio.py b/Lib/test/test_io/test_textio.py index 1ecba967afea93..3a642e027d8547 100644 --- a/Lib/test/test_io/test_textio.py +++ b/Lib/test/test_io/test_textio.py @@ -11,7 +11,11 @@ import weakref from collections import UserList from test import support -from test.support import os_helper, threading_helper +from test.support import ( + os_helper, + set_recursion_limit, + threading_helper +) from test.support.script_helper import assert_python_ok from .utils import CTestCase, PyTestCase @@ -1591,12 +1595,12 @@ def write(self, b): ('reconfigure', lambda: wrapper.reconfigure(line_buffering=True)), ] for name, method in tests: - with self.subTest(name): + with self.subTest(name), set_recursion_limit(100): wrapper = self.TextIOWrapper(EvilBuffer(self.MockRawIO()), encoding='utf-8') wrapper_ref = weakref.ref(wrapper) # These used to crash; now either return detached or keep # running until out of stack. - self.assertRaisesRegex(RuntimeError, "detached|recursion depth exceeded", method) + self.assertRaises((RecursionError, RuntimeError), method) wrapper_ref = None del wrapper From ec39a6118f238cb8bc2f34088c2e4e5ee18acf00 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Mon, 16 Mar 2026 23:08:52 -0700 Subject: [PATCH 4/5] fixup: Formatting fixes, simplify test --- Lib/test/test_io/test_textio.py | 15 +++++++-------- Modules/_io/textio.c | 12 ++++++------ 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/Lib/test/test_io/test_textio.py b/Lib/test/test_io/test_textio.py index 3a642e027d8547..7ecfc2d151036f 100644 --- a/Lib/test/test_io/test_textio.py +++ b/Lib/test/test_io/test_textio.py @@ -1568,19 +1568,19 @@ def test_reentrant_detach_during_flush(self): # gh-143008: Reentrant detach() during flush should raise RuntimeError # instead of crashing. wrapper = None - wrapper_ref = None + wrapper_ref = lambda: None class EvilBuffer(self.BufferedRandom): detach_on_write = False def flush(self): - wrapper = wrapper_ref() if wrapper_ref is not None else None + wrapper = wrapper_ref() if wrapper is not None and not self.detach_on_write: wrapper.detach() return super().flush() def write(self, b): - wrapper = wrapper_ref() if wrapper_ref is not None else None + wrapper = wrapper_ref() if wrapper is not None and self.detach_on_write: wrapper.detach() return len(b) @@ -1598,10 +1598,9 @@ def write(self, b): with self.subTest(name), set_recursion_limit(100): wrapper = self.TextIOWrapper(EvilBuffer(self.MockRawIO()), encoding='utf-8') wrapper_ref = weakref.ref(wrapper) - # These used to crash; now either return detached or keep - # running until out of stack. - self.assertRaises((RecursionError, RuntimeError), method) - wrapper_ref = None + # Used to crash; now will run out of stack. + self.assertRaises(RecursionError, method) + wrapper_ref = lambda: None del wrapper with self.subTest('read via writeflush'): @@ -1610,7 +1609,7 @@ def write(self, b): wrapper_ref = weakref.ref(wrapper) wrapper.write('x') self.assertRaisesRegex(ValueError, "detached", wrapper.read) - wrapper_ref = None + wrapper_ref = lambda: None del wrapper diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 497412f484def3..765de2b818f809 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1052,7 +1052,7 @@ _textiowrapper_fix_encoder_state(textio *self) self->encoding_start_of_stream = 1; PyObject *cookieObj = _textiowrapper_buffer_callmethod_noargs( - self, &_Py_ID(tell)); + self, &_Py_ID(tell)); if (cookieObj == NULL) { return -1; } @@ -1970,8 +1970,8 @@ textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint) goto fail; input_chunk = _textiowrapper_buffer_callmethod_onearg(self, - (self->has_read1 ? &_Py_ID(read1): &_Py_ID(read)), - chunk_size); + (self->has_read1 ? &_Py_ID(read1): &_Py_ID(read)), + chunk_size); Py_DECREF(chunk_size); if (input_chunk == NULL) goto fail; @@ -2056,7 +2056,7 @@ _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n) if (n < 0) { /* Read everything */ PyObject *bytes = _textiowrapper_buffer_callmethod_noargs(self, - &_Py_ID(read)); + &_Py_ID(read)); PyObject *decoded; if (bytes == NULL) goto fail; @@ -2727,7 +2727,7 @@ _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence) goto fail; } PyObject *input_chunk = _textiowrapper_buffer_callmethod_onearg(self, - &_Py_ID(read), bytes_to_feed); + &_Py_ID(read), bytes_to_feed); Py_DECREF(bytes_to_feed); PyObject *decoded; @@ -3219,7 +3219,7 @@ _io_TextIOWrapper_close_impl(textio *self) PyObject *exc = NULL; if (self->finalizing) { res = _textiowrapper_buffer_callmethod_onearg(self, - &_Py_ID(_dealloc_warn), (PyObject *)self); + &_Py_ID(_dealloc_warn), (PyObject *)self); if (res) { Py_DECREF(res); } From 98bc8653ea6f23900b7dc0d93fe0b06315094a6a Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Mon, 16 Mar 2026 23:10:09 -0700 Subject: [PATCH 5/5] fixup: correct commment --- Lib/test/test_io/test_textio.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Lib/test/test_io/test_textio.py b/Lib/test/test_io/test_textio.py index 7ecfc2d151036f..9eba9a51208af7 100644 --- a/Lib/test/test_io/test_textio.py +++ b/Lib/test/test_io/test_textio.py @@ -1565,8 +1565,7 @@ def closed(self): wrapper.close() # should not crash def test_reentrant_detach_during_flush(self): - # gh-143008: Reentrant detach() during flush should raise RuntimeError - # instead of crashing. + # gh-143008: Reentrant detach() during flush should not crash. wrapper = None wrapper_ref = lambda: None