Skip to content

Commit 42caa08

Browse files
cmaloneyyihong0618
andcommitted
gh-143008: Safer TextIOWrapper underlying stream access
TextIOWrapper keeps its underlying stream in a member called `self->buffer`. That stream can be detached by user code, such as custom `.flush` implementations resulting in `self->buffer` being set to NULL. The implementation often checked at the start of functions if `self->buffer` is in a good state, but did not always recheck after other Python code was called which could modify `self->buffer`. The cases which need to be re-checked are hard to spot so rather than rely on reviewer effort create better safety by making all self->buffer access go through helper functions. Thank you yihong0618 for the test, NEWS and initial implementation in gh-143041. Co-authored-by: yihong0618 <zouzou0208@gmail.com>
1 parent 788c329 commit 42caa08

3 files changed

Lines changed: 144 additions & 33 deletions

File tree

Lib/test/test_io/test_textio.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1560,6 +1560,55 @@ def closed(self):
15601560
wrapper = self.TextIOWrapper(raw)
15611561
wrapper.close() # should not crash
15621562

1563+
def test_reentrant_detach_during_flush(self):
1564+
# gh-143008: Reentrant detach() during flush should raise RuntimeError
1565+
# instead of crashing.
1566+
wrapper = None
1567+
wrapper_ref = None
1568+
1569+
class EvilBuffer(self.BufferedRandom):
1570+
detach_on_write = False
1571+
1572+
def flush(self):
1573+
wrapper = wrapper_ref() if wrapper_ref is not None else None
1574+
if wrapper is not None and not self.detach_on_write:
1575+
wrapper.detach()
1576+
return super().flush()
1577+
1578+
def write(self, b):
1579+
wrapper = wrapper_ref() if wrapper_ref is not None else None
1580+
if wrapper is not None and self.detach_on_write:
1581+
wrapper.detach()
1582+
return len(b)
1583+
1584+
# Many calls could result in the same null self->buffer crash.
1585+
tests = [
1586+
('truncate', lambda: wrapper.truncate(0)),
1587+
('close', lambda: wrapper.close()),
1588+
('detach', lambda: wrapper.detach()),
1589+
('seek', lambda: wrapper.seek(0)),
1590+
('tell', lambda: wrapper.tell()),
1591+
('reconfigure', lambda: wrapper.reconfigure(line_buffering=True)),
1592+
]
1593+
for name, method in tests:
1594+
with self.subTest(name):
1595+
wrapper = self.TextIOWrapper(EvilBuffer(self.MockRawIO()), encoding='utf-8')
1596+
wrapper_ref = weakref.ref(wrapper)
1597+
# These used to crash; now either return detached or keep
1598+
# running until out of stack.
1599+
self.assertRaisesRegex(RuntimeError, "detached|recursion depth exceeded", method)
1600+
wrapper_ref = None
1601+
del wrapper
1602+
1603+
with self.subTest('read via writeflush'):
1604+
EvilBuffer.detach_on_write = True
1605+
wrapper = self.TextIOWrapper(EvilBuffer(self.MockRawIO()), encoding='utf-8')
1606+
wrapper_ref = weakref.ref(wrapper)
1607+
wrapper.write('x')
1608+
self.assertRaisesRegex(ValueError, "detached", wrapper.read)
1609+
wrapper_ref = None
1610+
del wrapper
1611+
15631612

15641613
class PyTextIOWrapperTest(TextIOWrapperTest, PyTestCase):
15651614
shutdown_error = "LookupError: unknown encoding: ascii"
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix crash in :class:`io.TextIOWrapper` when reentrant :meth:`io.TextIOBase.detach` called.

Modules/_io/textio.c

Lines changed: 94 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -672,6 +672,8 @@ struct textio
672672
int ok; /* initialized? */
673673
int detached;
674674
Py_ssize_t chunk_size;
675+
/* Use helpers like _textiowrapper_buffer_get to access buffer; many
676+
operations can set it to NULL (ref: gh-143008, gh-142594). */
675677
PyObject *buffer;
676678
PyObject *encoding;
677679
PyObject *encoder;
@@ -729,6 +731,55 @@ struct textio
729731

730732
#define textio_CAST(op) ((textio *)(op))
731733

734+
/* Helpers to safely operate on self->buffer.
735+
736+
self->buffer can be detached (set to NULL) by any user code that is called
737+
leading to NULL pointer dereferences (ex. gh-143008, gh-142594). Protect against
738+
that by using helpers to check self->buffer validity at callsites. */
739+
static PyObject *
740+
_textiowrapper_buffer_safe(textio *self) {
741+
/* Check self->buffer directly but match errors of CHECK_ATTACHED since this
742+
is called during construction and destruction where self->ok which
743+
CHECK_ATTACHED uses does not imply self->buffer state. */
744+
if (self->buffer == NULL) {
745+
if (self->ok <= 0)
746+
PyErr_SetString(PyExc_ValueError,
747+
"I/O operation on uninitialized object");
748+
else
749+
PyErr_SetString(PyExc_ValueError,
750+
"underlying buffer has been detached");
751+
return NULL;
752+
}
753+
return self->buffer;
754+
}
755+
756+
static PyObject *
757+
_textiowrapper_buffer_get_attr(textio *self, PyObject *attr_name) {
758+
PyObject *buffer = _textiowrapper_buffer_safe(self);
759+
if (buffer == NULL) {
760+
return NULL;
761+
}
762+
return PyObject_GetAttr(buffer, attr_name);
763+
}
764+
765+
static PyObject *
766+
_textiowrapper_buffer_callmethod_noargs(textio *self, PyObject *name) {
767+
PyObject *buffer = _textiowrapper_buffer_safe(self);
768+
if (buffer == NULL) {
769+
return NULL;
770+
}
771+
return PyObject_CallMethodNoArgs(buffer, name);
772+
}
773+
774+
static PyObject *
775+
_textiowrapper_buffer_callmethod_onearg(textio *self, PyObject *name, PyObject *arg) {
776+
PyObject *buffer = _textiowrapper_buffer_safe(self);
777+
if (buffer == NULL) {
778+
return NULL;
779+
}
780+
return PyObject_CallMethodOneArg(buffer, name, arg);
781+
}
782+
732783
static void
733784
textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
734785

@@ -898,7 +949,7 @@ _textiowrapper_set_decoder(textio *self, PyObject *codec_info,
898949
PyObject *res;
899950
int r;
900951

901-
res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
952+
res = _textiowrapper_buffer_callmethod_noargs(self, &_Py_ID(readable));
902953
if (res == NULL)
903954
return -1;
904955

@@ -954,7 +1005,7 @@ _textiowrapper_set_encoder(textio *self, PyObject *codec_info,
9541005
PyObject *res;
9551006
int r;
9561007

957-
res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
1008+
res = _textiowrapper_buffer_callmethod_noargs(self, &_Py_ID(writable));
9581009
if (res == NULL)
9591010
return -1;
9601011

@@ -1000,8 +1051,8 @@ _textiowrapper_fix_encoder_state(textio *self)
10001051

10011052
self->encoding_start_of_stream = 1;
10021053

1003-
PyObject *cookieObj = PyObject_CallMethodNoArgs(
1004-
self->buffer, &_Py_ID(tell));
1054+
PyObject *cookieObj = _textiowrapper_buffer_callmethod_noargs(
1055+
self, &_Py_ID(tell));
10051056
if (cookieObj == NULL) {
10061057
return -1;
10071058
}
@@ -1641,7 +1692,7 @@ _textiowrapper_writeflush(textio *self)
16411692

16421693
PyObject *ret;
16431694
do {
1644-
ret = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(write), b);
1695+
ret = _textiowrapper_buffer_callmethod_onearg(self, &_Py_ID(write), b);
16451696
} while (ret == NULL && _PyIO_trap_eintr());
16461697
Py_DECREF(b);
16471698
// NOTE: We cleared buffer but we don't know how many bytes are actually written
@@ -1787,7 +1838,8 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
17871838
}
17881839

17891840
if (needflush) {
1790-
if (_PyFile_Flush(self->buffer) < 0) {
1841+
PyObject *buffer = _textiowrapper_buffer_safe(self);
1842+
if (buffer == NULL || _PyFile_Flush(buffer) < 0) {
17911843
return NULL;
17921844
}
17931845
}
@@ -1917,9 +1969,9 @@ textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
19171969
if (chunk_size == NULL)
19181970
goto fail;
19191971

1920-
input_chunk = PyObject_CallMethodOneArg(self->buffer,
1921-
(self->has_read1 ? &_Py_ID(read1): &_Py_ID(read)),
1922-
chunk_size);
1972+
input_chunk = _textiowrapper_buffer_callmethod_onearg(self,
1973+
(self->has_read1 ? &_Py_ID(read1): &_Py_ID(read)),
1974+
chunk_size);
19231975
Py_DECREF(chunk_size);
19241976
if (input_chunk == NULL)
19251977
goto fail;
@@ -2003,7 +2055,8 @@ _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
20032055

20042056
if (n < 0) {
20052057
/* Read everything */
2006-
PyObject *bytes = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(read));
2058+
PyObject *bytes = _textiowrapper_buffer_callmethod_noargs(self,
2059+
&_Py_ID(read));
20072060
PyObject *decoded;
20082061
if (bytes == NULL)
20092062
goto fail;
@@ -2600,7 +2653,11 @@ _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
26002653
Py_DECREF(res);
26012654
}
26022655

2603-
res = _PyObject_CallMethod(self->buffer, &_Py_ID(seek), "ii", 0, 2);
2656+
PyObject *buf = _textiowrapper_buffer_safe(self);
2657+
if (buf == NULL) {
2658+
goto fail;
2659+
}
2660+
res = _PyObject_CallMethod(buf, &_Py_ID(seek), "ii", 0, 2);
26042661
Py_CLEAR(cookieObj);
26052662
if (res == NULL)
26062663
goto fail;
@@ -2648,7 +2705,7 @@ _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
26482705
posobj = PyLong_FromOff_t(cookie.start_pos);
26492706
if (posobj == NULL)
26502707
goto fail;
2651-
res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(seek), posobj);
2708+
res = _textiowrapper_buffer_callmethod_onearg(self, &_Py_ID(seek), posobj);
26522709
Py_DECREF(posobj);
26532710
if (res == NULL)
26542711
goto fail;
@@ -2665,8 +2722,14 @@ _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
26652722

26662723
if (cookie.chars_to_skip) {
26672724
/* Just like _read_chunk, feed the decoder and save a snapshot. */
2668-
PyObject *input_chunk = _PyObject_CallMethod(self->buffer, &_Py_ID(read),
2669-
"i", cookie.bytes_to_feed);
2725+
PyObject *bytes_to_feed = PyLong_FromLong(cookie.bytes_to_feed);
2726+
if (bytes_to_feed == NULL) {
2727+
goto fail;
2728+
}
2729+
PyObject *input_chunk = _textiowrapper_buffer_callmethod_onearg(self,
2730+
&_Py_ID(read), bytes_to_feed);
2731+
Py_DECREF(bytes_to_feed);
2732+
26702733
PyObject *decoded;
26712734

26722735
if (input_chunk == NULL)
@@ -2765,7 +2828,7 @@ _io_TextIOWrapper_tell_impl(textio *self)
27652828
goto fail;
27662829
}
27672830

2768-
posobj = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(tell));
2831+
posobj = _textiowrapper_buffer_callmethod_noargs(self, &_Py_ID(tell));
27692832
if (posobj == NULL)
27702833
goto fail;
27712834

@@ -2975,7 +3038,7 @@ _io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
29753038
return NULL;
29763039
}
29773040

2978-
return PyObject_CallMethodOneArg(self->buffer, &_Py_ID(truncate), pos);
3041+
return _textiowrapper_buffer_callmethod_onearg(self, &_Py_ID(truncate), pos);
29793042
}
29803043

29813044
static PyObject *
@@ -3057,8 +3120,7 @@ static PyObject *
30573120
_io_TextIOWrapper_fileno_impl(textio *self)
30583121
/*[clinic end generated code: output=21490a4c3da13e6c input=515e1196aceb97ab]*/
30593122
{
3060-
CHECK_ATTACHED(self);
3061-
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(fileno));
3123+
return _textiowrapper_buffer_callmethod_noargs(self, &_Py_ID(fileno));
30623124
}
30633125

30643126
/*[clinic input]
@@ -3070,8 +3132,7 @@ static PyObject *
30703132
_io_TextIOWrapper_seekable_impl(textio *self)
30713133
/*[clinic end generated code: output=ab223dbbcffc0f00 input=71c4c092736c549b]*/
30723134
{
3073-
CHECK_ATTACHED(self);
3074-
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(seekable));
3135+
return _textiowrapper_buffer_callmethod_noargs(self, &_Py_ID(seekable));
30753136
}
30763137

30773138
/*[clinic input]
@@ -3083,8 +3144,7 @@ static PyObject *
30833144
_io_TextIOWrapper_readable_impl(textio *self)
30843145
/*[clinic end generated code: output=72ff7ba289a8a91b input=80438d1f01b0a89b]*/
30853146
{
3086-
CHECK_ATTACHED(self);
3087-
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
3147+
return _textiowrapper_buffer_callmethod_noargs(self, &_Py_ID(readable));
30883148
}
30893149

30903150
/*[clinic input]
@@ -3096,8 +3156,7 @@ static PyObject *
30963156
_io_TextIOWrapper_writable_impl(textio *self)
30973157
/*[clinic end generated code: output=a728c71790d03200 input=9d6c22befb0c340a]*/
30983158
{
3099-
CHECK_ATTACHED(self);
3100-
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
3159+
return _textiowrapper_buffer_callmethod_noargs(self, &_Py_ID(writable));
31013160
}
31023161

31033162
/*[clinic input]
@@ -3109,8 +3168,7 @@ static PyObject *
31093168
_io_TextIOWrapper_isatty_impl(textio *self)
31103169
/*[clinic end generated code: output=12be1a35bace882e input=7f83ff04d4d1733d]*/
31113170
{
3112-
CHECK_ATTACHED(self);
3113-
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(isatty));
3171+
return _textiowrapper_buffer_callmethod_noargs(self, &_Py_ID(isatty));
31143172
}
31153173

31163174
/*[clinic input]
@@ -3127,7 +3185,7 @@ _io_TextIOWrapper_flush_impl(textio *self)
31273185
self->telling = self->seekable;
31283186
if (_textiowrapper_writeflush(self) < 0)
31293187
return NULL;
3130-
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(flush));
3188+
return _textiowrapper_buffer_callmethod_noargs(self, &_Py_ID(flush));
31313189
}
31323190

31333191
/*[clinic input]
@@ -3160,8 +3218,8 @@ _io_TextIOWrapper_close_impl(textio *self)
31603218
else {
31613219
PyObject *exc = NULL;
31623220
if (self->finalizing) {
3163-
res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(_dealloc_warn),
3164-
(PyObject *)self);
3221+
res = _textiowrapper_buffer_callmethod_onearg(self,
3222+
&_Py_ID(_dealloc_warn), (PyObject *)self);
31653223
if (res) {
31663224
Py_DECREF(res);
31673225
}
@@ -3173,7 +3231,7 @@ _io_TextIOWrapper_close_impl(textio *self)
31733231
exc = PyErr_GetRaisedException();
31743232
}
31753233

3176-
res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(close));
3234+
res = _textiowrapper_buffer_callmethod_noargs(self, &_Py_ID(close));
31773235
if (exc != NULL) {
31783236
_PyErr_ChainExceptions1(exc);
31793237
Py_CLEAR(res);
@@ -3241,8 +3299,7 @@ static PyObject *
32413299
_io_TextIOWrapper_name_get_impl(textio *self)
32423300
/*[clinic end generated code: output=8c2f1d6d8756af40 input=26ecec9b39e30e07]*/
32433301
{
3244-
CHECK_ATTACHED(self);
3245-
return PyObject_GetAttr(self->buffer, &_Py_ID(name));
3302+
return _textiowrapper_buffer_get_attr(self, &_Py_ID(name));
32463303
}
32473304

32483305
/*[clinic input]
@@ -3255,8 +3312,12 @@ static PyObject *
32553312
_io_TextIOWrapper_closed_get_impl(textio *self)
32563313
/*[clinic end generated code: output=b49b68f443a85e3c input=7dfcf43f63c7003d]*/
32573314
{
3315+
/* During destruction self->ok is 0 but self->buffer is non-NULL and this
3316+
needs to error in that case which the safe buffer wrapper does not.
3317+
3318+
Match original behavior by calling CHECK_ATTACHED explicitly. */
32583319
CHECK_ATTACHED(self);
3259-
return PyObject_GetAttr(self->buffer, &_Py_ID(closed));
3320+
return _textiowrapper_buffer_get_attr(self, &_Py_ID(closed));
32603321
}
32613322

32623323
/*[clinic input]

0 commit comments

Comments
 (0)