From 36e008c65abb45ef7c5e303080ce78cc390a9e97 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 17 Feb 2026 11:31:41 +0100 Subject: [PATCH 1/3] gh-141510, PEP 814: Add frozendict support to pickle --- Lib/pickle.py | 32 ++++++++++ Lib/pickletools.py | 22 +++++++ Lib/test/pickletester.py | 3 + Lib/test/test_pickletools.py | 2 +- Modules/_pickle.c | 117 ++++++++++++++++++++++++++++++++++- 5 files changed, 174 insertions(+), 2 deletions(-) diff --git a/Lib/pickle.py b/Lib/pickle.py index 3e7cf25cb05337..392a688994d800 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -185,6 +185,7 @@ def __init__(self, value): BYTEARRAY8 = b'\x96' # push bytearray NEXT_BUFFER = b'\x97' # push next out-of-band buffer READONLY_BUFFER = b'\x98' # make top of stack readonly +FROZENDICT = b'\x99' # build frozendict from topmost stack items __all__.extend(x for x in dir() if x.isupper() and not x.startswith('_')) @@ -1081,6 +1082,30 @@ def save_dict(self, obj): dispatch[dict] = save_dict + def save_frozendict(self, obj): + if self.proto < 5: # FROZENDICT is a protocol 5 opcode + items = list(obj.items()) + self.save_reduce(frozendict, (items,), obj=obj) + return + + self.write(MARK) + + save = self.save + for key, value in obj.items(): + save(key) + save(value) + + if id(obj) in self.memo: + # If the object is already in the memo, this means it is + # recursive. In this case, throw away everything we put on the + # stack, and fetch the object back from the memo. 
+ self.write(POP_MARK + self.get(self.memo[id(obj)][0])) + return + + self.write(FROZENDICT) + self.memoize(obj) + dispatch[frozendict] = save_frozendict + def _batch_setitems(self, items, obj): # Helper to batch up SETITEMS sequences; proto >= 1 only save = self.save @@ -1621,6 +1646,13 @@ def load_dict(self): self.append(d) dispatch[DICT[0]] = load_dict + def load_frozendict(self): + items = self.pop_mark() + d = frozendict((items[i], items[i+1]) + for i in range(0, len(items), 2)) + self.append(d) + dispatch[FROZENDICT[0]] = load_frozendict + # INST and OBJ differ only in how they get a class object. It's not # only sensible to do the rest in a common routine, the two routines # previously diverged and grew different bugs. diff --git a/Lib/pickletools.py b/Lib/pickletools.py index 29baf3be7ebb6e..2c516db35ab0f2 100644 --- a/Lib/pickletools.py +++ b/Lib/pickletools.py @@ -1035,6 +1035,11 @@ def __repr__(self): obtype=dict, doc="A Python dict object.") +pyfrozendict = StackObject( + name="frozendict", + obtype=frozendict, + doc="A Python frozendict object.") + pyset = StackObject( name="set", obtype=set, @@ -1384,6 +1389,23 @@ def __init__(self, name, code, arg, proto=5, doc="Make an out-of-band buffer object read-only."), + I(name='FROZENDICT', + code='\x99', + arg=None, + stack_before=[markobject, stackslice], + stack_after=[pyfrozendict], + proto=5, + doc="""Build a frozendict out of the topmost stack slice, after markobject. + + All the stack entries following the topmost markobject are placed into + a single Python frozendict, which replaces all of the stack from the + topmost markobject onward. The stack slice alternates key, value, + key, value, .... For example, + + Stack before: ... markobject 1 2 3 'abc' + Stack after: ... frozendict({1: 2, 3: 'abc'}) + """), + # Ways to spell None. 
I(name='NONE', diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index c4460c2e44d578..09ed77152466fd 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -2860,6 +2860,9 @@ def test_recursive_tuple_and_inst(self): def test_recursive_dict_and_inst(self): self._test_recursive_collection_and_inst(dict.fromkeys, oldminproto=0) + def test_recursive_frozendict_and_inst(self): + self._test_recursive_collection_and_inst(frozendict.fromkeys, oldminproto=0) + def test_recursive_set_and_inst(self): self._test_recursive_collection_and_inst(set) diff --git a/Lib/test/test_pickletools.py b/Lib/test/test_pickletools.py index 57285ddf6ebef5..fc2d1e6f13b2e8 100644 --- a/Lib/test/test_pickletools.py +++ b/Lib/test/test_pickletools.py @@ -510,7 +510,7 @@ def test__all__(self): 'StackObject', 'pyint', 'pylong', 'pyinteger_or_bool', 'pybool', 'pyfloat', 'pybytes_or_str', 'pystring', 'pybytes', 'pybytearray', - 'pyunicode', 'pynone', 'pytuple', 'pylist', 'pydict', + 'pyunicode', 'pynone', 'pytuple', 'pylist', 'pydict', 'pyfrozendict', 'pyset', 'pyfrozenset', 'pybuffer', 'anyobject', 'markobject', 'stackslice', 'OpcodeInfo', 'opcodes', 'code2op', diff --git a/Modules/_pickle.c b/Modules/_pickle.c index a897e45f00fab6..b26f77cc127e4f 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -137,7 +137,8 @@ enum opcode { /* Protocol 5 */ BYTEARRAY8 = '\x96', NEXT_BUFFER = '\x97', - READONLY_BUFFER = '\x98' + READONLY_BUFFER = '\x98', + FROZENDICT = '\x99', }; enum { @@ -596,6 +597,34 @@ Pdata_poplist(Pdata *self, Py_ssize_t start) return list; } +static PyObject * +Pdata_poplist2(PickleState *state, Pdata *self, Py_ssize_t start) +{ + if (start < self->fence) { + Pdata_stack_underflow(state, self); + return NULL; + } + + Py_ssize_t len = (Py_SIZE(self) - start) >> 1; + + PyObject *list = PyList_New(len); + if (list == NULL) { + return NULL; + } + + for (Py_ssize_t i = start, j = 0; j < len; i+=2, j++) { + PyObject *subtuple = PyTuple_Pack(2, 
self->data[i], self->data[i+1]); + if (subtuple == NULL) { + Py_DECREF(list); + return NULL; + } + PyList_SET_ITEM(list, j, subtuple); + } + + Pdata_clear(self, start); /* PyTuple_Pack() took new references */ + return list; +} + typedef struct { PyObject *me_key; Py_ssize_t me_value; @@ -3594,6 +3623,63 @@ save_dict(PickleState *state, PicklerObject *self, PyObject *obj) return status; } +static int +save_frozendict(PickleState *state, PicklerObject *self, PyObject *obj) +{ + if (self->fast && !fast_save_enter(self, obj)) { + return -1; + } + + if (self->proto < 5) { /* FROZENDICT is a protocol 5 opcode */ + PyObject *items = PyDict_Items(obj); + if (items == NULL) { + return -1; + } + + PyObject *reduce_value; + reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenDict_Type, + items); + Py_DECREF(items); + if (reduce_value == NULL) { + return -1; + } + + /* save_reduce() will memoize the object automatically */ + int status = save_reduce(state, self, reduce_value, obj); + Py_DECREF(reduce_value); + return status; + } + + const char mark_op = MARK; + if (_Pickler_Write(self, &mark_op, 1) < 0) { + return -1; + } + + PyObject *key = NULL, *value = NULL; + Py_ssize_t pos = 0; + while (PyDict_Next(obj, &pos, &key, &value)) { + int res = save(state, self, key, 0); + if (res < 0) { + return -1; + } + + res = save(state, self, value, 0); + if (res < 0) { + return -1; + } + } + + const char frozendict_op = FROZENDICT; + if (_Pickler_Write(self, &frozendict_op, 1) < 0) { + return -1; + } + + if (memo_put(state, self, obj) < 0) { + return -1; + } + return 0; +} + static int save_set(PickleState *state, PicklerObject *self, PyObject *obj) { @@ -4569,6 +4655,10 @@ save(PickleState *st, PicklerObject *self, PyObject *obj, int pers_save) status = save_dict(st, self, obj); goto done; } + else if (type == &PyFrozenDict_Type) { + status = save_frozendict(st, self, obj); + goto done; + } else if (type == &PySet_Type) { status = save_set(st, self, obj); goto done; @@ -6030,6 +6120,30 @@ load_dict(PickleState *st, UnpicklerObject *self) return 0; } + 
+static int +load_frozendict(PickleState *st, UnpicklerObject *self) +{ + Py_ssize_t i = marker(st, self); + if (i < 0) { + return -1; + } + + PyObject *items = Pdata_poplist2(st, self->stack, i); + if (items == NULL) { + return -1; + } + + PyObject *frozendict = PyFrozenDict_New(items); + Py_DECREF(items); + if (frozendict == NULL) { + return -1; + } + + PDATA_PUSH(self->stack, frozendict, -1); + return 0; +} + static int load_frozenset(PickleState *state, UnpicklerObject *self) { @@ -7130,6 +7244,7 @@ load(PickleState *st, UnpicklerObject *self) OP(LIST, load_list) OP(EMPTY_DICT, load_empty_dict) OP(DICT, load_dict) + OP(FROZENDICT, load_frozendict) OP(EMPTY_SET, load_empty_set) OP(ADDITEMS, load_additems) OP(FROZENSET, load_frozenset) From a26910ebd7470a422e98bdd58944812197033cbd Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 18 Feb 2026 18:17:10 +0100 Subject: [PATCH 2/3] Fix the C implementation --- Modules/_pickle.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 8435b5748fe684..df2dc4d2b73ee6 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -3666,6 +3666,19 @@ save_frozendict(PickleState *state, PicklerObject *self, PyObject *obj) } } + /* If the object is already in the memo, this means it is + recursive. In this case, throw away everything we put on the + stack, and fetch the object back from the memo. 
*/ + if (PyMemoTable_Get(self->memo, obj)) { + const char pop_mark_op = POP_MARK; + + if (_Pickler_Write(self, &pop_mark_op, 1) < 0) + return -1; + if (memo_get(state, self, obj) < 0) + return -1; + return 0; + } + const char frozendict_op = FROZENDICT; if (_Pickler_Write(self, &frozendict_op, 1) < 0) { return -1; From 962cfc9c1f676679c2e6e95aaeb1379fb4093929 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 18 Feb 2026 18:18:14 +0100 Subject: [PATCH 3/3] Format C code --- Modules/_pickle.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Modules/_pickle.c b/Modules/_pickle.c index df2dc4d2b73ee6..ce6f66b306d03e 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -3671,11 +3671,12 @@ save_frozendict(PickleState *state, PicklerObject *self, PyObject *obj) stack, and fetch the object back from the memo. */ if (PyMemoTable_Get(self->memo, obj)) { const char pop_mark_op = POP_MARK; - - if (_Pickler_Write(self, &pop_mark_op, 1) < 0) + if (_Pickler_Write(self, &pop_mark_op, 1) < 0) { return -1; - if (memo_get(state, self, obj) < 0) + } + if (memo_get(state, self, obj) < 0) { return -1; + } return 0; }