From 9efc8d1acc53376f1321f41260859276d98655e1 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Fri, 16 Jan 2026 21:34:34 +0900 Subject: [PATCH 1/6] Implement pickle more --- crates/vm/src/builtins/object.rs | 210 +++++++++++++++++++++++++++---- crates/vm/src/stdlib/io.rs | 124 +++++++++++++++++- crates/vm/src/vm/context.rs | 1 + 3 files changed, 311 insertions(+), 24 deletions(-) diff --git a/crates/vm/src/builtins/object.rs b/crates/vm/src/builtins/object.rs index 6f07254254..0d2de76403 100644 --- a/crates/vm/src/builtins/object.rs +++ b/crates/vm/src/builtins/object.rs @@ -184,15 +184,15 @@ fn type_slot_names(typ: &Py, vm: &VirtualMachine) -> PyResult PyResult { - // TODO: itemsize - // if required && obj.class().slots.itemsize > 0 { - // return vm.new_type_error(format!( - // "cannot pickle {:.200} objects", - // obj.class().name() - // )); - // } + // Check itemsize + if required && obj.class().slots.itemsize > 0 { + return Err(vm.new_type_error(format!( + "cannot pickle {:.200} objects", + obj.class().name() + ))); + } let state = if obj.dict().is_none_or(|d| d.is_empty()) { vm.ctx.none() @@ -208,21 +208,23 @@ fn object_getstate_default(obj: &PyObject, required: bool, vm: &VirtualMachine) type_slot_names(obj.class(), vm).map_err(|_| vm.new_type_error("cannot pickle object"))?; if required { - let mut basicsize = obj.class().slots.basicsize; - // if obj.class().slots.dict_offset > 0 - // && !obj.class().slots.flags.has_feature(PyTypeFlags::MANAGED_DICT) - // { - // basicsize += std::mem::size_of::(); - // } - // if obj.class().slots.weaklist_offset > 0 { - // basicsize += std::mem::size_of::(); - // } + // Start with PyBaseObject_Type's basicsize + let mut basicsize = vm.ctx.types.object_type.slots.basicsize; + + // Add __dict__ size if type has dict + if obj.class().slots.flags.has_feature(PyTypeFlags::HAS_DICT) { + basicsize += core::mem::size_of::(); + } + + // Add slots size if let Some(ref slot_names) = slot_names { basicsize += core::mem::size_of::() 
* slot_names.__len__(); } + + // Fail if actual type's basicsize > expected basicsize if obj.class().slots.basicsize > basicsize { return Err( - vm.new_type_error(format!("cannot pickle {:.200} object", obj.class().name())) + vm.new_type_error(format!("cannot pickle '{}' object", obj.class().name())) ); } } @@ -249,7 +251,7 @@ fn object_getstate_default(obj: &PyObject, required: bool, vm: &VirtualMachine) Ok(state) } -// object_getstate in CPython +// object_getstate // fn object_getstate( // obj: &PyObject, // required: bool, @@ -550,11 +552,175 @@ pub fn init(ctx: &Context) { PyBaseObject::extend_class(ctx, ctx.types.object_type); } +/// Get arguments for __new__ from __getnewargs_ex__ or __getnewargs__ +/// Returns (args, kwargs) tuple where either can be None +fn get_new_arguments( + obj: &PyObject, + vm: &VirtualMachine, +) -> PyResult<(Option, Option)> { + // First try __getnewargs_ex__ + if let Some(getnewargs_ex) = vm.get_special_method(obj, identifier!(vm, __getnewargs_ex__))? { + let newargs = getnewargs_ex.invoke((), vm)?; + + let newargs_tuple: PyRef = newargs.downcast().map_err(|obj| { + vm.new_type_error(format!( + "__getnewargs_ex__ should return a tuple, not '{}'", + obj.class().name() + )) + })?; + + if newargs_tuple.len() != 2 { + return Err(vm.new_value_error(format!( + "__getnewargs_ex__ should return a tuple of length 2, not {}", + newargs_tuple.len() + ))); + } + + let args = newargs_tuple.as_slice()[0].clone(); + let kwargs = newargs_tuple.as_slice()[1].clone(); + + let args_tuple: PyRef = args.downcast().map_err(|obj| { + vm.new_type_error(format!( + "first item of the tuple returned by __getnewargs_ex__ must be a tuple, not '{}'", + obj.class().name() + )) + })?; + + let kwargs_dict: PyRef = kwargs.downcast().map_err(|obj| { + vm.new_type_error(format!( + "second item of the tuple returned by __getnewargs_ex__ must be a dict, not '{}'", + obj.class().name() + )) + })?; + + return Ok((Some(args_tuple), Some(kwargs_dict))); + } + + // Fall 
back to __getnewargs__ + if let Some(getnewargs) = vm.get_special_method(obj, identifier!(vm, __getnewargs__))? { + let args = getnewargs.invoke((), vm)?; + + let args_tuple: PyRef = args.downcast().map_err(|obj| { + vm.new_type_error(format!( + "__getnewargs__ should return a tuple, not '{}'", + obj.class().name() + )) + })?; + + return Ok((Some(args_tuple), None)); + } + + // No __getnewargs_ex__ or __getnewargs__ + Ok((None, None)) +} + +/// Check if __getstate__ is overridden by comparing with object.__getstate__ +fn is_getstate_overridden(obj: &PyObject, vm: &VirtualMachine) -> bool { + let obj_cls = obj.class(); + let object_type = vm.ctx.types.object_type; + + // If the class is object itself, not overridden + if obj_cls.is(object_type) { + return false; + } + + // Check if __getstate__ in the MRO comes from object or elsewhere + // If the type has its own __getstate__, it's overridden + if let Some(getstate) = obj_cls.get_attr(identifier!(vm, __getstate__)) + && let Some(obj_getstate) = object_type.get_attr(identifier!(vm, __getstate__)) + { + return !getstate.is(&obj_getstate); + } + false +} + +/// object_getstate - calls __getstate__ method or default implementation +fn object_getstate(obj: &PyObject, required: bool, vm: &VirtualMachine) -> PyResult { + // If __getstate__ is not overridden, use the default implementation with required flag + if !is_getstate_overridden(obj, vm) { + return object_getstate_default(obj, required, vm); + } + + // __getstate__ is overridden, call it without required + let getstate = obj.get_attr(identifier!(vm, __getstate__), vm)?; + getstate.call((), vm) +} + +/// Get list items iterator if obj is a list (or subclass), None iterator otherwise +fn get_items_iter(obj: &PyObjectRef, vm: &VirtualMachine) -> PyResult<(PyObjectRef, PyObjectRef)> { + let listitems: PyObjectRef = if obj.fast_isinstance(vm.ctx.types.list_type) { + obj.get_iter(vm)?.into() + } else { + vm.ctx.none() + }; + + let dictitems: PyObjectRef = if 
obj.fast_isinstance(vm.ctx.types.dict_type) { + let items = vm.call_method(obj, "items", ())?; + items.get_iter(vm)?.into() + } else { + vm.ctx.none() + }; + + Ok((listitems, dictitems)) +} + +/// reduce_newobj - creates reduce tuple for protocol >= 2 +fn reduce_newobj(obj: PyObjectRef, vm: &VirtualMachine) -> PyResult { + // Check if type has tp_new + let cls = obj.class(); + if cls.slots.new.load().is_none() { + return Err( + vm.new_type_error(format!("cannot pickle '{}' object", cls.name())) + ); + } + + let (args, kwargs) = get_new_arguments(&obj, vm)?; + + let copyreg = vm.import("copyreg", 0)?; + + let has_args = args.is_some(); + + let (newobj, newargs): (PyObjectRef, PyObjectRef) = if kwargs.is_none() || kwargs.as_ref().is_some_and(|k| k.is_empty()) { + // Use copyreg.__newobj__ + let newobj = copyreg.get_attr("__newobj__", vm)?; + + let args_vec: Vec = args + .map(|a| a.as_slice().to_vec()) + .unwrap_or_default(); + + // Create (cls, *args) tuple + let mut newargs_vec: Vec = vec![cls.to_owned().into()]; + newargs_vec.extend(args_vec); + let newargs = vm.ctx.new_tuple(newargs_vec); + + (newobj, newargs.into()) + } else { + // Use copyreg.__newobj_ex__ + let newobj = copyreg.get_attr("__newobj_ex__", vm)?; + let args_tuple: PyObjectRef = args.map(|a| a.into()).unwrap_or_else(|| vm.ctx.empty_tuple.clone().into()); + let kwargs_dict: PyObjectRef = kwargs.map(|k| k.into()).unwrap_or_else(|| vm.ctx.new_dict().into()); + + let newargs = vm.ctx.new_tuple(vec![cls.to_owned().into(), args_tuple, kwargs_dict]); + (newobj, newargs.into()) + }; + + // Determine if state is required + // required = !(has_args || is_list || is_dict) + let is_list = obj.fast_isinstance(vm.ctx.types.list_type); + let is_dict = obj.fast_isinstance(vm.ctx.types.dict_type); + let required = !(has_args || is_list || is_dict); + + let state = object_getstate(&obj, required, vm)?; + + let (listitems, dictitems) = get_items_iter(&obj, vm)?; + + let result = vm.ctx.new_tuple(vec![newobj, newargs, 
state, listitems, dictitems]); + Ok(result.into()) +} + fn common_reduce(obj: PyObjectRef, proto: usize, vm: &VirtualMachine) -> PyResult { if proto >= 2 { - let reducelib = vm.import("__reducelib", 0)?; - let reduce_2 = reducelib.get_attr("reduce_2", vm)?; - reduce_2.call((obj,), vm) + reduce_newobj(obj, vm) } else { let copyreg = vm.import("copyreg", 0)?; let reduce_ex = copyreg.get_attr("_reduce_ex", vm)?; diff --git a/crates/vm/src/stdlib/io.rs b/crates/vm/src/stdlib/io.rs index 54a38ef20e..552378050a 100644 --- a/crates/vm/src/stdlib/io.rs +++ b/crates/vm/src/stdlib/io.rs @@ -158,8 +158,8 @@ mod _io { AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, TryFromBorrowedObject, TryFromObject, builtins::{ - PyBaseExceptionRef, PyBool, PyByteArray, PyBytes, PyBytesRef, PyMemoryView, PyStr, - PyStrRef, PyTuple, PyTupleRef, PyType, PyTypeRef, PyUtf8StrRef, + PyBaseExceptionRef, PyBool, PyByteArray, PyBytes, PyBytesRef, PyDict, PyMemoryView, + PyStr, PyStrRef, PyTuple, PyTupleRef, PyType, PyTypeRef, PyUtf8StrRef, }, class::StaticType, common::lock::{ @@ -4077,6 +4077,67 @@ mod _io { const fn line_buffering(&self) -> bool { false } + + #[pymethod] + fn __getstate__(zelf: PyRef, vm: &VirtualMachine) -> PyResult { + let buffer = zelf.buffer(vm)?; + let content = Wtf8Buf::from_bytes(buffer.getvalue()) + .map_err(|_| vm.new_value_error("Error Retrieving Value"))?; + let pos = buffer.tell(); + drop(buffer); + + // Get __dict__ if it exists and is non-empty + let dict_obj: PyObjectRef = match zelf.as_object().dict() { + Some(d) if !d.is_empty() => d.into(), + _ => vm.ctx.none(), + }; + + // Return (content, newline, position, dict) + // TODO: store actual newline setting when it's implemented + Ok(vm.ctx.new_tuple(vec![ + vm.ctx.new_str(content).into(), + vm.ctx.new_str("\n").into(), + vm.ctx.new_int(pos).into(), + dict_obj, + ])) + } + + #[pymethod] + fn __setstate__(zelf: PyRef, state: PyTupleRef, vm: &VirtualMachine) -> PyResult<()> { + if 
state.len() != 4 { + return Err(vm.new_type_error(format!( + "__setstate__ argument should be 4-tuple, got {}", + state.len() + ))); + } + + let content: PyStrRef = state[0].clone().try_into_value(vm)?; + // state[1] is newline - TODO: use when newline handling is implemented + let pos: u64 = state[2].clone().try_into_value(vm)?; + let dict = &state[3]; + + // Set content + let raw_bytes = content.as_bytes().to_vec(); + *zelf.buffer.write() = BufferedIO::new(Cursor::new(raw_bytes)); + + // Set position + zelf.buffer(vm)? + .seek(SeekFrom::Start(pos)) + .map_err(|err| os_err(vm, err))?; + + // Set __dict__ if provided + if !vm.is_none(dict) { + let dict_ref: PyRef = dict.clone().try_into_value(vm)?; + if let Some(obj_dict) = zelf.as_object().dict() { + obj_dict.clear(); + for (key, value) in dict_ref.into_iter() { + obj_dict.set_item(&*key, value, vm)?; + } + } + } + + Ok(()) + } } #[pyattr] @@ -4225,6 +4286,65 @@ mod _io { self.closed.store(true); Ok(()) } + + #[pymethod] + fn __getstate__(zelf: PyRef, vm: &VirtualMachine) -> PyResult { + let buffer = zelf.buffer(vm)?; + let content = buffer.getvalue(); + let pos = buffer.tell(); + drop(buffer); + + // Get __dict__ if it exists and is non-empty + let dict_obj: PyObjectRef = match zelf.as_object().dict() { + Some(d) if !d.is_empty() => d.into(), + _ => vm.ctx.none(), + }; + + // Return (content, position, dict) + Ok(vm.ctx.new_tuple(vec![ + vm.ctx.new_bytes(content).into(), + vm.ctx.new_int(pos).into(), + dict_obj, + ])) + } + + #[pymethod] + fn __setstate__(zelf: PyRef, state: PyTupleRef, vm: &VirtualMachine) -> PyResult<()> { + if zelf.closed.load() { + return Err(vm.new_value_error("__setstate__ on closed file")); + } + if state.len() != 3 { + return Err(vm.new_type_error(format!( + "__setstate__ argument should be 3-tuple, got {}", + state.len() + ))); + } + + let content: PyBytesRef = state[0].clone().try_into_value(vm)?; + let pos: u64 = state[1].clone().try_into_value(vm)?; + let dict = &state[2]; + + // Set 
content + *zelf.buffer.write() = BufferedIO::new(Cursor::new(content.as_bytes().to_vec())); + + // Set position + zelf.buffer(vm)? + .seek(SeekFrom::Start(pos)) + .map_err(|err| os_err(vm, err))?; + + // Set __dict__ if provided + if !vm.is_none(dict) { + let dict_ref: PyRef = dict.clone().try_into_value(vm)?; + if let Some(obj_dict) = zelf.as_object().dict() { + obj_dict.clear(); + for (key, value) in dict_ref.into_iter() { + obj_dict.set_item(&*key, value, vm)?; + } + } + } + + Ok(()) + } } #[pyclass] diff --git a/crates/vm/src/vm/context.rs b/crates/vm/src/vm/context.rs index b12352f6ee..65c742e491 100644 --- a/crates/vm/src/vm/context.rs +++ b/crates/vm/src/vm/context.rs @@ -135,6 +135,7 @@ declare_const_name! { __getformat__, __getitem__, __getnewargs__, + __getnewargs_ex__, __getstate__, __gt__, __hash__, From 97207989affe0f0b5c935cfc2329585eb6b1d591 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Fri, 16 Jan 2026 23:58:45 +0900 Subject: [PATCH 2/6] unmark successful tests --- Lib/test/test_copy.py | 4 ---- Lib/test/test_csv.py | 1 - Lib/test/test_descr.py | 3 --- Lib/test/test_enum.py | 1 - Lib/test/test_lzma.py | 4 +--- Lib/test/test_memoryio.py | 12 ------------ Lib/test/test_pickle.py | 20 -------------------- 7 files changed, 1 insertion(+), 44 deletions(-) diff --git a/Lib/test/test_copy.py b/Lib/test/test_copy.py index 456767bbe0..e543cc236c 100644 --- a/Lib/test/test_copy.py +++ b/Lib/test/test_copy.py @@ -207,8 +207,6 @@ def __eq__(self, other): self.assertIsNot(y, x) self.assertEqual(y.foo, x.foo) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_copy_inst_getnewargs_ex(self): class C(int): def __new__(cls, *, foo): @@ -507,8 +505,6 @@ def __eq__(self, other): self.assertEqual(y.foo, x.foo) self.assertIsNot(y.foo, x.foo) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_deepcopy_inst_getnewargs_ex(self): class C(int): def __new__(cls, *, foo): diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index b7f93d1bac..bf9b187557 
100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -698,7 +698,6 @@ def test_copy(self): dialect = csv.get_dialect(name) self.assertRaises(TypeError, copy.copy, dialect) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_pickle(self): for name in csv.list_dialects(): dialect = csv.get_dialect(name) diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index 7420a49b8f..2ad302690c 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -5258,7 +5258,6 @@ def _check_reduce(self, proto, obj, args=(), kwargs={}, state=None, self.assertEqual(obj.__reduce_ex__(proto), reduce_value) self.assertEqual(obj.__reduce__(), reduce_value) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_reduce(self): protocols = range(pickle.HIGHEST_PROTOCOL + 1) args = (-101, "spam") @@ -5382,7 +5381,6 @@ class C16(list): for proto in protocols: self._check_reduce(proto, obj, listitems=list(obj)) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_special_method_lookup(self): protocols = range(pickle.HIGHEST_PROTOCOL + 1) class Picky: @@ -5515,7 +5513,6 @@ class E(C): y = pickle_copier.copy(x) self._assert_is_copy(x, y) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_reduce_copying(self): # Tests pickling and copying new-style classes and objects. 
global C1 diff --git a/Lib/test/test_enum.py b/Lib/test/test_enum.py index 5a961711cc..21a3b8edd4 100644 --- a/Lib/test/test_enum.py +++ b/Lib/test/test_enum.py @@ -2130,7 +2130,6 @@ class NEI(NamedInt, Enum): test_pickle_dump_load(self.assertIs, NEI.y) test_pickle_dump_load(self.assertIs, NEI) - @unittest.expectedFailure # TODO: RUSTPYTHON; fails on pickle def test_subclasses_with_getnewargs_ex(self): class NamedInt(int): __qualname__ = 'NamedInt' # needed for pickle protocol 4 diff --git a/Lib/test/test_lzma.py b/Lib/test/test_lzma.py index 1bac61f59e..4010ef9c34 100644 --- a/Lib/test/test_lzma.py +++ b/Lib/test/test_lzma.py @@ -409,8 +409,6 @@ def test_decompressor_bigmem(self, size): # Pickling raises an exception; there's no way to serialize an lzma_stream. - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_pickle(self): for proto in range(pickle.HIGHEST_PROTOCOL + 1): with self.assertRaises(TypeError): @@ -2194,4 +2192,4 @@ def test_filter_properties_roundtrip(self): if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/Lib/test/test_memoryio.py b/Lib/test/test_memoryio.py index 07d9d38d6e..343e5dd7a6 100644 --- a/Lib/test/test_memoryio.py +++ b/Lib/test/test_memoryio.py @@ -745,8 +745,6 @@ def test_init(self): def test_issue5449(self): super().test_issue5449() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_pickling(self): super().test_pickling() @@ -777,8 +775,6 @@ def test_truncate(self): def test_write(self): super().test_write() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_getstate(self): memio = self.ioclass() state = memio.__getstate__() @@ -911,8 +907,6 @@ def test_newline_none(self): def test_newlines_property(self): super().test_newlines_property() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_pickling(self): super().test_pickling() @@ -954,8 +948,6 @@ def test_widechar(self): self.assertEqual(memio.tell(), len(buf) * 2) self.assertEqual(memio.getvalue(), buf 
+ buf) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_getstate(self): memio = self.ioclass() state = memio.__getstate__() @@ -1006,8 +998,6 @@ def test_newline_cr(self): def test_newline_crlf(self): super().test_newline_crlf() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_newline_default(self): super().test_newline_default() @@ -1016,8 +1006,6 @@ def test_newline_default(self): def test_newline_empty(self): super().test_newline_empty() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_newline_lf(self): super().test_newline_lf() diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py index ea51b9d091..7271696a19 100644 --- a/Lib/test/test_pickle.py +++ b/Lib/test/test_pickle.py @@ -97,10 +97,6 @@ def dumps(self, arg, proto=None, **kwargs): def test_picklebuffer_error(self): # TODO(RUSTPYTHON): Remove this test when it passes return super().test_picklebuffer_error() - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_bad_getattr(self): # TODO(RUSTPYTHON): Remove this test when it passes - return super().test_bad_getattr() # TODO: RUSTPYTHON @unittest.expectedFailure @@ -135,15 +131,7 @@ def loads(self, buf, **kwds): def test_c_methods(self): # TODO(RUSTPYTHON): Remove this test when it passes return super().test_c_methods() - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_complex_newobj_ex(self): # TODO(RUSTPYTHON): Remove this test when it passes - return super().test_complex_newobj_ex() - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_py_methods(self): # TODO(RUSTPYTHON): Remove this test when it passes - return super().test_py_methods() # TODO: RUSTPYTHON @unittest.expectedFailure @@ -239,10 +227,6 @@ def loads(self, buf, **kwds): def test_c_methods(self): # TODO(RUSTPYTHON): Remove this test when it passes return super().test_c_methods() - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_complex_newobj_ex(self): # TODO(RUSTPYTHON): Remove this test when it passes - return 
super().test_complex_newobj_ex() # TODO: RUSTPYTHON @unittest.expectedFailure @@ -259,10 +243,6 @@ def test_correctly_quoted_string(self): # TODO(RUSTPYTHON): Remove this test whe def test_load_python2_str_as_bytes(self): # TODO(RUSTPYTHON): Remove this test when it passes return super().test_load_python2_str_as_bytes() - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_py_methods(self): # TODO(RUSTPYTHON): Remove this test when it passes - return super().test_py_methods() # TODO: RUSTPYTHON @unittest.expectedFailure From 3277a4db8be7a9de28907fe2c27b74a8e665bc20 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sat, 17 Jan 2026 00:01:07 +0900 Subject: [PATCH 3/6] add weakref check --- crates/vm/src/builtins/object.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/crates/vm/src/builtins/object.rs b/crates/vm/src/builtins/object.rs index 0d2de76403..03a6c82355 100644 --- a/crates/vm/src/builtins/object.rs +++ b/crates/vm/src/builtins/object.rs @@ -216,6 +216,20 @@ fn object_getstate_default(obj: &PyObject, required: bool, vm: &VirtualMachine) basicsize += core::mem::size_of::(); } + // Add __weakref__ size if type has weakref support + let has_weakref = if let Some(ref ext) = obj.class().heaptype_ext { + match &ext.slots { + None => true, // Heap type without __slots__ has automatic weakref + Some(slots) => slots.iter().any(|s| s.as_str() == "__weakref__"), + } + } else { + let weakref_name = vm.ctx.intern_str("__weakref__"); + obj.class().attributes.read().contains_key(weakref_name) + }; + if has_weakref { + basicsize += core::mem::size_of::(); + } + // Add slots size if let Some(ref slot_names) = slot_names { basicsize += core::mem::size_of::() * slot_names.__len__(); From 1fc57afdcc7dd67118c010b813ff50be0f6c7409 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sat, 17 Jan 2026 00:01:21 +0900 Subject: [PATCH 4/6] check exports in __setstate__ --- crates/vm/src/builtins/object.rs | 39 ++++++++++++++++---------------- 
crates/vm/src/stdlib/io.rs | 24 ++++++++++++-------- crates/vm/src/stdlib/thread.rs | 2 +- 3 files changed, 35 insertions(+), 30 deletions(-) diff --git a/crates/vm/src/builtins/object.rs b/crates/vm/src/builtins/object.rs index 03a6c82355..982e11afd0 100644 --- a/crates/vm/src/builtins/object.rs +++ b/crates/vm/src/builtins/object.rs @@ -188,10 +188,7 @@ fn type_slot_names(typ: &Py, vm: &VirtualMachine) -> PyResult PyResult { // Check itemsize if required && obj.class().slots.itemsize > 0 { - return Err(vm.new_type_error(format!( - "cannot pickle {:.200} objects", - obj.class().name() - ))); + return Err(vm.new_type_error(format!("cannot pickle {:.200} objects", obj.class().name()))); } let state = if obj.dict().is_none_or(|d| d.is_empty()) { @@ -237,9 +234,7 @@ fn object_getstate_default(obj: &PyObject, required: bool, vm: &VirtualMachine) // Fail if actual type's basicsize > expected basicsize if obj.class().slots.basicsize > basicsize { - return Err( - vm.new_type_error(format!("cannot pickle '{}' object", obj.class().name())) - ); + return Err(vm.new_type_error(format!("cannot pickle '{}' object", obj.class().name()))); } } @@ -683,9 +678,7 @@ fn reduce_newobj(obj: PyObjectRef, vm: &VirtualMachine) -> PyResult { // Check if type has tp_new let cls = obj.class(); if cls.slots.new.load().is_none() { - return Err( - vm.new_type_error(format!("cannot pickle '{}' object", cls.name())) - ); + return Err(vm.new_type_error(format!("cannot pickle '{}' object", cls.name()))); } let (args, kwargs) = get_new_arguments(&obj, vm)?; @@ -694,13 +687,13 @@ fn reduce_newobj(obj: PyObjectRef, vm: &VirtualMachine) -> PyResult { let has_args = args.is_some(); - let (newobj, newargs): (PyObjectRef, PyObjectRef) = if kwargs.is_none() || kwargs.as_ref().is_some_and(|k| k.is_empty()) { + let (newobj, newargs): (PyObjectRef, PyObjectRef) = if kwargs.is_none() + || kwargs.as_ref().is_some_and(|k| k.is_empty()) + { // Use copyreg.__newobj__ let newobj = copyreg.get_attr("__newobj__", 
vm)?; - let args_vec: Vec = args - .map(|a| a.as_slice().to_vec()) - .unwrap_or_default(); + let args_vec: Vec = args.map(|a| a.as_slice().to_vec()).unwrap_or_default(); // Create (cls, *args) tuple let mut newargs_vec: Vec = vec![cls.to_owned().into()]; @@ -711,10 +704,16 @@ fn reduce_newobj(obj: PyObjectRef, vm: &VirtualMachine) -> PyResult { } else { // Use copyreg.__newobj_ex__ let newobj = copyreg.get_attr("__newobj_ex__", vm)?; - let args_tuple: PyObjectRef = args.map(|a| a.into()).unwrap_or_else(|| vm.ctx.empty_tuple.clone().into()); - let kwargs_dict: PyObjectRef = kwargs.map(|k| k.into()).unwrap_or_else(|| vm.ctx.new_dict().into()); - - let newargs = vm.ctx.new_tuple(vec![cls.to_owned().into(), args_tuple, kwargs_dict]); + let args_tuple: PyObjectRef = args + .map(|a| a.into()) + .unwrap_or_else(|| vm.ctx.empty_tuple.clone().into()); + let kwargs_dict: PyObjectRef = kwargs + .map(|k| k.into()) + .unwrap_or_else(|| vm.ctx.new_dict().into()); + + let newargs = vm + .ctx + .new_tuple(vec![cls.to_owned().into(), args_tuple, kwargs_dict]); (newobj, newargs.into()) }; @@ -728,7 +727,9 @@ fn reduce_newobj(obj: PyObjectRef, vm: &VirtualMachine) -> PyResult { let (listitems, dictitems) = get_items_iter(&obj, vm)?; - let result = vm.ctx.new_tuple(vec![newobj, newargs, state, listitems, dictitems]); + let result = vm + .ctx + .new_tuple(vec![newobj, newargs, state, listitems, dictitems]); Ok(result.into()) } diff --git a/crates/vm/src/stdlib/io.rs b/crates/vm/src/stdlib/io.rs index 552378050a..9cb0b7d7d2 100644 --- a/crates/vm/src/stdlib/io.rs +++ b/crates/vm/src/stdlib/io.rs @@ -4104,6 +4104,10 @@ mod _io { #[pymethod] fn __setstate__(zelf: PyRef, state: PyTupleRef, vm: &VirtualMachine) -> PyResult<()> { + // Check closed state first (like CHECK_CLOSED) + if zelf.closed.load() { + return Err(vm.new_value_error("__setstate__ on closed file")); + } if state.len() != 4 { return Err(vm.new_type_error(format!( "__setstate__ argument should be 4-tuple, got {}", @@ 
-4116,14 +4120,14 @@ mod _io { let pos: u64 = state[2].clone().try_into_value(vm)?; let dict = &state[3]; - // Set content + // Set content and position let raw_bytes = content.as_bytes().to_vec(); - *zelf.buffer.write() = BufferedIO::new(Cursor::new(raw_bytes)); - - // Set position - zelf.buffer(vm)? + let mut buffer = zelf.buffer.write(); + *buffer = BufferedIO::new(Cursor::new(raw_bytes)); + buffer .seek(SeekFrom::Start(pos)) .map_err(|err| os_err(vm, err))?; + drop(buffer); // Set __dict__ if provided if !vm.is_none(dict) { @@ -4324,13 +4328,13 @@ mod _io { let pos: u64 = state[1].clone().try_into_value(vm)?; let dict = &state[2]; - // Set content - *zelf.buffer.write() = BufferedIO::new(Cursor::new(content.as_bytes().to_vec())); - - // Set position - zelf.buffer(vm)? + // Check exports and set content (like CHECK_EXPORTS) + let mut buffer = zelf.try_resizable(vm)?; + *buffer = BufferedIO::new(Cursor::new(content.as_bytes().to_vec())); + buffer .seek(SeekFrom::Start(pos)) .map_err(|err| os_err(vm, err))?; + drop(buffer); // Set __dict__ if provided if !vm.is_none(dict) { diff --git a/crates/vm/src/stdlib/thread.rs b/crates/vm/src/stdlib/thread.rs index d51d78015d..db588e5eab 100644 --- a/crates/vm/src/stdlib/thread.rs +++ b/crates/vm/src/stdlib/thread.rs @@ -516,7 +516,7 @@ pub(crate) mod _thread { let mut handles = vm.state.shutdown_handles.lock(); // Clean up finished entries handles.retain(|(inner_weak, _): &ShutdownEntry| { - inner_weak.upgrade().map_or(false, |inner| { + inner_weak.upgrade().is_some_and(|inner| { let guard = inner.lock(); guard.state != ThreadHandleState::Done && guard.ident != current_ident }) From 4eb7828ffd2beed1b1b0449e181916ae16c3eb9c Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sat, 17 Jan 2026 00:08:52 +0900 Subject: [PATCH 5/6] remove reducelib --- crates/vm/Lib/python_builtins/__reducelib.py | 86 -------------------- 1 file changed, 86 deletions(-) delete mode 100644 crates/vm/Lib/python_builtins/__reducelib.py diff --git 
a/crates/vm/Lib/python_builtins/__reducelib.py b/crates/vm/Lib/python_builtins/__reducelib.py deleted file mode 100644 index 0067cd0a81..0000000000 --- a/crates/vm/Lib/python_builtins/__reducelib.py +++ /dev/null @@ -1,86 +0,0 @@ -# Modified from code from the PyPy project: -# https://bitbucket.org/pypy/pypy/src/default/pypy/objspace/std/objectobject.py - -# The MIT License - -# Permission is hereby granted, free of charge, to any person -# obtaining a copy of this software and associated documentation -# files (the "Software"), to deal in the Software without -# restriction, including without limitation the rights to use, -# copy, modify, merge, publish, distribute, sublicense, and/or -# sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. 
- -import copyreg - - -def _abstract_method_error(typ): - methods = ", ".join(sorted(typ.__abstractmethods__)) - err = "Can't instantiate abstract class %s with abstract methods %s" - raise TypeError(err % (typ.__name__, methods)) - - -def reduce_2(obj): - cls = obj.__class__ - - try: - getnewargs = obj.__getnewargs__ - except AttributeError: - args = () - else: - args = getnewargs() - if not isinstance(args, tuple): - raise TypeError("__getnewargs__ should return a tuple") - - try: - getstate = obj.__getstate__ - except AttributeError: - state = getattr(obj, "__dict__", None) - names = slotnames(cls) # not checking for list - if names is not None: - slots = {} - for name in names: - try: - value = getattr(obj, name) - except AttributeError: - pass - else: - slots[name] = value - if slots: - state = state, slots - else: - state = getstate() - - listitems = iter(obj) if isinstance(obj, list) else None - dictitems = iter(obj.items()) if isinstance(obj, dict) else None - - newobj = copyreg.__newobj__ - - args2 = (cls,) + args - return newobj, args2, state, listitems, dictitems - - -def slotnames(cls): - if not isinstance(cls, type): - return None - - try: - return cls.__dict__["__slotnames__"] - except KeyError: - pass - - slotnames = copyreg._slotnames(cls) - if not isinstance(slotnames, list) and slotnames is not None: - raise TypeError("copyreg._slotnames didn't return a list or None") - return slotnames From a418c68fa0018c97efd38104a02e9fb5396d2080 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sat, 17 Jan 2026 00:54:15 +0900 Subject: [PATCH 6/6] unmark more tests --- Lib/test/test_pickletools.py | 8 -------- Lib/test/test_typing.py | 1 - Lib/test/test_unittest/test_runner.py | 1 - Lib/test/test_zlib.py | 6 ------ 4 files changed, 16 deletions(-) diff --git a/Lib/test/test_pickletools.py b/Lib/test/test_pickletools.py index 492f57cce2..42a20da00b 100644 --- a/Lib/test/test_pickletools.py +++ b/Lib/test/test_pickletools.py @@ -97,15 +97,7 @@ def 
test_oob_buffers(self): # TODO(RUSTPYTHON): Remove this test when it passes def test_oob_buffers_writable_to_readonly(self): # TODO(RUSTPYTHON): Remove this test when it passes return super().test_oob_buffers_writable_to_readonly() - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_py_methods(self): # TODO(RUSTPYTHON): Remove this test when it passes - return super().test_py_methods() - # TODO: RUSTPYTHON - @unittest.expectedFailure - def test_complex_newobj_ex(self): # TODO(RUSTPYTHON): Remove this test when it passes - return super().test_complex_newobj_ex() # TODO: RUSTPYTHON @unittest.expectedFailure diff --git a/Lib/test/test_typing.py b/Lib/test/test_typing.py index 3e6c530cec..17cbaa9221 100644 --- a/Lib/test/test_typing.py +++ b/Lib/test/test_typing.py @@ -10328,7 +10328,6 @@ def test_special_attrs(self): TypeName = typing.NewType('SpecialAttrsTests.TypeName', Any) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_special_attrs2(self): # Forward refs provide a different introspection API. __name__ and # __qualname__ make little sense for forward refs as they can store diff --git a/Lib/test/test_unittest/test_runner.py b/Lib/test/test_unittest/test_runner.py index 790c4d29ca..4d3cfd60b8 100644 --- a/Lib/test/test_unittest/test_runner.py +++ b/Lib/test/test_unittest/test_runner.py @@ -1297,7 +1297,6 @@ def _makeResult(self): expected = ['startTestRun', 'stopTestRun'] self.assertEqual(events, expected) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_pickle_unpickle(self): # Issue #7197: a TextTestRunner should be (un)pickleable. This is # required by test_multiprocessing under Windows (in verbose mode). 
diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py index 0a75457ad8..c9f7b18340 100644 --- a/Lib/test/test_zlib.py +++ b/Lib/test/test_zlib.py @@ -747,15 +747,11 @@ def test_baddecompresscopy(self): self.assertRaises(ValueError, copy.copy, d) self.assertRaises(ValueError, copy.deepcopy, d) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_compresspickle(self): for proto in range(pickle.HIGHEST_PROTOCOL + 1): with self.assertRaises((TypeError, pickle.PicklingError)): pickle.dumps(zlib.compressobj(zlib.Z_BEST_COMPRESSION), proto) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_decompresspickle(self): for proto in range(pickle.HIGHEST_PROTOCOL + 1): with self.assertRaises((TypeError, pickle.PicklingError)): @@ -1006,8 +1002,6 @@ def testDecompress4G(self, size): compressed = None decompressed = None - # TODO: RUSTPYTHON - @unittest.expectedFailure def testPickle(self): for proto in range(pickle.HIGHEST_PROTOCOL + 1): with self.assertRaises(TypeError):