From 44577697efdfed6241f93ad4b4c141d59a3bd3af Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sat, 27 Dec 2025 01:19:21 +0900 Subject: [PATCH 1/3] check surrogates --- Lib/test/test_builtin.py | 4 ---- crates/vm/src/builtins/str.rs | 2 +- crates/vm/src/builtins/type.rs | 22 ++++++++++++++++++++++ 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index 183caa898e..b3e48890d5 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -2401,8 +2401,6 @@ def test_type_nokwargs(self): with self.assertRaises(TypeError): type('a', (), dict={}) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_type_name(self): for name in 'A', '\xc4', '\U0001f40d', 'B.A', '42', '': with self.subTest(name=name): @@ -2452,8 +2450,6 @@ def test_type_qualname(self): A.__qualname__ = b'B' self.assertEqual(A.__qualname__, 'D.E') - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_type_doc(self): for doc in 'x', '\xc4', '\U0001f40d', 'x\x00y', b'x', 42, None: A = type('A', (), {'__doc__': doc}) diff --git a/crates/vm/src/builtins/str.rs b/crates/vm/src/builtins/str.rs index df65fd5a4e..bba918d241 100644 --- a/crates/vm/src/builtins/str.rs +++ b/crates/vm/src/builtins/str.rs @@ -441,7 +441,7 @@ impl PyStr { self.data.as_str() } - fn ensure_valid_utf8(&self, vm: &VirtualMachine) -> PyResult<()> { + pub(crate) fn ensure_valid_utf8(&self, vm: &VirtualMachine) -> PyResult<()> { if self.is_utf8() { Ok(()) } else { diff --git a/crates/vm/src/builtins/type.rs b/crates/vm/src/builtins/type.rs index bc567cd097..5ca317339d 100644 --- a/crates/vm/src/builtins/type.rs +++ b/crates/vm/src/builtins/type.rs @@ -1163,6 +1163,7 @@ impl PyType { if name.as_bytes().contains(&0) { return Err(vm.new_value_error("type name must not contain null characters")); } + name.ensure_valid_utf8(vm)?; // Use std::mem::replace to swap the new value in and get the old value out, // then drop the old value after releasing the lock 
(similar to CPython's Py_SETREF) @@ -1254,6 +1255,7 @@ impl Constructor for PyType { if name.as_bytes().contains(&0) { return Err(vm.new_value_error("type name must not contain null characters")); } + name.ensure_valid_utf8(vm)?; let (metatype, base, bases, base_is_type) = if bases.is_empty() { let base = vm.ctx.types.object_type.to_owned(); @@ -1306,6 +1308,13 @@ impl Constructor for PyType { }); let mut attributes = dict.to_attributes(vm); + // Check __doc__ for surrogates - raises UnicodeEncodeError during type creation + if let Some(doc) = attributes.get(identifier!(vm, __doc__)) + && let Some(doc_str) = doc.downcast_ref::<PyStr>() + { + doc_str.ensure_valid_utf8(vm)?; + } + if let Some(f) = attributes.get_mut(identifier!(vm, __init_subclass__)) && f.class().is(vm.ctx.types.function_type) { @@ -1340,6 +1349,13 @@ impl Constructor for PyType { let (heaptype_slots, add_dict): (Option<PyRef<PyTuple<PyStrRef>>>, bool) = if let Some(x) = attributes.get(identifier!(vm, __slots__)) { + // Check if __slots__ is bytes - not allowed + if x.class().is(vm.ctx.types.bytes_type) { + return Err(vm.new_type_error( + "__slots__ items must be strings, not 'bytes'".to_owned(), + )); + } + let slots = if x.class().is(vm.ctx.types.str_type) { let x = unsafe { x.downcast_unchecked_ref::<PyStr>() }; PyTuple::new_ref_typed(vec![x.to_owned()], &vm.ctx) @@ -1348,6 +1364,12 @@ impl Constructor for PyType { let elements = { let mut elements = Vec::new(); while let PyIterReturn::Return(element) = iter.next(vm)? 
{ + // Check if any slot item is bytes + if element.class().is(vm.ctx.types.bytes_type) { + return Err(vm.new_type_error( + "__slots__ items must be strings, not 'bytes'".to_owned(), + )); + } elements.push(element); } elements From c29d54a809f0466965f91ea1b538e812969382fd Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Sat, 27 Dec 2025 23:13:19 +0900 Subject: [PATCH 2/3] check __slots__ isidentifier --- crates/vm/src/builtins/str.rs | 2 +- crates/vm/src/builtins/type.rs | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/crates/vm/src/builtins/str.rs b/crates/vm/src/builtins/str.rs index bba918d241..fa35c1725d 100644 --- a/crates/vm/src/builtins/str.rs +++ b/crates/vm/src/builtins/str.rs @@ -1336,7 +1336,7 @@ impl PyStr { } #[pymethod] - fn isidentifier(&self) -> bool { + pub fn isidentifier(&self) -> bool { let Some(s) = self.to_str() else { return false }; let mut chars = s.chars(); let is_identifier_start = chars.next().is_some_and(|c| c == '_' || is_xid_start(c)); diff --git a/crates/vm/src/builtins/type.rs b/crates/vm/src/builtins/type.rs index 5ca317339d..a30fa3398a 100644 --- a/crates/vm/src/builtins/type.rs +++ b/crates/vm/src/builtins/type.rs @@ -1378,6 +1378,15 @@ impl Constructor for PyType { tuple.try_into_typed(vm)? 
}; + // Validate that all slots are valid identifiers + for slot in slots.iter() { + if !slot.isidentifier() { + return Err( + vm.new_type_error("__slots__ must be identifiers".to_owned()) + ); + } + } + // Check if __dict__ is in slots let dict_name = "__dict__"; let has_dict = slots.iter().any(|s| s.as_str() == dict_name); From 4184c27538a91e87cf7695f51cb5f821edbb295b Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 27 Dec 2025 14:15:43 +0000 Subject: [PATCH 3/3] Auto-format: cargo fmt --all --- crates/vm/src/builtins/type.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/crates/vm/src/builtins/type.rs b/crates/vm/src/builtins/type.rs index a30fa3398a..de34678ae4 100644 --- a/crates/vm/src/builtins/type.rs +++ b/crates/vm/src/builtins/type.rs @@ -1381,9 +1381,7 @@ impl Constructor for PyType { // Validate that all slots are valid identifiers for slot in slots.iter() { if !slot.isidentifier() { - return Err( - vm.new_type_error("__slots__ must be identifiers".to_owned()) - ); + return Err(vm.new_type_error("__slots__ must be identifiers".to_owned())); } }