From f166c47da1981e31277445eb8ca49550a073472a Mon Sep 17 00:00:00 2001
From: Lee Dogeon <dev.moreal@gmail.com>
Date: Sun, 11 Jan 2026 18:05:35 +0900
Subject: [PATCH 1/9] Parse JSON in Rust

---
 crates/stdlib/src/json.rs           | 357 ++++++++++++++++++++++++++--
 crates/stdlib/src/json/machinery.rs |   2 +-
 2 files changed, 339 insertions(+), 20 deletions(-)
diff --git a/crates/stdlib/src/json.rs b/crates/stdlib/src/json.rs
index cc98ad912cc..d576e0190b1 100644
--- a/crates/stdlib/src/json.rs
+++ b/crates/stdlib/src/json.rs
@@ -16,6 +16,26 @@ mod _json {
     use malachite_bigint::BigInt;
     use rustpython_common::wtf8::Wtf8Buf;
 
+    /// Skip JSON whitespace characters (space, tab, newline, carriage return).
+    /// Works with a character iterator and returns the number of characters skipped.
+    #[inline]
+    fn skip_whitespace_chars<I>(chars: &mut std::iter::Peekable<I>) -> usize
+    where
+        I: Iterator<Item = char>,
+    {
+        let mut count = 0;
+        while let Some(&c) = chars.peek() {
+            match c {
+                ' ' | '\t' | '\n' | '\r' => {
+                    chars.next();
+                    count += 1;
+                }
+                _ => break,
+            }
+        }
+        count
+    }
+
     #[pyattr(name = "make_scanner")]
     #[pyclass(name = "Scanner", traverse)]
     #[derive(Debug, PyPayload)]
@@ -90,27 +110,16 @@ mod _json {
                         .map(|x| PyIterReturn::Return(x.to_pyobject(vm)));
                 }
                 '{' => {
-                    // TODO: parse the object in rust
-                    let parse_obj = self.ctx.get_attr("parse_object", vm)?;
-                    let result = parse_obj.call(
-                        (
-                            (pystr, next_idx),
-                            self.strict,
-                            scan_once,
-                            self.object_hook.clone(),
-                            self.object_pairs_hook.clone(),
-                        ),
-                        vm,
-                    );
-                    return PyIterReturn::from_pyresult(result, vm);
+                    // Parse object in Rust
+                    return self
+                        .parse_object(pystr, next_idx, &scan_once, vm)
+                        .map(|(obj, end)| PyIterReturn::Return(vm.new_tuple((obj, end)).into()));
                 }
                 '[' => {
-                    // TODO: parse the array in rust
-                    let parse_array = self.ctx.get_attr("parse_array", vm)?;
-                    return PyIterReturn::from_pyresult(
-                        parse_array.call(((pystr, next_idx), scan_once), vm),
-                        vm,
-                    );
+                    // Parse array in Rust
+                    return self
+                        .parse_array(pystr, next_idx, &scan_once, vm)
+                        .map(|(obj, end)| PyIterReturn::Return(vm.new_tuple((obj, end)).into()));
                 }
                 _ => {}
             }
@@ -189,6 +198,316 @@ mod _json {
             };
             Some((ret, buf.len()))
         }
+
+        /// Parse a JSON object starting after the opening '{'.
+        /// Returns (parsed_object, end_character_index).
+        fn parse_object(
+            &self,
+            pystr: PyStrRef,
+            start_idx: usize, // Character index right after '{'
+            scan_once: &PyObjectRef,
+            vm: &VirtualMachine,
+        ) -> PyResult<(PyObjectRef, usize)> {
+            flame_guard!("JsonScanner::parse_object");
+
+            let s = pystr.as_str();
+            let mut chars = s.chars().skip(start_idx).peekable();
+            let mut idx = start_idx;
+
+            // Skip initial whitespace
+            idx += skip_whitespace_chars(&mut chars);
+
+            // Check for empty object
+            match chars.peek() {
+                Some('}') => {
+                    return self.finalize_object(vec![], idx + 1, vm);
+                }
+                Some('"') => {
+                    // Continue to parse first key
+                }
+                Some(_) | None => {
+                    return Err(self.make_decode_error(
+                        "Expecting property name enclosed in double quotes",
+                        pystr,
+                        idx,
+                        vm,
+                    ));
+                }
+            }
+
+            let mut pairs: Vec<(PyObjectRef, PyObjectRef)> = Vec::new();
+
+            loop {
+                // We're now at '"', skip it
+                chars.next();
+                idx += 1;
+
+                // Parse key string using existing scanstring
+                let (key_wtf8, key_end) = machinery::scanstring(pystr.as_wtf8(), idx, self.strict)
+                    .map_err(|e| py_decode_error(e, pystr.clone(), vm))?;
+
+                let key_str = key_wtf8.to_string();
+                let key: PyObjectRef = vm.ctx.new_str(key_str).into();
+
+                // Update position and rebuild iterator
+                idx = key_end;
+                chars = s.chars().skip(idx).peekable();
+
+                // Skip whitespace after key
+                idx += skip_whitespace_chars(&mut chars);
+
+                // Expect ':' delimiter
+                match chars.peek() {
+                    Some(':') => {
+                        chars.next();
+                        idx += 1;
+                    }
+                    _ => {
+                        return Err(self.make_decode_error(
+                            "Expecting ':' delimiter",
+                            pystr,
+                            idx,
+                            vm,
+                        ));
+                    }
+                }
+
+                // Skip whitespace after ':'
+                idx += skip_whitespace_chars(&mut chars);
+
+                // Parse value recursively using scan_once
+                let (value, value_end) = self.call_scan_once(scan_once, pystr.clone(), idx, vm)?;
+
+                pairs.push((key, value));
+                idx = value_end;
+                chars = s.chars().skip(idx).peekable();
+
+                // Skip whitespace after value
+                idx += skip_whitespace_chars(&mut chars);
+
+                // Check for ',' or '}'
+                match chars.peek() {
+                    Some('}') => {
+                        idx += 1;
+                        break;
+                    }
+                    Some(',') => {
+                        let comma_idx = idx;
+                        chars.next();
+                        idx += 1;
+
+                        // Skip whitespace after comma
+                        idx += skip_whitespace_chars(&mut chars);
+
+                        // Next must be '"'
+                        match chars.peek() {
+                            Some('"') => {
+                                // Continue to next key-value pair
+                            }
+                            Some('}') => {
+                                // Trailing comma before end of object
+                                return Err(self.make_decode_error(
+                                    "Illegal trailing comma before end of object",
+                                    pystr,
+                                    comma_idx,
+                                    vm,
+                                ));
+                            }
+                            _ => {
+                                return Err(self.make_decode_error(
+                                    "Expecting property name enclosed in double quotes",
+                                    pystr,
+                                    idx,
+                                    vm,
+                                ));
+                            }
+                        }
+                    }
+                    _ => {
+                        return Err(self.make_decode_error(
+                            "Expecting ',' delimiter",
+                            pystr,
+                            idx,
+                            vm,
+                        ));
+                    }
+                }
+            }
+
+            self.finalize_object(pairs, idx, vm)
+        }
+
+        /// Parse a JSON array starting after the opening '['.
+        /// Returns (parsed_array, end_character_index).
+        fn parse_array(
+            &self,
+            pystr: PyStrRef,
+            start_idx: usize, // Character index right after '['
+            scan_once: &PyObjectRef,
+            vm: &VirtualMachine,
+        ) -> PyResult<(PyObjectRef, usize)> {
+            flame_guard!("JsonScanner::parse_array");
+
+            let s = pystr.as_str();
+            let mut chars = s.chars().skip(start_idx).peekable();
+            let mut idx = start_idx;
+
+            // Skip initial whitespace
+            idx += skip_whitespace_chars(&mut chars);
+
+            // Check for empty array
+            if chars.peek() == Some(&']') {
+                return Ok((vm.ctx.new_list(vec![]).into(), idx + 1));
+            }
+
+            let mut values: Vec<PyObjectRef> = Vec::new();
+
+            loop {
+                // Parse value
+                let (value, value_end) = self.call_scan_once(scan_once, pystr.clone(), idx, vm)?;
+
+                values.push(value);
+                idx = value_end;
+                chars = s.chars().skip(idx).peekable();
+
+                // Skip whitespace after value
+                idx += skip_whitespace_chars(&mut chars);
+
+                match chars.peek() {
+                    Some(']') => {
+                        idx += 1;
+                        break;
+                    }
+                    Some(',') => {
+                        let comma_idx = idx;
+                        chars.next();
+                        idx += 1;
+                        // Skip whitespace after comma
+                        idx += skip_whitespace_chars(&mut chars);
+
+                        // Check for trailing comma
+                        if chars.peek() == Some(&']') {
+                            return Err(self.make_decode_error(
+                                "Illegal trailing comma before end of array",
+                                pystr,
+                                comma_idx,
+                                vm,
+                            ));
+                        }
+                    }
+                    _ => {
+                        return Err(self.make_decode_error(
+                            "Expecting ',' delimiter",
+                            pystr,
+                            idx,
+                            vm,
+                        ));
+                    }
+                }
+            }
+
+            Ok((vm.ctx.new_list(values).into(), idx))
+        }
+
+        /// Finalize object construction with hooks.
+        fn finalize_object(
+            &self,
+            pairs: Vec<(PyObjectRef, PyObjectRef)>,
+            end_idx: usize,
+            vm: &VirtualMachine,
+        ) -> PyResult<(PyObjectRef, usize)> {
+            let result = if let Some(ref pairs_hook) = self.object_pairs_hook {
+                // object_pairs_hook takes priority - pass list of tuples
+                let pairs_list: Vec<PyObjectRef> = pairs
+                    .into_iter()
+                    .map(|(k, v)| vm.new_tuple((k, v)).into())
+                    .collect();
+                pairs_hook.call((vm.ctx.new_list(pairs_list),), vm)?
+            } else {
+                // Build a dict from pairs
+                let dict = vm.ctx.new_dict();
+                for (key, value) in pairs {
+                    dict.set_item(&*key, value, vm)?;
+                }
+
+                // Apply object_hook if present
+                let dict_obj: PyObjectRef = dict.into();
+                if let Some(ref hook) = self.object_hook {
+                    hook.call((dict_obj,), vm)?
+                } else {
+                    dict_obj
+                }
+            };
+
+            Ok((result, end_idx))
+        }
+
+        /// Call scan_once and handle the result.
+        fn call_scan_once(
+            &self,
+            scan_once: &PyObjectRef,
+            pystr: PyStrRef,
+            idx: usize,
+            vm: &VirtualMachine,
+        ) -> PyResult<(PyObjectRef, usize)> {
+            // First try to handle common cases directly in Rust
+            let s = pystr.as_str();
+            let mut chars = s.chars().skip(idx).peekable();
+
+            match chars.peek() {
+                Some('"') => {
+                    // String - parse directly in Rust
+                    let (wtf8, end) = machinery::scanstring(pystr.as_wtf8(), idx + 1, self.strict)
+                        .map_err(|e| py_decode_error(e, pystr.clone(), vm))?;
+                    let py_str = vm.ctx.new_str(wtf8.to_string());
+                    return Ok((py_str.into(), end));
+                }
+                Some('{') => {
+                    // Nested object - parse recursively in Rust
+                    return self.parse_object(pystr, idx + 1, scan_once, vm);
+                }
+                Some('[') => {
+                    // Nested array - parse recursively in Rust
+                    return self.parse_array(pystr, idx + 1, scan_once, vm);
+                }
+                _ => {
+                    // For other cases (numbers, null, true, false, etc.)
+                    // fall through to call scan_once
+                }
+            }
+
+            // Fall back to scan_once for other value types
+            let result = scan_once.call((pystr.clone(), idx as isize), vm);
+
+            match result {
+                Ok(tuple) => {
+                    use crate::vm::builtins::PyTupleRef;
+                    let tuple: PyTupleRef = tuple.try_into_value(vm)?;
+                    if tuple.len() != 2 {
+                        return Err(vm.new_value_error("scan_once must return 2-tuple"));
+                    }
+                    let value = tuple.as_slice()[0].clone();
+                    let end_idx: isize = tuple.as_slice()[1].try_to_value(vm)?;
+                    Ok((value, end_idx as usize))
+                }
+                Err(err) if err.fast_isinstance(vm.ctx.exceptions.stop_iteration) => {
+                    Err(self.make_decode_error("Expecting value", pystr, idx, vm))
+                }
+                Err(err) => Err(err),
+            }
+        }
+
+        /// Create a decode error.
+        fn make_decode_error(
+            &self,
+            msg: &str,
+            s: PyStrRef,
+            pos: usize,
+            vm: &VirtualMachine,
+        ) -> PyBaseExceptionRef {
+            let err = machinery::DecodeError::new(msg, pos);
+            py_decode_error(err, s, vm)
+        }
     }
 
     impl Callable for JsonScanner {
diff --git a/crates/stdlib/src/json/machinery.rs b/crates/stdlib/src/json/machinery.rs
index 57b8ae441f7..de3c1d8547f 100644
--- a/crates/stdlib/src/json/machinery.rs
+++ b/crates/stdlib/src/json/machinery.rs
@@ -108,7 +108,7 @@ pub struct DecodeError {
     pub pos: usize,
 }
 impl DecodeError {
-    fn new(msg: impl Into<String>, pos: usize) -> Self {
+    pub fn new(msg: impl Into<String>, pos: usize) -> Self {
         let msg = msg.into();
         Self { msg, pos }
     }

From 14243d4646e58207324b8859889bc2d7f4cde810 Mon Sep 17 00:00:00 2001
From: Lee Dogeon <dev.moreal@gmail.com>
Date: Wed, 14 Jan 2026 20:21:12 +0900
Subject: [PATCH 2/9] Reuse key when decoding JSON

---
 crates/stdlib/src/json.rs | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/crates/stdlib/src/json.rs b/crates/stdlib/src/json.rs
index d576e0190b1..adf77775c72 100644
--- a/crates/stdlib/src/json.rs
+++ b/crates/stdlib/src/json.rs
@@ -15,6 +15,7 @@ mod _json {
     use core::str::FromStr;
     use malachite_bigint::BigInt;
     use rustpython_common::wtf8::Wtf8Buf;
+    use std::collections::HashMap;
 
     /// Skip JSON whitespace characters (space, tab, newline, carriage return).
     /// Works with a character iterator and returns the number of characters skipped.
@@ -111,14 +112,16 @@ mod _json {
                 }
                 '{' => {
                     // Parse object in Rust
+                    let mut memo = HashMap::new();
                     return self
-                        .parse_object(pystr, next_idx, &scan_once, vm)
+                        .parse_object(pystr, next_idx, &scan_once, &mut memo, vm)
                         .map(|(obj, end)| PyIterReturn::Return(vm.new_tuple((obj, end)).into()));
                 }
                 '[' => {
                     // Parse array in Rust
+                    let mut memo = HashMap::new();
                     return self
-                        .parse_array(pystr, next_idx, &scan_once, vm)
+                        .parse_array(pystr, next_idx, &scan_once, &mut memo, vm)
                         .map(|(obj, end)| PyIterReturn::Return(vm.new_tuple((obj, end)).into()));
                 }
                 _ => {}
@@ -206,6 +209,7 @@ mod _json {
             pystr: PyStrRef,
             start_idx: usize, // Character index right after '{'
             scan_once: &PyObjectRef,
+            memo: &mut HashMap<String, PyStrRef>,
             vm: &VirtualMachine,
         ) -> PyResult<(PyObjectRef, usize)> {
             flame_guard!("JsonScanner::parse_object");
@@ -246,8 +250,16 @@ mod _json {
                 let (key_wtf8, key_end) = machinery::scanstring(pystr.as_wtf8(), idx, self.strict)
                     .map_err(|e| py_decode_error(e, pystr.clone(), vm))?;
 
+                // Key memoization - reuse existing key strings
                 let key_str = key_wtf8.to_string();
-                let key: PyObjectRef = vm.ctx.new_str(key_str).into();
+                let key: PyObjectRef = match memo.get(&key_str) {
+                    Some(cached) => cached.clone().into(),
+                    None => {
+                        let py_key = vm.ctx.new_str(key_str.clone());
+                        memo.insert(key_str, py_key.clone());
+                        py_key.into()
+                    }
+                };
 
                 // Update position and rebuild iterator
                 idx = key_end;
@@ -276,7 +288,8 @@ mod _json {
                 idx += skip_whitespace_chars(&mut chars);
 
                 // Parse value recursively using scan_once
-                let (value, value_end) = self.call_scan_once(scan_once, pystr.clone(), idx, vm)?;
+                let (value, value_end) =
+                    self.call_scan_once(scan_once, pystr.clone(), idx, memo, vm)?;
 
                 pairs.push((key, value));
                 idx = value_end;
@@ -344,6 +357,7 @@ mod _json {
             pystr: PyStrRef,
             start_idx: usize, // Character index right after '['
             scan_once: &PyObjectRef,
+            memo: &mut HashMap<String, PyStrRef>,
             vm: &VirtualMachine,
         ) -> PyResult<(PyObjectRef, usize)> {
             flame_guard!("JsonScanner::parse_array");
@@ -364,7 +378,8 @@ mod _json {
 
             loop {
                 // Parse value
-                let (value, value_end) = self.call_scan_once(scan_once, pystr.clone(), idx, vm)?;
+                let (value, value_end) =
+                    self.call_scan_once(scan_once, pystr.clone(), idx, memo, vm)?;
 
                 values.push(value);
                 idx = value_end;
@@ -448,6 +463,7 @@ mod _json {
             scan_once: &PyObjectRef,
             pystr: PyStrRef,
             idx: usize,
+            memo: &mut HashMap<String, PyStrRef>,
             vm: &VirtualMachine,
         ) -> PyResult<(PyObjectRef, usize)> {
             // First try to handle common cases directly in Rust
@@ -464,11 +480,11 @@ mod _json {
                 }
                 Some('{') => {
                     // Nested object - parse recursively in Rust
-                    return self.parse_object(pystr, idx + 1, scan_once, vm);
+                    return self.parse_object(pystr, idx + 1, scan_once, memo, vm);
                 }
                 Some('[') => {
                     // Nested array - parse recursively in Rust
-                    return self.parse_array(pystr, idx + 1, scan_once, vm);
+                    return self.parse_array(pystr, idx + 1, scan_once, memo, vm);
                 }
                 _ => {
                     // For other cases (numbers, null, true, false, etc.)

From 099c8a3128d948a3ef32a86e56715ac6783ec65f Mon Sep 17 00:00:00 2001
From: Lee Dogeon <dev.moreal@gmail.com>
Date: Wed, 14 Jan 2026 02:01:17 +0900
Subject: [PATCH 3/9] Unmark resolved test

---
 Lib/test/test_json/test_decode.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Lib/test/test_json/test_decode.py b/Lib/test/test_json/test_decode.py
index f07f7d55339..ad37d47f083 100644
--- a/Lib/test/test_json/test_decode.py
+++ b/Lib/test/test_json/test_decode.py
@@ -138,8 +138,6 @@ def test_limit_int(self):
 class TestPyDecode(TestDecode, PyTest): pass
 
 class TestCDecode(TestDecode, CTest):
-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
     def test_keys_reuse(self):
         return super().test_keys_reuse()
 

From 788ecb3acabb21854453f76af08b7e594d07221d Mon Sep 17 00:00:00 2001
From: Lee Dogeon <dev.moreal@gmail.com>
Date: Wed, 14 Jan 2026 20:31:40 +0900
Subject: [PATCH 4/9] Parse null/true/false directly in call_scan_once

Parse JSON constants (null, true, false) directly in Rust within
call_scan_once() instead of falling back to Python scan_once.
This reduces Python-Rust boundary crossings for array/object values.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 crates/stdlib/src/json.rs | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/crates/stdlib/src/json.rs b/crates/stdlib/src/json.rs
index adf77775c72..e4d1e6fc99c 100644
--- a/crates/stdlib/src/json.rs
+++ b/crates/stdlib/src/json.rs
@@ -470,6 +470,8 @@ mod _json {
             let s = pystr.as_str();
             let mut chars = s.chars().skip(idx).peekable();
 
+            let remaining = &s[idx..];
+
             match chars.peek() {
                 Some('"') => {
                     // String - parse directly in Rust
@@ -486,8 +488,26 @@ mod _json {
                     // Nested array - parse recursively in Rust
                     return self.parse_array(pystr, idx + 1, scan_once, memo, vm);
                 }
+                Some('n') => {
+                    // null - parse directly in Rust
+                    if remaining.starts_with("null") {
+                        return Ok((vm.ctx.none(), idx + 4));
+                    }
+                }
+                Some('t') => {
+                    // true - parse directly in Rust
+                    if remaining.starts_with("true") {
+                        return Ok((vm.ctx.new_bool(true).into(), idx + 4));
+                    }
+                }
+                Some('f') => {
+                    // false - parse directly in Rust
+                    if remaining.starts_with("false") {
+                        return Ok((vm.ctx.new_bool(false).into(), idx + 5));
+                    }
+                }
                 _ => {
-                    // For other cases (numbers, null, true, false, etc.)
+                    // For other cases (numbers, NaN, Infinity, etc.)
                     // fall through to call scan_once
                 }
             }

From e79ab96f4636d89bdf88021f497b60e09141ddd1 Mon Sep 17 00:00:00 2001
From: Lee Dogeon <dev.moreal@gmail.com>
Date: Wed, 14 Jan 2026 20:33:00 +0900
Subject: [PATCH 5/9] Parse numbers directly in call_scan_once

Parse JSON numbers starting with digits (0-9) directly in Rust within
call_scan_once() by reusing the existing parse_number() method.
This reduces Python-Rust boundary crossings for array/object values.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 crates/stdlib/src/json.rs | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/crates/stdlib/src/json.rs b/crates/stdlib/src/json.rs
index e4d1e6fc99c..e83b8099e5d 100644
--- a/crates/stdlib/src/json.rs
+++ b/crates/stdlib/src/json.rs
@@ -506,8 +506,14 @@ mod _json {
                         return Ok((vm.ctx.new_bool(false).into(), idx + 5));
                     }
                 }
+                Some(c) if c.is_ascii_digit() => {
+                    // Number starting with digit - parse directly in Rust
+                    if let Some((result, len)) = self.parse_number(remaining, vm) {
+                        return Ok((result?, idx + len));
+                    }
+                }
                 _ => {
-                    // For other cases (numbers, NaN, Infinity, etc.)
+                    // For other cases (NaN, Infinity, -Infinity, negative numbers, etc.)
                     // fall through to call scan_once
                 }
             }

From 529e3a61dfd26727405018da557209cf4a50ec0b Mon Sep 17 00:00:00 2001
From: Lee Dogeon <dev.moreal@gmail.com>
Date: Wed, 14 Jan 2026 20:35:20 +0900
Subject: [PATCH 6/9] Parse NaN/Infinity/-Infinity in call_scan_once

Parse special JSON constants (NaN, Infinity, -Infinity) and negative
numbers directly in Rust within call_scan_once(). This handles:
- 'N' -> NaN via parse_constant callback
- 'I' -> Infinity via parse_constant callback
- '-' -> -Infinity or negative numbers via parse_constant/parse_number

This reduces Python-Rust boundary crossings for array/object values.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 crates/stdlib/src/json.rs | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/crates/stdlib/src/json.rs b/crates/stdlib/src/json.rs
index e83b8099e5d..c3b5ee01c37 100644
--- a/crates/stdlib/src/json.rs
+++ b/crates/stdlib/src/json.rs
@@ -512,8 +512,32 @@ mod _json {
                         return Ok((result?, idx + len));
                     }
                 }
+                Some('N') => {
+                    // NaN - parse directly in Rust
+                    if remaining.starts_with("NaN") {
+                        let result = self.parse_constant.call(("NaN",), vm)?;
+                        return Ok((result, idx + 3));
+                    }
+                }
+                Some('I') => {
+                    // Infinity - parse directly in Rust
+                    if remaining.starts_with("Infinity") {
+                        let result = self.parse_constant.call(("Infinity",), vm)?;
+                        return Ok((result, idx + 8));
+                    }
+                }
+                Some('-') => {
+                    // -Infinity or negative number
+                    if remaining.starts_with("-Infinity") {
+                        let result = self.parse_constant.call(("-Infinity",), vm)?;
+                        return Ok((result, idx + 9));
+                    }
+                    // Try parsing as negative number
+                    if let Some((result, len)) = self.parse_number(remaining, vm) {
+                        return Ok((result?, idx + len));
+                    }
+                }
                 _ => {
-                    // For other cases (NaN, Infinity, -Infinity, negative numbers, etc.)
                     // fall through to call scan_once
                 }
             }

From 3adc21dcb3e16e9ed46cc023b2a40ce0da52aeea Mon Sep 17 00:00:00 2001
From: Lee Dogeon <dev.moreal@gmail.com>
Date: Wed, 14 Jan 2026 22:48:29 +0900
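Subject: [PATCH 7/9] Fix byte/char index mismatch in call_scan_once

call_scan_once sliced the input with `&s[idx..]`, treating the character
index `idx` as a byte offset, which is wrong for non-ASCII strings.
Inspect the input through character iterators instead.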

---
 crates/stdlib/src/json.rs | 197 ++++++++++++++++++++++++--------------
 1 file changed, 127 insertions(+), 70 deletions(-)

diff --git a/crates/stdlib/src/json.rs b/crates/stdlib/src/json.rs
index c3b5ee01c37..2e410d8df88 100644
--- a/crates/stdlib/src/json.rs
+++ b/crates/stdlib/src/json.rs
@@ -37,6 +37,22 @@ mod _json {
         count
     }
 
+    /// Check if a character iterator starts with a given pattern.
+    /// This avoids byte/char index mismatch issues with non-ASCII strings.
+    #[inline]
+    fn starts_with_chars<I>(mut chars: I, pattern: &str) -> bool
+    where
+        I: Iterator<Item = char>,
+    {
+        for expected in pattern.chars() {
+            match chars.next() {
+                Some(c) if c == expected => continue,
+                _ => return false,
+            }
+        }
+        true
+    }
+
     #[pyattr(name = "make_scanner")]
     #[pyclass(name = "Scanner", traverse)]
     #[derive(Debug, PyPayload)]
@@ -202,6 +218,54 @@ mod _json {
             Some((ret, buf.len()))
         }
 
+        /// Parse a number from a character iterator.
+        /// Returns (result, character_count) where character_count is the number of chars consumed.
+        fn parse_number_from_chars<I>(
+            &self,
+            chars: I,
+            vm: &VirtualMachine,
+        ) -> Option<(PyResult, usize)>
+        where
+            I: Iterator<Item = char>,
+        {
+            let mut buf = String::new();
+            let mut has_neg = false;
+            let mut has_decimal = false;
+            let mut has_exponent = false;
+            let mut has_e_sign = false;
+
+            for c in chars {
+                let i = buf.len();
+                match c {
+                    '-' if i == 0 => has_neg = true,
+                    n if n.is_ascii_digit() => {}
+                    '.' if !has_decimal => has_decimal = true,
+                    'e' | 'E' if !has_exponent => has_exponent = true,
+                    '+' | '-' if !has_e_sign => has_e_sign = true,
+                    _ => break,
+                }
+                buf.push(c);
+            }
+
+            let len = buf.len();
+            if len == 0 || (len == 1 && has_neg) {
+                return None;
+            }
+
+            let ret = if has_decimal || has_exponent {
+                if let Some(ref parse_float) = self.parse_float {
+                    parse_float.call((&buf,), vm)
+                } else {
+                    Ok(vm.ctx.new_float(f64::from_str(&buf).unwrap()).into())
+                }
+            } else if let Some(ref parse_int) = self.parse_int {
+                parse_int.call((&buf,), vm)
+            } else {
+                Ok(vm.new_pyobj(BigInt::from_str(&buf).unwrap()))
+            };
+            Some((ret, len))
+        }
+
         /// Parse a JSON object starting after the opening '{'.
         /// Returns (parsed_object, end_character_index).
         fn parse_object(
@@ -458,6 +522,7 @@ mod _json {
         }
 
         /// Call scan_once and handle the result.
+        /// Uses character iterators to avoid byte/char index mismatch with non-ASCII strings.
         fn call_scan_once(
             &self,
             scan_once: &PyObjectRef,
@@ -466,100 +531,92 @@ mod _json {
             memo: &mut HashMap<String, PyStrRef>,
             vm: &VirtualMachine,
         ) -> PyResult<(PyObjectRef, usize)> {
-            // First try to handle common cases directly in Rust
             let s = pystr.as_str();
-            let mut chars = s.chars().skip(idx).peekable();
+            let chars = s.chars().skip(idx).peekable();
 
-            let remaining = &s[idx..];
+            let first_char = match chars.clone().next() {
+                Some(c) => c,
+                None => return Err(self.make_decode_error("Expecting value", pystr, idx, vm)),
+            };
 
-            match chars.peek() {
-                Some('"') => {
-                    // String - parse directly in Rust
+            match first_char {
+                '"' => {
+                    // String
                     let (wtf8, end) = machinery::scanstring(pystr.as_wtf8(), idx + 1, self.strict)
                         .map_err(|e| py_decode_error(e, pystr.clone(), vm))?;
                     let py_str = vm.ctx.new_str(wtf8.to_string());
-                    return Ok((py_str.into(), end));
+                    Ok((py_str.into(), end))
                 }
-                Some('{') => {
-                    // Nested object - parse recursively in Rust
-                    return self.parse_object(pystr, idx + 1, scan_once, memo, vm);
+                '{' => {
+                    // Object
+                    self.parse_object(pystr, idx + 1, scan_once, memo, vm)
                 }
-                Some('[') => {
-                    // Nested array - parse recursively in Rust
-                    return self.parse_array(pystr, idx + 1, scan_once, memo, vm);
+                '[' => {
+                    // Array
+                    self.parse_array(pystr, idx + 1, scan_once, memo, vm)
                 }
-                Some('n') => {
-                    // null - parse directly in Rust
-                    if remaining.starts_with("null") {
-                        return Ok((vm.ctx.none(), idx + 4));
-                    }
+                'n' if starts_with_chars(chars.clone(), "null") => {
+                    // null
+                    Ok((vm.ctx.none(), idx + 4))
                 }
-                Some('t') => {
-                    // true - parse directly in Rust
-                    if remaining.starts_with("true") {
-                        return Ok((vm.ctx.new_bool(true).into(), idx + 4));
-                    }
+                't' if starts_with_chars(chars.clone(), "true") => {
+                    // true
+                    Ok((vm.ctx.new_bool(true).into(), idx + 4))
                 }
-                Some('f') => {
-                    // false - parse directly in Rust
-                    if remaining.starts_with("false") {
-                        return Ok((vm.ctx.new_bool(false).into(), idx + 5));
-                    }
+                'f' if starts_with_chars(chars.clone(), "false") => {
+                    // false
+                    Ok((vm.ctx.new_bool(false).into(), idx + 5))
                 }
-                Some(c) if c.is_ascii_digit() => {
-                    // Number starting with digit - parse directly in Rust
-                    if let Some((result, len)) = self.parse_number(remaining, vm) {
-                        return Ok((result?, idx + len));
-                    }
+                'N' if starts_with_chars(chars.clone(), "NaN") => {
+                    // NaN
+                    let result = self.parse_constant.call(("NaN",), vm)?;
+                    Ok((result, idx + 3))
                 }
-                Some('N') => {
-                    // NaN - parse directly in Rust
-                    if remaining.starts_with("NaN") {
-                        let result = self.parse_constant.call(("NaN",), vm)?;
-                        return Ok((result, idx + 3));
-                    }
+                'I' if starts_with_chars(chars.clone(), "Infinity") => {
+                    // Infinity
+                    let result = self.parse_constant.call(("Infinity",), vm)?;
+                    Ok((result, idx + 8))
                 }
-                Some('I') => {
-                    // Infinity - parse directly in Rust
-                    if remaining.starts_with("Infinity") {
-                        let result = self.parse_constant.call(("Infinity",), vm)?;
-                        return Ok((result, idx + 8));
-                    }
-                }
-                Some('-') => {
+                '-' => {
                     // -Infinity or negative number
-                    if remaining.starts_with("-Infinity") {
+                    if starts_with_chars(chars.clone(), "-Infinity") {
                         let result = self.parse_constant.call(("-Infinity",), vm)?;
                         return Ok((result, idx + 9));
                     }
-                    // Try parsing as negative number
-                    if let Some((result, len)) = self.parse_number(remaining, vm) {
+                    // Negative number - collect number characters
+                    if let Some((result, len)) = self.parse_number_from_chars(chars, vm) {
                         return Ok((result?, idx + len));
                     }
+                    Err(self.make_decode_error("Expecting value", pystr, idx, vm))
                 }
-                _ => {
-                    // fall through to call scan_once
-                }
-            }
-
-            // Fall back to scan_once for other value types
-            let result = scan_once.call((pystr.clone(), idx as isize), vm);
-
-            match result {
-                Ok(tuple) => {
-                    use crate::vm::builtins::PyTupleRef;
-                    let tuple: PyTupleRef = tuple.try_into_value(vm)?;
-                    if tuple.len() != 2 {
-                        return Err(vm.new_value_error("scan_once must return 2-tuple"));
+                c if c.is_ascii_digit() => {
+                    // Positive number
+                    if let Some((result, len)) = self.parse_number_from_chars(chars, vm) {
+                        return Ok((result?, idx + len));
                     }
-                    let value = tuple.as_slice()[0].clone();
-                    let end_idx: isize = tuple.as_slice()[1].try_to_value(vm)?;
-                    Ok((value, end_idx as usize))
-                }
-                Err(err) if err.fast_isinstance(vm.ctx.exceptions.stop_iteration) => {
                     Err(self.make_decode_error("Expecting value", pystr, idx, vm))
                 }
-                Err(err) => Err(err),
+                _ => {
+                    // Fall back to scan_once for unrecognized input
+                    let result = scan_once.call((pystr.clone(), idx as isize), vm);
+
+                    match result {
+                        Ok(tuple) => {
+                            use crate::vm::builtins::PyTupleRef;
+                            let tuple: PyTupleRef = tuple.try_into_value(vm)?;
+                            if tuple.len() != 2 {
+                                return Err(vm.new_value_error("scan_once must return 2-tuple"));
+                            }
+                            let value = tuple.as_slice()[0].clone();
+                            let end_idx: isize = tuple.as_slice()[1].try_to_value(vm)?;
+                            Ok((value, end_idx as usize))
+                        }
+                        Err(err) if err.fast_isinstance(vm.ctx.exceptions.stop_iteration) => {
+                            Err(self.make_decode_error("Expecting value", pystr, idx, vm))
+                        }
+                        Err(err) => Err(err),
+                    }
+                }
             }
         }
 

From e39c6b9adc7506ef0b978dc3778cd8488005200a Mon Sep 17 00:00:00 2001
From: Lee Dogeon <dev.moreal@gmail.com>
Date: Thu, 15 Jan 2026 01:33:29 +0900
Subject: [PATCH 8/9] Leave more flame spans

---
 crates/stdlib/src/json.rs           |   2 +
 crates/stdlib/src/json/machinery.rs | 184 ++++++++++++++++++----------
 2 files changed, 118 insertions(+), 68 deletions(-)

diff --git a/crates/stdlib/src/json.rs b/crates/stdlib/src/json.rs
index 2e410d8df88..a07acc5bd29 100644
--- a/crates/stdlib/src/json.rs
+++ b/crates/stdlib/src/json.rs
@@ -24,6 +24,7 @@ mod _json {
     where
         I: Iterator<Item = char>,
     {
+        flame_guard!("_json::skip_whitespace_chars");
         let mut count = 0;
         while let Some(&c) = chars.peek() {
             match c {
@@ -228,6 +229,7 @@ mod _json {
         where
             I: Iterator<Item = char>,
         {
+            flame_guard!("JsonScanner::parse_number_from_chars");
             let mut buf = String::new();
             let mut has_neg = false;
             let mut has_decimal = false;
diff --git a/crates/stdlib/src/json/machinery.rs b/crates/stdlib/src/json/machinery.rs
index de3c1d8547f..9f379a962ac 100644
--- a/crates/stdlib/src/json/machinery.rs
+++ b/crates/stdlib/src/json/machinery.rs
@@ -30,6 +30,7 @@
 use std::io;
 
 use itertools::Itertools;
+use memchr::memchr2;
 use rustpython_common::wtf8::{CodePoint, Wtf8, Wtf8Buf};
 
 static ESCAPE_CHARS: [&str; 0x20] = [
@@ -131,79 +132,125 @@ pub fn scanstring<'a>(
     end: usize,
     strict: bool,
 ) -> Result<(Wtf8Buf, usize), DecodeError> {
-    let mut chunks: Vec<StrOrChar<'a>> = Vec::new();
-    let mut output_len = 0usize;
-    let mut push_chunk = |chunk: StrOrChar<'a>| {
-        output_len += chunk.len();
-        chunks.push(chunk);
-    };
+    flame_guard!("machinery::scanstring");
     let unterminated_err = || DecodeError::new("Unterminated string starting at", end - 1);
-    let mut chars = s.code_point_indices().enumerate().skip(end).peekable();
-    let &(_, (mut chunk_start, _)) = chars.peek().ok_or_else(unterminated_err)?;
-    while let Some((char_i, (byte_i, c))) = chars.next() {
-        match c.to_char_lossy() {
-            '"' => {
-                push_chunk(StrOrChar::Str(&s[chunk_start..byte_i]));
-                let mut out = Wtf8Buf::with_capacity(output_len);
-                for x in chunks {
-                    match x {
-                        StrOrChar::Str(s) => out.push_wtf8(s),
-                        StrOrChar::Char(c) => out.push(c),
+
+    // Get byte index for character position `end`
+    let byte_start = {
+        flame_guard!("machinery::scanstring::byte_start_initialization");
+        s.code_point_indices()
+            .nth(end)
+            .ok_or_else(unterminated_err)?
+            .0
+    };
+
+    let bytes = s.as_bytes();
+    let search_bytes = &bytes[byte_start..];
+
+    // Fast path: use memchr to find " or \ quickly
+    if let Some(pos) = {
+        flame_guard!("machinery::scanstring::memchr2");
+        memchr2(b'"', b'\\', search_bytes)
+    } {
+        flame_guard!("machinery::scanstring::memchr2::condition_some");
+        if search_bytes[pos] == b'"' {
+            flame_guard!("machinery::scanstring::memchr2::condition_some::condition_if");
+            let content_bytes = &search_bytes[..pos];
+
+            // In strict mode, check for control characters (0x00-0x1F)
+            let has_control_char = strict && content_bytes.iter().any(|&b| b < 0x20);
+
+            if !has_control_char {
+                flame_guard!("machinery::scanstring::fast_path");
+                let result_slice = &s[byte_start..byte_start + pos];
+                let char_count = result_slice.code_points().count();
+                let mut out = Wtf8Buf::with_capacity(pos);
+                out.push_wtf8(result_slice);
+                return Ok((out, end + char_count + 1));
+            }
+        }
+    }
+
+    // Slow path: chunk-based parsing for strings with escapes or control chars
+    {
+        flame_guard!("machinery::scanstring::slow_path");
+        let mut chunks: Vec<StrOrChar<'a>> = Vec::new();
+        let mut output_len = 0usize;
+        let mut push_chunk = |chunk: StrOrChar<'a>| {
+            output_len += chunk.len();
+            chunks.push(chunk);
+        };
+        let mut chars = s.code_point_indices().enumerate().skip(end).peekable();
+        let &(_, (mut chunk_start, _)) = chars.peek().ok_or_else(unterminated_err)?;
+        while let Some((char_i, (byte_i, c))) = chars.next() {
+            match c.to_char_lossy() {
+                '"' => {
+                    push_chunk(StrOrChar::Str(&s[chunk_start..byte_i]));
+                    flame_guard!("machinery::scanstring::assemble_chunks");
+                    let mut out = Wtf8Buf::with_capacity(output_len);
+                    for x in chunks {
+                        match x {
+                            StrOrChar::Str(s) => out.push_wtf8(s),
+                            StrOrChar::Char(c) => out.push(c),
+                        }
                     }
+                    return Ok((out, char_i + 1));
                 }
-                return Ok((out, char_i + 1));
-            }
-            '\\' => {
-                push_chunk(StrOrChar::Str(&s[chunk_start..byte_i]));
-                let (_, (_, c)) = chars.next().ok_or_else(unterminated_err)?;
-                let esc = match c.to_char_lossy() {
-                    '"' => "\"",
-                    '\\' => "\\",
-                    '/' => "/",
-                    'b' => "\x08",
-                    'f' => "\x0c",
-                    'n' => "\n",
-                    'r' => "\r",
-                    't' => "\t",
-                    'u' => {
-                        let mut uni = decode_unicode(&mut chars, char_i)?;
-                        chunk_start = byte_i + 6;
-                        if let Some(lead) = uni.to_lead_surrogate() {
-                            // uni is a surrogate -- try to find its pair
-                            let mut chars2 = chars.clone();
-                            if let Some(((pos2, _), (_, _))) = chars2
-                                .next_tuple()
-                                .filter(|((_, (_, c1)), (_, (_, c2)))| *c1 == '\\' && *c2 == 'u')
-                            {
-                                let uni2 = decode_unicode(&mut chars2, pos2)?;
-                                if let Some(trail) = uni2.to_trail_surrogate() {
-                                    // ok, we found what we were looking for -- \uXXXX\uXXXX, both surrogates
-                                    uni = lead.merge(trail).into();
-                                    chunk_start = pos2 + 6;
-                                    chars = chars2;
+                '\\' => {
+                    push_chunk(StrOrChar::Str(&s[chunk_start..byte_i]));
+                    let (_, (_, c)) = chars.next().ok_or_else(unterminated_err)?;
+                    let esc =
+                        match c.to_char_lossy() {
+                            '"' => "\"",
+                            '\\' => "\\",
+                            '/' => "/",
+                            'b' => "\x08",
+                            'f' => "\x0c",
+                            'n' => "\n",
+                            'r' => "\r",
+                            't' => "\t",
+                            'u' => {
+                                let mut uni = decode_unicode(&mut chars, char_i)?;
+                                chunk_start = byte_i + 6;
+                                if let Some(lead) = uni.to_lead_surrogate() {
+                                    // uni is a surrogate -- try to find its pair
+                                    let mut chars2 = chars.clone();
+                                    if let Some(((pos2, _), (_, _))) = chars2.next_tuple().filter(
+                                        |((_, (_, c1)), (_, (_, c2)))| *c1 == '\\' && *c2 == 'u',
+                                    ) {
+                                        let uni2 = decode_unicode(&mut chars2, pos2)?;
+                                        if let Some(trail) = uni2.to_trail_surrogate() {
+                                            // ok, we found what we were looking for -- \uXXXX\uXXXX, both surrogates
+                                            uni = lead.merge(trail).into();
+                                            chunk_start = pos2 + 6;
+                                            chars = chars2;
+                                        }
+                                    }
                                 }
+                                push_chunk(StrOrChar::Char(uni));
+                                continue;
                             }
-                        }
-                        push_chunk(StrOrChar::Char(uni));
-                        continue;
-                    }
-                    _ => {
-                        return Err(DecodeError::new(format!("Invalid \\escape: {c:?}"), char_i));
-                    }
-                };
-                chunk_start = byte_i + 2;
-                push_chunk(StrOrChar::Str(esc.as_ref()));
-            }
-            '\x00'..='\x1f' if strict => {
-                return Err(DecodeError::new(
-                    format!("Invalid control character {c:?} at"),
-                    char_i,
-                ));
+                            _ => {
+                                return Err(DecodeError::new(
+                                    format!("Invalid \\escape: {c:?}"),
+                                    char_i,
+                                ));
+                            }
+                        };
+                    chunk_start = byte_i + 2;
+                    push_chunk(StrOrChar::Str(esc.as_ref()));
+                }
+                '\x00'..='\x1f' if strict => {
+                    return Err(DecodeError::new(
+                        format!("Invalid control character {c:?} at"),
+                        char_i,
+                    ));
+                }
+                _ => {}
             }
-            _ => {}
         }
+        Err(unterminated_err())
     }
-    Err(unterminated_err())
 }
 
 #[inline]
@@ -211,12 +258,13 @@ fn decode_unicode<I>(it: &mut I, pos: usize) -> Result<CodePoint, DecodeError>
 where
     I: Iterator<Item = (usize, (usize, CodePoint))>,
 {
+    flame_guard!("machinery::decode_unicode");
     let err = || DecodeError::new("Invalid \\uXXXX escape", pos);
-    let mut uni = 0;
-    for x in (0..4).rev() {
+    let mut uni = 0u16;
+    for _ in 0..4 {
         let (_, (_, c)) = it.next().ok_or_else(err)?;
         let d = c.to_char().and_then(|c| c.to_digit(16)).ok_or_else(err)? as u16;
-        uni += d * 16u16.pow(x);
+        uni = (uni << 4) | d;
     }
     Ok(uni.into())
 }

From 088fd8300fd7e6d0cdc2ac8ecea41d2221fbfa60 Mon Sep 17 00:00:00 2001
From: Lee Dogeon <dev.moreal@gmail.com>
Date: Thu, 15 Jan 2026 02:29:11 +0900
Subject: [PATCH 9/9] Refactor json scanstring with byte index

---
 crates/stdlib/src/json.rs           | 500 +++++++++++++++-------------
 crates/stdlib/src/json/machinery.rs | 204 ++++++------
 2 files changed, 364 insertions(+), 340 deletions(-)

diff --git a/crates/stdlib/src/json.rs b/crates/stdlib/src/json.rs
index a07acc5bd29..4bdf4533533 100644
--- a/crates/stdlib/src/json.rs
+++ b/crates/stdlib/src/json.rs
@@ -7,7 +7,7 @@ mod _json {
     use crate::vm::{
         AsObject, Py, PyObjectRef, PyPayload, PyResult, VirtualMachine,
         builtins::{PyBaseExceptionRef, PyStrRef, PyType},
-        convert::{ToPyObject, ToPyResult},
+        convert::ToPyResult,
         function::{IntoFuncArgs, OptionalArg},
         protocol::PyIterReturn,
         types::{Callable, Constructor},
@@ -18,40 +18,25 @@ mod _json {
     use std::collections::HashMap;
 
     /// Skip JSON whitespace characters (space, tab, newline, carriage return).
-    /// Works with a character iterator and returns the number of characters skipped.
+    /// Returns the length of the leading whitespace run in `bytes`; nothing is consumed.
+    /// JSON whitespace is single-byte ASCII, so the count is valid as a byte or char offset.
     #[inline]
-    fn skip_whitespace_chars<I>(chars: &mut std::iter::Peekable<I>) -> usize
-    where
-        I: Iterator<Item = char>,
-    {
-        flame_guard!("_json::skip_whitespace_chars");
+    fn skip_whitespace(bytes: &[u8]) -> usize {
+        flame_guard!("_json::skip_whitespace");
         let mut count = 0;
-        while let Some(&c) = chars.peek() {
-            match c {
-                ' ' | '\t' | '\n' | '\r' => {
-                    chars.next();
-                    count += 1;
-                }
+        for &b in bytes {
+            match b {
+                b' ' | b'\t' | b'\n' | b'\r' => count += 1,
                 _ => break,
             }
         }
         count
     }
 
-    /// Check if a character iterator starts with a given pattern.
-    /// This avoids byte/char index mismatch issues with non-ASCII strings.
+    /// Check if `bytes` starts with the ASCII `pattern`; false when `bytes` is shorter.
     #[inline]
-    fn starts_with_chars<I>(mut chars: I, pattern: &str) -> bool
-    where
-        I: Iterator<Item = char>,
-    {
-        for expected in pattern.chars() {
-            match chars.next() {
-                Some(c) if c == expected => continue,
-                _ => return false,
-            }
-        }
-        true
+    fn starts_with_bytes(bytes: &[u8], pattern: &[u8]) -> bool {
+        bytes.starts_with(pattern)
     }
 
     #[pyattr(name = "make_scanner")]
@@ -106,49 +91,64 @@ mod _json {
     impl JsonScanner {
         fn parse(
             &self,
-            s: &str,
             pystr: PyStrRef,
-            idx: usize,
+            char_idx: usize,
+            byte_idx: usize,
             scan_once: PyObjectRef,
             vm: &VirtualMachine,
         ) -> PyResult<PyIterReturn> {
             flame_guard!("JsonScanner::parse");
-            let c = match s.chars().next() {
-                Some(c) => c,
+            let bytes = pystr.as_str().as_bytes();
+            let wtf8 = pystr.as_wtf8();
+
+            let first_byte = match bytes.get(byte_idx) {
+                Some(&b) => b,
                 None => {
                     return Ok(PyIterReturn::StopIteration(Some(
-                        vm.ctx.new_int(idx).into(),
+                        vm.ctx.new_int(char_idx).into(),
                     )));
                 }
             };
-            let next_idx = idx + c.len_utf8();
-            match c {
-                '"' => {
-                    return scanstring(pystr, next_idx, OptionalArg::Present(self.strict), vm)
-                        .map(|x| PyIterReturn::Return(x.to_pyobject(vm)));
+
+            match first_byte {
+                b'"' => {
+                    // Parse string - pass slice starting after the quote
+                    let (wtf8_result, chars_consumed, _bytes_consumed) =
+                        machinery::scanstring(&wtf8[byte_idx + 1..], char_idx + 1, self.strict)
+                            .map_err(|e| py_decode_error(e, pystr.clone(), vm))?;
+                    let end_char_idx = char_idx + 1 + chars_consumed;
+                    return Ok(PyIterReturn::Return(
+                        vm.new_tuple((wtf8_result, end_char_idx)).into(),
+                    ));
                 }
-                '{' => {
+                b'{' => {
                     // Parse object in Rust
                     let mut memo = HashMap::new();
                     return self
-                        .parse_object(pystr, next_idx, &scan_once, &mut memo, vm)
-                        .map(|(obj, end)| PyIterReturn::Return(vm.new_tuple((obj, end)).into()));
+                        .parse_object(pystr, char_idx + 1, byte_idx + 1, &scan_once, &mut memo, vm)
+                        .map(|(obj, end_char, _end_byte)| {
+                            PyIterReturn::Return(vm.new_tuple((obj, end_char)).into())
+                        });
                 }
-                '[' => {
+                b'[' => {
                     // Parse array in Rust
                     let mut memo = HashMap::new();
                     return self
-                        .parse_array(pystr, next_idx, &scan_once, &mut memo, vm)
-                        .map(|(obj, end)| PyIterReturn::Return(vm.new_tuple((obj, end)).into()));
+                        .parse_array(pystr, char_idx + 1, byte_idx + 1, &scan_once, &mut memo, vm)
+                        .map(|(obj, end_char, _end_byte)| {
+                            PyIterReturn::Return(vm.new_tuple((obj, end_char)).into())
+                        });
                 }
                 _ => {}
             }
 
+            let s = &pystr.as_str()[byte_idx..];
+
             macro_rules! parse_const {
                 ($s:literal, $val:expr) => {
                     if s.starts_with($s) {
                         return Ok(PyIterReturn::Return(
-                            vm.new_tuple(($val, idx + $s.len())).into(),
+                            vm.new_tuple(($val, char_idx + $s.len())).into(),
                         ));
                     }
                 };
@@ -159,15 +159,20 @@ mod _json {
             parse_const!("false", false);
 
             if let Some((res, len)) = self.parse_number(s, vm) {
-                return Ok(PyIterReturn::Return(vm.new_tuple((res?, idx + len)).into()));
+                return Ok(PyIterReturn::Return(
+                    vm.new_tuple((res?, char_idx + len)).into(),
+                ));
             }
 
             macro_rules! parse_constant {
                 ($s:literal) => {
                     if s.starts_with($s) {
                         return Ok(PyIterReturn::Return(
-                            vm.new_tuple((self.parse_constant.call(($s,), vm)?, idx + $s.len()))
-                                .into(),
+                            vm.new_tuple((
+                                self.parse_constant.call(($s,), vm)?,
+                                char_idx + $s.len(),
+                            ))
+                            .into(),
                         ));
                     }
                 };
@@ -178,7 +183,7 @@ mod _json {
             parse_constant!("-Infinity");
 
             Ok(PyIterReturn::StopIteration(Some(
-                vm.ctx.new_int(idx).into(),
+                vm.ctx.new_int(char_idx).into(),
             )))
         }
 
@@ -219,87 +224,42 @@ mod _json {
             Some((ret, buf.len()))
         }
 
-        /// Parse a number from a character iterator.
-        /// Returns (result, character_count) where character_count is the number of chars consumed.
-        fn parse_number_from_chars<I>(
-            &self,
-            chars: I,
-            vm: &VirtualMachine,
-        ) -> Option<(PyResult, usize)>
-        where
-            I: Iterator<Item = char>,
-        {
-            flame_guard!("JsonScanner::parse_number_from_chars");
-            let mut buf = String::new();
-            let mut has_neg = false;
-            let mut has_decimal = false;
-            let mut has_exponent = false;
-            let mut has_e_sign = false;
-
-            for c in chars {
-                let i = buf.len();
-                match c {
-                    '-' if i == 0 => has_neg = true,
-                    n if n.is_ascii_digit() => {}
-                    '.' if !has_decimal => has_decimal = true,
-                    'e' | 'E' if !has_exponent => has_exponent = true,
-                    '+' | '-' if !has_e_sign => has_e_sign = true,
-                    _ => break,
-                }
-                buf.push(c);
-            }
-
-            let len = buf.len();
-            if len == 0 || (len == 1 && has_neg) {
-                return None;
-            }
-
-            let ret = if has_decimal || has_exponent {
-                if let Some(ref parse_float) = self.parse_float {
-                    parse_float.call((&buf,), vm)
-                } else {
-                    Ok(vm.ctx.new_float(f64::from_str(&buf).unwrap()).into())
-                }
-            } else if let Some(ref parse_int) = self.parse_int {
-                parse_int.call((&buf,), vm)
-            } else {
-                Ok(vm.new_pyobj(BigInt::from_str(&buf).unwrap()))
-            };
-            Some((ret, len))
-        }
-
         /// Parse a JSON object starting after the opening '{'.
-        /// Returns (parsed_object, end_character_index).
+        /// Returns (parsed_object, end_char_index, end_byte_index).
         fn parse_object(
             &self,
             pystr: PyStrRef,
-            start_idx: usize, // Character index right after '{'
+            start_char_idx: usize,
+            start_byte_idx: usize,
             scan_once: &PyObjectRef,
             memo: &mut HashMap<String, PyStrRef>,
             vm: &VirtualMachine,
-        ) -> PyResult<(PyObjectRef, usize)> {
+        ) -> PyResult<(PyObjectRef, usize, usize)> {
             flame_guard!("JsonScanner::parse_object");
 
-            let s = pystr.as_str();
-            let mut chars = s.chars().skip(start_idx).peekable();
-            let mut idx = start_idx;
+            let bytes = pystr.as_str().as_bytes();
+            let wtf8 = pystr.as_wtf8();
+            let mut char_idx = start_char_idx;
+            let mut byte_idx = start_byte_idx;
 
             // Skip initial whitespace
-            idx += skip_whitespace_chars(&mut chars);
+            let ws = skip_whitespace(&bytes[byte_idx..]);
+            char_idx += ws;
+            byte_idx += ws;
 
             // Check for empty object
-            match chars.peek() {
-                Some('}') => {
-                    return self.finalize_object(vec![], idx + 1, vm);
+            match bytes.get(byte_idx) {
+                Some(b'}') => {
+                    return self.finalize_object(vec![], char_idx + 1, byte_idx + 1, vm);
                 }
-                Some('"') => {
+                Some(b'"') => {
                     // Continue to parse first key
                 }
-                Some(_) | None => {
+                _ => {
                     return Err(self.make_decode_error(
                         "Expecting property name enclosed in double quotes",
                         pystr,
-                        idx,
+                        char_idx,
                         vm,
                     ));
                 }
@@ -309,12 +269,16 @@ mod _json {
 
             loop {
                 // We're now at '"', skip it
-                chars.next();
-                idx += 1;
+                char_idx += 1;
+                byte_idx += 1;
+
+                // Parse key string using scanstring with byte slice
+                let (key_wtf8, chars_consumed, bytes_consumed) =
+                    machinery::scanstring(&wtf8[byte_idx..], char_idx, self.strict)
+                        .map_err(|e| py_decode_error(e, pystr.clone(), vm))?;
 
-                // Parse key string using existing scanstring
-                let (key_wtf8, key_end) = machinery::scanstring(pystr.as_wtf8(), idx, self.strict)
-                    .map_err(|e| py_decode_error(e, pystr.clone(), vm))?;
+                char_idx += chars_consumed;
+                byte_idx += bytes_consumed;
 
                 // Key memoization - reuse existing key strings
                 let key_str = key_wtf8.to_string();
@@ -327,68 +291,73 @@ mod _json {
                     }
                 };
 
-                // Update position and rebuild iterator
-                idx = key_end;
-                chars = s.chars().skip(idx).peekable();
-
                 // Skip whitespace after key
-                idx += skip_whitespace_chars(&mut chars);
+                let ws = skip_whitespace(&bytes[byte_idx..]);
+                char_idx += ws;
+                byte_idx += ws;
 
                 // Expect ':' delimiter
-                match chars.peek() {
-                    Some(':') => {
-                        chars.next();
-                        idx += 1;
+                match bytes.get(byte_idx) {
+                    Some(b':') => {
+                        char_idx += 1;
+                        byte_idx += 1;
                     }
                     _ => {
                         return Err(self.make_decode_error(
                             "Expecting ':' delimiter",
                             pystr,
-                            idx,
+                            char_idx,
                             vm,
                         ));
                     }
                 }
 
                 // Skip whitespace after ':'
-                idx += skip_whitespace_chars(&mut chars);
+                let ws = skip_whitespace(&bytes[byte_idx..]);
+                char_idx += ws;
+                byte_idx += ws;
 
-                // Parse value recursively using scan_once
-                let (value, value_end) =
-                    self.call_scan_once(scan_once, pystr.clone(), idx, memo, vm)?;
+                // Parse value recursively
+                let (value, value_char_end, value_byte_end) =
+                    self.call_scan_once(scan_once, pystr.clone(), char_idx, byte_idx, memo, vm)?;
 
                 pairs.push((key, value));
-                idx = value_end;
-                chars = s.chars().skip(idx).peekable();
+                char_idx = value_char_end;
+                byte_idx = value_byte_end;
 
                 // Skip whitespace after value
-                idx += skip_whitespace_chars(&mut chars);
+                let ws = skip_whitespace(&bytes[byte_idx..]);
+                char_idx += ws;
+                byte_idx += ws;
 
                 // Check for ',' or '}'
-                match chars.peek() {
-                    Some('}') => {
-                        idx += 1;
+                match bytes.get(byte_idx) {
+                    Some(b'}') => {
+                        char_idx += 1;
+                        byte_idx += 1;
                         break;
                     }
-                    Some(',') => {
-                        let comma_idx = idx;
-                        chars.next();
-                        idx += 1;
+                    Some(b',') => {
+                        let comma_char_idx = char_idx;
+                        char_idx += 1;
+                        byte_idx += 1;
 
                         // Skip whitespace after comma
-                        idx += skip_whitespace_chars(&mut chars);
+                        let ws = skip_whitespace(&bytes[byte_idx..]);
+                        char_idx += ws;
+                        byte_idx += ws;
 
                         // Next must be '"'
-                        match chars.peek() {
-                            Some('"') => {
+                        match bytes.get(byte_idx) {
+                            Some(b'"') => {
                                 // Continue to next key-value pair
                             }
-                            Some('}') => {
+                            Some(b'}') => {
                                 // Trailing comma before end of object
                                 return Err(self.make_decode_error(
                                     "Illegal trailing comma before end of object",
                                     pystr,
-                                    comma_idx,
+                                    comma_char_idx,
                                     vm,
                                 ));
                             }
@@ -396,7 +365,7 @@ mod _json {
                                 return Err(self.make_decode_error(
                                     "Expecting property name enclosed in double quotes",
                                     pystr,
-                                    idx,
+                                    char_idx,
                                     vm,
                                 ));
                             }
@@ -406,72 +375,81 @@ mod _json {
                         return Err(self.make_decode_error(
                             "Expecting ',' delimiter",
                             pystr,
-                            idx,
+                            char_idx,
                             vm,
                         ));
                     }
                 }
             }
 
-            self.finalize_object(pairs, idx, vm)
+            self.finalize_object(pairs, char_idx, byte_idx, vm)
         }
 
         /// Parse a JSON array starting after the opening '['.
-        /// Returns (parsed_array, end_character_index).
+        /// Returns (parsed_array, end_char_index, end_byte_index).
         fn parse_array(
             &self,
             pystr: PyStrRef,
-            start_idx: usize, // Character index right after '['
+            start_char_idx: usize,
+            start_byte_idx: usize,
             scan_once: &PyObjectRef,
             memo: &mut HashMap<String, PyStrRef>,
             vm: &VirtualMachine,
-        ) -> PyResult<(PyObjectRef, usize)> {
+        ) -> PyResult<(PyObjectRef, usize, usize)> {
             flame_guard!("JsonScanner::parse_array");
 
-            let s = pystr.as_str();
-            let mut chars = s.chars().skip(start_idx).peekable();
-            let mut idx = start_idx;
+            let bytes = pystr.as_str().as_bytes();
+            let mut char_idx = start_char_idx;
+            let mut byte_idx = start_byte_idx;
 
             // Skip initial whitespace
-            idx += skip_whitespace_chars(&mut chars);
+            let ws = skip_whitespace(&bytes[byte_idx..]);
+            char_idx += ws;
+            byte_idx += ws;
 
             // Check for empty array
-            if chars.peek() == Some(&']') {
-                return Ok((vm.ctx.new_list(vec![]).into(), idx + 1));
+            if bytes.get(byte_idx) == Some(&b']') {
+                return Ok((vm.ctx.new_list(vec![]).into(), char_idx + 1, byte_idx + 1));
             }
 
             let mut values: Vec<PyObjectRef> = Vec::new();
 
             loop {
                 // Parse value
-                let (value, value_end) =
-                    self.call_scan_once(scan_once, pystr.clone(), idx, memo, vm)?;
+                let (value, value_char_end, value_byte_end) =
+                    self.call_scan_once(scan_once, pystr.clone(), char_idx, byte_idx, memo, vm)?;
 
                 values.push(value);
-                idx = value_end;
-                chars = s.chars().skip(idx).peekable();
+                char_idx = value_char_end;
+                byte_idx = value_byte_end;
 
                 // Skip whitespace after value
-                idx += skip_whitespace_chars(&mut chars);
-
-                match chars.peek() {
-                    Some(']') => {
-                        idx += 1;
+                let ws = skip_whitespace(&bytes[byte_idx..]);
+                char_idx += ws;
+                byte_idx += ws;
+
+                match bytes.get(byte_idx) {
+                    Some(b']') => {
+                        char_idx += 1;
+                        byte_idx += 1;
                         break;
                     }
-                    Some(',') => {
-                        let comma_idx = idx;
-                        chars.next();
-                        idx += 1;
+                    Some(b',') => {
+                        let comma_char_idx = char_idx;
+                        char_idx += 1;
+                        byte_idx += 1;
+
                         // Skip whitespace after comma
-                        idx += skip_whitespace_chars(&mut chars);
+                        let ws = skip_whitespace(&bytes[byte_idx..]);
+                        char_idx += ws;
+                        byte_idx += ws;
 
                         // Check for trailing comma
-                        if chars.peek() == Some(&']') {
+                        if bytes.get(byte_idx) == Some(&b']') {
                             return Err(self.make_decode_error(
                                 "Illegal trailing comma before end of array",
                                 pystr,
-                                comma_idx,
+                                comma_char_idx,
                                 vm,
                             ));
                         }
@@ -480,23 +458,24 @@ mod _json {
                         return Err(self.make_decode_error(
                             "Expecting ',' delimiter",
                             pystr,
-                            idx,
+                            char_idx,
                             vm,
                         ));
                     }
                 }
             }
 
-            Ok((vm.ctx.new_list(values).into(), idx))
+            Ok((vm.ctx.new_list(values).into(), char_idx, byte_idx))
         }
 
         /// Finalize object construction with hooks.
         fn finalize_object(
             &self,
             pairs: Vec<(PyObjectRef, PyObjectRef)>,
-            end_idx: usize,
+            end_char_idx: usize,
+            end_byte_idx: usize,
             vm: &VirtualMachine,
-        ) -> PyResult<(PyObjectRef, usize)> {
+        ) -> PyResult<(PyObjectRef, usize, usize)> {
             let result = if let Some(ref pairs_hook) = self.object_pairs_hook {
                 // object_pairs_hook takes priority - pass list of tuples
                 let pairs_list: Vec<PyObjectRef> = pairs
@@ -520,87 +499,95 @@ mod _json {
                 }
             };
 
-            Ok((result, end_idx))
+            Ok((result, end_char_idx, end_byte_idx))
         }
 
         /// Call scan_once and handle the result.
-        /// Uses character iterators to avoid byte/char index mismatch with non-ASCII strings.
+        /// Returns (value, end_char_idx, end_byte_idx).
         fn call_scan_once(
             &self,
             scan_once: &PyObjectRef,
             pystr: PyStrRef,
-            idx: usize,
+            char_idx: usize,
+            byte_idx: usize,
             memo: &mut HashMap<String, PyStrRef>,
             vm: &VirtualMachine,
-        ) -> PyResult<(PyObjectRef, usize)> {
+        ) -> PyResult<(PyObjectRef, usize, usize)> {
             let s = pystr.as_str();
-            let chars = s.chars().skip(idx).peekable();
+            let bytes = s.as_bytes();
+            let wtf8 = pystr.as_wtf8();
 
-            let first_char = match chars.clone().next() {
-                Some(c) => c,
-                None => return Err(self.make_decode_error("Expecting value", pystr, idx, vm)),
+            let first_byte = match bytes.get(byte_idx) {
+                Some(&b) => b,
+                None => return Err(self.make_decode_error("Expecting value", pystr, char_idx, vm)),
             };
 
-            match first_char {
-                '"' => {
-                    // String
-                    let (wtf8, end) = machinery::scanstring(pystr.as_wtf8(), idx + 1, self.strict)
-                        .map_err(|e| py_decode_error(e, pystr.clone(), vm))?;
-                    let py_str = vm.ctx.new_str(wtf8.to_string());
-                    Ok((py_str.into(), end))
+            match first_byte {
+                b'"' => {
+                    // String - pass slice starting after the quote
+                    let (wtf8_result, chars_consumed, bytes_consumed) =
+                        machinery::scanstring(&wtf8[byte_idx + 1..], char_idx + 1, self.strict)
+                            .map_err(|e| py_decode_error(e, pystr.clone(), vm))?;
+                    let py_str = vm.ctx.new_str(wtf8_result.to_string());
+                    Ok((
+                        py_str.into(),
+                        char_idx + 1 + chars_consumed,
+                        byte_idx + 1 + bytes_consumed,
+                    ))
                 }
-                '{' => {
+                b'{' => {
                     // Object
-                    self.parse_object(pystr, idx + 1, scan_once, memo, vm)
+                    self.parse_object(pystr, char_idx + 1, byte_idx + 1, scan_once, memo, vm)
                 }
-                '[' => {
+                b'[' => {
                     // Array
-                    self.parse_array(pystr, idx + 1, scan_once, memo, vm)
+                    self.parse_array(pystr, char_idx + 1, byte_idx + 1, scan_once, memo, vm)
                 }
-                'n' if starts_with_chars(chars.clone(), "null") => {
+                b'n' if starts_with_bytes(&bytes[byte_idx..], b"null") => {
                     // null
-                    Ok((vm.ctx.none(), idx + 4))
+                    Ok((vm.ctx.none(), char_idx + 4, byte_idx + 4))
                 }
-                't' if starts_with_chars(chars.clone(), "true") => {
+                b't' if starts_with_bytes(&bytes[byte_idx..], b"true") => {
                     // true
-                    Ok((vm.ctx.new_bool(true).into(), idx + 4))
+                    Ok((vm.ctx.new_bool(true).into(), char_idx + 4, byte_idx + 4))
                 }
-                'f' if starts_with_chars(chars.clone(), "false") => {
+                b'f' if starts_with_bytes(&bytes[byte_idx..], b"false") => {
                     // false
-                    Ok((vm.ctx.new_bool(false).into(), idx + 5))
+                    Ok((vm.ctx.new_bool(false).into(), char_idx + 5, byte_idx + 5))
                 }
-                'N' if starts_with_chars(chars.clone(), "NaN") => {
+                b'N' if starts_with_bytes(&bytes[byte_idx..], b"NaN") => {
                     // NaN
                     let result = self.parse_constant.call(("NaN",), vm)?;
-                    Ok((result, idx + 3))
+                    Ok((result, char_idx + 3, byte_idx + 3))
                 }
-                'I' if starts_with_chars(chars.clone(), "Infinity") => {
+                b'I' if starts_with_bytes(&bytes[byte_idx..], b"Infinity") => {
                     // Infinity
                     let result = self.parse_constant.call(("Infinity",), vm)?;
-                    Ok((result, idx + 8))
+                    Ok((result, char_idx + 8, byte_idx + 8))
                 }
-                '-' => {
+                b'-' => {
                     // -Infinity or negative number
-                    if starts_with_chars(chars.clone(), "-Infinity") {
+                    if starts_with_bytes(&bytes[byte_idx..], b"-Infinity") {
                         let result = self.parse_constant.call(("-Infinity",), vm)?;
-                        return Ok((result, idx + 9));
+                        return Ok((result, char_idx + 9, byte_idx + 9));
                     }
-                    // Negative number - collect number characters
-                    if let Some((result, len)) = self.parse_number_from_chars(chars, vm) {
-                        return Ok((result?, idx + len));
+                    // Negative number - numbers are ASCII so len == bytes
+                    if let Some((result, len)) = self.parse_number(&s[byte_idx..], vm) {
+                        return Ok((result?, char_idx + len, byte_idx + len));
                     }
-                    Err(self.make_decode_error("Expecting value", pystr, idx, vm))
+                    Err(self.make_decode_error("Expecting value", pystr, char_idx, vm))
                 }
-                c if c.is_ascii_digit() => {
-                    // Positive number
-                    if let Some((result, len)) = self.parse_number_from_chars(chars, vm) {
-                        return Ok((result?, idx + len));
+                b'0'..=b'9' => {
+                    // Positive number - numbers are ASCII so len == bytes
+                    if let Some((result, len)) = self.parse_number(&s[byte_idx..], vm) {
+                        return Ok((result?, char_idx + len, byte_idx + len));
                     }
-                    Err(self.make_decode_error("Expecting value", pystr, idx, vm))
+                    Err(self.make_decode_error("Expecting value", pystr, char_idx, vm))
                 }
                 _ => {
                     // Fall back to scan_once for unrecognized input
-                    let result = scan_once.call((pystr.clone(), idx as isize), vm);
+                    // Note: This path requires char_idx for Python compatibility
+                    let result = scan_once.call((pystr.clone(), char_idx as isize), vm);
 
                     match result {
                         Ok(tuple) => {
@@ -610,11 +597,18 @@ mod _json {
                                 return Err(vm.new_value_error("scan_once must return 2-tuple"));
                             }
                             let value = tuple.as_slice()[0].clone();
-                            let end_idx: isize = tuple.as_slice()[1].try_to_value(vm)?;
-                            Ok((value, end_idx as usize))
+                            let end_char_idx: isize = tuple.as_slice()[1].try_to_value(vm)?;
+                            // For fallback, we need to calculate byte_idx from char_idx
+                            // This is expensive but fallback should be rare
+                            let end_byte_idx = s
+                                .char_indices()
+                                .nth(end_char_idx as usize)
+                                .map(|(i, _)| i)
+                                .unwrap_or(s.len());
+                            Ok((value, end_char_idx as usize, end_byte_idx))
                         }
                         Err(err) if err.fast_isinstance(vm.ctx.exceptions.stop_iteration) => {
-                            Err(self.make_decode_error("Expecting value", pystr, idx, vm))
+                            Err(self.make_decode_error("Expecting value", pystr, char_idx, vm))
                         }
                         Err(err) => Err(err),
                     }
@@ -637,24 +631,35 @@ mod _json {
 
     impl Callable for JsonScanner {
         type Args = (PyStrRef, isize);
-        fn call(zelf: &Py<Self>, (pystr, idx): Self::Args, vm: &VirtualMachine) -> PyResult {
-            if idx < 0 {
+        fn call(zelf: &Py<Self>, (pystr, char_idx): Self::Args, vm: &VirtualMachine) -> PyResult {
+            if char_idx < 0 {
                 return Err(vm.new_value_error("idx cannot be negative"));
             }
-            let idx = idx as usize;
-            let mut chars = pystr.as_str().chars();
-            if idx > 0 && chars.nth(idx - 1).is_none() {
-                PyIterReturn::StopIteration(Some(vm.ctx.new_int(idx).into())).to_pyresult(vm)
+            let char_idx = char_idx as usize;
+            let s = pystr.as_str();
+
+            // Convert the character index to a byte offset; nth() steps over char_idx chars here
+            let byte_idx = if char_idx == 0 {
+                0
             } else {
-                zelf.parse(
-                    chars.as_str(),
-                    pystr.clone(),
-                    idx,
-                    zelf.to_owned().into(),
-                    vm,
-                )
-                .and_then(|x| x.to_pyresult(vm))
-            }
+                match s.char_indices().nth(char_idx) {
+                    Some((byte_i, _)) => byte_i,
+                    None => {
+                        // char_idx is at or past the end of the string: signal StopIteration(idx)
+                        return PyIterReturn::StopIteration(Some(vm.ctx.new_int(char_idx).into()))
+                            .to_pyresult(vm);
+                    }
+                }
+            };
+
+            zelf.parse(
+                pystr.clone(),
+                char_idx,
+                byte_idx,
+                zelf.to_owned().into(),
+                vm,
+            )
+            .and_then(|x| x.to_pyresult(vm))
         }
     }
 
@@ -701,7 +706,28 @@ mod _json {
         vm: &VirtualMachine,
     ) -> PyResult<(Wtf8Buf, usize)> {
         flame_guard!("_json::scanstring");
-        machinery::scanstring(s.as_wtf8(), end, strict.unwrap_or(true))
-            .map_err(|e| py_decode_error(e, s, vm))
+        let wtf8 = s.as_wtf8();
+
+        // Convert char index `end` to byte index
+        let byte_idx = if end == 0 {
+            0
+        } else {
+            wtf8.code_point_indices()
+                .nth(end)
+                .map(|(i, _)| i)
+                .ok_or_else(|| {
+                    py_decode_error(
+                        machinery::DecodeError::new("Unterminated string starting at", end - 1),
+                        s.clone(),
+                        vm,
+                    )
+                })?
+        };
+
+        let (result, chars_consumed, _bytes_consumed) =
+            machinery::scanstring(&wtf8[byte_idx..], end, strict.unwrap_or(true))
+                .map_err(|e| py_decode_error(e, s, vm))?;
+
+        Ok((result, end + chars_consumed))
     }
 }
diff --git a/crates/stdlib/src/json/machinery.rs b/crates/stdlib/src/json/machinery.rs
index 9f379a962ac..f33a135ab20 100644
--- a/crates/stdlib/src/json/machinery.rs
+++ b/crates/stdlib/src/json/machinery.rs
@@ -127,130 +127,128 @@ impl StrOrChar<'_> {
         }
     }
 }
+/// Scan a JSON string starting right after the opening quote.
+///
+/// # Arguments
+/// * `s` - The string slice starting at the first character after the opening `"`
+/// * `char_offset` - Character index of `s` in the whole document; error positions are absolute
+/// * `strict` - Whether to reject control characters
+///
+/// # Returns
+/// * `Ok((result, chars_consumed, bytes_consumed))` - Decoded string; counts include the closing `"`
+/// * `Err(DecodeError)` - Malformed string; "Unterminated" points at `char_offset - 1`, clamped to 0
 pub fn scanstring<'a>(
     s: &'a Wtf8,
-    end: usize,
+    char_offset: usize,
     strict: bool,
-) -> Result<(Wtf8Buf, usize), DecodeError> {
+) -> Result<(Wtf8Buf, usize, usize), DecodeError> {
     flame_guard!("machinery::scanstring");
-    let unterminated_err = || DecodeError::new("Unterminated string starting at", end - 1);
-
-    // Get byte index for character position `end`
-    let byte_start = {
-        flame_guard!("machinery::scanstring::byte_start_initialization");
-        s.code_point_indices()
-            .nth(end)
-            .ok_or_else(unterminated_err)?
-            .0
-    };
+    let unterminated_err =
+        || DecodeError::new("Unterminated string starting at", char_offset.saturating_sub(1));
 
     let bytes = s.as_bytes();
-    let search_bytes = &bytes[byte_start..];
 
     // Fast path: use memchr to find " or \ quickly
-    if let Some(pos) = {
-        flame_guard!("machinery::scanstring::memchr2");
-        memchr2(b'"', b'\\', search_bytes)
-    } {
-        flame_guard!("machinery::scanstring::memchr2::condition_some");
-        if search_bytes[pos] == b'"' {
-            flame_guard!("machinery::scanstring::memchr2::condition_some::condition_if");
-            let content_bytes = &search_bytes[..pos];
+    if let Some(pos) = memchr2(b'"', b'\\', bytes)
+        && bytes[pos] == b'"'
+    {
+        let content_bytes = &bytes[..pos];
 
-            // In strict mode, check for control characters (0x00-0x1F)
-            let has_control_char = strict && content_bytes.iter().any(|&b| b < 0x20);
+        // In strict mode, check for control characters (0x00-0x1F)
+        let has_control_char = strict && content_bytes.iter().any(|&b| b < 0x20);
 
-            if !has_control_char {
-                flame_guard!("machinery::scanstring::fast_path");
-                let result_slice = &s[byte_start..byte_start + pos];
-                let char_count = result_slice.code_points().count();
-                let mut out = Wtf8Buf::with_capacity(pos);
-                out.push_wtf8(result_slice);
-                return Ok((out, end + char_count + 1));
-            }
+        if !has_control_char {
+            flame_guard!("machinery::scanstring::fast_path");
+            let result_slice = &s[..pos];
+            let char_count = result_slice.code_points().count();
+            let mut out = Wtf8Buf::with_capacity(pos);
+            out.push_wtf8(result_slice);
+            // +1 for the closing quote
+            return Ok((out, char_count + 1, pos + 1));
         }
     }
 
     // Slow path: chunk-based parsing for strings with escapes or control chars
-    {
-        flame_guard!("machinery::scanstring::slow_path");
-        let mut chunks: Vec<StrOrChar<'a>> = Vec::new();
-        let mut output_len = 0usize;
-        let mut push_chunk = |chunk: StrOrChar<'a>| {
-            output_len += chunk.len();
-            chunks.push(chunk);
-        };
-        let mut chars = s.code_point_indices().enumerate().skip(end).peekable();
-        let &(_, (mut chunk_start, _)) = chars.peek().ok_or_else(unterminated_err)?;
-        while let Some((char_i, (byte_i, c))) = chars.next() {
-            match c.to_char_lossy() {
-                '"' => {
-                    push_chunk(StrOrChar::Str(&s[chunk_start..byte_i]));
-                    flame_guard!("machinery::scanstring::assemble_chunks");
-                    let mut out = Wtf8Buf::with_capacity(output_len);
-                    for x in chunks {
-                        match x {
-                            StrOrChar::Str(s) => out.push_wtf8(s),
-                            StrOrChar::Char(c) => out.push(c),
-                        }
+    flame_guard!("machinery::scanstring::slow_path");
+    let mut chunks: Vec<StrOrChar<'a>> = Vec::new();
+    let mut output_len = 0usize;
+    let mut push_chunk = |chunk: StrOrChar<'a>| {
+        output_len += chunk.len();
+        chunks.push(chunk);
+    };
+    // Indices yielded below are relative to `s`; add `char_offset` for absolute error positions
+    let mut chars = s.code_point_indices().enumerate().peekable();
+    let mut chunk_start: usize = 0;
+
+    while let Some((char_i, (byte_i, c))) = chars.next() {
+        match c.to_char_lossy() {
+            '"' => {
+                push_chunk(StrOrChar::Str(&s[chunk_start..byte_i]));
+                flame_guard!("machinery::scanstring::assemble_chunks");
+                let mut out = Wtf8Buf::with_capacity(output_len);
+                for x in chunks {
+                    match x {
+                        StrOrChar::Str(s) => out.push_wtf8(s),
+                        StrOrChar::Char(c) => out.push(c),
                     }
-                    return Ok((out, char_i + 1));
                 }
-                '\\' => {
-                    push_chunk(StrOrChar::Str(&s[chunk_start..byte_i]));
-                    let (_, (_, c)) = chars.next().ok_or_else(unterminated_err)?;
-                    let esc =
-                        match c.to_char_lossy() {
-                            '"' => "\"",
-                            '\\' => "\\",
-                            '/' => "/",
-                            'b' => "\x08",
-                            'f' => "\x0c",
-                            'n' => "\n",
-                            'r' => "\r",
-                            't' => "\t",
-                            'u' => {
-                                let mut uni = decode_unicode(&mut chars, char_i)?;
-                                chunk_start = byte_i + 6;
-                                if let Some(lead) = uni.to_lead_surrogate() {
-                                    // uni is a surrogate -- try to find its pair
-                                    let mut chars2 = chars.clone();
-                                    if let Some(((pos2, _), (_, _))) = chars2.next_tuple().filter(
-                                        |((_, (_, c1)), (_, (_, c2)))| *c1 == '\\' && *c2 == 'u',
-                                    ) {
-                                        let uni2 = decode_unicode(&mut chars2, pos2)?;
-                                        if let Some(trail) = uni2.to_trail_surrogate() {
-                                            // ok, we found what we were looking for -- \uXXXX\uXXXX, both surrogates
-                                            uni = lead.merge(trail).into();
-                                            chunk_start = pos2 + 6;
-                                            chars = chars2;
-                                        }
-                                    }
+                // +1 for the closing quote
+                return Ok((out, char_i + 1, byte_i + 1));
+            }
+            '\\' => {
+                push_chunk(StrOrChar::Str(&s[chunk_start..byte_i]));
+                let (_, (_, c)) = chars.next().ok_or_else(unterminated_err)?;
+                let esc = match c.to_char_lossy() {
+                    '"' => "\"",
+                    '\\' => "\\",
+                    '/' => "/",
+                    'b' => "\x08",
+                    'f' => "\x0c",
+                    'n' => "\n",
+                    'r' => "\r",
+                    't' => "\t",
+                    'u' => {
+                        let mut uni = decode_unicode(&mut chars, char_offset + char_i)?;
+                        chunk_start = byte_i + 6;
+                        if let Some(lead) = uni.to_lead_surrogate() {
+                            // uni is a surrogate -- try to find its pair
+                            let mut chars2 = chars.clone();
+                            if let Some(((char_pos2, (byte_pos2, _)), (_, _))) = chars2
+                                .next_tuple()
+                                .filter(|((_, (_, c1)), (_, (_, c2)))| *c1 == '\\' && *c2 == 'u')
+                            {
+                                let uni2 = decode_unicode(&mut chars2, char_offset + char_pos2)?;
+                                if let Some(trail) = uni2.to_trail_surrogate() {
+                                    // ok, we found what we were looking for -- \uXXXX\uXXXX, both surrogates
+                                    uni = lead.merge(trail).into();
+                                    chunk_start = byte_pos2 + 6;
+                                    chars = chars2;
                                 }
-                                push_chunk(StrOrChar::Char(uni));
-                                continue;
-                            }
-                            _ => {
-                                return Err(DecodeError::new(
-                                    format!("Invalid \\escape: {c:?}"),
-                                    char_i,
-                                ));
                             }
-                        };
-                    chunk_start = byte_i + 2;
-                    push_chunk(StrOrChar::Str(esc.as_ref()));
-                }
-                '\x00'..='\x1f' if strict => {
-                    return Err(DecodeError::new(
-                        format!("Invalid control character {c:?} at"),
-                        char_i,
-                    ));
-                }
-                _ => {}
+                        }
+                        push_chunk(StrOrChar::Char(uni));
+                        continue;
+                    }
+                    _ => {
+                        return Err(DecodeError::new(
+                            format!("Invalid \\escape: {c:?}"),
+                            char_offset + char_i,
+                        ));
+                    }
+                };
+                chunk_start = byte_i + 2;
+                push_chunk(StrOrChar::Str(esc.as_ref()));
+            }
+            '\x00'..='\x1f' if strict => {
+                return Err(DecodeError::new(
+                    format!("Invalid control character {c:?} at"),
+                    char_offset + char_i,
+                ));
             }
+            _ => {}
         }
-        Err(unterminated_err())
     }
+    Err(unterminated_err())
 }
 
 #[inline]