diff --git a/Lib/test/test_parser_utf7_r.py b/Lib/test/test_parser_utf7_r.py
new file mode 100644
index 00000000000000..dc173373889624
--- /dev/null
+++ b/Lib/test/test_parser_utf7_r.py
@@ -0,0 +1,42 @@
+import os
+import py_compile
+import tempfile
+import unittest
+
+
+class TestParserUTF7Newline(unittest.TestCase):
+    """Regression test: a carriage return produced by the source codec."""
+
+    def test_utf7_r_after_coding_cookie(self):
+        """Compiling the reproducer must not raise SystemError."""
+        # A '\r' introduced by the codec used to make the lexer produce
+        # a broken token, which reproduced a SystemError in
+        # string_parser.c.
+        #
+        # '+AA0-' is UTF-7 for '\r'.
+        # The '-' is optional if followed by non-base64.
+        source = b"#coding=u7+AA0''"
+
+        # Use a private temporary directory so that no file in the
+        # current directory is overwritten or deleted, and so that the
+        # source and the compiled output are both removed afterwards.
+        with tempfile.TemporaryDirectory() as tmpdir:
+            filename = os.path.join(tmpdir, 'test_utf7_r.py')
+            cfile = filename + 'c'
+            with open(filename, 'wb') as f:
+                f.write(source)
+
+            try:
+                py_compile.compile(filename, cfile=cfile, doraise=True)
+            except py_compile.PyCompileError as exc:
+                # With doraise=True, py_compile reports compilation
+                # failures as PyCompileError and keeps the original
+                # exception in exc_value. We don't care if it's a
+                # syntax error (it shouldn't be, but that's not the
+                # bug), we care that it isn't a SystemError.
+                if isinstance(exc.exc_value, SystemError):
+                    self.fail(f"SystemError raised: {exc.exc_value}")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c
index 7f25afec302c22..f9235a8980975b 100644
--- a/Parser/lexer/lexer.c
+++ b/Parser/lexer/lexer.c
@@ -1,1635 +1,1597 @@
 #include "Python.h"
+#include "errcode.h"
 #include "pycore_token.h"
 #include "pycore_unicodeobject.h"
-#include "errcode.h"
 
-#include "state.h"
 #include "../tokenizer/helpers.h"
+#include "state.h"
 
 /* Alternate tab spacing */
 #define ALTTABSIZE 1
 
-#define is_potential_identifier_start(c) (\
-              (c >= 'a' && c <= 'z')\
-               || (c >= 'A' && c <= 'Z')\
-               || c == '_'\
-               || (c >= 128))
+#define is_potential_identifier_start(c)                                       \
+  ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c >= 128))
 
-#define is_potential_identifier_char(c) (\
-              (c >= 'a' && c <= 'z')\
-               || (c >= 'A' && c <= 'Z')\
-               || (c >= '0' && c <= '9')\
-               || c == '_'\
-               || (c >= 128))
+#define is_potential_identifier_char(c)                                        \
+  ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||                         \
+   (c >= '0' && c <= '9') || c == '_' || (c >= 128))
 
 #ifdef Py_DEBUG
-static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
-    assert(tok->tok_mode_stack_index >= 0);
-    assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
-    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
+static inline tokenizer_mode *TOK_GET_MODE(struct tok_state *tok) {
+  assert(tok->tok_mode_stack_index >= 0);
+  assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
+  return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
 }
-static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
-    assert(tok->tok_mode_stack_index >= 0);
-    assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
-    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
+static inline tokenizer_mode *TOK_NEXT_MODE(struct tok_state *tok) {
+  assert(tok->tok_mode_stack_index >= 0);
+  assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
+  return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
 }
 #else
 #define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
 #define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
 #endif
 
-#define FTSTRING_MIDDLE(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_MIDDLE : FSTRING_MIDDLE)
-#define FTSTRING_END(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_END : FSTRING_END)
-#define TOK_GET_STRING_PREFIX(tok) (TOK_GET_MODE(tok)->string_kind == TSTRING ? 't' : 'f')
-#define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
-#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
-                _PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))
+#define FTSTRING_MIDDLE(tok_mode)                                              \
+  (tok_mode->string_kind == TSTRING ? TSTRING_MIDDLE : FSTRING_MIDDLE)
+#define FTSTRING_END(tok_mode)                                                 \
+  (tok_mode->string_kind == TSTRING ? TSTRING_END : FSTRING_END)
+#define TOK_GET_STRING_PREFIX(tok)                                             \
+  (TOK_GET_MODE(tok)->string_kind == TSTRING ? 't' : 'f')
+#define MAKE_TOKEN(token_type)                                                 \
+  _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
+#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset)        \
+  (_PyLexer_type_comment_token_setup(tok, token, token_type, col_offset,       \
+                                     end_col_offset, p_start, p_end))
 
 /* Spaces in this constant are treated as "zero or more spaces or tabs" when
    tokenizing. */
-static const char* type_comment_prefix = "# type: ";
+static const char *type_comment_prefix = "# type: ";
 
-static inline int
-contains_null_bytes(const char* str, size_t size)
-{
-    return memchr(str, 0, size) != NULL;
+static inline int contains_null_bytes(const char *str, size_t size) {
+  return memchr(str, 0, size) != NULL;
 }
 
 /* Get next char, updating state; error code goes into tok->done */
-static int
-tok_nextc(struct tok_state *tok)
-{
-    int rc;
-    for (;;) {
-        if (tok->cur != tok->inp) {
-            if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) {
-                tok->done = E_COLUMNOVERFLOW;
-                return EOF;
-            }
-            tok->col_offset++;
-            return Py_CHARMASK(*tok->cur++); /* Fast path */
-        }
-        if (tok->done != E_OK) {
-            return EOF;
-        }
-        rc = tok->underflow(tok);
+static int tok_nextc(struct tok_state *tok) {
+  int rc;
+  for (;;) {
+    if (tok->cur != tok->inp) {
+      if ((unsigned int)tok->col_offset >= (unsigned int)INT_MAX) {
+        tok->done = E_COLUMNOVERFLOW;
+        return EOF;
+      }
+      tok->col_offset++;
+      return Py_CHARMASK(*tok->cur++); /* Fast path */
+    }
+    if (tok->done != E_OK) {
+      return EOF;
+    }
+    rc = tok->underflow(tok);
 #if defined(Py_DEBUG)
-        if (tok->debug) {
-            fprintf(stderr, "line[%d] = ", tok->lineno);
-            _PyTokenizer_print_escape(stderr, tok->cur, tok->inp - tok->cur);
-            fprintf(stderr, "  tok->done = %d\n", tok->done);
-        }
+    if (tok->debug) {
+      fprintf(stderr, "line[%d] = ", tok->lineno);
+      _PyTokenizer_print_escape(stderr, tok->cur, tok->inp - tok->cur);
+      fprintf(stderr, "  tok->done = %d\n", tok->done);
+    }
 #endif
-        if (!rc) {
-            tok->cur = tok->inp;
-            return EOF;
-        }
-        tok->line_start = tok->cur;
+    if (!rc) {
+      tok->cur = tok->inp;
+      return EOF;
+    }
+    tok->line_start = tok->cur;
 
-        if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
-            _PyTokenizer_syntaxerror(tok, "source code cannot contain null bytes");
-            tok->cur = tok->inp;
-            return EOF;
-        }
+    if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
+      _PyTokenizer_syntaxerror(tok, "source code cannot contain null bytes");
+      tok->cur = tok->inp;
+      return EOF;
     }
-    Py_UNREACHABLE();
+  }
+  Py_UNREACHABLE();
 }
 
 /* Back-up one character */
-static void
-tok_backup(struct tok_state *tok, int c)
-{
-    if (c != EOF) {
-        if (--tok->cur < tok->buf) {
-            Py_FatalError("tokenizer beginning of buffer");
-        }
-        if ((int)(unsigned char)*tok->cur != Py_CHARMASK(c)) {
-            Py_FatalError("tok_backup: wrong character");
-        }
-        tok->col_offset--;
+static void tok_backup(struct tok_state *tok, int c) {
+  if (c != EOF) {
+    if (--tok->cur < tok->buf) {
+      Py_FatalError("tokenizer beginning of buffer");
+    }
+    if ((int)(unsigned char)*tok->cur != Py_CHARMASK(c)) {
+      Py_FatalError("tok_backup: wrong character");
     }
+    tok->col_offset--;
+  }
 }
 
-static int
-set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
-    assert(token != NULL);
-    assert(c == '}' || c == ':' || c == '!');
-    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
+static int set_ftstring_expr(struct tok_state *tok, struct token *token,
+                             char c) {
+  assert(token != NULL);
+  assert(c == '}' || c == ':' || c == '!');
+  tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
 
-    if (!(tok_mode->in_debug || tok_mode->string_kind == TSTRING) || token->metadata) {
-        return 0;
+  if (!(tok_mode->in_debug || tok_mode->string_kind == TSTRING) ||
+      token->metadata) {
+    return 0;
+  }
+  PyObject *res = NULL;
+
+  // Look for a # character outside of string literals
+  int hash_detected = 0;
+  int in_string = 0;
+  char quote_char = 0;
+
+  for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end;
+       i++) {
+    char ch = tok_mode->last_expr_buffer[i];
+
+    // Skip escaped characters
+    if (ch == '\\') {
+      i++;
+      continue;
     }
-    PyObject *res = NULL;
-
-    // Look for a # character outside of string literals
-    int hash_detected = 0;
-    int in_string = 0;
-    char quote_char = 0;
-
-    for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
-        char ch = tok_mode->last_expr_buffer[i];
-
-        // Skip escaped characters
-        if (ch == '\\') {
-            i++;
-            continue;
-        }
-
-        // Handle quotes
-        if (ch == '"' || ch == '\'') {
-            // The following if/else block works becase there is an off number
-            // of quotes in STRING tokens and the lexer only ever reaches this
-            // function with valid STRING tokens.
-            // For example: """hello"""
-            // First quote: in_string = 1
-            // Second quote: in_string = 0
-            // Third quote: in_string = 1
-            if (!in_string) {
-                in_string = 1;
-                quote_char = ch;
-            }
-            else if (ch == quote_char) {
-                in_string = 0;
-            }
-            continue;
-        }
 
-        // Check for # outside strings
-        if (ch == '#' && !in_string) {
-            hash_detected = 1;
-            break;
-        }
+    // Handle quotes
+    if (ch == '"' || ch == '\'') {
+      // The following if/else block works because there is an odd number
+      // of quotes in STRING tokens and the lexer only ever reaches this
+      // function with valid STRING tokens.
+      // For example: """hello"""
+      // First quote: in_string = 1
+      // Second quote: in_string = 0
+      // Third quote: in_string = 1
+      if (!in_string) {
+        in_string = 1;
+        quote_char = ch;
+      } else if (ch == quote_char) {
+        in_string = 0;
+      }
+      continue;
     }
-    // If we found a # character in the expression, we need to handle comments
-    if (hash_detected) {
-        // Allocate buffer for processed result
-        char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
-        if (!result) {
-            return -1;
-        }
 
-        Py_ssize_t i = 0;  // Input position
-        Py_ssize_t j = 0;  // Output position
-        in_string = 0;     // Whether we're in a string
-        quote_char = 0;    // Current string quote char
-
-        // Process each character
-        while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
-            char ch = tok_mode->last_expr_buffer[i];
-
-            // Handle string quotes
-            if (ch == '"' || ch == '\'') {
-                // See comment above to understand this part
-                if (!in_string) {
-                    in_string = 1;
-                    quote_char = ch;
-                } else if (ch == quote_char) {
-                    in_string = 0;
-                }
-                result[j++] = ch;
-            }
-            // Skip comments
-            else if (ch == '#' && !in_string) {
-                while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
-                       tok_mode->last_expr_buffer[i] != '\n') {
-                    i++;
-                }
-                if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
-                    result[j++] = '\n';
-                }
-            }
-            // Copy other chars
-            else {
-                result[j++] = ch;
-            }
-            i++;
-        }
-
-        result[j] = '\0';  // Null-terminate the result string
-        res = PyUnicode_DecodeUTF8(result, j, NULL);
-        PyMem_Free(result);
-    } else {
-        res = PyUnicode_DecodeUTF8(
-            tok_mode->last_expr_buffer,
-            tok_mode->last_expr_size - tok_mode->last_expr_end,
-            NULL
-        );
+    // Check for # outside strings
+    if (ch == '#' && !in_string) {
+      hash_detected = 1;
+      break;
+    }
+  }
+  // If we found a # character in the expression, we need to handle comments
+  if (hash_detected) {
+    // Allocate buffer for processed result
+    char *result = (char *)PyMem_Malloc(
+        (tok_mode->last_expr_size - tok_mode->last_expr_end + 1) *
+        sizeof(char));
+    if (!result) {
+      return -1;
     }
 
-    if (!res) {
-        return -1;
+    Py_ssize_t i = 0; // Input position
+    Py_ssize_t j = 0; // Output position
+    in_string = 0;    // Whether we're in a string
+    quote_char = 0;   // Current string quote char
+
+    // Process each character
+    while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
+      char ch = tok_mode->last_expr_buffer[i];
+
+      // Handle string quotes
+      if (ch == '"' || ch == '\'') {
+        // See comment above to understand this part
+        if (!in_string) {
+          in_string = 1;
+          quote_char = ch;
+        } else if (ch == quote_char) {
+          in_string = 0;
+        }
+        result[j++] = ch;
+      }
+      // Skip comments
+      else if (ch == '#' && !in_string) {
+        while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
+               tok_mode->last_expr_buffer[i] != '\n') {
+          i++;
+        }
+        if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
+          result[j++] = '\n';
+        }
+      }
+      // Copy other chars
+      else {
+        result[j++] = ch;
+      }
+      i++;
     }
-    token->metadata = res;
-    return 0;
+
+    result[j] = '\0'; // Null-terminate the result string
+    res = PyUnicode_DecodeUTF8(result, j, NULL);
+    PyMem_Free(result);
+  } else {
+    res = PyUnicode_DecodeUTF8(
+        tok_mode->last_expr_buffer,
+        tok_mode->last_expr_size - tok_mode->last_expr_end, NULL);
+  }
+
+  if (!res) {
+    return -1;
+  }
+  token->metadata = res;
+  return 0;
 }
 
-int
-_PyLexer_update_ftstring_expr(struct tok_state *tok, char cur)
-{
-    assert(tok->cur != NULL);
+int _PyLexer_update_ftstring_expr(struct tok_state *tok, char cur) {
+  assert(tok->cur != NULL);
 
-    Py_ssize_t size = strlen(tok->cur);
-    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
+  Py_ssize_t size = strlen(tok->cur);
+  tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
 
-    switch (cur) {
-       case 0:
-            if (!tok_mode->last_expr_buffer || tok_mode->last_expr_end >= 0) {
-                return 1;
-            }
-            char *new_buffer = PyMem_Realloc(
-                tok_mode->last_expr_buffer,
-                tok_mode->last_expr_size + size
-            );
-            if (new_buffer == NULL) {
-                PyMem_Free(tok_mode->last_expr_buffer);
-                goto error;
-            }
-            tok_mode->last_expr_buffer = new_buffer;
-            strncpy(tok_mode->last_expr_buffer + tok_mode->last_expr_size, tok->cur, size);
-            tok_mode->last_expr_size += size;
-            break;
-        case '{':
-            if (tok_mode->last_expr_buffer != NULL) {
-                PyMem_Free(tok_mode->last_expr_buffer);
-            }
-            tok_mode->last_expr_buffer = PyMem_Malloc(size);
-            if (tok_mode->last_expr_buffer == NULL) {
-                goto error;
-            }
-            tok_mode->last_expr_size = size;
-            tok_mode->last_expr_end = -1;
-            strncpy(tok_mode->last_expr_buffer, tok->cur, size);
-            break;
-        case '}':
-        case '!':
-            tok_mode->last_expr_end = strlen(tok->start);
-            break;
-        case ':':
-            if (tok_mode->last_expr_end == -1) {
-               tok_mode->last_expr_end = strlen(tok->start);
-            }
-            break;
-        default:
-            Py_UNREACHABLE();
+  switch (cur) {
+  case 0:
+    if (!tok_mode->last_expr_buffer || tok_mode->last_expr_end >= 0) {
+      return 1;
     }
-    return 1;
+    char *new_buffer = PyMem_Realloc(tok_mode->last_expr_buffer,
+                                     tok_mode->last_expr_size + size);
+    if (new_buffer == NULL) {
+      PyMem_Free(tok_mode->last_expr_buffer);
+      goto error;
+    }
+    tok_mode->last_expr_buffer = new_buffer;
+    strncpy(tok_mode->last_expr_buffer + tok_mode->last_expr_size, tok->cur,
+            size);
+    tok_mode->last_expr_size += size;
+    break;
+  case '{':
+    if (tok_mode->last_expr_buffer != NULL) {
+      PyMem_Free(tok_mode->last_expr_buffer);
+    }
+    tok_mode->last_expr_buffer = PyMem_Malloc(size);
+    if (tok_mode->last_expr_buffer == NULL) {
+      goto error;
+    }
+    tok_mode->last_expr_size = size;
+    tok_mode->last_expr_end = -1;
+    strncpy(tok_mode->last_expr_buffer, tok->cur, size);
+    break;
+  case '}':
+  case '!':
+    tok_mode->last_expr_end = strlen(tok->start);
+    break;
+  case ':':
+    if (tok_mode->last_expr_end == -1) {
+      tok_mode->last_expr_end = strlen(tok->start);
+    }
+    break;
+  default:
+    Py_UNREACHABLE();
+  }
+  return 1;
 error:
-    tok->done = E_NOMEM;
-    return 0;
+  tok->done = E_NOMEM;
+  return 0;
 }
 
-static int
-lookahead(struct tok_state *tok, const char *test)
-{
-    const char *s = test;
-    int res = 0;
-    while (1) {
-        int c = tok_nextc(tok);
-        if (*s == 0) {
-            res = !is_potential_identifier_char(c);
-        }
-        else if (c == *s) {
-            s++;
-            continue;
-        }
+static int lookahead(struct tok_state *tok, const char *test) {
+  const char *s = test;
+  int res = 0;
+  while (1) {
+    int c = tok_nextc(tok);
+    if (*s == 0) {
+      res = !is_potential_identifier_char(c);
+    } else if (c == *s) {
+      s++;
+      continue;
+    }
 
-        tok_backup(tok, c);
-        while (s != test) {
-            tok_backup(tok, *--s);
-        }
-        return res;
+    tok_backup(tok, c);
+    while (s != test) {
+      tok_backup(tok, *--s);
     }
+    return res;
+  }
 }
 
-static int
-verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
-    if (tok->tok_extra_tokens) {
-        // When we are parsing extra tokens, we don't want to emit warnings
-        // about invalid literals, because we want to be a bit more liberal.
-        return 1;
-    }
-    /* Emit a deprecation warning only if the numeric literal is immediately
-     * followed by one of keywords which can occur after a numeric literal
-     * in valid code: "and", "else", "for", "if", "in", "is" and "or".
-     * It allows to gradually deprecate existing valid code without adding
-     * warning before error in most cases of invalid numeric literal (which
-     * would be confusing and break existing tests).
-     * Raise a syntax error with slightly better message than plain
-     * "invalid syntax" if the numeric literal is immediately followed by
-     * other keyword or identifier.
-     */
-    int r = 0;
-    if (c == 'a') {
-        r = lookahead(tok, "nd");
-    }
-    else if (c == 'e') {
-        r = lookahead(tok, "lse");
-    }
-    else if (c == 'f') {
-        r = lookahead(tok, "or");
-    }
-    else if (c == 'i') {
-        int c2 = tok_nextc(tok);
-        if (c2 == 'f' || c2 == 'n' || c2 == 's') {
-            r = 1;
-        }
-        tok_backup(tok, c2);
-    }
-    else if (c == 'o') {
-        r = lookahead(tok, "r");
-    }
-    else if (c == 'n') {
-        r = lookahead(tok, "ot");
+static int verify_end_of_number(struct tok_state *tok, int c,
+                                const char *kind) {
+  if (tok->tok_extra_tokens) {
+    // When we are parsing extra tokens, we don't want to emit warnings
+    // about invalid literals, because we want to be a bit more liberal.
+    return 1;
+  }
+  /* Emit a deprecation warning only if the numeric literal is immediately
+   * followed by one of the keywords which can occur after a numeric literal
+   * in valid code: "and", "else", "for", "if", "in", "is", "not" and "or".
+   * This allows existing valid code to be deprecated gradually without adding
+   * a warning before the error in most cases of invalid numeric literal (which
+   * would be confusing and break existing tests).
+   * Raise a syntax error with a slightly better message than plain
+   * "invalid syntax" if the numeric literal is immediately followed by
+   * any other keyword or identifier.
+   */
+  int r = 0;
+  if (c == 'a') {
+    r = lookahead(tok, "nd");
+  } else if (c == 'e') {
+    r = lookahead(tok, "lse");
+  } else if (c == 'f') {
+    r = lookahead(tok, "or");
+  } else if (c == 'i') {
+    int c2 = tok_nextc(tok);
+    if (c2 == 'f' || c2 == 'n' || c2 == 's') {
+      r = 1;
     }
-    if (r) {
-        tok_backup(tok, c);
-        if (_PyTokenizer_parser_warn(tok, PyExc_SyntaxWarning,
-                "invalid %s literal", kind))
-        {
-            return 0;
-        }
-        tok_nextc(tok);
+    tok_backup(tok, c2);
+  } else if (c == 'o') {
+    r = lookahead(tok, "r");
+  } else if (c == 'n') {
+    r = lookahead(tok, "ot");
+  }
+  if (r) {
+    tok_backup(tok, c);
+    if (_PyTokenizer_parser_warn(tok, PyExc_SyntaxWarning, "invalid %s literal",
+                                 kind)) {
+      return 0;
     }
-    else /* In future releases, only error will remain. */
+    tok_nextc(tok);
+  } else /* In future releases, only error will remain. */
     if (c < 128 && is_potential_identifier_char(c)) {
-        tok_backup(tok, c);
-        _PyTokenizer_syntaxerror(tok, "invalid %s literal", kind);
-        return 0;
+      tok_backup(tok, c);
+      _PyTokenizer_syntaxerror(tok, "invalid %s literal", kind);
+      return 0;
     }
-    return 1;
+  return 1;
 }
 
 /* Verify that the identifier follows PEP 3131. */
-static int
-verify_identifier(struct tok_state *tok)
-{
-    if (tok->tok_extra_tokens) {
-        return 1;
-    }
-    PyObject *s;
-    if (tok->decoding_erred)
-        return 0;
-    s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
-    if (s == NULL) {
-        if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
-            tok->done = E_DECODE;
-        }
-        else {
-            tok->done = E_ERROR;
-        }
-        return 0;
+static int verify_identifier(struct tok_state *tok) {
+  if (tok->tok_extra_tokens) {
+    return 1;
+  }
+  PyObject *s;
+  if (tok->decoding_erred)
+    return 0;
+  s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
+  if (s == NULL) {
+    if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
+      tok->done = E_DECODE;
+    } else {
+      tok->done = E_ERROR;
     }
-    Py_ssize_t invalid = _PyUnicode_ScanIdentifier(s);
-    assert(invalid >= 0);
-    assert(PyUnicode_GET_LENGTH(s) > 0);
-    if (invalid < PyUnicode_GET_LENGTH(s)) {
-        Py_UCS4 ch = PyUnicode_READ_CHAR(s, invalid);
-        if (invalid + 1 < PyUnicode_GET_LENGTH(s)) {
-            /* Determine the offset in UTF-8 encoded input */
-            Py_SETREF(s, PyUnicode_Substring(s, 0, invalid + 1));
-            if (s != NULL) {
-                Py_SETREF(s, PyUnicode_AsUTF8String(s));
-            }
-            if (s == NULL) {
-                tok->done = E_ERROR;
-                return 0;
-            }
-            tok->cur = (char *)tok->start + PyBytes_GET_SIZE(s);
-        }
-        Py_DECREF(s);
-        if (Py_UNICODE_ISPRINTABLE(ch)) {
-            _PyTokenizer_syntaxerror(tok, "invalid character '%c' (U+%04X)", ch, ch);
-        }
-        else {
-            _PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", ch);
-        }
+    return 0;
+  }
+  Py_ssize_t invalid = _PyUnicode_ScanIdentifier(s);
+  assert(invalid >= 0);
+  assert(PyUnicode_GET_LENGTH(s) > 0);
+  if (invalid < PyUnicode_GET_LENGTH(s)) {
+    Py_UCS4 ch = PyUnicode_READ_CHAR(s, invalid);
+    if (invalid + 1 < PyUnicode_GET_LENGTH(s)) {
+      /* Determine the offset in UTF-8 encoded input */
+      Py_SETREF(s, PyUnicode_Substring(s, 0, invalid + 1));
+      if (s != NULL) {
+        Py_SETREF(s, PyUnicode_AsUTF8String(s));
+      }
+      if (s == NULL) {
+        tok->done = E_ERROR;
         return 0;
+      }
+      tok->cur = (char *)tok->start + PyBytes_GET_SIZE(s);
     }
     Py_DECREF(s);
-    return 1;
-}
-
-static int
-tok_decimal_tail(struct tok_state *tok)
-{
-    int c;
-
-    while (1) {
-        do {
-            c = tok_nextc(tok);
-        } while (Py_ISDIGIT(c));
-        if (c != '_') {
-            break;
-        }
-        c = tok_nextc(tok);
-        if (!Py_ISDIGIT(c)) {
-            tok_backup(tok, c);
-            _PyTokenizer_syntaxerror(tok, "invalid decimal literal");
-            return 0;
-        }
-    }
-    return c;
-}
-
-static inline int
-tok_continuation_line(struct tok_state *tok) {
-    int c = tok_nextc(tok);
-    if (c == '\r') {
-        c = tok_nextc(tok);
-    }
-    if (c != '\n') {
-        tok->done = E_LINECONT;
-        return -1;
-    }
-    c = tok_nextc(tok);
-    if (c == EOF) {
-        tok->done = E_EOF;
-        tok->cur = tok->inp;
-        return -1;
+    if (Py_UNICODE_ISPRINTABLE(ch)) {
+      _PyTokenizer_syntaxerror(tok, "invalid character '%c' (U+%04X)", ch, ch);
     } else {
-        tok_backup(tok, c);
+      _PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X",
+                               ch);
     }
-    return c;
+    return 0;
+  }
+  Py_DECREF(s);
+  return 1;
 }
 
-static int
-maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
-                                             int saw_b, int saw_r, int saw_u,
-                                             int saw_f, int saw_t) {
-    // Supported: rb, rf, rt (in any order)
-    // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
-
-#define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2)                             \
-    do {                                                                  \
-        (void)_PyTokenizer_syntaxerror_known_range(                       \
-            tok, (int)(tok->start + 1 - tok->line_start),                 \
-            (int)(tok->cur - tok->line_start),                            \
-            "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible"); \
-        return -1;                                                        \
-    } while (0)
+static int tok_decimal_tail(struct tok_state *tok) {
+  int c;
 
-    if (saw_u && saw_b) {
-        RETURN_SYNTAX_ERROR("u", "b");
-    }
-    if (saw_u && saw_r) {
-        RETURN_SYNTAX_ERROR("u", "r");
-    }
-    if (saw_u && saw_f) {
-        RETURN_SYNTAX_ERROR("u", "f");
+  while (1) {
+    do {
+      c = tok_nextc(tok);
+    } while (Py_ISDIGIT(c));
+    if (c != '_') {
+      break;
     }
-    if (saw_u && saw_t) {
-        RETURN_SYNTAX_ERROR("u", "t");
+    c = tok_nextc(tok);
+    if (!Py_ISDIGIT(c)) {
+      tok_backup(tok, c);
+      _PyTokenizer_syntaxerror(tok, "invalid decimal literal");
+      return 0;
     }
+  }
+  return c;
+}
 
-    if (saw_b && saw_f) {
-        RETURN_SYNTAX_ERROR("b", "f");
-    }
-    if (saw_b && saw_t) {
-        RETURN_SYNTAX_ERROR("b", "t");
-    }
+static inline int tok_continuation_line(struct tok_state *tok) {
+  int c = tok_nextc(tok);
+  if (c == '\r') {
+    c = tok_nextc(tok);
+  }
+  if (c != '\n') {
+    tok->done = E_LINECONT;
+    return -1;
+  }
+  c = tok_nextc(tok);
+  if (c == EOF) {
+    tok->done = E_EOF;
+    tok->cur = tok->inp;
+    return -1;
+  } else {
+    tok_backup(tok, c);
+  }
+  return c;
+}
 
-    if (saw_f && saw_t) {
-        RETURN_SYNTAX_ERROR("f", "t");
-    }
+static int maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
+                                                        int saw_b, int saw_r,
+                                                        int saw_u, int saw_f,
+                                                        int saw_t) {
+  // Supported: rb, rf, rt (in any order)
+  // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
+
+#define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2)                                  \
+  do {                                                                         \
+    (void)_PyTokenizer_syntaxerror_known_range(                                \
+        tok, (int)(tok->start + 1 - tok->line_start),                          \
+        (int)(tok->cur - tok->line_start),                                     \
+        "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible");          \
+    return -1;                                                                 \
+  } while (0)
+
+  if (saw_u && saw_b) {
+    RETURN_SYNTAX_ERROR("u", "b");
+  }
+  if (saw_u && saw_r) {
+    RETURN_SYNTAX_ERROR("u", "r");
+  }
+  if (saw_u && saw_f) {
+    RETURN_SYNTAX_ERROR("u", "f");
+  }
+  if (saw_u && saw_t) {
+    RETURN_SYNTAX_ERROR("u", "t");
+  }
+
+  if (saw_b && saw_f) {
+    RETURN_SYNTAX_ERROR("b", "f");
+  }
+  if (saw_b && saw_t) {
+    RETURN_SYNTAX_ERROR("b", "t");
+  }
+
+  if (saw_f && saw_t) {
+    RETURN_SYNTAX_ERROR("f", "t");
+  }
 
 #undef RETURN_SYNTAX_ERROR
 
-    return 0;
+  return 0;
 }
 
-static int
-tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
-{
-    int c;
-    int blankline, nonascii;
-
-    const char *p_start = NULL;
-    const char *p_end = NULL;
-  nextline:
-    tok->start = NULL;
-    tok->starting_col_offset = -1;
-    blankline = 0;
-
-
-    /* Get indentation level */
-    if (tok->atbol) {
-        int col = 0;
-        int altcol = 0;
-        tok->atbol = 0;
-        int cont_line_col = 0;
-        for (;;) {
-            c = tok_nextc(tok);
-            if (c == ' ') {
-                col++, altcol++;
-            }
-            else if (c == '\t') {
-                col = (col / tok->tabsize + 1) * tok->tabsize;
-                altcol = (altcol / ALTTABSIZE + 1) * ALTTABSIZE;
-            }
-            else if (c == '\014')  {/* Control-L (formfeed) */
-                col = altcol = 0; /* For Emacs users */
-            }
-            else if (c == '\\') {
-                // Indentation cannot be split over multiple physical lines
-                // using backslashes. This means that if we found a backslash
-                // preceded by whitespace, **the first one we find** determines
-                // the level of indentation of whatever comes next.
-                cont_line_col = cont_line_col ? cont_line_col : col;
-                if ((c = tok_continuation_line(tok)) == -1) {
-                    return MAKE_TOKEN(ERRORTOKEN);
-                }
-            }
-            else if (c == EOF && PyErr_Occurred()) {
-                return MAKE_TOKEN(ERRORTOKEN);
-            }
-            else {
-                break;
-            }
-        }
-        tok_backup(tok, c);
-        if (c == '#' || c == '\n' || c == '\r') {
-            /* Lines with only whitespace and/or comments
-               shouldn't affect the indentation and are
-               not passed to the parser as NEWLINE tokens,
-               except *totally* empty lines in interactive
-               mode, which signal the end of a command group. */
-            if (col == 0 && c == '\n' && tok->prompt != NULL) {
-                blankline = 0; /* Let it through */
-            }
-            else if (tok->prompt != NULL && tok->lineno == 1) {
-                /* In interactive mode, if the first line contains
-                   only spaces and/or a comment, let it through. */
-                blankline = 0;
-                col = altcol = 0;
-            }
-            else {
-                blankline = 1; /* Ignore completely */
-            }
-            /* We can't jump back right here since we still
-               may need to skip to the end of a comment */
-        }
-        if (!blankline && tok->level == 0) {
-            col = cont_line_col ? cont_line_col : col;
-            altcol = cont_line_col ? cont_line_col : altcol;
-            if (col == tok->indstack[tok->indent]) {
-                /* No change */
-                if (altcol != tok->altindstack[tok->indent]) {
-                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
-                }
-            }
-            else if (col > tok->indstack[tok->indent]) {
-                /* Indent -- always one */
-                if (tok->indent+1 >= MAXINDENT) {
-                    tok->done = E_TOODEEP;
-                    tok->cur = tok->inp;
-                    return MAKE_TOKEN(ERRORTOKEN);
-                }
-                if (altcol <= tok->altindstack[tok->indent]) {
-                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
-                }
-                tok->pendin++;
-                tok->indstack[++tok->indent] = col;
-                tok->altindstack[tok->indent] = altcol;
-            }
-            else /* col < tok->indstack[tok->indent] */ {
-                /* Dedent -- any number, must be consistent */
-                while (tok->indent > 0 &&
-                    col < tok->indstack[tok->indent]) {
-                    tok->pendin--;
-                    tok->indent--;
-                }
-                if (col != tok->indstack[tok->indent]) {
-                    tok->done = E_DEDENT;
-                    tok->cur = tok->inp;
-                    return MAKE_TOKEN(ERRORTOKEN);
-                }
-                if (altcol != tok->altindstack[tok->indent]) {
-                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
-                }
-            }
+static int tok_get_normal_mode(struct tok_state *tok,
+                               tokenizer_mode *current_tok,
+                               struct token *token) {
+  int c;
+  int blankline, nonascii;
+
+  const char *p_start = NULL;
+  const char *p_end = NULL;
+nextline:
+  tok->start = NULL;
+  tok->starting_col_offset = -1;
+  blankline = 0;
+
+  /* Get indentation level */
+  if (tok->atbol) {
+    int col = 0;
+    int altcol = 0;
+    tok->atbol = 0;
+    int cont_line_col = 0;
+    for (;;) {
+      c = tok_nextc(tok);
+      if (c == ' ') {
+        col++, altcol++;
+      } else if (c == '\t') {
+        col = (col / tok->tabsize + 1) * tok->tabsize;
+        altcol = (altcol / ALTTABSIZE + 1) * ALTTABSIZE;
+      } else if (c == '\014') { /* Control-L (formfeed) */
+        col = altcol = 0;       /* For Emacs users */
+      } else if (c == '\\') {
+        // Indentation cannot be split over multiple physical lines
+        // using backslashes. This means that if we found a backslash
+        // preceded by whitespace, **the first one we find** determines
+        // the level of indentation of whatever comes next.
+        cont_line_col = cont_line_col ? cont_line_col : col;
+        if ((c = tok_continuation_line(tok)) == -1) {
+          return MAKE_TOKEN(ERRORTOKEN);
         }
+      } else if (c == EOF && PyErr_Occurred()) {
+        return MAKE_TOKEN(ERRORTOKEN);
+      } else {
+        break;
+      }
+    }
+    tok_backup(tok, c);
+    if (c == '#' || c == '\n' || c == '\r') {
+      /* Lines with only whitespace and/or comments
+         shouldn't affect the indentation and are
+         not passed to the parser as NEWLINE tokens,
+         except *totally* empty lines in interactive
+         mode, which signal the end of a command group. */
+      if (col == 0 && c == '\n' && tok->prompt != NULL) {
+        blankline = 0; /* Let it through */
+      } else if (tok->prompt != NULL && tok->lineno == 1) {
+        /* In interactive mode, if the first line contains
+           only spaces and/or a comment, let it through. */
+        blankline = 0;
+        col = altcol = 0;
+      } else {
+        blankline = 1; /* Ignore completely */
+      }
+      /* We can't jump back right here since we still
+         may need to skip to the end of a comment */
     }
+    if (!blankline && tok->level == 0) {
+      col = cont_line_col ? cont_line_col : col;
+      altcol = cont_line_col ? cont_line_col : altcol;
+      if (col == tok->indstack[tok->indent]) {
+        /* No change */
+        if (altcol != tok->altindstack[tok->indent]) {
+          return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
+        }
+      } else if (col > tok->indstack[tok->indent]) {
+        /* Indent -- always one */
+        if (tok->indent + 1 >= MAXINDENT) {
+          tok->done = E_TOODEEP;
+          tok->cur = tok->inp;
+          return MAKE_TOKEN(ERRORTOKEN);
+        }
+        if (altcol <= tok->altindstack[tok->indent]) {
+          return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
+        }
+        tok->pendin++;
+        tok->indstack[++tok->indent] = col;
+        tok->altindstack[tok->indent] = altcol;
+      } else /* col < tok->indstack[tok->indent] */ {
+        /* Dedent -- any number, must be consistent */
+        while (tok->indent > 0 && col < tok->indstack[tok->indent]) {
+          tok->pendin--;
+          tok->indent--;
+        }
+        if (col != tok->indstack[tok->indent]) {
+          tok->done = E_DEDENT;
+          tok->cur = tok->inp;
+          return MAKE_TOKEN(ERRORTOKEN);
+        }
+        if (altcol != tok->altindstack[tok->indent]) {
+          return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
+        }
+      }
+    }
+  }
 
-    tok->start = tok->cur;
-    tok->starting_col_offset = tok->col_offset;
+  tok->start = tok->cur;
+  tok->starting_col_offset = tok->col_offset;
 
-    /* Return pending indents/dedents */
-    if (tok->pendin != 0) {
-        if (tok->pendin < 0) {
-            if (tok->tok_extra_tokens) {
-                p_start = tok->cur;
-                p_end = tok->cur;
-            }
-            tok->pendin++;
-            return MAKE_TOKEN(DEDENT);
-        }
-        else {
-            if (tok->tok_extra_tokens) {
-                p_start = tok->buf;
-                p_end = tok->cur;
-            }
-            tok->pendin--;
-            return MAKE_TOKEN(INDENT);
-        }
+  /* Return pending indents/dedents */
+  if (tok->pendin != 0) {
+    if (tok->pendin < 0) {
+      if (tok->tok_extra_tokens) {
+        p_start = tok->cur;
+        p_end = tok->cur;
+      }
+      tok->pendin++;
+      return MAKE_TOKEN(DEDENT);
+    } else {
+      if (tok->tok_extra_tokens) {
+        p_start = tok->buf;
+        p_end = tok->cur;
+      }
+      tok->pendin--;
+      return MAKE_TOKEN(INDENT);
     }
+  }
 
-    /* Peek ahead at the next character */
+  /* Peek ahead at the next character */
+  c = tok_nextc(tok);
+  tok_backup(tok, c);
+
+again:
+  tok->start = NULL;
+  /* Skip spaces */
+  do {
     c = tok_nextc(tok);
-    tok_backup(tok, c);
+  } while (c == ' ' || c == '\t' || c == '\014');
 
- again:
-    tok->start = NULL;
-    /* Skip spaces */
-    do {
-        c = tok_nextc(tok);
-    } while (c == ' ' || c == '\t' || c == '\014');
+  /* Set start of current token */
+  tok->start = tok->cur == NULL ? NULL : tok->cur - 1;
+  tok->starting_col_offset = tok->col_offset - 1;
 
-    /* Set start of current token */
-    tok->start = tok->cur == NULL ? NULL : tok->cur - 1;
-    tok->starting_col_offset = tok->col_offset - 1;
+  /* Skip comment, unless it's a type comment */
+  if (c == '#') {
 
-    /* Skip comment, unless it's a type comment */
-    if (c == '#') {
+    const char *p = NULL;
+    const char *prefix, *type_start;
+    int current_starting_col_offset;
 
-        const char* p = NULL;
-        const char *prefix, *type_start;
-        int current_starting_col_offset;
+    while (c != EOF && c != '\n' && c != '\r') {
+      c = tok_nextc(tok);
+    }
 
-        while (c != EOF && c != '\n' && c != '\r') {
-            c = tok_nextc(tok);
-        }
+    if (tok->tok_extra_tokens) {
+      p = tok->start;
+    }
 
-        if (tok->tok_extra_tokens) {
-            p = tok->start;
+    if (tok->type_comments) {
+      p = tok->start;
+      current_starting_col_offset = tok->starting_col_offset;
+      prefix = type_comment_prefix;
+      while (*prefix && p < tok->cur) {
+        if (*prefix == ' ') {
+          while (*p == ' ' || *p == '\t') {
+            p++;
+            current_starting_col_offset++;
+          }
+        } else if (*prefix == *p) {
+          p++;
+          current_starting_col_offset++;
+        } else {
+          break;
+        }
+
+        prefix++;
+      }
+
+      /* This is a type comment if we matched all of type_comment_prefix. */
+      if (!*prefix) {
+        int is_type_ignore = 1;
+        // +6 in order to skip the word 'ignore'
+        const char *ignore_end = p + 6;
+        const int ignore_end_col_offset = current_starting_col_offset + 6;
+        tok_backup(tok, c); /* don't eat the newline or EOF */
+
+        type_start = p;
+
+        /* A TYPE_IGNORE is "type: ignore" followed by the end of the token
+         * or anything ASCII and non-alphanumeric. */
+        is_type_ignore =
+            (tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0 &&
+             !(tok->cur > ignore_end && ((unsigned char)ignore_end[0] >= 128 ||
+                                         Py_ISALNUM(ignore_end[0]))));
+
+        if (is_type_ignore) {
+          p_start = ignore_end;
+          p_end = tok->cur;
+
+          /* If this type ignore is the only thing on the line, consume the
+           * newline also. */
+          if (blankline) {
+            tok_nextc(tok);
+            tok->atbol = 1;
+          }
+          return MAKE_TYPE_COMMENT_TOKEN(TYPE_IGNORE, ignore_end_col_offset,
+                                         tok->col_offset);
+        } else {
+          p_start = type_start;
+          p_end = tok->cur;
+          return MAKE_TYPE_COMMENT_TOKEN(
+              TYPE_COMMENT, current_starting_col_offset, tok->col_offset);
         }
+      }
+    }
+    if (tok->tok_extra_tokens) {
+      tok_backup(tok, c); /* don't eat the newline or EOF */
+      p_start = p;
+      p_end = tok->cur;
+      tok->comment_newline = blankline;
+      return MAKE_TOKEN(COMMENT);
+    }
+  }
 
-        if (tok->type_comments) {
-            p = tok->start;
-            current_starting_col_offset = tok->starting_col_offset;
-            prefix = type_comment_prefix;
-            while (*prefix && p < tok->cur) {
-                if (*prefix == ' ') {
-                    while (*p == ' ' || *p == '\t') {
-                        p++;
-                        current_starting_col_offset++;
-                    }
-                } else if (*prefix == *p) {
-                    p++;
-                    current_starting_col_offset++;
-                } else {
-                    break;
-                }
-
-                prefix++;
-            }
+  if (tok->done == E_INTERACT_STOP) {
+    return MAKE_TOKEN(ENDMARKER);
+  }
 
-            /* This is a type comment if we matched all of type_comment_prefix. */
-            if (!*prefix) {
-                int is_type_ignore = 1;
-                // +6 in order to skip the word 'ignore'
-                const char *ignore_end = p + 6;
-                const int ignore_end_col_offset = current_starting_col_offset + 6;
-                tok_backup(tok, c);  /* don't eat the newline or EOF */
-
-                type_start = p;
-
-                /* A TYPE_IGNORE is "type: ignore" followed by the end of the token
-                 * or anything ASCII and non-alphanumeric. */
-                is_type_ignore = (
-                    tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0
-                    && !(tok->cur > ignore_end
-                         && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));
-
-                if (is_type_ignore) {
-                    p_start = ignore_end;
-                    p_end = tok->cur;
-
-                    /* If this type ignore is the only thing on the line, consume the newline also. */
-                    if (blankline) {
-                        tok_nextc(tok);
-                        tok->atbol = 1;
-                    }
-                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_IGNORE, ignore_end_col_offset, tok->col_offset);
-                } else {
-                    p_start = type_start;
-                    p_end = tok->cur;
-                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_COMMENT, current_starting_col_offset, tok->col_offset);
-                }
-            }
-        }
-        if (tok->tok_extra_tokens) {
-            tok_backup(tok, c);  /* don't eat the newline or EOF */
-            p_start = p;
-            p_end = tok->cur;
-            tok->comment_newline = blankline;
-            return MAKE_TOKEN(COMMENT);
-        }
+  /* Check for EOF and errors now */
+  if (c == EOF) {
+    if (tok->level) {
+      return MAKE_TOKEN(ERRORTOKEN);
     }
-
-    if (tok->done == E_INTERACT_STOP) {
-        return MAKE_TOKEN(ENDMARKER);
+    return MAKE_TOKEN(tok->done == E_EOF ? ENDMARKER : ERRORTOKEN);
+  }
+
+  /* Identifier (most frequent token!) */
+  nonascii = 0;
+  if (is_potential_identifier_start(c)) {
+    /* Process the legal combinations of b"", r"", u"", f"", and t"". */
+    int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
+    while (1) {
+      if (!saw_b && (c == 'b' || c == 'B')) {
+        saw_b = 1;
+      }
+      /* Since this is a backwards compatibility support literal we don't
+         want to support it in arbitrary order like byte literals. */
+      else if (!saw_u && (c == 'u' || c == 'U')) {
+        saw_u = 1;
+      }
+      /* ur"" and ru"" are not supported */
+      else if (!saw_r && (c == 'r' || c == 'R')) {
+        saw_r = 1;
+      } else if (!saw_f && (c == 'f' || c == 'F')) {
+        saw_f = 1;
+      } else if (!saw_t && (c == 't' || c == 'T')) {
+        saw_t = 1;
+      } else {
+        break;
+      }
+      c = tok_nextc(tok);
+      if (c == '"' || c == '\'') {
+        // Raise error on incompatible string prefixes:
+        int status = maybe_raise_syntax_error_for_string_prefixes(
+            tok, saw_b, saw_r, saw_u, saw_f, saw_t);
+        if (status < 0) {
+          return MAKE_TOKEN(ERRORTOKEN);
+        }
+
+        // Handle valid f or t string creation:
+        if (saw_f || saw_t) {
+          goto f_string_quote;
+        }
+        goto letter_quote;
+      }
+    }
+    while (is_potential_identifier_char(c)) {
+      if (c >= 128) {
+        nonascii = 1;
+      }
+      c = tok_nextc(tok);
+    }
+    tok_backup(tok, c);
+    if (nonascii && !verify_identifier(tok)) {
+      return MAKE_TOKEN(ERRORTOKEN);
     }
 
-    /* Check for EOF and errors now */
-    if (c == EOF) {
-        if (tok->level) {
-            return MAKE_TOKEN(ERRORTOKEN);
-        }
-        return MAKE_TOKEN(tok->done == E_EOF ? ENDMARKER : ERRORTOKEN);
+    p_start = tok->start;
+    p_end = tok->cur;
+
+    return MAKE_TOKEN(NAME);
+  }
+
+  if (c == '\r') {
+    c = tok_nextc(tok);
+    if (c != '\n') {
+      tok_backup(tok, c);
     }
+    c = '\n';
+  }
 
-    /* Identifier (most frequent token!) */
-    nonascii = 0;
-    if (is_potential_identifier_start(c)) {
-        /* Process the various legal combinations of b"", r"", u"", and f"". */
-        int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
-        while (1) {
-            if (!saw_b && (c == 'b' || c == 'B')) {
-                saw_b = 1;
-            }
-            /* Since this is a backwards compatibility support literal we don't
-               want to support it in arbitrary order like byte literals. */
-            else if (!saw_u && (c == 'u'|| c == 'U')) {
-                saw_u = 1;
-            }
-            /* ur"" and ru"" are not supported */
-            else if (!saw_r && (c == 'r' || c == 'R')) {
-                saw_r = 1;
-            }
-            else if (!saw_f && (c == 'f' || c == 'F')) {
-                saw_f = 1;
-            }
-            else if (!saw_t && (c == 't' || c == 'T')) {
-                saw_t = 1;
-            }
-            else {
-                break;
-            }
-            c = tok_nextc(tok);
-            if (c == '"' || c == '\'') {
-                // Raise error on incompatible string prefixes:
-                int status = maybe_raise_syntax_error_for_string_prefixes(
-                    tok, saw_b, saw_r, saw_u, saw_f, saw_t);
-                if (status < 0) {
-                    return MAKE_TOKEN(ERRORTOKEN);
-                }
-
-                // Handle valid f or t string creation:
-                if (saw_f || saw_t) {
-                    goto f_string_quote;
-                }
-                goto letter_quote;
-            }
-        }
-        while (is_potential_identifier_char(c)) {
-            if (c >= 128) {
-                nonascii = 1;
-            }
-            c = tok_nextc(tok);
-        }
-        tok_backup(tok, c);
-        if (nonascii && !verify_identifier(tok)) {
-            return MAKE_TOKEN(ERRORTOKEN);
+  /* Newline */
+  if (c == '\n') {
+    tok->atbol = 1;
+    if (blankline || tok->level > 0) {
+      if (tok->tok_extra_tokens) {
+        if (tok->comment_newline) {
+          tok->comment_newline = 0;
         }
-
         p_start = tok->start;
         p_end = tok->cur;
-
-        return MAKE_TOKEN(NAME);
+        return MAKE_TOKEN(NL);
+      }
+      goto nextline;
     }
-
-    if (c == '\r') {
-        c = tok_nextc(tok);
+    if (tok->comment_newline && tok->tok_extra_tokens) {
+      tok->comment_newline = 0;
+      p_start = tok->start;
+      p_end = tok->cur;
+      return MAKE_TOKEN(NL);
     }
+    p_start = tok->start;
+    p_end = tok->cur - 1; /* Leave '\n' out of the string */
+    tok->cont_line = 0;
+    return MAKE_TOKEN(NEWLINE);
+  }
 
-    /* Newline */
-    if (c == '\n') {
-        tok->atbol = 1;
-        if (blankline || tok->level > 0) {
-            if (tok->tok_extra_tokens) {
-                if (tok->comment_newline) {
-                    tok->comment_newline = 0;
-                }
-                p_start = tok->start;
-                p_end = tok->cur;
-                return MAKE_TOKEN(NL);
-            }
-            goto nextline;
-        }
-        if (tok->comment_newline && tok->tok_extra_tokens) {
-            tok->comment_newline = 0;
-            p_start = tok->start;
-            p_end = tok->cur;
-            return MAKE_TOKEN(NL);
-        }
+  /* Period or number starting with period? */
+  if (c == '.') {
+    c = tok_nextc(tok);
+    if (Py_ISDIGIT(c)) {
+      goto fraction;
+    } else if (c == '.') {
+      c = tok_nextc(tok);
+      if (c == '.') {
         p_start = tok->start;
-        p_end = tok->cur - 1; /* Leave '\n' out of the string */
-        tok->cont_line = 0;
-        return MAKE_TOKEN(NEWLINE);
+        p_end = tok->cur;
+        return MAKE_TOKEN(ELLIPSIS);
+      } else {
+        tok_backup(tok, c);
+      }
+      tok_backup(tok, '.');
+    } else {
+      tok_backup(tok, c);
     }
-
-    /* Period or number starting with period? */
-    if (c == '.') {
+    p_start = tok->start;
+    p_end = tok->cur;
+    return MAKE_TOKEN(DOT);
+  }
+
+  /* Number */
+  if (Py_ISDIGIT(c)) {
+    if (c == '0') {
+      /* Hex, octal or binary -- maybe. */
+      c = tok_nextc(tok);
+      if (c == 'x' || c == 'X') {
+        /* Hex */
         c = tok_nextc(tok);
-        if (Py_ISDIGIT(c)) {
-            goto fraction;
-        } else if (c == '.') {
+        do {
+          if (c == '_') {
             c = tok_nextc(tok);
-            if (c == '.') {
-                p_start = tok->start;
-                p_end = tok->cur;
-                return MAKE_TOKEN(ELLIPSIS);
-            }
-            else {
-                tok_backup(tok, c);
-            }
-            tok_backup(tok, '.');
-        }
-        else {
+          }
+          if (!Py_ISXDIGIT(c)) {
             tok_backup(tok, c);
-        }
-        p_start = tok->start;
-        p_end = tok->cur;
-        return MAKE_TOKEN(DOT);
-    }
-
-    /* Number */
-    if (Py_ISDIGIT(c)) {
-        if (c == '0') {
-            /* Hex, octal or binary -- maybe. */
+            return MAKE_TOKEN(
+                _PyTokenizer_syntaxerror(tok, "invalid hexadecimal literal"));
+          }
+          do {
             c = tok_nextc(tok);
-            if (c == 'x' || c == 'X') {
-                /* Hex */
-                c = tok_nextc(tok);
-                do {
-                    if (c == '_') {
-                        c = tok_nextc(tok);
-                    }
-                    if (!Py_ISXDIGIT(c)) {
-                        tok_backup(tok, c);
-                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid hexadecimal literal"));
-                    }
-                    do {
-                        c = tok_nextc(tok);
-                    } while (Py_ISXDIGIT(c));
-                } while (c == '_');
-                if (!verify_end_of_number(tok, c, "hexadecimal")) {
-                    return MAKE_TOKEN(ERRORTOKEN);
-                }
-            }
-            else if (c == 'o' || c == 'O') {
-                /* Octal */
-                c = tok_nextc(tok);
-                do {
-                    if (c == '_') {
-                        c = tok_nextc(tok);
-                    }
-                    if (c < '0' || c >= '8') {
-                        if (Py_ISDIGIT(c)) {
-                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
-                                    "invalid digit '%c' in octal literal", c));
-                        }
-                        else {
-                            tok_backup(tok, c);
-                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid octal literal"));
-                        }
-                    }
-                    do {
-                        c = tok_nextc(tok);
-                    } while ('0' <= c && c < '8');
-                } while (c == '_');
-                if (Py_ISDIGIT(c)) {
-                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
-                            "invalid digit '%c' in octal literal", c));
-                }
-                if (!verify_end_of_number(tok, c, "octal")) {
-                    return MAKE_TOKEN(ERRORTOKEN);
-                }
-            }
-            else if (c == 'b' || c == 'B') {
-                /* Binary */
-                c = tok_nextc(tok);
-                do {
-                    if (c == '_') {
-                        c = tok_nextc(tok);
-                    }
-                    if (c != '0' && c != '1') {
-                        if (Py_ISDIGIT(c)) {
-                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
-                        }
-                        else {
-                            tok_backup(tok, c);
-                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid binary literal"));
-                        }
-                    }
-                    do {
-                        c = tok_nextc(tok);
-                    } while (c == '0' || c == '1');
-                } while (c == '_');
-                if (Py_ISDIGIT(c)) {
-                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
-                }
-                if (!verify_end_of_number(tok, c, "binary")) {
-                    return MAKE_TOKEN(ERRORTOKEN);
-                }
-            }
-            else {
-                int nonzero = 0;
-                /* maybe old-style octal; c is first char of it */
-                /* in any case, allow '0' as a literal */
-                while (1) {
-                    if (c == '_') {
-                        c = tok_nextc(tok);
-                        if (!Py_ISDIGIT(c)) {
-                            tok_backup(tok, c);
-                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
-                        }
-                    }
-                    if (c != '0') {
-                        break;
-                    }
-                    c = tok_nextc(tok);
-                }
-                char* zeros_end = tok->cur;
-                if (Py_ISDIGIT(c)) {
-                    nonzero = 1;
-                    c = tok_decimal_tail(tok);
-                    if (c == 0) {
-                        return MAKE_TOKEN(ERRORTOKEN);
-                    }
-                }
-                if (c == '.') {
-                    c = tok_nextc(tok);
-                    goto fraction;
-                }
-                else if (c == 'e' || c == 'E') {
-                    goto exponent;
-                }
-                else if (c == 'j' || c == 'J') {
-                    goto imaginary;
-                }
-                else if (nonzero && !tok->tok_extra_tokens) {
-                    /* Old-style octal: now disallowed. */
-                    tok_backup(tok, c);
-                    return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
-                            tok, (int)(tok->start + 1 - tok->line_start),
-                            (int)(zeros_end - tok->line_start),
-                            "leading zeros in decimal integer "
-                            "literals are not permitted; "
-                            "use an 0o prefix for octal integers"));
-                }
-                if (!verify_end_of_number(tok, c, "decimal")) {
-                    return MAKE_TOKEN(ERRORTOKEN);
-                }
-            }
-        }
-        else {
-            /* Decimal */
-            c = tok_decimal_tail(tok);
-            if (c == 0) {
-                return MAKE_TOKEN(ERRORTOKEN);
-            }
-            {
-                /* Accept floating-point numbers. */
-                if (c == '.') {
-                    c = tok_nextc(tok);
-        fraction:
-                    /* Fraction */
-                    if (Py_ISDIGIT(c)) {
-                        c = tok_decimal_tail(tok);
-                        if (c == 0) {
-                            return MAKE_TOKEN(ERRORTOKEN);
-                        }
-                    }
-                }
-                if (c == 'e' || c == 'E') {
-                    int e;
-                  exponent:
-                    e = c;
-                    /* Exponent part */
-                    c = tok_nextc(tok);
-                    if (c == '+' || c == '-') {
-                        c = tok_nextc(tok);
-                        if (!Py_ISDIGIT(c)) {
-                            tok_backup(tok, c);
-                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
-                        }
-                    } else if (!Py_ISDIGIT(c)) {
-                        tok_backup(tok, c);
-                        if (!verify_end_of_number(tok, e, "decimal")) {
-                            return MAKE_TOKEN(ERRORTOKEN);
-                        }
-                        tok_backup(tok, e);
-                        p_start = tok->start;
-                        p_end = tok->cur;
-                        return MAKE_TOKEN(NUMBER);
-                    }
-                    c = tok_decimal_tail(tok);
-                    if (c == 0) {
-                        return MAKE_TOKEN(ERRORTOKEN);
-                    }
-                }
-                if (c == 'j' || c == 'J') {
-                    /* Imaginary part */
-        imaginary:
-                    c = tok_nextc(tok);
-                    if (!verify_end_of_number(tok, c, "imaginary")) {
-                        return MAKE_TOKEN(ERRORTOKEN);
-                    }
-                }
-                else if (!verify_end_of_number(tok, c, "decimal")) {
-                    return MAKE_TOKEN(ERRORTOKEN);
-                }
-            }
+          } while (Py_ISXDIGIT(c));
+        } while (c == '_');
+        if (!verify_end_of_number(tok, c, "hexadecimal")) {
+          return MAKE_TOKEN(ERRORTOKEN);
         }
-        tok_backup(tok, c);
-        p_start = tok->start;
-        p_end = tok->cur;
-        return MAKE_TOKEN(NUMBER);
-    }
-
-  f_string_quote:
-    if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r' || Py_TOLOWER(*tok->start) == 't')
-        && (c == '\'' || c == '"'))) {
-
-        int quote = c;
-        int quote_size = 1;             /* 1 or 3 */
-
-        /* Nodes of type STRING, especially multi line strings
-           must be handled differently in order to get both
-           the starting line number and the column offset right.
-           (cf. issue 16806) */
-        tok->first_lineno = tok->lineno;
-        tok->multi_line_start = tok->line_start;
-
-        /* Find the quote size and start of string */
-        int after_quote = tok_nextc(tok);
-        if (after_quote == quote) {
-            int after_after_quote = tok_nextc(tok);
-            if (after_after_quote == quote) {
-                quote_size = 3;
-            }
-            else {
-                // TODO: Check this
-                tok_backup(tok, after_after_quote);
-                tok_backup(tok, after_quote);
+      } else if (c == 'o' || c == 'O') {
+        /* Octal */
+        c = tok_nextc(tok);
+        do {
+          if (c == '_') {
+            c = tok_nextc(tok);
+          }
+          if (c < '0' || c >= '8') {
+            if (Py_ISDIGIT(c)) {
+              return MAKE_TOKEN(_PyTokenizer_syntaxerror(
+                  tok, "invalid digit '%c' in octal literal", c));
+            } else {
+              tok_backup(tok, c);
+              return MAKE_TOKEN(
+                  _PyTokenizer_syntaxerror(tok, "invalid octal literal"));
             }
+          }
+          do {
+            c = tok_nextc(tok);
+          } while ('0' <= c && c < '8');
+        } while (c == '_');
+        if (Py_ISDIGIT(c)) {
+          return MAKE_TOKEN(_PyTokenizer_syntaxerror(
+              tok, "invalid digit '%c' in octal literal", c));
         }
-        if (after_quote != quote) {
-            tok_backup(tok, after_quote);
-        }
-
-
-        p_start = tok->start;
-        p_end = tok->cur;
-        if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) {
-            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested f-strings or t-strings"));
-        }
-        tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
-        the_current_tok->kind = TOK_FSTRING_MODE;
-        the_current_tok->quote = quote;
-        the_current_tok->quote_size = quote_size;
-        the_current_tok->start = tok->start;
-        the_current_tok->multi_line_start = tok->line_start;
-        the_current_tok->first_line = tok->lineno;
-        the_current_tok->start_offset = -1;
-        the_current_tok->multi_line_start_offset = -1;
-        the_current_tok->last_expr_buffer = NULL;
-        the_current_tok->last_expr_size = 0;
-        the_current_tok->last_expr_end = -1;
-        the_current_tok->in_format_spec = 0;
-        the_current_tok->in_debug = 0;
-
-        enum string_kind_t string_kind = FSTRING;
-        switch (*tok->start) {
-            case 'T':
-            case 't':
-                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
-                string_kind = TSTRING;
-                break;
-            case 'F':
-            case 'f':
-                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
-                break;
-            case 'R':
-            case 'r':
-                the_current_tok->raw = 1;
-                if (Py_TOLOWER(*(tok->start + 1)) == 't') {
-                    string_kind = TSTRING;
-                }
-                break;
-            default:
-                Py_UNREACHABLE();
+        if (!verify_end_of_number(tok, c, "octal")) {
+          return MAKE_TOKEN(ERRORTOKEN);
         }
-
-        the_current_tok->string_kind = string_kind;
-        the_current_tok->curly_bracket_depth = 0;
-        the_current_tok->curly_bracket_expr_start_depth = -1;
-        return string_kind == TSTRING ? MAKE_TOKEN(TSTRING_START) : MAKE_TOKEN(FSTRING_START);
-    }
-
-  letter_quote:
-    /* String */
-    if (c == '\'' || c == '"') {
-        int quote = c;
-        int quote_size = 1;             /* 1 or 3 */
-        int end_quote_size = 0;
-        int has_escaped_quote = 0;
-
-        /* Nodes of type STRING, especially multi line strings
-           must be handled differently in order to get both
-           the starting line number and the column offset right.
-           (cf. issue 16806) */
-        tok->first_lineno = tok->lineno;
-        tok->multi_line_start = tok->line_start;
-
-        /* Find the quote size and start of string */
+      } else if (c == 'b' || c == 'B') {
+        /* Binary */
         c = tok_nextc(tok);
-        if (c == quote) {
+        do {
+          if (c == '_') {
             c = tok_nextc(tok);
-            if (c == quote) {
-                quote_size = 3;
-            }
-            else {
-                end_quote_size = 1;     /* empty string found */
+          }
+          if (c != '0' && c != '1') {
+            if (Py_ISDIGIT(c)) {
+              return MAKE_TOKEN(_PyTokenizer_syntaxerror(
+                  tok, "invalid digit '%c' in binary literal", c));
+            } else {
+              tok_backup(tok, c);
+              return MAKE_TOKEN(
+                  _PyTokenizer_syntaxerror(tok, "invalid binary literal"));
             }
+          }
+          do {
+            c = tok_nextc(tok);
+          } while (c == '0' || c == '1');
+        } while (c == '_');
+        if (Py_ISDIGIT(c)) {
+          return MAKE_TOKEN(_PyTokenizer_syntaxerror(
+              tok, "invalid digit '%c' in binary literal", c));
         }
-        if (c != quote) {
-            tok_backup(tok, c);
+        if (!verify_end_of_number(tok, c, "binary")) {
+          return MAKE_TOKEN(ERRORTOKEN);
         }
-
-        /* Get rest of string */
-        while (end_quote_size != quote_size) {
+      } else {
+        int nonzero = 0;
+        /* maybe old-style octal; c is first char of it */
+        /* in any case, allow '0' as a literal */
+        while (1) {
+          if (c == '_') {
             c = tok_nextc(tok);
-            if (tok->done == E_ERROR) {
-                return MAKE_TOKEN(ERRORTOKEN);
-            }
-            if (tok->done == E_DECODE) {
-                break;
-            }
-            if (c == EOF || (quote_size == 1 && c == '\n')) {
-                assert(tok->multi_line_start != NULL);
-                // shift the tok_state's location into
-                // the start of string, and report the error
-                // from the initial quote character
-                tok->cur = (char *)tok->start;
-                tok->cur++;
-                tok->line_start = tok->multi_line_start;
-                int start = tok->lineno;
-                tok->lineno = tok->first_lineno;
-
-                if (INSIDE_FSTRING(tok)) {
-                    /* When we are in an f-string, before raising the
-                     * unterminated string literal error, check whether
-                     * does the initial quote matches with f-strings quotes
-                     * and if it is, then this must be a missing '}' token
-                     * so raise the proper error */
-                    tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
-                    if (the_current_tok->quote == quote &&
-                        the_current_tok->quote_size == quote_size) {
-                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
-                            "%c-string: expecting '}'", TOK_GET_STRING_PREFIX(tok)));
-                    }
-                }
-
-                if (quote_size == 3) {
-                    _PyTokenizer_syntaxerror(tok, "unterminated triple-quoted string literal"
-                                     " (detected at line %d)", start);
-                    if (c != '\n') {
-                        tok->done = E_EOFS;
-                    }
-                    return MAKE_TOKEN(ERRORTOKEN);
-                }
-                else {
-                    if (has_escaped_quote) {
-                        _PyTokenizer_syntaxerror(
-                            tok,
-                            "unterminated string literal (detected at line %d); "
-                            "perhaps you escaped the end quote?",
-                            start
-                        );
-                    } else {
-                        _PyTokenizer_syntaxerror(
-                            tok, "unterminated string literal (detected at line %d)", start
-                        );
-                    }
-                    if (c != '\n') {
-                        tok->done = E_EOLS;
-                    }
-                    return MAKE_TOKEN(ERRORTOKEN);
-                }
-            }
-            if (c == quote) {
-                end_quote_size += 1;
-            }
-            else {
-                end_quote_size = 0;
-                if (c == '\\') {
-                    c = tok_nextc(tok);  /* skip escaped char */
-                    if (c == quote) {  /* but record whether the escaped char was a quote */
-                        has_escaped_quote = 1;
-                    }
-                    if (c == '\r') {
-                        c = tok_nextc(tok);
-                    }
-                }
+            if (!Py_ISDIGIT(c)) {
+              tok_backup(tok, c);
+              return MAKE_TOKEN(
+                  _PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
             }
+          }
+          if (c != '0') {
+            break;
+          }
+          c = tok_nextc(tok);
         }
-
-        p_start = tok->start;
-        p_end = tok->cur;
-        return MAKE_TOKEN(STRING);
-    }
-
-    /* Line continuation */
-    if (c == '\\') {
-        if ((c = tok_continuation_line(tok)) == -1) {
-            return MAKE_TOKEN(ERRORTOKEN);
-        }
-        tok->cont_line = 1;
-        goto again; /* Read next line */
-    }
-
-    /* Punctuation character */
-    int is_punctuation = (c == ':' || c == '}' || c == '!' || c == '{');
-    if (is_punctuation && INSIDE_FSTRING(tok) && INSIDE_FSTRING_EXPR(current_tok)) {
-        /* This code block gets executed before the curly_bracket_depth is incremented
-         * by the `{` case, so for ensuring that we are on the 0th level, we need
-         * to adjust it manually */
-        int cursor = current_tok->curly_bracket_depth - (c != '{');
-        int in_format_spec = current_tok->in_format_spec;
-         int cursor_in_format_with_debug =
-             cursor == 1 && (current_tok->in_debug || in_format_spec);
-         int cursor_valid = cursor == 0 || cursor_in_format_with_debug;
-        if ((cursor_valid) && !_PyLexer_update_ftstring_expr(tok, c)) {
-            return MAKE_TOKEN(ENDMARKER);
-        }
-        if ((cursor_valid) && c != '{' && set_ftstring_expr(tok, token, c)) {
+        char *zeros_end = tok->cur;
+        if (Py_ISDIGIT(c)) {
+          nonzero = 1;
+          c = tok_decimal_tail(tok);
+          if (c == 0) {
             return MAKE_TOKEN(ERRORTOKEN);
+          }
+        }
+        if (c == '.') {
+          c = tok_nextc(tok);
+          goto fraction;
+        } else if (c == 'e' || c == 'E') {
+          goto exponent;
+        } else if (c == 'j' || c == 'J') {
+          goto imaginary;
+        } else if (nonzero && !tok->tok_extra_tokens) {
+          /* Old-style octal: now disallowed. */
+          tok_backup(tok, c);
+          return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
+              tok, (int)(tok->start + 1 - tok->line_start),
+              (int)(zeros_end - tok->line_start),
+              "leading zeros in decimal integer "
+              "literals are not permitted; "
+              "use an 0o prefix for octal integers"));
+        }
+        if (!verify_end_of_number(tok, c, "decimal")) {
+          return MAKE_TOKEN(ERRORTOKEN);
+        }
+      }
+    } else {
+      /* Decimal */
+      c = tok_decimal_tail(tok);
+      if (c == 0) {
+        return MAKE_TOKEN(ERRORTOKEN);
+      }
+      {
+        /* Accept floating-point numbers. */
+        if (c == '.') {
+          c = tok_nextc(tok);
+        fraction:
+          /* Fraction */
+          if (Py_ISDIGIT(c)) {
+            c = tok_decimal_tail(tok);
+            if (c == 0) {
+              return MAKE_TOKEN(ERRORTOKEN);
+            }
+          }
         }
-
-        if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
-            current_tok->kind = TOK_FSTRING_MODE;
-            current_tok->in_format_spec = 1;
-            p_start = tok->start;
-            p_end = tok->cur;
-            return MAKE_TOKEN(_PyToken_OneChar(c));
-        }
-    }
-
-    /* Check for two-character token */
-    {
-        int c2 = tok_nextc(tok);
-        int current_token = _PyToken_TwoChars(c, c2);
-        if (current_token != OP) {
-            int c3 = tok_nextc(tok);
-            int current_token3 = _PyToken_ThreeChars(c, c2, c3);
-            if (current_token3 != OP) {
-                current_token = current_token3;
+        if (c == 'e' || c == 'E') {
+          int e;
+        exponent:
+          e = c;
+          /* Exponent part */
+          c = tok_nextc(tok);
+          if (c == '+' || c == '-') {
+            c = tok_nextc(tok);
+            if (!Py_ISDIGIT(c)) {
+              tok_backup(tok, c);
+              return MAKE_TOKEN(
+                  _PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
             }
-            else {
-                tok_backup(tok, c3);
+          } else if (!Py_ISDIGIT(c)) {
+            tok_backup(tok, c);
+            if (!verify_end_of_number(tok, e, "decimal")) {
+              return MAKE_TOKEN(ERRORTOKEN);
             }
+            tok_backup(tok, e);
             p_start = tok->start;
             p_end = tok->cur;
-            return MAKE_TOKEN(current_token);
+            return MAKE_TOKEN(NUMBER);
+          }
+          c = tok_decimal_tail(tok);
+          if (c == 0) {
+            return MAKE_TOKEN(ERRORTOKEN);
+          }
         }
-        tok_backup(tok, c2);
+        if (c == 'j' || c == 'J') {
+          /* Imaginary part */
+        imaginary:
+          c = tok_nextc(tok);
+          if (!verify_end_of_number(tok, c, "imaginary")) {
+            return MAKE_TOKEN(ERRORTOKEN);
+          }
+        } else if (!verify_end_of_number(tok, c, "decimal")) {
+          return MAKE_TOKEN(ERRORTOKEN);
+        }
+      }
     }
+    tok_backup(tok, c);
+    p_start = tok->start;
+    p_end = tok->cur;
+    return MAKE_TOKEN(NUMBER);
+  }
 
-    /* Keep track of parentheses nesting level */
-    switch (c) {
-    case '(':
-    case '[':
-    case '{':
-        if (tok->level >= MAXLEVEL) {
-            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested parentheses"));
-        }
-        tok->parenstack[tok->level] = c;
-        tok->parenlinenostack[tok->level] = tok->lineno;
-        tok->parencolstack[tok->level] = (int)(tok->start - tok->line_start);
-        tok->level++;
-        if (INSIDE_FSTRING(tok)) {
-            current_tok->curly_bracket_depth++;
-        }
-        break;
-    case ')':
-    case ']':
-    case '}':
-        if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') {
-            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
-                "%c-string: single '}' is not allowed", TOK_GET_STRING_PREFIX(tok)));
-        }
-        if (!tok->tok_extra_tokens && !tok->level) {
-            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "unmatched '%c'", c));
-        }
-        if (tok->level > 0) {
-            tok->level--;
-            int opening = tok->parenstack[tok->level];
-            if (!tok->tok_extra_tokens && !((opening == '(' && c == ')') ||
-                                            (opening == '[' && c == ']') ||
-                                            (opening == '{' && c == '}'))) {
-                /* If the opening bracket belongs to an f-string's expression
-                part (e.g. f"{)}") and the closing bracket is an arbitrary
-                nested expression, then instead of matching a different
-                syntactical construct with it; we'll throw an unmatched
-                parentheses error. */
-                if (INSIDE_FSTRING(tok) && opening == '{') {
-                    assert(current_tok->curly_bracket_depth >= 0);
-                    int previous_bracket = current_tok->curly_bracket_depth - 1;
-                    if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
-                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
-                            "%c-string: unmatched '%c'", TOK_GET_STRING_PREFIX(tok), c));
-                    }
-                }
-                if (tok->parenlinenostack[tok->level] != tok->lineno) {
-                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
-                            "closing parenthesis '%c' does not match "
-                            "opening parenthesis '%c' on line %d",
-                            c, opening, tok->parenlinenostack[tok->level]));
-                }
-                else {
-                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
-                            "closing parenthesis '%c' does not match "
-                            "opening parenthesis '%c'",
-                            c, opening));
-                }
-            }
-        }
+f_string_quote:
+  if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r' ||
+        Py_TOLOWER(*tok->start) == 't') &&
+       (c == '\'' || c == '"'))) {
 
-        if (INSIDE_FSTRING(tok)) {
-            current_tok->curly_bracket_depth--;
-            if (current_tok->curly_bracket_depth < 0) {
-                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "%c-string: unmatched '%c'",
-                    TOK_GET_STRING_PREFIX(tok), c));
-            }
-            if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
-                current_tok->curly_bracket_expr_start_depth--;
-                current_tok->kind = TOK_FSTRING_MODE;
-                current_tok->in_format_spec = 0;
-                current_tok->in_debug = 0;
-            }
-        }
-        break;
-    default:
-        break;
-    }
+    int quote = c;
+    int quote_size = 1; /* 1 or 3 */
 
-    if (!Py_UNICODE_ISPRINTABLE(c)) {
-        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", c));
-    }
+    /* Nodes of type STRING, especially multi line strings
+       must be handled differently in order to get both
+       the starting line number and the column offset right.
+       (cf. issue 16806) */
+    tok->first_lineno = tok->lineno;
+    tok->multi_line_start = tok->line_start;
 
-    if( c == '=' && INSIDE_FSTRING_EXPR_AT_TOP(current_tok)) {
-        current_tok->in_debug = 1;
+    /* Find the quote size and start of string */
+    int after_quote = tok_nextc(tok);
+    if (after_quote == quote) {
+      int after_after_quote = tok_nextc(tok);
+      if (after_after_quote == quote) {
+        quote_size = 3;
+      } else {
+        // TODO: Check this
+        tok_backup(tok, after_after_quote);
+        tok_backup(tok, after_quote);
+      }
+    }
+    if (after_quote != quote) {
+      tok_backup(tok, after_quote);
     }
 
-    /* Punctuation character */
     p_start = tok->start;
     p_end = tok->cur;
-    return MAKE_TOKEN(_PyToken_OneChar(c));
-}
+    if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) {
+      return MAKE_TOKEN(_PyTokenizer_syntaxerror(
+          tok, "too many nested f-strings or t-strings"));
+    }
+    tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
+    the_current_tok->kind = TOK_FSTRING_MODE;
+    the_current_tok->quote = quote;
+    the_current_tok->quote_size = quote_size;
+    the_current_tok->start = tok->start;
+    the_current_tok->multi_line_start = tok->line_start;
+    the_current_tok->first_line = tok->lineno;
+    the_current_tok->start_offset = -1;
+    the_current_tok->multi_line_start_offset = -1;
+    the_current_tok->last_expr_buffer = NULL;
+    the_current_tok->last_expr_size = 0;
+    the_current_tok->last_expr_end = -1;
+    the_current_tok->in_format_spec = 0;
+    the_current_tok->in_debug = 0;
+
+    enum string_kind_t string_kind = FSTRING;
+    switch (*tok->start) {
+    case 'T':
+    case 't':
+      the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
+      string_kind = TSTRING;
+      break;
+    case 'F':
+    case 'f':
+      the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
+      break;
+    case 'R':
+    case 'r':
+      the_current_tok->raw = 1;
+      if (Py_TOLOWER(*(tok->start + 1)) == 't') {
+        string_kind = TSTRING;
+      }
+      break;
+    default:
+      Py_UNREACHABLE();
+    }
 
-static int
-tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
-{
-    const char *p_start = NULL;
-    const char *p_end = NULL;
+    the_current_tok->string_kind = string_kind;
+    the_current_tok->curly_bracket_depth = 0;
+    the_current_tok->curly_bracket_expr_start_depth = -1;
+    return string_kind == TSTRING ? MAKE_TOKEN(TSTRING_START)
+                                  : MAKE_TOKEN(FSTRING_START);
+  }
+
+letter_quote:
+  /* String */
+  if (c == '\'' || c == '"') {
+    int quote = c;
+    int quote_size = 1; /* 1 or 3 */
     int end_quote_size = 0;
-    int unicode_escape = 0;
+    int has_escaped_quote = 0;
 
-    tok->start = tok->cur;
+    /* Nodes of type STRING, especially multi line strings
+       must be handled differently in order to get both
+       the starting line number and the column offset right.
+       (cf. issue 16806) */
     tok->first_lineno = tok->lineno;
-    tok->starting_col_offset = tok->col_offset;
-
-    // If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize
-    // before it.
-    int start_char = tok_nextc(tok);
-    if (start_char == '{') {
-        int peek1 = tok_nextc(tok);
-        tok_backup(tok, peek1);
-        tok_backup(tok, start_char);
-        if (peek1 != '{') {
-            current_tok->curly_bracket_expr_start_depth++;
-            if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
-                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
-                    "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
-            }
-            TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
-            return tok_get_normal_mode(tok, current_tok, token);
-        }
+    tok->multi_line_start = tok->line_start;
+
+    /* Find the quote size and start of string */
+    c = tok_nextc(tok);
+    if (c == quote) {
+      c = tok_nextc(tok);
+      if (c == quote) {
+        quote_size = 3;
+      } else {
+        end_quote_size = 1; /* empty string found */
+      }
     }
-    else {
-        tok_backup(tok, start_char);
+    if (c != quote) {
+      tok_backup(tok, c);
     }
 
-    // Check if we are at the end of the string
-    for (int i = 0; i < current_tok->quote_size; i++) {
-        int quote = tok_nextc(tok);
-        if (quote != current_tok->quote) {
-            tok_backup(tok, quote);
-            goto f_string_middle;
-        }
-    }
+    /* Get rest of string */
+    while (end_quote_size != quote_size) {
+      c = tok_nextc(tok);
+      if (tok->done == E_ERROR) {
+        return MAKE_TOKEN(ERRORTOKEN);
+      }
+      if (tok->done == E_DECODE) {
+        break;
+      }
+      if (c == EOF || (quote_size == 1 && c == '\n')) {
+        assert(tok->multi_line_start != NULL);
+        // shift the tok_state's location into
+        // the start of string, and report the error
+        // from the initial quote character
+        tok->cur = (char *)tok->start;
+        tok->cur++;
+        tok->line_start = tok->multi_line_start;
+        int start = tok->lineno;
+        tok->lineno = tok->first_lineno;
 
-    if (current_tok->last_expr_buffer != NULL) {
-        PyMem_Free(current_tok->last_expr_buffer);
-        current_tok->last_expr_buffer = NULL;
-        current_tok->last_expr_size = 0;
-        current_tok->last_expr_end = -1;
+        if (INSIDE_FSTRING(tok)) {
+          /* When we are in an f-string, before raising the
+           * unterminated string literal error, check whether
+           * the initial quote matches the f-string's quote;
+           * if it does, this must be a missing '}' token,
+           * so raise the proper error */
+          tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
+          if (the_current_tok->quote == quote &&
+              the_current_tok->quote_size == quote_size) {
+            return MAKE_TOKEN(_PyTokenizer_syntaxerror(
+                tok, "%c-string: expecting '}'", TOK_GET_STRING_PREFIX(tok)));
+          }
+        }
+
+        if (quote_size == 3) {
+          _PyTokenizer_syntaxerror(tok,
+                                   "unterminated triple-quoted string literal"
+                                   " (detected at line %d)",
+                                   start);
+          if (c != '\n') {
+            tok->done = E_EOFS;
+          }
+          return MAKE_TOKEN(ERRORTOKEN);
+        } else {
+          if (has_escaped_quote) {
+            _PyTokenizer_syntaxerror(
+                tok,
+                "unterminated string literal (detected at line %d); "
+                "perhaps you escaped the end quote?",
+                start);
+          } else {
+            _PyTokenizer_syntaxerror(
+                tok, "unterminated string literal (detected at line %d)",
+                start);
+          }
+          if (c != '\n') {
+            tok->done = E_EOLS;
+          }
+          return MAKE_TOKEN(ERRORTOKEN);
+        }
+      }
+      if (c == quote) {
+        end_quote_size += 1;
+      } else {
+        end_quote_size = 0;
+        if (c == '\\') {
+          c = tok_nextc(tok); /* skip escaped char */
+          if (c ==
+              quote) { /* but record whether the escaped char was a quote */
+            has_escaped_quote = 1;
+          }
+          if (c == '\r') {
+            c = tok_nextc(tok);
+          }
+        }
+      }
     }
 
     p_start = tok->start;
     p_end = tok->cur;
-    tok->tok_mode_stack_index--;
-    return MAKE_TOKEN(FTSTRING_END(current_tok));
+    return MAKE_TOKEN(STRING);
+  }
 
-f_string_middle:
+  /* Line continuation */
+  if (c == '\\') {
+    if ((c = tok_continuation_line(tok)) == -1) {
+      return MAKE_TOKEN(ERRORTOKEN);
+    }
+    tok->cont_line = 1;
+    goto again; /* Read next line */
+  }
+
+  /* Punctuation character */
+  int is_punctuation = (c == ':' || c == '}' || c == '!' || c == '{');
+  if (is_punctuation && INSIDE_FSTRING(tok) &&
+      INSIDE_FSTRING_EXPR(current_tok)) {
+    /* This code block gets executed before the curly_bracket_depth is
+     * incremented by the `{` case, so for ensuring that we are on the 0th
+     * level, we need to adjust it manually */
+    int cursor = current_tok->curly_bracket_depth - (c != '{');
+    int in_format_spec = current_tok->in_format_spec;
+    int cursor_in_format_with_debug =
+        cursor == 1 && (current_tok->in_debug || in_format_spec);
+    int cursor_valid = cursor == 0 || cursor_in_format_with_debug;
+    if ((cursor_valid) && !_PyLexer_update_ftstring_expr(tok, c)) {
+      return MAKE_TOKEN(ENDMARKER);
+    }
+    if ((cursor_valid) && c != '{' && set_ftstring_expr(tok, token, c)) {
+      return MAKE_TOKEN(ERRORTOKEN);
+    }
 
-    // TODO: This is a bit of a hack, but it works for now. We need to find a better way to handle
-    // this.
-    tok->multi_line_start = tok->line_start;
-    while (end_quote_size != current_tok->quote_size) {
-        int c = tok_nextc(tok);
-        if (tok->done == E_ERROR || tok->done == E_DECODE) {
-            return MAKE_TOKEN(ERRORTOKEN);
+    if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
+      current_tok->kind = TOK_FSTRING_MODE;
+      current_tok->in_format_spec = 1;
+      p_start = tok->start;
+      p_end = tok->cur;
+      return MAKE_TOKEN(_PyToken_OneChar(c));
+    }
+  }
+
+  /* Check for two-character token */
+  {
+    int c2 = tok_nextc(tok);
+    int current_token = _PyToken_TwoChars(c, c2);
+    if (current_token != OP) {
+      int c3 = tok_nextc(tok);
+      int current_token3 = _PyToken_ThreeChars(c, c2, c3);
+      if (current_token3 != OP) {
+        current_token = current_token3;
+      } else {
+        tok_backup(tok, c3);
+      }
+      p_start = tok->start;
+      p_end = tok->cur;
+      return MAKE_TOKEN(current_token);
+    }
+    tok_backup(tok, c2);
+  }
+
+  /* Keep track of parentheses nesting level */
+  switch (c) {
+  case '(':
+  case '[':
+  case '{':
+    if (tok->level >= MAXLEVEL) {
+      return MAKE_TOKEN(
+          _PyTokenizer_syntaxerror(tok, "too many nested parentheses"));
+    }
+    tok->parenstack[tok->level] = c;
+    tok->parenlinenostack[tok->level] = tok->lineno;
+    tok->parencolstack[tok->level] = (int)(tok->start - tok->line_start);
+    tok->level++;
+    if (INSIDE_FSTRING(tok)) {
+      current_tok->curly_bracket_depth++;
+    }
+    break;
+  case ')':
+  case ']':
+  case '}':
+    if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') {
+      return MAKE_TOKEN(
+          _PyTokenizer_syntaxerror(tok, "%c-string: single '}' is not allowed",
+                                   TOK_GET_STRING_PREFIX(tok)));
+    }
+    if (!tok->tok_extra_tokens && !tok->level) {
+      return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "unmatched '%c'", c));
+    }
+    if (tok->level > 0) {
+      tok->level--;
+      int opening = tok->parenstack[tok->level];
+      if (!tok->tok_extra_tokens &&
+          !((opening == '(' && c == ')') || (opening == '[' && c == ']') ||
+            (opening == '{' && c == '}'))) {
+        /* If the opening bracket belongs to an f-string's expression
+        part (e.g. f"{)}") and the closing bracket is an arbitrary
+        nested expression, then instead of matching a different
+        syntactical construct with it, we'll throw an unmatched
+        parentheses error. */
+        if (INSIDE_FSTRING(tok) && opening == '{') {
+          assert(current_tok->curly_bracket_depth >= 0);
+          int previous_bracket = current_tok->curly_bracket_depth - 1;
+          if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
+            return MAKE_TOKEN(
+                _PyTokenizer_syntaxerror(tok, "%c-string: unmatched '%c'",
+                                         TOK_GET_STRING_PREFIX(tok), c));
+          }
+        }
+        if (tok->parenlinenostack[tok->level] != tok->lineno) {
+          return MAKE_TOKEN(_PyTokenizer_syntaxerror(
+              tok,
+              "closing parenthesis '%c' does not match "
+              "opening parenthesis '%c' on line %d",
+              c, opening, tok->parenlinenostack[tok->level]));
+        } else {
+          return MAKE_TOKEN(_PyTokenizer_syntaxerror(
+              tok,
+              "closing parenthesis '%c' does not match "
+              "opening parenthesis '%c'",
+              c, opening));
         }
-        int in_format_spec = (
-                current_tok->in_format_spec
-                &&
-                INSIDE_FSTRING_EXPR(current_tok)
-        );
-
-       if (c == EOF || (current_tok->quote_size == 1 && c == '\n')) {
-            if (tok->decoding_erred) {
-                return MAKE_TOKEN(ERRORTOKEN);
-            }
+      }
+    }
 
-            // If we are in a format spec and we found a newline,
-            // it means that the format spec ends here and we should
-            // return to the regular mode.
-            if (in_format_spec && c == '\n') {
-                if (current_tok->quote_size == 1) {
-                    return MAKE_TOKEN(
-                        _PyTokenizer_syntaxerror(
-                            tok,
-                            "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings",
-                            TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok)
-                        )
-                    );
-                }
-                tok_backup(tok, c);
-                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
-                current_tok->in_format_spec = 0;
-                p_start = tok->start;
-                p_end = tok->cur;
-                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
-            }
+    if (INSIDE_FSTRING(tok)) {
+      current_tok->curly_bracket_depth--;
+      if (current_tok->curly_bracket_depth < 0) {
+        return MAKE_TOKEN(_PyTokenizer_syntaxerror(
+            tok, "%c-string: unmatched '%c'", TOK_GET_STRING_PREFIX(tok), c));
+      }
+      if (c == '}' && current_tok->curly_bracket_depth ==
+                          current_tok->curly_bracket_expr_start_depth) {
+        current_tok->curly_bracket_expr_start_depth--;
+        current_tok->kind = TOK_FSTRING_MODE;
+        current_tok->in_format_spec = 0;
+        current_tok->in_debug = 0;
+      }
+    }
+    break;
+  default:
+    break;
+  }
+
+  if (!Py_UNICODE_ISPRINTABLE(c)) {
+    return MAKE_TOKEN(_PyTokenizer_syntaxerror(
+        tok, "invalid non-printable character U+%04X", c));
+  }
+
+  if (c == '=' && INSIDE_FSTRING_EXPR_AT_TOP(current_tok)) {
+    current_tok->in_debug = 1;
+  }
+
+  /* Punctuation character */
+  p_start = tok->start;
+  p_end = tok->cur;
+  return MAKE_TOKEN(_PyToken_OneChar(c));
+}
 
-            assert(tok->multi_line_start != NULL);
-            // shift the tok_state's location into
-            // the start of string, and report the error
-            // from the initial quote character
-            tok->cur = (char *)current_tok->start;
-            tok->cur++;
-            tok->line_start = current_tok->multi_line_start;
-            int start = tok->lineno;
-
-            tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
-            tok->lineno = the_current_tok->first_line;
-
-            if (current_tok->quote_size == 3) {
-                _PyTokenizer_syntaxerror(tok,
-                                    "unterminated triple-quoted %c-string literal"
-                                    " (detected at line %d)",
-                                    TOK_GET_STRING_PREFIX(tok), start);
-                if (c != '\n') {
-                    tok->done = E_EOFS;
-                }
-                return MAKE_TOKEN(ERRORTOKEN);
-            }
-            else {
-                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
-                                    "unterminated %c-string literal (detected at"
-                                    " line %d)", TOK_GET_STRING_PREFIX(tok), start));
-            }
-        }
+static int tok_get_fstring_mode(struct tok_state *tok,
+                                tokenizer_mode *current_tok,
+                                struct token *token) { // yields FTSTRING_MIDDLE/END or defers to normal mode
+  const char *p_start = NULL;
+  const char *p_end = NULL;
+  int end_quote_size = 0; // run of consecutive closing-quote chars seen so far
+  int unicode_escape = 0; // set once \N{ is seen; the next '}' is then literal
+
+  tok->start = tok->cur;
+  tok->first_lineno = tok->lineno;
+  tok->starting_col_offset = tok->col_offset;
+
+  // If we start with a bracket, we defer to the normal mode as there is nothing
+  // for us to tokenize before it.
+  int start_char = tok_nextc(tok);
+  if (start_char == '{') {
+    int peek1 = tok_nextc(tok);
+    tok_backup(tok, peek1);
+    tok_backup(tok, start_char);
+    if (peek1 != '{') { // "{{" is not handed off; it is lexed as text below
+      current_tok->curly_bracket_expr_start_depth++;
+      if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
+        return MAKE_TOKEN(_PyTokenizer_syntaxerror(
+            tok, "%c-string: expressions nested too deeply",
+            TOK_GET_STRING_PREFIX(tok)));
+      }
+      TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
+      return tok_get_normal_mode(tok, current_tok, token);
+    }
+  } else {
+    tok_backup(tok, start_char);
+  }
+
+  // Check if we are at the end of the string
+  for (int i = 0; i < current_tok->quote_size; i++) {
+    int quote = tok_nextc(tok);
+    if (quote != current_tok->quote) {
+      tok_backup(tok, quote);
+      goto f_string_middle;
+    }
+  }
 
-        if (c == current_tok->quote) {
-            end_quote_size += 1;
-            continue;
-        } else {
-            end_quote_size = 0;
-        }
+  if (current_tok->last_expr_buffer != NULL) {
+    PyMem_Free(current_tok->last_expr_buffer);
+    current_tok->last_expr_buffer = NULL;
+    current_tok->last_expr_size = 0;
+    current_tok->last_expr_end = -1;
+  }
 
-        if (c == '{') {
-            if (!_PyLexer_update_ftstring_expr(tok, c)) {
-                return MAKE_TOKEN(ENDMARKER);
-            }
-            int peek = tok_nextc(tok);
-            if (peek != '{' || in_format_spec) {
-                tok_backup(tok, peek);
-                tok_backup(tok, c);
-                current_tok->curly_bracket_expr_start_depth++;
-                if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
-                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
-                        "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
-                }
-                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
-                current_tok->in_format_spec = 0;
-                p_start = tok->start;
-                p_end = tok->cur;
-            } else {
-                p_start = tok->start;
-                p_end = tok->cur - 1;
-            }
-            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
-        } else if (c == '}') {
-            if (unicode_escape) {
-                p_start = tok->start;
-                p_end = tok->cur;
-                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
-            }
-            int peek = tok_nextc(tok);
-
-            // The tokenizer can only be in the format spec if we have already completed the expression
-            // scanning (indicated by the end of the expression being set) and we are not at the top level
-            // of the bracket stack (-1 is the top level). Since format specifiers can't legally use double
-            // brackets, we can bypass it here.
-            int cursor = current_tok->curly_bracket_depth;
-            if (peek == '}' && !in_format_spec && cursor == 0) {
-                p_start = tok->start;
-                p_end = tok->cur - 1;
-            } else {
-                tok_backup(tok, peek);
-                tok_backup(tok, c);
-                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
-                current_tok->in_format_spec = 0;
-                p_start = tok->start;
-                p_end = tok->cur;
-            }
-            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
-        } else if (c == '\\') {
-            int peek = tok_nextc(tok);
-            if (peek == '\r') {
-                peek = tok_nextc(tok);
-            }
-            // Special case when the backslash is right before a curly
-            // brace. We have to restore and return the control back
-            // to the loop for the next iteration.
-            if (peek == '{' || peek == '}') {
-                if (!current_tok->raw) {
-                    if (_PyTokenizer_warn_invalid_escape_sequence(tok, peek)) {
-                        return MAKE_TOKEN(ERRORTOKEN);
-                    }
-                }
-                tok_backup(tok, peek);
-                continue;
-            }
+  p_start = tok->start;
+  p_end = tok->cur;
+  tok->tok_mode_stack_index--; // closing quotes consumed: step back one mode
+  return MAKE_TOKEN(FTSTRING_END(current_tok));
+
+f_string_middle:
 
-            if (!current_tok->raw) {
-                if (peek == 'N') {
-                    /* Handle named unicode escapes (\N{BULLET}) */
-                    peek = tok_nextc(tok);
-                    if (peek == '{') {
-                        unicode_escape = 1;
-                    } else {
-                        tok_backup(tok, peek);
-                    }
-                }
-            } /* else {
-                skip the escaped character
-            }*/
+  // TODO: This is a bit of a hack, but it works for now. We need to find a
+  // better way to handle this.
+  tok->multi_line_start = tok->line_start;
+  while (end_quote_size != current_tok->quote_size) { // until a full closing quote run
+    int c = tok_nextc(tok);
+    if (tok->done == E_ERROR || tok->done == E_DECODE) {
+      return MAKE_TOKEN(ERRORTOKEN);
+    }
+    int in_format_spec =
+        (current_tok->in_format_spec && INSIDE_FSTRING_EXPR(current_tok));
+
+    if (c == EOF || (current_tok->quote_size == 1 && c == '\n')) {
+      if (tok->decoding_erred) {
+        return MAKE_TOKEN(ERRORTOKEN);
+      }
+
+      // If we are in a format spec and we found a newline,
+      // it means that the format spec ends here and we should
+      // return to the regular mode.
+      if (in_format_spec && c == '\n') {
+        if (current_tok->quote_size == 1) { // NOTE(review): always true, since c == '\n' only gets here when quote_size == 1
+          return MAKE_TOKEN(_PyTokenizer_syntaxerror(
+              tok,
+              "%c-string: newlines are not allowed in format specifiers for "
+              "single quoted %c-strings",
+              TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok)));
         }
+        tok_backup(tok, c); // NOTE(review): unreachable given the check above
+        TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
+        current_tok->in_format_spec = 0;
+        p_start = tok->start;
+        p_end = tok->cur;
+        return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
+      }
+
+      assert(tok->multi_line_start != NULL);
+      // shift the tok_state's location into
+      // the start of string, and report the error
+      // from the initial quote character
+      tok->cur = (char *)current_tok->start;
+      tok->cur++;
+      tok->line_start = current_tok->multi_line_start;
+      int start = tok->lineno; // line where the problem was detected (used in the message)
+
+      tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
+      tok->lineno = the_current_tok->first_line;
+
+      if (current_tok->quote_size == 3) {
+        _PyTokenizer_syntaxerror(tok,
+                                 "unterminated triple-quoted %c-string literal"
+                                 " (detected at line %d)",
+                                 TOK_GET_STRING_PREFIX(tok), start);
+        if (c != '\n') {
+          tok->done = E_EOFS; // EOF was hit inside the string
+        }
+        return MAKE_TOKEN(ERRORTOKEN);
+      } else {
+        return MAKE_TOKEN(_PyTokenizer_syntaxerror(
+            tok,
+            "unterminated %c-string literal (detected at"
+            " line %d)",
+            TOK_GET_STRING_PREFIX(tok), start));
+      }
     }
 
-    // Backup the f-string quotes to emit a final FSTRING_MIDDLE and
-    // add the quotes to the FSTRING_END in the next tokenizer iteration.
-    for (int i = 0; i < current_tok->quote_size; i++) {
-        tok_backup(tok, current_tok->quote);
+    if (c == current_tok->quote) {
+      end_quote_size += 1;
+      continue;
+    } else {
+      end_quote_size = 0;
     }
-    p_start = tok->start;
-    p_end = tok->cur;
-    return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
-}
 
-static int
-tok_get(struct tok_state *tok, struct token *token)
-{
-    tokenizer_mode *current_tok = TOK_GET_MODE(tok);
-    if (current_tok->kind == TOK_REGULAR_MODE) {
-        return tok_get_normal_mode(tok, current_tok, token);
-    } else {
-        return tok_get_fstring_mode(tok, current_tok, token);
+    if (c == '{') {
+      if (!_PyLexer_update_ftstring_expr(tok, c)) {
+        return MAKE_TOKEN(ENDMARKER);
+      }
+      int peek = tok_nextc(tok);
+      if (peek != '{' || in_format_spec) {
+        tok_backup(tok, peek);
+        tok_backup(tok, c); // leave the '{' for the regular-mode lexer
+        current_tok->curly_bracket_expr_start_depth++;
+        if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
+          return MAKE_TOKEN(_PyTokenizer_syntaxerror(
+              tok, "%c-string: expressions nested too deeply",
+              TOK_GET_STRING_PREFIX(tok)));
+        }
+        TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
+        current_tok->in_format_spec = 0;
+        p_start = tok->start;
+        p_end = tok->cur;
+      } else {
+        p_start = tok->start;
+        p_end = tok->cur - 1; // "{{": leave the second '{' out of the token text
+      }
+      return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
+    } else if (c == '}') {
+      if (unicode_escape) {
+        p_start = tok->start;
+        p_end = tok->cur; // the '}' stays inside the token text
+        return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
+      }
+      int peek = tok_nextc(tok);
+
+      // The tokenizer can only be in the format spec if we have already
+      // completed the expression scanning (indicated by the end of the
+      // expression being set) and we are not at the top level of the bracket
+      // stack (-1 is the top level). Since format specifiers can't legally use
+      // double brackets, we can bypass it here.
+      int cursor = current_tok->curly_bracket_depth;
+      if (peek == '}' && !in_format_spec && cursor == 0) {
+        p_start = tok->start;
+        p_end = tok->cur - 1; // "}}": leave the second '}' out of the token text
+      } else {
+        tok_backup(tok, peek);
+        tok_backup(tok, c); // leave the '}' for the regular-mode lexer
+        TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
+        current_tok->in_format_spec = 0;
+        p_start = tok->start;
+        p_end = tok->cur;
+      }
+      return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
+    } else if (c == '\\') {
+      int peek = tok_nextc(tok);
+      if (peek == '\r') {
+        peek = tok_nextc(tok); // look past a '\r' that follows the backslash
+      }
+      // Special case when the backslash is right before a curly
+      // brace. We have to restore and return the control back
+      // to the loop for the next iteration.
+      if (peek == '{' || peek == '}') {
+        if (!current_tok->raw) {
+          if (_PyTokenizer_warn_invalid_escape_sequence(tok, peek)) {
+            return MAKE_TOKEN(ERRORTOKEN);
+          }
+        }
+        tok_backup(tok, peek);
+        continue;
+      }
+
+      if (!current_tok->raw) {
+        if (peek == 'N') {
+          /* Handle named unicode escapes (\N{BULLET}) */
+          peek = tok_nextc(tok);
+          if (peek == '{') {
+            unicode_escape = 1;
+          } else {
+            tok_backup(tok, peek);
+          }
+        }
+      } /* else {
+          skip the escaped character
+      }*/
     }
+  }
+
+  // Backup the f-string quotes to emit a final FSTRING_MIDDLE and
+  // add the quotes to the FSTRING_END in the next tokenizer iteration.
+  for (int i = 0; i < current_tok->quote_size; i++) {
+    tok_backup(tok, current_tok->quote);
+  }
+  p_start = tok->start;
+  p_end = tok->cur;
+  return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
 }
 
-int
-_PyTokenizer_Get(struct tok_state *tok, struct token *token)
-{
-    int result = tok_get(tok, token);
-    if (tok->decoding_erred) {
-        result = ERRORTOKEN;
-        tok->done = E_DECODE;
-    }
-    return result;
+/* Lex the next token with the lexer that matches the active mode. */
+static int tok_get(struct tok_state *tok, struct token *token) {
+  tokenizer_mode *mode = TOK_GET_MODE(tok);
+  if (mode->kind != TOK_REGULAR_MODE) {
+    return tok_get_fstring_mode(tok, mode, token);
+  }
+  return tok_get_normal_mode(tok, mode, token);
+}
+
+int _PyTokenizer_Get(struct tok_state *tok, struct token *token) {
+  int tok_type = tok_get(tok, token);
+  if (!tok->decoding_erred) {
+    return tok_type;
+  }
+  tok->done = E_DECODE; /* a decoding failure overrides whatever was lexed */
+  return ERRORTOKEN;
 }
diff --git a/Parser/tokenizer/file_tokenizer.c b/Parser/tokenizer/file_tokenizer.c
index 8c836a3f725829..b18f702a56c303 100644
--- a/Parser/tokenizer/file_tokenizer.c
+++ b/Parser/tokenizer/file_tokenizer.c
@@ -1,438 +1,431 @@
 #include "Python.h"
-#include "pycore_call.h"          // _PyObject_CallNoArgs()
-#include "pycore_fileutils.h"     // _Py_UniversalNewlineFgetsWithSize()
-#include "pycore_runtime.h"       // _Py_ID()
+#include "pycore_call.h"      // _PyObject_CallNoArgs()
+#include "pycore_fileutils.h" // _Py_UniversalNewlineFgetsWithSize()
+#include "pycore_runtime.h"   // _Py_ID()
 
-#include "errcode.h"              // E_NOMEM
+#include "errcode.h" // E_NOMEM
 
 #ifdef HAVE_UNISTD_H
-#  include <unistd.h>             // lseek(), read()
+#include <unistd.h> // lseek(), read()
 #endif
 
-#include "helpers.h"
-#include "../lexer/state.h"
-#include "../lexer/lexer.h"
 #include "../lexer/buffer.h"
+#include "../lexer/lexer.h"
+#include "../lexer/state.h"
+#include "helpers.h"
 
+static int tok_concatenate_interactive_new_line(struct tok_state *tok,
+                                                const char *line) { // appends LINE to interactive_src; 0 = ok, -1 = no memory
+  assert(tok->fp_interactive);
 
-static int
-tok_concatenate_interactive_new_line(struct tok_state *tok, const char *line) {
-    assert(tok->fp_interactive);
+  if (!line) {
+    return 0; // nothing to append
+  }
+
+  Py_ssize_t current_size =
+      tok->interactive_src_end - tok->interactive_src_start;
+  Py_ssize_t line_size = strlen(line);
+  char last_char = line[line_size > 0 ? line_size - 1 : line_size]; // '\0' when LINE is empty
+  if (last_char != '\n') {
+    line_size += 1; // reserve room for the faked '\n' written below
+  }
+  char *new_str = tok->interactive_src_start;
+
+  new_str = PyMem_Realloc(new_str, current_size + line_size + 1); // +1 for the NUL
+  if (!new_str) {
+    if (tok->interactive_src_start) {
+      PyMem_Free(tok->interactive_src_start); // a failed realloc leaves the old block allocated
+    }
+    tok->interactive_src_start = NULL;
+    tok->interactive_src_end = NULL;
+    tok->done = E_NOMEM;
+    return -1;
+  }
+  strcpy(new_str + current_size, line);
+  tok->implicit_newline = 0;
+  if (last_char != '\n') {
+    /* Last line does not end in \n, fake one */
+    new_str[current_size + line_size - 1] = '\n';
+    new_str[current_size + line_size] = '\0';
+    tok->implicit_newline = 1;
+  }
+  tok->interactive_src_start = new_str;
+  tok->interactive_src_end = new_str + current_size + line_size; // points at the NUL
+  return 0;
+}
 
-    if (!line) {
-        return 0;
+static int tok_readline_raw(struct tok_state *tok) { // reads undecoded bytes up to '\n'; 1 = ok, 0 = failure
+  do {
+    if (!_PyLexer_tok_reserve_buf(tok, BUFSIZ)) {
+      return 0;
     }
-
-    Py_ssize_t current_size = tok->interactive_src_end - tok->interactive_src_start;
-    Py_ssize_t line_size = strlen(line);
-    char last_char = line[line_size > 0 ? line_size - 1 : line_size];
-    if (last_char != '\n') {
-        line_size += 1;
+    int n_chars = (int)(tok->end - tok->inp); // room between the write position and the buffer end
+    size_t line_size = 0;
+    char *line = _Py_UniversalNewlineFgetsWithSize(tok->inp, n_chars, tok->fp,
+                                                   NULL, &line_size);
+    if (line == NULL) {
+      return 1; // nothing was read; still reported as success to the caller
     }
-    char* new_str = tok->interactive_src_start;
-
-    new_str = PyMem_Realloc(new_str, current_size + line_size + 1);
-    if (!new_str) {
-        if (tok->interactive_src_start) {
-            PyMem_Free(tok->interactive_src_start);
-        }
-        tok->interactive_src_start = NULL;
-        tok->interactive_src_end = NULL;
-        tok->done = E_NOMEM;
-        return -1;
+    if (tok->fp_interactive &&
+        tok_concatenate_interactive_new_line(tok, line) == -1) {
+      return 0;
     }
-    strcpy(new_str + current_size, line);
-    tok->implicit_newline = 0;
-    if (last_char != '\n') {
-        /* Last line does not end in \n, fake one */
-        new_str[current_size + line_size - 1] = '\n';
-        new_str[current_size + line_size] = '\0';
-        tok->implicit_newline = 1;
+    tok->inp += line_size;
+    if (tok->inp == tok->buf) {
+      return 0; // the buffer is still empty after the read
     }
-    tok->interactive_src_start = new_str;
-    tok->interactive_src_end = new_str + current_size + line_size;
-    return 0;
+  } while (tok->inp[-1] != '\n'); // a long line may need several chunked reads
+  return 1;
 }
 
-static int
-tok_readline_raw(struct tok_state *tok)
-{
-    do {
-        if (!_PyLexer_tok_reserve_buf(tok, BUFSIZ)) {
-            return 0;
-        }
-        int n_chars = (int)(tok->end - tok->inp);
-        size_t line_size = 0;
-        char *line = _Py_UniversalNewlineFgetsWithSize(tok->inp, n_chars, tok->fp, NULL, &line_size);
-        if (line == NULL) {
-            return 1;
-        }
-        if (tok->fp_interactive &&
-            tok_concatenate_interactive_new_line(tok, line) == -1) {
-            return 0;
-        }
-        tok->inp += line_size;
-        if (tok->inp == tok->buf) {
-            return 0;
-        }
-    } while (tok->inp[-1] != '\n');
-    return 1;
-}
-
-static int
-tok_readline_recode(struct tok_state *tok) {
-    PyObject *line;
-    const  char *buf;
-    Py_ssize_t buflen;
-    line = tok->decoding_buffer;
+static int tok_readline_recode(struct tok_state *tok) { // appends one decoded line as UTF-8; 1 = ok, 0 = error
+  PyObject *line;
+  const char *buf;
+  Py_ssize_t buflen;
+  line = tok->decoding_buffer; // a pending, already-read line takes priority
+  if (line == NULL) {
+    line = PyObject_CallNoArgs(tok->decoding_readline);
     if (line == NULL) {
-        line = PyObject_CallNoArgs(tok->decoding_readline);
-        if (line == NULL) {
-            _PyTokenizer_error_ret(tok);
-            goto error;
-        }
-    }
-    else {
-        tok->decoding_buffer = NULL;
-    }
-    buf = PyUnicode_AsUTF8AndSize(line, &buflen);
-    if (buf == NULL) {
-        _PyTokenizer_error_ret(tok);
-        goto error;
-    }
-    // Make room for the null terminator *and* potentially
-    // an extra newline character that we may need to artificially
-    // add.
-    size_t buffer_size = buflen + 2;
-    if (!_PyLexer_tok_reserve_buf(tok, buffer_size)) {
-        goto error;
-    }
-    memcpy(tok->inp, buf, buflen);
-    tok->inp += buflen;
-    *tok->inp = '\0';
-    if (tok->fp_interactive &&
-        tok_concatenate_interactive_new_line(tok, buf) == -1) {
-        goto error;
-    }
-    Py_DECREF(line);
-    return 1;
+      _PyTokenizer_error_ret(tok);
+      goto error;
+    }
+  } else {
+    tok->decoding_buffer = NULL; // the reference now belongs to 'line'
+  }
+  buf = PyUnicode_AsUTF8AndSize(line, &buflen); // borrowed from 'line'; valid until the DECREF
+  if (buf == NULL) {
+    _PyTokenizer_error_ret(tok);
+    goto error;
+  }
+
+  char *translated = _PyTokenizer_translate_newlines(buf, 0, 0, tok); // heap copy, freed below
+  if (translated == NULL) {
+    goto error;
+  }
+  buflen = strlen(translated); // from here on buflen describes the translated text
+
+  // Make room for the null terminator *and* potentially
+  // an extra newline character that we may need to artificially
+  // add.
+  size_t buffer_size = buflen + 2;
+  if (!_PyLexer_tok_reserve_buf(tok, buffer_size)) {
+    PyMem_Free(translated);
+    goto error;
+  }
+  memcpy(tok->inp, translated, buflen);
+  tok->inp += buflen;
+  PyMem_Free(translated);
+  *tok->inp = '\0';
+  if (tok->fp_interactive && // mirror the translated text just stored, not the raw 'buf'
+      tok_concatenate_interactive_new_line(tok, tok->inp - buflen) == -1) {
+    goto error;
+  }
+  Py_DECREF(line);
+  return 1;
 error:
-    Py_XDECREF(line);
-    return 0;
+  Py_XDECREF(line);
+  return 0;
 }
 
 /* Fetch the next byte from TOK. */
-static int fp_getc(struct tok_state *tok) {
-    return getc(tok->fp);
-}
+static int fp_getc(struct tok_state *tok) { return fgetc(tok->fp); }
 
 /* Unfetch the last byte back into TOK.  */
-static void fp_ungetc(int c, struct tok_state *tok) {
-    ungetc(c, tok->fp);
-}
+static void fp_ungetc(int c, struct tok_state *tok) { (void)ungetc(c, tok->fp); }
 
 /* Set the readline function for TOK to a StreamReader's
    readline function. The StreamReader is named ENC.
 
-   This function is called from _PyTokenizer_check_bom and _PyTokenizer_check_coding_spec.
+   This function is called from _PyTokenizer_check_bom and
+   _PyTokenizer_check_coding_spec.
 
    ENC is usually identical to the future value of tok->encoding,
    except for the (currently unsupported) case of UTF-16.
 
    Return 1 on success, 0 on failure. */
-static int
-fp_setreadl(struct tok_state *tok, const char* enc)
-{
-    PyObject *readline, *open, *stream;
-    int fd;
-    long pos;
-
-    fd = fileno(tok->fp);
-    /* Due to buffering the file offset for fd can be different from the file
-     * position of tok->fp.  If tok->fp was opened in text mode on Windows,
-     * its file position counts CRLF as one char and can't be directly mapped
-     * to the file offset for fd.  Instead we step back one byte and read to
-     * the end of line.*/
-    pos = ftell(tok->fp);
-    if (pos == -1 ||
-        lseek(fd, (off_t)(pos > 0 ? pos - 1 : pos), SEEK_SET) == (off_t)-1) {
-        PyErr_SetFromErrnoWithFilename(PyExc_OSError, NULL);
-        return 0;
-    }
+static int fp_setreadl(struct tok_state *tok, const char *enc) {
+  PyObject *readline, *open, *stream;
+  int fd;
+  long pos;
+
+  fd = fileno(tok->fp);
+  /* Due to buffering the file offset for fd can be different from the file
+   * position of tok->fp.  If tok->fp was opened in text mode on Windows,
+   * its file position counts CRLF as one char and can't be directly mapped
+   * to the file offset for fd.  Instead we step back one byte and read to
+   * the end of line.*/
+  pos = ftell(tok->fp);
+  if (pos == -1 ||
+      lseek(fd, (off_t)(pos > 0 ? pos - 1 : pos), SEEK_SET) == (off_t)-1) {
+    PyErr_SetFromErrnoWithFilename(PyExc_OSError, NULL);
+    return 0;
+  }
 
-    open = PyImport_ImportModuleAttrString("io", "open");
-    if (open == NULL) {
-        return 0;
-    }
-    stream = PyObject_CallFunction(open, "isisOOO",
-                    fd, "r", -1, enc, Py_None, Py_None, Py_False);
-    Py_DECREF(open);
-    if (stream == NULL) {
-        return 0;
-    }
+  open = PyImport_ImportModuleAttrString("io", "open");
+  if (open == NULL) {
+    return 0;
+  }
+  stream = PyObject_CallFunction(open, "isisOOO", fd, "r", -1, enc, Py_None,
+                                 Py_None, Py_False); // errors=None, newline=None, closefd=False
+  Py_DECREF(open);
+  if (stream == NULL) {
+    return 0;
+  }
 
-    readline = PyObject_GetAttr(stream, &_Py_ID(readline));
-    Py_DECREF(stream);
-    if (readline == NULL) {
-        return 0;
-    }
-    Py_XSETREF(tok->decoding_readline, readline);
-
-    if (pos > 0) {
-        PyObject *bufobj = _PyObject_CallNoArgs(readline);
-        if (bufobj == NULL) {
-            return 0;
-        }
-        Py_DECREF(bufobj);
+  readline = PyObject_GetAttr(stream, &_Py_ID(readline));
+  Py_DECREF(stream); // the bound method obtained above is what gets kept
+  if (readline == NULL) {
+    return 0;
+  }
+  Py_XSETREF(tok->decoding_readline, readline);
+
+  if (pos > 0) {
+    PyObject *bufobj = _PyObject_CallNoArgs(readline); // reads from byte pos - 1 to end of line
+    if (bufobj == NULL) {
+      return 0;
     }
+    Py_XSETREF(tok->decoding_buffer, bufobj); // releases any pending line; NOTE(review): holds only the tail from pos - 1
+  }
 
-    return 1;
+  return 1;
 }
 
-static int
-tok_underflow_interactive(struct tok_state *tok) {
-    if (tok->interactive_underflow == IUNDERFLOW_STOP) {
-        tok->done = E_INTERACT_STOP;
-        return 1;
-    }
-    char *newtok = PyOS_Readline(tok->fp ? tok->fp : stdin, stdout, tok->prompt);
-    if (newtok != NULL) {
-        char *translated = _PyTokenizer_translate_newlines(newtok, 0, 0, tok);
-        PyMem_Free(newtok);
-        if (translated == NULL) {
-            return 0;
-        }
-        newtok = translated;
-    }
-    if (tok->encoding && newtok && *newtok) {
-        /* Recode to UTF-8 */
-        Py_ssize_t buflen;
-        const char* buf;
-        PyObject *u = _PyTokenizer_translate_into_utf8(newtok, tok->encoding);
-        PyMem_Free(newtok);
-        if (u == NULL) {
-            tok->done = E_DECODE;
-            return 0;
-        }
-        buflen = PyBytes_GET_SIZE(u);
-        buf = PyBytes_AS_STRING(u);
-        newtok = PyMem_Malloc(buflen+1);
-        if (newtok == NULL) {
-            Py_DECREF(u);
-            tok->done = E_NOMEM;
-            return 0;
-        }
-        strcpy(newtok, buf);
-        Py_DECREF(u);
-    }
-    if (tok->fp_interactive &&
-        tok_concatenate_interactive_new_line(tok, newtok) == -1) {
-        PyMem_Free(newtok);
-        return 0;
-    }
-    if (tok->nextprompt != NULL) {
-        tok->prompt = tok->nextprompt;
-    }
+static int tok_underflow_interactive(struct tok_state *tok) { // refills the buffer from the prompt; 1 = ok, 0 = stop/error
+  if (tok->interactive_underflow == IUNDERFLOW_STOP) {
+    tok->done = E_INTERACT_STOP;
+    return 1;
+  }
+  char *newtok = PyOS_Readline(tok->fp ? tok->fp : stdin, stdout, tok->prompt);
+  if (newtok != NULL) {
+    char *translated = _PyTokenizer_translate_newlines(newtok, 0, 0, tok);
+    PyMem_Free(newtok);
+    if (translated == NULL) {
+      return 0;
+    }
+    newtok = translated; // newtok now owns the translated copy
+  }
+  if (tok->encoding && newtok && *newtok) {
+    /* Recode to UTF-8 */
+    Py_ssize_t buflen;
+    const char *buf;
+    PyObject *u = _PyTokenizer_translate_into_utf8(newtok, tok->encoding);
+    PyMem_Free(newtok);
+    if (u == NULL) {
+      tok->done = E_DECODE;
+      return 0;
+    }
+    buflen = PyBytes_GET_SIZE(u);
+    buf = PyBytes_AS_STRING(u);
+    newtok = PyMem_Malloc(buflen + 1); // private copy so that u can be released
     if (newtok == NULL) {
-        tok->done = E_INTR;
-    }
-    else if (*newtok == '\0') {
-        PyMem_Free(newtok);
-        tok->done = E_EOF;
-    }
-    else if (tok->start != NULL) {
-        Py_ssize_t cur_multi_line_start = tok->multi_line_start - tok->buf;
-        _PyLexer_remember_fstring_buffers(tok);
-        size_t size = strlen(newtok);
-        ADVANCE_LINENO();
-        if (!_PyLexer_tok_reserve_buf(tok, size + 1)) {
-            PyMem_Free(tok->buf);
-            tok->buf = NULL;
-            PyMem_Free(newtok);
-            return 0;
-        }
-        memcpy(tok->cur, newtok, size + 1);
-        PyMem_Free(newtok);
-        tok->inp += size;
-        tok->multi_line_start = tok->buf + cur_multi_line_start;
-        _PyLexer_restore_fstring_buffers(tok);
-    }
-    else {
-        _PyLexer_remember_fstring_buffers(tok);
-        ADVANCE_LINENO();
-        PyMem_Free(tok->buf);
-        tok->buf = newtok;
-        tok->cur = tok->buf;
-        tok->line_start = tok->buf;
-        tok->inp = strchr(tok->buf, '\0');
-        tok->end = tok->inp + 1;
-        _PyLexer_restore_fstring_buffers(tok);
-    }
-    if (tok->done != E_OK) {
-        if (tok->prompt != NULL) {
-            PySys_WriteStderr("\n");
-        }
-        return 0;
+      Py_DECREF(u);
+      tok->done = E_NOMEM;
+      return 0;
+    }
+    strcpy(newtok, buf);
+    Py_DECREF(u);
+  }
+  if (tok->fp_interactive &&
+      tok_concatenate_interactive_new_line(tok, newtok) == -1) {
+    PyMem_Free(newtok);
+    return 0;
+  }
+  if (tok->nextprompt != NULL) {
+    tok->prompt = tok->nextprompt; // later reads use the follow-up prompt
+  }
+  if (newtok == NULL) {
+    tok->done = E_INTR; // PyOS_Readline returned NULL
+  } else if (*newtok == '\0') {
+    PyMem_Free(newtok);
+    tok->done = E_EOF; // an empty string is treated as end of input
+  } else if (tok->start != NULL) {
+    Py_ssize_t cur_multi_line_start = tok->multi_line_start - tok->buf; // saved as an offset; re-based on tok->buf below
+    _PyLexer_remember_fstring_buffers(tok);
+    size_t size = strlen(newtok);
+    ADVANCE_LINENO();
+    if (!_PyLexer_tok_reserve_buf(tok, size + 1)) {
+      PyMem_Free(tok->buf);
+      tok->buf = NULL;
+      PyMem_Free(newtok);
+      return 0;
+    }
+    memcpy(tok->cur, newtok, size + 1); // size + 1 also copies the NUL
+    PyMem_Free(newtok);
+    tok->inp += size;
+    tok->multi_line_start = tok->buf + cur_multi_line_start;
+    _PyLexer_restore_fstring_buffers(tok);
+  } else {
+    _PyLexer_remember_fstring_buffers(tok);
+    ADVANCE_LINENO();
+    PyMem_Free(tok->buf);
+    tok->buf = newtok; // the tokenizer buffer takes ownership of newtok
+    tok->cur = tok->buf;
+    tok->line_start = tok->buf;
+    tok->inp = strchr(tok->buf, '\0');
+    tok->end = tok->inp + 1;
+    _PyLexer_restore_fstring_buffers(tok);
+  }
+  if (tok->done != E_OK) {
+    if (tok->prompt != NULL) {
+      PySys_WriteStderr("\n");
     }
+    return 0;
+  }
 
-    if (tok->tok_mode_stack_index && !_PyLexer_update_ftstring_expr(tok, 0)) {
-        return 0;
-    }
-    return 1;
+  if (tok->tok_mode_stack_index && !_PyLexer_update_ftstring_expr(tok, 0)) {
+    return 0;
+  }
+  return 1;
 }
 
-static int
-tok_underflow_file(struct tok_state *tok)
-{
-    if (tok->decoding_state == STATE_INIT) {
-        /* We have not yet determined the encoding.
-           If an encoding is found, use the file-pointer
-           reader functions from now on. */
-        if (!_PyTokenizer_check_bom(fp_getc, fp_ungetc, fp_setreadl, tok)) {
-            _PyTokenizer_error_ret(tok);
-            return 0;
-        }
-        assert(tok->decoding_state != STATE_INIT);
-    }
-    int raw = tok->decoding_readline == NULL;
-    if (raw && tok->decoding_state != STATE_NORMAL) {
-        /* Keep the first line in the buffer to validate it later if
-         * the encoding has not yet been determined. */
-    }
-    else if (tok->start == NULL && !INSIDE_FSTRING(tok)) {
-        tok->cur = tok->inp = tok->buf;
-    }
-    /* Read until '\n' or EOF */
-    if (!raw) {
-        /* We already have a codec associated with this input. */
-        if (!tok_readline_recode(tok)) {
-            return 0;
-        }
-    }
-    else {
-        /* We want a 'raw' read. */
-        if (!tok_readline_raw(tok)) {
-            return 0;
-        }
-    }
-    if (tok->inp == tok->cur) {
-        tok->done = E_EOF;
-        return 0;
-    }
-    tok->implicit_newline = 0;
-    if (tok->inp[-1] != '\n') {
-        assert(tok->inp + 1 < tok->end);
-        /* Last line does not end in \n, fake one */
-        *tok->inp++ = '\n';
-        *tok->inp = '\0';
-        tok->implicit_newline = 1;
-    }
+static int tok_underflow_file(struct tok_state *tok) {
+  if (tok->decoding_state == STATE_INIT) {
+    /* The encoding is not determined yet, so run the BOM check.
+       If an encoding is found, use the file-pointer
+       reader functions from now on. */
+    if (!_PyTokenizer_check_bom(fp_getc, fp_ungetc, fp_setreadl, tok)) {
+      _PyTokenizer_error_ret(tok);
+      return 0;
+    }
+    assert(tok->decoding_state != STATE_INIT);
+  }
+  int raw = tok->decoding_readline == NULL;
+  if (raw && tok->decoding_state != STATE_NORMAL) {
+    /* Raw read with the encoding still undecided: leave the buffer alone so
+     * the first line is kept and can be validated later. */
+  } else if (tok->start == NULL && !INSIDE_FSTRING(tok)) {
+    tok->cur = tok->inp = tok->buf;
+  }
+  /* Read one line (until '\n' or EOF), through the codec or raw. */
+  if (!raw) {
+    /* decoding_readline is set: a codec is associated with this input. */
+    if (!tok_readline_recode(tok)) {
+      return 0;
+    }
+  } else {
+    /* No codec yet, so we want a 'raw' read. */
+    if (!tok_readline_raw(tok)) {
+      return 0;
+    }
+  }
+  if (tok->inp == tok->cur) {
+    tok->done = E_EOF;
+    return 0;
+  }
+  tok->implicit_newline = 0;
+  if (tok->inp[-1] != '\n') {
+    assert(tok->inp + 1 < tok->end);
+    /* Line does not end in '\n': fake one and flag it as implicit. */
+    *tok->inp++ = '\n';
+    *tok->inp = '\0';
+    tok->implicit_newline = 1;
+  }
 
-    if (tok->tok_mode_stack_index && !_PyLexer_update_ftstring_expr(tok, 0)) {
+  if (tok->tok_mode_stack_index && !_PyLexer_update_ftstring_expr(tok, 0)) {
+    return 0;
+  }
+
+  ADVANCE_LINENO();
+  if (tok->decoding_state != STATE_NORMAL) {
+    if (!_PyTokenizer_check_coding_spec(tok->cur, strlen(tok->cur), tok,
+                                        fp_setreadl)) {
+      return 0;
+    }
+    if (raw && tok->decoding_readline != NULL) {
+      /* Just switched to recoding: drop the raw line, re-read via codec.
+         NOTE(review): EOF/'\n' checks above are not repeated - confirm. */
+      tok->cur = tok->inp = tok->buf;
+      if (!tok_readline_recode(tok)) {
         return 0;
+      }
+    }
+    if (tok->lineno >= 2) {
+      tok->decoding_state = STATE_NORMAL;
+    }
+  }
+  if (raw && tok->decoding_state == STATE_NORMAL) {
+    const char *line = tok->lineno <= 2 ? tok->buf : tok->cur;
+    int lineno = tok->lineno <= 2 ? 1 : tok->lineno;
+    if (!tok->encoding) {
+      /* The default encoding is UTF-8, so make sure we don't have any
+         non-UTF-8 sequences in it. */
+      if (!_PyTokenizer_ensure_utf8(line, tok, lineno)) {
+        _PyTokenizer_error_ret(tok);
+        return 0;
+      }
+    } else {
+      PyObject *tmp = PyUnicode_Decode(line, strlen(line), tok->encoding, NULL);
+      if (tmp == NULL) {
+        _PyTokenizer_error_ret(tok);
+        return 0;
+      }
+      Py_DECREF(tmp);
     }
-
-    ADVANCE_LINENO();
-    if (tok->decoding_state != STATE_NORMAL) {
-        if (!_PyTokenizer_check_coding_spec(tok->cur, strlen(tok->cur),
-                                    tok, fp_setreadl))
-        {
-            return 0;
-        }
-        if (tok->lineno >= 2) {
-            tok->decoding_state = STATE_NORMAL;
-        }
-    }
-    if (raw && tok->decoding_state == STATE_NORMAL) {
-        const char *line = tok->lineno <= 2 ? tok->buf : tok->cur;
-        int lineno = tok->lineno <= 2 ? 1 : tok->lineno;
-        if (!tok->encoding) {
-            /* The default encoding is UTF-8, so make sure we don't have any
-               non-UTF-8 sequences in it. */
-            if (!_PyTokenizer_ensure_utf8(line, tok, lineno)) {
-                _PyTokenizer_error_ret(tok);
-                return 0;
-            }
-        }
-        else {
-            PyObject *tmp = PyUnicode_Decode(line, strlen(line),
-                                             tok->encoding, NULL);
-            if (tmp == NULL) {
-                _PyTokenizer_error_ret(tok);
-                return 0;
-            }
-            Py_DECREF(tmp);
-        }
-    }
-    assert(tok->done == E_OK);
-    return tok->done == E_OK;
+  }
+  assert(tok->done == E_OK);
+  return tok->done == E_OK;
 }
 
 /* Set up tokenizer for file */
-struct tok_state *
-_PyTokenizer_FromFile(FILE *fp, const char* enc,
-                      const char *ps1, const char *ps2)
-{
-    struct tok_state *tok = _PyTokenizer_tok_new();
-    if (tok == NULL)
-        return NULL;
-    if ((tok->buf = (char *)PyMem_Malloc(BUFSIZ)) == NULL) {
-        _PyTokenizer_Free(tok);
-        return NULL;
-    }
-    tok->cur = tok->inp = tok->buf;
-    tok->end = tok->buf + BUFSIZ;
-    tok->fp = fp;
-    tok->prompt = ps1;
-    tok->nextprompt = ps2;
-    if (ps1 || ps2) {
-        tok->underflow = &tok_underflow_interactive;
-    } else {
-        tok->underflow = &tok_underflow_file;
-    }
-    if (enc != NULL) {
-        /* Must copy encoding declaration since it
-           gets copied into the parse tree. */
-        tok->encoding = _PyTokenizer_new_string(enc, strlen(enc), tok);
-        if (!tok->encoding) {
-            _PyTokenizer_Free(tok);
-            return NULL;
-        }
-        tok->decoding_state = STATE_NORMAL;
-    }
-    return tok;
+struct tok_state *_PyTokenizer_FromFile(FILE *fp, const char *enc,
+                                        const char *ps1, const char *ps2) {
+  struct tok_state *tok = _PyTokenizer_tok_new();
+  if (tok == NULL)
+    return NULL;
+  if ((tok->buf = (char *)PyMem_Malloc(BUFSIZ)) == NULL) {
+    _PyTokenizer_Free(tok);
+    return NULL;
+  }
+  tok->cur = tok->inp = tok->buf;
+  tok->end = tok->buf + BUFSIZ;
+  tok->fp = fp;
+  tok->prompt = ps1;
+  tok->nextprompt = ps2;
+  if (ps1 || ps2) {
+    tok->underflow = &tok_underflow_interactive;
+  } else {
+    tok->underflow = &tok_underflow_file;
+  }
+  if (enc != NULL) {
+    /* Store our own copy of enc in tok->encoding: the encoding
+       declaration gets copied into the parse tree. */
+    tok->encoding = _PyTokenizer_new_string(enc, strlen(enc), tok);
+    if (!tok->encoding) {
+      _PyTokenizer_Free(tok);
+      return NULL;
+    }
+    tok->decoding_state = STATE_NORMAL;
+  }
+  return tok;
 }
 
-#if defined(__wasi__) || (defined(__EMSCRIPTEN__) && (__EMSCRIPTEN_major__ >= 3))
+#if defined(__wasi__) ||                                                       \
+    (defined(__EMSCRIPTEN__) && (__EMSCRIPTEN_major__ >= 3))
 // fdopen() with borrowed fd. WASI does not provide dup() and Emscripten's
 // dup() emulation with open() is slow.
 typedef union {
-    void *cookie;
-    int fd;
+  void *cookie;
+  int fd;
 } borrowed;
 
-static ssize_t
-borrow_read(void *cookie, char *buf, size_t size)
-{
-    borrowed b = {.cookie = cookie};
-    return read(b.fd, (void *)buf, size);
+static ssize_t borrow_read(void *cookie, char *buf, size_t size) {
+  borrowed b = {.cookie = cookie};
+  return read(b.fd, (void *)buf, size);
 }
 
-static FILE *
-fdopen_borrow(int fd) {
-    // supports only reading. seek fails. close and write are no-ops.
-    cookie_io_functions_t io_cb = {borrow_read, NULL, NULL, NULL};
-    borrowed b = {.fd = fd};
-    return fopencookie(b.cookie, "r", io_cb);
+static FILE *fdopen_borrow(int fd) {
+  // Only the read hook is set: seek fails; write and close are no-ops.
+  cookie_io_functions_t io_cb = {borrow_read, NULL, NULL, NULL};
+  borrowed b = {.fd = fd};
+  return fopencookie(b.cookie, "r", io_cb);
 }
 #else
-static FILE *
-fdopen_borrow(int fd) {
-    fd = _Py_dup(fd);
-    if (fd < 0) {
-        return NULL;
-    }
-    return fdopen(fd, "r");
+static FILE *fdopen_borrow(int fd) {
+  fd = _Py_dup(fd);
+  if (fd < 0) {
+    return NULL;
+  }
+  return fdopen(fd, "r");
 }
 #endif
 
@@ -445,49 +438,46 @@ fdopen_borrow(int fd) {
 
    The char* returned is malloc'ed via PyMem_Malloc() and thus must be freed
    by the caller. */
-char *
-_PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
-{
-    struct tok_state *tok;
-    FILE *fp;
-    char *encoding = NULL;
-
-    fp = fdopen_borrow(fd);
-    if (fp == NULL) {
-        return NULL;
-    }
-    tok = _PyTokenizer_FromFile(fp, NULL, NULL, NULL);
-    if (tok == NULL) {
-        fclose(fp);
-        return NULL;
-    }
-    if (filename != NULL) {
-        tok->filename = Py_NewRef(filename);
-    }
-    else {
-        tok->filename = PyUnicode_FromString("<string>");
-        if (tok->filename == NULL) {
-            fclose(fp);
-            _PyTokenizer_Free(tok);
-            return encoding;
-        }
-    }
-    struct token token;
-    // We don't want to report warnings here because it could cause infinite recursion
-    // if fetching the encoding shows a warning.
-    tok->report_warnings = 0;
-    while (tok->lineno < 2 && tok->done == E_OK) {
-        _PyToken_Init(&token);
-        _PyTokenizer_Get(tok, &token);
-        _PyToken_Free(&token);
-    }
+char *_PyTokenizer_FindEncodingFilename(int fd, PyObject *filename) {
+  struct tok_state *tok;
+  FILE *fp;
+  char *encoding = NULL;
+
+  fp = fdopen_borrow(fd);
+  if (fp == NULL) {
+    return NULL;
+  }
+  tok = _PyTokenizer_FromFile(fp, NULL, NULL, NULL);
+  if (tok == NULL) {
     fclose(fp);
-    if (tok->encoding) {
-        encoding = (char *)PyMem_Malloc(strlen(tok->encoding) + 1);
-        if (encoding) {
-            strcpy(encoding, tok->encoding);
-        }
-    }
-    _PyTokenizer_Free(tok);
-    return encoding;
+    return NULL;
+  }
+  if (filename != NULL) {
+    tok->filename = Py_NewRef(filename);
+  } else {
+    tok->filename = PyUnicode_FromString("<string>");
+    if (tok->filename == NULL) {
+      fclose(fp);
+      _PyTokenizer_Free(tok);
+      return encoding;
+    }
+  }
+  struct token token;
+  // Turn warning reporting off: showing a warning while fetching the
+  // encoding could cause infinite recursion.
+  tok->report_warnings = 0;
+  while (tok->lineno < 2 && tok->done == E_OK) {
+    _PyToken_Init(&token);
+    _PyTokenizer_Get(tok, &token);
+    _PyToken_Free(&token);
+  }
+  fclose(fp);
+  if (tok->encoding) {
+    encoding = (char *)PyMem_Malloc(strlen(tok->encoding) + 1);
+    if (encoding) {
+      strcpy(encoding, tok->encoding);
+    }
+  }
+  _PyTokenizer_Free(tok);
+  return encoding;
 }