From b176b12c9498dcda0eb662a358e09e621ec4c70e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bern=C3=A1t=20G=C3=A1bor?= <gaborjbernat@gmail.com>
Date: Wed, 3 Jun 2026 09:23:45 -0700
Subject: [PATCH 1/3] gh-150871: Speed up JSON string decoding for long ASCII
 strings

scanstring_unicode scans each JSON string one character at a time for the
closing quote, a backslash, or a control character. For the one-byte
(ASCII/Latin-1) representation, skip eight bytes at a time with a word-at-a-time
test using the same masks Objects/unicodeobject.c applies for ASCII scanning;
the existing per-character loop then pins the exact byte and performs every
decode decision. Two-byte and four-byte strings keep the current loop.

Output is byte-identical, verified against test_json, a 347-input differential
corpus, and all 340 nst/JSONTestSuite files. Long ASCII string values decode up
to 6.3x faster; short keys, numbers, and non-Latin-1 strings are unaffected.
---
 ...-06-03-09-23-45.gh-issue-150871.aEM9sM.rst |  4 +++
 Modules/_json.c                               | 30 ++++++++++++++++++-
 2 files changed, 33 insertions(+), 1 deletion(-)
 create mode 100644 Misc/NEWS.d/next/Library/2026-06-03-09-23-45.gh-issue-150871.aEM9sM.rst

diff --git a/Misc/NEWS.d/next/Library/2026-06-03-09-23-45.gh-issue-150871.aEM9sM.rst b/Misc/NEWS.d/next/Library/2026-06-03-09-23-45.gh-issue-150871.aEM9sM.rst
new file mode 100644
index 000000000000000..3243e20374400ab
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-06-03-09-23-45.gh-issue-150871.aEM9sM.rst
@@ -0,0 +1,4 @@
+Speed up :func:`json.loads` decoding of strings that contain long runs of
+ordinary characters by scanning eight bytes at a time for the closing quote, a
+backslash, or a control character. Short strings and strings containing
+non-Latin-1 characters are unaffected. Patch by Bernát Gábor.
diff --git a/Modules/_json.c b/Modules/_json.c
index 6c4f38834631d30..d4741850e7099f4 100644
--- a/Modules/_json.c
+++ b/Modules/_json.c
@@ -501,7 +501,35 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
         {
             // Use tight scope variable to help register allocation.
             Py_UCS4 d = 0;
-            for (next = end; next < len; next++) {
+            next = end;
+            /* SWAR PROTOTYPE: for the 1-byte representation, skip 8 bytes at a
+               time while none is '"', '\\', or (strict) a control char < 0x20.
+               Masks use the exact haszero trick (no false negatives); the
+               scalar loop below pins the exact first special char. */
+            if (kind == PyUnicode_1BYTE_KIND) {
+                const Py_UCS1 *p = (const Py_UCS1 *)buf;
+                const uint64_t ones = 0x0101010101010101ULL;
+                const uint64_t high = 0x8080808080808080ULL;
+                const uint64_t bq = 0x22ULL * ones;   /* '"'  */
+                const uint64_t bs = 0x5cULL * ones;    /* '\\' */
+                const uint64_t bc = 0xE0ULL * ones;    /* (b & 0xE0)==0 iff b<0x20 */
+                while (next + 8 <= len) {
+                    uint64_t w;
+                    memcpy(&w, p + next, 8);
+                    uint64_t mq = w ^ bq; mq = (mq - ones) & ~mq & high;
+                    uint64_t ms = w ^ bs; ms = (ms - ones) & ~ms & high;
+                    uint64_t mc = 0;
+                    if (strict) {
+                        uint64_t v = w & bc;
+                        mc = (v - ones) & ~v & high;
+                    }
+                    if (mq | ms | mc) {
+                        break;
+                    }
+                    next += 8;
+                }
+            }
+            for (; next < len; next++) {
                 d = PyUnicode_READ(kind, buf, next);
                 if (d == '"' || d == '\\') {
                     break;

From c17ac21a6fb17479c194e2d77b94f9aaff4ecdbc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bern=C3=A1t=20G=C3=A1bor?= <gaborjbernat@gmail.com>
Date: Wed, 3 Jun 2026 09:39:00 -0700
Subject: [PATCH 2/3] Reword the scan comment now that it is a proposed change

---
 Modules/_json.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Modules/_json.c b/Modules/_json.c
index d4741850e7099f4..bc953520824ee45 100644
--- a/Modules/_json.c
+++ b/Modules/_json.c
@@ -502,10 +502,10 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
             // Use tight scope variable to help register allocation.
             Py_UCS4 d = 0;
             next = end;
-            /* SWAR PROTOTYPE: for the 1-byte representation, skip 8 bytes at a
-               time while none is '"', '\\', or (strict) a control char < 0x20.
-               Masks use the exact haszero trick (no false negatives); the
-               scalar loop below pins the exact first special char. */
+            /* For the 1-byte representation, skip 8 bytes at a time while none
+               is '"', '\\', or (strict) a control char < 0x20.  The word tests
+               have no false negatives, so no special byte is ever skipped; the
+               scalar loop below finds it and does the actual decoding. */
             if (kind == PyUnicode_1BYTE_KIND) {
                 const Py_UCS1 *p = (const Py_UCS1 *)buf;
                 const uint64_t ones = 0x0101010101010101ULL;

From 8a7c6df49f7bc5d06d7c28546ebcf358dce67f6b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bern=C3=A1t=20G=C3=A1bor?= <gaborjbernat@gmail.com>
Date: Wed, 3 Jun 2026 15:01:33 -0700
Subject: [PATCH 3/3] Add tests exercising the string-decode scan paths

Cover runs of up to 39 characters that cross the 8-byte scan windows: a
terminator, a backslash escape and a \uXXXX escape at every offset in 1-byte
strings, a backslash escape after BMP runs and a terminator after astral runs,
plus strict and non-strict control-character handling at the window boundaries.
---
 Lib/test/test_json/test_decode.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/Lib/test/test_json/test_decode.py b/Lib/test/test_json/test_decode.py
index 1d51fb2de0e69e4..c881fe7f13aeba8 100644
--- a/Lib/test/test_json/test_decode.py
+++ b/Lib/test/test_json/test_decode.py
@@ -155,6 +155,25 @@ def test_limit_int(self):
             with self.assertRaises(ValueError):
                 self.loads('1' * (maxdigits + 1))
 
+    def test_long_string_scan_paths(self):
+        # Exercise the string scan over long runs that cross the 8-byte scan
+        # windows: a terminator, a backslash escape and a \uXXXX escape at every
+        # offset of 1-byte input, plus BMP and astral input on the scalar loop.
+        loads = self.loads
+        for n in range(40):
+            run = "a" * n
+            self.assertEqual(loads('"' + run + '"'), run)
+            self.assertEqual(loads('"' + run + '\\nz"'), run + "\nz")
+            self.assertEqual(loads('"' + run + '\\u00e9z"'), run + "\xe9z")
+            self.assertEqual(loads('"' + "中" * n + '\\n"'), "中" * n + "\n")
+            self.assertEqual(loads('"' + "\U0001f600" * n + '"'), "\U0001f600" * n)
+        # Strict control-character detection at the window boundaries, and the
+        # non-strict path that keeps them.
+        for n in (7, 8, 15, 16, 17, 23, 24):
+            self.assertRaises(self.JSONDecodeError, loads, '"' + "a" * n + '\x01"')
+            self.assertEqual(loads('"' + "a" * n + '\x01"', strict=False),
+                             "a" * n + "\x01")
+
 
 class TestPyDecode(TestDecode, PyTest): pass
 class TestCDecode(TestDecode, CTest): pass