From b176b12c9498dcda0eb662a358e09e621ec4c70e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bern=C3=A1t=20G=C3=A1bor?= Date: Wed, 3 Jun 2026 09:23:45 -0700 Subject: [PATCH 1/3] gh-150871: Speed up JSON string decoding for long ASCII strings scanstring_unicode scans each JSON string one character at a time for the closing quote, a backslash, or a control character. For the one-byte (ASCII/Latin-1) representation, skip eight bytes at a time with a word-at-a-time test using the same masks Objects/unicodeobject.c applies for ASCII scanning; the existing per-character loop then pins the exact byte and performs every decode decision. Two-byte and four-byte strings keep the current loop. Output is byte-identical, verified against test_json, a 347-input differential corpus, and all 340 nst/JSONTestSuite files. Long ASCII string values decode up to 6.3x faster; short keys, numbers, and non-Latin-1 strings are unaffected. --- ...-06-03-09-23-45.gh-issue-150871.aEM9sM.rst | 4 +++ Modules/_json.c | 30 ++++++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2026-06-03-09-23-45.gh-issue-150871.aEM9sM.rst diff --git a/Misc/NEWS.d/next/Library/2026-06-03-09-23-45.gh-issue-150871.aEM9sM.rst b/Misc/NEWS.d/next/Library/2026-06-03-09-23-45.gh-issue-150871.aEM9sM.rst new file mode 100644 index 000000000000000..3243e20374400ab --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-06-03-09-23-45.gh-issue-150871.aEM9sM.rst @@ -0,0 +1,4 @@ +Speed up :func:`json.loads` decoding of strings that contain long runs of +ordinary characters by scanning eight bytes at a time for the closing quote, a +backslash, or a control character. Strings containing non-Latin-1 characters +and short strings are unaffected. Patch by Bernát Gábor. diff --git a/Modules/_json.c b/Modules/_json.c index 6c4f38834631d30..d4741850e7099f4 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -501,7 +501,35 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next { // Use tight scope variable to help register allocation. Py_UCS4 d = 0; - for (next = end; next < len; next++) { + next = end; + /* SWAR PROTOTYPE: for the 1-byte representation, skip 8 bytes at a + time while none is '"', '\\', or (strict) a control char < 0x20. + Masks use the exact haszero trick (no false negatives); the + scalar loop below pins the exact first special char. */ + if (kind == PyUnicode_1BYTE_KIND) { + const Py_UCS1 *p = (const Py_UCS1 *)buf; + const uint64_t ones = 0x0101010101010101ULL; + const uint64_t high = 0x8080808080808080ULL; + const uint64_t bq = 0x22ULL * ones; /* '"' */ + const uint64_t bs = 0x5cULL * ones; /* '\\' */ + const uint64_t bc = 0xE0ULL * ones; /* (b & 0xE0)==0 iff b<0x20 */ + while (next + 8 <= len) { + uint64_t w; + memcpy(&w, p + next, 8); + uint64_t mq = w ^ bq; mq = (mq - ones) & ~mq & high; + uint64_t ms = w ^ bs; ms = (ms - ones) & ~ms & high; + uint64_t mc = 0; + if (strict) { + uint64_t v = w & bc; + mc = (v - ones) & ~v & high; + } + if (mq | ms | mc) { + break; + } + next += 8; + } + } + for (; next < len; next++) { d = PyUnicode_READ(kind, buf, next); if (d == '"' || d == '\\') { break; From c17ac21a6fb17479c194e2d77b94f9aaff4ecdbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bern=C3=A1t=20G=C3=A1bor?= Date: Wed, 3 Jun 2026 09:39:00 -0700 Subject: [PATCH 2/3] Reword the scan comment now that it is a proposed change --- Modules/_json.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index d4741850e7099f4..bc953520824ee45 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -502,10 +502,10 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next // Use tight scope variable to help register allocation. Py_UCS4 d = 0; next = end; - /* SWAR PROTOTYPE: for the 1-byte representation, skip 8 bytes at a - time while none is '"', '\\', or (strict) a control char < 0x20. - Masks use the exact haszero trick (no false negatives); the - scalar loop below pins the exact first special char. */ + /* For the 1-byte representation, skip 8 bytes at a time while none + is '"', '\\', or (strict) a control char < 0x20. The masks are + exact (no false negatives); the scalar loop below pins the exact + first special char and does the work. */ if (kind == PyUnicode_1BYTE_KIND) { const Py_UCS1 *p = (const Py_UCS1 *)buf; const uint64_t ones = 0x0101010101010101ULL; From 8a7c6df49f7bc5d06d7c28546ebcf358dce67f6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bern=C3=A1t=20G=C3=A1bor?= Date: Wed, 3 Jun 2026 15:01:33 -0700 Subject: [PATCH 3/3] Add tests exercising the string-decode scan paths Cover long runs that cross the scan windows with a terminator, backslash escape and \uXXXX escape at every offset in 1-byte and wider strings, plus strict and non-strict control-character handling at the window boundaries. --- Lib/test/test_json/test_decode.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/Lib/test/test_json/test_decode.py b/Lib/test/test_json/test_decode.py index 1d51fb2de0e69e4..c881fe7f13aeba8 100644 --- a/Lib/test/test_json/test_decode.py +++ b/Lib/test/test_json/test_decode.py @@ -155,6 +155,25 @@ def test_limit_int(self): with self.assertRaises(ValueError): self.loads('1' * (maxdigits + 1)) + def test_long_string_scan_paths(self): + # Exercise the string scan over long runs that cross the 8-byte scan + # windows: a terminator, a backslash escape and a \uXXXX escape at every + # offset, in 1-byte and wider (BMP, astral) strings. + loads = self.loads + for n in range(40): + run = "a" * n + self.assertEqual(loads('"' + run + '"'), run) + self.assertEqual(loads('"' + run + '\\nz"'), run + "\nz") + self.assertEqual(loads('"' + run + '\\u00e9z"'), run + "\xe9z") + self.assertEqual(loads('"' + "中" * n + '\\n"'), "中" * n + "\n") + self.assertEqual(loads('"' + "\U0001f600" * n + '"'), "\U0001f600" * n) + # Strict control-character detection at the window boundaries, and the + # non-strict path that keeps them. + for n in (7, 8, 15, 16, 17, 23, 24): + self.assertRaises(self.JSONDecodeError, loads, '"' + "a" * n + '\x01"') + self.assertEqual(loads('"' + "a" * n + '\x01"', strict=False), + "a" * n + "\x01") + class TestPyDecode(TestDecode, PyTest): pass class TestCDecode(TestDecode, CTest): pass