From 167626bd28aef58d7da4ddcb184df247b8f728bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bern=C3=A1t=20G=C3=A1bor?= Date: Mon, 1 Jun 2026 21:41:06 -0700 Subject: [PATCH 1/3] gh-150820: Speed up json.loads() and json.dumps() for small documents json.loads() and json.dumps() enter through Python wrappers that run on every call: decode() scanned for leading and trailing whitespace with a regex, and iterencode() built a float-formatting closure even when dispatching to the C encoder. Skip the whitespace scan when there is none, and build the float helper only on the Python encoding path. Both wrappers stay in Python, so the wins apply to the default build; output is byte-identical. --- Lib/json/decoder.py | 12 +++-- Lib/json/encoder.py | 48 ++++++++++--------- ...-06-02-15-45-00.gh-issue-150820.W7tpO7.rst | 3 ++ 3 files changed, 37 insertions(+), 26 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-06-02-15-45-00.gh-issue-150820.W7tpO7.rst diff --git a/Lib/json/decoder.py b/Lib/json/decoder.py index 364e44d40cc3073..42caf1f8fada9b1 100644 --- a/Lib/json/decoder.py +++ b/Lib/json/decoder.py @@ -355,10 +355,16 @@ def decode(self, s, _w=WHITESPACE.match): containing a JSON document). """ - obj, end = self.raw_decode(s, idx=_w(s, 0).end()) - end = _w(s, end).end() + # Skip the WHITESPACE.match() call (and its match-object allocation) + # for the common case where there is no leading whitespace. + idx = _w(s, 0).end() if s and s[0] in ' \t\n\r' else 0 + obj, end = self.raw_decode(s, idx=idx) + # Likewise avoid the trailing-whitespace match when the parse already + # consumed the whole string. if end != len(s): - raise JSONDecodeError("Extra data", s, end) + end = _w(s, end).end() + if end != len(s): + raise JSONDecodeError("Extra data", s, end) return obj def raw_decode(self, s, idx=0): diff --git a/Lib/json/encoder.py b/Lib/json/encoder.py index 718b3254241c565..1f52a95ad3b76e1 100644 --- a/Lib/json/encoder.py +++ b/Lib/json/encoder.py @@ -223,29 +223,6 @@ def iterencode(self, o, _one_shot=False): else: _encoder = encode_basestring - def floatstr(o, allow_nan=self.allow_nan, - _repr=float.__repr__, _inf=INFINITY, _neginf=-INFINITY): - # Check for specials. Note that this type of test is processor - # and/or platform-specific, so do tests which don't depend on the - # internals. - - if o != o: - text = 'NaN' - elif o == _inf: - text = 'Infinity' - elif o == _neginf: - text = '-Infinity' - else: - return _repr(o) - - if not allow_nan: - raise ValueError( - "Out of range float values are not JSON compliant: " + - repr(o)) - - return text - - if self.indent is None or isinstance(self.indent, str): indent = self.indent else: @@ -256,6 +233,31 @@ def floatstr(o, allow_nan=self.allow_nan, self.key_separator, self.item_separator, self.sort_keys, self.skipkeys, self.allow_nan) else: + # floatstr is only needed by the pure-Python encoder; defining it + # lazily avoids building this closure on every encode that takes + # the C fast path above. + def floatstr(o, allow_nan=self.allow_nan, + _repr=float.__repr__, _inf=INFINITY, _neginf=-INFINITY): + # Check for specials. Note that this type of test is processor + # and/or platform-specific, so do tests which don't depend on + # the internals. + + if o != o: + text = 'NaN' + elif o == _inf: + text = 'Infinity' + elif o == _neginf: + text = '-Infinity' + else: + return _repr(o) + + if not allow_nan: + raise ValueError( + "Out of range float values are not JSON compliant: " + + repr(o)) + + return text + _iterencode = _make_iterencode( markers, self.default, _encoder, indent, floatstr, self.key_separator, self.item_separator, self.sort_keys, diff --git a/Misc/NEWS.d/next/Library/2026-06-02-15-45-00.gh-issue-150820.W7tpO7.rst b/Misc/NEWS.d/next/Library/2026-06-02-15-45-00.gh-issue-150820.W7tpO7.rst new file mode 100644 index 000000000000000..4333bea78c28225 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-06-02-15-45-00.gh-issue-150820.W7tpO7.rst @@ -0,0 +1,3 @@ +Speed up :func:`json.loads` and :func:`json.dumps` for small documents by +avoiding a redundant whitespace scan on decode and by building the float +helper only on the Python encoding path. Patch by Bernát Gábor. From 284d568a66b0b795fcfce3beeafcbcd3de3e268f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bern=C3=A1t=20G=C3=A1bor?= Date: Wed, 3 Jun 2026 07:10:38 -0700 Subject: [PATCH 2/3] Split out the decode whitespace change to a separate PR Keep only the encoder floatstr deferral here; the decoder whitespace skip is contentious (depends on gh-117397 and shows mixed results on large documents) and moves to its own PR. --- Lib/json/decoder.py | 12 +++--------- .../2026-06-02-15-45-00.gh-issue-150820.W7tpO7.rst | 6 +++--- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/Lib/json/decoder.py b/Lib/json/decoder.py index 42caf1f8fada9b1..364e44d40cc3073 100644 --- a/Lib/json/decoder.py +++ b/Lib/json/decoder.py @@ -355,16 +355,10 @@ def decode(self, s, _w=WHITESPACE.match): containing a JSON document). """ - # Skip the WHITESPACE.match() call (and its match-object allocation) - # for the common case where there is no leading whitespace. - idx = _w(s, 0).end() if s and s[0] in ' \t\n\r' else 0 - obj, end = self.raw_decode(s, idx=idx) - # Likewise avoid the trailing-whitespace match when the parse already - # consumed the whole string. + obj, end = self.raw_decode(s, idx=_w(s, 0).end()) + end = _w(s, end).end() if end != len(s): - end = _w(s, end).end() - if end != len(s): - raise JSONDecodeError("Extra data", s, end) + raise JSONDecodeError("Extra data", s, end) return obj def raw_decode(self, s, idx=0): diff --git a/Misc/NEWS.d/next/Library/2026-06-02-15-45-00.gh-issue-150820.W7tpO7.rst b/Misc/NEWS.d/next/Library/2026-06-02-15-45-00.gh-issue-150820.W7tpO7.rst index 4333bea78c28225..799264ec7adae03 100644 --- a/Misc/NEWS.d/next/Library/2026-06-02-15-45-00.gh-issue-150820.W7tpO7.rst +++ b/Misc/NEWS.d/next/Library/2026-06-02-15-45-00.gh-issue-150820.W7tpO7.rst @@ -1,3 +1,3 @@ -Speed up :func:`json.loads` and :func:`json.dumps` for small documents by -avoiding a redundant whitespace scan on decode and by building the float -helper only on the Python encoding path. Patch by Bernát Gábor. +Speed up :func:`json.dumps` for small documents by building the float +formatting helper only on the slower Python encoding path instead of on every +call. Patch by Bernát Gábor. From 63808992663714bb8499059854b34dd67aa1a65d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bern=C3=A1t=20G=C3=A1bor?= Date: Wed, 3 Jun 2026 12:48:29 -0700 Subject: [PATCH 3/3] Drop the floatstr comment and trim the NEWS entry Per review: the comment and the implementation detail in the NEWS entry are not relevant to end users. --- Lib/json/encoder.py | 3 --- .../Library/2026-06-02-15-45-00.gh-issue-150820.W7tpO7.rst | 4 +--- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/Lib/json/encoder.py b/Lib/json/encoder.py index 1f52a95ad3b76e1..8768b63a3f80417 100644 --- a/Lib/json/encoder.py +++ b/Lib/json/encoder.py @@ -233,9 +233,6 @@ def iterencode(self, o, _one_shot=False): self.key_separator, self.item_separator, self.sort_keys, self.skipkeys, self.allow_nan) else: - # floatstr is only needed by the pure-Python encoder; defining it - # lazily avoids building this closure on every encode that takes - # the C fast path above. def floatstr(o, allow_nan=self.allow_nan, _repr=float.__repr__, _inf=INFINITY, _neginf=-INFINITY): # Check for specials. Note that this type of test is processor diff --git a/Misc/NEWS.d/next/Library/2026-06-02-15-45-00.gh-issue-150820.W7tpO7.rst b/Misc/NEWS.d/next/Library/2026-06-02-15-45-00.gh-issue-150820.W7tpO7.rst index 799264ec7adae03..ae9858f5294183a 100644 --- a/Misc/NEWS.d/next/Library/2026-06-02-15-45-00.gh-issue-150820.W7tpO7.rst +++ b/Misc/NEWS.d/next/Library/2026-06-02-15-45-00.gh-issue-150820.W7tpO7.rst @@ -1,3 +1 @@ -Speed up :func:`json.dumps` for small documents by building the float -formatting helper only on the slower Python encoding path instead of on every -call. Patch by Bernát Gábor. +Speed up :func:`json.dumps` for small documents. Patch by Bernát Gábor.