diff --git a/README.md b/README.md index af52b048..ceccea71 100644 --- a/README.md +++ b/README.md @@ -1033,81 +1033,81 @@ format, containing floats and arrays, indented. | Library | Median latency (milliseconds) | Operations per second | Relative (latency) | |------------|---------------------------------|-------------------------|----------------------| -| orjson | 0.3 | 3085 | 1 | -| ujson | 2.2 | 454 | 6.7 | -| rapidjson | 1.7 | 605 | 5.1 | -| simplejson | 2.9 | 350 | 8.8 | -| json | 2.3 | 439 | 7 | +| orjson | 0.1 | 8377 | 1 | +| ujson | 0.9 | 1088 | 7.3 | +| rapidjson | 0.8 | 1228 | 6.8 | +| simplejson | 1.9 | 531 | 15.6 | +| json | 1.4 | 744 | 11.3 | #### twitter.json deserialization | Library | Median latency (milliseconds) | Operations per second | Relative (latency) | |------------|---------------------------------|-------------------------|----------------------| -| orjson | 1.2 | 839 | 1 | -| ujson | 2.5 | 396 | 2.1 | -| rapidjson | 4.1 | 243 | 3.5 | -| simplejson | 2.7 | 367 | 2.3 | -| json | 3.2 | 310 | 2.7 | +| orjson | 0.6 | 1811 | 1 | +| ujson | 1.2 | 814 | 2.1 | +| rapidjson | 2.1 | 476 | 3.8 | +| simplejson | 1.6 | 626 | 3 | +| json | 1.8 | 557 | 3.3 | #### github.json serialization | Library | Median latency (milliseconds) | Operations per second | Relative (latency) | |------------|---------------------------------|-------------------------|----------------------| -| orjson | 0 | 33474 | 1 | -| ujson | 0.2 | 5179 | 6.5 | -| rapidjson | 0.2 | 5910 | 5.7 | -| simplejson | 0.3 | 3051 | 11 | -| json | 0.2 | 4222 | 7.9 | +| orjson | 0.01 | 104424 | 1 | +| ujson | 0.09 | 10594 | 9.8 | +| rapidjson | 0.07 | 13667 | 7.6 | +| simplejson | 0.2 | 5051 | 20.6 | +| json | 0.14 | 7133 | 14.6 | #### github.json deserialization | Library | Median latency (milliseconds) | Operations per second | Relative (latency) | |------------|---------------------------------|-------------------------|----------------------| -| orjson | 0.1 | 10211 | 1 | -| ujson | 0.2 | 
4222 | 2.2 | -| rapidjson | 0.3 | 3947 | 2.6 | -| simplejson | 0.2 | 5437 | 1.9 | -| json | 0.2 | 5240 | 1.9 | +| orjson | 0.05 | 20069 | 1 | +| ujson | 0.11 | 8913 | 2.3 | +| rapidjson | 0.13 | 8077 | 2.6 | +| simplejson | 0.11 | 9342 | 2.1 | +| json | 0.11 | 9291 | 2.2 | #### citm_catalog.json serialization | Library | Median latency (milliseconds) | Operations per second | Relative (latency) | |------------|---------------------------------|-------------------------|----------------------| -| orjson | 0.6 | 1549 | 1 | -| ujson | 2.7 | 366 | 4.2 | -| rapidjson | 2.2 | 446 | 3.5 | -| simplejson | 11.3 | 88 | 17.6 | -| json | 5.1 | 195 | 7.9 | +| orjson | 0.3 | 3757 | 1 | +| ujson | 1.7 | 598 | 6.3 | +| rapidjson | 1.3 | 768 | 4.9 | +| simplejson | 8.3 | 120 | 31.1 | +| json | 3 | 331 | 11.3 | #### citm_catalog.json deserialization | Library | Median latency (milliseconds) | Operations per second | Relative (latency) | |------------|---------------------------------|-------------------------|----------------------| -| orjson | 2.7 | 367 | 1 | -| ujson | 4.7 | 213 | 1.7 | -| rapidjson | 7.2 | 139 | 2.6 | -| simplejson | 6 | 167 | 2.2 | -| json | 6.3 | 158 | 2.3 | +| orjson | 1.4 | 730 | 1 | +| ujson | 2.6 | 384 | 1.9 | +| rapidjson | 4 | 246 | 3 | +| simplejson | 3.7 | 271 | 2.7 | +| json | 3.7 | 267 | 2.7 | #### canada.json serialization | Library | Median latency (milliseconds) | Operations per second | Relative (latency) | |------------|---------------------------------|-------------------------|----------------------| -| orjson | 4.8 | 208 | 1 | -| ujson | 15.6 | 63 | 3.3 | -| rapidjson | 42.4 | 23 | 8.9 | -| simplejson | 72 | 13 | 15 | -| json | 46.2 | 21 | 9.6 | +| orjson | 2.4 | 410 | 1 | +| ujson | 9.6 | 104 | 3.9 | +| rapidjson | 28.7 | 34 | 11.8 | +| simplejson | 49.3 | 20 | 20.3 | +| json | 30.6 | 32 | 12.6 | #### canada.json deserialization | Library | Median latency (milliseconds) | Operations per second | Relative (latency) | 
|------------|---------------------------------|-------------------------|----------------------| -| orjson | 5.7 | 176 | 1 | -| ujson | 14 | 71 | 2.5 | -| rapidjson | 27.5 | 36 | 4.9 | -| simplejson | 28.4 | 35 | 5 | -| json | 28.3 | 35 | 5 | +| orjson | 3 | 336 | 1 | +| ujson | 7.1 | 141 | 2.4 | +| rapidjson | 20.1 | 49 | 6.7 | +| simplejson | 16.8 | 59 | 5.6 | +| json | 18.2 | 55 | 6.1 | ### Memory @@ -1162,8 +1162,8 @@ calling `loads()` on the fixture. ### Reproducing -The above was measured using Python 3.11.8 on Linux (amd64) with -orjson 3.10.0, ujson 5.9.0, python-rapidson 1.16, and simplejson 3.19.2. +The above was measured using Python 3.11.9 on Linux (amd64) with +orjson 3.10.6, ujson 5.10.0, python-rapidjson 1.18, and simplejson 3.19.2. The latency results can be reproduced using the `pybench` and `graph` scripts. The memory results can be reproduced using the `pymem` script. diff --git a/bench/benchmark_dumps.py b/bench/benchmark_dumps.py index 205d1d47..d5be6e7d 100644 --- a/bench/benchmark_dumps.py +++ b/bench/benchmark_dumps.py @@ -15,5 +15,5 @@ def test_dumps(benchmark, fixture, library): benchmark.group = f"{fixture} serialization" benchmark.extra_info["lib"] = library data = read_fixture_obj(f"{fixture}.xz") - benchmark.extra_info["correct"] = json_loads(dumper(data)) == data + benchmark.extra_info["correct"] = json_loads(dumper(data)) == data # type: ignore benchmark(dumper, data) diff --git a/bench/benchmark_empty.py b/bench/benchmark_empty.py index 6fb9425f..7d0a945e 100644 --- a/bench/benchmark_empty.py +++ b/bench/benchmark_empty.py @@ -11,7 +11,6 @@ @pytest.mark.parametrize("library", libraries) def test_empty(benchmark, data, library): dumper, loader = libraries[library] - benchmark.extra_info["correct"] = json_loads(dumper(loader(data))) == json_loads( - data - ) + correct = json_loads(dumper(loader(data))) == json_loads(data) # type: ignore + benchmark.extra_info["correct"] = correct benchmark(loader, data) diff --git 
a/bench/benchmark_loads.py b/bench/benchmark_loads.py index 75d7b5ed..89f417d3 100644 --- a/bench/benchmark_loads.py +++ b/bench/benchmark_loads.py @@ -15,7 +15,6 @@ def test_loads(benchmark, fixture, library): benchmark.group = f"{fixture} deserialization" benchmark.extra_info["lib"] = library data = read_fixture(f"{fixture}.xz") - benchmark.extra_info["correct"] = json_loads(dumper(loader(data))) == json_loads( - data - ) + correct = json_loads(dumper(loader(data))) == json_loads(data) # type: ignore + benchmark.extra_info["correct"] = correct benchmark(loader, data) diff --git a/bench/data.py b/bench/data.py index c0dbc6ef..079957a6 100644 --- a/bench/data.py +++ b/bench/data.py @@ -10,15 +10,9 @@ from ujson import dumps as _ujson_dumps from ujson import loads as ujson_loads -from orjson import dumps as _orjson_dumps +from orjson import dumps as orjson_dumps from orjson import loads as orjson_loads -# dumps wrappers that return UTF-8 - - -def orjson_dumps(obj): - return _orjson_dumps(obj) - def ujson_dumps(obj): return _ujson_dumps(obj).encode("utf-8") @@ -36,7 +30,6 @@ def simplejson_dumps(obj): return _simplejson_dumps(obj).encode("utf-8") -# Add new libraries here (pair of UTF-8 dumper, loader) libraries = { "orjson": (orjson_dumps, orjson_loads), "ujson": (ujson_dumps, ujson_loads), @@ -45,7 +38,7 @@ def simplejson_dumps(obj): "simplejson": (simplejson_dumps, simplejson_loads), } -# Add new JSON files here (corresponding to ../data/*.json.xz) + fixtures = [ "canada.json", "citm_catalog.json", diff --git a/doc/deserialization.png b/doc/deserialization.png index 44fe2216..d83f5d51 100644 Binary files a/doc/deserialization.png and b/doc/deserialization.png differ diff --git a/doc/serialization.png b/doc/serialization.png index 5edcce8e..be98efd8 100644 Binary files a/doc/serialization.png and b/doc/serialization.png differ diff --git a/script/graph b/script/graph index 5217df93..7cf47d0d 100755 --- a/script/graph +++ b/script/graph @@ -127,8 +127,14 @@ def 
tab(obj): # ensure Y range max_y = int(math.ceil(max_y)) + if max_y > 10 and max_y % 2 > 0: + max_y = max_y + 1 pyplot.gca().set_yticks( - list((int(y) for y in pyplot.gca().get_yticks() if int(y) <= max_y)) + list( + {1, max_y}.union( + set(int(y) for y in pyplot.gca().get_yticks() if int(y) <= max_y) + ) + ) ) # print Y as percent diff --git a/script/pycorrectness b/script/pycorrectness index 0ca9f905..3e9c3f36 100755 --- a/script/pycorrectness +++ b/script/pycorrectness @@ -3,10 +3,14 @@ import collections import io +import json import lzma import os from pathlib import Path +import rapidjson +import simplejson +import ujson from tabulate import tabulate import orjson @@ -16,6 +20,15 @@ dirname = os.path.join(os.path.dirname(__file__), "..", "data") LIBRARIES = ["orjson", "ujson", "rapidjson", "simplejson", "json"] +LIBRARY_FUNC_MAP = { + "orjson": orjson.loads, + "ujson": ujson.loads, + "rapidjson": rapidjson.loads, + "simplejson": simplejson.loads, + "json": json.loads, +} + + def read_fixture_bytes(filename, subdir=None): if subdir is None: parts = (dirname, filename) @@ -43,29 +56,30 @@ JSONCHECKER = { RESULTS = collections.defaultdict(dict) -def test_passed(library, fixture): +def test_passed(libname, fixture): passed = [] + loads = LIBRARY_FUNC_MAP[libname] try: - passed.append(library.loads(fixture) == orjson.loads(fixture)) + passed.append(loads(fixture) == orjson.loads(fixture)) passed.append( - library.loads(fixture.decode("utf-8")) - == orjson.loads(fixture.decode("utf-8")) + loads(fixture.decode("utf-8")) == orjson.loads(fixture.decode("utf-8")) ) except Exception: passed.append(False) return all(passed) -def test_failed(library, fixture): +def test_failed(libname, fixture): rejected_as_bytes = False + loads = LIBRARY_FUNC_MAP[libname] try: - library.loads(fixture) + loads(fixture) except Exception: rejected_as_bytes = True rejected_as_str = False try: - library.loads(fixture.decode("utf-8")) + loads(fixture.decode("utf-8")) except Exception: 
rejected_as_str = True return rejected_as_bytes and rejected_as_str @@ -96,17 +110,16 @@ def should_fail(filename): for libname in LIBRARIES: - library = __import__(libname) for fixture_set in (PARSING, JSONCHECKER): for filename, fixture in fixture_set.items(): if should_pass(filename): - res = test_passed(library, fixture) + res = test_passed(libname, fixture) RESULTS[filename][libname] = res if not res: MISTAKEN_PASSES[libname] += 1 elif should_fail(filename): - res = test_failed(library, fixture) + res = test_failed(libname, fixture) RESULTS[filename][libname] = res if not res: MISTAKEN_FAILS[libname] += 1