Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use orjson for faster JSONL output #1019

Merged
merged 2 commits into from
Nov 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ _When adding new entries to the changelog, please include issue/PR numbers where

## Unreleased

- diff: Use [orjson](https://github.com/ijl/orjson?tab=readme-ov-file#orjson) for faster JSON-Lines output. [#1019](https://github.com/koordinates/kart/pull/1019)
- Upgrade to PDAL 2.7 [#1005](https://github.com/koordinates/kart/pull/1005)
- Adds a `--drop-empty-geometry-features` option to `kart export`. [#1007](https://github.com/koordinates/kart/pull/1007)
- Adds diagnostic output to Kart when `KART_DIAGNOSTICS=1` environment variable is set. [#1013](https://github.com/koordinates/kart/pull/1013)
Expand Down
16 changes: 12 additions & 4 deletions kart/json_diff_writers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import json
import orjson
import logging
import threading
from datetime import datetime, timedelta, timezone
Expand All @@ -19,7 +19,11 @@
from kart.diff_structs import FILES_KEY, BINARY_FILE, DatasetDiff
from kart.key_filters import DeltaFilter
from kart.log import commit_obj_to_json
from kart.output_util import dump_json_output, resolve_output_path
from kart.output_util import (
dump_json_output,
resolve_output_path,
orjson_encode_default,
)
from kart.tabular.feature_output import feature_as_geojson, feature_as_json
from kart.timestamps import datetime_to_iso8601_utc, timedelta_to_iso8601_tz

Expand Down Expand Up @@ -241,9 +245,13 @@ def __init__(self, *args, diff_estimate_accuracy=None, delta_filter=None, **kwar
self._output_lock = threading.RLock()

def dump(self, obj):
output: bytes = orjson.dumps(
obj,
default=orjson_encode_default,
option=orjson.OPT_APPEND_NEWLINE | orjson.OPT_NON_STR_KEYS,
)
with self._output_lock:
json.dump(obj, self.fp, separators=self.separators)
self.fp.write("\n")
self.fp.buffer.write(output)

def write_header(self):
self.dump(
Expand Down
25 changes: 24 additions & 1 deletion kart/output_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,27 @@
import types
from pathlib import Path

import orjson
import pygments
from pygments.lexers import JsonLexer

from .wkt_lexer import WKTLexer

_terminal_formatter = None

# note: `json` and `orjson` libraries aren't quite interchangeable.
# * orjson is much faster, so we use it where we can
# * orjson doesn't support custom separators
# * orjson doesn't support iterencode(), so can't stream unbounded iterators to stdout :(
ORJSON_OPTIONS = {
"compact": 0, # orjson doesn't support custom separators, so extracompact and compact look identical
"extracompact": 0,
"pretty": orjson.OPT_INDENT_2,
}
JSON_PARAMS = {
"compact": {},
"pretty": {"indent": 2},
"extracompact": {"separators": (",", ":")},
"pretty": {"indent": 2},
}


Expand All @@ -37,6 +47,19 @@ def __iter__(self):
return itertools.chain(self._head, *self[:1])


def orjson_encode_default(obj):
    """
    Hook to extend the default serialisation of `orjson.dumps()`.

    Intended to be passed as the `default=` callable of `orjson.dumps()`.

    Returns:
        - a list copy of `obj` when `obj` is a tuple (or tuple subclass);
        - the result of `obj.__json__()` when the object provides that method.

    Raises:
        TypeError: if `obj` is neither a tuple nor provides `__json__()`.
            The message names the offending type to make failures easier
            to diagnose.
    """
    if isinstance(obj, tuple):
        return list(obj)

    # Single attribute lookup instead of hasattr() followed by a second access.
    to_json = getattr(obj, "__json__", None)
    if to_json is not None:
        return to_json()

    raise TypeError(
        f"Object of type {type(obj).__name__} is not JSON serializable"
    )


class ExtendedJsonEncoder(json.JSONEncoder):
"""A JSONEncoder that tries calling __json__() if it can't serialise an object another way."""

Expand Down
1 change: 1 addition & 0 deletions requirements/requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ certifi
click~=8.1.7
docutils<0.18
msgpack~=0.6.1
orjson
Pygments
pymysql
rst2txt
Expand Down
4 changes: 3 additions & 1 deletion requirements/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ certifi==2022.12.7
# reflink
click==8.1.7
# via -r requirements.in
#cryptography==42.0.4
#cryptography==43.0.1
# via -r requirements/vendor-wheels.txt
docutils==0.17.1
# via
Expand All @@ -38,6 +38,8 @@ jsonschema==4.17.3
# via -r requirements.in
msgpack==0.6.2
# via -r requirements.in
orjson==3.10.11
# via -r requirements.in
#psycopg2==2.9.9
# via -r requirements/vendor-wheels.txt
pycparser==2.21
Expand Down
Loading
Loading