Skip to content

Commit

Permalink
Merge pull request #1019 from koordinates/orjson
Browse files Browse the repository at this point in the history
Use `orjson` for faster JSONL output
  • Loading branch information
craigds authored Nov 13, 2024
2 parents 3f93dfd + 7c0e513 commit 764953a
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 27 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ _When adding new entries to the changelog, please include issue/PR numbers where

## Unreleased

- diff: Use [orjson](https://github.com/ijl/orjson?tab=readme-ov-file#orjson) for faster JSON-Lines output. [#1019](https://github.com/koordinates/kart/pull/1019)
- Upgrade to PDAL 2.7 [#1005](https://github.com/koordinates/kart/pull/1005)
- Adds a `--drop-empty-geometry-features` option to `kart export`. [#1007](https://github.com/koordinates/kart/pull/1007)
- Adds diagnostic output to Kart when `KART_DIAGNOSTICS=1` environment variable is set. [#1013](https://github.com/koordinates/kart/pull/1013)
Expand Down
16 changes: 12 additions & 4 deletions kart/json_diff_writers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import json
import orjson
import logging
import threading
from datetime import datetime, timedelta, timezone
Expand All @@ -19,7 +19,11 @@
from kart.diff_structs import FILES_KEY, BINARY_FILE, DatasetDiff
from kart.key_filters import DeltaFilter
from kart.log import commit_obj_to_json
from kart.output_util import dump_json_output, resolve_output_path
from kart.output_util import (
dump_json_output,
resolve_output_path,
orjson_encode_default,
)
from kart.tabular.feature_output import feature_as_geojson, feature_as_json
from kart.timestamps import datetime_to_iso8601_utc, timedelta_to_iso8601_tz

Expand Down Expand Up @@ -241,9 +245,13 @@ def __init__(self, *args, diff_estimate_accuracy=None, delta_filter=None, **kwar
self._output_lock = threading.RLock()

def dump(self, obj):
# Write `obj` to the output as one JSON-Lines record.
# Encoding happens before the lock is taken, so only the write itself runs while holding it.
# OPT_APPEND_NEWLINE makes orjson terminate the record with "\n";
# OPT_NON_STR_KEYS lets dict keys that aren't `str` be serialised.
# Anything orjson can't encode natively is passed to `orjson_encode_default`.
output: bytes = orjson.dumps(
obj,
default=orjson_encode_default,
option=orjson.OPT_APPEND_NEWLINE | orjson.OPT_NON_STR_KEYS,
)
with self._output_lock:
# NOTE(review): the next two lines look like the pre-orjson implementation
# (the removed side of this diff hunk) shown alongside its replacement — confirm.
json.dump(obj, self.fp, separators=self.separators)
self.fp.write("\n")
# orjson produces bytes, so they go to `self.fp.buffer` rather than `self.fp` itself
# (presumably `self.fp` is a text stream wrapping that binary buffer — verify).
self.fp.buffer.write(output)

def write_header(self):
self.dump(
Expand Down
25 changes: 24 additions & 1 deletion kart/output_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,27 @@
import types
from pathlib import Path

import orjson
import pygments
from pygments.lexers import JsonLexer

from .wkt_lexer import WKTLexer

_terminal_formatter = None

# note: `json` and `orjson` libraries aren't quite interchangeable.
# * orjson is much faster, so we use it where we can
# * orjson doesn't support custom separators
# * orjson doesn't support iterencode(), so can't stream unbounded iterators to stdout :(
# Per-output-style orjson option flags (presumably passed as `option=` to `orjson.dumps()` — verify against callers).
ORJSON_OPTIONS = {
"compact": 0, # orjson doesn't support custom separators, so extracompact and compact look identical
"extracompact": 0,
"pretty": orjson.OPT_INDENT_2,
}
# Per-output-style keyword arguments (presumably for the stdlib `json` encoder — verify against callers).
# NOTE(review): "pretty" appears twice below; this looks like the removed and added sides of the
# diff hunk. In a real dict literal the later entry would simply override the earlier one.
JSON_PARAMS = {
"compact": {},
"pretty": {"indent": 2},
"extracompact": {"separators": (",", ":")},
"pretty": {"indent": 2},
}


Expand All @@ -37,6 +47,19 @@ def __iter__(self):
return itertools.chain(self._head, *self[:1])


def orjson_encode_default(obj):
    """
    Hook to extend the default serialisation of `orjson.dumps()`.

    Intended to be passed as ``default=``; orjson calls it for objects it
    cannot serialise on its own.

    * tuples are returned as lists
    * objects that expose a ``__json__()`` method are replaced by its result

    Raises `TypeError` (naming the offending type) for anything else, which
    is how a ``default`` hook signals that a value is unsupported.
    """
    if isinstance(obj, tuple):
        return list(obj)

    if hasattr(obj, "__json__"):
        return obj.__json__()

    # Name the type so that a failure deep inside a large document is diagnosable.
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")


class ExtendedJsonEncoder(json.JSONEncoder):
"""A JSONEncoder that tries calling __json__() if it can't serialise an object another way."""

Expand Down
1 change: 1 addition & 0 deletions requirements/requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ certifi
click~=8.1.7
docutils<0.18
msgpack~=0.6.1
orjson
Pygments
pymysql
rst2txt
Expand Down
4 changes: 3 additions & 1 deletion requirements/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ certifi==2022.12.7
# reflink
click==8.1.7
# via -r requirements.in
#cryptography==42.0.4
#cryptography==43.0.1
# via -r requirements/vendor-wheels.txt
docutils==0.17.1
# via
Expand All @@ -38,6 +38,8 @@ jsonschema==4.17.3
# via -r requirements.in
msgpack==0.6.2
# via -r requirements.in
orjson==3.10.11
# via -r requirements.in
#psycopg2==2.9.9
# via -r requirements/vendor-wheels.txt
pycparser==2.21
Expand Down
Loading

0 comments on commit 764953a

Please sign in to comment.