Skip to content

Commit

Permalink
feat(duckdb): add to_json() to Table and duckdb backend
Browse files Browse the repository at this point in the history
Fixes #10413
  • Loading branch information
NickCrews committed Jan 18, 2025
1 parent a720e68 commit 0e2ed8c
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 1 deletion.
26 changes: 26 additions & 0 deletions ibis/backends/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,32 @@ def to_delta(
with expr.to_pyarrow_batches(params=params) as batch_reader:
write_deltalake(path, batch_reader, **kwargs)

@util.experimental
def to_json(self, expr: ir.Table, path: str | Path, **kwargs: Any) -> None:
    """Write the results of `expr` to a JSON file of [{column -> value}, ...] objects.

    This method is eager and will execute the associated expression
    immediately.

    Parameters
    ----------
    expr
        The ibis table expression to execute and write out as JSON.
    path
        Where to write the JSON file, given as a string or Path.
    kwargs
        Additional, backend-specific keyword arguments.

    Raises
    ------
    NotImplementedError
        Always raised by this default implementation; the message names
        the class of the backend resolved from `expr`.
    """
    # The backend is looked up from the expression (falling back to the
    # default backend) so the error names the backend that would have run it.
    backend = expr._find_backend(use_default=True)
    message = f"{backend.__class__.__name__} does not support writing to JSON."
    raise NotImplementedError(message)


class CanListCatalog(abc.ABC):
@abc.abstractmethod
Expand Down
41 changes: 40 additions & 1 deletion ibis/backends/duckdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import warnings
from operator import itemgetter
from pathlib import Path
from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Any, Literal

import duckdb
import pyarrow as pa
Expand Down Expand Up @@ -1566,6 +1566,45 @@ def to_geo(
with self._safe_raw_sql(copy_cmd):
pass

@util.experimental
def to_json(
    self,
    expr: ir.Table,
    path: str | Path,
    *,
    compression: Literal["auto", "none", "gzip", "zstd"] = "auto",
    dateformat: str | None = None,
    timestampformat: str | None = None,
) -> None:
    """Write the results of `expr` to a JSON file of [{column -> value}, ...] objects.

    This method is eager and will execute the associated expression
    immediately.

    See https://duckdb.org/docs/sql/statements/copy.html#json-options
    for more info.

    Parameters
    ----------
    expr
        The ibis expression to execute and write to a JSON file.
    path
        Destination of the JSON file. URLs such as S3 buckets are supported.
    compression
        Compression codec to use. One of "auto", "none", "gzip", "zstd".
    dateformat
        Date format string.
    timestampformat
        Timestamp format string.
    """

    def _sql_literal(value: object) -> str:
        # Render a single-quoted SQL string literal, doubling any embedded
        # single quote so the value cannot terminate the literal early.
        return "'" + str(value).replace("'", "''") + "'"

    opts = f", COMPRESSION {_sql_literal(compression.upper())}"
    # NOTE: Codecov reported the two optional-format branches below as not
    # covered by tests.
    if dateformat:
        opts += f", DATEFORMAT {_sql_literal(dateformat)}"
    if timestampformat:
        opts += f", TIMESTAMPFORMAT {_sql_literal(timestampformat)}"

    copy_cmd = (
        f"COPY ({self.compile(expr)}) TO {_sql_literal(path)} "
        f"(FORMAT JSON, ARRAY true{opts});"
    )
    # Execute the COPY through the same context-manager pattern that
    # `to_geo` uses in this file; the result itself is not needed.
    with self._safe_raw_sql(copy_cmd):
        pass

def _get_schema_using_query(self, query: str) -> sch.Schema:
with self._safe_raw_sql(f"DESCRIBE {query}") as cur:
rows = cur.fetch_arrow_table()
Expand Down
32 changes: 32 additions & 0 deletions ibis/backends/tests/test_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,38 @@ def test_table_to_csv(tmp_path, backend, awards_players):
backend.assert_frame_equal(awards_players.to_pandas(), df)


@pytest.mark.notimpl(
    [
        "athena",
        "bigquery",
        "clickhouse",
        "databricks",
        "datafusion",
        "druid",
        "exasol",
        "flink",
        "impala",
        "mssql",
        "mysql",
        "oracle",
        "polars",
        "postgres",
        "pyspark",
        "risingwave",
        "snowflake",
        "sqlite",
        "trino",
    ],
    reason="haven't gotten to them yet. Might be easy!",
    raises=NotImplementedError,
)
def test_to_json(backend, tmp_path, awards_players):
    """Write `awards_players` with `to_json` and compare the file with `to_pandas()`."""
    json_file = tmp_path / "out.json"
    awards_players.to_json(json_file)

    # Read the file back as a list of row objects before materializing the
    # expected frame, then let the backend helper compare the two.
    roundtripped = pd.read_json(json_file, orient="records")
    expected = awards_players.to_pandas()
    backend.assert_frame_equal(expected, roundtripped)


@pytest.mark.notimpl(
["duckdb"],
reason="cannot inline WriteOptions objects",
Expand Down
22 changes: 22 additions & 0 deletions ibis/expr/types/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -771,6 +771,28 @@ def to_delta(
"""
self._find_backend(use_default=True).to_delta(self, path, **kwargs)

@experimental
def to_json(self, path: str | Path, **kwargs: Any) -> None:
    """Write the results of this expression to a JSON file of [{column -> value}, ...] objects.

    This method is eager and will execute the associated expression
    immediately.

    Parameters
    ----------
    path
        Where to write the JSON file, given as a string or Path.
    kwargs
        Additional, backend-specific keyword arguments, forwarded unchanged
        to the backend's `to_json`.
    """
    # Resolve the backend for this expression (falling back to the default
    # backend) and delegate the actual write to it.
    backend = self._find_backend(use_default=True)
    backend.to_json(self, path, **kwargs)

@experimental
def to_torch(
self,
Expand Down

0 comments on commit 0e2ed8c

Please sign in to comment.