Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(duckdb): add to_json() to Table and duckdb backend #10681

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions ibis/backends/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,32 @@ def to_delta(
with expr.to_pyarrow_batches(params=params) as batch_reader:
write_deltalake(path, batch_reader, **kwargs)

@util.experimental
def to_json(
    self,
    expr: ir.Table,
    path: str | Path,
    **kwargs: Any,
) -> None:
    """Write the results of `expr` to a JSON file of [{column -> value}, ...] objects.

    This method is eager and will execute the associated expression
    immediately.

    This base implementation always raises; backends that support JSON
    output override it (e.g. the DuckDB backend).

    Parameters
    ----------
    expr
        The ibis expression to execute and persist as JSON.
    path
        The destination path of the JSON file to write.
    kwargs
        Additional, backend-specific keyword arguments.

    Raises
    ------
    NotImplementedError
        Always, unless a backend overrides this method.
    """
    # Resolve the backend only to produce a helpful class name in the error.
    backend = expr._find_backend(use_default=True)
    raise NotImplementedError(
        f"{backend.__class__.__name__} does not support writing to JSON."
    )


class CanListCatalog(abc.ABC):
@abc.abstractmethod
Expand Down
41 changes: 40 additions & 1 deletion ibis/backends/duckdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import warnings
from operator import itemgetter
from pathlib import Path
from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Any, Literal

import duckdb
import pyarrow as pa
Expand Down Expand Up @@ -1566,6 +1566,45 @@
with self._safe_raw_sql(copy_cmd):
pass

@util.experimental
def to_json(
    self,
    expr: ir.Table,
    path: str | Path,
    *,
    compression: Literal["auto", "none", "gzip", "zstd"] = "auto",
    dateformat: str | None = None,
    timestampformat: str | None = None,
) -> None:
    """Write the results of `expr` to a JSON file of [{column -> value}, ...] objects.

    This method is eager and will execute the associated expression
    immediately.
    See https://duckdb.org/docs/sql/statements/copy.html#json-options
    for more info.

    Parameters
    ----------
    expr
        The ibis expression to execute and persist as JSON.
    path
        The destination path of the JSON file.
        URLs such as S3 buckets are supported.
    compression
        Compression codec to use. One of "auto", "none", "gzip", "zstd".
    dateformat
        Date format string.
    timestampformat
        Timestamp format string.
    """

    def _sql_literal(value: str) -> str:
        # Double any embedded single quotes so the value stays a valid
        # SQL string literal (e.g. a path like "it's.json").
        return value.replace("'", "''")

    opts = f", COMPRESSION '{_sql_literal(compression.upper())}'"
    if dateformat:
        opts += f", DATEFORMAT '{_sql_literal(dateformat)}'"
    if timestampformat:
        opts += f", TIMESTAMPFORMAT '{_sql_literal(timestampformat)}'"
    # ARRAY true emits a single top-level JSON array of row objects.
    self.raw_sql(
        f"COPY ({self.compile(expr)}) TO '{_sql_literal(str(path))}' (FORMAT JSON, ARRAY true{opts});"
    )

def _get_schema_using_query(self, query: str) -> sch.Schema:
with self._safe_raw_sql(f"DESCRIBE {query}") as cur:
rows = cur.fetch_arrow_table()
Expand Down
32 changes: 32 additions & 0 deletions ibis/backends/tests/test_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,38 @@ def test_table_to_csv(tmp_path, backend, awards_players):
backend.assert_frame_equal(awards_players.to_pandas(), df)


@pytest.mark.notimpl(
    [
        "athena",
        "bigquery",
        "clickhouse",
        "databricks",
        "datafusion",
        "druid",
        "exasol",
        "flink",
        "impala",
        "mssql",
        "mysql",
        "oracle",
        "polars",
        "postgres",
        "pyspark",
        "risingwave",
        "snowflake",
        "sqlite",
        "trino",
    ],
    reason="haven't gotten to them yet. Might be easy!",
    raises=NotImplementedError,
)
def test_to_json(backend, tmp_path, awards_players):
    """Round-trip a table through `to_json` and compare against pandas."""
    target = tmp_path / "out.json"
    awards_players.to_json(target)
    # orient="records" matches the [{column -> value}, ...] layout written above.
    roundtripped = pd.read_json(target, orient="records")
    backend.assert_frame_equal(awards_players.to_pandas(), roundtripped)


@pytest.mark.notimpl(
["duckdb"],
reason="cannot inline WriteOptions objects",
Expand Down
22 changes: 22 additions & 0 deletions ibis/expr/types/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -771,6 +771,28 @@ def to_delta(
"""
self._find_backend(use_default=True).to_delta(self, path, **kwargs)

@experimental
def to_json(
    self,
    path: str | Path,
    **kwargs: Any,
) -> None:
    """Write the results of this expression to a JSON file of [{column -> value}, ...] objects.

    This method is eager and will execute the associated expression
    immediately.

    Parameters
    ----------
    path
        The destination path of the JSON file to write.
    kwargs
        Additional, backend-specific keyword arguments.
    """
    # Delegate to the bound backend's `to_json`; backends without JSON
    # support raise NotImplementedError from the base implementation.
    self._find_backend(use_default=True).to_json(self, path, **kwargs)

@experimental
def to_torch(
self,
Expand Down
Loading