From 167a601fa61aaaffe9ecd06bc3c62ff8b2eea4a1 Mon Sep 17 00:00:00 2001
From: Nick Crews
Date: Thu, 16 Jan 2025 21:07:25 -0800
Subject: [PATCH] feat(duckdb): add to_json() to Table and duckdb backend

Fixes https://github.com/ibis-project/ibis/issues/10413
---
 ibis/backends/__init__.py          | 26 +++++++++++++++++++++++++
 ibis/backends/duckdb/__init__.py   | 29 ++++++++++++++++++++++++++++
 ibis/backends/tests/test_export.py | 31 ++++++++++++++++++++++++++++++
 ibis/expr/types/core.py            | 20 +++++++++++++++++++
 4 files changed, 106 insertions(+)

diff --git a/ibis/backends/__init__.py b/ibis/backends/__init__.py
index 830e1f292253..c77be762da1a 100644
--- a/ibis/backends/__init__.py
+++ b/ibis/backends/__init__.py
@@ -586,6 +586,32 @@ def to_delta(
         with expr.to_pyarrow_batches(params=params) as batch_reader:
             write_deltalake(path, batch_reader, **kwargs)
 
+    @util.experimental
+    def to_json(
+        self,
+        expr: ir.Table,
+        path: str | Path,
+        **kwargs: Any,
+    ) -> None:
+        """Write the results of `expr` to a json file of [{column -> value}, ...] objects.
+
+        This method is eager and will execute the associated expression
+        immediately.
+
+        Parameters
+        ----------
+        expr
+            The ibis expression to execute and write out as JSON.
+        path
+            The destination. A string or Path to the JSON file to write.
+        kwargs
+            Additional, backend-specific keyword arguments.
+        """
+        # `self` is already the backend being asked; no need to search `expr`.
+        raise NotImplementedError(
+            f"{type(self).__name__} does not support writing to JSON."
+        )
+
 
 class CanListCatalog(abc.ABC):
     @abc.abstractmethod
diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py
index ffe68f217b1c..f02196f712f1 100644
--- a/ibis/backends/duckdb/__init__.py
+++ b/ibis/backends/duckdb/__init__.py
@@ -1566,6 +1566,35 @@ def to_geo(
         with self._safe_raw_sql(copy_cmd):
             pass
 
+    @util.experimental
+    def to_json(
+        self,
+        expr: ir.Table,
+        path: str | Path,
+        **kwargs: Any,
+    ) -> None:
+        """Write the results of `expr` to a json file of [{column -> value}, ...] objects.
+
+        This method is eager and will execute the associated expression
+        immediately.
+
+        Parameters
+        ----------
+        expr
+            The ibis expression to execute and write out as JSON.
+        path
+            The file to write. URLs such as S3 buckets are supported.
+        kwargs
+            Additional keyword arguments. Currently accepted but not used.
+        """
+        # Double embedded single quotes so the path stays one SQL string literal.
+        target = str(path).replace("'", "''")
+        copy_cmd = (
+            f"COPY ({self.compile(expr)}) TO '{target}' (FORMAT JSON, ARRAY true);"
+        )
+        with self._safe_raw_sql(copy_cmd):
+            pass
+
     def _get_schema_using_query(self, query: str) -> sch.Schema:
         with self._safe_raw_sql(f"DESCRIBE {query}") as cur:
             rows = cur.fetch_arrow_table()
diff --git a/ibis/backends/tests/test_export.py b/ibis/backends/tests/test_export.py
index 608f0ee27c0e..9e02e7c77888 100644
--- a/ibis/backends/tests/test_export.py
+++ b/ibis/backends/tests/test_export.py
@@ -347,6 +347,37 @@ def test_table_to_csv(tmp_path, backend, awards_players):
     backend.assert_frame_equal(awards_players.to_pandas(), df)
 
 
+@pytest.mark.notimpl(
+    [
+        "athena",
+        "bigquery",
+        "clickhouse",
+        "databricks",
+        "datafusion",
+        "druid",
+        "exasol",
+        "flink",
+        "impala",
+        "mssql",
+        "mysql",
+        "oracle",
+        "polars",
+        "postgres",
+        "pyspark",
+        "risingwave",
+        "snowflake",
+        "sqlite",
+    ],
+    reason="haven't gotten to them yet. Might be easy!",
+    raises=NotImplementedError,
+)
+def test_to_json(backend, tmp_path, awards_players):
+    out_path = tmp_path / "out.json"
+    awards_players.to_json(out_path)
+    df = pd.read_json(out_path, orient="records")
+    backend.assert_frame_equal(awards_players.to_pandas(), df)
+
+
 @pytest.mark.notimpl(
     ["duckdb"],
     reason="cannot inline WriteOptions objects",
diff --git a/ibis/expr/types/core.py b/ibis/expr/types/core.py
index 195f4f45dfab..359cbcd83f70 100644
--- a/ibis/expr/types/core.py
+++ b/ibis/expr/types/core.py
@@ -771,6 +771,26 @@ def to_delta(
         """
         self._find_backend(use_default=True).to_delta(self, path, **kwargs)
 
+    @experimental
+    def to_json(
+        self,
+        path: str | Path,
+        **kwargs: Any,
+    ) -> None:
+        """Write this table to a JSON file of [{column -> value}, ...] objects.
+
+        This method is eager and will execute the associated expression
+        immediately.
+
+        Parameters
+        ----------
+        path
+            The destination. A string or Path to the JSON file to write.
+        kwargs
+            Additional, backend-specific keyword arguments.
+        """
+        self._find_backend(use_default=True).to_json(self, path, **kwargs)
+
     @experimental
     def to_torch(
         self,