From 1821c0d349a01a408216668b41507ff0ea69491c Mon Sep 17 00:00:00 2001 From: Tornike Gurgenidze Date: Wed, 28 Aug 2024 21:09:30 +0400 Subject: [PATCH] feat: add datafusion to parity tests (#1108) * feat: add datafusion to parity tests * fix: apply ruff linter rules * fix: mark failing parity tests * fix: reorganize parity tests --------- Co-authored-by: tokoko --- .devcontainer/Dockerfile | 5 + .devcontainer/devcontainer.json | 24 ++ ibis_substrait/compiler/translate.py | 2 +- ibis_substrait/tests/compiler/parity_utils.py | 65 +++++ .../test_compile/tpc_h01/tpc_h01.json | 2 +- .../test_compile/tpc_h02/tpc_h02.json | 2 +- .../test_compile/tpc_h03/tpc_h03.json | 2 +- .../test_compile/tpc_h04/tpc_h04.json | 2 +- .../test_compile/tpc_h05/tpc_h05.json | 2 +- .../test_compile/tpc_h06/tpc_h06.json | 2 +- .../test_compile/tpc_h07/tpc_h07.json | 2 +- .../test_compile/tpc_h08/tpc_h08.json | 2 +- .../test_compile/tpc_h09/tpc_h09.json | 2 +- .../test_compile/tpc_h10/tpc_h10.json | 2 +- .../test_compile/tpc_h11/tpc_h11.json | 2 +- .../test_compile/tpc_h12/tpc_h12.json | 2 +- .../test_compile/tpc_h13/tpc_h13.json | 2 +- .../test_compile/tpc_h14/tpc_h14.json | 2 +- .../test_compile/tpc_h15/tpc_h15.json | 2 +- .../test_compile/tpc_h16/tpc_h16.json | 2 +- .../test_compile/tpc_h17/tpc_h17.json | 2 +- .../test_compile/tpc_h18/tpc_h18.json | 2 +- .../test_compile/tpc_h19/tpc_h19.json | 2 +- .../test_compile/tpc_h20/tpc_h20.json | 2 +- .../test_compile/tpc_h21/tpc_h21.json | 2 +- .../test_compile/tpc_h22/tpc_h22.json | 2 +- ibis_substrait/tests/compiler/test_parity.py | 244 ++++++++++-------- poetry.lock | 80 +++--- pyproject.toml | 1 + 29 files changed, 297 insertions(+), 168 deletions(-) create mode 100644 .devcontainer/Dockerfile create mode 100644 .devcontainer/devcontainer.json create mode 100644 ibis_substrait/tests/compiler/parity_utils.py diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile new file mode 100644 index 00000000..c8cad6d7 --- /dev/null +++ b/.devcontainer/Dockerfile @@ -0,0 +1,5 @@ +FROM mcr.microsoft.com/vscode/devcontainers/python:3.10-buster +USER vscode +RUN curl -sSL https://install.python-poetry.org | python - +RUN /home/vscode/.local/bin/poetry config virtualenvs.in-project true +USER root \ No newline at end of file diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 00000000..35e94a50 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,24 @@ +{ + "name": "ibis-substrait-devcontainer", + "build": { + "context": "..", + "dockerfile": "Dockerfile" + }, + + // Features to add to the dev container. More info: https://containers.dev/features. + "features": { + "ghcr.io/devcontainers/features/nix:1": {} + }, + + // Use 'forwardPorts' to make a list of ports inside the container available locally. + // "forwardPorts": [], + + // Use 'postCreateCommand' to run commands after the container is created. + "postCreateCommand": "poetry install" + + // Configure tool-specific properties. + // "customizations": {}, + + // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. + // "remoteUser": "root" +} diff --git a/ibis_substrait/compiler/translate.py b/ibis_substrait/compiler/translate.py index db7d6eda..f4b51932 100644 --- a/ibis_substrait/compiler/translate.py +++ b/ibis_substrait/compiler/translate.py @@ -883,7 +883,7 @@ def _translate_join_type(join_kind: ops.JoinKind) -> stalg.JoinRel.JoinType.V: "left": stalg.JoinRel.JoinType.JOIN_TYPE_LEFT, "right": stalg.JoinRel.JoinType.JOIN_TYPE_RIGHT, "outer": stalg.JoinRel.JoinType.JOIN_TYPE_OUTER, - "semi": stalg.JoinRel.JoinType.JOIN_TYPE_SEMI, + "semi": stalg.JoinRel.JoinType.JOIN_TYPE_LEFT_SEMI, # "asof", # "anti", # "any_inner", diff --git a/ibis_substrait/tests/compiler/parity_utils.py b/ibis_substrait/tests/compiler/parity_utils.py new file mode 100644 index 00000000..e703aa53 --- /dev/null +++ b/ibis_substrait/tests/compiler/parity_utils.py @@ -0,0 +1,65 @@ +from abc import ABC, abstractmethod + +import datafusion +import pyarrow as pa + + +class SubstraitConsumer(ABC): + @abstractmethod + def with_tables(self, datasets: dict[str, pa.Table]): + pass + + @abstractmethod + def execute(self, plan) -> pa.Table: + pass + + +class AceroSubstraitConsumer(SubstraitConsumer): + def __init__(self) -> None: + super().__init__() + + def with_tables(self, datasets: dict[str, pa.Table]): + self.datasets = datasets + return self + + def execute(self, plan) -> pa.Table: + import pyarrow.substrait as pa_substrait + + def get_table_provider(datasets): + def table_provider(names, schema): + return datasets[names[0]] + + return table_provider + + query_bytes = plan.SerializeToString() + result = pa_substrait.run_query( + pa.py_buffer(query_bytes), + table_provider=get_table_provider(self.datasets), + ) + + return result.read_all() + + +class DatafusionSubstraitConsumer(SubstraitConsumer): + def __init__(self) -> None: + self.connection = datafusion.SessionContext() + + def with_tables(self, datasets: dict[str, pa.Table]): + for k, v in datasets.items(): + self.connection.deregister_table(k) + self.connection.register_record_batches(k, [v.to_batches()]) + return self + + def execute(self, plan) -> pa.Table: + plan_data = plan.SerializeToString() + substrait_plan = datafusion.substrait.serde.deserialize_bytes(plan_data) + logical_plan = datafusion.substrait.consumer.from_substrait_plan( + self.connection, substrait_plan + ) + + df = self.connection.create_dataframe_from_logical_plan(logical_plan) + for column_number, column_name in enumerate(df.schema().names): + df = df.with_column_renamed( + column_name, plan.relations[0].root.names[column_number] + ) + return df.to_arrow_table() diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h01/tpc_h01.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h01/tpc_h01.json index 791710e5..165cd0a5 100644 --- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h01/tpc_h01.json +++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h01/tpc_h01.json @@ -694,7 +694,7 @@ } ], "version": { - "minorNumber": 52, + "minorNumber": 54, "producer": "ibis-substrait" } } \ No newline at end of file diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h02/tpc_h02.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h02/tpc_h02.json index 58282705..f6449808 100644 --- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h02/tpc_h02.json +++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h02/tpc_h02.json @@ -1285,7 +1285,7 @@ } ], "version": { - "minorNumber": 52, + "minorNumber": 54, "producer": "ibis-substrait" } } \ No newline at end of file diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h03/tpc_h03.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h03/tpc_h03.json index abb3cf7a..70eebb73 100644 --- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h03/tpc_h03.json +++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h03/tpc_h03.json @@ -728,7 +728,7 @@ } ], "version": { - "minorNumber": 52, + "minorNumber": 54, "producer": "ibis-substrait" } } \ No newline at end of file diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h04/tpc_h04.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h04/tpc_h04.json index 9bc1a5ce..606413a7 100644 --- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h04/tpc_h04.json +++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h04/tpc_h04.json @@ -506,7 +506,7 @@ } ], "version": { - "minorNumber": 52, + "minorNumber": 54, "producer": "ibis-substrait" } } \ No newline at end of file diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h05/tpc_h05.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h05/tpc_h05.json index a427286d..3faec4e8 100644 --- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h05/tpc_h05.json +++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h05/tpc_h05.json @@ -1012,7 +1012,7 @@ } ], "version": { - "minorNumber": 52, + "minorNumber": 54, "producer": "ibis-substrait" } } \ No newline at end of file diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h06/tpc_h06.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h06/tpc_h06.json index ea27fc27..0e9455d6 100644 --- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h06/tpc_h06.json +++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h06/tpc_h06.json @@ -439,7 +439,7 @@ } ], "version": { - "minorNumber": 52, + "minorNumber": 54, "producer": "ibis-substrait" } } \ No newline at end of file diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h07/tpc_h07.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h07/tpc_h07.json index 7f987413..772ad296 100644 --- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h07/tpc_h07.json +++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h07/tpc_h07.json @@ -1032,7 +1032,7 @@ } ], "version": { - "minorNumber": 52, + "minorNumber": 54, "producer": "ibis-substrait" } } \ No newline at end of file diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h08/tpc_h08.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h08/tpc_h08.json index 7fcddb6d..fe9c104d 100644 --- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h08/tpc_h08.json +++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h08/tpc_h08.json @@ -1358,7 +1358,7 @@ } ], "version": { - "minorNumber": 52, + "minorNumber": 54, "producer": "ibis-substrait" } } \ No newline at end of file diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h09/tpc_h09.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h09/tpc_h09.json index c71f1fd8..4872dd6c 100644 --- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h09/tpc_h09.json +++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h09/tpc_h09.json @@ -878,7 +878,7 @@ } ], "version": { - "minorNumber": 52, + "minorNumber": 54, "producer": "ibis-substrait" } } \ No newline at end of file diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h10/tpc_h10.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h10/tpc_h10.json index a7ae6381..93fb758a 100644 --- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h10/tpc_h10.json +++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h10/tpc_h10.json @@ -843,7 +843,7 @@ } ], "version": { - "minorNumber": 52, + "minorNumber": 54, "producer": "ibis-substrait" } } \ No newline at end of file diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h11/tpc_h11.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h11/tpc_h11.json index c17054c4..be2fb520 100644 --- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h11/tpc_h11.json +++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h11/tpc_h11.json @@ -863,7 +863,7 @@ } ], "version": { - "minorNumber": 52, + "minorNumber": 54, "producer": "ibis-substrait" } } \ No newline at end of file diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h12/tpc_h12.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h12/tpc_h12.json index 6364eb23..5fdc62b7 100644 --- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h12/tpc_h12.json +++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h12/tpc_h12.json @@ -774,7 +774,7 @@ } ], "version": { - "minorNumber": 52, + "minorNumber": 54, "producer": "ibis-substrait" } } \ No newline at end of file diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h13/tpc_h13.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h13/tpc_h13.json index 4aa36f17..51d780ce 100644 --- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h13/tpc_h13.json +++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h13/tpc_h13.json @@ -421,7 +421,7 @@ } ], "version": { - "minorNumber": 52, + "minorNumber": 54, "producer": "ibis-substrait" } } \ No newline at end of file diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h14/tpc_h14.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h14/tpc_h14.json index 992cd76d..3044ef14 100644 --- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h14/tpc_h14.json +++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h14/tpc_h14.json @@ -775,7 +775,7 @@ } ], "version": { - "minorNumber": 52, + "minorNumber": 54, "producer": "ibis-substrait" } } \ No newline at end of file diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h15/tpc_h15.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h15/tpc_h15.json index d69ba535..6a49ba73 100644 --- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h15/tpc_h15.json +++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h15/tpc_h15.json @@ -1085,7 +1085,7 @@ } ], "version": { - "minorNumber": 52, + "minorNumber": 54, "producer": "ibis-substrait" } } \ No newline at end of file diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h16/tpc_h16.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h16/tpc_h16.json index 68db1339..a13a70d3 100644 --- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h16/tpc_h16.json +++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h16/tpc_h16.json @@ -704,7 +704,7 @@ } ], "version": { - "minorNumber": 52, + "minorNumber": 54, "producer": "ibis-substrait" } } \ No newline at end of file diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h17/tpc_h17.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h17/tpc_h17.json index 3730a87c..74ebadd0 100644 --- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h17/tpc_h17.json +++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h17/tpc_h17.json @@ -773,7 +773,7 @@ } ], "version": { - "minorNumber": 52, + "minorNumber": 54, "producer": "ibis-substrait" } } \ No newline at end of file diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h18/tpc_h18.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h18/tpc_h18.json index 8a018610..a3b1d00f 100644 --- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h18/tpc_h18.json +++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h18/tpc_h18.json @@ -774,7 +774,7 @@ } ], "version": { - "minorNumber": 52, + "minorNumber": 54, "producer": "ibis-substrait" } } \ No newline at end of file diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h19/tpc_h19.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h19/tpc_h19.json index dc78a156..a76fdc28 100644 --- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h19/tpc_h19.json +++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h19/tpc_h19.json @@ -1426,7 +1426,7 @@ } ], "version": { - "minorNumber": 52, + "minorNumber": 54, "producer": "ibis-substrait" } } \ No newline at end of file diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h20/tpc_h20.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h20/tpc_h20.json index b05bd269..e2e2040d 100644 --- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h20/tpc_h20.json +++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h20/tpc_h20.json @@ -1012,7 +1012,7 @@ } ], "version": { - "minorNumber": 52, + "minorNumber": 54, "producer": "ibis-substrait" } } \ No newline at end of file diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h21/tpc_h21.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h21/tpc_h21.json index 61057019..bf207491 100644 --- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h21/tpc_h21.json +++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h21/tpc_h21.json @@ -1245,7 +1245,7 @@ } ], "version": { - "minorNumber": 52, + "minorNumber": 54, "producer": "ibis-substrait" } } \ No newline at end of file diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h22/tpc_h22.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h22/tpc_h22.json index aff78571..2393c8d1 100644 --- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h22/tpc_h22.json +++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h22/tpc_h22.json @@ -879,7 +879,7 @@ } ], "version": { - "minorNumber": 52, + "minorNumber": 54, "producer": "ibis-substrait" } } \ No newline at end of file diff --git a/ibis_substrait/tests/compiler/test_parity.py b/ibis_substrait/tests/compiler/test_parity.py index 72062a4f..f4e14ac9 100644 --- a/ibis_substrait/tests/compiler/test_parity.py +++ b/ibis_substrait/tests/compiler/test_parity.py @@ -5,13 +5,18 @@ import ibis import pyarrow as pa import pyarrow.compute as pc -import pyarrow.substrait as pa_substrait import pytest from ibis import _ from ibis.conftest import LINUX, SANDBOXED from ibis_substrait.compiler.core import SubstraitCompiler +from .parity_utils import ( + AceroSubstraitConsumer, + DatafusionSubstraitConsumer, + SubstraitConsumer, +) + nix_linux_sandbox = pytest.mark.xfail( LINUX and SANDBOXED, reason="nix on linux cannot download duckdb extensions or data due to sandboxing", @@ -25,28 +30,7 @@ def sort_pyarrow_table(table: pa.Table): return pc.take(table, sort_indices) -def run_query_acero(plan, datasets, compiler): - def get_table_provider(datasets): - def table_provider(names, schema): - return datasets[names[0]] - - return table_provider - - plan = compiler.compile(plan) - query_bytes = plan.SerializeToString() - result = pa_substrait.run_query( - # TODO is this still necessary? - # PyArrow wants its bytes in a very specific byte-string - pa.py_buffer(query_bytes), - table_provider=get_table_provider(datasets), - ) - - results = result.read_all() - assert isinstance(results, pa.lib.Table) - - return results - - +# TODO move this into a consumer class def run_query_duckdb(query, datasets): with tempfile.TemporaryDirectory() as tempdir: con = ibis.duckdb.connect(os.path.join(tempdir, "temp.db")) @@ -59,38 +43,6 @@ def run_query_duckdb(query, datasets): return res -def run_query_duckdb_substrait(expr, datasets, compiler): - import duckdb - - with tempfile.TemporaryDirectory() as tempdir: - con = duckdb.connect(database=os.path.join(tempdir, "temp.db")) - con.sql(f"SET home_directory='{tempdir}'") - con.install_extension("substrait") - con.load_extension("substrait") - - for k, v in datasets.items(): # noqa: B007 - con.sql(f"CREATE TABLE {k} AS SELECT * FROM v") - - plan = compiler.compile(expr) - result = con.from_substrait(plan.SerializeToString()) - return result.fetch_arrow_table() - - -def run_parity_tests(expr, datasets, compiler, engines=None): - if engines is None: - engines = ["acero"] # duckdb_substrait disabled because can't run on windows - res_duckdb = sort_pyarrow_table(run_query_duckdb(expr, datasets)) - if "acero" in engines: - res_acero = sort_pyarrow_table(run_query_acero(expr, datasets, compiler)) - assert res_acero.equals(res_duckdb) - - if "duckdb_substrait" in engines: - res_duckdb_substrait = sort_pyarrow_table( - run_query_duckdb_substrait(expr, datasets, compiler) - ) - assert res_duckdb_substrait.equals(res_duckdb) - - orders_raw = [ ("order_id", "int64", [1, 2, 3, 4]), ("fk_store_id", "int64", [1, 1, 2, 2]), @@ -117,127 +69,191 @@ def run_parity_tests(expr, datasets, compiler, engines=None): } -def test_projection(): - expr = orders["order_id", "order_total"] +@pytest.fixture +def acero_consumer(): + return AceroSubstraitConsumer().with_tables(datasets) - compiler = SubstraitCompiler() - run_parity_tests(expr, datasets, compiler=compiler) +@pytest.fixture +def datafusion_consumer(): + return DatafusionSubstraitConsumer().with_tables(datasets) -def test_mutate(): - expr = orders.mutate(order_total_plus_1=orders["order_total"] + 1) + +def run_parity_test(consumer: SubstraitConsumer, expr): + res_duckdb = sort_pyarrow_table(run_query_duckdb(expr, datasets)) compiler = SubstraitCompiler() - run_parity_tests(expr, datasets, compiler=compiler) + res_compare = sort_pyarrow_table(consumer.execute(compiler.compile(expr))) -def test_sort(): - expr = orders.order_by("order_total") - - compiler = SubstraitCompiler() - run_parity_tests(expr, datasets, compiler=compiler) + assert res_compare.equals(res_duckdb) -def test_sort_limit(): - expr = orders.order_by("order_total").limit(2) +@pytest.mark.parametrize("consumer", ["acero_consumer", "datafusion_consumer"]) +def test_projection(consumer: str, request): + expr = orders["order_id", "order_total"] + run_parity_test(request.getfixturevalue(consumer), expr) - compiler = SubstraitCompiler() - run_parity_tests(expr, datasets, compiler=compiler) +@pytest.mark.parametrize("consumer", ["acero_consumer", "datafusion_consumer"]) +def test_mutate(consumer: str, request): + expr = orders.mutate(order_total_plus_1=orders["order_total"] + 1) + run_parity_test(request.getfixturevalue(consumer), expr) -def test_filter(): - filtered_table = orders.filter(lambda t: t.order_total > 30) - compiler = SubstraitCompiler() - run_parity_tests(filtered_table, datasets, compiler=compiler) +@pytest.mark.parametrize("consumer", ["acero_consumer", "datafusion_consumer"]) +def test_sort(consumer: str, request): + expr = orders.order_by("order_total") + run_parity_test(request.getfixturevalue(consumer), expr) -def test_inner_join(): - expr = orders.join(stores, orders["fk_store_id"] == stores["store_id"]) +@pytest.mark.parametrize("consumer", ["acero_consumer", "datafusion_consumer"]) +def test_sort_limit(consumer: str, request): + expr = orders.order_by("order_total").limit(2) + run_parity_test(request.getfixturevalue(consumer), expr) - compiler = SubstraitCompiler() - run_parity_tests(expr, datasets, compiler=compiler) +@pytest.mark.parametrize("consumer", ["acero_consumer", "datafusion_consumer"]) +def test_filter(consumer: str, request): + expr = orders.filter(lambda t: t.order_total > 30) + run_parity_test(request.getfixturevalue(consumer), expr) -def test_left_join(): - expr = orders.join(stores, orders["fk_store_id"] == stores["store_id"], how="left") - compiler = SubstraitCompiler() - run_parity_tests(expr, datasets, compiler=compiler) +@pytest.mark.parametrize("consumer", ["acero_consumer", "datafusion_consumer"]) +def test_inner_join(consumer: str, request): + expr = orders.join(stores, orders["fk_store_id"] == stores["store_id"]) + run_parity_test(request.getfixturevalue(consumer), expr) -def test_filter_groupby(): +@pytest.mark.parametrize("consumer", ["acero_consumer", "datafusion_consumer"]) +def test_left_join(consumer: str, request): + expr = orders.join(stores, orders["fk_store_id"] == stores["store_id"], how="left") + run_parity_test(request.getfixturevalue(consumer), expr) + + +@pytest.mark.parametrize( + "consumer", + [ + "acero_consumer", + pytest.param( + "datafusion_consumer", + marks=[pytest.mark.xfail(Exception, reason="")], + ), + ], +) +def test_filter_groupby(consumer: str, request): filter_table = orders.join( stores, orders["fk_store_id"] == stores["store_id"] ).filter(lambda t: t.order_total > 30) - grouped_table = filter_table.group_by("city").aggregate( + expr = filter_table.group_by("city").aggregate( sales=filter_table["order_id"].count() ) - compiler = SubstraitCompiler() - run_parity_tests(grouped_table, datasets, compiler=compiler) - - -def test_filter_groupby_count_distinct(): + run_parity_test(request.getfixturevalue(consumer), expr) + + +@pytest.mark.parametrize( + "consumer", + [ + pytest.param( + "acero_consumer", + marks=[ + pytest.mark.xfail(pa.ArrowNotImplementedError, reason="Unimplemented") + ], + ), + pytest.param( + "datafusion_consumer", + marks=[pytest.mark.xfail(Exception, reason="")], + ), + ], +) +def test_filter_groupby_count_distinct(consumer: str, request): filter_table = orders.join( stores, orders["fk_store_id"] == stores["store_id"] ).filter(lambda t: t.order_total > 30) - grouped_table = filter_table.group_by("city").aggregate( - sales=filter_table["city"].nunique() - ) + expr = filter_table.group_by("city").aggregate(sales=filter_table["city"].nunique()) - compiler = SubstraitCompiler() - run_parity_tests(grouped_table, datasets, compiler=compiler, engines=[]) + run_parity_test(request.getfixturevalue(consumer), expr) -def test_aggregate_having(): +@pytest.mark.parametrize( + "consumer", + [ + "acero_consumer", + pytest.param( + "datafusion_consumer", + marks=[pytest.mark.xfail(Exception, reason="")], + ), + ], +) +def test_aggregate_having(consumer: str, request): expr = orders.aggregate( [orders.order_id.max().name("amax"), orders.order_id.count().name("acount")], by="fk_store_id", having=(_.order_id.count() > 1), ) - compiler = SubstraitCompiler() - run_parity_tests(expr, datasets, compiler=compiler) + run_parity_test(request.getfixturevalue(consumer), expr) -def test_inner_join_chain(): +@pytest.mark.parametrize("consumer", ["acero_consumer", "datafusion_consumer"]) +def test_inner_join_chain(consumer: str, request): expr = orders.join(stores, orders["fk_store_id"] == stores["store_id"]).join( customers, orders["fk_customer_id"] == customers["customer_id"] ) - compiler = SubstraitCompiler() - run_parity_tests(expr, datasets, compiler=compiler) + run_parity_test(request.getfixturevalue(consumer), expr) -def test_union(): +@pytest.mark.parametrize("consumer", ["acero_consumer", "datafusion_consumer"]) +def test_union(consumer: str, request): expr = orders.union(orders) - compiler = SubstraitCompiler() - run_parity_tests(expr, datasets, compiler=compiler) + run_parity_test(request.getfixturevalue(consumer), expr) -# TODO acero doesn't seem to support this, maybe run duckdb on both sides? -def test_window(): +@pytest.mark.parametrize( + "consumer", + [ + pytest.param( + "acero_consumer", + marks=[ + pytest.mark.xfail(pa.ArrowNotImplementedError, reason="Unimplemented") + ], + ), + "datafusion_consumer", + ], +) +def test_window(consumer: str, request): expr = orders.select( orders["order_total"].mean().over(ibis.window(group_by="fk_store_id")) ) - compiler = SubstraitCompiler() - run_parity_tests(expr, datasets, compiler=compiler, engines=[]) + run_parity_test(request.getfixturevalue(consumer), expr) -def test_is_in(): +@pytest.mark.parametrize("consumer", ["acero_consumer", "datafusion_consumer"]) +def test_is_in(consumer: str, request): expr = stores.filter(stores.city.isin(["NY", "LA"])) - compiler = SubstraitCompiler() - run_parity_tests(expr, datasets, compiler=compiler) + run_parity_test(request.getfixturevalue(consumer), expr) -def test_scalar_subquery(): +@pytest.mark.parametrize( + "consumer", + [ + pytest.param( + "acero_consumer", + marks=[ + pytest.mark.xfail(pa.ArrowNotImplementedError, reason="Unimplemented") + ], + ), + "datafusion_consumer", + ], +) +def test_scalar_subquery(consumer: str, request): expr = orders.filter(orders["order_total"] == orders["order_total"].max()) - compiler = SubstraitCompiler() - run_parity_tests(expr, datasets, compiler=compiler, engines=[]) + run_parity_test(request.getfixturevalue(consumer), expr) diff --git a/poetry.lock b/poetry.lock index 4740c30e..6c0ed814 100644 --- a/poetry.lock +++ b/poetry.lock @@ -138,6 +138,24 @@ tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.1 [package.extras] toml = ["tomli"] +[[package]] +name = "datafusion" +version = "40.1.0" +description = "Build and run queries against data" +optional = false +python-versions = ">=3.6" +files = [ + {file = "datafusion-40.1.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:983b5b89a3aaaf2789f0423564cc24dbe5eb3f4f0a7daa8e87b35ce4d2920d6b"}, + {file = "datafusion-40.1.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be44d24971e73d324a3f41503bb091f48d171d50d1d2415b469ca5e3953b5a0e"}, + {file = "datafusion-40.1.0-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:fd8c3689e9d195a9603a00e399b995a6343587d0763358f2eee65b85d5f56a37"}, + {file = "datafusion-40.1.0-cp38-abi3-win_amd64.whl", hash = "sha256:2960871ce31ee489ef3b210e77c4048278e9ee873517eff9f46ca52c82eb166d"}, + {file = "datafusion-40.1.0.tar.gz", hash = "sha256:7c7cfd2bcf491adcf6580f0ff5882ca9fc658dbdab30802ad46889e417965cc6"}, +] + +[package.dependencies] +pyarrow = ">=11.0.0" +typing-extensions = {version = "*", markers = "python_version < \"3.13\""} + [[package]] name = "decorator" version = "5.1.1" @@ -286,13 +304,13 @@ visualization = ["graphviz (>=0.16,<1)"] [[package]] name = "importlib-metadata" -version = "8.2.0" +version = "8.4.0" description = "Read metadata from Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "importlib_metadata-8.2.0-py3-none-any.whl", hash = "sha256:11901fa0c2f97919b288679932bb64febaeacf289d18ac84dd68cb2e74213369"}, - {file = "importlib_metadata-8.2.0.tar.gz", hash = "sha256:72e8d4399996132204f9a16dcc751af254a48f8d1b20b9ff0f98d4a8f901e73d"}, + {file = "importlib_metadata-8.4.0-py3-none-any.whl", hash = "sha256:66f342cc6ac9818fc6ff340576acd24d65ba0b3efabb2b4ac08b598965a4a2f1"}, + {file = "importlib_metadata-8.4.0.tar.gz", hash = "sha256:9a547d3bc3608b025f93d403fdd1aae741c24fbb8314df4b155675742ce303c5"}, ] [package.dependencies] @@ -1038,46 +1056,46 @@ jupyter = ["ipywidgets (>=7.5.1,<9)"] [[package]] name = "ruff" -version = "0.6.0" +version = "0.6.2" description = "An extremely fast Python linter and code formatter, written in Rust." optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.6.0-py3-none-linux_armv6l.whl", hash = "sha256:92dcce923e5df265781e5fc76f9a1edad52201a7aafe56e586b90988d5239013"}, - {file = "ruff-0.6.0-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:31b90ff9dc79ed476c04e957ba7e2b95c3fceb76148f2079d0d68a908d2cfae7"}, - {file = "ruff-0.6.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:6d834a9ec9f8287dd6c3297058b3a265ed6b59233db22593379ee38ebc4b9768"}, - {file = "ruff-0.6.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2089267692696aba342179471831a085043f218706e642564812145df8b8d0d"}, - {file = "ruff-0.6.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:aa62b423ee4bbd8765f2c1dbe8f6aac203e0583993a91453dc0a449d465c84da"}, - {file = "ruff-0.6.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7344e1a964b16b1137ea361d6516ce4ee61a0403fa94252a1913ecc1311adcae"}, - {file = "ruff-0.6.0-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:487f3a35c3f33bf82be212ce15dc6278ea854e35573a3f809442f73bec8b2760"}, - {file = "ruff-0.6.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:75db409984077a793cf344d499165298a6f65449e905747ac65983b12e3e64b1"}, - {file = "ruff-0.6.0-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:84908bd603533ecf1db456d8fc2665d1f4335d722e84bc871d3bbd2d1116c272"}, - {file = "ruff-0.6.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f1749a0aef3ec41ed91a0e2127a6ae97d2e2853af16dbd4f3c00d7a3af726c5"}, - {file = "ruff-0.6.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:016fea751e2bcfbbd2f8cb19b97b37b3fd33148e4df45b526e87096f4e17354f"}, - {file = "ruff-0.6.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:6ae80f141b53b2e36e230017e64f5ea2def18fac14334ffceaae1b780d70c4f7"}, - {file = "ruff-0.6.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:eaaaf33ea4b3f63fd264d6a6f4a73fa224bbfda4b438ffea59a5340f4afa2bb5"}, - {file = "ruff-0.6.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:7667ddd1fc688150a7ca4137140867584c63309695a30016880caf20831503a0"}, - {file = "ruff-0.6.0-py3-none-win32.whl", hash = "sha256:ae48365aae60d40865a412356f8c6f2c0be1c928591168111eaf07eaefa6bea3"}, - {file = "ruff-0.6.0-py3-none-win_amd64.whl", hash = "sha256:774032b507c96f0c803c8237ce7d2ef3934df208a09c40fa809c2931f957fe5e"}, - {file = "ruff-0.6.0-py3-none-win_arm64.whl", hash = "sha256:a5366e8c3ae6b2dc32821749b532606c42e609a99b0ae1472cf601da931a048c"}, - {file = "ruff-0.6.0.tar.gz", hash = "sha256:272a81830f68f9bd19d49eaf7fa01a5545c5a2e86f32a9935bb0e4bb9a1db5b8"}, + {file = "ruff-0.6.2-py3-none-linux_armv6l.whl", hash = "sha256:5c8cbc6252deb3ea840ad6a20b0f8583caab0c5ef4f9cca21adc5a92b8f79f3c"}, + {file = "ruff-0.6.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:17002fe241e76544448a8e1e6118abecbe8cd10cf68fde635dad480dba594570"}, + {file = "ruff-0.6.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:3dbeac76ed13456f8158b8f4fe087bf87882e645c8e8b606dd17b0b66c2c1158"}, + {file = "ruff-0.6.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:094600ee88cda325988d3f54e3588c46de5c18dae09d683ace278b11f9d4d534"}, + {file = "ruff-0.6.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:316d418fe258c036ba05fbf7dfc1f7d3d4096db63431546163b472285668132b"}, + {file = "ruff-0.6.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d72b8b3abf8a2d51b7b9944a41307d2f442558ccb3859bbd87e6ae9be1694a5d"}, + {file = "ruff-0.6.2-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:2aed7e243be68487aa8982e91c6e260982d00da3f38955873aecd5a9204b1d66"}, + {file = "ruff-0.6.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d371f7fc9cec83497fe7cf5eaf5b76e22a8efce463de5f775a1826197feb9df8"}, + {file = "ruff-0.6.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a8f310d63af08f583363dfb844ba8f9417b558199c58a5999215082036d795a1"}, + {file = "ruff-0.6.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7db6880c53c56addb8638fe444818183385ec85eeada1d48fc5abe045301b2f1"}, + {file = "ruff-0.6.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:1175d39faadd9a50718f478d23bfc1d4da5743f1ab56af81a2b6caf0a2394f23"}, + {file = "ruff-0.6.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:5b939f9c86d51635fe486585389f54582f0d65b8238e08c327c1534844b3bb9a"}, + {file = "ruff-0.6.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:d0d62ca91219f906caf9b187dea50d17353f15ec9bb15aae4a606cd697b49b4c"}, + {file = "ruff-0.6.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:7438a7288f9d67ed3c8ce4d059e67f7ed65e9fe3aa2ab6f5b4b3610e57e3cb56"}, + {file = "ruff-0.6.2-py3-none-win32.whl", hash = "sha256:279d5f7d86696df5f9549b56b9b6a7f6c72961b619022b5b7999b15db392a4da"}, + {file = "ruff-0.6.2-py3-none-win_amd64.whl", hash = "sha256:d9f3469c7dd43cd22eb1c3fc16926fb8258d50cb1b216658a07be95dd117b0f2"}, + {file = "ruff-0.6.2-py3-none-win_arm64.whl", hash = "sha256:f28fcd2cd0e02bdf739297516d5643a945cc7caf09bd9bcb4d932540a5ea4fa9"}, + {file = "ruff-0.6.2.tar.gz", hash = "sha256:239ee6beb9e91feb8e0ec384204a763f36cb53fb895a1a364618c6abb076b3be"}, ] [[package]] name = "setuptools" -version = "72.2.0" +version = "73.0.1" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "setuptools-72.2.0-py3-none-any.whl", hash = "sha256:f11dd94b7bae3a156a95ec151f24e4637fb4fa19c878e4d191bfb8b2d82728c4"}, - {file = "setuptools-72.2.0.tar.gz", hash = "sha256:80aacbf633704e9c8bfa1d99fa5dd4dc59573efcf9e4042c13d3bcef91ac2ef9"}, + {file = "setuptools-73.0.1-py3-none-any.whl", hash = "sha256:b208925fcb9f7af924ed2dc04708ea89791e24bde0d3020b27df0e116088b34e"}, + {file = "setuptools-73.0.1.tar.gz", hash = "sha256:d59a3e788ab7e012ab2c4baed1b376da6366883ee20d7a5fc426816e3d7b1193"}, ] [package.extras] -core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.text (>=3.7)", "more-itertools (>=8.8)", "ordered-set (>=3.1.1)", "packaging (>=24)", "platformdirs (>=2.6.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] +core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.text (>=3.7)", "more-itertools (>=8.8)", "packaging (>=24)", "platformdirs (>=2.6.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] -test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "mypy (==1.11.*)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (<0.4)", "pytest-ruff (>=0.2.1)", "pytest-ruff (>=0.3.2)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "mypy (==1.11.*)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (<0.4)", "pytest-ruff (>=0.2.1)", "pytest-ruff (>=0.3.2)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] [[package]] name = "six" @@ -1126,13 +1144,13 @@ tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] [[package]] name = "substrait" -version = "0.21.0" +version = "0.22.0" description = "A python package for Substrait." optional = false python-versions = ">=3.8.1" files = [ - {file = "substrait-0.21.0-py3-none-any.whl", hash = "sha256:1894ee9136e90f689bbe1d7ba6792d5e5a03092af59521ce9daf30d0c1ebb663"}, - {file = "substrait-0.21.0.tar.gz", hash = "sha256:f40298e09ad78c0bf9987b2c09d24d74a89a6421ef1f9406ffc22111971b5ec9"}, + {file = "substrait-0.22.0-py3-none-any.whl", hash = "sha256:c26556c8eabc922138e0749e7a8fb024af5db2d7ddc2a8004fad1d30daf90531"}, + {file = "substrait-0.22.0.tar.gz", hash = "sha256:509b538d94cd236f043a1552c1e46db462982ab036d1ac310b8515b3ee5d9b90"}, ] [package.dependencies] @@ -1263,4 +1281,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = ">=3.9,<4" -content-hash = "bf71f85eec03a8fdbf3fa515575953f583c1d47dabe39ce17e0b5e925e02238a" +content-hash = "6e6719764baea4feb58548cc1f85a742638cb2378c82c5a9919c8a33de068649" diff --git a/pyproject.toml b/pyproject.toml index 9a577f05..fba946bc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,7 @@ types-pyyaml = ">=6.0.12.8" typing-extensions = ">=4" [tool.poetry.group.test.dependencies] +datafusion = "^40.1.0" duckdb = ">=0.8.1" pytest = "^8.0.0" pytest-clarity = "^1.0.1"