Skip to content

Commit

Permalink
feat: add datafusion to parity tests (#1108)
Browse files Browse the repository at this point in the history
* feat: add datafusion to parity tests

* fix: apply ruff linter rules

* fix: mark failing parity tests

* fix: reorganize parity tests

---------

Co-authored-by: tokoko <[email protected]>
  • Loading branch information
tokoko and tokoko authored Aug 28, 2024
1 parent b78bfba commit 1821c0d
Show file tree
Hide file tree
Showing 29 changed files with 297 additions and 168 deletions.
5 changes: 5 additions & 0 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
FROM mcr.microsoft.com/vscode/devcontainers/python:3.10-buster
USER vscode
RUN curl -sSL https://install.python-poetry.org | python -
RUN /home/vscode/.local/bin/poetry config virtualenvs.in-project true
USER root
24 changes: 24 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"name": "ibis-substrait-devcontainer",
"build": {
"context": "..",
"dockerfile": "Dockerfile"
},

// Features to add to the dev container. More info: https://containers.dev/features.
"features": {
"ghcr.io/devcontainers/features/nix:1": {}
},

// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],

// Use 'postCreateCommand' to run commands after the container is created.
"postCreateCommand": "poetry install"

// Configure tool-specific properties.
// "customizations": {},

// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "root"
}
2 changes: 1 addition & 1 deletion ibis_substrait/compiler/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -883,7 +883,7 @@ def _translate_join_type(join_kind: ops.JoinKind) -> stalg.JoinRel.JoinType.V:
"left": stalg.JoinRel.JoinType.JOIN_TYPE_LEFT,
"right": stalg.JoinRel.JoinType.JOIN_TYPE_RIGHT,
"outer": stalg.JoinRel.JoinType.JOIN_TYPE_OUTER,
"semi": stalg.JoinRel.JoinType.JOIN_TYPE_SEMI,
"semi": stalg.JoinRel.JoinType.JOIN_TYPE_LEFT_SEMI,
# "asof",
# "anti",
# "any_inner",
Expand Down
65 changes: 65 additions & 0 deletions ibis_substrait/tests/compiler/parity_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from abc import ABC, abstractmethod

import datafusion
import pyarrow as pa


class SubstraitConsumer(ABC):
@abstractmethod
def with_tables(self, datasets: dict[str, pa.Table]):
pass

@abstractmethod
def execute(self, plan) -> pa.Table:
pass


class AceroSubstraitConsumer(SubstraitConsumer):
def __init__(self) -> None:
super().__init__()

def with_tables(self, datasets: dict[str, pa.Table]):
self.datasets = datasets
return self

def execute(self, plan) -> pa.Table:
import pyarrow.substrait as pa_substrait

def get_table_provider(datasets):
def table_provider(names, schema):
return datasets[names[0]]

return table_provider

query_bytes = plan.SerializeToString()
result = pa_substrait.run_query(
pa.py_buffer(query_bytes),
table_provider=get_table_provider(self.datasets),
)

return result.read_all()


class DatafusionSubstraitConsumer(SubstraitConsumer):
def __init__(self) -> None:
self.connection = datafusion.SessionContext()

def with_tables(self, datasets: dict[str, pa.Table]):
for k, v in datasets.items():
self.connection.deregister_table(k)
self.connection.register_record_batches(k, [v.to_batches()])
return self

def execute(self, plan) -> pa.Table:
plan_data = plan.SerializeToString()
substrait_plan = datafusion.substrait.serde.deserialize_bytes(plan_data)
logical_plan = datafusion.substrait.consumer.from_substrait_plan(
self.connection, substrait_plan
)

df = self.connection.create_dataframe_from_logical_plan(logical_plan)
for column_number, column_name in enumerate(df.schema().names):
df = df.with_column_renamed(
column_name, plan.relations[0].root.names[column_number]
)
return df.to_arrow_table()
Original file line number Diff line number Diff line change
Expand Up @@ -694,7 +694,7 @@
}
],
"version": {
"minorNumber": 52,
"minorNumber": 54,
"producer": "ibis-substrait"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1285,7 +1285,7 @@
}
],
"version": {
"minorNumber": 52,
"minorNumber": 54,
"producer": "ibis-substrait"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -728,7 +728,7 @@
}
],
"version": {
"minorNumber": 52,
"minorNumber": 54,
"producer": "ibis-substrait"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,7 @@
}
],
"version": {
"minorNumber": 52,
"minorNumber": 54,
"producer": "ibis-substrait"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1012,7 +1012,7 @@
}
],
"version": {
"minorNumber": 52,
"minorNumber": 54,
"producer": "ibis-substrait"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,7 @@
}
],
"version": {
"minorNumber": 52,
"minorNumber": 54,
"producer": "ibis-substrait"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1032,7 +1032,7 @@
}
],
"version": {
"minorNumber": 52,
"minorNumber": 54,
"producer": "ibis-substrait"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1358,7 +1358,7 @@
}
],
"version": {
"minorNumber": 52,
"minorNumber": 54,
"producer": "ibis-substrait"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -878,7 +878,7 @@
}
],
"version": {
"minorNumber": 52,
"minorNumber": 54,
"producer": "ibis-substrait"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -843,7 +843,7 @@
}
],
"version": {
"minorNumber": 52,
"minorNumber": 54,
"producer": "ibis-substrait"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -863,7 +863,7 @@
}
],
"version": {
"minorNumber": 52,
"minorNumber": 54,
"producer": "ibis-substrait"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -774,7 +774,7 @@
}
],
"version": {
"minorNumber": 52,
"minorNumber": 54,
"producer": "ibis-substrait"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@
}
],
"version": {
"minorNumber": 52,
"minorNumber": 54,
"producer": "ibis-substrait"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -775,7 +775,7 @@
}
],
"version": {
"minorNumber": 52,
"minorNumber": 54,
"producer": "ibis-substrait"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1085,7 +1085,7 @@
}
],
"version": {
"minorNumber": 52,
"minorNumber": 54,
"producer": "ibis-substrait"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -704,7 +704,7 @@
}
],
"version": {
"minorNumber": 52,
"minorNumber": 54,
"producer": "ibis-substrait"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -773,7 +773,7 @@
}
],
"version": {
"minorNumber": 52,
"minorNumber": 54,
"producer": "ibis-substrait"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -774,7 +774,7 @@
}
],
"version": {
"minorNumber": 52,
"minorNumber": 54,
"producer": "ibis-substrait"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1426,7 +1426,7 @@
}
],
"version": {
"minorNumber": 52,
"minorNumber": 54,
"producer": "ibis-substrait"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1012,7 +1012,7 @@
}
],
"version": {
"minorNumber": 52,
"minorNumber": 54,
"producer": "ibis-substrait"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1245,7 +1245,7 @@
}
],
"version": {
"minorNumber": 52,
"minorNumber": 54,
"producer": "ibis-substrait"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -879,7 +879,7 @@
}
],
"version": {
"minorNumber": 52,
"minorNumber": 54,
"producer": "ibis-substrait"
}
}
Loading

0 comments on commit 1821c0d

Please sign in to comment.