Skip to content

Commit

Permalink
feat: add substrait.proto convenience module and document it (#50)
Browse files Browse the repository at this point in the history
Add a ``substrait.proto`` module that gives access to the Substrait
protocol classes
removing the need to navigate the hierarchy automatically generated by
protobuf.
It also provides access to the modules without the ``_pb2`` suffix 
which is an implementation detail of the protobuf version used.

Provides examples on how to generate and read back Substrait plans 
using the substrait-python module itself.

---------

Co-authored-by: Gil Forsyth <[email protected]>
  • Loading branch information
amol- and gforsyth authored Mar 15, 2024
1 parent dc42f18 commit 18802c8
Show file tree
Hide file tree
Showing 3 changed files with 173 additions and 14 deletions.
131 changes: 117 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,118 @@ This project is not an execution engine for Substrait Plans.
This is an experimental package that is still under development.

# Example
At the moment, this project contains only generated Python classes for the Substrait protobuf messages. Let's use an existing Substrait producer, [Ibis](https://ibis-project.org), to provide an example using Python Substrait as the consumer.

## Produce a Substrait Plan
The ``substrait.proto`` module provides access to the classes
that represent a Substrait plan, allowing you to create new plans.

Here is an example plan equivalent to ``SELECT first_name FROM people``,
where the ``people`` table has ``first_name`` and ``surname`` columns of type ``String``:

```
>>> from substrait import proto
>>> plan = proto.Plan(
... relations=[
... proto.PlanRel(
... root=proto.RelRoot(
... names=["first_name"],
... input=proto.Rel(
... read=proto.ReadRel(
... named_table=proto.ReadRel.NamedTable(names=["people"]),
... base_schema=proto.NamedStruct(
... names=["first_name", "surname"],
... struct=proto.Type.Struct(
... types=[
... proto.Type(string=proto.Type.String(nullability=proto.Type.Nullability.NULLABILITY_REQUIRED)),
... proto.Type(string=proto.Type.String(nullability=proto.Type.Nullability.NULLABILITY_REQUIRED))
... ] # /types
... ) # /struct
... ) # /base_schema
... ) # /read
... ) # /input
... ) # /root
... ) # /PlanRel
... ] # /relations
... )
>>> print(plan)
relations {
root {
input {
read {
base_schema {
names: "first_name"
names: "surname"
struct {
types {
string {
nullability: NULLABILITY_REQUIRED
}
}
types {
string {
nullability: NULLABILITY_REQUIRED
}
}
}
}
named_table {
names: "people"
}
}
}
names: "first_name"
}
}
>>> serialized_plan = plan.SerializeToString()
>>> serialized_plan
b'\x1aA\x12?\n1\n/\x12#\n\nfirst_name\n\x07surname\x12\x0c\n\x04b\x02\x10\x02\n\x04b\x02\x10\x02:\x08\n\x06people\x12\nfirst_name'
```

## Consume the Substrait Plan
The plan generated in the previous example
can be loaded back from its binary representation
using the ``Plan.ParseFromString`` method:

```
>>> from substrait.proto import Plan
>>> p = Plan()
>>> p.ParseFromString(serialized_plan)
67
>>> p
relations {
root {
input {
read {
base_schema {
names: "first_name"
names: "surname"
struct {
types {
string {
nullability: NULLABILITY_REQUIRED
}
}
types {
string {
nullability: NULLABILITY_REQUIRED
}
}
}
}
named_table {
names: "people"
}
}
}
names: "first_name"
}
}
```

## Produce a Substrait Plan with Ibis
Let's use an existing Substrait producer, [Ibis](https://ibis-project.org),
to provide an example using Python Substrait as the consumer.

```
In [1]: import ibis
Expand All @@ -54,21 +164,14 @@ In [5]: compiler = SubstraitCompiler()
In [6]: protobuf_msg = compiler.compile(query).SerializeToString()
In [7]: type(protobuf_msg)
Out[7]: bytes
```
## Consume the Substrait Plan using Python Substrait
```
In [8]: import substrait
In [7]: from substrait.proto import Plan
In [9]: from substrait.gen.proto.plan_pb2 import Plan
In [8]: my_plan = Plan()
In [10]: my_plan = Plan()
In [9]: my_plan.ParseFromString(protobuf_msg)
Out[9]: 186
In [11]: my_plan.ParseFromString(protobuf_msg)
Out[11]: 186
In [12]: print(my_plan)
In [10]: print(my_plan)
relations {
root {
input {
Expand Down Expand Up @@ -177,4 +280,4 @@ version {
minor_number: 24
producer: "ibis-substrait"
}
```
```
38 changes: 38 additions & 0 deletions src/substrait/proto.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
def _load():
    """Expose the Substrait protobuf classes and modules under friendly names.

    Instead of forcing users to deal with autogenerated protobuf
    modules and to import individual components of the protocol
    from submodules, this function copies into this module every
    class found in the generated protocol modules, and binds the
    protocol modules themselves under a friendly name (without the
    ``_pb2`` suffix), making the protocol more convenient to use.

    For example,
    ``substrait.gen.proto.extensions.extensions_pb2.SimpleExtensionDeclaration``
    becomes ``substrait.proto.SimpleExtensionDeclaration``.
    """
    import sys
    import inspect
    import pkgutil
    import importlib
    from substrait.gen import proto as _proto

    pb2_suffix = "_pb2"
    selfmodule = sys.modules[__name__]
    for submodule_info in pkgutil.iter_modules(_proto.__path__):
        submodule_name = submodule_info.name
        if submodule_name == "extensions":
            # Extensions are in a submodule
            submodule_name = "extensions.extensions_pb2"
            attr_name = "extensions"
        elif submodule_name.endswith(pb2_suffix):
            # Strip only a trailing ``_pb2``; str.replace would also
            # remove the marker from the middle of a module name.
            attr_name = submodule_name[: -len(pb2_suffix)]
        else:
            attr_name = submodule_name

        submodule = importlib.import_module(f".{submodule_name}", _proto.__name__)
        setattr(selfmodule, attr_name, submodule)

        # getmembers already yields (name, value) pairs and accepts a
        # predicate, so there is no need to re-fetch and filter by hand.
        for membername, member in inspect.getmembers(submodule, inspect.isclass):
            setattr(selfmodule, membername, member)

# Populate this module's namespace as soon as it is imported.
_load()
18 changes: 18 additions & 0 deletions tests/test_proto.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,21 @@ def test_imports():
from substrait.gen.proto.type_expressions_pb2 import DerivationExpression
from substrait.gen.proto.type_pb2 import Type
from substrait.gen.proto.extensions.extensions_pb2 import SimpleExtensionURI


def test_proto_proxy_module():
    """Check that substrait.proto exposes protocol classes and modules."""
    import substrait.proto

    exported = set(dir(substrait.proto))

    # A sample of protocol classes that must be reachable directly.
    expected_classes = {"Plan", "Type", "NamedStruct", "RelRoot"}
    assert expected_classes.issubset(exported)

    # Protocol modules must be reachable under their friendly names.
    expected_modules = {
        "algebra",
        "capabilities",
        "extensions",
        "extended_expression",
        "function",
        "parameterized_types",
        "plan",
        "type_expressions",
        "type",
    }
    assert expected_modules.issubset(exported)

0 comments on commit 18802c8

Please sign in to comment.