Skip to content

Commit

Permalink
Merge pull request #223 from matthewwardrop/promote_structured
Browse files Browse the repository at this point in the history
Promote `Structured` to `formulaic.utils` (and let lookup paths take precedence when indexing).
  • Loading branch information
matthewwardrop authored Dec 3, 2024
2 parents d5c3b41 + b50c659 commit 05bfa25
Show file tree
Hide file tree
Showing 12 changed files with 77 additions and 32 deletions.
3 changes: 2 additions & 1 deletion formulaic/formula.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,10 @@
from .errors import FormulaInvalidError
from .model_matrix import ModelMatrix
from .parser import DefaultFormulaParser
from .parser.types import FormulaParser, OrderedSet, Structured, Term
from .parser.types import FormulaParser, OrderedSet, Term
from .utils.calculus import differentiate_term
from .utils.deprecations import deprecated
from .utils.structured import Structured
from .utils.variables import Variable, get_expression_variables

FormulaSpec: TypeAlias = Union[
Expand Down
2 changes: 1 addition & 1 deletion formulaic/model_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import wrapt

from formulaic.parser.types.structured import Structured
from formulaic.utils.structured import Structured

if TYPE_CHECKING: # pragma: no cover
from .model_spec import ModelSpec, ModelSpecs
Expand Down
3 changes: 2 additions & 1 deletion formulaic/model_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@
)

from formulaic.materializers.base import EncodedTermStructure
from formulaic.parser.types import Factor, Structured, Term
from formulaic.parser.types import Factor, Term
from formulaic.utils.constraints import LinearConstraints, LinearConstraintSpec
from formulaic.utils.structured import Structured
from formulaic.utils.variables import Variable

from .formula import Formula, FormulaSpec, SimpleFormula, StructuredFormula
Expand Down
2 changes: 1 addition & 1 deletion formulaic/parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

from formulaic.errors import FormulaParsingError
from formulaic.utils.layered_mapping import LayeredMapping
from formulaic.utils.structured import Structured

from .algos.sanitize_tokens import sanitize_tokens
from .algos.tokenize import tokenize
Expand All @@ -33,7 +34,6 @@
Operator,
OperatorResolver,
OrderedSet,
Structured,
Term,
Token,
)
Expand Down
2 changes: 0 additions & 2 deletions formulaic/parser/types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from .operator import Operator
from .operator_resolver import OperatorResolver
from .ordered_set import OrderedSet
from .structured import Structured
from .term import Term
from .token import Token

Expand All @@ -15,7 +14,6 @@
"Operator",
"OperatorResolver",
"OrderedSet",
"Structured",
"Term",
"Token",
]
3 changes: 2 additions & 1 deletion formulaic/parser/types/ast_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@
Union,
)

from formulaic.utils.structured import Structured

from .operator import Operator
from .ordered_set import OrderedSet
from .structured import Structured
from .term import Term

ItemType = TypeVar("ItemType")
Expand Down
2 changes: 1 addition & 1 deletion formulaic/parser/types/formula_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@

from formulaic.parser.types.ordered_set import OrderedSet
from formulaic.utils.layered_mapping import LayeredMapping
from formulaic.utils.structured import Structured

from .ast_node import ASTNode
from .operator_resolver import OperatorResolver
from .structured import Structured
from .term import Term
from .token import Token

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@
Type,
TypeVar,
Union,
cast,
)

from formulaic.utils.sentinels import MISSING
from .sentinels import MISSING

_ItemType = TypeVar("_ItemType")
_SelfType = TypeVar("_SelfType", bound="Structured")
Expand Down Expand Up @@ -447,44 +448,56 @@ def __setattr__(self, attr: str, value: Any) -> None:
return
self._structure[attr] = self.__prepare_item(attr, value)

def __lookup_path(self, path: Tuple[Union[str, int], ...]) -> Any:
    """
    Walk `path` one element at a time, starting from this instance, and
    return the object found at the end of it.

    At each step the current object is inspected:
      * if it is a `Structured` instance and the path element is one of the
        keys of its `_structure`, descend into that entry;
      * otherwise, if it is a tuple and the path element is an `int`, index
        into the tuple (an out-of-range index propagates whatever the tuple
        raises);
      * otherwise the path cannot be followed any further.

    An empty path returns this instance itself.

    Args:
        path: The sequence of keys and/or integer indices to follow.

    Returns:
        The object located at `path`.

    Raises:
        KeyError: If some element of `path` cannot be resolved against the
            object reached so far. The message reports the position in
            `path` at which the lookup failed.
    """
    node: Any = self
    for depth, step in enumerate(path):
        # Keyed lookup is attempted before positional lookup.
        if isinstance(node, Structured) and step in node._structure:
            node = node._structure[cast(str, step)]
            continue
        if isinstance(node, tuple) and isinstance(step, int):
            node = node[step]
            continue
        raise KeyError(
            f"Lookup {path} at index {depth} extends beyond structure of `{self.__class__.__name__}`."
        )
    return node

def __getitem__(self, key: Any) -> Any:
    """
    Look up an item of this structure by `key`.

    Resolution order:
      1. A tuple key is always treated as a lookup path and handed to
         `__lookup_path` in its entirety.
      2. If this instance has a root but no other keys, the key is forwarded
         to the root object itself (`self.root[key]`), so any exception
         raised by the root's own indexing propagates unchanged.
      3. `None` or `"root"` returns the root, when there is one.
      4. A string key that does not start with an underscore and is present
         in `_structure` returns the corresponding entry.

    Args:
        key: A path tuple, `None`, a string key, or any key accepted by the
            root object when the structure consists solely of a root.

    Returns:
        The object stored at `key`.

    Raises:
        KeyError: If `key` does not match any of the cases above, or if a
            tuple path cannot be resolved.
    """
    # Tuples are handled exclusively here. A later per-element tuple branch
    # could never be reached once this guard returns, so none is kept.
    if isinstance(key, tuple):
        return self.__lookup_path(key)
    if self._has_root and not self._has_keys:
        return self.root[key]
    if key in (None, "root") and self._has_root:
        return self.root
    if isinstance(key, str) and not key.startswith("_") and key in self._structure:
        return self._structure[key]
    raise KeyError(
        f"This `{self.__class__.__name__}` instance does not have structure @ `{repr(key)}`."
    )

def __setitem__(self, key: Any, value: Any) -> Any:
    """
    Store `value` in this structure under `key`.

    A tuple key is treated as a path: every element except the last is
    resolved via `__lookup_path`, and the final element is then assigned on
    the object found there, which must itself be a `Structured` instance.
    Any other key must be a string that is a valid identifier and does not
    start with an underscore; the value is passed through `__prepare_item`
    before being stored in `_structure`.

    Args:
        key: A string key, or a tuple path whose last element is the key to
            set on the object found at the preceding path.
        value: The value to store.

    Raises:
        KeyError: If `key` is an empty tuple; if the object at the parent
            path is not a `Structured` instance; if `key` is not a string
            identifier; or if `key` starts with an underscore.
    """
    if isinstance(key, tuple):
        if not key:
            raise KeyError("Cannot replace self.")
        # Resolve the container first, then delegate the final assignment to
        # it so that its own validation and item preparation apply.
        parent = self.__lookup_path(key[:-1])
        if isinstance(parent, Structured):
            parent[key[-1]] = value
            return
        raise KeyError(
            f"Object @ {key[:-1]} is not a `Structured` instance. Unable to set value."
        )
    # From here on `key` is not a tuple, and after the next guard it is
    # guaranteed to be a `str`, so no further tuple handling is needed.
    if not isinstance(key, str) or not key.isidentifier():
        raise KeyError(key)
    if key.startswith("_"):
        # Report the key that was actually rejected rather than scanning the
        # keys already stored in `_structure`.
        raise KeyError(
            "Substructure keys cannot start with an underscore. "
            f"The invalid keys are: {{{key!r}}}."
        )
    self._structure[key] = self.__prepare_item(key, value)

def __iter__(self) -> Generator[Any, None, None]:
Expand Down
3 changes: 2 additions & 1 deletion tests/materializers/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
from formulaic.materializers.base import EncodedTermStructure
from formulaic.materializers.types import EvaluatedFactor, FactorValues, NAAction
from formulaic.model_spec import ModelSpec
from formulaic.parser.types import Factor, Structured
from formulaic.parser.types import Factor
from formulaic.utils.structured import Structured

PANDAS_TESTS = {
# '<formula>': (<full_rank_names>, <names>, <full_rank_null_names>, <null_rows>)
Expand Down
3 changes: 2 additions & 1 deletion tests/parser/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@

from formulaic.errors import FormulaParsingError, FormulaSyntaxError
from formulaic.parser import DefaultFormulaParser, DefaultOperatorResolver
from formulaic.parser.types import Structured, Token
from formulaic.parser.types import Token
from formulaic.parser.types.term import Term
from formulaic.utils.layered_mapping import LayeredMapping
from formulaic.utils.structured import Structured

FORMULA_TO_TOKENS = {
"": ["1"],
Expand Down
31 changes: 30 additions & 1 deletion tests/parser/types/test_structured.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import pytest

from formulaic.parser.types import Structured
from formulaic.utils.structured import Structured


class TestStructured:
Expand Down Expand Up @@ -51,6 +51,24 @@ def test_access_structure(self):
s4 = Structured((1, 2))
assert s4[0] == 1

s5 = Structured(Structured((Structured("Hello"),)))
assert s5[("root", "root", 0, "root")] == "Hello"
with pytest.raises(KeyError, match="extends beyond structure"):
s5[("root", "root", 0, "root", 0)]

def test_item_preparation(self):
    """Check that a `_prepare_item` override shows up in `_to_dict()` output."""

    class SubStructured(Structured):
        def _prepare_item(self, key, item):
            suffix = "_prepared"
            return str(item) + suffix

    # (instance under test, expected `_to_dict()` result) pairs: a flat
    # subclass instance, a subclass nested in itself, and a plain
    # `Structured` nested inside the subclass.
    cases = (
        (SubStructured("Hello"), {"root": "Hello_prepared"}),
        (
            SubStructured(SubStructured("Hello")),
            {"root": {"root": "Hello_prepared"}},
        ),
        (
            SubStructured(Structured("Hello")),
            {"root": {"root": "Hello_prepared_prepared"}},
        ),
    )
    for instance, expected in cases:
        assert instance._to_dict() == expected

def test__map(self):
assert Structured("Hi", a="Hello", b="Greetings")._map(len)._to_dict() == {
"root": 2,
Expand Down Expand Up @@ -187,6 +205,17 @@ def test_mutation(self):
with pytest.raises(KeyError):
s[0] = 10

s2 = Structured(Structured((Structured("Hello"),)))
s2[("root", "root", 0, "b")] = "World"
assert s2 == Structured(Structured((Structured("Hello", b="World"),)))

with pytest.raises(KeyError, match="Cannot replace self"):
s2[()] = "Hello"
with pytest.raises(
KeyError, match=re.escape("Object @ ('root', 'root') is not a `Structured`")
):
s2[("root", "root", 2)] = "Hello"

def test_iteration(self):
assert list(Structured()) == []
assert list(Structured("a")) == ["a"]
Expand Down
2 changes: 1 addition & 1 deletion tests/test_formula.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@

from formulaic import Formula, SimpleFormula, StructuredFormula
from formulaic.errors import FormulaInvalidError, FormulaMaterializerInvalidError
from formulaic.parser.types import Structured
from formulaic.parser.types.factor import Factor
from formulaic.parser.types.term import Term
from formulaic.utils.structured import Structured


class TestFormula:
Expand Down

0 comments on commit 05bfa25

Please sign in to comment.