diff --git a/compilertoolkit/ntree.py b/compilertoolkit/ntree.py new file mode 100644 index 0000000..9bc24fc --- /dev/null +++ b/compilertoolkit/ntree.py @@ -0,0 +1,248 @@ +"""Utilities for creation and use for arbitrarily sized tree structures. +The intent is for use in module/package trees""" + +from typing import Any, Literal, Never, Protocol, Self, Sequence, overload + + +class Leaf(Protocol): + """All things a leaf node MUST have.""" + + def matches(self, name: object) -> bool: + """Match this node based on some input param. Useful for module name resolution""" + ... + + def __eq__(self, other: Any) -> bool: ... + + +class NTree[L: Leaf, I: str](): + """ + NTree[L: Leaf, I: str] + ====== + A tree structure with the ability to hold N# of "leaf" objects + + L - The leaf type + I - The type of the tree indentifier + + Note + ##### + + You *should* subclass this if you want to add more details or change how matching works. + + """ + + __slots__ = "children", "identifier" + + children: list[L | Self] + identifier: I | None + """An identifiable "name" of some kind. 
Useful for tree matching/comparison""" + + def __init__( + self, + leaves: Sequence[L | Self] | None = None, + identifier: I | None = None, + ): + self.children = [] + self.identifier = identifier + + if leaves is not None: + self.add_leaves(leaves) + + def add_leaf(self, leaf: L | Self) -> Self: + """Append a single leaf""" + self.children.append(leaf) + return self + + def add_leaves(self, leaves: Sequence[L | Self]) -> Self: + """Append a single leaf""" + for leaf in leaves: + self.add_leaf( + leaf + ) # add leaf using function (this makes overwriting that function more impactful/useful) + return self + + def set_leaves(self, leaves: Sequence[L | Self]) -> Self: + """Append a single leaf""" + self.children = [] + for leaf in leaves: + self.add_leaf( + leaf + ) # add leaf using function (this makes overwriting that function more impactful/useful) + return self + + def matches(self, name: object) -> bool: + """Match against this node based on some input "name". Useful for package name resolution. + defaults to using __eq__ method + """ + if isinstance(name, NTree): + return self.identifier == name.identifier + return self.identifier == name + + # overwrite to make your life easier! + def copy(self) -> Self: + return self.__class__(leaves=list(self.children), identifier=self.identifier) + + @overload + def overlaps(self, other_tree: "NTree") -> bool: ... + + @overload + def overlaps(self, other_tree: Any) -> Never: ... 
+ + def overlaps(self, other_tree: "NTree | Any") -> bool | Never: + """Check for overlapping trees""" + if not isinstance(other_tree, NTree): + raise TypeError(other_tree) + + return other_tree.matches(self.identifier) and len( + [ + child # get overlap of subtrees + for child in self.children + for other_child in other_tree.children + if isinstance(child, NTree) + and isinstance(other_child, NTree) + and (child.overlaps(other_child)) + ] + + [ + child # get overlap of leaves + for other_child in other_tree.children + for child in self.children + if not isinstance(child, NTree) + and not isinstance(other_child, NTree) + and (child == other_child) + ] + ) == len(other_tree.children) + + def _combine(self, other: Self) -> Sequence[L | Self]: + """combine two trees- including sub-trees by identifying intersections""" + + output = list(self.children) + for other_child in other.children: + for c, child in enumerate(output): + if not isinstance(other_child, self.__class__) or not isinstance( + child, self.__class__ + ): + if child == other_child: + break # we had a match- this element is already in our child list + continue # no match- move to next item + if child.matches(other_child): + output[c] = ( + child | other_child + ) # do a combine of these trees since they are the SAME tree + break + else: # use no-break to detect if there were ZERO MATCHES + output.append( + other_child + ) # do typical appending since this element isnt found in our own child list + return output + + def _intersect(self, other: "NTree[L, I]") -> Sequence[L | Self]: + output = [] + for other_child in other.children: + for child in self.children: + if child in output: + continue + if not isinstance(other_child, NTree) or not isinstance(child, NTree): + if child == other_child: + output.append(child) # append child that had a match + elif child.matches( + other_child + ): # both are children are trees and are the same tree + output.append( + child & other_child + ) # get overlap of these trees 
since they are the SAME tree + return output + + def __or__(self, other: Self) -> Self: + """Calculate the combined tree""" + if not isinstance(other, NTree): + raise TypeError(other) + + return self.copy().set_leaves(self._combine(other)) + + def __ior__(self, other: Self | object): + """Calculate the combined tree""" + if not isinstance(other, self.__class__): + raise TypeError(other) + + self.set_leaves(self._combine(other)) + + def __add__(self, other: Self | L | Sequence[Self | L]) -> Self: + if isinstance(other, Sequence): + return self.__class__( + leaves=self.children + list(other), identifier=self.identifier + ) + return self.__class__( + leaves=self.children + [other], identifier=self.identifier + ) + + def __iadd__(self, other: Self | L | Sequence[Self | L]): + if isinstance(other, Sequence): + self.add_leaves(other) + return + self.add_leaf(other) + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, NTree): + return False + return other.identifier == self.identifier and other.children == self.children + + def __and__(self, other: "NTree[Any, I] | object") -> Self: + """Get overlap/intersection of trees (Useful for module/package resolution!)""" + if not isinstance(other, NTree): + raise TypeError(other) + + return self.copy().set_leaves(self._intersect(other)) + + def __iand__(self, other: "NTree[Any, I] | object"): + """Get overlap/intersection of trees (Useful for module/package resolution!)""" + if not isinstance(other, NTree): + raise TypeError(other) + + self.children = [] + self.add_leaves(self._intersect(other)) + + @overload + def __getitem__(self, key: I) -> Self: + """Get a tree based on a tree identifier/matching""" + ... + + @overload + def __getitem__(self, key: object) -> L: + """Get Any leaf node based on arbitrary key (will use .matches defined in Leaf protocol)""" + ... 
+ + def __getitem__(self, key: I | object) -> L | Self: + """Get a subtree or leaf node based on a key: I | Any""" + for child in self.children: + if child.matches(key): + return child + raise KeyError(key) + + @overload + def __setitem__(self, key: I, value: Self): + """Set a subtree item based on a tree identifier/matching""" + ... + + @overload + def __setitem__(self, key: object, value: L): + """set a leaf node based on arbitrary key (will use .matches defined in Leaf protocol)""" + ... + + def __setitem__(self, key: I | object, value: Self | L): + """Get a subtree or leaf node based on a key: I | Any""" + for c, child in enumerate(self.children): + if child.matches(key): + self.add_leaf(value) + self.children[c] = self.children.pop() + return + raise KeyError(key) + + def __delitem__(self, key: I | object): + """Deletes the *first* matching item""" + for c, child in enumerate(self.children): + if child.matches(key): + del self.children[c] + return + raise KeyError(key) + + def __str__(self) -> str: + return f"(Tree: {self.identifier} | [{', '.join(str(child) for child in self.children)}])" diff --git a/pyproject.toml b/pyproject.toml index c03cebf..fe8fa52 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,6 +10,10 @@ license = { text = "LGPL-3.0-only" } keywords = [ "compilers", "library", + "framework", + "trees", + "datastructures", + "tree manipulation", "package", "interpreters", "parsers", diff --git a/readme.md b/readme.md index 6df9209..b98d652 100644 --- a/readme.md +++ b/readme.md @@ -15,7 +15,7 @@ An opinionated library to help you build compilers. - [x] Parser builder (WIP, needs to be more ergonomic) - [ ] Parser check functions built into patterns to allow automatic syntax error parsing. 
"""Example/test script for ``compilertoolkit.ntree``.

Builds a package tree out of ``NTree`` and checks overlap, intersection,
combination, indexing and private-module access with module-level asserts.
"""

from typing import TYPE_CHECKING, Any, Self, Sequence, Union, overload, override

from compilertoolkit.ntree import Leaf, NTree


class PrivateImportExc(Exception):
    """Raised when a private module is matched from outside its package tree."""


class ModuleName:
    """Refer to module name, used simply for matching. Basically a speculation on a module we hope exists"""

    def __init__(self, name: str, parent=None):
        self.name = name
        # The package (or view) the lookup originates from; used for privacy checks.
        self.parent: Package | None = parent

    def matches(self, name: object) -> bool:
        """Match this node based on some input param. Useful for module name resolution"""
        if not isinstance(name, (ModuleName, Module)):
            return self.name == name
        return self.name == name.name

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, ModuleName):
            return self.name == other.name
        if isinstance(other, ModuleView):
            return other == self  # delegate to ModuleView.__eq__
        return self is other

    def __str__(self) -> str:
        return self.name


def has_common_parent(
    pkg: "Package | ModuleView | None", other_pkg: "Package | ModuleView | None"
) -> bool:
    """Return True if ``pkg`` is ``other_pkg`` or one of its ancestors.

    Packages are compared by identifier and by their parent; module views are
    replaced by the package that contains them. Two ``None`` values count as a
    common parent, a single ``None`` does not.
    """
    if isinstance(pkg, ModuleView):
        pkg = pkg.parent

    if isinstance(other_pkg, ModuleView):
        other_pkg = other_pkg.parent

    if pkg is None or other_pkg is None:
        return pkg is None and other_pkg is None

    if pkg.identifier == other_pkg.identifier and pkg.parent == other_pkg.parent:
        return True

    # Walk up the requesting side's ancestry and try again.
    return other_pkg.parent is not None and has_common_parent(pkg, other_pkg.parent)


class Module:
    """A concrete module with a name and a privacy flag."""

    name: str
    private: bool

    def __init__(self, name: str, private=False):
        self.name = name
        self.private = private

    def matches(self, name: object) -> bool:
        """Match this node based on some input param. Useful for module name resolution"""
        if not isinstance(name, (Module, ModuleName)):
            return self.name == name
        return self.name == name.name

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, ModuleName):
            return self.name == other.name
        if isinstance(other, ModuleView):
            return self.name == other.mod.name
        return self is other

    def __str__(self) -> str:
        # NOTE(review): this returns an empty string; the intended text looks
        # like it was lost somewhere - confirm what should be displayed.
        return f""


class ModuleView:
    """A module as seen from a particular parent package."""

    mod: "Module"
    parent: "Package | None"  # Never want to modify the parent of the original package! So we use a view instead!

    def __init__(self, mod: "Module", parent: "Package | None" = None):
        self.parent = parent
        self.mod = mod

    def copy(self):
        """Return a new view onto the same module with the same parent."""
        return self.__class__(self.mod, self.parent)

    def matches(self, name: object) -> bool:
        """Match this node based on some input param. Useful for module name resolution

        Raises:
            PrivateImportExc: if the module is private, matches ``name``, and
                ``name`` is a ``ModuleView``/``ModuleName`` whose parent shares
                no common parent with this view.
        """
        if not self.mod.matches(name):
            return False
        if self.mod.private:
            if isinstance(name, (ModuleView, ModuleName)) and not has_common_parent(
                self.parent, name.parent
            ):
                raise PrivateImportExc(f"Module: {name} is private!")

        return True

    def __eq__(self, other: Any) -> bool:
        return self.mod == other

    def __str__(self) -> str:
        # NOTE(review): this returns an empty string; the intended text looks
        # like it was lost somewhere - confirm what should be displayed.
        return f""


class Package(NTree[ModuleView, str]):
    """An example of an extension of NTree"""

    __slots__ = "parent"

    parent: "Package | None"

    def __init__(
        self,
        parent: Self | None = None,
        leaves: Sequence[ModuleView | Module | Self] | None = None,
        identifier: str = "",
    ):
        self.parent = parent
        self.children = []
        self.identifier = identifier

        if leaves is not None:
            self.add_leaves(leaves)

    def copy(self):
        """Return a shallow copy that keeps this package's parent.

        NOTE(review): the copy is built by passing the existing children
        through ``add_leaf``, which sets their ``parent`` to the new copy - so
        the original's children get re-parented. Confirm this is intended.
        """
        copy = super().copy()
        copy.parent = self.parent
        return copy

    def deep_copy(self):
        """Return a copy whose sub-packages and module views are copied too.

        The original package and its children are left untouched.
        """
        # Start from an empty package so the original children are never
        # routed through add_leaf (which would re-parent them).
        copy = self.__class__(identifier=self.identifier)
        copy.parent = self.parent
        # Attach the copied children to the COPY; assigning them to
        # self.children would mutate the original instead.
        copy.set_leaves(
            [
                child.deep_copy() if isinstance(child, self.__class__) else child.copy()
                for child in self.children
            ]
        )
        return copy

    def __eq__(self, other):
        """Packages are equal when both have no parent or their parents share
        an identifier, and the underlying trees are equal."""
        return (
            isinstance(other, Package)
            and (
                (self.parent is None and other.parent is None)
                or (
                    self.parent is not None
                    and other.parent is not None
                    and self.parent.identifier == other.parent.identifier
                )
            )
            and super().__eq__(other)
        )

    def add_leaf(self, leaf: ModuleView | Module | Self):
        """Append ``leaf``, wrapping bare modules in a view and taking
        ownership of views and sub-packages by setting their parent."""
        if isinstance(leaf, Module):
            leaf = ModuleView(leaf, parent=self)
        if isinstance(leaf, (self.__class__, ModuleView)):
            leaf.parent = self
        return super().add_leaf(leaf)

    # only override the stubs to make our IDE happy. if not type-checking, don't worry about it.
    if TYPE_CHECKING:

        def add_leaves(self, leaves: Sequence[ModuleView | Module | Self]) -> Self: ...

        def set_leaves(self, leaves: Sequence[ModuleView | Module | Self]) -> Self: ...


# Our global list of packages and modules we might have.
imports = Package(
    identifier="base",
    leaves=[
        Module("main", private=True),
        Module("other_mod"),
        Package(
            identifier="lib",
            leaves=[
                Module("math"),
                Module("system"),
                Module("err"),
                Package(
                    identifier="ui",
                    leaves=[
                        Module("application"),
                        Module("widgets"),
                        Module("internal_stuff", private=True),
                        Package(
                            identifier="bindings",
                            leaves=[
                                Module("application", private=True),
                                Module("widgets", private=True),
                            ],
                        ),
                    ],
                ),
            ],
        ),
    ],
)


# Setup trees to do testing against
# ===================================

trying_to_import = NTree[ModuleName, str](
    identifier="base", leaves=[ModuleName("other_mod")]
)

trying_to_import_2 = NTree[ModuleName, str](
    identifier="base",
    leaves=[NTree(identifier="lib", leaves=[ModuleName("math")])],
)

trying_to_import_partial = NTree[ModuleName, str](
    identifier="base",
    leaves=[
        NTree(identifier="lib", leaves=[ModuleName("something_that_does_not_exist")])
    ],
)
# Test Basic overlapping
# ========================
print(imports.overlaps(trying_to_import))  # should be true
print(imports.overlaps(trying_to_import_2))  # should be true
print(imports.overlaps(trying_to_import_partial))  # should be false

assert imports.overlaps(trying_to_import)
assert imports.overlaps(trying_to_import_2)
assert not imports.overlaps(trying_to_import_partial)


# Test Basic intersection
# =========================

print()
print(imports & trying_to_import)
print(imports & trying_to_import_2)
print(imports & trying_to_import_partial)

assert (imports & trying_to_import).children == [imports[ModuleName("other_mod")]]
assert (imports & trying_to_import_2).children == [
    imports["lib"].copy().set_leaves((imports["lib"]["math"],))
]
assert (imports & trying_to_import_partial).children == [
    imports["lib"].copy().set_leaves([])
]

# Test Basic combining
# ======================
print()

trying_to_import_resolved = imports & trying_to_import
trying_to_import_2_resolved = imports & trying_to_import_2
trying_to_import_partial_resolved = Package(
    identifier="base",
    leaves=[
        Package(identifier="lib", leaves=[Module("something_that_does_not_exist")])
    ],
)

print(imports | trying_to_import_resolved)
print(imports | trying_to_import_2_resolved)
print(imports | trying_to_import_partial_resolved)

assert (imports | trying_to_import_resolved) == imports
assert (imports | trying_to_import_2_resolved) == imports
changed_tree = imports.copy()
changed_tree["lib"] = (
    changed_tree["lib"]
    .copy()
    .add_leaf(
        trying_to_import_partial_resolved["lib"][
            ModuleName("something_that_does_not_exist")
        ]
    )
)
assert (imports | trying_to_import_partial_resolved) == changed_tree

# Test Basic tree indexing
# ==========================
print()

lib_pkg = imports["lib"]
print(lib_pkg)
assert isinstance(lib_pkg, Package) and lib_pkg.identifier == "lib"

# Test private imports
# ######################
try:
    imports[ModuleName("main")]  # should not work
    raise Exception("Expect an error")
except PrivateImportExc:
    main_mod = imports[ModuleName("main", parent=imports)]
    print(main_mod)
    assert isinstance(main_mod, ModuleView) and main_mod.mod.name == "main"


# Test private (more) imports
# ############################
try:
    imports["lib"]["ui"][ModuleName("internal_stuff")]  # should not work
    raise Exception("Expect an error")
except PrivateImportExc:
    # Access from same pkg
    private_mod = imports["lib"]["ui"][
        ModuleName("internal_stuff", parent=imports["lib"]["ui"])
    ]
    print(private_mod)
    assert (
        isinstance(private_mod, ModuleView) and private_mod.mod.name == "internal_stuff"
    )

    # Access from module in same pkg
    private_mod = imports["lib"]["ui"][
        ModuleName("internal_stuff", parent=imports["lib"]["ui"][ModuleName("widgets")])
    ]
    print(private_mod)
    assert (
        isinstance(private_mod, ModuleView) and private_mod.mod.name == "internal_stuff"
    )

    # Access from 1 pkg further
    private_mod = imports["lib"]["ui"][
        ModuleName("internal_stuff", parent=imports["lib"]["ui"]["bindings"])
    ]
    print(private_mod)
    assert (
        isinstance(private_mod, ModuleView) and private_mod.mod.name == "internal_stuff"
    )

    # Access from module in 1 pkg further
    private_mod = imports["lib"]["ui"][
        ModuleName(
            "internal_stuff",
            parent=imports["lib"]["ui"]["bindings"][
                ModuleName("application", parent=imports["lib"]["ui"]["bindings"])
            ],
        )
    ]
    print(private_mod)
    assert (
        isinstance(private_mod, ModuleView) and private_mod.mod.name == "internal_stuff"
    )


# Test Tree comparison
# ======================
print()

assert imports != imports | trying_to_import_partial_resolved

assert imports == imports
assert imports == imports.copy()