import uuid
import msgspec
from loguru import logger
from magpie.datamodel import Base, ContentInformationBase, Folder, FormatSpec, Path, PathSegment, Twig, WithID
from magpie.util import short_str
[docs]
def patch_all():
    """Apply every group of monkey-patches defined in this module.

    The groups are applied in a fixed order: iteration helpers, display
    helpers, content-serialization helpers, then the remaining helpers.
    """
    patch_groups = (
        patch_iter_methods,
        patch_display_methods,
        patch_content_serialization_methods,
        patch_other,
    )
    for apply_group in patch_groups:
        apply_group()
[docs]
def patch_iter_methods():
    """Attach the module-level iteration helpers to `Folder` as methods."""
    folder_methods = {
        'iter_twigs': folder_iter_twigs,
        'iter_urls': folder_iter_urls,
        'iter_tree': folder_iter_tree,
        'iter_with_id': folder_iter_with_id,
    }
    for method_name, implementation in folder_methods.items():
        setattr(Folder, method_name, implementation)
[docs]
def patch_display_methods():
    """Attach the formatting / string-conversion helpers to the datamodel.

    `Base` receives `_format`, while `Twig` and `Folder` each receive their
    own `__str__` implementation.
    """
    display_patches = (
        (Base, '_format', base__format),
        (Twig, '__str__', twig__str__),
        (Folder, '__str__', folder__str__),
    )
    for target_cls, attr_name, implementation in display_patches:
        setattr(target_cls, attr_name, implementation)
[docs]
def patch_other():
    """Attach the path-following and lookup helpers to the datamodel.

    `Base` receives `follow`; `Folder` receives `find` and `find_path`.
    """
    lookup_patches = (
        (Base, 'follow', base_follow),
        (Folder, 'find', folder_find),
        (Folder, 'find_path', folder_find_path),
    )
    for target_cls, attr_name, implementation in lookup_patches:
        setattr(target_cls, attr_name, implementation)
[docs]
def twig__str__(self) -> str:
    """Render the twig as a Markdown-style link: ``[title](url)``."""
    # FIXME: we only show main twig Url
    title, url = self.title, self.url
    return f'[{title}]({url})'
[docs]
def folder__str__(self: Folder) -> str:
    """Use the folder's ``name`` attribute as its string representation."""
    label = self.name
    return label
[docs]
def folder_iter_twigs(self: Folder, *, depth_first=True):
    """Yield every `Twig` held by this folder or by any nested sub-folder.

    Args:
        depth_first: When true (the default), the twigs found inside each
            sub-folder are yielded before the twigs stored directly in this
            folder. When false, this folder's own twigs come first and the
            sub-folders are descended into afterwards.

    Yields:
        `Twig` instances in the order described above. Items that are
        neither `Folder` nor `Twig` are skipped.

    NOTE: in the non-depth-first mode each sub-folder is still exhausted
    completely before its next sibling is visited; the traversal is not
    level-by-level.
    """
    children = self.items

    def own_twigs():
        # Twigs stored directly in this folder, in item order.
        return (child for child in children if isinstance(child, Twig))

    def nested_twigs():
        # Twigs reachable through each sub-folder, in item order.
        for child in children:
            if isinstance(child, Folder):
                yield from child.iter_twigs(depth_first=depth_first)

    if depth_first:
        producers = (nested_twigs, own_twigs)
    else:
        producers = (own_twigs, nested_twigs)
    for produce in producers:
        yield from produce()
[docs]
def folder_iter_urls(self: Folder, *, depth_first=True):
    """Yield the URLs attached to every twig reachable from this folder.

    For each twig produced by ``self.iter_twigs`` the twig's main ``url`` is
    yielded first, followed by each entry of its ``related`` collection.

    Args:
        depth_first: Forwarded unchanged to ``iter_twigs``; it controls the
            order in which the twigs themselves are visited.
    """
    for entry in self.iter_twigs(depth_first=depth_first):
        main_url = entry.url
        yield main_url
        for related_url in entry.related:
            yield related_url
[docs]
def folder_iter_tree(self: Folder, *, depth_first=True, depth=0):
    """Yield ``(item, depth)`` pairs for every `Folder` and `Twig` below this folder.

    Each sub-folder is yielded immediately before its own contents, which are
    reported one level deeper. This makes the output convenient for printing
    an indented tree.

    Args:
        depth_first: When true (the default), sub-folders (with their
            contents) are yielded before the twigs stored directly in this
            folder. When false, this folder's own twigs come first.
        depth: Nesting level reported for the direct children of this
            folder; it is incremented by one for each level of recursion.

    NOTE: in the non-depth-first mode each sub-folder is still exhausted
    completely before its next sibling is visited; the traversal is not
    level-by-level.
    """
    children = self.items

    def leaves():
        # Twigs stored directly in this folder.
        for child in children:
            if isinstance(child, Twig):
                yield (child, depth)

    def branches():
        # Each sub-folder, followed by everything underneath it.
        for child in children:
            if isinstance(child, Folder):
                yield (child, depth)
                yield from child.iter_tree(depth_first=depth_first, depth=depth + 1)

    if depth_first:
        yield from branches()
        yield from leaves()
    else:
        yield from leaves()
        yield from branches()
[docs]
def folder_iter_with_id(self: Folder, path=None, path_as_str: bool = False):
    """Yield ``(object, path)`` pairs for this folder and everything below it.

    The folder itself is yielded first. Its sub-folders are then walked
    recursively, and finally each of its twigs is yielded, followed by the
    twig's ``url`` and each entry of the twig's ``related`` collection.

    Args:
        path: Path prefix leading to this folder. ``None`` or an empty list
            means this folder is the root of the walk.
        path_as_str: When false (the default), folder and twig segments are
            the item's position in its parent's ``items``. When true, they
            are the folder ``name`` / twig ``title`` instead.

    Yields:
        Tuples ``(obj, path)`` where ``path`` is a list of segments. The
        ``'.url'`` and ``'.related'`` segments are attribute names (a leading
        dot is how ``base_follow`` in this module recognises attribute
        access); entries of ``related`` are addressed by their position.
    """
    # TODO: this function is brittle, if we change the datamodel it might
    # start to fail in weird ways. Find a way to make it more robust
    path = path or []
    yield self, path

    # Positions come from enumerate() rather than ``items.index(item)``:
    # index() returns the first *equal* element, so two equal siblings would
    # both be reported at the first one's position (and it rescans the list
    # for every item).
    for position, item in enumerate(self.items):
        if isinstance(item, Folder):
            segment = item.name if path_as_str else position
            yield from folder_iter_with_id(item, [*path, segment], path_as_str=path_as_str)

    for position, item in enumerate(self.items):
        if isinstance(item, Twig):
            segment = item.title if path_as_str else position
            twig_path = [*path, segment]
            yield item, twig_path
            yield item.url, [*twig_path, '.url']
            for related_position, related in enumerate(item.related):
                yield related, [*twig_path, '.related', related_position]
[docs]
def folder_find(self: Folder, uuid: uuid.UUID) -> WithID:
    """Return the first object below this folder whose ``uuid`` matches.

    Candidates are visited in the order produced by ``folder_iter_with_id``
    (which also yields this folder itself).

    Raises:
        ValueError: If no visited object has the requested uuid.
    """
    not_found = object()
    matching = (
        candidate
        for candidate, _path in folder_iter_with_id(self)
        if candidate.uuid == uuid
    )
    hit = next(matching, not_found)
    if hit is not_found:
        raise ValueError(f'Could not find object with uuid {uuid}')
    return hit
[docs]
def folder_find_path(self: Folder,
                     obj: WithID | None = None,
                     uuid: uuid.UUID | None = None,
                     as_str: bool = False) -> Path:
    """Return the path from this folder to an object, given the object or its uuid.

    Exactly one of ``obj`` and ``uuid`` must be supplied.

    Args:
        obj: Object to locate; candidates are compared to it with ``==``.
        uuid: Identifier to locate; compared with each candidate's ``uuid``.
        as_str: Forwarded to ``folder_iter_with_id`` as ``path_as_str``.
            When true, folder/twig segments are names/titles rather than
            positions.

    Returns:
        The path (list of segments) of the first matching candidate, in the
        order produced by ``folder_iter_with_id``.

    Raises:
        ValueError: If neither or both of ``obj`` and ``uuid`` are given, or
            if no candidate matches.
    """
    search_by_obj = obj is not None
    search_by_uuid = uuid is not None
    if search_by_obj == search_by_uuid:
        # Neither criterion given, or both given.
        raise ValueError("Need to specify either 'obj' or 'uuid'")

    for candidate, path in folder_iter_with_id(self, path_as_str=as_str):
        # Apply only the criterion the caller supplied. Testing both at once
        # would compare ``candidate.uuid`` against ``None`` during a search
        # by object, wrongly matching any candidate whose uuid is None.
        if search_by_obj:
            matched = candidate == obj
        else:
            matched = candidate.uuid == uuid
        if matched:
            return path

    if search_by_obj:
        raise ValueError(f'Could not find path to object {obj}')
    raise ValueError(f'Could not find path to object with uuid {uuid}')
[docs]
def base_follow(self: Base, path: Path | PathSegment) -> Base:
    """Resolve ``path`` starting from this object and return what it points to.

    Args:
        path: A list of segments, or a single segment (anything that is not
            a list is wrapped into a one-element list). Segments are applied
            left to right:

            * a string starting with ``'.'`` reads the attribute named by the
              rest of the string;
            * any other string, or an int, is used as a subscript
              (``result[segment]``);
            * a tuple is not supported yet.

            Segments of any other type are skipped without effect.

    Returns:
        The object reached after applying every segment; this object itself
        when the path is empty.

    Raises:
        NotImplementedError: If a segment is a tuple.
        IndexError, KeyError, AttributeError, TypeError: Propagated unchanged
            from the failing lookup, after a warning has been logged.
    """
    result = self
    if not isinstance(path, list):
        path = [path]
    for segment in path:
        try:
            if isinstance(segment, str) and segment.startswith('.'):
                result = getattr(result, segment[1:])
            elif isinstance(segment, (int, str)):
                result = result[segment]
            elif isinstance(segment, tuple):
                raise NotImplementedError
                # result = result[segment[0]]
        except (LookupError, AttributeError, TypeError):
            # LookupError covers both IndexError (sequences) and KeyError
            # (mappings); AttributeError comes from '.attr' segments and
            # TypeError from subscripting an unsupported object. Every
            # failure mode gets the same diagnostic before being re-raised.
            logger.warning(f'Exception while trying to access path segment "{segment}" '
                           f'on object: {result} [{type(result)}]')
            raise
    return result
[docs]
def patch_content_serialization_methods():
    """Attach the content-serialization helper to `ContentInformationBase`.

    The ``snapshot`` method installed here is meant to provide a comparable
    snapshot of the content to the embedding model.

    TODO: allow different levels of abstraction e.g. title/abstract/full
    """
    setattr(ContentInformationBase, 'snapshot', contentinformationbase_snapshot)
[docs]
def contentinformationbase_snapshot(self):
    """Return this object's ``data`` attribute, unmodified, as its snapshot."""
    snapshot = self.data
    return snapshot