Source code for magpie.datamodel_patch

import uuid

import msgspec
from loguru import logger

from magpie.datamodel import Base, ContentInformationBase, Folder, FormatSpec, Path, PathSegment, Twig, WithID
from magpie.util import short_str


def patch_all():
    """Apply every group of monkey-patches defined in this module.

    The groups are applied in this order: iteration helpers, display
    helpers, content serialization helpers, then the remaining
    navigation/lookup helpers.
    """
    patch_groups = (
        patch_iter_methods,
        patch_display_methods,
        patch_content_serialization_methods,
        patch_other,
    )
    for apply_group in patch_groups:
        apply_group()
def patch_iter_methods():
    """Attach the module-level iteration helpers to `Folder` as methods."""
    iteration_methods = {
        'iter_twigs': folder_iter_twigs,
        'iter_urls': folder_iter_urls,
        'iter_tree': folder_iter_tree,
        'iter_with_id': folder_iter_with_id,
    }
    for method_name, implementation in iteration_methods.items():
        setattr(Folder, method_name, implementation)
def patch_display_methods():
    """Attach the formatting / string-conversion helpers to the datamodel classes."""
    display_patches = (
        (Base, '_format', base__format),
        (Twig, '__str__', twig__str__),
        (Folder, '__str__', folder__str__),
    )
    for target_cls, attr_name, implementation in display_patches:
        setattr(target_cls, attr_name, implementation)
def patch_other():
    """Attach the path-following and lookup helpers to `Base` and `Folder`."""
    other_patches = (
        (Base, 'follow', base_follow),
        (Folder, 'find', folder_find),
        (Folder, 'find_path', folder_find_path),
    )
    for target_cls, attr_name, implementation in other_patches:
        setattr(target_cls, attr_name, implementation)
def base__format(self: Base, fmt: FormatSpec, level: int = 0) -> str:
    """Render the fields of this struct as an indented, multi-line string.

    Args:
        fmt: supplies the pieces used for rendering (`space`, `newline`,
            `start_bold`, `end_bold`).
        level: nesting level; each level indents by four `fmt.space` units.

    Returns:
        One entry per rendered field, joined with `fmt.newline`.

    Fields whose value is falsy (e.g. `None`, empty string/list, `0`) are
    skipped entirely.
    """
    own_indent = 4 * level * fmt.space
    item_indent = 4 * (level + 1) * fmt.space

    rendered = []
    for name, field_value in msgspec.structs.asdict(self).items():
        if not field_value:
            continue

        header = f'{own_indent}{fmt.start_bold}{name}:{fmt.end_bold}'

        if isinstance(field_value, msgspec.Struct):
            # Nested struct: header line, then the struct one level deeper.
            rendered.append(header)
            rendered.append(field_value._format(fmt, level + 1))
        elif isinstance(field_value, list):
            # List: header line, then a '-' bullet line before each element;
            # elements are expected to provide `_format` themselves.
            rendered.append(header)
            for element in field_value:
                rendered.append(f'{item_indent}-')
                rendered.append(element._format(fmt, level + 2))
        else:
            # Scalar-like value: shown on the header line, shortened if long.
            rendered.append(f'{header} {short_str(field_value, maxlen=400)}')

    return fmt.newline.join(rendered)
def twig__str__(self) -> str:
    """Return the twig as a Markdown-style link: ``[title](url)``."""
    # FIXME: we only show main twig Url
    title, url = self.title, self.url
    return f'[{title}]({url})'
def folder__str__(self: Folder) -> str:
    """Return the folder's `name` as its string representation."""
    folder_name = self.name
    return folder_name
def folder_iter_twigs(self: Folder, *, depth_first=True):
    """Yield every `Twig` contained in this folder and, recursively, its sub-folders.

    Args:
        depth_first: when true, the twigs of each sub-folder are yielded
            before this folder's own twigs; when false, this folder's own
            twigs are yielded first and the sub-folders are visited
            afterwards. The same flag is passed down to every sub-folder.
    """
    subfolders = (item for item in self.items if isinstance(item, Folder))
    own_twigs = (item for item in self.items if isinstance(item, Twig))

    if not depth_first:
        yield from own_twigs

    for subfolder in subfolders:
        yield from subfolder.iter_twigs(depth_first=depth_first)

    if depth_first:
        yield from own_twigs
def folder_iter_urls(self: Folder, *, depth_first=True):
    """Yield every URL attached to the twigs of this folder and its sub-folders.

    For each twig produced by `iter_twigs(depth_first=...)`, the twig's main
    `url` is yielded first, followed by each entry of its `related` list.
    """
    for current_twig in self.iter_twigs(depth_first=depth_first):
        yield current_twig.url
        for related_url in current_twig.related:
            yield related_url
def folder_iter_tree(self: Folder, *, depth_first=True, depth=0):
    """Yield ``(item, depth)`` pairs for every `Folder` and `Twig` below this folder.

    Useful for printing a tree: `depth` is the nesting level of the item,
    starting at the value passed in for this folder's direct children and
    increasing by one per sub-folder level.

    Args:
        depth_first: when true, each sub-folder (followed by its whole
            subtree) is yielded before this folder's own twigs; when false,
            this folder's own twigs come first.
        depth: nesting level reported for the direct children of this folder.
    """
    subfolders = (item for item in self.items if isinstance(item, Folder))
    own_twigs = ((item, depth) for item in self.items if isinstance(item, Twig))

    if not depth_first:
        yield from own_twigs

    for subfolder in subfolders:
        # The sub-folder itself is reported before anything it contains.
        yield (subfolder, depth)
        yield from subfolder.iter_tree(depth_first=depth_first, depth=depth + 1)

    if depth_first:
        yield from own_twigs
def folder_iter_with_id(self: Folder, path=None, path_as_str: bool = False):
    """Yield ``(object, path)`` pairs for this folder and everything below it.

    Yields, in order: this folder itself, then every sub-folder
    (recursively), then for each twig of this folder the twig, its `url`
    and each entry of its `related` list.

    Args:
        path: path segments leading to this folder; defaults to an empty list.
        path_as_str: when true, folder and twig segments are the folder
            `name` / twig `title`; otherwise they are the item's index in
            `self.items`.

    The `url` and `related` objects of a twig get the extra segments
    ``'.url'`` and ``'.related', <index>`` appended to the twig's path.
    """
    # TODO: this function is brittle, if we change the datamodel it might
    #       start to fail in weird ways. Find a way to make it more robust
    path = path or []
    yield self, path

    # Positions come from enumerate() rather than `self.items.index(item)`:
    # index() searches by equality, so it costs a linear scan per item and
    # reports the position of the *first* equal item when duplicates exist.
    for position, item in enumerate(self.items):
        if not isinstance(item, Folder):
            continue
        segment = item.name if path_as_str else position
        yield from folder_iter_with_id(item, [*path, segment], path_as_str=path_as_str)

    for position, item in enumerate(self.items):
        if not isinstance(item, Twig):
            continue
        segment = item.title if path_as_str else position
        twig_path = [*path, segment]
        yield item, twig_path
        yield item.url, [*twig_path, '.url']
        for related_position, related in enumerate(item.related):
            yield related, [*twig_path, '.related', related_position]
def folder_find(self: Folder, uuid: uuid.UUID) -> WithID:
    """Return the first object under this folder whose `uuid` equals `uuid`.

    The search covers everything produced by `folder_iter_with_id`.

    Raises:
        ValueError: if no object with that uuid is found.
    """
    not_found = object()
    candidates = (
        candidate
        for candidate, _path in folder_iter_with_id(self)
        if candidate.uuid == uuid
    )
    match = next(candidates, not_found)
    if match is not_found:
        raise ValueError(f'Could not find object with uuid {uuid}')
    return match
def folder_find_path(
    self: Folder,
    obj: WithID | None = None,
    uuid: uuid.UUID | None = None,
    as_str: bool = False,
) -> Path:
    """Return the path from this folder to an object, given the object or its uuid.

    Exactly one of `obj` and `uuid` must be provided.

    Args:
        obj: the object to locate (compared by equality).
        uuid: the uuid of the object to locate.
        as_str: when true, the path uses folder names / twig titles instead
            of list indices (forwarded as `path_as_str`).

    Returns:
        The path of the first matching object produced by
        `folder_iter_with_id`.

    Raises:
        ValueError: if neither or both of `obj` and `uuid` are given, or if
            no matching object is found.
    """
    search_by_obj = obj is not None
    if search_by_obj == (uuid is not None):
        raise ValueError("Need to specify either 'obj' or 'uuid'")

    for candidate, path in folder_iter_with_id(self, path_as_str=as_str):
        # Only test the criterion that was actually supplied: comparing
        # `candidate.uuid` against an unset (None) uuid would wrongly match
        # any object whose uuid is None while searching by `obj`.
        if search_by_obj:
            matched = candidate == obj
        else:
            matched = candidate.uuid == uuid
        if matched:
            return path

    if search_by_obj:
        raise ValueError(f'Could not find path to object {obj}')
    raise ValueError(f'Could not find path to object with uuid {uuid}')
def base_follow(self: Base, path: Path | PathSegment) -> Base:
    """Walk `path` starting from this object and return the object reached.

    Args:
        path: a list of segments, or a single segment (anything that is not
            a list is treated as one segment). Each segment is applied to
            the result of the previous one:

            * a string starting with ``'.'`` is an attribute access
              (``'.url'`` -> ``result.url``);
            * any other string, or an int, is an item access
              (``result[segment]``);
            * a tuple is reserved and currently not supported.

            Segments of any other type are ignored.

    Returns:
        The object found at the end of the path (this object itself for an
        empty path).

    Raises:
        NotImplementedError: if a segment is a tuple.
        IndexError, KeyError, AttributeError, TypeError: propagated from the
            failing access, after a warning has been logged.
    """
    result = self
    segments = path if isinstance(path, list) else [path]

    for segment in segments:
        try:
            if isinstance(segment, str) and segment.startswith('.'):
                result = getattr(result, segment[1:])
            elif isinstance(segment, (int, str)):
                result = result[segment]
            elif isinstance(segment, tuple):
                raise NotImplementedError
                # result = result[segment[0]]
        except (LookupError, AttributeError, TypeError):
            # Log which segment failed on which object, then let the original
            # exception propagate unchanged. String and attribute segments
            # fail with KeyError/AttributeError/TypeError rather than
            # IndexError, so those are reported as well.
            logger.warning(f'Exception while trying to access path segment "{segment}" '
                           f'on object: {result} [{type(result)}]')
            raise

    return result
def patch_content_serialization_methods():
    """Attach the content serialization helpers to `ContentInformationBase`.

    These methods are used to provide a comparable snapshot to the
    embedding model.

    TODO: allow different levels of abstraction e.g. title/abstract/full
    """
    setattr(ContentInformationBase, 'snapshot', contentinformationbase_snapshot)
def contentinformationbase_snapshot(self):
    """Return this object's `data` attribute unchanged as its snapshot."""
    snapshot = self.data
    return snapshot