Source code for magpie.util

from collections.abc import Mapping
from typing import Iterator

import msgspec
from bs4 import BeautifulSoup
from duct import StatusError, cmd
from rich import print_json
from rich.console import Console
from rich.pretty import pprint as rich_print

from magpie.datamodel import Base, Folder, Twig

# Suffix appended by `short_str` to signal that the text was cut.
TRUNCATION_MARKER = ' [...]'
TRUNCATION_MARKER_LEN = len(TRUNCATION_MARKER)


def short_str(obj, maxlen: int = 80) -> str:
    """Return a short string representation of the given object.

    Args:
        obj: the object for which we want a str representation
        maxlen: the max number of chars allowed in the string output,
            after which truncation will occur

    Returns:
        A `str` representation with maximum length ``maxlen``. When the
        text is cut, it ends with `TRUNCATION_MARKER`, unless ``maxlen``
        is too small to hold the marker, in which case the text is simply
        cut to ``maxlen`` characters.
    """
    result = str(obj)
    if len(result) <= maxlen:
        return result

    keep = maxlen - TRUNCATION_MARKER_LEN
    if keep < 0:
        # Not enough room for the marker. A negative slice bound would count
        # from the end of the string and produce a result longer than
        # `maxlen`, so fall back to a hard cut.
        return result[:max(maxlen, 0)]
    return result[:keep] + TRUNCATION_MARKER
def clean_text_content(content: str) -> str:
    """Collapse every run of whitespace in ``content`` into a single space.

    Leading and trailing whitespace is dropped as well.
    """
    words = content.split()
    return ' '.join(words)
def clean_content(content: str) -> str:
    """Return the cleaned text content from the given string (usually html content).

    The string is parsed with BeautifulSoup's ``html.parser``, its text is
    extracted and then normalized with `clean_text_content`.
    """
    parsed = BeautifulSoup(content, 'html.parser')
    return clean_text_content(parsed.get_text())
def children_pids(pid) -> list[int]:
    """Return a list of the PIDs of the children from the given process.

    The PIDs are read from the output of ``pgrep -P <pid>``. An empty list
    is returned when that command raises `StatusError`.
    """
    try:
        pgrep_output = cmd('pgrep', '-P', str(pid)).read()
    except StatusError:
        return []
    else:
        # one PID per whitespace-separated token
        return [int(child) for child in pgrep_output.split()]
def revalidate(obj: msgspec.Struct) -> None:
    """Revalidate a `msgspec.Struct`.

    Useful for a struct that was valid at some point, has since been
    modified (potentially into an invalid state), and must be checked again.

    Args:
        obj (msgspec.Struct): the `msgspec.Struct` to revalidate

    Returns:
        `None` if `obj` validates successfully

    Raises:
        msgspec.ValidationError: on error
    """
    # Encoding alone does not report warnings/errors; it is the decode step,
    # typed with the struct's own class, that performs the validation.
    encoded = msgspec.json.encode(obj)
    msgspec.json.decode(encoded, type=type(obj))
################################################################################ ## ## ## Pretty-printing functions ## ## ## ################################################################################ console = Console()
class ColorScheme(Base):
    """Colors used by the pretty-printing helpers of this module.

    Each field holds a color name that `style` turns into
    ``[color]...[/color]`` markup; an empty string leaves the text unstyled.
    """
    folder_name: str  # color for the folder name in the tree structure
    link: str  # color for URLs/links found in Twigs
    key: str  # color for keys in mappings
    value: str  # color for values in mappings
    type: str  # color for type information for values in mappings
# default color scheme colors = ColorScheme( folder_name='yellow', link='cyan', key='', value='green', type='magenta', )
def style(text: str, color: str) -> str:
    """Wrap ``text`` in ``[color]...[/color]`` markup.

    Args:
        text: the text to colorize (converted to `str` in the output)
        color: the color name; a falsy value (e.g. ``''``) disables styling

    Returns:
        The marked-up string, or ``str(text)`` when no color is given.
    """
    if not color:
        return str(text)
    return f'[{color}]{text}[/{color}]'
def pformat_twig(t: Twig) -> Iterator[str]:
    """Yield the display lines for a `Twig`.

    The first line is ``[title](url)`` with the URL styled in the link
    color. If the twig has related entries, a ``Related:`` header follows,
    then one line per related entry, all in the link color.

    NOTE(review): the title is interpolated unescaped between square
    brackets; confirm rich cannot mistake it for a markup tag.
    """
    yield f'[{t.title}]({style(t.url, colors.link)})'

    if not t.related:
        return

    yield style('Related:', colors.link)
    yield from (style(related, colors.link) for related in t.related)
# TODO: should we move this to a __rich_console__ or __rich_repr__ method on the Folder class?
#       It seems that msgspec structs already implement __rich_repr__.
def pformat_folder(f: Folder) -> Iterator[str]:
    """Yield the lines of a tree-style rendering of a `Folder`.

    The first line is the folder name in the folder color. Each child
    follows, prefixed with a branch marker (``├─``, or ``╰─`` for the last
    child). Nested folders and twigs are rendered recursively and their
    continuation lines are indented under their own title line. Any other
    kind of child is yielded as ``str(child)`` with no prefix.

    Args:
        f: the folder to render

    Yields:
        The lines of the rendering, containing rich markup.
    """
    yield style(f.name, colors.folder_name)

    # Find the last child by position rather than identity: an object that
    # appears more than once in the folder would otherwise be drawn as the
    # last entry at its first occurrence.
    children = list(f)
    last_index = len(children) - 1

    for index, obj in enumerate(children):
        # Continuation indents are as wide as the branch prefixes so that
        # follow-up lines align under their title.
        if index == last_index:
            indent_title = ' ╰─ '
            indent_content = '    '
        else:
            indent_title = ' ├─ '
            indent_content = ' │  '

        if isinstance(obj, Folder):
            lines = pformat_folder(obj)
            # a folder always yields at least its name
            yield f'{style(indent_title, colors.folder_name)}{next(lines)}'
            for line in lines:
                yield f'{style(indent_content, colors.folder_name)}{line}'
        elif isinstance(obj, Twig):
            lines = pformat_twig(obj)
            # a twig always yields at least its title line
            yield f'{indent_title}{next(lines)}'
            for line in lines:
                yield f'{indent_content}{line}'
        else:
            yield str(obj)
def _print_obj(obj, indent=0):
    """Recursively print ``obj`` on the module console.

    - a mapping is printed as one ``key: value`` entry per item
    - a list or tuple is printed as one ``-`` item per element
    - anything else is printed as its (truncated) string value followed by
      its type name

    Nested values are indented 4 more columns than their parent.
    """
    if isinstance(obj, Mapping):
        console.print('')
        for key, value in obj.items():
            console.print(f'{" " * indent}{style(key, colors.key)}: ', end='')
            _print_obj(value, indent=indent + 4)
        return

    # TODO: what's the best here? Sequence does not work as we catch strings too
    if isinstance(obj, (list, tuple)):
        console.print('')
        for item in obj:
            console.print(f'{" " * indent}-', end='')
            _print_obj(item, indent=indent + 4)
        return

    # Leaf value. Escape the opening square bracket of the type tag as we
    # don't want it to be interpreted as a `rich` styling directive.
    type_tag = f'\\[{type(obj).__name__}]'
    text = short_str(obj, maxlen=400)
    console.print(f'{style(text, colors.value)} {style(type_tag, colors.type)}')
def pformat(obj) -> str:
    """Return as a string what `pprint` writes for ``obj``.

    The output of `pprint` on the module-level console is captured and
    returned with surrounding whitespace stripped. See `pprint` for how
    each type of object is displayed.
    """
    with console.capture() as captured:
        pprint(obj)
    rendered = captured.get()
    return rendered.strip()
def pprint(obj):
    """Smart pretty-print function that prints an object according to its type.

    - a `Twig` or a `Folder` is displayed with our own formatting functions
    - a `str` is expected to hold a JSON document and is printed as nicely
      formatted JSON
    - `bytes` are decoded as UTF-8 and then handled like a `str`
    - a mapping is printed as an indented ``key: value`` listing
    - anything else is handed to the console as-is

    All output goes through the module-level ``console``, so `pformat` can
    capture it whatever the input type.

    Args:
        obj: the object to print

    Returns:
        `None`
    """
    if isinstance(obj, Twig):
        console.print('\n'.join(pformat_twig(obj)))
    elif isinstance(obj, Folder):
        console.print('\n'.join(pformat_folder(obj)))
    elif isinstance(obj, str):
        # Use the console's own method: the module-level `rich.print_json`
        # writes to rich's global console, which `console.capture()` does
        # not see.
        console.print_json(obj, indent=4)
    elif isinstance(obj, bytes):
        pprint(obj.decode('utf-8'))
    elif isinstance(obj, Mapping):
        _print_obj(obj)
    else:
        console.print(obj)
def rprint(obj):
    """Pretty-print ``obj`` with `rich.pretty.pprint()`, re-exported under this name."""
    return rich_print(obj)