Source code for tests.test_datamodel

import msgspec
import pytest
from loguru import logger

from magpie.data.testdata import firefox_bookmarks, simple_folder
from magpie.datamodel import Folder, Twig, Url
from magpie.fetch import DataRetriever
from magpie.fetchers import github, hackernews, load_plugin
from magpie.util import pformat, pprint, revalidate


def test_folder():
    """Exercise adding, looking up, iterating over and removing items of a `Folder`."""
    title = 'magpie website'
    bookmark = Twig(title=title, url=Url('https://magpie.digitalgaia.net'))
    root = Folder(name='digitalgaia')

    # the item must be reachable both by position and by its title string
    root.add(bookmark)
    by_index = root[0]
    by_title = root[title]
    assert by_index == by_title

    # removal by position leaves the folder empty
    root.remove(0)
    assert not root.items

    # after adding it back, length and iteration must agree on a single item
    root.add(bookmark)
    assert len(root) == 1
    assert all(item == by_title for item in root)

    # removal by title leaves the folder empty again
    root.remove(title)
    assert not root.items
def test_data_model():
    """Build a small tree of `Folder` and `Twig` objects and manipulate it.

    Covers adding items, revalidation of a folder holding an invalid item,
    rejection of an undeclared attribute, lookup by name and by index, and
    logs the result of a JSON encode/decode pass through `msgspec`.
    """
    root = simple_folder()
    for folder_name in ('folder1', 'folder2'):
        root.add(Folder(name=folder_name))
    root[2].add(Twig(title='bookmark1', url=Url('https://digitalgaia.net')))

    # adding something that is not a model object, then revalidating,
    # must end in a ValidationError
    with pytest.raises(msgspec.ValidationError):
        root.add(10)
        revalidate(root)

    # drop the last item again (presumably the invalid `10` added above)
    root.items.pop()

    # assigning an attribute the model does not declare must fail
    with pytest.raises(AttributeError):
        root.extra_field = 3

    first = root['Python']['Project structure'][0]
    logger.info(f"first link: {first.title} - URL: {first.url.value}")

    # encode to JSON, show the raw payload, then log the decoded tree
    encoded = msgspec.json.encode(root)
    pprint(encoded)
    logger.info('=' * 120)
    decoded = msgspec.json.decode(encoded, type=Folder)
    logger.info(decoded)

    # the bookmark added through index 2 must be found under 'folder1',
    # while 'folder2' must still be empty
    bookmark = root['folder1'][0]
    assert isinstance(bookmark, Twig)
    assert bookmark.title == 'bookmark1'
    assert not root['folder2'].items
def test_uuid():
    """Check that `Url`, `Twig` and `Folder` objects carry a UUID and that
    looking that UUID up from the root returns the very same object.
    """
    root = simple_folder()
    pprint(root)

    def assert_findable(element, expected_type):
        """Assert `element` has the expected type, a uuid, and is found by it."""
        assert isinstance(element, expected_type)
        uuid = element.uuid
        assert uuid is not None
        found = root.find(uuid)
        assert found is not None
        # identity, not just equality: the lookup must return the same object
        assert found is element

    top_folder = root[0]
    first_twig = top_folder[0][0]

    # Url: one reached through `.url`, one through a `.related` list
    assert_findable(first_twig.url, Url)
    assert_findable(top_folder[1][0].related[0], Url)
    # Twig
    assert_findable(first_twig, Twig)
    # Folder
    assert_findable(top_folder, Folder)
def test_find_path():
    """Check path-based traversal with `follow` and the reverse operation
    `find_path`, which returns the path from a root node to a given object.
    """
    root = simple_folder()
    pprint(root)

    # `follow` accepts a single index as well as a list of path elements
    assert root.follow(0).name == 'Python'
    assert root.follow([0, 1]).name == 'Best practices'
    related_url = root.follow([0, 1, 0, '.related', 0])
    assert related_url.value == (
        'https://www.reddit.com/r/Python/comments/1ah05vt/summary_of_major_python_changes_between_versions/'
    )

    # an out-of-range index along the path must raise
    with pytest.raises(IndexError):
        root.follow([0, 3])

    def assert_round_trip(target, expected_path, **options):
        """Assert `find_path` yields `expected_path` by object and by uuid,
        and that following that path leads back to `target` itself."""
        path_by_object = root.find_path(target, **options)
        assert path_by_object == expected_path
        path_by_uuid = root.find_path(uuid=target.uuid, **options)
        assert path_by_uuid == expected_path
        reached = root.follow(path_by_uuid)
        assert reached is target

    first_twig = root[0][0][0]
    assert_round_trip(first_twig, [0, 0, 0])
    assert_round_trip(
        first_twig,
        ['Python', 'Project structure', 'UV: An extremely fast Python package and project manager'],
        as_str=True,
    )
    assert_round_trip(first_twig.url, [0, 0, 0, '.url'])
    assert_round_trip(root[0][1][0].related[0], [0, 1, 0, '.related', 0])

    urls = [
        # two pages about painting
        'https://rarehistoricalphotos.com/picasso-self-portraits-photos/',
        'https://aureliosuarez.es/joaquin-sorolla-most-famous-painting/',
        # two pages about rust
        'https://mo8it.com/blog/rust-vs-julia/',
        'https://thelinuxcode.com/math-library-rust/',
        # two pages about thai food
        'https://jamieolivereats.co.uk/thai-red-curry-chicken/',
        'https://coleycooks.com/thai-green-papaya-salad/',
    ]
    flat = Folder.from_urls(urls)
    assert flat.find_path(flat[4]) == [4]
def test_plugins():
    """Check that `load_plugin` returns the expected fetcher modules and that
    each plugin's `Fetcher` name and `Info`/`Content` struct tags match the
    plugin name.
    """
    # loading by name must give back the modules imported at the top of the file
    assert github == load_plugin('github')
    assert hackernews == load_plugin('hackernews')

    for plugin_name in ('github', 'hackernews', 'generic'):
        module = load_plugin(plugin_name)
        assert module.Fetcher.name() == plugin_name
        # both structs of a plugin are expected to be tagged with its name
        for struct_name in ('Info', 'Content'):
            struct_cls = getattr(module, struct_name)
            assert struct_cls.__struct_config__.tag == plugin_name
@pytest.mark.celery
def test_dataretriever():
    """Run `identify` and `fetch` from a `DataRetriever` on the sample folder.

    This assesses the basic behavior of the task queue system, then checks the
    information attached to a GitHub url and a Hacker News url after a JSON
    encode/decode pass.
    """
    retriever = DataRetriever()
    tree = simple_folder()
    retriever.identify(tree)
    retriever.fetch(tree)
    retriever.wait_for_tasks_completion(timeout=10)

    # go through JSON and back so that the assertions below also cover
    # what survives serialization
    payload = msgspec.json.encode(tree)
    restored = msgspec.json.decode(payload, type=Folder)

    # GitHub url: second entry of 'Python' / 'Project structure'
    project_structure = restored['Python']['Project structure']
    gh = project_structure[1].url
    pprint(gh)
    assert gh.url_type == 'github'
    gh_info = gh.info
    assert type(gh_info) is github.Info
    assert gh_info.org == 'fpgmaas'
    assert gh_info.repo == 'cookiecutter-uv'
    gh_content = gh.content
    assert type(gh_content) is github.Content
    assert gh_content.data is not None

    # Hacker News url: first related url of the second entry of 'Rust'
    hn = restored['Rust'][1].related[0]
    hn_info = hn.info
    assert type(hn_info) is hackernews.Info
    assert hn_info.id == '29010327'
    hn_content = hn.content
    assert type(hn_content) is hackernews.Content
    assert hn_content.title == 'My ideal Rust workflow'
    assert hn_content.real_url == 'https://fasterthanli.me/articles/my-ideal-rust-workflow'
@pytest.mark.celery
def test_datafetch():
    """Run a `DataRetriever` over the Firefox bookmarks test data and log the
    result, both as objects and as the output of `to_dict()`.
    """
    retriever = DataRetriever()
    bookmarks: Folder = firefox_bookmarks()

    retriever.identify(bookmarks)
    retriever.fetch(bookmarks)
    retriever.wait_for_tasks_completion(timeout=10)

    separator = '=' * 120
    logger.info(pformat(bookmarks))
    logger.info(separator)
    as_dict = bookmarks.to_dict()
    logger.info(pformat(as_dict))