import msgspec
import pytest
from loguru import logger
from magpie.data.testdata import firefox_bookmarks, simple_folder
from magpie.datamodel import Folder, Twig, Url
from magpie.fetch import DataRetriever
from magpie.fetchers import github, hackernews, load_plugin
from magpie.util import pformat, pprint, revalidate
def test_folder():
    """Test the `Folder` model and its methods.

    Exercises `add`, lookup by index and by title, `remove` by index and
    by title, `len()` and iteration on a folder holding a single `Twig`.
    """
    twig = Twig(title='magpie website', url=Url('https://magpie.digitalgaia.net'))
    folder = Folder(name='digitalgaia')
    folder.add(twig)

    # The same item must be reachable both by position and by its title.
    first = folder[0]
    magpie = folder['magpie website']
    assert first == magpie

    # Removal by index empties the folder again.
    folder.remove(0)
    assert not folder.items

    folder.add(twig)
    assert len(folder) == 1
    for t in folder:
        assert t == magpie

    # Removal by title must work as well as removal by index.
    folder.remove('magpie website')
    assert not folder.items
def test_data_model():
    """Simple test that creates and manipulates instances of [`Folder`](#magpie.datamodel.Folder)
    and [`Twig`](#magpie.datamodel.Twig).

    Also checks that invalid content is rejected on revalidation, that
    unknown attributes cannot be set, and that the tree can be encoded to
    JSON and decoded back as a `Folder`.
    """
    d = simple_folder()
    d.add(Folder(name='folder1'))
    d.add(Folder(name='folder2'))
    # Index 2 is expected to be 'folder1' (checked further down through
    # d['folder1'][0]) -- assumes simple_folder() has two top-level items.
    d[2].add(Twig(title='bookmark1', url=Url('https://digitalgaia.net')))

    # NOTE(review): both statements are kept inside the `raises` block. The
    # `pop()` right after and the later d['folder2'] lookup only make sense
    # if add() appends without validating and revalidate() is what raises
    # -- confirm against the original layout.
    with pytest.raises(msgspec.ValidationError):
        d.add(10)
        revalidate(d)
    # Drop the last item again (presumably the invalid `10` added above).
    d.items.pop()

    with pytest.raises(AttributeError):
        d.extra_field = 3

    first = d['Python']['Project structure'][0]
    logger.info(f"first link: {first.title} - URL: {first.url.value}")

    # JSON round trip: encode the whole tree, then decode it as a Folder.
    json_data = msgspec.json.encode(d)
    pprint(json_data)
    logger.info('=' * 120)
    logger.info(msgspec.json.decode(json_data, type=Folder))

    bm1 = d['folder1'][0]
    assert isinstance(bm1, Twig)
    assert bm1.title == 'bookmark1'
    assert not d['folder2'].items
def test_uuid():
    """Test that all elements that need to have a UUID have it, and that we can retrieve
    those elements by their ID.
    """
    d = simple_folder()
    pprint(d)

    def check_obj(obj, type_):
        """Assert `obj` is a `type_`, has a UUID, and `d.find` returns that very object."""
        assert isinstance(obj, type_)
        assert obj.uuid is not None
        res = d.find(obj.uuid)
        assert res is not None
        # Identity, not just equality: find() must hand back the same instance.
        assert res is obj

    # uuid for Url
    check_obj(d[0][0][0].url, Url)
    check_obj(d[0][1][0].related[0], Url)

    # uuid for Twig
    check_obj(d[0][0][0], Twig)

    # uuid for Folder
    check_obj(d[0], Folder)
def test_find_path():
    """Test that objects can be traversed using paths, and that we can find the path
    from a root node to a given object.
    """
    d = simple_folder()
    pprint(d)

    # follow() accepts a single index or a list of path elements; string
    # elements such as '.related' appear alongside integer indices.
    assert d.follow(0).name == 'Python'
    assert d.follow([0, 1]).name == 'Best practices'
    assert d.follow([0, 1, 0, '.related', 0]).value == (
        'https://www.reddit.com/r/Python/comments/1ah05vt/summary_of_major_python_changes_between_versions/'
    )

    with pytest.raises(IndexError):
        d.follow([0, 3])

    def check_obj(obj, path, **kwargs):
        """Assert `obj` is found at `path` (by object and by UUID) and that the path leads back to it."""
        p = d.find_path(obj, **kwargs)
        assert p == path
        p = d.find_path(uuid=obj.uuid, **kwargs)
        assert p == path
        # Round trip: following the found path must return the same instance.
        obj2 = d.follow(p)
        assert obj2 is obj

    check_obj(d[0][0][0], [0, 0, 0])
    check_obj(d[0][0][0],
              ['Python',
               'Project structure',
               'UV: An extremely fast Python package and project manager'],
              as_str=True)
    check_obj(d[0][0][0].url,
              [0, 0, 0, '.url'])
    check_obj(d[0][1][0].related[0],
              [0, 1, 0, '.related', 0])

    data = Folder.from_urls(['https://rarehistoricalphotos.com/picasso-self-portraits-photos/',  # 2 on painting
                             'https://aureliosuarez.es/joaquin-sorolla-most-famous-painting/',
                             'https://mo8it.com/blog/rust-vs-julia/',  # 2 on rust
                             'https://thelinuxcode.com/math-library-rust/',
                             'https://jamieolivereats.co.uk/thai-red-curry-chicken/',  # 2 on thai food
                             'https://coleycooks.com/thai-green-papaya-salad/'
                             ])
    assert data.find_path(data[4]) == [4]
def test_plugins():
    """Check that Fetcher plugins are properly loaded and have the name and tags
    on their structs correctly set.
    """
    # load_plugin() must give back the modules imported from magpie.fetchers.
    assert github == load_plugin('github')
    assert hackernews == load_plugin('hackernews')

    for name in ['github', 'hackernews', 'generic']:
        plugin = load_plugin(name)
        assert plugin.Fetcher.name() == name
        # The struct tag of both Info and Content must equal the plugin name.
        assert plugin.Info.__struct_config__.tag == name
        assert plugin.Content.__struct_config__.tag == name
@pytest.mark.celery
def test_dataretriever():
    """Test the `identify` and `fetch` methods on a `DataRetriever`.

    This assesses the basic behavior of the task queue system.
    """
    magpie = DataRetriever()
    data = simple_folder()
    magpie.identify(data)
    magpie.fetch(data)
    magpie.wait_for_tasks_completion(timeout=10)

    # dump and load, make sure we don't lose any information
    dump = msgspec.json.encode(data)
    result = msgspec.json.decode(dump, type=Folder)

    # assert we have the required info
    f = result['Python']['Project structure']
    gh = f[1].url
    pprint(gh)
    assert gh.url_type == 'github'
    # Exact type checks (not isinstance): after the JSON round trip the
    # decoded objects must be precisely the github plugin's structs.
    assert type(gh.info) is github.Info
    assert gh.info.org == 'fpgmaas'
    assert gh.info.repo == 'cookiecutter-uv'
    assert type(gh.content) is github.Content
    assert gh.content.data is not None

    hn = result['Rust'][1].related[0]
    assert type(hn.info) is hackernews.Info
    assert hn.info.id == '29010327'
    assert type(hn.content) is hackernews.Content
    assert hn.content.title == 'My ideal Rust workflow'
    assert hn.content.real_url == 'https://fasterthanli.me/articles/my-ideal-rust-workflow'
@pytest.mark.celery
def test_datafetch():
    """Use a `DataRetriever` to print what it can fetch from a set of Firefox bookmarks.

    There are no assertions: the test only logs the resulting data, first
    as formatted objects and then as the output of `to_dict()`.
    """
    magpie = DataRetriever()
    data: Folder = firefox_bookmarks()
    magpie.identify(data)
    magpie.fetch(data)
    magpie.wait_for_tasks_completion(timeout=10)

    logger.info(pformat(data))
    logger.info('=' * 120)
    logger.info(pformat(data.to_dict()))