Source code for magpie.fetchers
import sys
from importlib import import_module
from pathlib import Path
from types import ModuleType
from loguru import logger
from magpie.datamodel import Base, ContentInformation, ContentInformationBase, DataFetcher, UrlInformation
# Registry mapping a plugin name to its loaded module; filled in by `load_plugins()`.
PLUGINS: dict[str, ModuleType] = {}
"""Dict of `DataFetcher` plugins that have been found and loaded by Magpie."""
[docs]
def get_fetcher(name: str) -> DataFetcher:
    """Return a new `Fetcher` instance from the plugin registered under ``name``.

    Raises:
        KeyError: if no plugin with that name is present in `PLUGINS`.
    """
    plugin_module = PLUGINS[name]
    fetcher_cls = plugin_module.Fetcher
    return fetcher_cls()
[docs]
def all_fetchers():
    """Yield a freshly created `Fetcher` instance for every plugin in `PLUGINS`.

    Fetchers are produced lazily, following the iteration order of `PLUGINS`.
    """
    yield from (module.Fetcher() for module in PLUGINS.values())
def _check_plugin_class(module: ModuleType, attr: str, base: type, base_name: str) -> bool:
    """Check that ``module.<attr>`` exists and is a class deriving from ``base``.

    Logs an explanatory error (including a fix suggestion) and returns `False`
    when the attribute is missing, is not a class, or does not inherit from
    ``base``; returns `True` otherwise.
    """
    name = module.__name__
    cls = getattr(module, attr, None)
    if cls is None:
        logger.error(f'While loading "{name}" plugin: `{attr}` class is not present')
        return False

    # `issubclass()` raises TypeError when its first argument is not a class,
    # so make sure we have one before asking about its ancestry.
    if not (isinstance(cls, type) and issubclass(cls, base)):
        logger.error(f'While loading "{name}" plugin: `{attr}` class does not inherit from `{base_name}`')
        logger.error('Fix it like that:')
        logger.error(f' from magpie.modelbase import {base_name}')
        logger.error(f' class {attr}({base_name}):')
        logger.error(' ...')
        return False

    return True


def _is_valid_plugin(module: ModuleType) -> bool:
    """Tell whether ``module`` fulfils the contract expected from a fetcher plugin.

    A valid plugin module provides:

    - an `Info` class inheriting from `Base`
    - a `Content` class inheriting from `ContentInformationBase`
    - a `Fetcher` class inheriting from `DataFetcher` and exposing a callable
      `match` attribute

    The checks run in that order and stop at the first failure, which is
    reported through the logger.

    Returns:
        `True` if all the checks pass, `False` otherwise.
    """
    required_classes = (
        ('Info', Base, 'Base'),
        ('Content', ContentInformationBase, 'ContentInformationBase'),
        ('Fetcher', DataFetcher, 'DataFetcher'),
    )
    for attr, base, base_name in required_classes:
        if not _check_plugin_class(module, attr, base, base_name):
            return False

    # the `Fetcher` class has at least to provide a `match()` method
    try:
        match = module.Fetcher.match
    except AttributeError:
        logger.error('`Fetcher` class needs to provide at least the `match()` method')
        return False

    if not callable(match):
        logger.error('`Fetcher.match()` needs to be a callable method')
        # a non-callable `match` makes the plugin unusable: reject it
        return False

    return True
def _load_plugin(name: str):
module_name = f'magpie.fetchers.{name}'
import_module(module_name)
return sys.modules[module_name]
[docs]
def load_plugin(name: str) -> ModuleType | None:
    """Import the fetcher plugin called ``name`` and validate it.

    A plugin that cannot be found is reported through the logger; a plugin
    that is found but fails validation is rejected as well.

    Returns:
        the plugin module when it was imported and passed validation,
        `None` otherwise.
    """
    try:
        candidate = _load_plugin(name)
    except ModuleNotFoundError:
        logger.error(f'Could not find plugin with name: "{name}"')
        return None

    return candidate if _is_valid_plugin(candidate) else None
[docs]
def load_plugins():
    """Discover, load and register all the fetcher plugins of this package.

    Every ``*.py`` file located next to this module (except ``__init__``) is
    considered a plugin. Each one is loaded with `load_plugin()` and, when
    valid, stored in `PLUGINS` under its file stem.

    Afterwards, `UrlInformation` and `ContentInformation` in the
    ``magpie.datamodel`` module are replaced by the union of their original
    type with the `Info` (resp. `Content`) class of every registered plugin.
    """
    # Directory listing order depends on the filesystem: sort the names so that
    # the plugins are always registered in the same order.
    stems = sorted(p.stem for p in Path(__file__).parent.glob('*.py'))

    # ensure 'generic' plugin is loaded last. The fact that we put it in a dict later
    # is not a problem as dicts are ordered since python 3.7
    specific = [stem for stem in stems if stem not in ('__init__', 'generic')]

    for name in [*specific, 'generic']:
        plugin = load_plugin(name)
        if plugin is not None:
            PLUGINS[name] = plugin

    union_info_type = UrlInformation
    union_content_type = ContentInformation
    for plugin in PLUGINS.values():
        union_info_type = union_info_type | plugin.Info
        union_content_type = union_content_type | plugin.Content

    logger.debug(f"loaded plugins: {PLUGINS}")

    # publish the extended union types on the datamodel module itself
    datamodel = sys.modules['magpie.datamodel']
    datamodel.UrlInformation = union_info_type
    datamodel.ContentInformation = union_content_type
# Discover and register the fetcher plugins as a side effect of importing this module.
load_plugins()