Source code for magpie.fetchers

import sys
from importlib import import_module
from pathlib import Path
from types import ModuleType

from loguru import logger

from magpie.datamodel import Base, ContentInformation, ContentInformationBase, DataFetcher, UrlInformation

# Registry of loaded plugins. `load_plugins()` fills it in: each key is the plugin's
# short name (the stem of its `.py` file in this package) and each value is the
# imported, validated plugin module.
PLUGINS: dict[str, ModuleType] = {}
"""Dict of `DataFetcher` plugins that have been found and loaded by Magpie."""


def get_fetcher(name: str) -> DataFetcher:
    """Instantiate the fetcher provided by the plugin registered under ``name``.

    Args:
        name: key of the plugin in `PLUGINS`.

    Returns:
        a new instance of that plugin's ``Fetcher`` class.

    Raises:
        KeyError: if no plugin is registered under ``name``.
    """
    plugin_module = PLUGINS[name]
    fetcher_cls = plugin_module.Fetcher
    return fetcher_cls()
def all_fetchers():
    """Yield a new ``Fetcher`` instance for each registered plugin.

    Plugins are visited in the iteration order of `PLUGINS` (insertion order), and
    each fetcher is only constructed when the consumer asks for it.
    """
    yield from (plugin_module.Fetcher() for plugin_module in PLUGINS.values())
def _check_plugin_class(module: ModuleType, attr: str, base: type, base_name: str) -> type | None:
    """Return ``module.<attr>`` if it is a class deriving from ``base``, else log why not.

    Args:
        module: the plugin module being validated.
        attr: name of the class the plugin must expose (e.g. ``'Info'``).
        base: class that ``module.<attr>`` has to inherit from.
        base_name: name of ``base`` as it should appear in the log messages.

    Returns:
        the class found on the module, or `None` if it is missing or does not
        inherit from ``base`` (an error is logged in both cases).
    """
    name = module.__name__
    cls = getattr(module, attr, None)
    if cls is None:
        logger.error(f'While loading "{name}" plugin: `{attr}` class is not present')
        return None

    # `issubclass()` raises TypeError when its first argument is not a class, so
    # guard with `isinstance(cls, type)` to reject e.g. `Info = 3` instead of crashing.
    if not (isinstance(cls, type) and issubclass(cls, base)):
        logger.error(f'While loading "{name}" plugin: `{attr}` class does not inherit from `{base_name}`')
        logger.error('Fix it like that:')
        logger.error(f' from magpie.modelbase import {base_name}')
        logger.error(f' class {attr}({base_name}):')
        logger.error(' ...')
        return None

    return cls


def _is_valid_plugin(module: ModuleType) -> bool:
    """Tell whether ``module`` exposes everything a fetcher plugin needs.

    A valid plugin module provides:

    - an ``Info`` class inheriting from ``Base``
    - a ``Content`` class inheriting from ``ContentInformationBase``
    - a ``Fetcher`` class inheriting from ``DataFetcher`` with a callable ``match``

    Checks run in that order and stop at the first failure, which is logged.

    Returns:
        `True` if all checks pass, `False` otherwise.
    """
    if _check_plugin_class(module, 'Info', Base, 'Base') is None:
        return False

    if _check_plugin_class(module, 'Content', ContentInformationBase, 'ContentInformationBase') is None:
        return False

    fetcher_cls = _check_plugin_class(module, 'Fetcher', DataFetcher, 'DataFetcher')
    if fetcher_cls is None:
        return False

    try:
        match = fetcher_cls.match
    except AttributeError:
        logger.error('`Fetcher` class needs to provide at least the `match()` method')
        return False

    if not callable(match):
        logger.error('`Fetcher.match()` needs to be a callable method')
        # This used to fall through to `return True`, accepting the broken plugin.
        return False

    return True


def _load_plugin(name: str) -> ModuleType:
    """Import the module ``magpie.fetchers.<name>`` and return it.

    Any exception raised by the import (e.g. `ModuleNotFoundError`) propagates
    to the caller.
    """
    return import_module(f'magpie.fetchers.{name}')
def load_plugin(name: str) -> ModuleType | None:
    """Import the fetcher plugin called ``name`` and validate it.

    Args:
        name: short name of the plugin, as passed to `_load_plugin`.

    Returns:
        the plugin module when it could be imported and passes
        `_is_valid_plugin`, `None` otherwise.
    """
    try:
        candidate = _load_plugin(name)
    except ModuleNotFoundError:
        logger.error(f'Could not find plugin with name: "{name}"')
        return None

    # validation failures are logged by `_is_valid_plugin` itself
    return candidate if _is_valid_plugin(candidate) else None
def load_plugins():
    """Discover, load and register all fetcher plugins living next to this file.

    Every ``*.py`` file in this package's directory, except ``__init__.py``, is
    treated as a plugin and loaded through `load_plugin`. Those that load and
    validate are stored in `PLUGINS` under their file stem.

    Afterwards, ``UrlInformation`` and ``ContentInformation`` in ``magpie.datamodel``
    are replaced by the union of their original value with every loaded plugin's
    ``Info`` and ``Content`` class respectively.
    """
    # Sort the names so that load order (and thus the order of `PLUGINS` and of the
    # union members below) does not depend on the order the directory listing
    # happens to return. Filtering rather than `list.remove()` avoids a bare
    # ValueError if `generic.py` is absent; `load_plugin` logs that case instead.
    discovered = sorted(
        path.stem
        for path in Path(__file__).parent.glob('*.py')
        if path.stem not in ('__init__', 'generic')
    )

    # ensure 'generic' plugin is loaded last. The fact that we put it in a dict later
    # is not a problem as dicts are ordered since python 3.7
    for name in [*discovered, 'generic']:
        plugin = load_plugin(name)
        if plugin is not None:
            PLUGINS[name] = plugin

    union_info_type = UrlInformation
    union_content_type = ContentInformation
    for plugin in PLUGINS.values():
        union_info_type = union_info_type | plugin.Info
        union_content_type = union_content_type | plugin.Content

    logger.debug(f"loaded plugins: {PLUGINS}")

    # Rebind the names on the `magpie.datamodel` module object. Code that already did
    # `from magpie.datamodel import UrlInformation` (this module included) keeps
    # its reference to the previous object.
    datamodel = sys.modules['magpie.datamodel']
    datamodel.UrlInformation = union_info_type
    datamodel.ContentInformation = union_content_type
# Load all plugins as a side effect of importing this module, so that `PLUGINS`
# is populated before `get_fetcher()` / `all_fetchers()` are used.
load_plugins()