# Source code for magpie.network

import random

import httpx
from celery import shared_task

from magpie import config

# Public API of this module.
__all__ = ['http_get', 'http_get_json']

# Browser User-Agent strings to rotate through when querying third-party APIs.
# One of them is picked at random per request when RANDOMIZE_USER_AGENT is
# enabled and the caller did not supply a 'User-Agent' header.
user_agents = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',  # noqa: E501
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',  # noqa: E501
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15',  # noqa: E501
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36 Edg/91.0.864.59'  # noqa: E501
]


# Module-level switch, off by default. It is read at call time by the
# httpx-backed `_http_get`, so changing it affects subsequent requests.
RANDOMIZE_USER_AGENT = False

# Select the low-level fetch implementation once, at import time.
if config.MOCK_NETWORK:
    # Mock mode: use the project's mock implementation from
    # magpie.mock.network instead of issuing requests through httpx.
    from magpie.mock.network import http_get
    _http_get = http_get

else:
    def _http_get(url: str, headers=None) -> httpx.Response:
        """Fetch ``url`` with an HTTP GET and return the successful response.

        Redirects are followed. When ``RANDOMIZE_USER_AGENT`` is true and the
        given headers contain no ``'User-Agent'`` key, a random entry of
        ``user_agents`` is sent as the User-Agent.

        Args:
            url: the URL to fetch
            headers: optional mapping of http headers to send; it is never
                modified by this function

        Returns:
            the ``httpx.Response`` for the request

        Raises:
            httpx.HTTPStatusError: if the response has an error status code
        """
        headers = headers or {}
        if RANDOMIZE_USER_AGENT and 'User-Agent' not in headers:
            # Build a new dict rather than assigning into ``headers`` so the
            # caller's mapping is left untouched.
            headers = {**headers, 'User-Agent': random.choice(user_agents)}
        response = httpx.get(url, headers=headers, follow_redirects=True)
        # Do not rely on the return value of raise_for_status(): only newer
        # httpx releases return the response from it.
        response.raise_for_status()
        return response


@shared_task
def http_get(url: str, headers=None) -> str:
    """Perform an http GET method on the given URL.

    ```{tip}
    This can be run as a celery task
    ```

    Args:
        url: the given URL to fetch
        headers: optional http headers to pass along the request

    Returns:
        the text content of the given URL

    Raises:
        httpx.HTTPStatusError: if the server answers with an error status
            (when the real, non-mock network backend is in use)
    """
    return _http_get(url, headers).text
type JsonObject = int | float | str | list | dict
@shared_task
def http_get_json(url: str, headers=None) -> JsonObject:
    """Perform an http GET method on the given URL and return it as a JSON object.

    ```{tip}
    This can be run as a celery task
    ```

    Args:
        url: the given URL to fetch
        headers: optional http headers to pass along the request

    Returns:
        a JSON object parsed from the content of the given URL

    Raises:
        httpx.HTTPStatusError: if the server answers with an error status
            (when the real, non-mock network backend is in use)
    """
    return _http_get(url, headers).json()