Source code for magpie.network
import random
import httpx
from celery import shared_task
from magpie import config
__all__ = ['http_get', 'http_get_json']
# Pool of desktop browser User-Agent strings. One is picked at random for
# outgoing requests to third-party APIs when RANDOMIZE_USER_AGENT is enabled
# and the caller did not supply a User-Agent header.
user_agents = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',  # noqa: E501
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',  # noqa: E501
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15',  # noqa: E501
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36 Edg/91.0.864.59',  # noqa: E501
]
# Opt-in switch: when True, requests that do not carry an explicit
# 'User-Agent' header get one picked at random from `user_agents`.
RANDOMIZE_USER_AGENT = False

if config.MOCK_NETWORK:
    # Mock mode: every fetch goes through the mock implementation instead of
    # the real network.
    from magpie.mock.network import http_get
    _http_get = http_get
else:
    def _http_get(url: str, headers=None) -> httpx.Response:
        """Issue a GET request for `url` and return the successful response.

        Redirects are followed.

        Args:
            url: the URL to fetch
            headers: optional http headers to pass along the request; the
                object passed in is never modified

        Returns:
            the `httpx.Response` of the request

        Raises:
            httpx.HTTPStatusError: if the response has an error status code
        """
        headers = headers or {}
        if RANDOMIZE_USER_AGENT and 'User-Agent' not in headers:
            # Build a new mapping rather than assigning into `headers`, so a
            # dict owned by the caller is not modified behind its back.
            headers = {**headers, 'User-Agent': random.choice(user_agents)}
        response = httpx.get(url, headers=headers, follow_redirects=True)
        # Return the response explicitly so this does not depend on whether
        # the installed httpx returns it from raise_for_status().
        response.raise_for_status()
        return response
[docs]
@shared_task
def http_get(url: str, headers=None) -> str:
    """Fetch the given URL with an http GET and return its content as text.

    ```{tip}
    This can be run as a celery task
    ```

    Args:
        url: the URL to request
        headers: optional http headers to send with the request

    Returns:
        the text content of the response
    """
    response = _http_get(url, headers)
    return response.text
type JsonObject = int | float | str | list | dict
[docs]
@shared_task
def http_get_json(url: str, headers=None) -> JsonObject:
    """Fetch the given URL with an http GET and parse its content as JSON.

    ```{tip}
    This can be run as a celery task
    ```

    Args:
        url: the URL to request
        headers: optional http headers to send with the request

    Returns:
        the JSON object parsed from the response content
    """
    response = _http_get(url, headers)
    return response.json()