Skip to main content

src/chives/fetch.py

1"""
2Make HTTP requests using the standard library.
3"""
5from pathlib import Path
6import ssl
7import urllib.parse
8import urllib.request
10import certifi
# Public API of this module.
__all__ = [
    "build_request",
    "download_image",
    "fetch_url",
]

# TLS context passed to every urlopen() call in this module; certificates
# are checked against the CA bundle that certifi points to.
ssl_context = ssl.create_default_context(cafile=certifi.where())

# Query parameters: either a mapping or a list of (name, value) pairs.
QueryParams = dict[str, str] | list[tuple[str, str]]

# HTTP request headers, keyed by header name.
Headers = dict[str, str]
def build_request(
    url: str, *, params: QueryParams | None = None, headers: Headers | None = None
) -> urllib.request.Request:
    """
    Build a urllib Request, appending query parameters and attaching headers.

    Any query string already present in ``url`` is kept, including
    parameters with blank values, and ``params`` are appended after it.
    ``params`` may be a dict or a sequence of ``(name, value)`` pairs.

    When ``params`` is None the URL is used exactly as given.
    """
    if params is not None:
        extra = list(params.items()) if isinstance(params, dict) else list(params)

        u = urllib.parse.urlsplit(url)

        # keep_blank_values=True so that existing parameters like `?a=`
        # survive the round trip; by default parse_qsl silently drops them.
        existing = urllib.parse.parse_qsl(u.query, keep_blank_values=True)

        new_query = urllib.parse.urlencode(existing + extra)
        url = urllib.parse.urlunsplit(
            (u.scheme, u.netloc, u.path, new_query, u.fragment)
        )

    return urllib.request.Request(url, headers=headers or {})
def fetch_url(
    url: str, *, params: QueryParams | None = None, headers: Headers | None = None
) -> bytes:
    """
    Request the given URL and return the raw bytes of the response body.

    ``params`` and ``headers`` are passed through to ``build_request``,
    and the request is opened using the module-level ``ssl_context``.
    """
    request = build_request(url, params=params, headers=headers)

    with urllib.request.urlopen(request, context=ssl_context) as response:
        body: bytes = response.read()
        return body
58def choose_filename_extension(content_type: str | None) -> str:
59 """
60 Choose a filename extension for an image downloaded with the given
61 Content-Type header.
62 """
63 if content_type is None:
64 raise ValueError("no Content-Type header, cannot determine image format")
66 content_type_mapping = {
67 "image/jpeg": "jpg",
68 "image/png": "png",
69 "image/gif": "gif",
70 "image/webp": "webp",
71 }
73 try:
74 return content_type_mapping[content_type]
75 except KeyError:
76 raise ValueError(f"unrecognised Content-Type header: {content_type}")
def download_image(
    url: str,
    out_prefix: Path,
    *,
    params: QueryParams | None = None,
    headers: Headers | None = None,
) -> Path:
    """
    Fetch the image at the given URL, save it to disk, and return the
    path of the file that was written.

    The file extension is chosen from the response's Content-Type header
    and applied with ``Path.with_suffix``, so a suffix already present
    on ``out_prefix`` is replaced rather than kept.

    Missing parent directories are created.

    Throws a FileExistsError if the destination already exists; nothing
    is ever overwritten.
    """
    request = build_request(url, params=params, headers=headers)

    with urllib.request.urlopen(request, context=ssl_context) as response:
        payload: bytes = response.read()

    extension = choose_filename_extension(
        content_type=response.headers["content-type"]
    )

    destination = out_prefix.with_suffix(f".{extension}")
    destination.parent.mkdir(parents=True, exist_ok=True)

    # Mode "x" is exclusive creation: opening fails if the file is there.
    with open(destination, "xb") as out_file:
        out_file.write(payload)

    return destination