Skip to main content

tests/test_fetch.py

"""
Tests for `chives.fetch`.
"""
5import filecmp
6import json
7from pathlib import Path
8from typing import Any
9from urllib.error import HTTPError
11import pytest
12import vcr
13from vcr.cassette import Cassette
15from chives.fetch import build_request, download_image, fetch_url, QueryParams
@pytest.mark.parametrize(
    "url, params, expected_url",
    [
        # No params: the URL comes back unchanged.
        ("https://example.com", None, "https://example.com"),
        # Params supplied as a mapping.
        (
            "https://example.com",
            {"one": "1", "two": "2"},
            "https://example.com?one=1&two=2",
        ),
        # Params supplied as a list of (key, value) pairs.
        (
            "https://example.com",
            [("one", "1"), ("two", "2")],
            "https://example.com?one=1&two=2",
        ),
        # The same key may appear more than once in a list of pairs.
        (
            "https://example.com",
            [("num", "1"), ("num", "2")],
            "https://example.com?num=1&num=2",
        ),
        # The query string is placed before an existing fragment.
        (
            "https://example.com#fragment",
            [("num", "1"), ("num", "2")],
            "https://example.com?num=1&num=2#fragment",
        ),
        # New params are appended after an existing query string.
        (
            "https://example.com?existing=1",
            [("num", "1"), ("num", "2")],
            "https://example.com?existing=1&num=1&num=2",
        ),
        # Existing query string and fragment together.
        (
            "https://example.com?existing=1#fragment",
            [("num", "1"), ("num", "2")],
            "https://example.com?existing=1&num=1&num=2#fragment",
        ),
    ],
)
def test_build_request(url: str, params: QueryParams | None, expected_url: str) -> None:
    """
    Tests for `build_request`.

    Builds a request from `url` and `params`, then checks that the
    request's `full_url` matches `expected_url`.
    """
    req = build_request(url, params=params)
    assert req.full_url == expected_url
class TestFetchUrl:
    """
    Tests for `fetch_url`.

    Every test requests the `vcr_cassette` fixture without using it in
    the test body. The fixture is defined outside this file -- presumably
    it records/replays the HTTP traffic; confirm against the conftest.
    """

    def test_http_200(self, vcr_cassette: Cassette) -> None:
        """
        Fetch a URL and check we get the expected response body.
        """
        resp = fetch_url("http://httpbin.org/robots.txt")
        assert resp == b"User-agent: *\nDisallow: /deny\n"

    def test_http_404(self, vcr_cassette: Cassette) -> None:
        """
        Fetch a URL that returns a 404 Not Found error.
        """
        with pytest.raises(HTTPError) as exc:
            fetch_url("http://httpbin.org/status/404")

        # These checks must sit outside the `with` block: anything placed
        # after the raising call inside it would never run.
        assert exc.value.code == 404

        # Explicitly close the captured HTTPError -- presumably so the
        # response it wraps is not left open after the test.
        exc.value.close()

    def test_query_params(self, vcr_cassette: Cassette) -> None:
        """
        Pass some query parameters in the fetch request.
        """
        resp = fetch_url(
            "http://httpbin.org/get",
            params={"package": "chives", "author": "alexwlchan"},
        )

        # The JSON response body is expected to carry the query
        # parameters under its "args" key.
        args = json.loads(resp)["args"]

        assert args["package"] == "chives"
        assert args["author"] == "alexwlchan"

    def test_headers(self, vcr_cassette: Cassette) -> None:
        """
        Pass some headers in the fetch request.
        """
        resp = fetch_url(
            "http://httpbin.org/headers",
            headers={"X-Package": "chives", "X-Author": "alexwlchan"},
        )

        # The JSON response body is expected to carry the request
        # headers under its "headers" key.
        headers = json.loads(resp)["headers"]

        assert headers["X-Package"] == "chives"
        assert headers["X-Author"] == "alexwlchan"
class TestDownloadImage:
    """
    Tests for `download_image`.

    Downloaded files are compared byte-for-byte against fixture images
    under `tests/fixtures/media`, and every output path lives under
    pytest's `tmp_path` so no test writes into the working directory.
    """

    def test_download_image(self, tmp_path: Path, vcr_cassette: Cassette) -> None:
        """
        Download an image and compare the result.
        """
        url = "https://alexwlchan.net/images/2026/470906.png"

        # The prefix has no extension; the returned path is expected to
        # have `.png` appended.
        out_path = download_image(url, tmp_path / "470906")
        assert out_path == tmp_path / "470906.png"
        assert out_path.exists()

        assert filecmp.cmp(out_path, "tests/fixtures/media/470906.png", shallow=False)

    def test_download_to_nested_path(
        self, tmp_path: Path, vcr_cassette: Cassette
    ) -> None:
        """
        You can download an image to a heavily nested path, and it creates
        the parent directory.
        """
        url = "https://alexwlchan.net/images/2026/470906.png"

        out_path = download_image(url, tmp_path / "a/b/c/470906")
        assert out_path == tmp_path / "a/b/c/470906.png"
        assert out_path.exists()

        assert filecmp.cmp(out_path, "tests/fixtures/media/470906.png", shallow=False)

    def test_cannot_download_twice(
        self, tmp_path: Path, vcr_cassette: Cassette
    ) -> None:
        """
        Trying to overwrite an existing image throws a FileExistsError.
        """
        url1 = "https://alexwlchan.net/images/2026/470906.png"
        url2 = "https://alexwlchan.net/images/2026/f69b96.png"

        out_path = download_image(url1, tmp_path / "squares")
        assert filecmp.cmp(out_path, "tests/fixtures/media/470906.png", shallow=False)

        # A different image downloaded to the same prefix must be refused.
        with pytest.raises(FileExistsError):
            download_image(url2, tmp_path / "squares")

        # The file contents are the same as the first download.
        assert filecmp.cmp(out_path, "tests/fixtures/media/470906.png", shallow=False)

    def test_non_image(self, tmp_path: Path, vcr_cassette: Cassette) -> None:
        """
        Fetching an "image" which has a non-image Content-Type header
        throws an error.
        """
        url = "http://httpbin.org/status/200"

        # Target a path under tmp_path, so that a regression which lets
        # the download succeed cannot leave a stray file in the current
        # working directory.
        with pytest.raises(ValueError, match="unrecognised image format"):
            download_image(url, out_prefix=tmp_path / "example")

    def test_no_content_type_header(self, tmp_path: Path, cassette_name: str) -> None:
        """
        Fetching a URL which doesn't return a Content-Type header
        throws an error.
        """
        url = "http://httpbin.org/status/200"

        def delete_content_type_header(response: Any) -> Any:
            """
            Replace the response's Content-Type header with an empty list.
            """
            response["headers"]["Content-Type"] = []
            return response

        # This test opens its own cassette instead of using the
        # `vcr_cassette` fixture, so it can pass a
        # `before_record_response` hook that edits the response headers.
        with vcr.use_cassette(
            cassette_name,
            cassette_library_dir="tests/fixtures/cassettes",
            decode_compressed_response=True,
            before_record_response=delete_content_type_header,
        ):
            # As in `test_non_image`, write under tmp_path rather than
            # the current working directory.
            with pytest.raises(
                RuntimeError, match="no Content-Type header in response"
            ):
                download_image(url, out_prefix=tmp_path / "example")