fetch: add a download_image function
- ID: 9881a6a
- date: 2026-04-01 06:25:59+00:00
- author: Alex Chan <alex@alexwlchan.net>
- parent: 22643ed
- message: fetch: add a `download_image` function
- changed files: 8 files, 218 additions, 39 deletions
  - CHANGELOG.md
  - src/chives/__init__.py
  - src/chives/fetch.py
  - tests/fixtures/cassettes/TestDownloadImage.test_cannot_download_twice.yml
  - tests/fixtures/cassettes/TestDownloadImage.test_download_image.yml
  - tests/fixtures/cassettes/TestDownloadImage.test_download_to_nested_path.yml
  - tests/fixtures/media/470906.png
  - tests/test_fetch.py
Changed files
CHANGELOG.md (3899) → CHANGELOG.md (3972)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a76306d..d03c8fe 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
# CHANGELOG
+## v32 - 2026-04-01
+
+Add a `download_image` function to `chives.fetch`.
+
## v31 - 2026-03-31
Remove the `hyperlink` dependency.
src/chives/__init__.py (391) → src/chives/__init__.py (391)
diff --git a/src/chives/__init__.py b/src/chives/__init__.py
index 0271393..a3ab715 100644
--- a/src/chives/__init__.py
+++ b/src/chives/__init__.py
@@ -11,4 +11,4 @@ I share across multiple sites.
"""
-__version__ = "31"
+__version__ = "32"
src/chives/fetch.py (3260) → src/chives/fetch.py (3246)
diff --git a/src/chives/fetch.py b/src/chives/fetch.py
index 6ea6613..3d60e78 100644
--- a/src/chives/fetch.py
+++ b/src/chives/fetch.py
@@ -2,6 +2,7 @@
Make HTTP requests using the standard library.
"""
+from pathlib import Path
import ssl
from typing import Literal
import urllib.parse
@@ -10,7 +11,10 @@ import urllib.request
import certifi
-__all__ = ["fetch_url", "fetch_image", "ImageFormat"]
+__all__ = ["download_image", "fetch_url", "fetch_image", "ImageFormat"]
+
+
+ssl_context = ssl.create_default_context(cafile=certifi.where())
def _build_request(
@@ -42,14 +46,11 @@ def fetch_url(
Fetch the contents of the given URL and return the body of
the response.
"""
- ssl_context = ssl.create_default_context(cafile=certifi.where())
-
req = _build_request(url, params, headers)
- resp = urllib.request.urlopen(req, context=ssl_context)
+ with urllib.request.urlopen(req, context=ssl_context) as resp:
+ data = resp.read()
- data = resp.read()
- resp.close()
assert isinstance(data, bytes), type(data)
return data
@@ -77,7 +78,7 @@ def _guess_image_format(content_type: str | None) -> ImageFormat:
try:
return content_type_mapping[content_type]
except KeyError:
- raise RuntimeError(f"unrecognised image format: {content_type}")
+ raise ValueError(f"unrecognised image format: {content_type}")
def fetch_image(
@@ -94,38 +95,36 @@ def fetch_image(
req = _build_request(url, params, headers)
- resp = urllib.request.urlopen(req, context=ssl_context)
+ with urllib.request.urlopen(req, context=ssl_context) as resp:
+ img_data = resp.read()
+ assert isinstance(img_data, bytes), type(img_data)
img_format = _guess_image_format(content_type=resp.headers["content-type"])
- img_data = resp.read()
- resp.close()
- assert isinstance(img_data, bytes), type(img_data)
-
return img_data, img_format
-# def download_image(
-# url: str,
-# out_prefix: Path,
-# *,
-# params: dict[str, str] | None = None,
-# headers: dict[str, str] | None = None,
-# ) -> Path:
-# """
-# Download an image from the given URL to the target path, and return
-# the path of the downloaded file.
-#
-# Add the appropriate file extension, based on the image's Content-Type.
-#
-# Throws a FileExistsError if you try to overwrite an existing file.
-# """
-# im_data, im_format = fetch_image(url, params=params, headers=headers)
-# out_path = out_prefix.with_suffix("." + im_format)
-#
-# out_path.parent.mkdir(exist_ok=True, parents=True)
-#
-# with open(out_path, "xb") as out_file:
-# out_file.write(im_data)
-#
-# return out_path
+def download_image(
+    url: str,
+    out_prefix: Path,
+    *,
+    params: dict[str, str] | None = None,
+    headers: dict[str, str] | None = None,
+) -> Path:
+    """
+    Download an image from the given URL to the target path, and return
+    the path of the downloaded file.
+
+    Add the appropriate file extension, based on the image's Content-Type.
+    The extension is appended to the final component of `out_prefix`, so
+    any dots already in the prefix are kept: `photo.v2` is saved as
+    `photo.v2.png`, not `photo.png`.
+
+    Any missing parent directories are created.
+
+    Throws a FileExistsError if you try to overwrite an existing file.
+    """
+    im_data, im_format = fetch_image(url, params=params, headers=headers)
+
+    # Append the extension rather than calling `with_suffix()`, which
+    # would replace whatever follows the last dot in the prefix.
+    out_path = out_prefix.with_name(f"{out_prefix.name}.{im_format}")
+
+    out_path.parent.mkdir(exist_ok=True, parents=True)
+
+    # Mode "x" makes the open fail if the file already exists, so an
+    # earlier download is never overwritten.
+    with open(out_path, "xb") as out_file:
+        out_file.write(im_data)
+
+    return out_path
tests/fixtures/cassettes/TestDownloadImage.test_cannot_download_twice.yml (0) → tests/fixtures/cassettes/TestDownloadImage.test_cannot_download_twice.yml (2582)
diff --git a/tests/fixtures/cassettes/TestDownloadImage.test_cannot_download_twice.yml b/tests/fixtures/cassettes/TestDownloadImage.test_cannot_download_twice.yml
new file mode 100644
index 0000000..2189950
--- /dev/null
+++ b/tests/fixtures/cassettes/TestDownloadImage.test_cannot_download_twice.yml
@@ -0,0 +1,61 @@
+interactions:
+- request:
+ body: null
+ headers:
+ Host:
+ - alexwlchan.net
+ method: GET
+ uri: https://alexwlchan.net/images/2026/470906.png
+ response:
+ body:
+ string: !!binary |
+ iVBORw0KGgoAAAANSUhEUgAAASwAAAEsCAMAAABOo35HAAAAOVBMVEU+BwU/BwU/CAVACAVBCAVC
+ CAVDCAVECAVGCAVHCQZICQZJCQZKCQZLCQZMCQZNCQZOCQZOCgZQCgam5+ZPAAAB5ElEQVR42u3d
+ wQqCQABFUTMNEyHw//8xURDDhH6ht5hZnbt/CKeZ5VD3aNLGeLHEi1e82ONFHy+6RrBgwYIFC5Zg
+ wYIFCxYswYIFCxYsWIIFCxYsWLAECxYsWLBgCRYsWLBgwRIsWLBgwYIlWLBgwYIFS/9jTfFkjRdz
+ vPjEi/xd6uBkuYawYMESLFiwYMGCJViwYMGCBUuwYMGCBQuWYMGCBQsWLMGCBQsWLFiCBQsWLFiw
+ BAsWLFiwYAkWrKJYZzy5x4v8G228uOLF18lyDWHBgiVYsGDBggVLsGDBggULlmDBggULFizBggUL
+ FixYggULFixYsAQLFixYsGAJFixYsGDBEixYRbFqvEsd48UWL27xonWyXENYsGAJFixYsGDBEixY
+ sGDBgiVYsGDBggVLsGDBggULlmDBggULFizBggULFixYggULFixYsAQLVlGsJZ7UePua/19qX+Eb
+ ThYsWLBgwRIsWLBgwYIlWLBgwYIFS7BgwYIFC5ZgwYIFCxYswYIFCxYsWIIFCxYsWLAECxYsWLBg
+ CVaCNceTd7w4KvyGz3hxOlmuISxYsAQLFixYsGAJFixYsGDBEixYsGDBgiVYsGDBggVLsGDBggUL
+ lmDBggULFizBggULFixYggWrZD/yQRWvEwG66AAAAABJRU5ErkJggg==
+ headers:
+ Content-Length:
+ - '610'
+ Content-Type:
+ - image/png
+ status:
+ code: 200
+ message: OK
+- request:
+ body: null
+ headers:
+ Host:
+ - alexwlchan.net
+ method: GET
+ uri: https://alexwlchan.net/images/2026/f69b96.png
+ response:
+ body:
+ string: !!binary |
+ iVBORw0KGgoAAAANSUhEUgAAASwAAAEsCAMAAABOo35HAAAAZlBMVEXycGnydG3ydW7ydm/yeHHy
+ eXLzgHnzhH70iIL0i4b1ko31lY/1lpD1l5L1mJL2m5b2nJf2npr2n5v2oJv2o573p6P3q6f3sKz4
+ tLD5vLn5wL35wb75xMH5xcP5xsP5xsT6yMX6yscV0ZT9AAAB60lEQVR42u3dvWoCQQBG0RRLEDQo
+ aBff/80MaidW/kR8hlvMNp7bf83ZnXKYafFV2+XFPS+uebHKi2mGxQcHCxYsWLBgCRYsWLBgwRIs
+ WLBgwYIlWLBgwYIFS7BgwYIFC5ZgwYIFCxYswYIFCxYsWIIFCxYsWLA0fefJOS9+8+KWF/1G7sOf
+ 5RjCggVLsGDBggULlmDBggULFizBggULFixYggULFixYsAQLFixYsGAJFixYsGDBEixYsGDBgiVY
+ sIZi/eTJf16c8uKVF8e8cJPVMYQFC5ZgwYIFCxYswYIFCxYsWIIFCxYsWLAECxYsWLBgCRYsWLBg
+ wRIsWLBgwYIlWLBgwYIFS7BgjcX6y5NlXmzz4pAXm7y4+LMcQ1iwYAkWLFiwYMESLFiwYMGCJViw
+ YMGCBUuwYMGCBQuWYMGCBQsWLMGCBQsWLFiCBQsWLFiwBAvWUKx+y7S/l3qb4Rv2e6l7f5ZjCAsW
+ LMGCBQsWLFiCBQsWLFiwBAsWLFiwYAkWLFiwYMESLFiwYMGCJViwYMGCBUuwYMGCBQuWYMEaivXM
+ k3VePGZY9JdivcnqGMKCBUuwYMGCBQuWYMGCBQsWLMGCBQsWLFiCBQsWLFiwBAsWLFiwYAkWLFiw
+ YMESLFiwYMGCJViwhvYGGrgYp8lVDqkAAAAASUVORK5CYII=
+ headers:
+ Content-Length:
+ - '662'
+ Content-Type:
+ - image/png
+ status:
+ code: 200
+ message: OK
+version: 1
tests/fixtures/cassettes/TestDownloadImage.test_download_image.yml (0) → tests/fixtures/cassettes/TestDownloadImage.test_download_image.yml (1265)
diff --git a/tests/fixtures/cassettes/TestDownloadImage.test_download_image.yml b/tests/fixtures/cassettes/TestDownloadImage.test_download_image.yml
new file mode 100644
index 0000000..e2b8195
--- /dev/null
+++ b/tests/fixtures/cassettes/TestDownloadImage.test_download_image.yml
@@ -0,0 +1,31 @@
+interactions:
+- request:
+ body: null
+ headers:
+ Host:
+ - alexwlchan.net
+ method: GET
+ uri: https://alexwlchan.net/images/2026/470906.png
+ response:
+ body:
+ string: !!binary |
+ iVBORw0KGgoAAAANSUhEUgAAASwAAAEsCAMAAABOo35HAAAAOVBMVEU+BwU/BwU/CAVACAVBCAVC
+ CAVDCAVECAVGCAVHCQZICQZJCQZKCQZLCQZMCQZNCQZOCQZOCgZQCgam5+ZPAAAB5ElEQVR42u3d
+ wQqCQABFUTMNEyHw//8xURDDhH6ht5hZnbt/CKeZ5VD3aNLGeLHEi1e82ONFHy+6RrBgwYIFC5Zg
+ wYIFCxYswYIFCxYsWIIFCxYsWLAECxYsWLBgCRYsWLBgwRIsWLBgwYIlWLBgwYIFS/9jTfFkjRdz
+ vPjEi/xd6uBkuYawYMESLFiwYMGCJViwYMGCBUuwYMGCBQuWYMGCBQsWLMGCBQsWLFiCBQsWLFiw
+ BAsWLFiwYAkWrKJYZzy5x4v8G228uOLF18lyDWHBgiVYsGDBggVLsGDBggULlmDBggULFizBggUL
+ FixYggULFixYsAQLFixYsGAJFixYsGDBEixYRbFqvEsd48UWL27xonWyXENYsGAJFixYsGDBEixY
+ sGDBgiVYsGDBggVLsGDBggULlmDBggULFizBggULFixYggULFixYsAQLVlGsJZ7UePua/19qX+Eb
+ ThYsWLBgwRIsWLBgwYIlWLBgwYIFS7BgwYIFC5ZgwYIFCxYswYIFCxYsWIIFCxYsWLAECxYsWLBg
+ CVaCNceTd7w4KvyGz3hxOlmuISxYsAQLFixYsGAJFixYsGDBEixYsGDBgiVYsGDBggVLsGDBggUL
+ lmDBggULFizBggULFixYggWrZD/yQRWvEwG66AAAAABJRU5ErkJggg==
+ headers:
+ Content-Length:
+ - '610'
+ Content-Type:
+ - image/png
+ status:
+ code: 200
+ message: OK
+version: 1
tests/fixtures/cassettes/TestDownloadImage.test_download_to_nested_path.yml (0) → tests/fixtures/cassettes/TestDownloadImage.test_download_to_nested_path.yml (1265)
diff --git a/tests/fixtures/cassettes/TestDownloadImage.test_download_to_nested_path.yml b/tests/fixtures/cassettes/TestDownloadImage.test_download_to_nested_path.yml
new file mode 100644
index 0000000..e2b8195
--- /dev/null
+++ b/tests/fixtures/cassettes/TestDownloadImage.test_download_to_nested_path.yml
@@ -0,0 +1,31 @@
+interactions:
+- request:
+ body: null
+ headers:
+ Host:
+ - alexwlchan.net
+ method: GET
+ uri: https://alexwlchan.net/images/2026/470906.png
+ response:
+ body:
+ string: !!binary |
+ iVBORw0KGgoAAAANSUhEUgAAASwAAAEsCAMAAABOo35HAAAAOVBMVEU+BwU/BwU/CAVACAVBCAVC
+ CAVDCAVECAVGCAVHCQZICQZJCQZKCQZLCQZMCQZNCQZOCQZOCgZQCgam5+ZPAAAB5ElEQVR42u3d
+ wQqCQABFUTMNEyHw//8xURDDhH6ht5hZnbt/CKeZ5VD3aNLGeLHEi1e82ONFHy+6RrBgwYIFC5Zg
+ wYIFCxYswYIFCxYsWIIFCxYsWLAECxYsWLBgCRYsWLBgwRIsWLBgwYIlWLBgwYIFS/9jTfFkjRdz
+ vPjEi/xd6uBkuYawYMESLFiwYMGCJViwYMGCBUuwYMGCBQuWYMGCBQsWLMGCBQsWLFiCBQsWLFiw
+ BAsWLFiwYAkWrKJYZzy5x4v8G228uOLF18lyDWHBgiVYsGDBggVLsGDBggULlmDBggULFizBggUL
+ FixYggULFixYsAQLFixYsGAJFixYsGDBEixYRbFqvEsd48UWL27xonWyXENYsGAJFixYsGDBEixY
+ sGDBgiVYsGDBggVLsGDBggULlmDBggULFizBggULFixYggULFixYsAQLVlGsJZ7UePua/19qX+Eb
+ ThYsWLBgwRIsWLBgwYIlWLBgwYIFS7BgwYIFC5ZgwYIFCxYswYIFCxYsWIIFCxYsWLAECxYsWLBg
+ CVaCNceTd7w4KvyGz3hxOlmuISxYsAQLFixYsGAJFixYsGDBEixYsGDBgiVYsGDBggVLsGDBggUL
+ lmDBggULFizBggULFixYggWrZD/yQRWvEwG66AAAAABJRU5ErkJggg==
+ headers:
+ Content-Length:
+ - '610'
+ Content-Type:
+ - image/png
+ status:
+ code: 200
+ message: OK
+version: 1
tests/fixtures/media/470906.png (0) → tests/fixtures/media/470906.png (610)
diff --git a/tests/fixtures/media/470906.png b/tests/fixtures/media/470906.png
new file mode 100644
index 0000000..e5a23cb
Binary files /dev/null and b/tests/fixtures/media/470906.png differ
tests/test_fetch.py (3309) → tests/test_fetch.py (5164)
diff --git a/tests/test_fetch.py b/tests/test_fetch.py
index aba5278..442de02 100644
--- a/tests/test_fetch.py
+++ b/tests/test_fetch.py
@@ -2,8 +2,10 @@
Tests for `chives.fetch`.
"""
+import filecmp
from io import BytesIO
import json
+from pathlib import Path
from typing import Any
from urllib.error import HTTPError
@@ -12,7 +14,7 @@ import pytest
import vcr
from vcr.cassette import Cassette
-from chives.fetch import fetch_image, fetch_url
+from chives.fetch import download_image, fetch_image, fetch_url
class TestFetchUrl:
@@ -90,7 +92,7 @@ class TestFetchImage:
"""
url = "http://httpbin.org/status/200"
- with pytest.raises(RuntimeError, match="unrecognised image format"):
+ with pytest.raises(ValueError, match="unrecognised image format"):
fetch_image(url)
def test_no_content_type_header(self, cassette_name: str) -> None:
@@ -114,3 +116,54 @@ class TestFetchImage:
RuntimeError, match="no Content-Type header in response"
):
fetch_image(url)
+
+
+class TestDownloadImage:
+    """
+    Tests for `download_image`.
+    """
+
+    # The image these tests download, and the fixture file that the
+    # downloaded bytes are compared against.
+    IMAGE_URL = "https://alexwlchan.net/images/2026/470906.png"
+    EXPECTED_IMAGE = "tests/fixtures/media/470906.png"
+
+    def test_download_image(self, tmp_path: Path, vcr_cassette: Cassette) -> None:
+        """
+        A downloaded image is saved with a `.png` extension and matches
+        the fixture byte-for-byte.
+        """
+        saved = download_image(self.IMAGE_URL, tmp_path / "470906")
+
+        assert saved == tmp_path / "470906.png"
+        assert saved.exists()
+
+        assert filecmp.cmp(saved, self.EXPECTED_IMAGE, shallow=False)
+
+    def test_download_to_nested_path(
+        self, tmp_path: Path, vcr_cassette: Cassette
+    ) -> None:
+        """
+        Downloading to a deeply nested path creates the missing parent
+        directories.
+        """
+        saved = download_image(self.IMAGE_URL, tmp_path / "a/b/c/470906")
+
+        assert saved == tmp_path / "a/b/c/470906.png"
+        assert saved.exists()
+
+        assert filecmp.cmp(saved, self.EXPECTED_IMAGE, shallow=False)
+
+    def test_cannot_download_twice(
+        self, tmp_path: Path, vcr_cassette: Cassette
+    ) -> None:
+        """
+        A second download to the same prefix throws a FileExistsError
+        and leaves the first file as it was.
+        """
+        other_url = "https://alexwlchan.net/images/2026/f69b96.png"
+        target = tmp_path / "squares"
+
+        first = download_image(self.IMAGE_URL, target)
+        assert filecmp.cmp(first, self.EXPECTED_IMAGE, shallow=False)
+
+        with pytest.raises(FileExistsError):
+            download_image(other_url, target)
+
+        # The file contents are the same as the first download.
+        assert filecmp.cmp(first, self.EXPECTED_IMAGE, shallow=False)