Skip to main content

fetch: add a download_image function

ID
9881a6a
date
2026-04-01 06:25:59+00:00
author
Alex Chan <alex@alexwlchan.net>
parent
22643ed
message
fetch: add a `download_image` function
changed files
8 files, 218 additions, 39 deletions

Changed files

CHANGELOG.md (3899) → CHANGELOG.md (3972)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a76306d..d03c8fe 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # CHANGELOG
 
+## v32 - 2026-04-01
+
+Add a `download_image` function to `chives.fetch`.
+
 ## v31 - 2026-03-31
 
 Remove the `hyperlink` dependency.

src/chives/__init__.py (391) → src/chives/__init__.py (391)

diff --git a/src/chives/__init__.py b/src/chives/__init__.py
index 0271393..a3ab715 100644
--- a/src/chives/__init__.py
+++ b/src/chives/__init__.py
@@ -11,4 +11,4 @@ I share across multiple sites.
 
 """
 
-__version__ = "31"
+__version__ = "32"

src/chives/fetch.py (3260) → src/chives/fetch.py (3246)

diff --git a/src/chives/fetch.py b/src/chives/fetch.py
index 6ea6613..3d60e78 100644
--- a/src/chives/fetch.py
+++ b/src/chives/fetch.py
@@ -2,6 +2,7 @@
 Make HTTP requests using the standard library.
 """
 
+from pathlib import Path
 import ssl
 from typing import Literal
 import urllib.parse
@@ -10,7 +11,10 @@ import urllib.request
 import certifi
 
 
-__all__ = ["fetch_url", "fetch_image", "ImageFormat"]
+__all__ = ["download_image", "fetch_url", "fetch_image", "ImageFormat"]
+
+
+ssl_context = ssl.create_default_context(cafile=certifi.where())
 
 
 def _build_request(
@@ -42,14 +46,11 @@ def fetch_url(
     Fetch the contents of the given URL and return the body of
     the response.
     """
-    ssl_context = ssl.create_default_context(cafile=certifi.where())
-
     req = _build_request(url, params, headers)
 
-    resp = urllib.request.urlopen(req, context=ssl_context)
+    with urllib.request.urlopen(req, context=ssl_context) as resp:
+        data = resp.read()
 
-    data = resp.read()
-    resp.close()
     assert isinstance(data, bytes), type(data)
 
     return data
@@ -77,7 +78,7 @@ def _guess_image_format(content_type: str | None) -> ImageFormat:
     try:
         return content_type_mapping[content_type]
     except KeyError:
-        raise RuntimeError(f"unrecognised image format: {content_type}")
+        raise ValueError(f"unrecognised image format: {content_type}")
 
 
 def fetch_image(
@@ -94,38 +95,36 @@ def fetch_image(
 
     req = _build_request(url, params, headers)
 
-    resp = urllib.request.urlopen(req, context=ssl_context)
+    with urllib.request.urlopen(req, context=ssl_context) as resp:
+        img_data = resp.read()
+        assert isinstance(img_data, bytes), type(img_data)
 
     img_format = _guess_image_format(content_type=resp.headers["content-type"])
 
-    img_data = resp.read()
-    resp.close()
-    assert isinstance(img_data, bytes), type(img_data)
-
     return img_data, img_format
 
 
-# def download_image(
-#     url: str,
-#     out_prefix: Path,
-#     *,
-#     params: dict[str, str] | None = None,
-#     headers: dict[str, str] | None = None,
-# ) -> Path:
-#     """
-#     Download an image from the given URL to the target path, and return
-#     the path of the downloaded file.
-#
-#     Add the appropriate file extension, based on the image's Content-Type.
-#
-#     Throws a FileExistsError if you try to overwrite an existing file.
-#     """
-#     im_data, im_format = fetch_image(url, params=params, headers=headers)
-#     out_path = out_prefix.with_suffix("." + im_format)
-#
-#     out_path.parent.mkdir(exist_ok=True, parents=True)
-#
-#     with open(out_path, "xb") as out_file:
-#         out_file.write(im_data)
-#
-#     return out_path
+def download_image(
+    url: str,
+    out_prefix: Path,
+    *,
+    params: dict[str, str] | None = None,
+    headers: dict[str, str] | None = None,
+) -> Path:
+    """
+    Download the image at `url` and save it as `out_prefix` plus a file
+    extension; return the path of the saved file.
+
+    The extension comes from the Content-Type: `a/b.c` becomes `a/b.c.png`.
+
+    Raises FileExistsError rather than overwriting an existing file.
+    """
+    im_data, im_format = fetch_image(url, params=params, headers=headers)
+    out_path = out_prefix.with_name(f"{out_prefix.name}.{im_format}")
+
+    out_path.parent.mkdir(exist_ok=True, parents=True)
+
+    with open(out_path, "xb") as out_file:  # "x": fail if the file exists
+        out_file.write(im_data)
+
+    return out_path

tests/fixtures/cassettes/TestDownloadImage.test_cannot_download_twice.yml (0) → tests/fixtures/cassettes/TestDownloadImage.test_cannot_download_twice.yml (2582)

diff --git a/tests/fixtures/cassettes/TestDownloadImage.test_cannot_download_twice.yml b/tests/fixtures/cassettes/TestDownloadImage.test_cannot_download_twice.yml
new file mode 100644
index 0000000..2189950
--- /dev/null
+++ b/tests/fixtures/cassettes/TestDownloadImage.test_cannot_download_twice.yml
@@ -0,0 +1,61 @@
+interactions:
+- request:
+    body: null
+    headers:
+      Host:
+      - alexwlchan.net
+    method: GET
+    uri: https://alexwlchan.net/images/2026/470906.png
+  response:
+    body:
+      string: !!binary |
+        iVBORw0KGgoAAAANSUhEUgAAASwAAAEsCAMAAABOo35HAAAAOVBMVEU+BwU/BwU/CAVACAVBCAVC
+        CAVDCAVECAVGCAVHCQZICQZJCQZKCQZLCQZMCQZNCQZOCQZOCgZQCgam5+ZPAAAB5ElEQVR42u3d
+        wQqCQABFUTMNEyHw//8xURDDhH6ht5hZnbt/CKeZ5VD3aNLGeLHEi1e82ONFHy+6RrBgwYIFC5Zg
+        wYIFCxYswYIFCxYsWIIFCxYsWLAECxYsWLBgCRYsWLBgwRIsWLBgwYIlWLBgwYIFS/9jTfFkjRdz
+        vPjEi/xd6uBkuYawYMESLFiwYMGCJViwYMGCBUuwYMGCBQuWYMGCBQsWLMGCBQsWLFiCBQsWLFiw
+        BAsWLFiwYAkWrKJYZzy5x4v8G228uOLF18lyDWHBgiVYsGDBggVLsGDBggULlmDBggULFizBggUL
+        FixYggULFixYsAQLFixYsGAJFixYsGDBEixYRbFqvEsd48UWL27xonWyXENYsGAJFixYsGDBEixY
+        sGDBgiVYsGDBggVLsGDBggULlmDBggULFizBggULFixYggULFixYsAQLVlGsJZ7UePua/19qX+Eb
+        ThYsWLBgwRIsWLBgwYIlWLBgwYIFS7BgwYIFC5ZgwYIFCxYswYIFCxYsWIIFCxYsWLAECxYsWLBg
+        CVaCNceTd7w4KvyGz3hxOlmuISxYsAQLFixYsGAJFixYsGDBEixYsGDBgiVYsGDBggVLsGDBggUL
+        lmDBggULFizBggULFixYggWrZD/yQRWvEwG66AAAAABJRU5ErkJggg==
+    headers:
+      Content-Length:
+      - '610'
+      Content-Type:
+      - image/png
+    status:
+      code: 200
+      message: OK
+- request:
+    body: null
+    headers:
+      Host:
+      - alexwlchan.net
+    method: GET
+    uri: https://alexwlchan.net/images/2026/f69b96.png
+  response:
+    body:
+      string: !!binary |
+        iVBORw0KGgoAAAANSUhEUgAAASwAAAEsCAMAAABOo35HAAAAZlBMVEXycGnydG3ydW7ydm/yeHHy
+        eXLzgHnzhH70iIL0i4b1ko31lY/1lpD1l5L1mJL2m5b2nJf2npr2n5v2oJv2o573p6P3q6f3sKz4
+        tLD5vLn5wL35wb75xMH5xcP5xsP5xsT6yMX6yscV0ZT9AAAB60lEQVR42u3dvWoCQQBG0RRLEDQo
+        aBff/80MaidW/kR8hlvMNp7bf83ZnXKYafFV2+XFPS+uebHKi2mGxQcHCxYsWLBgCRYsWLBgwRIs
+        WLBgwYIlWLBgwYIFS7BgwYIFC5ZgwYIFCxYswYIFCxYsWIIFCxYsWLA0fefJOS9+8+KWF/1G7sOf
+        5RjCggVLsGDBggULlmDBggULFizBggULFixYggULFixYsAQLFixYsGAJFixYsGDBEixYsGDBgiVY
+        sIZi/eTJf16c8uKVF8e8cJPVMYQFC5ZgwYIFCxYswYIFCxYsWIIFCxYsWLAECxYsWLBgCRYsWLBg
+        wRIsWLBgwYIlWLBgwYIFS7BgjcX6y5NlXmzz4pAXm7y4+LMcQ1iwYAkWLFiwYMESLFiwYMGCJViw
+        YMGCBUuwYMGCBQuWYMGCBQsWLMGCBQsWLFiCBQsWLFiwBAvWUKx+y7S/l3qb4Rv2e6l7f5ZjCAsW
+        LMGCBQsWLFiCBQsWLFiwBAsWLFiwYAkWLFiwYMESLFiwYMGCJViwYMGCBUuwYMGCBQuWYMEaivXM
+        k3VePGZY9JdivcnqGMKCBUuwYMGCBQuWYMGCBQsWLMGCBQsWLFiCBQsWLFiwBAsWLFiwYAkWLFiw
+        YMESLFiwYMGCJViwhvYGGrgYp8lVDqkAAAAASUVORK5CYII=
+    headers:
+      Content-Length:
+      - '662'
+      Content-Type:
+      - image/png
+    status:
+      code: 200
+      message: OK
+version: 1

tests/fixtures/cassettes/TestDownloadImage.test_download_image.yml (0) → tests/fixtures/cassettes/TestDownloadImage.test_download_image.yml (1265)

diff --git a/tests/fixtures/cassettes/TestDownloadImage.test_download_image.yml b/tests/fixtures/cassettes/TestDownloadImage.test_download_image.yml
new file mode 100644
index 0000000..e2b8195
--- /dev/null
+++ b/tests/fixtures/cassettes/TestDownloadImage.test_download_image.yml
@@ -0,0 +1,31 @@
+interactions:
+- request:
+    body: null
+    headers:
+      Host:
+      - alexwlchan.net
+    method: GET
+    uri: https://alexwlchan.net/images/2026/470906.png
+  response:
+    body:
+      string: !!binary |
+        iVBORw0KGgoAAAANSUhEUgAAASwAAAEsCAMAAABOo35HAAAAOVBMVEU+BwU/BwU/CAVACAVBCAVC
+        CAVDCAVECAVGCAVHCQZICQZJCQZKCQZLCQZMCQZNCQZOCQZOCgZQCgam5+ZPAAAB5ElEQVR42u3d
+        wQqCQABFUTMNEyHw//8xURDDhH6ht5hZnbt/CKeZ5VD3aNLGeLHEi1e82ONFHy+6RrBgwYIFC5Zg
+        wYIFCxYswYIFCxYsWIIFCxYsWLAECxYsWLBgCRYsWLBgwRIsWLBgwYIlWLBgwYIFS/9jTfFkjRdz
+        vPjEi/xd6uBkuYawYMESLFiwYMGCJViwYMGCBUuwYMGCBQuWYMGCBQsWLMGCBQsWLFiCBQsWLFiw
+        BAsWLFiwYAkWrKJYZzy5x4v8G228uOLF18lyDWHBgiVYsGDBggVLsGDBggULlmDBggULFizBggUL
+        FixYggULFixYsAQLFixYsGAJFixYsGDBEixYRbFqvEsd48UWL27xonWyXENYsGAJFixYsGDBEixY
+        sGDBgiVYsGDBggVLsGDBggULlmDBggULFizBggULFixYggULFixYsAQLVlGsJZ7UePua/19qX+Eb
+        ThYsWLBgwRIsWLBgwYIlWLBgwYIFS7BgwYIFC5ZgwYIFCxYswYIFCxYsWIIFCxYsWLAECxYsWLBg
+        CVaCNceTd7w4KvyGz3hxOlmuISxYsAQLFixYsGAJFixYsGDBEixYsGDBgiVYsGDBggVLsGDBggUL
+        lmDBggULFizBggULFixYggWrZD/yQRWvEwG66AAAAABJRU5ErkJggg==
+    headers:
+      Content-Length:
+      - '610'
+      Content-Type:
+      - image/png
+    status:
+      code: 200
+      message: OK
+version: 1

tests/fixtures/cassettes/TestDownloadImage.test_download_to_nested_path.yml (0) → tests/fixtures/cassettes/TestDownloadImage.test_download_to_nested_path.yml (1265)

diff --git a/tests/fixtures/cassettes/TestDownloadImage.test_download_to_nested_path.yml b/tests/fixtures/cassettes/TestDownloadImage.test_download_to_nested_path.yml
new file mode 100644
index 0000000..e2b8195
--- /dev/null
+++ b/tests/fixtures/cassettes/TestDownloadImage.test_download_to_nested_path.yml
@@ -0,0 +1,31 @@
+interactions:
+- request:
+    body: null
+    headers:
+      Host:
+      - alexwlchan.net
+    method: GET
+    uri: https://alexwlchan.net/images/2026/470906.png
+  response:
+    body:
+      string: !!binary |
+        iVBORw0KGgoAAAANSUhEUgAAASwAAAEsCAMAAABOo35HAAAAOVBMVEU+BwU/BwU/CAVACAVBCAVC
+        CAVDCAVECAVGCAVHCQZICQZJCQZKCQZLCQZMCQZNCQZOCQZOCgZQCgam5+ZPAAAB5ElEQVR42u3d
+        wQqCQABFUTMNEyHw//8xURDDhH6ht5hZnbt/CKeZ5VD3aNLGeLHEi1e82ONFHy+6RrBgwYIFC5Zg
+        wYIFCxYswYIFCxYsWIIFCxYsWLAECxYsWLBgCRYsWLBgwRIsWLBgwYIlWLBgwYIFS/9jTfFkjRdz
+        vPjEi/xd6uBkuYawYMESLFiwYMGCJViwYMGCBUuwYMGCBQuWYMGCBQsWLMGCBQsWLFiCBQsWLFiw
+        BAsWLFiwYAkWrKJYZzy5x4v8G228uOLF18lyDWHBgiVYsGDBggVLsGDBggULlmDBggULFizBggUL
+        FixYggULFixYsAQLFixYsGAJFixYsGDBEixYRbFqvEsd48UWL27xonWyXENYsGAJFixYsGDBEixY
+        sGDBgiVYsGDBggVLsGDBggULlmDBggULFizBggULFixYggULFixYsAQLVlGsJZ7UePua/19qX+Eb
+        ThYsWLBgwRIsWLBgwYIlWLBgwYIFS7BgwYIFC5ZgwYIFCxYswYIFCxYsWIIFCxYsWLAECxYsWLBg
+        CVaCNceTd7w4KvyGz3hxOlmuISxYsAQLFixYsGAJFixYsGDBEixYsGDBgiVYsGDBggVLsGDBggUL
+        lmDBggULFizBggULFixYggWrZD/yQRWvEwG66AAAAABJRU5ErkJggg==
+    headers:
+      Content-Length:
+      - '610'
+      Content-Type:
+      - image/png
+    status:
+      code: 200
+      message: OK
+version: 1

tests/fixtures/media/470906.png (0) → tests/fixtures/media/470906.png (610)

diff --git a/tests/fixtures/media/470906.png b/tests/fixtures/media/470906.png
new file mode 100644
index 0000000..e5a23cb
Binary files /dev/null and b/tests/fixtures/media/470906.png differ

tests/test_fetch.py (3309) → tests/test_fetch.py (5164)

diff --git a/tests/test_fetch.py b/tests/test_fetch.py
index aba5278..442de02 100644
--- a/tests/test_fetch.py
+++ b/tests/test_fetch.py
@@ -2,8 +2,10 @@
 Tests for `chives.fetch`.
 """
 
+import filecmp
 from io import BytesIO
 import json
+from pathlib import Path
 from typing import Any
 from urllib.error import HTTPError
 
@@ -12,7 +14,7 @@ import pytest
 import vcr
 from vcr.cassette import Cassette
 
-from chives.fetch import fetch_image, fetch_url
+from chives.fetch import download_image, fetch_image, fetch_url
 
 
 class TestFetchUrl:
@@ -90,7 +92,7 @@ class TestFetchImage:
         """
         url = "http://httpbin.org/status/200"
 
-        with pytest.raises(RuntimeError, match="unrecognised image format"):
+        with pytest.raises(ValueError, match="unrecognised image format"):
             fetch_image(url)
 
     def test_no_content_type_header(self, cassette_name: str) -> None:
@@ -114,3 +116,54 @@ class TestFetchImage:
                 RuntimeError, match="no Content-Type header in response"
             ):
                 fetch_image(url)
+
+
+class TestDownloadImage:
+    """
+    Tests for `download_image`.
+    """
+
+    def test_download_image(self, tmp_path: Path, vcr_cassette: Cassette) -> None:
+        """
+        Download an image and check it is byte-identical to the fixture.
+        """
+        url = "https://alexwlchan.net/images/2026/470906.png"
+
+        out_path = download_image(url, tmp_path / "470906")
+        assert out_path == tmp_path / "470906.png"
+        assert out_path.exists()
+
+        assert filecmp.cmp(out_path, "tests/fixtures/media/470906.png", shallow=False)
+
+    def test_download_to_nested_path(
+        self, tmp_path: Path, vcr_cassette: Cassette
+    ) -> None:
+        """
+        Downloading to a deeply nested path creates any missing parent
+        directories.
+        """
+        url = "https://alexwlchan.net/images/2026/470906.png"
+
+        out_path = download_image(url, tmp_path / "a/b/c/470906")
+        assert out_path == tmp_path / "a/b/c/470906.png"
+        assert out_path.exists()
+
+        assert filecmp.cmp(out_path, "tests/fixtures/media/470906.png", shallow=False)
+
+    def test_cannot_download_twice(
+        self, tmp_path: Path, vcr_cassette: Cassette
+    ) -> None:
+        """
+        Trying to overwrite an existing image raises a FileExistsError.
+        """
+        url1 = "https://alexwlchan.net/images/2026/470906.png"
+        url2 = "https://alexwlchan.net/images/2026/f69b96.png"
+
+        out_path = download_image(url1, tmp_path / "squares")
+        assert filecmp.cmp(out_path, "tests/fixtures/media/470906.png", shallow=False)
+
+        with pytest.raises(FileExistsError):
+            download_image(url2, tmp_path / "squares")
+
+        # The failed download must leave the first file's contents untouched.
+        assert filecmp.cmp(out_path, "tests/fixtures/media/470906.png", shallow=False)