Skip to main content

fetch: improve URL handling for URLs with existing query params

ID
aac4328
date
2026-04-11 17:54:02+00:00
author
Alex Chan <alex@alexwlchan.net>
parent
1b2da07
message
fetch: improve URL handling for URLs with existing query params
changed files
4 files, 70 additions, 16 deletions

Changed files

CHANGELOG.md (4093) → CHANGELOG.md (4295)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5c9922e..a4b56e2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # CHANGELOG
 
+## v34 - 2026-04-11
+
+Improve the URL-handling in `chives.fetch`; in particular improve the case where you call `fetch_url(url, params)` with a URL that already has some query parameters or a fragment.
+
 ## v33 - 2026-04-10
 
 Remove the `fetch_image` function from `chives.fetch`; either use `fetch_url` or `download_image`.

src/chives/__init__.py (391) → src/chives/__init__.py (391)

diff --git a/src/chives/__init__.py b/src/chives/__init__.py
index 5378754..7efa70a 100644
--- a/src/chives/__init__.py
+++ b/src/chives/__init__.py
@@ -11,4 +11,4 @@ I share across multiple sites.
 
 """
 
-__version__ = "33"
+__version__ = "34"

src/chives/fetch.py (2853) → src/chives/fetch.py (3218)

diff --git a/src/chives/fetch.py b/src/chives/fetch.py
index 71f896c..21ba817 100644
--- a/src/chives/fetch.py
+++ b/src/chives/fetch.py
@@ -11,21 +11,30 @@ import urllib.request
 import certifi
 
 
-__all__ = ["download_image", "fetch_url"]
+__all__ = ["build_request", "download_image", "fetch_url"]
 
 
 ssl_context = ssl.create_default_context(cafile=certifi.where())
 
+QueryParams = dict[str, str] | list[tuple[str, str]]
+Headers = dict[str, str]
 
-def _build_request(
-    url: str, params: dict[str, str] | None, headers: dict[str, str] | None
+
+def build_request(
+    url: str, *, params: QueryParams | None = None, headers: Headers | None = None
 ) -> urllib.request.Request:
     """
     Build a request based on the given inputs.
     """
-    if params:
-        params_str = urllib.parse.urlencode(params)
-        url = url + "?" + params_str
+    if isinstance(params, dict):
+        params = [(k, v) for k, v in params.items()]
+    if params is not None:
+        u = urllib.parse.urlsplit(url)
+        query = urllib.parse.parse_qsl(u.query) + params
+        new_query = urllib.parse.urlencode(query)
+        url = urllib.parse.urlunsplit(
+            (u.scheme, u.netloc, u.path, new_query, u.fragment)
+        )
 
     req = urllib.request.Request(url)
 
@@ -37,16 +46,13 @@ def _build_request(
 
 
 def fetch_url(
-    url: str,
-    *,
-    params: dict[str, str] | None = None,
-    headers: dict[str, str] | None = None,
+    url: str, *, params: QueryParams | None = None, headers: Headers | None = None
 ) -> bytes:
     """
     Fetch the contents of the given URL and return the body of
     the response.
     """
-    req = _build_request(url, params, headers)
+    req = build_request(url, params=params, headers=headers)
 
     with urllib.request.urlopen(req, context=ssl_context) as resp:
         data = resp.read()
@@ -85,8 +91,8 @@ def download_image(
     url: str,
     out_prefix: Path,
     *,
-    params: dict[str, str] | None = None,
-    headers: dict[str, str] | None = None,
+    params: QueryParams | None = None,
+    headers: Headers | None = None,
 ) -> Path:
     """
     Download an image from the given URL to the target path, and return
@@ -98,7 +104,7 @@ def download_image(
     """
     ssl_context = ssl.create_default_context(cafile=certifi.where())
 
-    req = _build_request(url, params, headers)
+    req = build_request(url, params=params, headers=headers)
 
     with urllib.request.urlopen(req, context=ssl_context) as resp:
         img_data = resp.read()

tests/test_fetch.py (4709) → tests/test_fetch.py (6041)

diff --git a/tests/test_fetch.py b/tests/test_fetch.py
index 18786be..874ad95 100644
--- a/tests/test_fetch.py
+++ b/tests/test_fetch.py
@@ -12,7 +12,51 @@ import pytest
 import vcr
 from vcr.cassette import Cassette
 
-from chives.fetch import download_image, fetch_url
+from chives.fetch import build_request, download_image, fetch_url, QueryParams
+
+
+@pytest.mark.parametrize(
+    "url, params, expected_url",
+    [
+        ("https://example.com", None, "https://example.com"),
+        (
+            "https://example.com",
+            {"one": "1", "two": "2"},
+            "https://example.com?one=1&two=2",
+        ),
+        (
+            "https://example.com",
+            [("one", "1"), ("two", "2")],
+            "https://example.com?one=1&two=2",
+        ),
+        (
+            "https://example.com",
+            [("num", "1"), ("num", "2")],
+            "https://example.com?num=1&num=2",
+        ),
+        (
+            "https://example.com#fragment",
+            [("num", "1"), ("num", "2")],
+            "https://example.com?num=1&num=2#fragment",
+        ),
+        (
+            "https://example.com?existing=1",
+            [("num", "1"), ("num", "2")],
+            "https://example.com?existing=1&num=1&num=2",
+        ),
+        (
+            "https://example.com?existing=1#fragment",
+            [("num", "1"), ("num", "2")],
+            "https://example.com?existing=1&num=1&num=2#fragment",
+        ),
+    ],
+)
+def test_build_request(url: str, params: QueryParams | None, expected_url: str) -> None:
+    """
+    Tests for `build_request`.
+    """
+    req = build_request(url, params=params)
+    assert req.full_url == expected_url
 
 
 class TestFetchUrl: