fetch: improve URL handling for URLs with existing query params
- ID
aac4328- date
2026-04-11 17:54:02+00:00- author
Alex Chan <alex@alexwlchan.net>- parent
1b2da07- message
fetch: improve URL handling for URLs with existing query params- changed files
4 files, 70 additions, 16 deletions
Changed files
CHANGELOG.md (4093) → CHANGELOG.md (4295)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5c9922e..a4b56e2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
# CHANGELOG
+## v34 - 2026-04-11
+
+Improve the URL-handling in `chives.fetch`; in particular improve the case where you call `fetch_url(url, params)` with a URL that already has some query parameters or a fragment.
+
## v33 - 2026-04-10
Remove the `fetch_image` function from `chives.fetch`; either use `fetch_url` or `download_image`.
src/chives/__init__.py (391) → src/chives/__init__.py (391)
diff --git a/src/chives/__init__.py b/src/chives/__init__.py
index 5378754..7efa70a 100644
--- a/src/chives/__init__.py
+++ b/src/chives/__init__.py
@@ -11,4 +11,4 @@ I share across multiple sites.
"""
-__version__ = "33"
+__version__ = "34"
src/chives/fetch.py (2853) → src/chives/fetch.py (3218)
diff --git a/src/chives/fetch.py b/src/chives/fetch.py
index 71f896c..21ba817 100644
--- a/src/chives/fetch.py
+++ b/src/chives/fetch.py
@@ -11,21 +11,30 @@ import urllib.request
import certifi
-__all__ = ["download_image", "fetch_url"]
+__all__ = ["build_request", "download_image", "fetch_url"]
ssl_context = ssl.create_default_context(cafile=certifi.where())
+QueryParams = dict[str, str] | list[tuple[str, str]]
+Headers = dict[str, str]
-def _build_request(
- url: str, params: dict[str, str] | None, headers: dict[str, str] | None
+
+def build_request(
+ url: str, *, params: QueryParams | None = None, headers: Headers | None = None
) -> urllib.request.Request:
"""
Build a request based on the given inputs.
"""
- if params:
- params_str = urllib.parse.urlencode(params)
- url = url + "?" + params_str
+ if isinstance(params, dict):
+ params = [(k, v) for k, v in params.items()]
+ if params is not None:
+ u = urllib.parse.urlsplit(url)
+ query = urllib.parse.parse_qsl(u.query) + params
+ new_query = urllib.parse.urlencode(query)
+ url = urllib.parse.urlunsplit(
+ (u.scheme, u.netloc, u.path, new_query, u.fragment)
+ )
req = urllib.request.Request(url)
@@ -37,16 +46,13 @@ def _build_request(
def fetch_url(
- url: str,
- *,
- params: dict[str, str] | None = None,
- headers: dict[str, str] | None = None,
+ url: str, *, params: QueryParams | None = None, headers: Headers | None = None
) -> bytes:
"""
Fetch the contents of the given URL and return the body of
the response.
"""
- req = _build_request(url, params, headers)
+ req = build_request(url, params=params, headers=headers)
with urllib.request.urlopen(req, context=ssl_context) as resp:
data = resp.read()
@@ -85,8 +91,8 @@ def download_image(
url: str,
out_prefix: Path,
*,
- params: dict[str, str] | None = None,
- headers: dict[str, str] | None = None,
+ params: QueryParams | None = None,
+ headers: Headers | None = None,
) -> Path:
"""
Download an image from the given URL to the target path, and return
@@ -98,7 +104,7 @@ def download_image(
"""
ssl_context = ssl.create_default_context(cafile=certifi.where())
- req = _build_request(url, params, headers)
+ req = build_request(url, params=params, headers=headers)
with urllib.request.urlopen(req, context=ssl_context) as resp:
img_data = resp.read()
tests/test_fetch.py (4709) → tests/test_fetch.py (6041)
diff --git a/tests/test_fetch.py b/tests/test_fetch.py
index 18786be..874ad95 100644
--- a/tests/test_fetch.py
+++ b/tests/test_fetch.py
@@ -12,7 +12,51 @@ import pytest
import vcr
from vcr.cassette import Cassette
-from chives.fetch import download_image, fetch_url
+from chives.fetch import build_request, download_image, fetch_url, QueryParams
+
+
+@pytest.mark.parametrize(
+ "url, params, expected_url",
+ [
+ ("https://example.com", None, "https://example.com"),
+ (
+ "https://example.com",
+ {"one": "1", "two": "2"},
+ "https://example.com?one=1&two=2",
+ ),
+ (
+ "https://example.com",
+ [("one", "1"), ("two", "2")],
+ "https://example.com?one=1&two=2",
+ ),
+ (
+ "https://example.com",
+ [("num", "1"), ("num", "2")],
+ "https://example.com?num=1&num=2",
+ ),
+ (
+ "https://example.com#fragment",
+ [("num", "1"), ("num", "2")],
+ "https://example.com?num=1&num=2#fragment",
+ ),
+ (
+ "https://example.com?existing=1",
+ [("num", "1"), ("num", "2")],
+ "https://example.com?existing=1&num=1&num=2",
+ ),
+ (
+ "https://example.com?existing=1#fragment",
+ [("num", "1"), ("num", "2")],
+ "https://example.com?existing=1&num=1&num=2#fragment",
+ ),
+ ],
+)
+def test_build_request(url: str, params: QueryParams | None, expected_url: str) -> None:
+ """
+ Tests for `build_request`.
+ """
+ req = build_request(url, params=params)
+ assert req.full_url == expected_url
class TestFetchUrl: