Skip to main content

youtube: normalise URLs; remove unneeded query parameters

ID
f96c4de
date
2026-04-18 22:07:17+00:00
author
Alex Chan <alex@alexwlchan.net>
parent
e79b606
message
youtube: normalise URLs; remove unneeded query parameters
changed files
2 files, 45 additions

Changed files

test_yt-dlp_alexwlchan.py (3383) → test_yt-dlp_alexwlchan.py (4224)

diff --git a/test_yt-dlp_alexwlchan.py b/test_yt-dlp_alexwlchan.py
index 7fbbfad..8818dc8 100644
--- a/test_yt-dlp_alexwlchan.py
+++ b/test_yt-dlp_alexwlchan.py
@@ -11,6 +11,7 @@ import pytest
 yt_dlp_alexwlchan = importlib.import_module("yt-dlp_alexwlchan")
 
 download_video = yt_dlp_alexwlchan.download_video
+normalise_url = yt_dlp_alexwlchan.normalise_url
 
 
 def test_youtube_video() -> None:
@@ -100,3 +101,31 @@ def test_instagram_video() -> None:
     assert video_info["date_uploaded"] == "2025-07-21T00:34:41Z"
 
     assert video_info["video_path"].endswith(" [DMWY8KkOS0n].mp4")
+
+
+@pytest.mark.parametrize(
+    "url, expected",
+    [
+        (  # non-YouTube host: returned unchanged
+            "https://www.instagram.com/reel/DMWY8KkOS0n/",
+            "https://www.instagram.com/reel/DMWY8KkOS0n/",
+        ),
+        (  # YouTube URL with no query string: unchanged
+            "https://www.youtube.com/shorts/hyGluE562oA",
+            "https://www.youtube.com/shorts/hyGluE562oA",
+        ),
+        (  # YouTube watch URL that only has `v`: unchanged
+            "https://www.youtube.com/watch?v=0N1_0SUGlDQ",
+            "https://www.youtube.com/watch?v=0N1_0SUGlDQ",
+        ),
+        (  # extra query parameters (app, list, index) are dropped
+            "https://www.youtube.com/watch?v=0N1_0SUGlDQ&app=desktop&list=LL&index=43",
+            "https://www.youtube.com/watch?v=0N1_0SUGlDQ",
+        ),
+    ],
+)
+def test_normalise_url(url: str, expected: str) -> None:
+    """
+    Check `normalise_url` keeps clean URLs as-is and trims YouTube watch URLs to `v`.
+    """
+    assert normalise_url(url) == expected

yt-dlp_alexwlchan.py (8504) → yt-dlp_alexwlchan.py (8997)

diff --git a/yt-dlp_alexwlchan.py b/yt-dlp_alexwlchan.py
index cf5ba93..91fb2b1 100755
--- a/yt-dlp_alexwlchan.py
+++ b/yt-dlp_alexwlchan.py
@@ -53,6 +53,22 @@ ydl_opts: Any = {
 }
 
 
+def normalise_url(url: str) -> str:
+    """
+    Strip every query parameter except `v` from www.youtube.com URLs.
+    """
+    u = urllib.parse.urlsplit(url)
+    # If it's a YouTube URL, remove all query parameters except video ID (v).
+    # The fragment is dropped as well: urlunsplit is passed "" in that slot.
+    if u.netloc == "www.youtube.com":  # exact host match only
+        qs = urllib.parse.parse_qsl(u.query)
+        qs = [(k, v) for k, v in qs if k == "v"]
+        query = urllib.parse.urlencode(qs)
+        return urllib.parse.urlunsplit((u.scheme, u.netloc, u.path, query, ""))
+
+    return url  # every other URL is returned exactly as given
+
+
 def get_youtube_avatar(tmp_dir: Path, channel_url: str) -> Path:
     """
     Download the avatar of a YouTube channel.