Skip to main content

Merge pull request #28 from alexwlchan/cleanup-paths

ID
2be02e9
date
2025-12-17 08:37:09+00:00
author
Alex Chan <alex@alexwlchan.net>
parents
7f53fd5, 2234ced
message
Merge pull request #28 from alexwlchan/cleanup-paths

Clean up URL-unsafe characters in filenames
changed files
5 files, 59 additions, 33 deletions

Changed files

.github/workflows/test.yml (952) → .github/workflows/test.yml (952)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 60df6c2..f7216c0 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -14,7 +14,7 @@ jobs:
     strategy:
       matrix:
         python-version:
-          - "3.13"
+          - "3.14"
 
     runs-on: ubuntu-latest
 

dev_requirements.txt (1844) → dev_requirements.txt (1715)

diff --git a/dev_requirements.txt b/dev_requirements.txt
index f5ef368..6d545e5 100644
--- a/dev_requirements.txt
+++ b/dev_requirements.txt
@@ -1,20 +1,17 @@
 # This file was autogenerated by uv via the following command:
 #    uv pip compile dev_requirements.in --output-file dev_requirements.txt
-anyio==4.11.0
+anyio==4.12.0
     # via
     #   -r requirements.txt
     #   httpx
-brotli==1.1.0
-    # via
-    #   -r requirements.txt
-    #   yt-dlp
-certifi==2025.10.5
+brotli==1.2.0
+    # via -r requirements.txt
+certifi==2025.11.12
     # via
     #   -r requirements.txt
     #   httpcore
     #   httpx
     #   requests
-    #   yt-dlp
 charset-normalizer==3.4.4
     # via
     #   -r requirements.txt
@@ -42,11 +39,11 @@ idna==3.11
     #   requests
 iniconfig==2.3.0
     # via pytest
+librt==0.7.4
+    # via mypy
 mutagen==1.47.0
-    # via
-    #   -r requirements.txt
-    #   yt-dlp
-mypy==1.18.2
+    # via -r requirements.txt
+mypy==1.19.1
     # via -r dev_requirements.in
 mypy-extensions==1.1.0
     # via mypy
@@ -57,37 +54,30 @@ pathspec==0.12.1
 pluggy==1.6.0
     # via pytest
 pycryptodomex==3.23.0
-    # via
-    #   -r requirements.txt
-    #   yt-dlp
+    # via -r requirements.txt
 pygments==2.19.2
     # via pytest
-pytest==9.0.1
+pytest==9.0.2
     # via -r dev_requirements.in
 requests==2.32.5
     # via
     #   -r requirements.txt
     #   gallery-dl
-    #   yt-dlp
-ruff==0.14.1
+ruff==0.14.9
     # via -r dev_requirements.in
-sniffio==1.3.1
-    # via
-    #   -r requirements.txt
-    #   anyio
 types-yt-dlp==2025.12.8.20251210
     # via -r dev_requirements.in
 typing-extensions==4.15.0
     # via mypy
-urllib3==2.5.0
+urllib3==2.6.2
     # via
     #   -r requirements.txt
     #   requests
-    #   yt-dlp
 websockets==15.0.1
     # via
     #   -r requirements.txt
     #   types-yt-dlp
-    #   yt-dlp
-yt-dlp[default]==2025.10.22
+yt-dlp==2025.12.8
+    # via -r requirements.txt
+yt-dlp-ejs==0.3.2
     # via -r requirements.txt

requirements.txt (927) → requirements.txt (922)

diff --git a/requirements.txt b/requirements.txt
index 9440c58..adf8f8e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,10 +1,10 @@
 # This file was autogenerated by uv via the following command:
 #    uv pip compile requirements.in --output-file requirements.txt
-anyio==4.11.0
+anyio==4.12.0
     # via httpx
-brotli==1.1.0
+brotli==1.2.0
     # via yt-dlp
-certifi==2025.10.5
+certifi==2025.11.12
     # via
     #   httpcore
     #   httpx
@@ -36,13 +36,13 @@ requests==2.32.5
     # via
     #   gallery-dl
     #   yt-dlp
-sniffio==1.3.1
-    # via anyio
-urllib3==2.5.0
+urllib3==2.6.2
     # via
     #   requests
     #   yt-dlp
 websockets==15.0.1
     # via yt-dlp
-yt-dlp[default]==2025.10.22
+yt-dlp==2025.12.8
     # via -r requirements.in
+yt-dlp-ejs==0.3.2
+    # via yt-dlp

test_yt-dlp_alexwlchan.py (1793) → test_yt-dlp_alexwlchan.py (2310)

diff --git a/test_yt-dlp_alexwlchan.py b/test_yt-dlp_alexwlchan.py
index 0194045..99e4ffa 100644
--- a/test_yt-dlp_alexwlchan.py
+++ b/test_yt-dlp_alexwlchan.py
@@ -32,6 +32,21 @@ def test_youtube_video() -> None:
     assert video_info["video_path"].endswith(" [TUQaGhPdlxs].mp4")
 
 
+def test_youtube_path_is_cleaned_up() -> None:
+    """
+    Paths of YouTube videos get cleaned up during the download.
+    """
+    video = download_video("https://www.youtube.com/shorts/eso8JB7q0a0")
+    assert (
+        video["title"]
+        == "3D Printing Everyday for 365 Days 176/365  #stem #3dprinting #3dprint #ideas #useful"
+    )
+    assert (
+        os.path.basename(video["video_path"])
+        == "3D Printing Everyday for 365 Days 176-365 stem 3dprinting 3dprint ideas useful [eso8JB7q0a0].mp4"
+    )
+
+
 def test_instagram_video() -> None:
     """
     Download an Instagram video and check we get the expected output.

yt-dlp_alexwlchan.py (6246) → yt-dlp_alexwlchan.py (6777)

diff --git a/yt-dlp_alexwlchan.py b/yt-dlp_alexwlchan.py
index 1d0912d..fc76ef8 100755
--- a/yt-dlp_alexwlchan.py
+++ b/yt-dlp_alexwlchan.py
@@ -3,6 +3,7 @@
 from datetime import datetime, timezone
 import json
 from pathlib import Path
+import re
 import subprocess
 import sys
 import tempfile
@@ -141,6 +142,24 @@ class VideoInfo(TypedDict):
     site: str
 
 
+def cleanup_paths(dir_path: Path) -> None:
+    """
+    For every file in `dir_path`, remove URL-unsafe characters from
+    the filenames.
+    """
+    for p in dir_path.iterdir():
+        old_name = p.name
+
+        new_name = p.name.replace("#", " ").replace("?", " ").replace("⧸", "-")
+        new_name = re.sub(r"\s+", " ", new_name)
+
+        if old_name == new_name:
+            continue
+
+        assert not (dir_path / new_name).exists(), new_name
+        p.move(dir_path / new_name)
+
+
 def download_video(url: str) -> VideoInfo:
     # Download all the videos to a temp directory; this allows the caller
     # to decide exactly where they want the video later.
@@ -150,6 +169,8 @@ def download_video(url: str) -> VideoInfo:
     with YoutubeDL(ydl_opts) as ydl:
         video_info: Any = ydl.extract_info(url)
 
+    cleanup_paths(tmp_dir)
+
     video_path = next(p for p in tmp_dir.iterdir() if p.suffix == ".mp4")
     thumbnail_path = next(p for p in tmp_dir.iterdir() if p.suffix == ".jpg")
     try: