Merge pull request #28 from alexwlchan/cleanup-paths
- ID: 2be02e9
- date: 2025-12-17 08:37:09+00:00
- author: Alex Chan <alex@alexwlchan.net>
- parents: 7f53fd5, 2234ced
- message: Merge pull request #28 from alexwlchan/cleanup-paths
  Clean up URL-unsafe characters in filenames
- changed files: 5 files, 59 additions, 33 deletions
Changed files
.github/workflows/test.yml (952) → .github/workflows/test.yml (952)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 60df6c2..f7216c0 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -14,7 +14,7 @@ jobs:
strategy:
matrix:
python-version:
- - "3.13"
+ - "3.14"
runs-on: ubuntu-latest
dev_requirements.txt (1844) → dev_requirements.txt (1715)
diff --git a/dev_requirements.txt b/dev_requirements.txt
index f5ef368..6d545e5 100644
--- a/dev_requirements.txt
+++ b/dev_requirements.txt
@@ -1,20 +1,17 @@
# This file was autogenerated by uv via the following command:
# uv pip compile dev_requirements.in --output-file dev_requirements.txt
-anyio==4.11.0
+anyio==4.12.0
# via
# -r requirements.txt
# httpx
-brotli==1.1.0
- # via
- # -r requirements.txt
- # yt-dlp
-certifi==2025.10.5
+brotli==1.2.0
+ # via -r requirements.txt
+certifi==2025.11.12
# via
# -r requirements.txt
# httpcore
# httpx
# requests
- # yt-dlp
charset-normalizer==3.4.4
# via
# -r requirements.txt
@@ -42,11 +39,11 @@ idna==3.11
# requests
iniconfig==2.3.0
# via pytest
+librt==0.7.4
+ # via mypy
mutagen==1.47.0
- # via
- # -r requirements.txt
- # yt-dlp
-mypy==1.18.2
+ # via -r requirements.txt
+mypy==1.19.1
# via -r dev_requirements.in
mypy-extensions==1.1.0
# via mypy
@@ -57,37 +54,30 @@ pathspec==0.12.1
pluggy==1.6.0
# via pytest
pycryptodomex==3.23.0
- # via
- # -r requirements.txt
- # yt-dlp
+ # via -r requirements.txt
pygments==2.19.2
# via pytest
-pytest==9.0.1
+pytest==9.0.2
# via -r dev_requirements.in
requests==2.32.5
# via
# -r requirements.txt
# gallery-dl
- # yt-dlp
-ruff==0.14.1
+ruff==0.14.9
# via -r dev_requirements.in
-sniffio==1.3.1
- # via
- # -r requirements.txt
- # anyio
types-yt-dlp==2025.12.8.20251210
# via -r dev_requirements.in
typing-extensions==4.15.0
# via mypy
-urllib3==2.5.0
+urllib3==2.6.2
# via
# -r requirements.txt
# requests
- # yt-dlp
websockets==15.0.1
# via
# -r requirements.txt
# types-yt-dlp
- # yt-dlp
-yt-dlp[default]==2025.10.22
+yt-dlp==2025.12.8
+ # via -r requirements.txt
+yt-dlp-ejs==0.3.2
# via -r requirements.txt
requirements.txt (927) → requirements.txt (922)
diff --git a/requirements.txt b/requirements.txt
index 9440c58..adf8f8e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,10 +1,10 @@
# This file was autogenerated by uv via the following command:
# uv pip compile requirements.in --output-file requirements.txt
-anyio==4.11.0
+anyio==4.12.0
# via httpx
-brotli==1.1.0
+brotli==1.2.0
# via yt-dlp
-certifi==2025.10.5
+certifi==2025.11.12
# via
# httpcore
# httpx
@@ -36,13 +36,13 @@ requests==2.32.5
# via
# gallery-dl
# yt-dlp
-sniffio==1.3.1
- # via anyio
-urllib3==2.5.0
+urllib3==2.6.2
# via
# requests
# yt-dlp
websockets==15.0.1
# via yt-dlp
-yt-dlp[default]==2025.10.22
+yt-dlp==2025.12.8
# via -r requirements.in
+yt-dlp-ejs==0.3.2
+ # via yt-dlp
test_yt-dlp_alexwlchan.py (1793) → test_yt-dlp_alexwlchan.py (2310)
diff --git a/test_yt-dlp_alexwlchan.py b/test_yt-dlp_alexwlchan.py
index 0194045..99e4ffa 100644
--- a/test_yt-dlp_alexwlchan.py
+++ b/test_yt-dlp_alexwlchan.py
@@ -32,6 +32,21 @@ def test_youtube_video() -> None:
assert video_info["video_path"].endswith(" [TUQaGhPdlxs].mp4")
+def test_youtube_path_is_cleaned_up() -> None:
+ """
+ Paths of YouTube videos get cleaned up during the download.
+ """
+ video = download_video("https://www.youtube.com/shorts/eso8JB7q0a0")
+ assert (
+ video["title"]
+ == "3D Printing Everyday for 365 Days 176/365 #stem #3dprinting #3dprint #ideas #useful"
+ )
+ assert (
+ os.path.basename(video["video_path"])
+ == "3D Printing Everyday for 365 Days 176-365 stem 3dprinting 3dprint ideas useful [eso8JB7q0a0].mp4"
+ )
+
+
def test_instagram_video() -> None:
"""
Download an Instagram video and check we get the expected output.
yt-dlp_alexwlchan.py (6246) → yt-dlp_alexwlchan.py (6777)
diff --git a/yt-dlp_alexwlchan.py b/yt-dlp_alexwlchan.py
index 1d0912d..fc76ef8 100755
--- a/yt-dlp_alexwlchan.py
+++ b/yt-dlp_alexwlchan.py
@@ -3,6 +3,7 @@
from datetime import datetime, timezone
import json
from pathlib import Path
+import re
import subprocess
import sys
import tempfile
@@ -141,6 +142,24 @@ class VideoInfo(TypedDict):
site: str
+def cleanup_paths(dir_path: Path) -> None:
+ """
+ For every file in `dir_path`, remove URL-unsafe characters from
+ the filenames.
+ """
+ for p in dir_path.iterdir():
+ old_name = p.name
+
+ new_name = p.name.replace("#", " ").replace("?", " ").replace("⧸", "-")
+ new_name = re.sub(r"\s+", " ", new_name)
+
+ if old_name == new_name:
+ continue
+
+ assert not (dir_path / new_name).exists(), new_name
+ p.move(dir_path / new_name)
+
+
def download_video(url: str) -> VideoInfo:
# Download all the videos to a temp directory; this allows the caller
# to decide exactly where they want the video later.
@@ -150,6 +169,8 @@ def download_video(url: str) -> VideoInfo:
with YoutubeDL(ydl_opts) as ydl:
video_info: Any = ydl.extract_info(url)
+ cleanup_paths(tmp_dir)
+
video_path = next(p for p in tmp_dir.iterdir() if p.suffix == ".mp4")
thumbnail_path = next(p for p in tmp_dir.iterdir() if p.suffix == ".jpg")
try: