Clean up URL-unsafe characters in filenames
- ID: 2234ced
- date: 2025-12-17 08:36:11+00:00
- author: Alex Chan <alex@alexwlchan.net>
- parent: 1b907c1
- message: Clean up URL-unsafe characters in filenames. Fixes #27
- changed files: 3 files, 37 additions, 1 deletion
Changed files
.github/workflows/test.yml (952) → .github/workflows/test.yml (952)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 60df6c2..f7216c0 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -14,7 +14,7 @@ jobs:
strategy:
matrix:
python-version:
- - "3.13"
+ - "3.14"
runs-on: ubuntu-latest
test_yt-dlp_alexwlchan.py (1793) → test_yt-dlp_alexwlchan.py (2310)
diff --git a/test_yt-dlp_alexwlchan.py b/test_yt-dlp_alexwlchan.py
index 0194045..99e4ffa 100644
--- a/test_yt-dlp_alexwlchan.py
+++ b/test_yt-dlp_alexwlchan.py
@@ -32,6 +32,21 @@ def test_youtube_video() -> None:
assert video_info["video_path"].endswith(" [TUQaGhPdlxs].mp4")
+def test_youtube_path_is_cleaned_up() -> None:
+ """
+ Paths of YouTube videos get cleaned up during the download.
+ """
+ video = download_video("https://www.youtube.com/shorts/eso8JB7q0a0")
+ assert (
+ video["title"]
+ == "3D Printing Everyday for 365 Days 176/365 #stem #3dprinting #3dprint #ideas #useful"
+ )
+ assert (
+ os.path.basename(video["video_path"])
+ == "3D Printing Everyday for 365 Days 176-365 stem 3dprinting 3dprint ideas useful [eso8JB7q0a0].mp4"
+ )
+
+
def test_instagram_video() -> None:
"""
Download an Instagram video and check we get the expected output.
yt-dlp_alexwlchan.py (6246) → yt-dlp_alexwlchan.py (6777)
diff --git a/yt-dlp_alexwlchan.py b/yt-dlp_alexwlchan.py
index 1d0912d..fc76ef8 100755
--- a/yt-dlp_alexwlchan.py
+++ b/yt-dlp_alexwlchan.py
@@ -3,6 +3,7 @@
from datetime import datetime, timezone
import json
from pathlib import Path
+import re
import subprocess
import sys
import tempfile
@@ -141,6 +142,24 @@ class VideoInfo(TypedDict):
site: str
+def cleanup_paths(dir_path: Path) -> None:
+ """
+ For every file in `dir_path`, remove URL-unsafe characters from
+ the filenames.
+ """
+ for p in dir_path.iterdir():
+ old_name = p.name
+
+ new_name = p.name.replace("#", " ").replace("?", " ").replace("⧸", "-")
+ new_name = re.sub(r"\s+", " ", new_name)
+
+ if old_name == new_name:
+ continue
+
+ assert not (dir_path / new_name).exists(), new_name
+ p.move(dir_path / new_name)
+
+
def download_video(url: str) -> VideoInfo:
# Download all the videos to a temp directory; this allows the caller
# to decide exactly where they want the video later.
@@ -150,6 +169,8 @@ def download_video(url: str) -> VideoInfo:
with YoutubeDL(ydl_opts) as ydl:
video_info: Any = ydl.extract_info(url)
+ cleanup_paths(tmp_dir)
+
video_path = next(p for p in tmp_dir.iterdir() if p.suffix == ".mp4")
thumbnail_path = next(p for p in tmp_dir.iterdir() if p.suffix == ".jpg")
try: