Improve the way that yt-dlp works
- ID: e9ed51f
- date: 2025-05-17 18:37:11+00:00
- author: Alex Chan <alex@alexwlchan.net>
- parent: f309460
- message: Improve the way that `yt-dlp` works
- changed files: 1 file, 49 additions, 42 deletions
Changed files
web/yt-dlp.py (3423) → web/yt-dlp.py (3816)
diff --git a/web/yt-dlp.py b/web/yt-dlp.py
index 3c394b4..59c450a 100755
--- a/web/yt-dlp.py
+++ b/web/yt-dlp.py
@@ -15,6 +15,8 @@ tool and it would work as-is. It might check extra rules or run faster,
but it should never download something different to the regular tool.
"""
+from collections.abc import Iterator
+import concurrent.futures
import os
import subprocess
import sys
@@ -22,6 +24,7 @@ import time
import hyperlink
import termcolor
+import tqdm
def is_youtube_playlist(url: str) -> bool:
@@ -33,54 +36,58 @@ def is_youtube_playlist(url: str) -> bool:
return bool(u.get("list"))
-def download_parallel_playlist(youtube_url: str, remaining_args: list[str]) -> None:
+def get_playlist_video_ids(youtube_url: str) -> Iterator[str]:
"""
- Download a YouTube playlist in parallel.
-
- See https://alexwlchan.net/2020/how-to-do-parallel-downloads-with-youtube-dl/
+ Yield the video IDs in a YouTube playlist, one at a time.
+
+ Runs the `yt_dlp_path` command with `--get-id` on the given URL and
+ yields each line of its standard output with surrounding whitespace
+ stripped. The output is read in text mode with line buffering requested.
+
+ NOTE(review): the subprocess is never waited on and its exit status is
+ not checked here, so a failure to list the playlist presumably just
+ yields fewer (or no) IDs -- confirm whether that is intended.
"""
- print(termcolor.colored("-> This is a playlist, downloading in parallel", "blue"))
-
get_ids_proc = subprocess.Popen(
- [yt_dlp_path, "--get-id", youtube_url], stdout=subprocess.PIPE
+ [yt_dlp_path, "--get-id", youtube_url], stdout=subprocess.PIPE, bufsize=1, text=True
)
+
+ for line in get_ids_proc.stdout:
+ yield line.strip()
+
+
+def download_single_youtube_video(video_id: str, remaining_args: list[str]) -> None:
+    """
+    Download one YouTube video by running yt-dlp quietly with the caller's extra arguments.
+    """
+    video_url = f"https://youtube.com/watch?v={video_id}"
+    command = [yt_dlp_path, "--quiet", *remaining_args, video_url]
+    subprocess.check_call(command)
- xargs_proc = subprocess.Popen(
- ["xargs", "-I", "{}", "-P", "5", yt_dlp_path, "--quiet"]
- + remaining_args
- + ["https://youtube.com/watch?v={}"],
- stdin=get_ids_proc.stdout,
- )
- seen_filenames = set()
-
- while get_ids_proc.returncode is None and xargs_proc.returncode is None:
- get_ids_proc.poll()
- xargs_proc.poll()
-
- new_filenames = {
- f
- for f in os.listdir(".")
- if f not in seen_filenames and not f.endswith(".part")
- }
-
- if "-x" in remaining_args:
- new_filenames = {f for f in new_filenames if f.endswith(".mp3")}
-
- if new_filenames:
- print("\n".join(new_filenames))
- seen_filenames |= new_filenames
- time.sleep(0.05)
- else:
- time.sleep(0.1)
-
- new_filenames = {
- f
- for f in os.listdir(".")
- if f not in seen_filenames and not f.endswith(".part")
- }
- if new_filenames:
- print("\n".join(new_filenames))
+def download_parallel_playlist(youtube_url: str, remaining_args: list[str]) -> None:
+    """
+    Download a YouTube playlist in parallel.
+
+    Video IDs are read from the playlist as they become available and each
+    one is downloaded on a worker thread, with a progress bar tracking how
+    many downloads have finished.  A download that fails does not stop the
+    rest of the playlist; the number of failures is printed at the end.
+
+    See https://alexwlchan.net/2020/how-to-do-parallel-downloads-with-youtube-dl/
+    """
+    print(termcolor.colored("-> This is a YouTube playlist, downloading in parallel", "blue"))
+
+    playlist_length = 0
+    failed_downloads = 0
+
+    with concurrent.futures.ThreadPoolExecutor() as executor, tqdm.tqdm() as pbar:
+        futures = set()
+
+        for video_id in get_playlist_video_ids(youtube_url):
+            futures.add(
+                executor.submit(download_single_youtube_video, video_id, remaining_args)
+            )
+            playlist_length += 1
+
+            # Once we've got a few videos in the queue, wait for a video
+            # to complete before we queue the next one.
+            if playlist_length > 5:
+                done, futures = concurrent.futures.wait(
+                    futures, return_when=concurrent.futures.FIRST_COMPLETED
+                )
+
+                # An exception raised in a worker is stored on its future;
+                # unless we look at it, the failure vanishes silently.
+                for fut in done:
+                    if fut.exception() is not None:
+                        failed_downloads += 1
+
+                pbar.update(len(done))
+
+        # We only know the size of the playlist once every ID has been read,
+        # so the progress bar gets its total at this point.
+        pbar.total = playlist_length
+        pbar.refresh()
+
+        for fut in concurrent.futures.as_completed(futures):
+            if fut.exception() is not None:
+                failed_downloads += 1
+            pbar.update(1)
+
+    # Report failures after the progress bar has closed, rather than raising,
+    # so one bad video doesn't abort the rest of the playlist.
+    if failed_downloads:
+        print(
+            termcolor.colored(
+                f"-> {failed_downloads} of {playlist_length} videos failed to download",
+                "red",
+            )
+        )
if __name__ == "__main__":