Skip to main content

web/yt-dlp.py

1#!/usr/bin/env python3
2"""
This is a wrapper around yt-dlp that has a couple of special behaviours:

* It does parallel downloads for YouTube playlists, which is much
  faster than vanilla yt-dlp.

* It enforces a couple of rules around downloading subtitles, to ensure
  I always remember to download them in a consistent way.

The goal is that this is a drop-in replacement for vanilla yt-dlp: if it
downloads something, it downloads the exact same set of files. You could
copy any command that uses this script onto a machine running the regular
tool and it would work as-is. It might check extra rules or run faster,
but it should never download something different to the regular tool.
16"""
18from collections.abc import Iterator
19import concurrent.futures
20import os
21import subprocess
22import sys
23import urllib.parse
25import tqdm
def is_youtube_playlist(url: str) -> bool:
    """
    Return True if a YouTube URL points at a playlist, False otherwise.

    A URL counts as a playlist when its query string carries a non-empty
    ``list`` parameter whose value is not exactly ``WL`` (Watch Later).

    Raises ValueError if the host part of ``url`` does not contain
    "youtube.com".
    """
    parts = urllib.parse.urlsplit(url)

    # Validate with an explicit raise rather than ``assert``: assertions are
    # stripped when Python runs with -O, which would silently skip the check.
    if "youtube.com" not in parts.netloc:
        raise ValueError(f"Not a YouTube URL: {url!r}")

    query = urllib.parse.parse_qs(parts.query)

    # Look for a non-empty playlist which isn't WL (Watch Later).
    # parse_qs drops blank values, so ``?list=`` yields no "list" key at all.
    playlist_ids = query.get("list")
    return bool(playlist_ids) and playlist_ids != ["WL"]
def get_playlist_video_ids(youtube_url: str) -> Iterator[str]:
    """
    Generate the video IDs in a YouTube playlist.

    Runs ``yt-dlp --get-id <youtube_url>`` and yields each ID as soon as
    yt-dlp prints it, so callers can start work before the whole playlist
    has been enumerated.

    Relies on the module-level ``yt_dlp_path`` being set before the first
    ID is requested.

    Raises subprocess.CalledProcessError if yt-dlp exits with a non-zero
    status; any IDs it printed before failing have already been yielded.
    """
    # Use Popen as a context manager so the stdout pipe is closed and the
    # child process is waited for, even if the consumer stops iterating early.
    with subprocess.Popen(
        [yt_dlp_path, "--get-id", youtube_url],
        stdout=subprocess.PIPE,
        bufsize=1,  # line-buffered, so IDs arrive one at a time
        text=True,
    ) as get_ids_proc:
        for line in get_ids_proc.stdout:
            video_id = line.strip()
            # Skip blank lines rather than yielding an empty ID.
            if video_id:
                yield video_id

    # The process has been waited for by now; don't let a failed listing
    # pass for an empty (or shorter) playlist.
    if get_ids_proc.returncode:
        raise subprocess.CalledProcessError(
            get_ids_proc.returncode, get_ids_proc.args
        )
def download_single_youtube_video(video_id: str, remaining_args: list[str]) -> None:
    """
    Download a single YouTube video.

    Invokes yt-dlp with ``--quiet``, followed by ``remaining_args``, followed
    by the watch URL built from ``video_id``.

    Relies on the module-level ``yt_dlp_path`` being set before the call.

    Raises subprocess.CalledProcessError if yt-dlp exits with a non-zero
    status.
    """
    video_url = f"https://youtube.com/watch?v={video_id}"
    command = [yt_dlp_path, "--quiet", *remaining_args, video_url]
    subprocess.check_call(command)
def download_parallel_playlist(youtube_url: str, remaining_args: list[str]) -> None:
    """
    Download a YouTube playlist in parallel.

    Video IDs are read from ``get_playlist_video_ids`` and each one is handed
    to ``download_single_youtube_video`` on a thread pool, with a progress
    bar that is updated as downloads finish.

    If any download fails, the remaining downloads are still attempted and
    the first failure is re-raised once everything has finished.

    See https://alexwlchan.net/2020/how-to-do-parallel-downloads-with-youtube-dl/
    """
    print(
        "\033[94m"
        + "-> This is a YouTube playlist, downloading in parallel"
        + "\033[0m"
    )

    # How many downloads may be queued or running before we stop to wait.
    max_pending = 5

    playlist_length = 0
    failures = []

    with concurrent.futures.ThreadPoolExecutor() as executor, tqdm.tqdm() as pbar:
        pending = set()

        for video_id in get_playlist_video_ids(youtube_url):
            pending.add(
                executor.submit(download_single_youtube_video, video_id, remaining_args)
            )
            playlist_length += 1

            # Once we've got a few videos in the queue, wait for a video
            # to complete before we queue the next one.
            #
            # Throttle on the number of downloads still in flight, not on
            # the running total: several futures can finish during a single
            # wait, and counting the total would then never refill the queue.
            if len(pending) > max_pending:
                done, pending = concurrent.futures.wait(
                    pending, return_when=concurrent.futures.FIRST_COMPLETED
                )
                for fut in done:
                    error = fut.exception()
                    if error is not None:
                        failures.append(error)
                pbar.update(len(done))

        # The playlist length is only known once every ID has been read.
        pbar.total = playlist_length
        pbar.refresh()

        for fut in concurrent.futures.as_completed(pending):
            error = fut.exception()
            if error is not None:
                failures.append(error)
            pbar.update(1)

    # Surface failed downloads instead of silently discarding them.
    if failures:
        raise failures[0]
if __name__ == "__main__":
    argv = sys.argv[1:]

    # Where is yt-dlp?
    #
    # sys.executable returns the path to the currently running Python,
    # and we can go from there to get the path to yt-dlp.
    yt_dlp_path = os.path.join(os.path.dirname(sys.executable), "yt-dlp")

    # Look for a YouTube URL in the argument list. If we don't find exactly
    # one, assume we're downloading some other source and call yt-dlp as usual.
    youtube_url_matches = [a for a in argv if "youtube.com" in a]
    remaining_args = [a for a in argv if "youtube.com" not in a]

    if len(youtube_url_matches) == 1 and is_youtube_playlist(youtube_url_matches[0]):
        youtube_url = youtube_url_matches[0]
        download_parallel_playlist(
            youtube_url=youtube_url, remaining_args=remaining_args
        )
    else:
        # Anything that isn't a single YouTube playlist goes straight to
        # yt-dlp with the original arguments. Pass its exit status through
        # unchanged so this script behaves like the regular tool on failure,
        # rather than dying with a Python traceback.
        sys.exit(subprocess.call([yt_dlp_path] + argv))