Add a wrapper for yt-dlp and parallel downloads
- ID: d8393ce
- date: 2024-01-13 10:54:29+00:00
- author: Alex Chan <alex@alexwlchan.net>
- parent: 4752383
- message: Add a wrapper for yt-dlp and parallel downloads
- changed files: 2 files, 107 additions
Changed files
web/README.md (0) → web/README.md (994)
diff --git a/web/README.md b/web/README.md
new file mode 100644
index 0000000..9bc8402
--- /dev/null
+++ b/web/README.md
@@ -0,0 +1,41 @@
+# web
+
+These scripts are for interacting with stuff on the web.
+
+## The individual scripts
+
+<!-- [[[cog
+
+# Make cog_helpers.py importable by appending a directory to sys.path
+# (the Python module search path, not the shell PATH).
+#
+# NOTE(review): dirname(dirname(".")) evaluates to "", so abspath() resolves
+# to the current working directory -- this presumably relies on cog being
+# run from the root of the repo; confirm.
+from os.path import abspath, dirname
+import sys
+
+sys.path.append(abspath(dirname(dirname("."))))
+
+import cog_helpers
+
+folder_name = "web"
+
+scripts = [
+ {
+ "name": "yt-dlp.py",
+ "description": """
+ this is a wrapper around <a href="https://github.com/yt-dlp/yt-dlp">yt-dlp</a> that does parallel downloads of videos in playlists.
+ """
+ },
+]
+
+# Hand the script list to the shared helper; the generated listing is what
+# appears between the cog markers below.
+cog_helpers.create_description_table(folder_name=folder_name, scripts=scripts)
+
+]]]-->
+<dl>
+ <dt>
+ <a href="https://github.com/alexwlchan/scripts/blob/main/web/yt-dlp.py">
+ <code>yt-dlp.py</code>
+ </a>
+ </dt>
+ <dd>
+ this is a wrapper around <a href="https://github.com/yt-dlp/yt-dlp">yt-dlp</a> that does parallel downloads of videos in playlists.
+ </dd>
+</dl>
+<!-- [[[end]]] (checksum: b21b8d09c8a474c78ec759b6ef5f23f5) -->
web/yt-dlp.py (0) → web/yt-dlp.py (1966)
diff --git a/web/yt-dlp.py b/web/yt-dlp.py
new file mode 100755
index 0000000..5629cac
--- /dev/null
+++ b/web/yt-dlp.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+"""
+This is a wrapper around yt-dlp that has special behaviour
+for YouTube playlists.
+
+When it detects a playlist, it can run up to five downloads in parallel --
+this can make downloads significantly faster.
+
+See https://alexwlchan.net/2020/how-to-do-parallel-downloads-with-youtube-dl/
+"""
+
+import os
+import shlex
+import subprocess
+import sys
+
+import hyperlink
+
+
+def is_playlist(url: str) -> bool:
+    """
+    Report whether a YouTube URL has a ``list`` query parameter.
+
+    A URL with at least one ``list`` parameter is treated as a playlist;
+    anything else is treated as not being one.
+    """
+    parsed_url = hyperlink.DecodedURL.from_text(url)
+    list_values = parsed_url.get("list")
+    return True if list_values else False
+
+
+if __name__ == "__main__":
+    # Everything after the script name is forwarded to yt-dlp.
+    argv = sys.argv[1:]
+
+    # Where is yt-dlp?
+    #
+    # sys.executable returns the path to the currently running Python,
+    # and we can go from there to get the path to yt-dlp.
+    yt_dlp_path = os.path.join(os.path.dirname(sys.executable), "yt-dlp")
+
+    # Look for a YouTube URL in the argument list.  If we don't find
+    # exactly one, assume we're downloading some other source and call
+    # yt-dlp as usual.
+    youtube_url_matches = [a for a in argv if "youtube.com" in a]
+
+    if len(youtube_url_matches) != 1:
+        subprocess.check_call([yt_dlp_path] + argv)
+        sys.exit(0)
+
+    remaining_args = [a for a in argv if "youtube.com" not in a]
+    youtube_url = youtube_url_matches[0]
+
+    # If this is a YouTube URL but it's not a playlist, then it's probably
+    # a single video.  Download it as normal.
+    if not is_playlist(youtube_url):
+        subprocess.check_call([yt_dlp_path] + argv)
+
+    # Otherwise, this is a playlist, so we want to download it in parallel:
+    # one yt-dlp process lists the video IDs, and xargs fans them out to
+    # up to five yt-dlp download processes at a time.
+    else:
+        get_ids_proc = subprocess.Popen(
+            [yt_dlp_path, "--get-id", youtube_url], stdout=subprocess.PIPE
+        )
+
+        download_proc = subprocess.Popen(
+            ["xargs", "-I", "{}", "-P", "5", yt_dlp_path]
+            + remaining_args
+            + ["https://youtube.com/watch?v={}"],
+            stdin=get_ids_proc.stdout,
+        )
+
+        # Close our copy of the pipe's read end so xargs is the only reader;
+        # if xargs exits early, the ID-listing process gets SIGPIPE instead
+        # of blocking forever on a pipe nobody is draining.
+        get_ids_proc.stdout.close()
+
+        # Wait for *both* processes.  Listing the IDs finishes long before
+        # the downloads do, so waiting only on get_ids_proc would let this
+        # script return while xargs is still downloading in the background.
+        get_ids_rc = get_ids_proc.wait()
+        download_rc = download_proc.wait()
+
+        # Surface failures to the caller rather than always exiting 0.
+        if get_ids_rc != 0 or download_rc != 0:
+            sys.exit(get_ids_rc or download_rc)