Skip to main content

Add a wrapper for yt-dlp and parallel downloads

ID
d8393ce
date
2024-01-13 10:54:29+00:00
author
Alex Chan <alex@alexwlchan.net>
parent
4752383
message
Add a wrapper for yt-dlp and parallel downloads
changed files
2 files, 107 additions

Changed files

web/README.md (0) → web/README.md (994)

diff --git a/web/README.md b/web/README.md
new file mode 100644
index 0000000..9bc8402
--- /dev/null
+++ b/web/README.md
@@ -0,0 +1,41 @@
+# web
+
+These scripts are for interacting with stuff on the web.
+
+## The individual scripts
+
+<!-- [[[cog
+
+# This adds the root of the repo to sys.path, so we can import cog_helpers.py
+from os.path import abspath, dirname
+import sys
+
+sys.path.append(abspath(dirname(dirname("."))))
+
+import cog_helpers
+
+folder_name = "web"
+
+scripts = [
+    {
+        "name": "yt-dlp.py",
+        "description": """
+        this is a wrapper around <a href="https://github.com/yt-dlp/yt-dlp">yt-dlp</a> that does parallel downloads of videos in playlists.
+        """
+    },
+]
+
+cog_helpers.create_description_table(folder_name=folder_name, scripts=scripts)
+
+]]]-->
+<dl>
+  <dt>
+    <a href="https://github.com/alexwlchan/scripts/blob/main/web/yt-dlp.py">
+      <code>yt-dlp.py</code>
+    </a>
+  </dt>
+  <dd>
+    this is a wrapper around <a href="https://github.com/yt-dlp/yt-dlp">yt-dlp</a> that does parallel downloads of videos in playlists.
+  </dd>
+</dl>
+<!-- [[[end]]] (checksum: b21b8d09c8a474c78ec759b6ef5f23f5) -->

web/yt-dlp.py (0) → web/yt-dlp.py (1966)

diff --git a/web/yt-dlp.py b/web/yt-dlp.py
new file mode 100755
index 0000000..5629cac
--- /dev/null
+++ b/web/yt-dlp.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+"""
+This is a wrapper around yt-dlp that has special behaviour
+for YouTube playlists.
+
+When it detects a playlist, it can run up to five downloads in parallel --
+this can make downloads significantly faster.
+
+See https://alexwlchan.net/2020/how-to-do-parallel-downloads-with-youtube-dl/
+"""
+
+import os
+import shlex
+import subprocess
+import sys
+
+import hyperlink
+
+
+def is_playlist(url: str) -> bool:
+    """
+    Returns True if a YouTube URL has a `list` query parameter (which
+    marks it as a playlist), False otherwise.
+    """
+    return bool(hyperlink.DecodedURL.from_text(url).get("list"))
+
+
+if __name__ == "__main__":
+    argv = sys.argv[1:]
+
+    # Where is yt-dlp?
+    #
+    # sys.executable returns the path to the currently running Python,
+    # and we can go from there to get the path to yt-dlp.
+    yt_dlp_path = os.path.join(os.path.dirname(sys.executable), "yt-dlp")
+
+    # Look for a YouTube URL in the argument list.  If we don't find exactly
+    # one, assume this isn't a playlist download and call yt-dlp as usual.
+    youtube_url_matches = [a for a in argv if "youtube.com" in a]
+
+    if len(youtube_url_matches) != 1:
+        subprocess.check_call([yt_dlp_path] + argv)
+        sys.exit(0)
+
+    remaining_args = [a for a in argv if "youtube.com" not in a]
+    youtube_url = youtube_url_matches[0]
+
+    # If this is a YouTube URL but it's not a playlist, then it's probably
+    # a single video.  Download it as normal.
+    if not is_playlist(youtube_url):
+        subprocess.check_call([yt_dlp_path] + argv)
+
+    # Otherwise, this is a playlist: pipe the video IDs into xargs, which
+    # runs up to five yt-dlp downloads at once.  Wait for both processes,
+    # so we don't exit (and lose the exit status) mid-download.
+    else:
+        get_ids_proc = subprocess.Popen(
+            [yt_dlp_path, "--get-id", youtube_url], stdout=subprocess.PIPE
+        )
+        xargs_cmd = ["xargs", "-I", "{}", "-P", "5", yt_dlp_path]
+        xargs_cmd += remaining_args + ["https://youtube.com/watch?v={}"]
+        xargs_proc = subprocess.Popen(xargs_cmd, stdin=get_ids_proc.stdout)
+
+        # Drop our copy of the pipe, so only xargs is reading from it.
+        get_ids_proc.stdout.close()
+        sys.exit(xargs_proc.wait() or get_ids_proc.wait())