Skip to main content

all: initial commit

ID
1b75edc
date
2025-10-05 06:46:36+00:00
author
Alex Chan <alex@alexwlchan.net>
message
all: initial commit
changed files
8 files, 245 additions

Changed files

.github/dependabot.yml (0) → .github/dependabot.yml (285)

diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000..6a21153
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,14 @@
+version: 2
+updates:
+  - package-ecosystem: "github-actions"
+    directory: "/"
+    schedule:
+      interval: "weekly"
+      day: "monday"
+      time: "09:00"
+  - package-ecosystem: "pip"
+    directory: "/"
+    schedule:
+      interval: "weekly"
+      day: "monday"
+      time: "09:00"

.github/workflows/test.yml (0) → .github/workflows/test.yml (674)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 0000000..06b2491
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,40 @@
+# CI workflow: lint, check formatting and run the tests for every push to
+# `main` and every pull request that targets `main`.
+name: Test
+
+on:
+  push:
+    branches:
+    - main
+
+  pull_request:
+    branches:
+    - main
+
+jobs:
+  test:
+    strategy:
+      matrix:
+        python-version:
+          - "3.13"
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+        # Cache pip downloads, keyed on the pinned dev requirements file.
+        cache: 'pip'
+        cache-dependency-path: 'dev_requirements.txt'
+
+    - name: Install dependencies
+      run: pip install -r dev_requirements.txt
+
+    - name: Check formatting
+      run: |
+        ruff check .
+        ruff format --check .
+
+    # NOTE(review): dev_requirements.in only lists ruff (plus requirements.txt),
+    # and this commit does not add a tests/ directory -- confirm pytest is
+    # installed and that tests exist before relying on this step.
+    - name: Run tests
+      run: pytest tests

README.md (0) → README.md (1365)

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e91e254
--- /dev/null
+++ b/README.md
@@ -0,0 +1,24 @@
+# yt-dlp_alexwlchan
+
+This is a personal wrapper around [yt-dlp](https://github.com/yt-dlp/yt-dlp) that downloads a video with thumbnails and subtitles, converts to my preferred formats, then prints some key information in a consistent JSON format.
+
+```console
+$ yt-dlp_alexwlchan.py "https://www.youtube.com/watch?v=TUQaGhPdlxs"
+{
+  "url": "https://www.youtube.com/watch?v=TUQaGhPdlxs",
+  "title": "\"new york city, manhattan, people\" - Free Public Domain Video",
+  "description": "All videos uploaded to this channel are in the Public Domain: Free for use by anyone for any purpose without restriction. #PublicDomain",
+  "video_path": "\uff02new york city, manhattan, people\uff02 - Free Public Domain Video [TUQaGhPdlxs].mp4",
+  "thumbnail_path": "\uff02new york city, manhattan, people\uff02 - Free Public Domain Video [TUQaGhPdlxs].jpg",
+  "subtitle_path": null,
+  "channel": {
+    "id": "UCDeqps8f3hoHm6DHJoseDlg",
+    "name": "Public Domain Archive",
+    "url": "https://www.youtube.com/channel/UCDeqps8f3hoHm6DHJoseDlg",
+    "avatar_url": "https://yt3.googleusercontent.com/ytc/AIdro_kbeCfc5KrnLmdASZQ9u649IxrxEUXsUaxdSUR_jA_4SZQ=s0"
+  }
+}
+```
+
+I have other scripts that know how to read this format, and it allows me to consolidate all my YouTube-handling logic in one place.
+Other scripts can call this script and get the title or description "for free".

dev_requirements.in (0) → dev_requirements.in (26)

diff --git a/dev_requirements.in b/dev_requirements.in
new file mode 100644
index 0000000..1d7e062
--- /dev/null
+++ b/dev_requirements.in
@@ -0,0 +1,3 @@
+-r requirements.txt
+
+ruff

dev_requirements.txt (0) → dev_requirements.txt (761)

diff --git a/dev_requirements.txt b/dev_requirements.txt
new file mode 100644
index 0000000..2c8efa7
--- /dev/null
+++ b/dev_requirements.txt
@@ -0,0 +1,32 @@
+# This file was autogenerated by uv via the following command:
+#    uv pip compile dev_requirements.in --output-file dev_requirements.txt
+brotli==1.1.0
+    # via -r requirements.txt
+certifi==2025.10.5
+    # via
+    #   -r requirements.txt
+    #   requests
+charset-normalizer==3.4.3
+    # via
+    #   -r requirements.txt
+    #   requests
+idna==3.10
+    # via
+    #   -r requirements.txt
+    #   requests
+mutagen==1.47.0
+    # via -r requirements.txt
+pycryptodomex==3.23.0
+    # via -r requirements.txt
+requests==2.32.5
+    # via -r requirements.txt
+ruff==0.13.3
+    # via -r dev_requirements.in
+urllib3==2.5.0
+    # via
+    #   -r requirements.txt
+    #   requests
+websockets==15.0.1
+    # via -r requirements.txt
+yt-dlp==2025.9.26
+    # via -r requirements.txt

requirements.in (0) → requirements.in (16)

diff --git a/requirements.in b/requirements.in
new file mode 100644
index 0000000..82eef3a
--- /dev/null
+++ b/requirements.in
@@ -0,0 +1 @@
+yt-dlp[default]

requirements.txt (0) → requirements.txt (543)

diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..4617017
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,26 @@
+# This file was autogenerated by uv via the following command:
+#    uv pip compile requirements.in --output-file requirements.txt
+brotli==1.1.0
+    # via yt-dlp
+certifi==2025.10.5
+    # via
+    #   requests
+    #   yt-dlp
+charset-normalizer==3.4.3
+    # via requests
+idna==3.10
+    # via requests
+mutagen==1.47.0
+    # via yt-dlp
+pycryptodomex==3.23.0
+    # via yt-dlp
+requests==2.32.5
+    # via yt-dlp
+urllib3==2.5.0
+    # via
+    #   requests
+    #   yt-dlp
+websockets==15.0.1
+    # via yt-dlp
+yt-dlp==2025.9.26
+    # via -r requirements.in

yt-dlp_alexwlchan.py (0) → yt-dlp_alexwlchan.py (2786)

diff --git a/yt-dlp_alexwlchan.py b/yt-dlp_alexwlchan.py
new file mode 100755
index 0000000..332f491
--- /dev/null
+++ b/yt-dlp_alexwlchan.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python3
+
+import json
+import os
+import sys
+import tempfile
+
+from yt_dlp import YoutubeDL
+
+
+# Options passed to YoutubeDL when downloading the requested video
+# (see the `__main__` block at the bottom of this file).
+ydl_opts = {
+    # Print progress output to stderr, not stdout.  The script prints its
+    # JSON result to stdout, so stdout has to stay free of yt-dlp output.
+    "logtostderr": True,
+    #
+    # Download the thumbnail
+    "writethumbnail": True,
+    #
+    # Download subtitles, or YouTube's automatic subtitles if there
+    # aren't any.
+    "writesubtitles": True,
+    "writeautomaticsub": True,
+    #
+    # Download video files as MP4 and thumbnails as JPEG, or convert
+    # to those formats if they aren't the best available.
+    #
+    # The `__main__` block finds the downloaded files by looking for
+    # the `.mp4` and `.jpg` extensions, so it relies on these conversions.
+    "format_sort": ["res", "ext:mp4:m4a"],
+    "postprocessors": [
+        {
+            "key": "FFmpegVideoConvertor",
+            # NOTE(review): "preferedformat" (single "r") looks like a typo,
+            # but is presumably the option name yt-dlp expects -- check
+            # against yt-dlp before "correcting" the spelling.
+            "preferedformat": "mp4",
+        },
+        {
+            "key": "FFmpegThumbnailsConvertor",
+            "format": "jpg",
+            "when": "before_dl",
+        },
+    ],
+}
+
+
+def get_avatar_url(channel_url: str) -> str:
+    """
+    Returns the avatar URL of a YouTube channel.
+    """
+    ydl_opts = {
+        # Print progress output to stderr, not stdout
+        "logtostderr": True,
+        #
+        # Don't download every page of results for the channel.
+        #
+        # This tells yt-dlp that we're only interested in the first video,
+        # which is technically a lie because we don't care about any videos,
+        # but it has the desired effect.
+        "playlist_items": "0",
+    }
+
+    with YoutubeDL(ydl_opts) as ydl:
+        channel_info = ydl.extract_info(channel_url, download=False)
+
+    thumbnails = channel_info["thumbnails"]
+    best_thumbnail = next(
+        t for t in thumbnails if t['id'] == 'avatar_uncropped'
+    )
+    return best_thumbnail["url"]
+
+
+if __name__ == "__main__":
+    try:
+        url = sys.argv[1]
+    except IndexError:
+        sys.exit(f"Usage: {__file__} URL")
+
+    tmp_dir = tempfile.mkdtemp()
+
+    os.chdir(tmp_dir)
+
+    with YoutubeDL(ydl_opts) as ydl:
+        video_info = ydl.extract_info(url)
+
+    downloaded_files = os.listdir(tmp_dir)
+
+    video_path = next(p for p in downloaded_files if p.endswith(".mp4"))
+    thumbnail_path = next(p for p in downloaded_files if p.endswith(".jpg"))
+    try:
+        subtitle_path = next(p for p in downloaded_files if p.endswith(".vtt"))
+    except StopIteration:
+        subtitle_path = None
+
+    channel = {
+        "id": video_info["channel_id"],
+        "name": video_info["channel"],
+        "url": video_info["channel_url"],
+        "avatar_url": get_avatar_url(video_info["channel_url"]),
+    }
+
+    result = {
+        "url": url,
+        "title": video_info["title"],
+        "description": video_info["description"],
+        "video_path": video_path,
+        "thumbnail_path": thumbnail_path,
+        "subtitle_path": subtitle_path,
+        "channel": channel,
+    }
+
+    print(json.dumps(result, indent=2))