Skip to main content

Merge pull request #14 from alexwlchan/avatars

ID
6d926af
date
2025-10-19 06:31:28+00:00
author
Alex Chan <alex@alexwlchan.net>
parents
7ab7c8a, e093e69
message
Merge pull request #14 from alexwlchan/avatars

all: download avatars and provide a path to downstream code
changed files
6 files, 120 additions, 24 deletions

Changed files

README.md (3006) → README.md (2942)

diff --git a/README.md b/README.md
index 65eb6d7..e5f80c7 100644
--- a/README.md
+++ b/README.md
@@ -17,7 +17,7 @@ $ yt-dlp_alexwlchan.py "https://www.youtube.com/watch?v=TUQaGhPdlxs"
     "id": "UCDeqps8f3hoHm6DHJoseDlg",
     "name": "Public Domain Archive",
     "url": "https://www.youtube.com/channel/UCDeqps8f3hoHm6DHJoseDlg",
-    "avatar_url": "https://yt3.googleusercontent.com/ytc/AIdro_kbeCfc5KrnLmdASZQ9u649IxrxEUXsUaxdSUR_jA_4SZQ=s0"
+    "avatar_path": "publicdomainarchive3052.png"
   },
   "site": "youtube"
 }

dev_requirements.txt (1270) → dev_requirements.txt (1707)

diff --git a/dev_requirements.txt b/dev_requirements.txt
index 8a8d1cf..e5cb856 100644
--- a/dev_requirements.txt
+++ b/dev_requirements.txt
@@ -1,10 +1,16 @@
 # This file was autogenerated by uv via the following command:
 #    uv pip compile dev_requirements.in --output-file dev_requirements.txt
+anyio==4.11.0
+    # via
+    #   -r requirements.txt
+    #   httpx
 brotli==1.1.0
     # via -r requirements.txt
 certifi==2025.10.5
     # via
     #   -r requirements.txt
+    #   httpcore
+    #   httpx
     #   requests
 charset-normalizer==3.4.4
     # via
@@ -12,9 +18,24 @@ charset-normalizer==3.4.4
     #   requests
 gallery-dl==1.30.10
     # via -r requirements.txt
+h11==0.16.0
+    # via
+    #   -r requirements.txt
+    #   httpcore
+httpcore==1.0.9
+    # via
+    #   -r requirements.txt
+    #   httpx
+httpx==0.28.1
+    # via -r requirements.txt
+hyperlink==21.0.0
+    # via -r requirements.txt
 idna==3.11
     # via
     #   -r requirements.txt
+    #   anyio
+    #   httpx
+    #   hyperlink
     #   requests
 iniconfig==2.3.0
     # via pytest
@@ -42,6 +63,10 @@ requests==2.32.5
     #   gallery-dl
 ruff==0.14.1
     # via -r dev_requirements.in
+sniffio==1.3.1
+    # via
+    #   -r requirements.txt
+    #   anyio
 types-yt-dlp==2025.9.26.20251009
     # via -r dev_requirements.in
 typing-extensions==4.15.0

requirements.in (27) → requirements.in (43)

diff --git a/requirements.in b/requirements.in
index 89d6fd8..a71cade 100644
--- a/requirements.in
+++ b/requirements.in
@@ -1,2 +1,4 @@
 gallery-dl
+httpx
+hyperlink
 yt-dlp[default]

requirements.txt (620) → requirements.txt (919)

diff --git a/requirements.txt b/requirements.txt
index 5f4e911..61faf81 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,17 +1,33 @@
 # This file was autogenerated by uv via the following command:
 #    uv pip compile requirements.in --output-file requirements.txt
+anyio==4.11.0
+    # via httpx
 brotli==1.1.0
     # via yt-dlp
 certifi==2025.10.5
     # via
+    #   httpcore
+    #   httpx
     #   requests
     #   yt-dlp
 charset-normalizer==3.4.4
     # via requests
 gallery-dl==1.30.10
     # via -r requirements.in
+h11==0.16.0
+    # via httpcore
+httpcore==1.0.9
+    # via httpx
+httpx==0.28.1
+    # via -r requirements.in
+hyperlink==21.0.0
+    # via -r requirements.in
 idna==3.11
-    # via requests
+    # via
+    #   anyio
+    #   httpx
+    #   hyperlink
+    #   requests
 mutagen==1.47.0
     # via yt-dlp
 pycryptodomex==3.23.0
@@ -20,6 +36,8 @@ requests==2.32.5
     # via
     #   gallery-dl
     #   yt-dlp
+sniffio==1.3.1
+    # via anyio
 urllib3==2.5.0
     # via
     #   requests

test_yt-dlp_alexwlchan.py (1508) → test_yt-dlp_alexwlchan.py (1657)

diff --git a/test_yt-dlp_alexwlchan.py b/test_yt-dlp_alexwlchan.py
index 5b28cf2..0a11d22 100644
--- a/test_yt-dlp_alexwlchan.py
+++ b/test_yt-dlp_alexwlchan.py
@@ -24,6 +24,7 @@ def test_youtube_video() -> None:
     assert os.path.exists(video_info["video_path"])
     assert os.path.exists(video_info["thumbnail_path"])
     assert video_info["subtitle_path"] is None
+    assert os.path.exists(video_info["uploader"]["avatar_path"])
 
     assert video_info["id"] == "TUQaGhPdlxs"
     assert video_info["date_uploaded"] == "2022-03-25T01:10:38Z"
@@ -39,9 +40,12 @@ def test_instagram_video() -> None:
     assert os.path.exists(video_info["thumbnail_path"])
     assert video_info["subtitle_path"] is None
 
-    assert video_info["channel"]["id"] == "52716733233"
-    assert video_info["channel"]["name"] == "Public Domain Gems"
-    assert video_info["channel"]["url"] == "https://www.instagram.com/publicdomaingems/"
+    assert video_info["uploader"]["id"] == "52716733233"
+    assert video_info["uploader"]["name"] == "Public Domain Gems"
+    assert (
+        video_info["uploader"]["url"] == "https://www.instagram.com/publicdomaingems/"
+    )
+    assert os.path.exists(video_info["uploader"]["avatar_path"])
 
     assert video_info["id"] == "DMWY8KkOS0n"
     assert video_info["date_uploaded"] == "2025-07-21T00:34:41Z"

yt-dlp_alexwlchan.py (4879) → yt-dlp_alexwlchan.py (6237)

diff --git a/yt-dlp_alexwlchan.py b/yt-dlp_alexwlchan.py
index 0f2685b..419bcc8 100755
--- a/yt-dlp_alexwlchan.py
+++ b/yt-dlp_alexwlchan.py
@@ -8,6 +8,8 @@ import sys
 import tempfile
 from typing import Any, TypedDict
 
+import httpx
+import hyperlink
 from yt_dlp import YoutubeDL
 
 
@@ -40,9 +42,40 @@ ydl_opts: Any = {
 }
 
 
-def get_youtube_avatar_url(channel_url: str) -> str:
+def _choose_filename_suffix(content_type: str) -> str:
     """
-    Returns the avatar URL of a YouTube channel.
+    Given an HTTP Content-Type header, choose the correct suffix for
+    the downloaded file.
+    """
+    if content_type == "image/png":
+        return ".png"
+    elif content_type == "image/jpeg":
+        return ".jpg"
+    else:
+        raise ValueError(f"Unrecognised content-type: {content_type}")
+
+
+def download_file(out_dir: Path, url: str, basename: str) -> Path:
+    """
+    Download an image, and pick a file extension based on the image type.
+    """
+    # Download the bytes, and save them to a file.
+    resp = httpx.get(url)
+    resp.raise_for_status()
+
+    suffix = _choose_filename_suffix(resp.headers["content-type"])
+
+    out_path = out_dir / (basename + suffix)
+
+    with open(out_path, "xb") as out_file:
+        out_file.write(resp.content)
+
+    return out_path
+
+
+def get_youtube_avatar(tmp_dir: Path, channel_url: str) -> Path:
+    """
+    Download the avatar of a YouTube channel.
     """
     ydl_opts: Any = {
         # Print progress output to stderr, not stdout
@@ -56,31 +89,43 @@ def get_youtube_avatar_url(channel_url: str) -> str:
         "playlist_items": "0",
     }
 
+    # Get the URL of the YouTube avatar.
     with YoutubeDL(ydl_opts) as ydl:
         channel_info: Any = ydl.extract_info(channel_url, download=False)
 
     thumbnails = channel_info["thumbnails"]
     best_thumbnail = next(t for t in thumbnails if t["id"] == "avatar_uncropped")
-    return str(best_thumbnail["url"])
+    thumbnail_url = best_thumbnail["url"]
 
+    # Work out the base filename, e.g. "https://www.youtube.com/@networkrail"
+    # becomes "networkrail"
+    u = hyperlink.parse(channel_url)
+    basename = u.path[0].replace("@", "")
 
-def get_instagram_avatar_url(channel_name: str) -> str:
+    return download_file(tmp_dir, url=thumbnail_url, basename=basename)
+
+
+def get_instagram_avatar(tmp_dir: Path, uploader_name: str) -> Path:
     """
-    Returns the avatar URL of an Instagram channel.
+    Download the avatar of an Instagram channel.
     """
     output = subprocess.check_output(
-        ["gallery-dl", "--get-urls", f"https://www.instagram.com/{channel_name}/avatar"]
+        [
+            "gallery-dl",
+            "--get-urls",
+            f"https://www.instagram.com/{uploader_name}/avatar",
+        ]
     )
     avatar_url = output.strip().decode("utf8")
 
-    return avatar_url
+    return download_file(tmp_dir, url=avatar_url, basename=uploader_name)
 
 
-class ChannelInfo(TypedDict):
+class UploaderInfo(TypedDict):
     id: str
     name: str
     url: str
-    avatar_url: str
+    avatar_path: Path
 
 
 class VideoInfo(TypedDict):
@@ -92,7 +137,7 @@ class VideoInfo(TypedDict):
     video_path: Path
     thumbnail_path: Path
     subtitle_path: Path | None
-    channel: ChannelInfo
+    uploader: UploaderInfo
     site: str
 
 
@@ -112,23 +157,25 @@ def download_video(url: str) -> VideoInfo:
     except StopIteration:
         subtitle_path = None
 
-    channel: ChannelInfo
+    uploader: UploaderInfo
 
     if video_info["extractor"] == "youtube":
         site = "youtube"
-        channel = {
-            "id": video_info["channel_id"],
-            "name": video_info["channel"],
-            "url": video_info["channel_url"],
-            "avatar_url": get_youtube_avatar_url(video_info["channel_url"]),
+        uploader = {
+            "id": video_info["uploader_id"],
+            "name": video_info["uploader"],
+            "url": video_info["uploader_url"],
+            "avatar_path": get_youtube_avatar(tmp_dir, video_info["uploader_url"]),
         }
     elif video_info["extractor"] == "Instagram":
         site = "instagram"
-        channel = {
+        uploader = {
             "id": video_info["uploader_id"],
             "name": video_info["uploader"],
             "url": f"https://www.instagram.com/{video_info['channel']}/",
-            "avatar_url": get_instagram_avatar_url(channel_name=video_info["channel"]),
+            "avatar_path": get_instagram_avatar(
+                tmp_dir, uploader_name=video_info["channel"]
+            ),
         }
     else:
         sys.exit(f"Unsupported extractor: {video_info['extractor']}")
@@ -144,7 +191,7 @@ def download_video(url: str) -> VideoInfo:
         "video_path": video_path,
         "thumbnail_path": thumbnail_path,
         "subtitle_path": subtitle_path,
-        "channel": channel,
+        "uploader": uploader,
         "site": site,
     }