Merge pull request #14 from alexwlchan/avatars
- ID: 6d926af
- date: 2025-10-19 06:31:28+00:00
- author: Alex Chan <alex@alexwlchan.net>
- parents: 7ab7c8a, e093e69
- message: Merge pull request #14 from alexwlchan/avatars — all: download avatars and provide a path to downstream code
- changed files: 6 files, 120 additions, 24 deletions
Changed files
README.md (3006) → README.md (2942)
diff --git a/README.md b/README.md
index 65eb6d7..e5f80c7 100644
--- a/README.md
+++ b/README.md
@@ -17,7 +17,7 @@ $ yt-dlp_alexwlchan.py "https://www.youtube.com/watch?v=TUQaGhPdlxs"
"id": "UCDeqps8f3hoHm6DHJoseDlg",
"name": "Public Domain Archive",
"url": "https://www.youtube.com/channel/UCDeqps8f3hoHm6DHJoseDlg",
- "avatar_url": "https://yt3.googleusercontent.com/ytc/AIdro_kbeCfc5KrnLmdASZQ9u649IxrxEUXsUaxdSUR_jA_4SZQ=s0"
+ "avatar_path": "publicdomainarchive3052.png"
},
"site": "youtube"
}
dev_requirements.txt (1270) → dev_requirements.txt (1707)
diff --git a/dev_requirements.txt b/dev_requirements.txt
index 8a8d1cf..e5cb856 100644
--- a/dev_requirements.txt
+++ b/dev_requirements.txt
@@ -1,10 +1,16 @@
# This file was autogenerated by uv via the following command:
# uv pip compile dev_requirements.in --output-file dev_requirements.txt
+anyio==4.11.0
+ # via
+ # -r requirements.txt
+ # httpx
brotli==1.1.0
# via -r requirements.txt
certifi==2025.10.5
# via
# -r requirements.txt
+ # httpcore
+ # httpx
# requests
charset-normalizer==3.4.4
# via
@@ -12,9 +18,24 @@ charset-normalizer==3.4.4
# requests
gallery-dl==1.30.10
# via -r requirements.txt
+h11==0.16.0
+ # via
+ # -r requirements.txt
+ # httpcore
+httpcore==1.0.9
+ # via
+ # -r requirements.txt
+ # httpx
+httpx==0.28.1
+ # via -r requirements.txt
+hyperlink==21.0.0
+ # via -r requirements.txt
idna==3.11
# via
# -r requirements.txt
+ # anyio
+ # httpx
+ # hyperlink
# requests
iniconfig==2.3.0
# via pytest
@@ -42,6 +63,10 @@ requests==2.32.5
# gallery-dl
ruff==0.14.1
# via -r dev_requirements.in
+sniffio==1.3.1
+ # via
+ # -r requirements.txt
+ # anyio
types-yt-dlp==2025.9.26.20251009
# via -r dev_requirements.in
typing-extensions==4.15.0
requirements.in (27) → requirements.in (43)
diff --git a/requirements.in b/requirements.in
index 89d6fd8..a71cade 100644
--- a/requirements.in
+++ b/requirements.in
@@ -1,2 +1,4 @@
gallery-dl
+httpx
+hyperlink
yt-dlp[default]
requirements.txt (620) → requirements.txt (919)
diff --git a/requirements.txt b/requirements.txt
index 5f4e911..61faf81 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,17 +1,33 @@
# This file was autogenerated by uv via the following command:
# uv pip compile requirements.in --output-file requirements.txt
+anyio==4.11.0
+ # via httpx
brotli==1.1.0
# via yt-dlp
certifi==2025.10.5
# via
+ # httpcore
+ # httpx
# requests
# yt-dlp
charset-normalizer==3.4.4
# via requests
gallery-dl==1.30.10
# via -r requirements.in
+h11==0.16.0
+ # via httpcore
+httpcore==1.0.9
+ # via httpx
+httpx==0.28.1
+ # via -r requirements.in
+hyperlink==21.0.0
+ # via -r requirements.in
idna==3.11
- # via requests
+ # via
+ # anyio
+ # httpx
+ # hyperlink
+ # requests
mutagen==1.47.0
# via yt-dlp
pycryptodomex==3.23.0
@@ -20,6 +36,8 @@ requests==2.32.5
# via
# gallery-dl
# yt-dlp
+sniffio==1.3.1
+ # via anyio
urllib3==2.5.0
# via
# requests
test_yt-dlp_alexwlchan.py (1508) → test_yt-dlp_alexwlchan.py (1657)
diff --git a/test_yt-dlp_alexwlchan.py b/test_yt-dlp_alexwlchan.py
index 5b28cf2..0a11d22 100644
--- a/test_yt-dlp_alexwlchan.py
+++ b/test_yt-dlp_alexwlchan.py
@@ -24,6 +24,7 @@ def test_youtube_video() -> None:
assert os.path.exists(video_info["video_path"])
assert os.path.exists(video_info["thumbnail_path"])
assert video_info["subtitle_path"] is None
+ assert os.path.exists(video_info["uploader"]["avatar_path"])
assert video_info["id"] == "TUQaGhPdlxs"
assert video_info["date_uploaded"] == "2022-03-25T01:10:38Z"
@@ -39,9 +40,12 @@ def test_instagram_video() -> None:
assert os.path.exists(video_info["thumbnail_path"])
assert video_info["subtitle_path"] is None
- assert video_info["channel"]["id"] == "52716733233"
- assert video_info["channel"]["name"] == "Public Domain Gems"
- assert video_info["channel"]["url"] == "https://www.instagram.com/publicdomaingems/"
+ assert video_info["uploader"]["id"] == "52716733233"
+ assert video_info["uploader"]["name"] == "Public Domain Gems"
+ assert (
+ video_info["uploader"]["url"] == "https://www.instagram.com/publicdomaingems/"
+ )
+ assert os.path.exists(video_info["uploader"]["avatar_path"])
assert video_info["id"] == "DMWY8KkOS0n"
assert video_info["date_uploaded"] == "2025-07-21T00:34:41Z"
yt-dlp_alexwlchan.py (4879) → yt-dlp_alexwlchan.py (6237)
diff --git a/yt-dlp_alexwlchan.py b/yt-dlp_alexwlchan.py
index 0f2685b..419bcc8 100755
--- a/yt-dlp_alexwlchan.py
+++ b/yt-dlp_alexwlchan.py
@@ -8,6 +8,8 @@ import sys
import tempfile
from typing import Any, TypedDict
+import httpx
+import hyperlink
from yt_dlp import YoutubeDL
@@ -40,9 +42,40 @@ ydl_opts: Any = {
}
-def get_youtube_avatar_url(channel_url: str) -> str:
+def _choose_filename_suffix(content_type: str) -> str:
"""
- Returns the avatar URL of a YouTube channel.
+ Given an HTTP Content-Type header, choose the correct suffix for
+ the downloaded file.
+ """
+ if content_type == "image/png":
+ return ".png"
+ elif content_type == "image/jpeg":
+ return ".jpg"
+ else:
+ raise ValueError(f"Unrecognised content-type: {content_type}")
+
+
+def download_file(out_dir: Path, url: str, basename: str) -> Path:
+ """
+ Download an image, and pick a file extension based on the image type.
+ """
+ # Download the bytes, and save them to a file.
+ resp = httpx.get(url)
+ resp.raise_for_status()
+
+ suffix = _choose_filename_suffix(resp.headers["content-type"])
+
+ out_path = out_dir / (basename + suffix)
+
+ with open(out_path, "xb") as out_file:
+ out_file.write(resp.content)
+
+ return out_path
+
+
+def get_youtube_avatar(tmp_dir: Path, channel_url: str) -> Path:
+ """
+ Download the avatar of a YouTube channel.
"""
ydl_opts: Any = {
# Print progress output to stderr, not stdout
@@ -56,31 +89,43 @@ def get_youtube_avatar_url(channel_url: str) -> str:
"playlist_items": "0",
}
+ # Get the URL of the YouTube avatar.
with YoutubeDL(ydl_opts) as ydl:
channel_info: Any = ydl.extract_info(channel_url, download=False)
thumbnails = channel_info["thumbnails"]
best_thumbnail = next(t for t in thumbnails if t["id"] == "avatar_uncropped")
- return str(best_thumbnail["url"])
+ thumbnail_url = best_thumbnail["url"]
+ # Work out the base filename, e.g. "https://www.youtube.com/@networkrail"
+ # becomes "networkrail"
+ u = hyperlink.parse(channel_url)
+ basename = u.path[0].replace("@", "")
-def get_instagram_avatar_url(channel_name: str) -> str:
+ return download_file(tmp_dir, url=thumbnail_url, basename=basename)
+
+
+def get_instagram_avatar(tmp_dir: Path, uploader_name: str) -> Path:
"""
- Returns the avatar URL of an Instagram channel.
+ Download the avatar of an Instagram channel.
"""
output = subprocess.check_output(
- ["gallery-dl", "--get-urls", f"https://www.instagram.com/{channel_name}/avatar"]
+ [
+ "gallery-dl",
+ "--get-urls",
+ f"https://www.instagram.com/{uploader_name}/avatar",
+ ]
)
avatar_url = output.strip().decode("utf8")
- return avatar_url
+ return download_file(tmp_dir, url=avatar_url, basename=uploader_name)
-class ChannelInfo(TypedDict):
+class UploaderInfo(TypedDict):
id: str
name: str
url: str
- avatar_url: str
+ avatar_path: Path
class VideoInfo(TypedDict):
@@ -92,7 +137,7 @@ class VideoInfo(TypedDict):
video_path: Path
thumbnail_path: Path
subtitle_path: Path | None
- channel: ChannelInfo
+ uploader: UploaderInfo
site: str
@@ -112,23 +157,25 @@ def download_video(url: str) -> VideoInfo:
except StopIteration:
subtitle_path = None
- channel: ChannelInfo
+ uploader: UploaderInfo
if video_info["extractor"] == "youtube":
site = "youtube"
- channel = {
- "id": video_info["channel_id"],
- "name": video_info["channel"],
- "url": video_info["channel_url"],
- "avatar_url": get_youtube_avatar_url(video_info["channel_url"]),
+ uploader = {
+ "id": video_info["uploader_id"],
+ "name": video_info["uploader"],
+ "url": video_info["uploader_url"],
+ "avatar_path": get_youtube_avatar(tmp_dir, video_info["uploader_url"]),
}
elif video_info["extractor"] == "Instagram":
site = "instagram"
- channel = {
+ uploader = {
"id": video_info["uploader_id"],
"name": video_info["uploader"],
"url": f"https://www.instagram.com/{video_info['channel']}/",
- "avatar_url": get_instagram_avatar_url(channel_name=video_info["channel"]),
+ "avatar_path": get_instagram_avatar(
+ tmp_dir, uploader_name=video_info["channel"]
+ ),
}
else:
sys.exit(f"Unsupported extractor: {video_info['extractor']}")
@@ -144,7 +191,7 @@ def download_video(url: str) -> VideoInfo:
"video_path": video_path,
"thumbnail_path": thumbnail_path,
"subtitle_path": subtitle_path,
- "channel": channel,
+ "uploader": uploader,
"site": site,
}