all: add support for downloading Instagram videos
- ID: 24b8eea
- date: 2025-10-05 14:04:58+00:00
- author: Alex Chan <alex@alexwlchan.net>
- parent: 9a8b746
- message: all: add support for downloading Instagram videos Fixes #3
- changed files: 6 files, 84 additions, 25 deletions
Changed files
README.md (2806) → README.md (2940)
diff --git a/README.md b/README.md
index cac5bb6..f2b8544 100644
--- a/README.md
+++ b/README.md
@@ -16,11 +16,12 @@ $ yt-dlp_alexwlchan.py "https://www.youtube.com/watch?v=TUQaGhPdlxs"
"name": "Public Domain Archive",
"url": "https://www.youtube.com/channel/UCDeqps8f3hoHm6DHJoseDlg",
"avatar_url": "https://yt3.googleusercontent.com/ytc/AIdro_kbeCfc5KrnLmdASZQ9u649IxrxEUXsUaxdSUR_jA_4SZQ=s0"
- }
+ },
+ "site": "youtube"
}
```
-I have other scripts that know how to read this format, and it allows me to consolidate all my YouTube-handling logic in one place.
+I have other scripts that know how to read this format, and it allows me to consolidate all my video-downloading logic in one place.
Other scripts can call this script and get the title or description "for free".
## What it does
@@ -54,6 +55,10 @@ source .venv/bin/activate
python3 yt-dlp_alexwlchan.py "$@"
```
+## Supported sites
+
+yt-dlp supports a wide range of sites, but I only map YouTube and Instagram with this tool.
+
## Can other people use this?
You're welcome to, but I wouldn't recommend it.
dev_requirements.txt (940) → dev_requirements.txt (1016)
diff --git a/dev_requirements.txt b/dev_requirements.txt
index 6f2f120..b22ca4c 100644
--- a/dev_requirements.txt
+++ b/dev_requirements.txt
@@ -10,6 +10,8 @@ charset-normalizer==3.4.3
# via
# -r requirements.txt
# requests
+gallery-dl==1.30.9
+ # via -r requirements.txt
idna==3.10
# via
# -r requirements.txt
@@ -29,7 +31,9 @@ pygments==2.19.2
pytest==8.4.2
# via -r dev_requirements.in
requests==2.32.5
- # via -r requirements.txt
+ # via
+ # -r requirements.txt
+ # gallery-dl
ruff==0.13.3
# via -r dev_requirements.in
urllib3==2.5.0
requirements.in (16) → requirements.in (27)
diff --git a/requirements.in b/requirements.in
index 82eef3a..89d6fd8 100644
--- a/requirements.in
+++ b/requirements.in
@@ -1 +1,2 @@
+gallery-dl
yt-dlp[default]
requirements.txt (543) → requirements.txt (618)
diff --git a/requirements.txt b/requirements.txt
index 4617017..946486c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,6 +8,8 @@ certifi==2025.10.5
# yt-dlp
charset-normalizer==3.4.3
# via requests
+gallery-dl==1.30.9
+ # via -r requirements.in
idna==3.10
# via requests
mutagen==1.47.0
@@ -15,7 +17,9 @@ mutagen==1.47.0
pycryptodomex==3.23.0
# via yt-dlp
requests==2.32.5
- # via yt-dlp
+ # via
+ # gallery-dl
+ # yt-dlp
urllib3==2.5.0
# via
# requests
tests/test_yt-dlp_alexwlchan.py (667) → tests/test_yt-dlp_alexwlchan.py (1283)
diff --git a/tests/test_yt-dlp_alexwlchan.py b/tests/test_yt-dlp_alexwlchan.py
index 1e516a0..ad74080 100644
--- a/tests/test_yt-dlp_alexwlchan.py
+++ b/tests/test_yt-dlp_alexwlchan.py
@@ -3,18 +3,18 @@ import os
import subprocess
-def test_public_domain_video() -> None:
+def download_video(url):
+ output = subprocess.check_output(["python3", "yt-dlp_alexwlchan.py", url])
+ video_info = json.loads(output)
+
+ return video_info
+
+
+def test_youtube_video() -> None:
"""
- Download a public domain video and check we get the expected output.
+ Download a YouTube video and check we get the expected output.
"""
- output = subprocess.check_output(
- [
- "python3",
- "yt-dlp_alexwlchan.py",
- "https://www.youtube.com/watch?v=TUQaGhPdlxs",
- ]
- )
- video_info = json.loads(output)
+ video_info = download_video("https://www.youtube.com/watch?v=TUQaGhPdlxs")
assert (
video_info["title"]
@@ -23,3 +23,21 @@ def test_public_domain_video() -> None:
assert os.path.exists(video_info["video_path"])
assert os.path.exists(video_info["thumbnail_path"])
assert video_info["subtitle_path"] is None
+
+
+def test_instagram_video() -> None:
+ """
+ Download an Instagram video and check we get the expected output.
+ """
+ video_info = download_video("https://www.instagram.com/reel/DMWY8KkOS0n/")
+
+ assert os.path.exists(video_info["video_path"])
+ assert os.path.exists(video_info["thumbnail_path"])
+ assert video_info["subtitle_path"] is None
+
+ assert video_info["channel"]["id"] == "52716733233"
+ assert video_info["channel"]["name"] == "Public Domain Gems"
+ assert (
+ video_info["channel"]["channel_url"]
+ == "https://www.instagram.com/publicdomaingems/"
+ )
yt-dlp_alexwlchan.py (3627) → yt-dlp_alexwlchan.py (4624)
diff --git a/yt-dlp_alexwlchan.py b/yt-dlp_alexwlchan.py
index 0f8195b..011e4f9 100755
--- a/yt-dlp_alexwlchan.py
+++ b/yt-dlp_alexwlchan.py
@@ -2,6 +2,7 @@
import json
from pathlib import Path
+import subprocess
import sys
import tempfile
from typing import TypedDict
@@ -38,7 +39,7 @@ ydl_opts = {
}
-def get_avatar_url(channel_url: str) -> str:
+def get_youtube_avatar_url(channel_url: str) -> str:
"""
Returns the avatar URL of a YouTube channel.
"""
@@ -62,6 +63,18 @@ def get_avatar_url(channel_url: str) -> str:
return best_thumbnail["url"]
+def get_instagram_avatar_url(channel_name: str) -> str:
+ """
+ Returns the avatar URL of an Instagram channel.
+ """
+ output = subprocess.check_output(
+ ["gallery-dl", "--get-urls", f"https://www.instagram.com/{channel_name}/avatar"]
+ )
+ avatar_url = output.strip().decode("utf8")
+
+ return avatar_url
+
+
class ChannelInfo(TypedDict):
id: str
name: str
@@ -77,6 +90,7 @@ class VideoInfo(TypedDict):
thumbnail_path: Path
subtitle_path: Path
channel: ChannelInfo
+ site: str
def download_video(url: str) -> VideoInfo:
@@ -95,14 +109,28 @@ def download_video(url: str) -> VideoInfo:
except StopIteration:
subtitle_path = None
- channel = {
- "id": video_info["channel_id"],
- "name": video_info["channel"],
- "url": video_info["channel_url"],
- "avatar_url": get_avatar_url(video_info["channel_url"]),
- }
-
- result = {
+ if video_info["extractor"] == "youtube":
+ site = "youtube"
+ channel = {
+ "id": video_info["channel_id"],
+ "name": video_info["channel"],
+ "url": video_info["channel_url"],
+ "avatar_url": get_youtube_avatar_url(video_info["channel_url"]),
+ }
+ elif video_info["extractor"] == "Instagram":
+ with open("out.json", "w") as of:
+ of.write(str(video_info))
+ site = "instagram"
+ channel = {
+ "id": video_info["uploader_id"],
+ "name": video_info["uploader"],
+ "channel_url": f"https://www.instagram.com/{video_info['channel']}/",
+ "avatar_url": get_instagram_avatar_url(channel_name=video_info["channel"]),
+ }
+ else:
+ sys.exit(f"Unsupported extractor: {video_info['extractor']}")
+
+ return {
"url": url,
"title": video_info["title"],
"description": video_info["description"],
@@ -110,10 +138,9 @@ def download_video(url: str) -> VideoInfo:
"thumbnail_path": thumbnail_path,
"subtitle_path": subtitle_path,
"channel": channel,
+ "site": site,
}
- return result
-
class PathEncoder(json.JSONEncoder):
"""