Skip to main content

all: add support for downloading Instagram videos

ID
24b8eea
date
2025-10-05 14:04:58+00:00
author
Alex Chan <alex@alexwlchan.net>
parent
9a8b746
message
all: add support for downloading Instagram videos

Fixes #3
changed files
6 files, 84 additions, 25 deletions

Changed files

README.md (2806) → README.md (2940)

diff --git a/README.md b/README.md
index cac5bb6..f2b8544 100644
--- a/README.md
+++ b/README.md
@@ -16,11 +16,12 @@ $ yt-dlp_alexwlchan.py "https://www.youtube.com/watch?v=TUQaGhPdlxs"
     "name": "Public Domain Archive",
     "url": "https://www.youtube.com/channel/UCDeqps8f3hoHm6DHJoseDlg",
     "avatar_url": "https://yt3.googleusercontent.com/ytc/AIdro_kbeCfc5KrnLmdASZQ9u649IxrxEUXsUaxdSUR_jA_4SZQ=s0"
-  }
+  },
+  "site": "youtube"
 }
 ```
 
-I have other scripts that know how to read this format, and it allows me to consolidate all my YouTube-handling logic in one place.
+I have other scripts that know how to read this format, and it allows me to consolidate all my video-downloading logic in one place.
 Other scripts can call this script and get the title or description "for free".
 
 ## What it does
@@ -54,6 +55,10 @@ source .venv/bin/activate
 python3 yt-dlp_alexwlchan.py "$@"
 ```
 
+## Supported sites
+
+yt-dlp supports a wide range of sites, but I only map YouTube and Instagram with this tool.
+
 ## Can other people use this?
 
 You're welcome to, but I wouldn't recommend it.

dev_requirements.txt (940) → dev_requirements.txt (1016)

diff --git a/dev_requirements.txt b/dev_requirements.txt
index 6f2f120..b22ca4c 100644
--- a/dev_requirements.txt
+++ b/dev_requirements.txt
@@ -10,6 +10,8 @@ charset-normalizer==3.4.3
     # via
     #   -r requirements.txt
     #   requests
+gallery-dl==1.30.9
+    # via -r requirements.txt
 idna==3.10
     # via
     #   -r requirements.txt
@@ -29,7 +31,9 @@ pygments==2.19.2
 pytest==8.4.2
     # via -r dev_requirements.in
 requests==2.32.5
-    # via -r requirements.txt
+    # via
+    #   -r requirements.txt
+    #   gallery-dl
 ruff==0.13.3
     # via -r dev_requirements.in
 urllib3==2.5.0

requirements.in (16) → requirements.in (27)

diff --git a/requirements.in b/requirements.in
index 82eef3a..89d6fd8 100644
--- a/requirements.in
+++ b/requirements.in
@@ -1 +1,2 @@
+gallery-dl
 yt-dlp[default]

requirements.txt (543) → requirements.txt (618)

diff --git a/requirements.txt b/requirements.txt
index 4617017..946486c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,6 +8,8 @@ certifi==2025.10.5
     #   yt-dlp
 charset-normalizer==3.4.3
     # via requests
+gallery-dl==1.30.9
+    # via -r requirements.in
 idna==3.10
     # via requests
 mutagen==1.47.0
@@ -15,7 +17,9 @@ mutagen==1.47.0
 pycryptodomex==3.23.0
     # via yt-dlp
 requests==2.32.5
-    # via yt-dlp
+    # via
+    #   gallery-dl
+    #   yt-dlp
 urllib3==2.5.0
     # via
     #   requests

tests/test_yt-dlp_alexwlchan.py (667) → tests/test_yt-dlp_alexwlchan.py (1283)

diff --git a/tests/test_yt-dlp_alexwlchan.py b/tests/test_yt-dlp_alexwlchan.py
index 1e516a0..ad74080 100644
--- a/tests/test_yt-dlp_alexwlchan.py
+++ b/tests/test_yt-dlp_alexwlchan.py
@@ -3,18 +3,18 @@ import os
 import subprocess
 
 
-def test_public_domain_video() -> None:
+def download_video(url):
+    output = subprocess.check_output(["python3", "yt-dlp_alexwlchan.py", url])
+    video_info = json.loads(output)
+
+    return video_info
+
+
+def test_youtube_video() -> None:
     """
-    Download a public domain video and check we get the expected output.
+    Download a YouTube video and check we get the expected output.
     """
-    output = subprocess.check_output(
-        [
-            "python3",
-            "yt-dlp_alexwlchan.py",
-            "https://www.youtube.com/watch?v=TUQaGhPdlxs",
-        ]
-    )
-    video_info = json.loads(output)
+    video_info = download_video("https://www.youtube.com/watch?v=TUQaGhPdlxs")
 
     assert (
         video_info["title"]
@@ -23,3 +23,21 @@ def test_public_domain_video() -> None:
     assert os.path.exists(video_info["video_path"])
     assert os.path.exists(video_info["thumbnail_path"])
     assert video_info["subtitle_path"] is None
+
+
+def test_instagram_video() -> None:
+    """
+    Download an Instagram video and check we get the expected output.
+    """
+    video_info = download_video("https://www.instagram.com/reel/DMWY8KkOS0n/")
+
+    assert os.path.exists(video_info["video_path"])
+    assert os.path.exists(video_info["thumbnail_path"])
+    assert video_info["subtitle_path"] is None
+
+    assert video_info["channel"]["id"] == "52716733233"
+    assert video_info["channel"]["name"] == "Public Domain Gems"
+    assert (
+        video_info["channel"]["channel_url"]
+        == "https://www.instagram.com/publicdomaingems/"
+    )

yt-dlp_alexwlchan.py (3627) → yt-dlp_alexwlchan.py (4624)

diff --git a/yt-dlp_alexwlchan.py b/yt-dlp_alexwlchan.py
index 0f8195b..011e4f9 100755
--- a/yt-dlp_alexwlchan.py
+++ b/yt-dlp_alexwlchan.py
@@ -2,6 +2,7 @@
 
 import json
 from pathlib import Path
+import subprocess
 import sys
 import tempfile
 from typing import TypedDict
@@ -38,7 +39,7 @@ ydl_opts = {
 }
 
 
-def get_avatar_url(channel_url: str) -> str:
+def get_youtube_avatar_url(channel_url: str) -> str:
     """
     Returns the avatar URL of a YouTube channel.
     """
@@ -62,6 +63,18 @@ def get_avatar_url(channel_url: str) -> str:
     return best_thumbnail["url"]
 
 
+def get_instagram_avatar_url(channel_name: str) -> str:
+    """
+    Returns the avatar URL of an Instagram channel.
+    """
+    output = subprocess.check_output(
+        ["gallery-dl", "--get-urls", f"https://www.instagram.com/{channel_name}/avatar"]
+    )
+    avatar_url = output.strip().decode("utf8")
+
+    return avatar_url
+
+
 class ChannelInfo(TypedDict):
     id: str
     name: str
@@ -77,6 +90,7 @@ class VideoInfo(TypedDict):
     thumbnail_path: Path
     subtitle_path: Path
     channel: ChannelInfo
+    site: str
 
 
 def download_video(url: str) -> VideoInfo:
@@ -95,14 +109,28 @@ def download_video(url: str) -> VideoInfo:
     except StopIteration:
         subtitle_path = None
 
-    channel = {
-        "id": video_info["channel_id"],
-        "name": video_info["channel"],
-        "url": video_info["channel_url"],
-        "avatar_url": get_avatar_url(video_info["channel_url"]),
-    }
-
-    result = {
+    if video_info["extractor"] == "youtube":
+        site = "youtube"
+        channel = {
+            "id": video_info["channel_id"],
+            "name": video_info["channel"],
+            "url": video_info["channel_url"],
+            "avatar_url": get_youtube_avatar_url(video_info["channel_url"]),
+        }
+    elif video_info["extractor"] == "Instagram":
+        with open("out.json", "w") as of:
+            of.write(str(video_info))
+        site = "instagram"
+        channel = {
+            "id": video_info["uploader_id"],
+            "name": video_info["uploader"],
+            "channel_url": f"https://www.instagram.com/{video_info['channel']}/",
+            "avatar_url": get_instagram_avatar_url(channel_name=video_info["channel"]),
+        }
+    else:
+        sys.exit(f"Unsupported extractor: {video_info['extractor']}")
+
+    return {
         "url": url,
         "title": video_info["title"],
         "description": video_info["description"],
@@ -110,10 +138,9 @@ def download_video(url: str) -> VideoInfo:
         "thumbnail_path": thumbnail_path,
         "subtitle_path": subtitle_path,
         "channel": channel,
+        "site": site,
     }
 
-    return result
-
 
 class PathEncoder(json.JSONEncoder):
     """