Download the subtitles, if available
- ID: 08c1bad
- date: 2024-02-16 07:18:53+00:00
- author: Alex Chan <alex@alexwlchan.net>
- parent: 20ff716
- message: Download the subtitles, if available
- changed files: 2 files, 18 additions, 2 deletions
Changed files
web/save_youtube_videos.py (4983) → web/save_youtube_videos.py (5294)
diff --git a/web/save_youtube_videos.py b/web/save_youtube_videos.py
index 5ae3104..fd228ae 100755
--- a/web/save_youtube_videos.py
+++ b/web/save_youtube_videos.py
@@ -88,7 +88,7 @@ def log_result(format_template):
def classify_file_type(
video_id: str, filename: str
-) -> Literal["video", "info", "thumbnail"] | None:
+) -> Literal["video", "info", "thumbnail", "subtitles"] | None:
"""
Given an already-downloaded file, work out what sort of file it is.
"""
@@ -120,6 +120,17 @@ def classify_file_type(
if filename.endswith((f"-{video_id}.info.json", f" [{video_id}].info.json")):
return "info"
+ if filename.endswith(
+ (
+ f" [{video_id}].en.vtt",
+ f" [{video_id}].en-US.vtt",
+ f" [{video_id}].en-GB.vtt",
+ f" [{video_id}].en-CA.vtt",
+ f" [{video_id}].live_chat.json",
+ )
+ ):
+ return "subtitles"
+
raise ValueError(f"Unrecognised filename: {filename}")
@@ -152,7 +163,7 @@ def download_video(*, video_id, download_root):
# Construct the command. The expensive bit is redownloading the
# video file, so don't do that if it's already downloaded.
video_url = f"https://youtube.com/watch?v={video_id}"
- cmd = [video_url]
+ cmd = [video_url, "--write-sub"]
if has_video:
cmd.append("--skip-download")
web/test_save_youtube_videos.py (874) → web/test_save_youtube_videos.py (1030)
diff --git a/web/test_save_youtube_videos.py b/web/test_save_youtube_videos.py
index c182a8a..b8157c9 100644
--- a/web/test_save_youtube_videos.py
+++ b/web/test_save_youtube_videos.py
@@ -23,6 +23,11 @@ from save_youtube_videos import classify_file_type
"The World's Most Remote Buildings-wGS53t8ZbO8.f251.webm.part",
None,
),
+ (
+ "IjCylxs8hZU",
+ "Soviet Flying Aircraft Carriers Were Ingenious [IjCylxs8hZU].en.vtt",
+ "subtitles",
+ ),
],
)
def test_classify_file_type(video_id, filename, file_type):