Merge pull request #16 from alexwlchan/dont-save-youtube-description
- ID: 20ff716
- date: 2024-02-15 22:13:21+00:00
- author: Alex Chan <alex@alexwlchan.net>
- parents: 047d549, 12f7ace
- message: Merge pull request #16 from alexwlchan/dont-save-youtube-description
  Improve my scripts for saving from YouTube
- changed files: 3 files, 84 additions, 35 deletions
Changed files
.github/workflows/test.yml (1233) → .github/workflows/test.yml (1281)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 6356a05..44a57af 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -45,3 +45,4 @@ jobs:
py.test text/test_fix_twitter_thread.py
py.test textexpander/test_get_mastodon_text.py
py.test web/test_save_ao3_links.py
+ py.test web/test_save_youtube_videos.py
web/save_youtube_videos.py (4444) → web/save_youtube_videos.py (4983)
diff --git a/web/save_youtube_videos.py b/web/save_youtube_videos.py
index f82b811..5ae3104 100755
--- a/web/save_youtube_videos.py
+++ b/web/save_youtube_videos.py
@@ -10,6 +10,7 @@ import pathlib
import subprocess
import sys
import textwrap
+from typing import Literal
import hyperlink
from sqlite_utils import Database
@@ -76,7 +77,8 @@ def log_result(format_template):
print(termcolor.colored(f"✘ {description}\n{wrapped_error}", "red"))
raise
else:
- print(termcolor.colored(f"✔ {description}", "green"))
+ if result == "downloaded":
+ print(termcolor.colored(f"✔ {description}", "green"))
return result
return wrapper
@@ -84,6 +86,43 @@ def log_result(format_template):
return decorator
+def classify_file_type(
+ video_id: str, filename: str
+) -> Literal["video", "info", "thumbnail"] | None:
+ """
+ Given an already-downloaded file, work out what sort of file it is.
+ """
+ if filename.endswith(".part"):
+ return None
+
+ if filename.endswith(
+ (
+ f"-{video_id}.mp4",
+ f"-{video_id}.webm",
+ f"-{video_id}.mkv",
+ f" [{video_id}].mp4",
+ f" [{video_id}].mkv",
+ f" [{video_id}].webm",
+ )
+ ):
+ return "video"
+
+ if filename.endswith(
+ (
+ f"-{video_id}.jpg",
+ f"-{video_id}.webp",
+ f" [{video_id}].jpg",
+ f" [{video_id}].webp",
+ )
+ ):
+ return "thumbnail"
+
+ if filename.endswith((f"-{video_id}.info.json", f" [{video_id}].info.json")):
+ return "info"
+
+ raise ValueError(f"Unrecognised filename: {filename}")
+
+
@log_result("https://youtube.com/watch?v={video_id}")
def download_video(*, video_id, download_root):
uploader = get_uploader(video_id=video_id, db_path=download_root / "uploaders.db")
@@ -97,38 +136,17 @@ def download_video(*, video_id, download_root):
# Look to see if this video has been downloaded before. If it has, skip any
# further processing.
- matching_filenames = [
- filename for filename in os.listdir(download_dir) if video_id in filename
- ]
-
- has_video = any(
- f.endswith(
- (
- f"-{video_id}.mp4",
- f"-{video_id}.webm",
- f"-{video_id}.mkv",
- f" [{video_id}].webm",
- )
- )
- for f in matching_filenames
- )
-
- has_description = any(
- f.endswith((f"-{video_id}.description", f" [{video_id}].description"))
- for f in matching_filenames
- )
+ matching_filenames = {
+ filename: classify_file_type(video_id, filename)
+ for filename in os.listdir(download_dir)
+ if video_id in filename
+ }
- has_info = any(
- f.endswith((f"-{video_id}.info.json", f" [{video_id}].info.json"))
- for f in matching_filenames
- )
-
- has_thumbnail = any(
- f.endswith((f"-{video_id}.jpg", f" [{video_id}].jpg"))
- for f in matching_filenames
- )
+ has_video = "video" in matching_filenames.values()
+ has_info = "info" in matching_filenames.values()
+ has_thumbnail = "thumbnail" in matching_filenames.values()
- if has_video and has_description and has_info:
+ if has_video and has_thumbnail and has_info:
return
# Construct the command. The expensive bit is redownloading the
@@ -139,9 +157,6 @@ def download_video(*, video_id, download_root):
if has_video:
cmd.append("--skip-download")
- if not has_description:
- cmd.append("--write-description")
-
if not has_info:
cmd.append("--write-info-json")
@@ -151,6 +166,7 @@ def download_video(*, video_id, download_root):
try:
youtube_dl(*cmd, cwd=download_dir)
print(download_dir)
+ return "downloaded"
except subprocess.CalledProcessError as err: # pragma: no cover
print(f"Unable to download {video_url}: {err}", file=sys.stderr)
raise
@@ -159,4 +175,7 @@ def download_video(*, video_id, download_root):
if __name__ == "__main__":
for url in sys.argv[1:]:
video_id = get_video_id(url)
- download_video(video_id=video_id, download_root=BACKUP_ROOT)
+ try:
+ download_video(video_id=video_id, download_root=BACKUP_ROOT)
+ except Exception:
+ pass
web/test_save_youtube_videos.py (0) → web/test_save_youtube_videos.py (874)
diff --git a/web/test_save_youtube_videos.py b/web/test_save_youtube_videos.py
new file mode 100644
index 0000000..c182a8a
--- /dev/null
+++ b/web/test_save_youtube_videos.py
@@ -0,0 +1,29 @@
+import pytest
+
+from save_youtube_videos import classify_file_type
+
+
+@pytest.mark.parametrize(
+ ["video_id", "filename", "file_type"],
+ [
+ ("3VvioE0ziPk", "Who is the loudest sea lion? [3VvioE0ziPk].mkv", "video"),
+ (
+ "TE8KMnGm2Xw",
+ "Warning, biters ! - A Factorio Short [TE8KMnGm2Xw].webp",
+ "thumbnail",
+ ),
+ ("AfsnHVaScjg", "Ravens can talk! [AfsnHVaScjg].info.json", "info"),
+ (
+ "X1ynZm1wI18",
+ "Rami Ismail, Vlambeer - XOXO Festival (2015)-X1ynZm1wI18.webp",
+ "thumbnail",
+ ),
+ (
+ "wGS53t8ZbO8",
+ "The World's Most Remote Buildings-wGS53t8ZbO8.f251.webm.part",
+ None,
+ ),
+ ],
+)
+def test_classify_file_type(video_id, filename, file_type):
+ assert classify_file_type(video_id, filename) == file_type