Add the AO3 script
- ID: 7ccd420
- date: 2024-02-14 23:10:17+00:00
- author: Alex Chan <alex@alexwlchan.net>
- parent: 38ac6ec
- message: Add the AO3 script
- changed files: 3 files, 123 additions, 21 deletions
Changed files
web/README.md (3919) → web/README.md (4353)
diff --git a/web/README.md b/web/README.md
index 0a6518f..491c4a6 100644
--- a/web/README.md
+++ b/web/README.md
@@ -36,6 +36,12 @@ scripts = [
"""
},
{
+ "usage": "save_ao3_links.py [URL...]",
+ "description": """
+ save a copy of a story on AO3, including exports in every available format.
+ """
+ },
+ {
"name": "save_pinboard_bookmarks.py",
"description": """
save a complete copy of all my Pinboard bookmarks, including my archive backups.
@@ -93,6 +99,15 @@ cog_helpers.create_description_table(folder_name=folder_name, scripts=scripts)
</dd>
<dt>
+ <a href="https://github.com/alexwlchan/scripts/blob/main/web/save_ao3_links.py">
+ <code>save_ao3_links.py [URL...]</code>
+ </a>
+ </dt>
+ <dd>
+ save a copy of a story on AO3, including exports in every available format.
+ </dd>
+
+ <dt>
<a href="https://github.com/alexwlchan/scripts/blob/main/web/save_pinboard_bookmarks.py">
<code>save_pinboard_bookmarks.py</code>
</a>
@@ -128,4 +143,4 @@ cog_helpers.create_description_table(folder_name=folder_name, scripts=scripts)
this is a wrapper around <a href="https://github.com/yt-dlp/yt-dlp">yt-dlp</a> that does parallel downloads of videos in playlists.
</dd>
</dl>
-<!-- [[[end]]] (checksum: e326ff2ac898ceecc4bddd204f9318b2) -->
+<!-- [[[end]]] (checksum: a4f4aaedc92d2ce7e499f50a87c39d22) -->
web/save_ao3_links.py (0) → web/save_ao3_links.py (1945)
diff --git a/web/save_ao3_links.py b/web/save_ao3_links.py
new file mode 100755
index 0000000..a8512f0
--- /dev/null
+++ b/web/save_ao3_links.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+
+import os
+import pathlib
+import shutil
+import subprocess
+import sys
+import tarfile
+
+
+BACKUP_ROOT = pathlib.Path("/Volumes/Media (Sapphire)/backups/ao3")
+
+
def _extract_work_id(url):
    """
    Return the numeric AO3 work ID found in ``url``, or ``None`` if there
    isn't one.

    The ID is the all-digit path segment that directly follows ``works``,
    so all of these yield ``'1234'``:

        https://archiveofourown.org/works/1234
        https://archiveofourown.org/works/1234/
        https://archiveofourown.org/works/1234/chapters/5678
        https://archiveofourown.org/works/1234?view_adult=true#comments

    A bare ID (``'1234'``) is accepted as well.  URLs that don't point at
    a single work (series, user pages, searches, ...) yield ``None``.
    """
    candidate = url.strip()

    if candidate.isascii() and candidate.isdigit():
        return candidate

    # Drop the fragment and query string so they can't be mistaken for,
    # or glued onto, a path segment.
    path = candidate.split("#", 1)[0].split("?", 1)[0]
    segments = path.split("/")

    for marker, value in zip(segments, segments[1:]):
        if marker == "works" and value.isascii() and value.isdigit():
            return value

    return None


def _title_from_downloads(tmp_dir):
    """
    Return the fic title implied by the files wget saved in ``tmp_dir``,
    or ``None`` if the directory is missing or empty.
    """
    try:
        names = sorted(os.listdir(tmp_dir))
    except FileNotFoundError:
        return None

    if not names:
        return None

    # Only strip the final extension: a title may itself contain dots.
    return names[0].rsplit(".", 1)[0]


def _remove_if_present(path):
    """Delete the directory tree at ``path``; do nothing if it is absent."""
    try:
        shutil.rmtree(path)
    except FileNotFoundError:
        pass


def save_ao3_url(url: str) -> None:
    """
    Save a copy of one AO3 work in BACKUP_ROOT as ``<id>-<title>.tar.gz``.

    Every export format is requested with wget into a temporary
    ``<id>.tmp`` directory, which is then packed into the tarball and
    removed.  The call is a no-op when:

    *   ``url`` contains no work ID (e.g. a series or user page);
    *   an archive for this work ID already exists in BACKUP_ROOT;
    *   none of the downloads produced a file.

    Individual download failures are tolerated: whatever formats were
    fetched successfully are archived.
    """
    # e.g. 'https://archiveofourown.org/works/1234' ~> '1234'
    ao3_id = _extract_work_id(url)

    if ao3_id is None:
        print(f"Skipping {url}: no AO3 work ID found")
        return

    # Check if the fic is already downloaded -- if it is, nothing to do.
    if any(
        name.startswith(f"{ao3_id}-") and name.endswith(".tar.gz")
        for name in os.listdir(BACKUP_ROOT)
    ):
        return

    print(f"Saving {url}...")

    # Otherwise, create a temporary directory for the download.
    #
    # Delete any partial downloads first.
    tmp_dir = BACKUP_ROOT / f"{ao3_id}.tmp"
    _remove_if_present(tmp_dir)

    for ext in ["azw", "epub", "mobi", "pdf", "html"]:
        wget(
            "--no-verbose",
            "--output-file",
            "-",
            # The Content-Disposition header is sent by the server to say
            # what the file "should" be called.  By telling wget to respect
            # this, it means we can request "a.html", the header from AO3
            # will specify the correct filename (including the fic title),
            # and the file will be named correctly.
            "--content-disposition",
            "--directory-prefix",
            str(tmp_dir),
            f"https://archiveofourown.org/downloads/{ao3_id}/a.{ext}",
        )

    title = _title_from_downloads(tmp_dir)

    if title is None:
        # Nothing was downloaded; don't leave an empty directory behind.
        _remove_if_present(tmp_dir)
        return

    out_path = BACKUP_ROOT / f"{ao3_id}-{title}.tar.gz"

    # Build the archive under a name the "already downloaded" check above
    # doesn't match, and only move it into place once it is complete.
    # Otherwise an interrupted run would leave a truncated .tar.gz that
    # every later run mistakes for a finished backup.
    partial_path = BACKUP_ROOT / f"{ao3_id}-{title}.tar.gz.partial"

    with tarfile.open(partial_path, "w:gz") as tf:
        tf.add(tmp_dir, arcname=ao3_id)

    os.replace(partial_path, out_path)

    shutil.rmtree(tmp_dir)

    print(f" ~> {out_path}")
+
+
def wget(*args):
    """
    Run ``wget`` with the given command-line arguments.

    wget's stdout is discarded.  A failed download does not raise; the
    exit status is returned instead (0 on success) so that callers who
    care can check it.

    Arguments may be strings or path-like objects.
    """
    command = ["wget", *(os.fspath(arg) for arg in args)]
    return subprocess.call(command, stdout=subprocess.DEVNULL)
+
+
if __name__ == "__main__":
    # Everything after the script name is treated as a URL to save;
    # the URLs are processed one at a time, in the order given.
    cli_urls = sys.argv[1:]

    for work_url in cli_urls:
        save_ao3_url(work_url)
web/save_pinboard_bookmarks.py (5923) → web/save_pinboard_bookmarks.py (6277)
diff --git a/web/save_pinboard_bookmarks.py b/web/save_pinboard_bookmarks.py
index 33f7dc2..1d41c80 100755
--- a/web/save_pinboard_bookmarks.py
+++ b/web/save_pinboard_bookmarks.py
@@ -27,7 +27,7 @@ def write_to_file(name: str, contents: str) -> None:
path.write_text(contents)
-def get_bookmarks_json(username: str, password: str) -> str:
+def get_bookmarks_data(username: str, password: str) -> str:
"""
Call the Pinboard API to get a complete list of my bookmarks.
@@ -41,9 +41,7 @@ def get_bookmarks_json(username: str, password: str) -> str:
resp.raise_for_status()
- json_string = json.dumps(resp.json(), indent=2, sort_keys=True)
-
- return json_string
+ return resp.json()
def get_cache_ids(username: str, password: str) -> dict[str, str]:
@@ -191,25 +189,40 @@ if __name__ == "__main__":
now = datetime.date.today().strftime("%Y-%m-%d")
print("*** Getting a JSON copy of my bookmarks data")
- json_string = get_bookmarks_json(username, password)
+ bookmarks = get_bookmarks_data(username, password)
+ json_string = json.dumps(bookmarks, indent=2, sort_keys=True)
for name in (f"bookmarks.{now}.json", "bookmarks.json"):
write_to_file(name, contents=json_string)
print("")
- print("*** Getting a list of cache IDs")
- all_cache_ids = get_cache_ids(username, password)
-
- for name in (f"cache_ids.{now}.json", "cache_ids.json"):
- write_to_file(name, contents=json.dumps(all_cache_ids))
-
- all_cache_ids = json.load(open(BACKUP_ROOT / "cache_ids.json"))
-
- print("")
-
- print("*** Saving archive files using wget")
-
- with wget_context(username, password):
- for url, cache_id in all_cache_ids.items():
- download_single_archive(url, cache_id)
+ # print("*** Getting a list of cache IDs")
+ # all_cache_ids = get_cache_ids(username, password)
+ #
+ # for name in (f"cache_ids.{now}.json", "cache_ids.json"):
+ # write_to_file(name, contents=json.dumps(all_cache_ids))
+ #
+ # all_cache_ids = json.load(open(BACKUP_ROOT / "cache_ids.json"))
+ #
+ # print("")
+ #
+ # print("*** Saving archive files using wget")
+ #
+ # with wget_context(username, password):
+ # for url, cache_id in all_cache_ids.items():
+ # download_single_archive(url, cache_id)
+ #
+ # print("")
+
+ print("*** Saving stories from AO3")
+
+ ao3_urls = [b["href"] for b in bookmarks if "archiveofourown.org" in b["href"]]
+
+ subprocess.check_call(
+ [
+ "python3",
+ "/Users/alexwlchan/repos/scripts/web/save_ao3_links.py",
+ ]
+ + ao3_urls
+ )