Remove my AO3 script
- ID: 84732da
- date: 2024-06-05 18:56:23+00:00
- author: Alex Chan <alex@alexwlchan.net>
- parent: 786e819
- message: Remove my AO3 script
- changed files: 3 files, 1 addition, 136 deletions
Changed files
web/README.md (5642) → web/README.md (5208)
diff --git a/web/README.md b/web/README.md
index a33c788..6859670 100644
--- a/web/README.md
+++ b/web/README.md
@@ -42,12 +42,6 @@ scripts = [
"""
},
{
- "usage": "save_ao3_links.py [URL...]",
- "description": """
- save a copy of a story on AO3, including exports in every available format.
- """
- },
- {
"name": "save_pinboard_bookmarks.py",
"description": """
save a complete copy of all my Pinboard bookmarks, including my archive backups.
@@ -126,15 +120,6 @@ cog_helpers.create_description_table(folder_name=folder_name, scripts=scripts)
</dd>
<dt>
- <a href="https://github.com/alexwlchan/scripts/blob/main/web/save_ao3_links.py">
- <code>save_ao3_links.py [URL...]</code>
- </a>
- </dt>
- <dd>
- save a copy of a story on AO3, including exports in every available format.
- </dd>
-
- <dt>
<a href="https://github.com/alexwlchan/scripts/blob/main/web/save_pinboard_bookmarks.py">
<code>save_pinboard_bookmarks.py</code>
</a>
@@ -188,4 +173,4 @@ cog_helpers.create_description_table(folder_name=folder_name, scripts=scripts)
this is a wrapper around <a href="https://github.com/yt-dlp/yt-dlp">yt-dlp</a> that does parallel downloads of videos in playlists.
</dd>
</dl>
-<!-- [[[end]]] (checksum: a31c60eca24c6488caaa93e8bb5f6b44) -->
+<!-- [[[end]]] (checksum: d5cc17500a34414cf4107db0dcc7b0e1) -->
web/save_ao3_links.py (2891) → web/save_ao3_links.py (0)
diff --git a/web/save_ao3_links.py b/web/save_ao3_links.py
deleted file mode 100755
index dd210ed..0000000
--- a/web/save_ao3_links.py
+++ /dev/null
@@ -1,101 +0,0 @@
-#!/usr/bin/env python3
-
-import os
-import pathlib
-import shutil
-import subprocess
-import sys
-
-import hyperlink
-
-
-BACKUP_ROOT = pathlib.Path("/Volumes/Media (Sapphire)/backups/ao3")
-
-
-def get_ao3_id(url: str) -> str:
- # e.g. 'https://archiveofourown.org/works/1234' ~> '1234'
- u = hyperlink.DecodedURL.from_text(url)
-
- if u.path[0] == "works" and u.path[1].isnumeric():
- return u.path[1]
- elif (
- len(u.path) >= 4
- and u.path[0] == "collections"
- and u.path[2] == "works"
- and u.path[3].isnumeric()
- ):
- return u.path[3]
- else:
- raise ValueError(url)
-
-
-def save_ao3_url(url: str):
- ao3_id = get_ao3_id(url)
-
- # Check if the fic is already downloaded -- if it is, nothing to do.
- if any(
- name.startswith(f"{ao3_id}-") and os.path.isdir(BACKUP_ROOT / name)
- for name in os.listdir(BACKUP_ROOT)
- ):
- return
-
- print(f"Saving {url}...")
-
- # Otherwise, create a temporary directory for the download.
- #
- # Delete any partial downloads first.
- tmp_dir = BACKUP_ROOT / f"{ao3_id}.tmp"
-
- try:
- shutil.rmtree(tmp_dir)
- except FileNotFoundError:
- pass
-
- for ext in ["azw", "epub", "mobi", "pdf", "html"]:
- wget(
- "--no-verbose",
- "--output-file",
- "-",
- # The Content-Disposition header is sent by the server to say
- # what the file "should" be called. By telling wget to respect this,
- # it means we can request "a.html", the header from AO3 will specify
- # the correct filename (including the fic title), and the file will
- # be named correctly.
- "--content-disposition",
- "--directory-prefix",
- tmp_dir,
- f"https://archiveofourown.org/downloads/{ao3_id}/a.{ext}",
- )
-
- try:
- title = os.listdir(tmp_dir)[0].rsplit(".")[0]
- except FileNotFoundError:
- return
-
- out_dir = BACKUP_ROOT / f"{ao3_id}-{title}"
-
- os.rename(tmp_dir, out_dir)
-
- print(f" ~> {out_dir}")
-
-
-def wget(*args):
- subprocess.call(["wget"] + list(args), stdout=subprocess.DEVNULL)
-
-
-if __name__ == "__main__":
- for url in sys.argv[1:]:
- if url == "https://archiveofourown.org/series/136245":
- for story_url in [
- "https://archiveofourown.org/works/1854957",
- "https://archiveofourown.org/works/2089398",
- "https://archiveofourown.org/works/2218554",
- "https://archiveofourown.org/works/2249544",
- "https://archiveofourown.org/works/2330390",
- "https://archiveofourown.org/works/2399867",
- "https://archiveofourown.org/works/2467277",
- "https://archiveofourown.org/works/2802287",
- ]:
- save_ao3_url(story_url)
- else:
- save_ao3_url(url)
web/test_save_ao3_links.py (536) → web/test_save_ao3_links.py (0)
diff --git a/web/test_save_ao3_links.py b/web/test_save_ao3_links.py
deleted file mode 100644
index 72916a7..0000000
--- a/web/test_save_ao3_links.py
+++ /dev/null
@@ -1,19 +0,0 @@
-import pytest
-
-from save_ao3_links import get_ao3_id
-
-
-@pytest.mark.parametrize(
- ["url", "ao3_id"],
- [
- ("https://archiveofourown.org/works/1234", "1234"),
- ("https://archiveofourown.org/works/1234?view_adult=true", "1234"),
- (
- "https://archiveofourown.org/works/1234/chapters/5678?view_adult=true",
- "1234",
- ),
- ("https://archiveofourown.org/collections/yuletide2022/works/1234", "1234"),
- ],
-)
-def test_get_ao3_id(url, ao3_id):
- assert get_ao3_id(url) == ao3_id