Add a save_flickr script
- ID: 5e6645c
- date: 2023-08-05 08:51:42+00:00
- author: Alex Chan <alex@alexwlchan.net>
- parent: f344649
- message: Add a save_flickr script
- changed files: 2 files, 148 additions
Changed files
images/README.md (4491) → images/README.md (4715)
diff --git a/images/README.md b/images/README.md
index 7ced139..c173fbd 100644
--- a/images/README.md
+++ b/images/README.md
@@ -106,6 +106,15 @@ These scripts are for working with images and other visual material.
</dd>
<dt>
+ <a href="https://github.com/alexwlchan/scripts/blob/main/images/save_flickr">
+ <code>save_flickr [FLICKR_URL]</code>
+ </a>
+ </dt>
+ <dd>
+ saves a single image from Flickr, plus some metadata.
+ </dd>
+
+ <dt>
<a href="https://github.com/alexwlchan/scripts/blob/main/images/save_xkcd">
<code>save_xkcd [COMIC_NUMBER]</code>
</a>
images/save_flickr (0) → images/save_flickr (3364)
diff --git a/images/save_flickr b/images/save_flickr
new file mode 100755
index 0000000..20d9b71
--- /dev/null
+++ b/images/save_flickr
@@ -0,0 +1,139 @@
+#!/usr/bin/env python3
+"""
+Downloads and saves a single Flickr image, plus a bit of metadata.
+
+I'm not using this to create a complete archive of Flickr but to create
+a mini-library of my personal favourites.
+"""
+
+import datetime as dt
+import json
+import pathlib
+import shutil
+import sys
+import tempfile
+
+import bs4
+import hyperlink
+import urllib3
+import xmltodict # pip3 install xmltodict==0.12.0
+
+
+BACKUP_ROOT = pathlib.Path("/Volumes/Media (Sapphire)") / "backups" / "flickr"  # root folder; the script saves into one subfolder per creator
+
+
+def get_canonical_url(url):
+ http = urllib3.PoolManager()
+
+ seen_urls = set(url)
+
+ url = url.split("/in/")[0]
+
+ while True:
+ resp = http.request(
+ "GET", url,
+ redirect=False,
+ headers={"User-Agent": "urllib3"}
+ )
+
+ try:
+ url = resp.headers["Location"]
+ except KeyError:
+ return url
+
+ if url in seen_urls:
+ raise ValueError("Circular redirect: {url}")
+ else:
+ seen_urls.add(url)
+
+
+def build_url(base, params):
+ u = hyperlink.URL.from_text(base)
+ for k, v in params.items():
+ u = u.set(k, v)
+ return u
+
+
+def get_oembed_data(canonical_url):
+ http = urllib3.PoolManager()
+
+ request_url = build_url(
+ "https://www.flickr.com/services/oembed/",
+ params={"url": url}
+ )
+
+ oembed_resp = http.request("GET", str(request_url))
+ assert oembed_resp.status == 200, oembed_resp.status
+ return xmltodict.parse(oembed_resp.data)["oembed"]
+
+
+def get_description(url):
+ http = urllib3.PoolManager()
+
+ resp = http.request("GET", url)
+ assert resp.status == 200, resp.status
+
+ soup = bs4.BeautifulSoup(resp.data, "html.parser")
+ return soup.find("meta", attrs={"name": "description"}).attrs["content"]
+
+
+def save_image(out_dir, oembed_data):
+ http = urllib3.PoolManager()
+
+ img_url = oembed_data["url"]
+ filename = hyperlink.URL.from_text(img_url).path[-1]
+
+ # See https://stackoverflow.com/q/17285464/1558022
+ resp = http.request("GET", img_url, preload_content=False)
+
+ with (out_dir / filename).open("wb") as out:
+ while True:
+ data = resp.read(1024)
+ if not data:
+ break
+ out.write(data)
+
+ resp.release_conn()
+
+
+if __name__ == "__main__":
+ try:
+ url = sys.argv[1]
+ except IndexError:
+ sys.exit(f"Usage: {__file__} <FLICKR_URL>")
+
+ canonical_url = get_canonical_url(url)
+ oembed_data = get_oembed_data(canonical_url)
+
+ parsed_url = hyperlink.URL.from_text(canonical_url.strip("/"))
+
+ assert parsed_url.path[0] == "photos", parsed_url
+
+ flickr_id = parsed_url.path[-1]
+ assert flickr_id.isnumeric()
+
+ author_name = oembed_data["author_name"]
+
+ author_url = oembed_data["author_url"].strip("/")
+ creator = hyperlink.URL.from_text(author_url).path[-1]
+
+ backup_dir = BACKUP_ROOT / creator
+ backup_dir.mkdir(exist_ok=True)
+
+ description = get_description(canonical_url)
+
+ flickr_info = {
+ "url": url,
+ "canonical_url": canonical_url,
+ "saved_at": dt.datetime.now().isoformat(),
+ "description": description,
+ "oembed_data": oembed_data
+ }
+
+ json_string = json.dumps(flickr_info, indent=2, sort_keys=True)
+
+ (backup_dir / f"{flickr_id}.json").write_text(json_string)
+
+ save_image(backup_dir, oembed_data)
+
+ print(backup_dir)