Add a script for getting Mastodon posts into Obsidian
- ID: 06e36c8
- date: 2023-12-10 10:32:56+00:00
- author: Alex Chan <alex@alexwlchan.net>
- parent: 86efa87
- message: Add a script for getting Mastodon posts into Obsidian
- changed files: 4 files, 96 additions, 18 deletions
Changed files
requirements.in (96) → requirements.in (99)
diff --git a/requirements.in b/requirements.in
index d5f7af5..1077b60 100644
--- a/requirements.in
+++ b/requirements.in
@@ -1,5 +1,4 @@
black
-bs4
flake8
httpx
humanize
@@ -8,5 +7,6 @@ keyring
Pillow
pillow_heif
pip-tools
+pytest
termcolor
yt-dlp
requirements.txt (1976) → requirements.txt (1982)
diff --git a/requirements.txt b/requirements.txt
index 278fd1a..983181a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,14 +6,10 @@
#
anyio==4.1.0
# via httpx
-beautifulsoup4==4.12.2
- # via bs4
black==23.11.0
# via -r requirements.in
brotli==1.1.0
# via yt-dlp
-bs4==0.0.1
- # via -r requirements.in
build==1.0.3
# via pip-tools
certifi==2023.11.17
@@ -46,6 +42,8 @@ idna==3.4
# httpx
# hyperlink
# requests
+iniconfig==2.0.0
+ # via pytest
jaraco-classes==3.3.0
# via keyring
keyring==24.3.0
@@ -62,6 +60,7 @@ packaging==23.2
# via
# black
# build
+ # pytest
pathspec==0.11.2
# via black
pillow==10.1.0
@@ -74,6 +73,8 @@ pip-tools==7.3.0
# via -r requirements.in
platformdirs==4.0.0
# via black
+pluggy==1.3.0
+ # via pytest
pycodestyle==2.11.1
# via flake8
pycryptodomex==3.19.0
@@ -82,14 +83,14 @@ pyflakes==3.1.0
# via flake8
pyproject-hooks==1.0.0
# via build
+pytest==7.4.3
+ # via -r requirements.in
requests==2.31.0
# via yt-dlp
sniffio==1.3.0
# via
# anyio
# httpx
-soupsieve==2.5
- # via beautifulsoup4
termcolor==2.3.0
# via -r requirements.in
urllib3==2.1.0
textexpander/get_mastodon_text.py (715) → textexpander/get_mastodon_text.py (2171)
diff --git a/textexpander/get_mastodon_text.py b/textexpander/get_mastodon_text.py
index 2229e06..be8e69d 100755
--- a/textexpander/get_mastodon_text.py
+++ b/textexpander/get_mastodon_text.py
@@ -4,24 +4,82 @@ Look at the Mastodon URL in the frontmost Safari window, and print it
as a blockquote.
"""
+import datetime
+import os
+import pathlib
+import re
import subprocess
-import bs4
import httpx
+import hyperlink
+
+
+# Folder that download() saves post media into:
+# ~/textfiles/Attachments/mastodon
+ATTACHMENTS_DIR = pathlib.Path.home() / "textfiles" / "Attachments" / "mastodon"
+
+
+def download(url):
+    """
+    Fetch ``url`` and save it in the attachments directory, named after
+    the last component of the URL.
+
+    The file is always fetched.  If a file with that name already exists
+    and has the same bytes, nothing is written; if it exists with
+    different bytes, ``FileExistsError`` is raised rather than
+    overwriting it.
+
+    Raises ``httpx.HTTPStatusError`` if the server returns an error
+    status, so an error page is never saved as if it were the attachment.
+    """
+    resp = httpx.get(url)
+    resp.raise_for_status()
+    content = resp.content
+
+    # parents=True so this also works when the parent folders don't
+    # exist yet.
+    ATTACHMENTS_DIR.mkdir(parents=True, exist_ok=True)
+
+    out_path = ATTACHMENTS_DIR / os.path.basename(url)
+
+    try:
+        # "x" mode refuses to clobber an existing file.
+        with open(out_path, "xb") as out_file:
+            out_file.write(content)
+    except FileExistsError:
+        # read_bytes() closes the file for us; a bare open().read()
+        # would leave the handle open until garbage collection.
+        if out_path.read_bytes() != content:
+            raise
+
+
+def normalise_text(text: str) -> str:
+    """
+    Turn the HTML ``content`` of a Mastodon post into plain text.
+
+    * ``<p>`` and ``</p>`` tags are removed.
+    * Hashtag links are replaced by the hashtag itself, preceded by a
+      backslash (presumably so the destination app doesn't treat it as
+      one of its own tags -- confirm).
+    * HTML character references such as ``&#39;`` or ``&amp;`` are
+      decoded to the characters they stand for.
+
+    Any other markup is left untouched.
+    """
+    import html
+
+    text = text.replace("<p>", "").replace("</p>", "")
+    text = re.sub(
+        r'<a href="[^"]+" class="mention hashtag" rel="tag">#<span>(?P<hashtag>[^<]+)</span></a>',
+        r"\\#\g<hashtag>",
+        text,
+    )
+
+    # Decode entities last, so an escaped "&lt;p&gt;" in the post text is
+    # not mistaken for a real tag by the steps above.
+    return html.unescape(text)
if __name__ == "__main__":
- url = (
- subprocess.check_output(["/usr/local/bin/safari", "url"]).decode("utf8").strip()
+ # Ask the `safari` helper for the URL of the frontmost window.  Keep the
+ # .strip() the previous version had, so any trailing newline in the
+ # helper's output doesn't end up in the status ID below.
+ url = subprocess.check_output(["/usr/local/bin/safari", "url"]).decode("utf8").strip()
+
+ u = hyperlink.URL.from_text(url)
+
+ # e.g. https://hachyderm.io/@djnavarro/111535929722933178
+ # ~> https://hachyderm.io/api/v1/statuses/111535929722933178
+ api_url = f"https://{u.host}/api/v1/statuses/{u.path[1]}"
+
+ resp = httpx.get(api_url)
+ # Fail here on an HTTP error, rather than with a confusing KeyError
+ # further down when the error body lacks the expected fields.
+ resp.raise_for_status()
+
+ post_data = resp.json()
+
+ # Save a local copy of every attachment so the embeds printed below
+ # can refer to it by file name.
+ for attachment in post_data["media_attachments"]:
+ download(attachment["url"])
+
+ author = post_data["account"]["display_name"]
+ post_url = post_data["url"]
+
+ # e.g. 2023-12-06T22:53:44.536Z
+ created_at = datetime.datetime.strptime(
+ post_data["created_at"], "%Y-%m-%dT%H:%M:%S.%fz"
)
- resp = httpx.get(url)
- soup = bs4.BeautifulSoup(resp, "html.parser")
- text = soup.find("meta", attrs={"name": "description"}).attrs["content"]
- author = soup.find("meta", attrs={"property": "og:title"}).attrs["content"]
- url = soup.find("meta", attrs={"property": "og:url"}).attrs["content"]
+ # Attribution line, a blank line, then the post text as a blockquote.
+ print(f'[{author}]({post_url}) ({created_at.strftime("%-d %B %Y")}):')
+ print("")
+ print("> " + normalise_text(post_data["content"]))
- print(f"[{author}]({url}):\n")
+ # Attachments go on one extra blockquote line, as ![[name|200]] embeds
+ # that use the same file name download() saved them under.
+ if post_data["media_attachments"]:
+ print(">\n> ", end="")
+ for attachment in post_data["media_attachments"]:
+ print("![[%s|200]]" % os.path.basename(attachment["url"]), end="")
- for line in text.splitlines():
- print(f"> {line.rstrip()}")
+ print("")
textexpander/test_get_mastodon_text.py (0) → textexpander/test_get_mastodon_text.py (575)
diff --git a/textexpander/test_get_mastodon_text.py b/textexpander/test_get_mastodon_text.py
new file mode 100644
index 0000000..00659f6
--- /dev/null
+++ b/textexpander/test_get_mastodon_text.py
@@ -0,0 +1,19 @@
+import pytest
+
+from get_mastodon_text import normalise_text
+
+
+@pytest.mark.parametrize(
+    ["html_content", "expected"],
+    [
+        (
+            "<p>A variation on the previous system for todays <a "
+            'href="https://hachyderm.io/tags/ArtAdventCalendar" class="mention '
+            'hashtag" rel="tag">#<span>ArtAdventCalendar</span></a> '
+            "contribution</p>",
+            # The backslash is doubled: a lone "\#" is an invalid escape
+            # sequence in a normal string literal.
+            "A variation on the previous system for todays \\#ArtAdventCalendar contribution",
+        )
+    ],
+)
+def test_normalise_text(html_content, expected):
+    """Hashtag links collapse to an escaped hashtag and <p> tags are dropped."""
+    assert normalise_text(html_content) == expected