Add a script for getting Mastodon posts into Obsidian

ID

06e36c8

date

2023-12-10 10:32:56+00:00

author

Alex Chan <alex@alexwlchan.net>

parent

86efa87

message

Add a script for getting Mastodon posts into Obsidian

changed files

4 files, 96 additions, 18 deletions

requirements.in
requirements.txt
textexpander/get_mastodon_text.py
textexpander/test_get_mastodon_text.py

Changed files

requirements.in (96) → requirements.in (99)

diff --git a/requirements.in b/requirements.in
index d5f7af5..1077b60 100644
--- a/requirements.in
+++ b/requirements.in
@@ -1,5 +1,4 @@
 black
-bs4
 flake8
 httpx
 humanize
@@ -8,5 +7,6 @@ keyring
 Pillow
 pillow_heif
 pip-tools
+pytest
 termcolor
 yt-dlp

requirements.txt (1976) → requirements.txt (1982)

diff --git a/requirements.txt b/requirements.txt
index 278fd1a..983181a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,14 +6,10 @@
 #
 anyio==4.1.0
     # via httpx
-beautifulsoup4==4.12.2
-    # via bs4
 black==23.11.0
     # via -r requirements.in
 brotli==1.1.0
     # via yt-dlp
-bs4==0.0.1
-    # via -r requirements.in
 build==1.0.3
     # via pip-tools
 certifi==2023.11.17
@@ -46,6 +42,8 @@ idna==3.4
     #   httpx
     #   hyperlink
     #   requests
+iniconfig==2.0.0
+    # via pytest
 jaraco-classes==3.3.0
     # via keyring
 keyring==24.3.0
@@ -62,6 +60,7 @@ packaging==23.2
     # via
     #   black
     #   build
+    #   pytest
 pathspec==0.11.2
     # via black
 pillow==10.1.0
@@ -74,6 +73,8 @@ pip-tools==7.3.0
     # via -r requirements.in
 platformdirs==4.0.0
     # via black
+pluggy==1.3.0
+    # via pytest
 pycodestyle==2.11.1
     # via flake8
 pycryptodomex==3.19.0
@@ -82,14 +83,14 @@ pyflakes==3.1.0
     # via flake8
 pyproject-hooks==1.0.0
     # via build
+pytest==7.4.3
+    # via -r requirements.in
 requests==2.31.0
     # via yt-dlp
 sniffio==1.3.0
     # via
     #   anyio
     #   httpx
-soupsieve==2.5
-    # via beautifulsoup4
 termcolor==2.3.0
     # via -r requirements.in
 urllib3==2.1.0

textexpander/get_mastodon_text.py (715) → textexpander/get_mastodon_text.py (2171)

diff --git a/textexpander/get_mastodon_text.py b/textexpander/get_mastodon_text.py
index 2229e06..be8e69d 100755
--- a/textexpander/get_mastodon_text.py
+++ b/textexpander/get_mastodon_text.py
@@ -4,24 +4,82 @@ Look at the Mastodon URL in the frontmost Safari window, and print it
 as a blockquote.
 """
 
+import datetime
+import os
+import pathlib
+import re
 import subprocess
 
-import bs4
 import httpx
+import hyperlink
+
+
+ATTACHMENTS_DIR = pathlib.Path.home() / "textfiles" / "Attachments" / "mastodon"
+
+
+def download(url):
+    """
+    Download a file to the attachments directory, or do nothing if it's
+    already downloaded.
+    """
+    resp = httpx.get(url)
+    content = resp.content
+
+    ATTACHMENTS_DIR.mkdir(exist_ok=True)
+
+    out_path = ATTACHMENTS_DIR / os.path.basename(url)
+
+    try:
+        with open(out_path, "xb") as out_file:
+            out_file.write(content)
+    except FileExistsError:
+        if open(out_path, "rb").read() == content:
+            pass
+        else:
+            raise
+
+
+def normalise_text(text: str) -> str:
+    text = text.replace("<p>", "").replace("</p>", "")
+    text = re.sub(
+        r'<a href="[^"]+" class="mention hashtag" rel="tag">#<span>(?P<hashtag>[^<]+)</span></a>',
+        r"\\#\g<hashtag>",
+        text,
+    )
+    return text
 
 
 if __name__ == "__main__":
-    url = (
-        subprocess.check_output(["/usr/local/bin/safari", "url"]).decode("utf8").strip()
+    url = subprocess.check_output(["/usr/local/bin/safari", "url"]).decode("utf8")
+
+    u = hyperlink.URL.from_text(url)
+
+    # e.g. https://hachyderm.io/@djnavarro/111535929722933178
+    # ~>  https://hachyderm.io/api/v1/statuses/111535929722933178
+    api_url = f"https://{u.host}/api/v1/statuses/{u.path[1]}"
+
+    resp = httpx.get(api_url)
+
+    post_data = resp.json()
+
+    for attachment in post_data["media_attachments"]:
+        download(attachment["url"])
+
+    author = post_data["account"]["display_name"]
+    post_url = post_data["url"]
+
+    # e.g. 2023-12-06T22:53:44.536Z
+    created_at = datetime.datetime.strptime(
+        post_data["created_at"], "%Y-%m-%dT%H:%M:%S.%fz"
     )
-    resp = httpx.get(url)
-    soup = bs4.BeautifulSoup(resp, "html.parser")
 
-    text = soup.find("meta", attrs={"name": "description"}).attrs["content"]
-    author = soup.find("meta", attrs={"property": "og:title"}).attrs["content"]
-    url = soup.find("meta", attrs={"property": "og:url"}).attrs["content"]
+    print(f'[{author}]({post_url}) ({created_at.strftime("%-d %B %Y")}):')
+    print("")
+    print("> " + normalise_text(post_data["content"]))
 
-    print(f"[{author}]({url}):\n")
+    if post_data["media_attachments"]:
+        print(">\n> ", end="")
+        for attachment in post_data["media_attachments"]:
+            print("![[%s|200]]" % os.path.basename(attachment["url"]), end="")
 
-    for line in text.splitlines():
-        print(f"> {line.rstrip()}")
+    print("")

textexpander/test_get_mastodon_text.py (0) → textexpander/test_get_mastodon_text.py (575)

diff --git a/textexpander/test_get_mastodon_text.py b/textexpander/test_get_mastodon_text.py
new file mode 100644
index 0000000..00659f6
--- /dev/null
+++ b/textexpander/test_get_mastodon_text.py
@@ -0,0 +1,19 @@
+import pytest
+
+from get_mastodon_text import normalise_text
+
+
+@pytest.mark.parametrize(
+    ["input", "output"],
+    [
+        (
+            "<p>A variation on the previous system for todays <a "
+            'href="https://hachyderm.io/tags/ArtAdventCalendar" class="mention '
+            'hashtag" rel="tag">#<span>ArtAdventCalendar</span></a> '
+            "contribution</p>",
+            "A variation on the previous system for todays \#ArtAdventCalendar contribution",
+        )
+    ],
+)
+def test_normalise_text(input, output):
+    assert normalise_text(input) == output