Add a function for parsing Mastodon post URLs

ID

4e2cf06

date

2025-12-03 00:51:11+00:00

author

Alex Chan <alex@alexwlchan.net>

parent

05e6b41

message

Add a function for parsing Mastodon post URLs

changed files

4 files, 77 additions, 2 deletions

CHANGELOG.md
src/chives/__init__.py
src/chives/urls.py
tests/test_urls.py

Changed files

CHANGELOG.md (613) → CHANGELOG.md (680)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0ea483a..6a371ac 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # CHANGELOG
 
+## v6 - 2025-12-03
+
+Add the `parse_mastodon_post_url()` function.
+
 ## v5 - 2025-12-01
 
 When calling `reformat_date()`, ensure all dates are converted to UTC.

src/chives/__init__.py (390) → src/chives/__init__.py (390)

diff --git a/src/chives/__init__.py b/src/chives/__init__.py
index 74de00f..f17eeea 100644
--- a/src/chives/__init__.py
+++ b/src/chives/__init__.py
@@ -11,4 +11,4 @@ I share across multiple sites.
 
 """
 
-__version__ = "5"
+__version__ = "6"

src/chives/urls.py (329) → src/chives/urls.py (1062)

diff --git a/src/chives/urls.py b/src/chives/urls.py
index 16642e8..d4cf7d4 100644
--- a/src/chives/urls.py
+++ b/src/chives/urls.py
@@ -1,5 +1,8 @@
 """Code for manipulating and tidying URLs."""
 
+import re
+from typing import TypedDict
+
 import hyperlink
 
 
@@ -15,3 +18,28 @@ def clean_youtube_url(url: str) -> str:
     u = u.remove("t")
 
     return str(u)
+
+
+def parse_mastodon_post_url(url: str) -> tuple[str, str, str]:
+    """
+    Parse a Mastodon post URL into (server, account, post ID).
+
+    A handle like `@user@remote.example` names an account on another
+    server; a bare `@user` belongs to the URL's own host.  Raises
+    ValueError if the URL doesn't look like a Mastodon post.
+    """
+    u = hyperlink.parse(url)
+
+    if len(u.path) != 2:
+        raise ValueError("Cannot parse Mastodon URL!")
+
+    handle, post_id = u.path
+    if not handle.startswith("@"):
+        raise ValueError("Cannot find `acct` in Mastodon URL!")
+
+    if not re.fullmatch(r"[0-9]+", post_id):
+        raise ValueError("Mastodon post ID is not numeric!")
+
+    # Split on the first "@" after the prefix; no "@" means a local account.
+    acct, _, remote = handle[1:].partition("@")
+    return remote or u.host, acct, post_id

tests/test_urls.py (657) → tests/test_urls.py (1991)

diff --git a/tests/test_urls.py b/tests/test_urls.py
index 9ceed58..e38098d 100644
--- a/tests/test_urls.py
+++ b/tests/test_urls.py
@@ -2,7 +2,7 @@
 
 import pytest
 
-from chives.urls import clean_youtube_url
+from chives.urls import clean_youtube_url, parse_mastodon_post_url
 
 
 @pytest.mark.parametrize(
@@ -23,3 +23,46 @@ def test_clean_youtube_url(url: str, cleaned_url: str) -> None:
     All the query parameters get stripped from YouTube URLs correctly.
     """
     assert clean_youtube_url(url) == cleaned_url
+
+
+@pytest.mark.parametrize(
+    "url, server, acct, post_id",
+    [
+        (
+            "https://iconfactory.world/@Iconfactory/115650922400392083",
+            "iconfactory.world",
+            "Iconfactory",
+            "115650922400392083",
+        ),
+        (
+            "https://social.alexwlchan.net/@chris__martin@functional.cafe/113369395383537892",
+            "functional.cafe",
+            "chris__martin",
+            "113369395383537892",
+        ),
+    ],
+)
+def test_parse_mastodon_post_url(
+    url: str, server: str, acct: str, post_id: str
+) -> None:
+    """
+    A valid post URL is split into its (server, acct, post ID) parts.
+    """
+    assert parse_mastodon_post_url(url) == (server, acct, post_id)
+
+
+@pytest.mark.parametrize(
+    "url, error",
+    [
+        ("https://mastodon.social/", "Cannot parse Mastodon URL"),
+        ("https://mastodon.social/about", "Cannot parse Mastodon URL"),
+        ("https://mastodon.social/about/subdir", "Cannot find `acct`"),
+        ("https://mastodon.social/@example/about", "Mastodon post ID is not numeric"),
+    ],
+)
+def test_parse_mastodon_post_url_errors(url: str, error: str) -> None:
+    """
+    A URL that isn't a Mastodon post raises a ValueError matching `error`.
+    """
+    with pytest.raises(ValueError, match=error):
+        parse_mastodon_post_url(url)