tests/test_urls.py – chives

`tests/test_urls.py`

5.8 kB
Python
View raw
1"""Tests for `chives.urls`."""
2
3from pathlib import Path
4
5import pytest
6from vcr.cassette import Cassette
7
8from chives.urls import (
9    clean_youtube_url,
10    is_mastodon_host,
11    is_url_safe,
12    parse_mastodon_post_url,
13    parse_tumblr_post_url,
14)
15
16
@pytest.mark.parametrize(
    ("url", "cleaned_url"),
    [
        # The playlist, index and timestamp parameters are all dropped.
        (
            "https://www.youtube.com/watch?v=2OHPPSew2nY&list=WL&index=6&t=193s",
            "https://www.youtube.com/watch?v=2OHPPSew2nY",
        ),
        # A URL which only has the `v` parameter comes back unchanged.
        (
            "https://www.youtube.com/watch?v=2OHPPSew2nY",
            "https://www.youtube.com/watch?v=2OHPPSew2nY",
        ),
        # The `start_radio` parameter is dropped.
        (
            "https://www.youtube.com/watch?v=WiIi7STG3e0&start_radio=1",
            "https://www.youtube.com/watch?v=WiIi7STG3e0",
        ),
    ],
)
def test_clean_youtube_url(url: str, cleaned_url: str) -> None:
    """
    Cleaning a YouTube URL keeps the `v` query parameter and drops
    every other query parameter.
    """
    actual = clean_youtube_url(url)
    assert actual == cleaned_url
39
40
@pytest.mark.parametrize(
    ("url", "server", "acct", "post_id"),
    [
        pytest.param(
            "https://iconfactory.world/@Iconfactory/115650922400392083",
            "iconfactory.world",
            "Iconfactory",
            "115650922400392083",
            id="iconfactory",
        ),
        # Here the expected server and post ID are NOT the ones in the URL.
        # NOTE(review): presumably the post is resolved to the account's
        # home server (functional.cafe) -- confirm against `chives.urls`.
        pytest.param(
            "https://social.alexwlchan.net/@chris__martin@functional.cafe/113369395383537892",
            "functional.cafe",
            "chris__martin",
            "113369395366414375",
            id="alexwlchan_redirect",
        ),
        pytest.param(
            "https://social.alexwlchan.net/@alex/116300317590482708",
            "social.alexwlchan.net",
            "alex",
            "116300317590482708",
            id="alexwlchan_self",
        ),
    ],
)
def test_parse_mastodon_post_url(
    vcr_cassette: Cassette, url: str, server: str, acct: str, post_id: str
) -> None:
    """
    A Mastodon post URL is parsed into a (server, acct, post ID) tuple.

    `vcr_cassette` is requested as a fixture but never read in the body.
    """
    expected = (server, acct, post_id)
    assert parse_mastodon_post_url(url) == expected
74
75
@pytest.mark.parametrize(
    ("url", "error"),
    [
        pytest.param(
            "https://mastodon.social/",
            "Cannot parse Mastodon URL",
            id="no_path",
        ),
        pytest.param(
            "https://mastodon.social/about",
            "Cannot parse Mastodon URL",
            id="no_post_id",
        ),
        pytest.param(
            "https://mastodon.social/about/subdir",
            "Cannot find `acct`",
            id="no_acct",
        ),
        pytest.param(
            "https://mastodon.social/@example/about",
            "Mastodon post ID is not numeric",
            id="non_numeric_post_id",
        ),
        pytest.param(
            "https://social.alexwlchan.net/@does@not.exist/123",
            "Cannot parse Mastodon URL",
            id="alexwlchan_does_not_exist",
        ),
    ],
)
def test_parse_mastodon_post_url_errors(
    vcr_cassette: Cassette, url: str, error: str
) -> None:
    """
    An unparseable URL makes parse_mastodon_post_url raise a ValueError
    whose message matches the expected explanation.

    `vcr_cassette` is requested as a fixture but never read in the body.
    """
    # `match` is applied as a regular expression search on the message.
    expect_failure = pytest.raises(ValueError, match=error)
    with expect_failure:
        parse_mastodon_post_url(url)
110
111
@pytest.mark.parametrize(
    ("url", "blog_identifier", "post_id"),
    [
        # www.tumblr.com/<blog>/<post ID>/ style URL
        (
            "https://www.tumblr.com/kynvillingur/792473255236796416/",
            "kynvillingur",
            "792473255236796416",
        ),
        # <blog>.tumblr.com/post/<post ID>/<slug> style URL
        (
            "https://cut3panda.tumblr.com/post/94093772689/for-some-people-the-more-you-get-to-know-them",
            "cut3panda",
            "94093772689",
        ),
    ],
)
def test_parse_tumblr_post_url(url: str, blog_identifier: str, post_id: str) -> None:
    """
    A Tumblr post URL is parsed into a (blog identifier, post ID) tuple.
    """
    expected = (blog_identifier, post_id)
    assert parse_tumblr_post_url(url) == expected
132
133
@pytest.mark.parametrize(
    "url",
    [
        # tumblr.com URLs which don't include a post ID
        "https://www.tumblr.com/",
        "https://www.tumblr.com/staff/",
        "https://staff.tumblr.com/",
        # A URL on a different domain
        "https://www.example.com/",
    ],
)
def test_parse_bad_tumblr_url(url: str) -> None:
    """
    A URL which isn't a Tumblr post URL is rejected with a ValueError.
    """
    expect_failure = pytest.raises(ValueError, match="Cannot parse Tumblr URL")
    with expect_failure:
        parse_tumblr_post_url(url)
149
150
class TestIsMastodonHost:
    """
    Tests for `is_mastodon_host`.

    Both tests request the `vcr_cassette` fixture without reading it
    in the test body.
    """

    @pytest.mark.parametrize(
        "host",
        [
            "mastodon.social",
            "hachyderm.io",
            "social.jvns.ca",
        ],
    )
    def test_mastodon_servers(self, host: str, vcr_cassette: Cassette) -> None:
        """
        Hosts which really run Mastodon are recognised as such.
        """
        is_mastodon = is_mastodon_host(host)
        assert is_mastodon

    @pytest.mark.parametrize(
        "host",
        [
            # Ordinary websites, which don't expose
            # the /.well-known/nodeinfo endpoint
            "example.com",
            "tailscale.com",
            #
            # PeerTube does expose /.well-known/nodeinfo, but the software
            # it runs isn't Mastodon.
            "peertube.tv",
            #
            # This site has a known bad SSL certificate; because we can't
            # connect to it, it's assumed not to be a Mastodon host.
            "expired.badssl.com",
        ],
    )
    def test_non_mastodon_servers(self, host: str, vcr_cassette: Cassette) -> None:
        """
        Hosts which aren't Mastodon servers are not recognised as one.
        """
        is_mastodon = is_mastodon_host(host)
        assert not is_mastodon
187
188
class TestIsUrlSafe:
    """
    Tests for `is_url_safe`, which is given both `str` and `Path` values.
    """

    @pytest.mark.parametrize(
        "path",
        [
            "example.txt",
            Path("a/b/cat.jpg"),
        ],
    )
    def test_safe(self, path: str | Path) -> None:
        """These paths are reported as URL safe."""
        safe = is_url_safe(path)
        assert safe

    @pytest.mark.parametrize(
        "path",
        [
            # Each of these contains one of `?`, `%` or `#`.
            "is it?",
            Path("cat%c.jpg"),
            "a#b",
        ],
    )
    def test_unsafe(self, path: str | Path) -> None:
        """These paths are reported as not URL safe."""
        safe = is_url_safe(path)
        assert not safe