text: remove the smartypants dependency
- ID: 7452067
- date: 2026-05-09 07:11:09+00:00
- author: Alex Chan <alex@alexwlchan.net>
- parent: d7cced9
- message: text: remove the `smartypants` dependency
- changed files
Changed files
CHANGELOG.md (4622) → CHANGELOG.md (4700)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 43514f7..9fae1a2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
# CHANGELOG
+## v38 - 2026-05-09
+
+Remove the `smartypants` dependency from `chives.text`.
+
## v37 - 2026-05-09
Remove the `rapidfuzz` dependency from `chives.static_site_tests`.
dev_requirements.txt (1235) → dev_requirements.txt (1189)
diff --git a/dev_requirements.txt b/dev_requirements.txt
index 21e5b23..6e133fb 100644
--- a/dev_requirements.txt
+++ b/dev_requirements.txt
@@ -2,27 +2,27 @@
# uv pip compile dev_requirements.in --output-file=dev_requirements.txt --exclude-newer=P7D --exclude-newer-package alexwlchan-chives=false
-e file:.
# via -r dev_requirements.in
-certifi==2026.2.25
+certifi==2026.4.22
# via alexwlchan-chives
coverage==7.13.5
# via pytest-cov
-greenlet==3.3.2
+greenlet==3.5.0
# via playwright
iniconfig==2.3.0
# via pytest
-librt==0.8.1
+librt==0.9.0
# via mypy
-mypy==1.20.0
+mypy==1.20.2
# via -r dev_requirements.in
mypy-extensions==1.1.0
# via mypy
-packaging==26.0
+packaging==26.2
# via pytest
-pathspec==1.0.4
+pathspec==1.1.1
# via mypy
pillow==12.2.0
# via alexwlchan-chives
-playwright==1.58.0
+playwright==1.59.0
# via alexwlchan-chives
pluggy==1.6.0
# via
@@ -32,7 +32,7 @@ pyee==13.0.1
# via playwright
pygments==2.20.0
# via pytest
-pytest==9.0.2
+pytest==9.0.3
# via
# alexwlchan-chives
# pytest-cov
@@ -43,10 +43,8 @@ pytest-vcr==1.0.2
# via -r dev_requirements.in
pyyaml==6.0.3
# via vcrpy
-ruff==0.15.9
+ruff==0.15.12
# via -r dev_requirements.in
-smartypants==2.0.2
- # via alexwlchan-chives
typing-extensions==4.15.0
# via
# mypy
pyproject.toml (1497) → pyproject.toml (1474)
diff --git a/pyproject.toml b/pyproject.toml
index 6581b3a..042f2bf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,7 +27,6 @@ license = "MIT"
fetch = ["certifi"]
media = ["Pillow"]
static_site_tests = ["playwright", "pytest"]
-text = ["smartypants"]
urls = ["certifi"]
[project.urls]
src/chives/__init__.py (391) → src/chives/__init__.py (391)
diff --git a/src/chives/__init__.py b/src/chives/__init__.py
index 6a441e1..d67a6f0 100644
--- a/src/chives/__init__.py
+++ b/src/chives/__init__.py
@@ -11,4 +11,4 @@ I share across multiple sites.
"""
-__version__ = "37"
+__version__ = "38"
src/chives/smartypants.py (0) → src/chives/smartypants.py (10516)
diff --git a/src/chives/smartypants.py b/src/chives/smartypants.py
new file mode 100644
index 0000000..9224f63
--- /dev/null
+++ b/src/chives/smartypants.py
@@ -0,0 +1,319 @@
+"""
+A Python implementation of SmartyPants, a tool for adding "smart punctuation"
+to text -- for example, curly quotes and smart dashes.
+
+This is based on SmartyPants.pl, a BSD-licensed Perl script written
+by John Gruber. See https://daringfireball.net/projects/smartypants/
+
+It is also based on SmartyPants.py, a Python fork of SmartyPants
+maintained by Chad Miller, Yu-Jie Lin, Leo Hemsted, and Justin Mayer.
+See https://pypi.org/project/smartypants/
+
+This module is not intended to be used directly -- use `chives.text` instead.
+"""
+
+from collections.abc import Iterator
+import re
+from typing import Literal, NamedTuple
+
+
+def smartypants(text: str) -> str:
+    """
+    Add smart punctuation to a piece of text.
+
+    The input is split into HTML tags and runs of text.  Tags are copied
+    through untouched.  Runs of text get curly quotes, en/em dashes and
+    ellipses, unless they are inside one of the elements matched by
+    `TAGS_TO_SKIP_RE` (`<pre>`, `<code>`, ...), in which case they are
+    copied through untouched as well.
+
+    Backslash-escaped punctuation (`\\"`, `\\'`, `\\.`, `\\-`, `` \\` ``
+    and `\\\\`) is replaced by a numeric character reference, so it is
+    never given smart punctuation.
+
+    Raises `ValueError` if a skipped element is closed by a tag that does
+    not match the tag that opened it.
+    """
+    # Escaped characters and the numeric character references that
+    # replace them.  The double backslash must be handled first.
+    escapes = (
+        ("\\\\", "&#92;"),
+        ('\\"', "&#34;"),
+        ("\\'", "&#39;"),
+        ("\\.", "&#46;"),
+        ("\\-", "&#45;"),
+        ("\\`", "&#96;"),
+    )
+
+    # The conversions below emit numeric character references; this table
+    # turns them into the matching Unicode characters at the end.
+    entity_to_char = {
+        "&#8211;": "–",
+        "&#8212;": "—",
+        "&#8216;": "‘",
+        "&#8217;": "’",
+        "&#8220;": "“",
+        "&#8221;": "”",
+        "&#8230;": "…",
+    }
+
+    skipped_tag_stack: list[str] = []
+    in_pre = False
+    result: list[str] = []
+
+    # Preserve context for single-character quote tokens.  Remember the last
+    # character of the previous text token, so we can curl them correctly.
+    prev_token_last_char = ""
+
+    for token in tokenize(text):
+        if token.type == "tag":
+            # Don't mess with quotes inside some tags.  This does not handle
+            # self <closing/> tags!
+            result.append(token.value)
+
+            if skip_match := TAGS_TO_SKIP_RE.match(token.value):
+                is_closing = bool(skip_match.group("closing"))
+                tag_name = skip_match.group("tag_name").lower()
+
+                if not is_closing:
+                    skipped_tag_stack.append(tag_name)
+                    in_pre = True
+                else:
+                    if skipped_tag_stack:
+                        if tag_name != skipped_tag_stack[-1]:  # pragma: no cover
+                            # This close doesn't match the open.  Raise
+                            # explicitly rather than `assert`, which is
+                            # stripped when Python runs with -O.
+                            raise ValueError(
+                                f"closing tag </{tag_name}> does not match "
+                                f"opening tag <{skipped_tag_stack[-1]}>"
+                            )
+                        skipped_tag_stack.pop()
+                    if not skipped_tag_stack:
+                        in_pre = False
+            continue
+
+        chunk = token.value
+        # Remember the last character of this token before processing.
+        last_char = chunk[-1:]
+
+        if not in_pre:
+            # Process escaped characters; they shouldn't have smart
+            # punctuation added.
+            for escaped, entity in escapes:
+                chunk = chunk.replace(escaped, entity)
+
+            # Convert quote entities back to regular quotes
+            chunk = chunk.replace("&quot;", '"')
+
+            # Convert dashes (three hyphens first, so they aren't seen
+            # as two hyphens plus one)
+            chunk = chunk.replace("---", "&#8212;")
+            chunk = chunk.replace("--", "&#8211;")
+
+            # Convert ellipses
+            chunk = chunk.replace("...", "&#8230;")
+            chunk = chunk.replace(". . .", "&#8230;")
+
+            if chunk == "'":
+                # Special case: single-character ' token
+                if re.match(r"\S", prev_token_last_char):  # pragma: no cover
+                    chunk = "&#8217;"
+                else:
+                    chunk = "&#8216;"
+            elif chunk == '"':
+                # Special case: single-character " token
+                if re.match(r"\S", prev_token_last_char):  # pragma: no cover
+                    chunk = "&#8221;"
+                else:
+                    chunk = "&#8220;"
+            else:
+                chunk = convert_quotes(chunk)
+
+            # Convert entities
+            for entity, char in entity_to_char.items():
+                chunk = chunk.replace(entity, char)
+
+        prev_token_last_char = last_char
+        result.append(chunk)
+
+    return "".join(result)
+
+
+def convert_quotes(text: str) -> str:
+    """
+    Convert quotes in *text* into HTML curly quote entities.
+
+    Straight quotes are replaced by numeric character references:
+    `&#8216;` / `&#8217;` for opening/closing single quotes and
+    `&#8220;` / `&#8221;` for opening/closing double quotes.  Everything
+    else in *text* is returned unchanged.
+
+    This is based on a function of the same name from the Python
+    SmartyPants.py library.
+
+    TODO: Use named entities.
+    """
+    punct_class = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]"""
+
+    # Special case if the very first character is a quote followed by
+    # punctuation at a non-word-break.  Close the quotes by brute force.
+    # These are raw strings, so the non-word-break is a single-backslash \B.
+    text = re.sub(r"""^'(?=%s\B)""" % (punct_class,), "&#8217;", text)
+    text = re.sub(r"""^"(?=%s\B)""" % (punct_class,), "&#8221;", text)
+
+    # Special case for double sets of quotes, e.g.:
+    #   <p>He said, "'Quoted' words in a larger quote."</p>
+    text = re.sub(r'''"'(?=\w)''', "&#8220;&#8216;", text)
+    text = re.sub(r"""'"(?=\w)""", "&#8216;&#8220;", text)
+
+    # Special case for decade abbreviations (the '80s).  There is usually
+    # whitespace before the quote, so don't require a word boundary there.
+    text = re.sub(r"""'(?=\d{2}s)""", "&#8217;", text)
+
+    close_class = r"[^\ \t\r\n\[\{\(\-]"
+
+    # Decimal dash entities.  This is interpolated into re.VERBOSE patterns,
+    # where an unescaped `#` would start a comment, so escape it.
+    dec_dashes = r"&\#8211; | &\#8212;"
+
+    # Get most opening single quotes:
+    opening_single_quotes_regex = re.compile(
+        r"""
+        (
+            \s          |   # a whitespace char, or
+            &nbsp;      |   # a non-breaking space entity, or
+            --          |   # dashes, or
+            &[mn]dash;  |   # named dash entities
+            %s          |   # or decimal entities
+            &\#x201[34];    # or hex
+        )
+        '                   # the quote
+        (?=\w)              # followed by a word character
+        """
+        % (dec_dashes,),
+        re.VERBOSE,
+    )
+    text = opening_single_quotes_regex.sub(r"\1&#8216;", text)
+
+    # Closing single quotes that are not followed by whitespace, a
+    # possessive/plural "s", or a digit:
+    closing_single_quotes_regex = re.compile(
+        r"""
+        (%s)
+        '
+        (?!\s | s\b | \d)
+        """
+        % (close_class,),
+        re.VERBOSE,
+    )
+    text = closing_single_quotes_regex.sub(r"\1&#8217;", text)
+
+    # Closing single quotes that are followed by whitespace or a
+    # possessive/plural "s":
+    closing_single_quotes_regex = re.compile(
+        r"""
+        (%s)
+        '
+        (\s | s\b)
+        """
+        % (close_class,),
+        re.VERBOSE,
+    )
+    text = closing_single_quotes_regex.sub(r"\1&#8217;\2", text)
+
+    # Any remaining single quotes should be opening ones:
+    text = text.replace("'", "&#8216;")
+
+    # Get most opening double quotes:
+    opening_double_quotes_regex = re.compile(
+        r"""
+        (
+            \s          |   # a whitespace char, or
+            &nbsp;      |   # a non-breaking space entity, or
+            --          |   # dashes, or
+            &[mn]dash;  |   # named dash entities
+            %s          |   # or decimal entities
+            &\#x201[34];    # or hex
+        )
+        "                   # the quote
+        (?=\w)              # followed by a word character
+        """
+        % (dec_dashes,),
+        re.VERBOSE,
+    )
+    text = opening_double_quotes_regex.sub(r"\1&#8220;", text)
+
+    # Double closing quotes: a quote followed by whitespace...
+    text = re.sub(r'"(?=\s)', "&#8221;", text)
+
+    # ...a quote at the very start that is followed by punctuation...
+    closing_double_quotes_regex = re.compile(
+        r"""
+        ^
+        "
+        (?=%s)
+        """
+        % (punct_class,),
+        re.VERBOSE,
+    )
+    text = closing_double_quotes_regex.sub("&#8221;", text)
+
+    # ...or a quote preceded by a character that indicates the quote
+    # should be closing.
+    closing_double_quotes_regex = re.compile(
+        r"""
+        (%s)   # character that indicates the quote should be closing
+        "
+        """
+        % (close_class,),
+        re.VERBOSE,
+    )
+    text = closing_double_quotes_regex.sub(r"\1&#8221;", text)
+
+    # Any remaining quotes should be opening ones.
+    text = text.replace('"', "&#8220;")
+
+    return text
+
+
+# Names of the HTML elements whose contents are left alone, joined into a
+# regex alternation.  `TAGS_TO_SKIP_RE` matches an opening or closing tag
+# for any of them, case-insensitively: the `closing` group holds the
+# leading slash (if any) and the `tag_name` group holds the element name.
+tags_to_skip = "pre|samp|code|tt|kbd|script|style|math"
+TAGS_TO_SKIP_RE = re.compile(
+    "<(?P<closing>/)?(?P<tag_name>" + tags_to_skip + ")[^>]*>",
+    flags=re.IGNORECASE,
+)
+
+
+class Token(NamedTuple):
+    """
+    One piece of the input text.
+
+    `type` says which kind of piece this is and `value` holds the exact
+    characters from the input:
+
+    -   `"tag"` -- markup such as `<a href="https://example.com">`
+    -   `"text"` -- a run of text between tags
+
+    """
+
+    type: Literal["text", "tag"]
+    value: str
+
+
+def tokenize(text: str) -> Iterator[Token]:
+    """
+    Find all the tokens in the input text.
+
+    Yields a `Token` for each non-empty run of text and for each tag or
+    comment, in the order they appear.  Joining the `value` of every token
+    gives back the original text.
+
+    This is based on two existing functions:
+
+    *   The `_tokenize()` subroutine from Brad Choate's MTRegex plugin.
+        http://www.bradchoate.com/past/mtregex.php
+    *   The `_tokenize()` function from the SmartyPants.py library.
+        https://github.com/justinmayer/smartypants.py/blob/main/smartypants.py
+    """
+    # Matches text outside an HTML tag followed by a comment or a tag.
+    tag_soup = re.compile(
+        r"(?P<text>[^<]*)"
+        r"(?P<tag><!--(?P<comment>.*?)--\s*>|<[^>]*>)",
+        re.DOTALL,
+    )
+
+    previous_end = 0
+
+    while match := tag_soup.match(text, previous_end):
+        if match.group("text"):
+            yield Token(type="text", value=match.group("text"))
+
+        # if -- in text part of comment, then it's not a comment, therefore it
+        # should be converted.
+        #
+        # In HTML4 [1]:
+        #   [...] Authors should avoid putting two or more adjacent hyphens
+        #   inside comments.
+        #
+        # In HTML5 [2]:
+        #   [...] the comment may have text, with the additional restriction
+        #   that the text must not [...], nor contain two consecutive U+002D
+        #   HYPHEN-MINUS characters (--)
+        #
+        # [1]: http://www.w3.org/TR/REC-html40/intro/sgmltut.html#h-3.2.4
+        # [2]: http://www.w3.org/TR/html5/syntax.html#comments
+        tag = match.group("tag")
+        if match.group("comment") and "--" in match.group("comment"):
+            yield Token(type="text", value=tag)
+        else:
+            yield Token(type="tag", value=tag)
+
+        previous_end = match.end()
+
+    # Whatever follows the last tag (or the whole input, if it contains
+    # no complete tag) is a final run of text.
+    if previous_end < len(text):
+        yield Token(type="text", value=text[previous_end:])
src/chives/text.py (721) → src/chives/text.py (215)
diff --git a/src/chives/text.py b/src/chives/text.py
index 2ef2465..8b461a7 100644
--- a/src/chives/text.py
+++ b/src/chives/text.py
@@ -2,31 +2,11 @@
Functions for dealing with text.
"""
-import functools
+from chives.smartypants import smartypants
-import smartypants
-
-@functools.cache
def smartify(text: str) -> str:
"""
Add curly quotes and smart dashes to a string.
"""
- attrs = (
- # normal quotes (" and ') to curly ones
- smartypants.Attr.q
- |
- # quote entities (") to curly quotes
- smartypants.Attr.w
- |
- # typewriter dashes (--) to en-dashes and dashes (---) to em-dashes
- smartypants.Attr.D
- |
- # dashes (...) to ellipses
- smartypants.Attr.e
- |
- # output Unicode chars instead of numeric character references
- smartypants.Attr.u
- )
-
- return smartypants.smartypants(text, attrs)
+ return smartypants(text)
tests/stubs/smartypants.pyi (119) → tests/stubs/smartypants.pyi (0)
diff --git a/tests/stubs/smartypants.pyi b/tests/stubs/smartypants.pyi
deleted file mode 100644
index 766b445..0000000
--- a/tests/stubs/smartypants.pyi
+++ /dev/null
@@ -1,8 +0,0 @@
-class Attr:
- q: int
- D: int
- e: int
- u: int
- w: int
-
-def smartypants(text: str, attrs: int) -> str: ...
tests/test_text.py (674) → tests/test_text.py (1470)
diff --git a/tests/test_text.py b/tests/test_text.py
index 22e137e..187244b 100644
--- a/tests/test_text.py
+++ b/tests/test_text.py
@@ -10,11 +10,21 @@ from chives.text import smartify
@pytest.mark.parametrize(
"text, expected",
[
+ ("<", "<"),
+ ("<0", "<0"),
+ ("--", "–"),
+ ("---", "—"),
+ ("&quot;", "“"),
+ ("'", "‘"),
("Isn't it delightful -- she said", "Isn’t it delightful – she said"),
("Are you ... sure?", "Are you … sure?"),
("<h2>Isn't it delightful?</h2>", "<h2>Isn’t it delightful?</h2>"),
("<li>Isn't it delightful?</li>", "<li>Isn’t it delightful?</li>"),
("<p>&quot;It's nice&quot;, he said</p>", "<p>“It’s nice”, he said</p>"),
+ (
+ "<!-- this -- is -- not -- a -- valid -- comment -->",
+ "<!– this – is – not – a – valid – comment –>",
+ ),
],
)
def test_smartify(text: str, expected: str) -> None:
@@ -23,3 +33,24 @@ def test_smartify(text: str, expected: str) -> None:
"""
actual = smartify(text)
assert actual == expected
+
+ assert smartify(actual) == actual
+
+
+@pytest.mark.parametrize(
+    "text",
+    [
+        # A `<` that is never closed by a `>` doesn't form a tag.
+        "<",
+        "<0",
+        # Straight quotes inside a tag's attributes are part of the tag.
+        '<a href="https://example.com">example.com</a>',
+        # Text inside <pre> and <code> is skipped, including when nested.
+        '<pre>print("hello world")</pre>',
+        '<pre><code data-lang="python">print("hello world")</code></pre>',
+        # Input that is a single tag, with the slash in either position.
+        "<br/>",
+        "</br>",
+    ],
+)
+def test_is_unchanged_by_smartify(text: str) -> None:
+    """
+    Test these strings are unaffected by "smart" punctuation.
+    """
+    assert smartify(text) == text