Remove a bunch of now-unused scripts

ID

48e51a9

date

2025-05-02 07:32:45+00:00

author

Alex Chan <alex@alexwlchan.net>

parent

9832c0e

message

Remove a bunch of now-unused scripts

changed files

6 files, 1 addition, 379 deletions

text/README.md
text/fix_twemoji.py
text/fix_twitter_thread.py
text/test_fix_twitter_thread.py
text/utf8info
text/utf8info.Dockerfile

Changed files

text/README.md (8105) → text/README.md (6215)

diff --git a/text/README.md b/text/README.md
index 9f3ace5..de415f6 100644
--- a/text/README.md
+++ b/text/README.md
@@ -25,20 +25,6 @@ scripts = [
         """,
     },
     {
-        "usage": "fix_twemoji.py [PATH]",
-        "description": """
-        when I copy/paste a tweet into Obsidian, often any emoji get replaced by "twemoji" – links to Twitter’s custom emoji artwork.
-        This script replaces those links with vanilla emoji characters.
-        """,
-    },
-    {
-        "usage": "fix_twitter_thread.py [PATH]",
-        "description": """
-        when I copy/paste a Twitter thread into Obsidian, this does some
-        initial tidying up of the formatting for me.
-        """,
-    },
-    {
         "usage": "fix_whitespace [PATH]",
         "description": """
         when I copy/paste text into Obsidian from th web, this cleans up some of the extraneous whitespace.
@@ -100,14 +86,6 @@ scripts = [
         "usage": "tally < [PATH]",
         "description": "prints a tally of lines in the given text.",
     },
-    {
-        "name": "utf8info",
-        "usage": "echo [STRING] | utf8info",
-        "description": """
-        read UTF-8 on stdin and print out the raw Unicode "
-        "codepoints. This is a Docker wrapper around <a href="https://github.com/lunasorcery/utf8info">a tool of the same name</a> by @lunasorcery.
-        """,
-    },
 ]
 
 cog_helpers.create_description_table(folder_name=folder_name, scripts=scripts)
@@ -125,26 +103,6 @@ cog_helpers.create_description_table(folder_name=folder_name, scripts=scripts)
   </dd>
 
   <dt>
-    <a href="https://github.com/alexwlchan/scripts/blob/main/text/fix_twemoji.py">
-      <code>fix_twemoji.py [PATH]</code>
-    </a>
-  </dt>
-  <dd>
-    when I copy/paste a tweet into Obsidian, often any emoji get replaced by "twemoji" – links to Twitter’s custom emoji artwork.
-    This script replaces those links with vanilla emoji characters.
-  </dd>
-
-  <dt>
-    <a href="https://github.com/alexwlchan/scripts/blob/main/text/fix_twitter_thread.py">
-      <code>fix_twitter_thread.py [PATH]</code>
-    </a>
-  </dt>
-  <dd>
-    when I copy/paste a Twitter thread into Obsidian, this does some
-    initial tidying up of the formatting for me.
-  </dd>
-
-  <dt>
     <a href="https://github.com/alexwlchan/scripts/blob/main/text/fix_whitespace">
       <code>fix_whitespace [PATH]</code>
     </a>
@@ -254,15 +212,5 @@ cog_helpers.create_description_table(folder_name=folder_name, scripts=scripts)
   <dd>
     prints a tally of lines in the given text.
   </dd>
-
-  <dt>
-    <a href="https://github.com/alexwlchan/scripts/blob/main/text/utf8info">
-      <code>echo [STRING] | utf8info</code>
-    </a>
-  </dt>
-  <dd>
-    read UTF-8 on stdin and print out the raw Unicode "
-    "codepoints. This is a Docker wrapper around <a href="https://github.com/lunasorcery/utf8info">a tool of the same name</a> by @lunasorcery.
-  </dd>
 </dl>
-<!-- [[[end]]] (checksum: 08eaa47db846418b5ceaa263cd20e662) -->
\ No newline at end of file
+<!-- [[[end]]] (checksum: 105738536f233dce472625ce83573418) -->
\ No newline at end of file

text/fix_twemoji.py (667) → text/fix_twemoji.py (0)

diff --git a/text/fix_twemoji.py b/text/fix_twemoji.py
deleted file mode 100755
index 5c88443..0000000
--- a/text/fix_twemoji.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/usr/bin/env python3
-"""
-When I copy/paste a tweet into Obsidian, often any emoji get replaced
-by "twemoji" – links to Twitter’s custom emoji artwork.  e.g.
-
-    "Oh damn I need this book!"![🤝](https://abs-0.twimg.com/emoji/v2/svg/1f91d.svg)
-
-This script replaces those links with vanilla emoji characters.
-"""
-
-import sys
-
-from fix_twitter_thread import fix_emoji
-
-
-if __name__ == "__main__":
-    try:
-        path = sys.argv[1]
-    except IndexError:
-        sys.exit(f"Usage: {__file__} <PATH>")
-
-    with open(path) as in_file:
-        text = in_file.read()
-
-    text = fix_emoji(text)
-
-    with open(path, "w") as out_file:
-        out_file.write(text)

text/fix_twitter_thread.py (5852) → text/fix_twitter_thread.py (0)

diff --git a/text/fix_twitter_thread.py b/text/fix_twitter_thread.py
deleted file mode 100755
index 89604d5..0000000
--- a/text/fix_twitter_thread.py
+++ /dev/null
@@ -1,215 +0,0 @@
-#!/usr/bin/env python3
-
-import datetime
-import os
-import re
-import sys
-
-import httpx
-import termcolor
-
-
-# A regex to find links to somebody's Twitter profile
-#
-# e.g. (https://twitter.com/BooksandChokers)
-PROFILE_URL_RE = re.compile(r"\(https://twitter\.com/(?P<username>[^\)^/]+)\)")
-
-
-def fix_emoji(text: str) -> str:
-    """
-    Replace any Twemoji in the text with the actual emoji characters.
-    """
-    # A regex to find emoji characters which have been replaced with twimg's
-    #
-    # e.g. ![✍🏻](https://abs-0.twimg.com/emoji/v2/svg/270d-1f3fb.svg)
-    # e.g. ![📚](https://abs-0.twimg.com/emoji/v2/svg/1f4da.svg "Books")
-    emoji_twimg_re = re.compile(
-        r"!\[(?P<emoji>[^\]]+)\]"
-        r'\(https://abs\-0\.twimg\.com/emoji/v2/svg/[0-9a-f\-]+\.svg(?: "[A-Za-z]+")?\)'
-    )
-
-    return emoji_twimg_re.sub(repl=r"\g<emoji>", string=text)
-
-
-def fix_hashtags(text: str) -> str:
-    """
-    Replace any hashtag links in the text with literal text.
-    """
-    # A regex to find hashtag links
-    #
-    # e.g. [#akindofspark](https://twitter.com/hashtag/akindofspark?src=hashtag_click)
-    hashtag_re = re.compile(
-        r"\[#(?P<hashtag>[a-zA-Z][a-zA-Z0-9]+)\]"
-        r"\(https://twitter\.com/hashtag/[a-zA-Z][a-zA-Z0-9]+\?src=hashtag_click\)"
-    )
-
-    return hashtag_re.sub(repl=r"\\#\g<hashtag>", string=text)
-
-
-def get_profile_photo_re(handle: str) -> re.Pattern:
-    # A regex to find links to somebody's profile that uses their profile
-    # picture in the body of the link, e.g.
-    #
-    #     [
-    #
-    #     ![](https://pbs.twimg.com/profile_images/1234/bKbnzots_x96.jpg)
-    #
-    #     ](https://twitter.com/BooksandChokers)
-    #
-    return re.compile(
-        r"\[\n"
-        r"\n"
-        r"!\[\]\(https://pbs\.twimg\.com/profile_images/[0-9]+/[a-zA-Z0-9_\.]+\)\n"
-        r"\n"
-        r"\]\(https://twitter\.com/" + handle + r"\)\n"
-    )
-
-
-def remove_profile_links(text: str, /, *, handle: str) -> str:
-    # Remove any profile links that are on a single line, e.g.
-    #
-    # [Elle McNicoll ✍🏻📚](https://twitter.com/BooksandChokers)
-    text = re.sub(
-        r"\[[^\]]+\]\(https://twitter\.com/" + handle + r"\)" + "\xa0\n",
-        repl="",
-        string=text,
-    )
-
-    # Remove any profile links that are spread across multiple lines, e.g.
-    #
-    # [
-    #
-    # @BooksandChokers
-    #
-    # ](https://twitter.com/BooksandChokers)
-    #
-    # .
-    text = re.sub(
-        r"\[\n\n@"
-        + handle
-        + r"\n\n\]\(https://twitter\.com/"
-        + handle
-        + r"\)\n\n(?:·\n)?",
-        repl="",
-        string=text,
-    )
-
-    return text
-
-
-def remove_individual_tweet_links(text: str, /, *, handle: str) -> str:
-    # Remove any links to individual tweets in the thread,
-    #
-    # e.g. [10 Jun](https://twitter.com/BooksandChokers/status/1667617023801839616)
-    return re.sub(
-        r"\[[0-9]+ [A-Z][a-z]+\]"
-        r"\(https://twitter\.com/" + handle + r"/status/[0-9]+\)\n",
-        repl="",
-        string=text,
-    )
-
-
-def download_images(text: str, /, *, handle: str) -> str:
-    # Download images and save them to Obsidian
-
-    image_match = re.compile(
-        r"!\[(?P<alt_text>[^\]]*)\]"
-        r"\((?P<url>https://pbs\.twimg\.com/media/(?P<media_id>[a-zA-Z0-9]+)\?format=(?P<format>jpg))\&name=(?P<size>small|medium)\)"
-    )
-
-    for m in image_match.finditer(text):
-        url = m.group("url")
-        out_name = m.group("media_id") + "." + m.group("format")
-        out_path = os.path.join(
-            os.environ["HOME"],
-            "textfiles",
-            "Attachments",
-            str(datetime.datetime.now().year),
-            out_name,
-        )
-
-        os.makedirs(os.path.dirname(out_path), exist_ok=True)
-
-        resp = httpx.get(url)
-        resp.raise_for_status()
-
-        with open(out_path, "xb") as f:
-            f.write(resp.content)
-
-        alt_text = m.group("alt_text")
-
-        if alt_text != "Image":
-            text = text.replace(m.group(0), f"![[{out_name}|{alt_text}]]")
-        else:
-            text = text.replace(m.group(0), f"![[{out_name}]]")
-
-    # Remove the lingering bits from the image link
-    text = text.replace("[\n", "")
-    text = re.sub(
-        r"\]\(https://twitter\.com/" + handle + r"/status/[0-9]+/photo/[0-9]\)\n",
-        repl="",
-        string=text,
-    )
-
-    return text
-
-
-def remove_view_count_and_reply(text: str) -> str:
-    # Remove the view count and link for me to reply, which are spread
-    # across multiple lines, e.g.:
-    #
-    # 6,260
-    #
-    # Views
-    #
-    # [
-    #
-    # ![Alex Chan](https://pbs.twimg.com/profile_images/1538296879137562624/w3pwqwel_x96.jpg)
-    #
-    # ](https://twitter.com/alexwlchan)
-    text = re.sub(
-        r"\n[0-9,]+\n"
-        r"\n"
-        r"Views\n"
-        r"\n"
-        r"\[\n"
-        r"\n"
-        r"!\[Alex Chan\]\(https://pbs\.twimg\.com/profile_images/[^\)]+\)\n"
-        r"\n"
-        r"\]\(https://twitter\.com/alexwlchan\)\s*\n",
-        "",
-        text,
-    )
-
-    return text
-
-
-if __name__ == "__main__":
-    try:
-        path = sys.argv[1]
-    except IndexError:
-        sys.exit(f"Usage: {__file__} <PATH>")
-
-    new_lines = []
-    last_line = None
-
-    with open(path) as in_file:
-        text = in_file.read()
-
-    text = remove_view_count_and_reply(text)
-
-    handle = re.search(PROFILE_URL_RE, text).group("username")
-    print("Detected handle as", termcolor.colored(handle, "blue"))
-
-    profile_photo_re = get_profile_photo_re(handle)
-    text = profile_photo_re.sub(repl="", string=text)
-
-    text = fix_emoji(text)
-    text = fix_hashtags(text)
-    text = remove_profile_links(text, handle=handle)
-    text = remove_individual_tweet_links(text, handle=handle)
-    text = download_images(text, handle=handle)
-    text = re.sub("\n\n", "\n", text)
-
-    with open(path, "w") as out_file:
-        out_file.write(text)

text/test_fix_twitter_thread.py (1117) → text/test_fix_twitter_thread.py (0)

diff --git a/text/test_fix_twitter_thread.py b/text/test_fix_twitter_thread.py
deleted file mode 100644
index 761c3e4..0000000
--- a/text/test_fix_twitter_thread.py
+++ /dev/null
@@ -1,50 +0,0 @@
-import pytest
-
-from fix_twitter_thread import (
-    fix_emoji,
-    remove_profile_links,
-    remove_view_count_and_reply,
-)
-
-
-@pytest.mark.parametrize(
-    ["input", "output"],
-    [
-        ("![✍🏻](https://abs-0.twimg.com/emoji/v2/svg/270d-1f3fb.svg)", "✍🏻"),
-        (
-            '![✍🏻](https://abs-0.twimg.com/emoji/v2/svg/270d-1f3fb.svg)![📚](https://abs-0.twimg.com/emoji/v2/svg/1f4da.svg "Books")',
-            "✍🏻📚",
-        ),
-    ],
-)
-def test_fix_emoji(input: str, output: str) -> None:
-    assert fix_emoji(input) == output
-
-
-def test_remove_profile_links() -> None:
-    assert (
-        remove_profile_links(
-            "[Elle McNicoll ✍🏻📚](https://twitter.com/BooksandChokers) \n",
-            handle="BooksandChokers",
-        )
-        == ""
-    )
-
-
-def test_remove_view_count_and_reply() -> None:
-    assert (
-        remove_view_count_and_reply(
-            """Hello
-
-6,260
-
-Views
-
-[
-
-![Alex Chan](https://pbs.twimg.com/profile_images/1538296879137562624/w3pwqwel_x96.jpg)
-
-](https://twitter.com/alexwlchan) \n\nworld"""
-        )
-        == "Hello\nworld"
-    )

text/utf8info (432) → text/utf8info (0)

diff --git a/text/utf8info b/text/utf8info
deleted file mode 100755
index 9e56eea..0000000
--- a/text/utf8info
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/usr/bin/env bash
-# This is a wrapper around @lunasorcery's tool for printing out
-# raw Unicode codepoints: see https://github.com/lunasorcery/utf8info
-
-set -o errexit
-set -o nounset
-
-if ! docker images | grep lunasorcery/utf8info >/dev/null
-then
-  pushd ~/repos/scripts/text
-    docker build \
-      --tag lunasorcery/utf8info \
-      --file utf8info.Dockerfile \
-      .
-  popd
-fi
-
-docker run --interactive lunasorcery/utf8info

text/utf8info.Dockerfile (296) → text/utf8info.Dockerfile (0)

diff --git a/text/utf8info.Dockerfile b/text/utf8info.Dockerfile
deleted file mode 100644
index a7daca2..0000000
--- a/text/utf8info.Dockerfile
+++ /dev/null
@@ -1,15 +0,0 @@
-FROM ubuntu:18.04
-
-RUN apt-get update && \
-    apt-get install --yes curl git g++ make unzip
-
-RUN git clone https://github.com/lunasorcery/utf8info.git
-
-WORKDIR utf8info
-
-RUN make && make install
-
-RUN apt-get remove --yes git make g++ unzip && \
-    apt autoremove --yes
-
-ENTRYPOINT ["utf8info"]