Remove a bunch of now-unused scripts
- ID
  48e51a9
- date
  2025-05-02 07:32:45+00:00
- author
  Alex Chan <alex@alexwlchan.net>
- parent
  9832c0e
- message
  Remove a bunch of now-unused scripts
- changed files
  6 files, 1 addition, 379 deletions
Changed files
text/README.md (8105) → text/README.md (6215)
diff --git a/text/README.md b/text/README.md
index 9f3ace5..de415f6 100644
--- a/text/README.md
+++ b/text/README.md
@@ -25,20 +25,6 @@ scripts = [
""",
},
{
- "usage": "fix_twemoji.py [PATH]",
- "description": """
- when I copy/paste a tweet into Obsidian, often any emoji get replaced by "twemoji" – links to Twitter’s custom emoji artwork.
- This script replaces those links with vanilla emoji characters.
- """,
- },
- {
- "usage": "fix_twitter_thread.py [PATH]",
- "description": """
- when I copy/paste a Twitter thread into Obsidian, this does some
- initial tidying up of the formatting for me.
- """,
- },
- {
"usage": "fix_whitespace [PATH]",
"description": """
when I copy/paste text into Obsidian from th web, this cleans up some of the extraneous whitespace.
@@ -100,14 +86,6 @@ scripts = [
"usage": "tally < [PATH]",
"description": "prints a tally of lines in the given text.",
},
- {
- "name": "utf8info",
- "usage": "echo [STRING] | utf8info",
- "description": """
- read UTF-8 on stdin and print out the raw Unicode "
- "codepoints. This is a Docker wrapper around <a href="https://github.com/lunasorcery/utf8info">a tool of the same name</a> by @lunasorcery.
- """,
- },
]
cog_helpers.create_description_table(folder_name=folder_name, scripts=scripts)
@@ -125,26 +103,6 @@ cog_helpers.create_description_table(folder_name=folder_name, scripts=scripts)
</dd>
<dt>
- <a href="https://github.com/alexwlchan/scripts/blob/main/text/fix_twemoji.py">
- <code>fix_twemoji.py [PATH]</code>
- </a>
- </dt>
- <dd>
- when I copy/paste a tweet into Obsidian, often any emoji get replaced by "twemoji" – links to Twitter’s custom emoji artwork.
- This script replaces those links with vanilla emoji characters.
- </dd>
-
- <dt>
- <a href="https://github.com/alexwlchan/scripts/blob/main/text/fix_twitter_thread.py">
- <code>fix_twitter_thread.py [PATH]</code>
- </a>
- </dt>
- <dd>
- when I copy/paste a Twitter thread into Obsidian, this does some
- initial tidying up of the formatting for me.
- </dd>
-
- <dt>
<a href="https://github.com/alexwlchan/scripts/blob/main/text/fix_whitespace">
<code>fix_whitespace [PATH]</code>
</a>
@@ -254,15 +212,5 @@ cog_helpers.create_description_table(folder_name=folder_name, scripts=scripts)
<dd>
prints a tally of lines in the given text.
</dd>
-
- <dt>
- <a href="https://github.com/alexwlchan/scripts/blob/main/text/utf8info">
- <code>echo [STRING] | utf8info</code>
- </a>
- </dt>
- <dd>
- read UTF-8 on stdin and print out the raw Unicode "
- "codepoints. This is a Docker wrapper around <a href="https://github.com/lunasorcery/utf8info">a tool of the same name</a> by @lunasorcery.
- </dd>
</dl>
-<!-- [[[end]]] (checksum: 08eaa47db846418b5ceaa263cd20e662) -->
\ No newline at end of file
+<!-- [[[end]]] (checksum: 105738536f233dce472625ce83573418) -->
\ No newline at end of file
text/fix_twemoji.py (667) → text/fix_twemoji.py (0)
diff --git a/text/fix_twemoji.py b/text/fix_twemoji.py
deleted file mode 100755
index 5c88443..0000000
--- a/text/fix_twemoji.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/usr/bin/env python3
-"""
-When I copy/paste a tweet into Obsidian, often any emoji get replaced
-by "twemoji" – links to Twitter’s custom emoji artwork. e.g.
-
- "Oh damn I need this book!"
-
-This script replaces those links with vanilla emoji characters.
-"""
-
-import sys
-
-from fix_twitter_thread import fix_emoji
-
-
-if __name__ == "__main__":
- try:
- path = sys.argv[1]
- except IndexError:
- sys.exit(f"Usage: {__file__} <PATH>")
-
- with open(path) as in_file:
- text = in_file.read()
-
- text = fix_emoji(text)
-
- with open(path, "w") as out_file:
- out_file.write(text)
text/fix_twitter_thread.py (5852) → text/fix_twitter_thread.py (0)
diff --git a/text/fix_twitter_thread.py b/text/fix_twitter_thread.py
deleted file mode 100755
index 89604d5..0000000
--- a/text/fix_twitter_thread.py
+++ /dev/null
@@ -1,215 +0,0 @@
-#!/usr/bin/env python3
-
-import datetime
-import os
-import re
-import sys
-
-import httpx
-import termcolor
-
-
-# A regex to find links to somebody's Twitter profile
-#
-# e.g. (https://twitter.com/BooksandChokers)
-PROFILE_URL_RE = re.compile(r"\(https://twitter\.com/(?P<username>[^\)^/]+)\)")
-
-
-def fix_emoji(text: str) -> str:
- """
- Replace any Twemoji in the text with the actual emoji characters.
- """
- # A regex to find emoji characters which have been replaced with twimg's
- #
- # e.g. 
- # e.g. 
- emoji_twimg_re = re.compile(
- r"!\[(?P<emoji>[^\]]+)\]"
- r'\(https://abs\-0\.twimg\.com/emoji/v2/svg/[0-9a-f\-]+\.svg(?: "[A-Za-z]+")?\)'
- )
-
- return emoji_twimg_re.sub(repl=r"\g<emoji>", string=text)
-
-
-def fix_hashtags(text: str) -> str:
- """
- Replace any hashtag links in the text with literal text.
- """
- # A regex to find hashtag links
- #
- # e.g. [#akindofspark](https://twitter.com/hashtag/akindofspark?src=hashtag_click)
- hashtag_re = re.compile(
- r"\[#(?P<hashtag>[a-zA-Z][a-zA-Z0-9]+)\]"
- r"\(https://twitter\.com/hashtag/[a-zA-Z][a-zA-Z0-9]+\?src=hashtag_click\)"
- )
-
- return hashtag_re.sub(repl=r"\\#\g<hashtag>", string=text)
-
-
-def get_profile_photo_re(handle: str) -> re.Pattern:
- # A regex to find links to somebody's profile that uses their profile
- # picture in the body of the link, e.g.
- #
- # [
- #
- # 
- #
- # ](https://twitter.com/BooksandChokers)
- #
- return re.compile(
- r"\[\n"
- r"\n"
- r"!\[\]\(https://pbs\.twimg\.com/profile_images/[0-9]+/[a-zA-Z0-9_\.]+\)\n"
- r"\n"
- r"\]\(https://twitter\.com/" + handle + r"\)\n"
- )
-
-
-def remove_profile_links(text: str, /, *, handle: str) -> str:
- # Remove any profile links that are on a single line, e.g.
- #
- # [Elle McNicoll ✍🏻📚](https://twitter.com/BooksandChokers)
- text = re.sub(
- r"\[[^\]]+\]\(https://twitter\.com/" + handle + r"\)" + "\xa0\n",
- repl="",
- string=text,
- )
-
- # Remove any profile links that are spread across multiple lines, e.g.
- #
- # [
- #
- # @BooksandChokers
- #
- # ](https://twitter.com/BooksandChokers)
- #
- # .
- text = re.sub(
- r"\[\n\n@"
- + handle
- + r"\n\n\]\(https://twitter\.com/"
- + handle
- + r"\)\n\n(?:·\n)?",
- repl="",
- string=text,
- )
-
- return text
-
-
-def remove_individual_tweet_links(text: str, /, *, handle: str) -> str:
- # Remove any links to individual tweets in the thread,
- #
- # e.g. [10 Jun](https://twitter.com/BooksandChokers/status/1667617023801839616)
- return re.sub(
- r"\[[0-9]+ [A-Z][a-z]+\]"
- r"\(https://twitter\.com/" + handle + r"/status/[0-9]+\)\n",
- repl="",
- string=text,
- )
-
-
-def download_images(text: str, /, *, handle: str) -> str:
- # Download images and save them to Obsidian
-
- image_match = re.compile(
- r"!\[(?P<alt_text>[^\]]*)\]"
- r"\((?P<url>https://pbs\.twimg\.com/media/(?P<media_id>[a-zA-Z0-9]+)\?format=(?P<format>jpg))\&name=(?P<size>small|medium)\)"
- )
-
- for m in image_match.finditer(text):
- url = m.group("url")
- out_name = m.group("media_id") + "." + m.group("format")
- out_path = os.path.join(
- os.environ["HOME"],
- "textfiles",
- "Attachments",
- str(datetime.datetime.now().year),
- out_name,
- )
-
- os.makedirs(os.path.dirname(out_path), exist_ok=True)
-
- resp = httpx.get(url)
- resp.raise_for_status()
-
- with open(out_path, "xb") as f:
- f.write(resp.content)
-
- alt_text = m.group("alt_text")
-
- if alt_text != "Image":
- text = text.replace(m.group(0), f"![[{out_name}|{alt_text}]]")
- else:
- text = text.replace(m.group(0), f"![[{out_name}]]")
-
- # Remove the lingering bits from the image link
- text = text.replace("[\n", "")
- text = re.sub(
- r"\]\(https://twitter\.com/" + handle + r"/status/[0-9]+/photo/[0-9]\)\n",
- repl="",
- string=text,
- )
-
- return text
-
-
-def remove_view_count_and_reply(text: str) -> str:
- # Remove the view count and link for me to reply, which are spread
- # across multiple lines, e.g.:
- #
- # 6,260
- #
- # Views
- #
- # [
- #
- # 
- #
- # ](https://twitter.com/alexwlchan)
- text = re.sub(
- r"\n[0-9,]+\n"
- r"\n"
- r"Views\n"
- r"\n"
- r"\[\n"
- r"\n"
- r"!\[Alex Chan\]\(https://pbs\.twimg\.com/profile_images/[^\)]+\)\n"
- r"\n"
- r"\]\(https://twitter\.com/alexwlchan\)\s*\n",
- "",
- text,
- )
-
- return text
-
-
-if __name__ == "__main__":
- try:
- path = sys.argv[1]
- except IndexError:
- sys.exit(f"Usage: {__file__} <PATH>")
-
- new_lines = []
- last_line = None
-
- with open(path) as in_file:
- text = in_file.read()
-
- text = remove_view_count_and_reply(text)
-
- handle = re.search(PROFILE_URL_RE, text).group("username")
- print("Detected handle as", termcolor.colored(handle, "blue"))
-
- profile_photo_re = get_profile_photo_re(handle)
- text = profile_photo_re.sub(repl="", string=text)
-
- text = fix_emoji(text)
- text = fix_hashtags(text)
- text = remove_profile_links(text, handle=handle)
- text = remove_individual_tweet_links(text, handle=handle)
- text = download_images(text, handle=handle)
- text = re.sub("\n\n", "\n", text)
-
- with open(path, "w") as out_file:
- out_file.write(text)
text/test_fix_twitter_thread.py (1117) → text/test_fix_twitter_thread.py (0)
diff --git a/text/test_fix_twitter_thread.py b/text/test_fix_twitter_thread.py
deleted file mode 100644
index 761c3e4..0000000
--- a/text/test_fix_twitter_thread.py
+++ /dev/null
@@ -1,50 +0,0 @@
-import pytest
-
-from fix_twitter_thread import (
- fix_emoji,
- remove_profile_links,
- remove_view_count_and_reply,
-)
-
-
-@pytest.mark.parametrize(
- ["input", "output"],
- [
- ("", "✍🏻"),
- (
- '',
- "✍🏻📚",
- ),
- ],
-)
-def test_fix_emoji(input: str, output: str) -> None:
- assert fix_emoji(input) == output
-
-
-def test_remove_profile_links() -> None:
- assert (
- remove_profile_links(
- "[Elle McNicoll ✍🏻📚](https://twitter.com/BooksandChokers) \n",
- handle="BooksandChokers",
- )
- == ""
- )
-
-
-def test_remove_view_count_and_reply() -> None:
- assert (
- remove_view_count_and_reply(
- """Hello
-
-6,260
-
-Views
-
-[
-
-
-
-](https://twitter.com/alexwlchan) \n\nworld"""
- )
- == "Hello\nworld"
- )
text/utf8info (432) → text/utf8info (0)
diff --git a/text/utf8info b/text/utf8info
deleted file mode 100755
index 9e56eea..0000000
--- a/text/utf8info
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/usr/bin/env bash
-# This is a wrapper around @lunasorcery's tool for printing out
-# raw Unicode codepoints: see https://github.com/lunasorcery/utf8info
-
-set -o errexit
-set -o nounset
-
-if ! docker images | grep lunasorcery/utf8info >/dev/null
-then
- pushd ~/repos/scripts/text
- docker build \
- --tag lunasorcery/utf8info \
- --file utf8info.Dockerfile \
- .
- popd
-fi
-
-docker run --interactive lunasorcery/utf8info
text/utf8info.Dockerfile (296) → text/utf8info.Dockerfile (0)
diff --git a/text/utf8info.Dockerfile b/text/utf8info.Dockerfile
deleted file mode 100644
index a7daca2..0000000
--- a/text/utf8info.Dockerfile
+++ /dev/null
@@ -1,15 +0,0 @@
-FROM ubuntu:18.04
-
-RUN apt-get update && \
- apt-get install --yes curl git g++ make unzip
-
-RUN git clone https://github.com/lunasorcery/utf8info.git
-
-WORKDIR utf8info
-
-RUN make && make install
-
-RUN apt-get remove --yes git make g++ unzip && \
- apt autoremove --yes
-
-ENTRYPOINT ["utf8info"]