Skip to main content

Make sure we can find links in tweets

ID
a5bea82
date
2024-01-10 23:05:56+00:00
author
Alex Chan <alex@alexwlchan.net>
parent
6149bb5
message
Make sure we can find links in tweets
changed files
1 file, 13 additions, 2 deletions

Changed files

textexpander/get_tweet_text.py (1470) → textexpander/get_tweet_text.py (1721)

diff --git a/textexpander/get_tweet_text.py b/textexpander/get_tweet_text.py
index e2a7638..e1a5c0f 100755
--- a/textexpander/get_tweet_text.py
+++ b/textexpander/get_tweet_text.py
@@ -11,11 +11,17 @@ import sys
 import textwrap
 
 import bs4
+import httpx
 import hyperlink
 
 from urls import get_safari_url
 
 
+def get_tco_redirect(url: str) -> str:
+    resp = httpx.head(url)
+    return resp.headers["location"]
+
+
 if __name__ == "__main__":
     url = get_safari_url()
 
@@ -40,11 +46,16 @@ if __name__ == "__main__":
 
     soup = bs4.BeautifulSoup(html, "html.parser")
 
-    # username = soup.find("div", attrs={"data-testid": "User-Name"}).text.replace(f'@{handle}', '').strip()
-
     text = soup.find("div", attrs={"data-testid": "tweetText"}).text
     text = text.replace("#", "\\#")
 
+    # Look for a link to an external web page
+    card = soup.find("div", attrs={"data-testid": "card.wrapper"})
+
+    if card is not None:
+        linked_url = card.find("a").attrs["href"]
+        text += "\n\n" + get_tco_redirect(linked_url)
+
     time = datetime.datetime.fromisoformat(soup.find("time").attrs["datetime"])
 
     print(f'{url} ({time.strftime("%-d %b %Y")}):')