Skip to main content

document my text utils; move them into a dedicated folder

ID
7c50422
date
2023-05-02 18:45:41+00:00
author
Alex Chan <alex@alexwlchan.net>
parent
fea2650
message
document my text utils; move them into a dedicated folder
changed files
5 files, 35 additions, 62 deletions

Changed files

lineweights (1811) → lineweights (0)

diff --git a/lineweights b/lineweights
deleted file mode 100755
index 1642784..0000000
--- a/lineweights
+++ /dev/null
@@ -1,62 +0,0 @@
-#!/usr/bin/env python3
-"""
-This script analyses a file, and highlights the longest lines, e.g.
-
-      4 ▏   1167 ▏
-      7 ▏  26324 █████▏
-      8 ▏    268 ▏
-
-I've been using this to reduce the page weight on the Wellcome Collection
-website; I download the HTML and use ``lineweight`` to find the longest
-lines (and so the places where to target reductions).
-
-See https://alexwlchan.net/2018/05/ascii-bar-charts/
-
-"""
-
-import sys
-
-
-def draw_chart(data, *, min_line_length):
-    max_value = max(count for _, count in data)
-    increment = max_value / 25
-
-    longest_label_length = max(len(label) for label, _ in data)
-
-    for label, count in data:
-        if count <= min_line_length:
-            continue
-
-        # The ASCII block elements come in chunks of 8, so we work out how
-        # many fractions of 8 we need.
-        # https://en.wikipedia.org/wiki/Block_Elements
-        bar_chunks, remainder = divmod(int(count * 8 / increment), 8)
-
-        # First draw the full width chunks
-        bar = '█' * bar_chunks
-
-        # Then add the fractional part.  The Unicode code points for
-        # block elements are (8/8), (7/8), (6/8), ... , so we need to
-        # work backwards.
-        if remainder > 0:
-            bar += chr(ord('█') + (8 - remainder))
-
-        # If the bar is empty, add a left one-eighth block
-        bar = bar or  '▏'
-
-        print(f'{label.rjust(longest_label_length)} ▏ {count:#6d} {bar}')
-
-
-if __name__ == '__main__':
-    try:
-        path = sys.argv[1]
-    except IndexError:
-        sys.exit(f"Usage: {__file__} <PATH>")
-
-    with open(path, "rb") as infile:
-        line_lengths = [
-            (f"L{lineno}", len(line))
-            for lineno, line in enumerate(infile, start=1)
-        ]
-
-    draw_chart(line_lengths, min_line_length=250)

text/README.md (0) → text/README.md (906)

diff --git a/text/README.md b/text/README.md
new file mode 100644
index 0000000..2a7670a
--- /dev/null
+++ b/text/README.md
@@ -0,0 +1,35 @@
+# text
+
+These are utilities for manipulating streams of text; I consider them in a similar category to Unix staples like <code>head</code> and <code>tail</code>.
+
+## The individual scripts
+
+<dl>
+  <dt>
+    <a href="https://github.com/alexwlchan/scripts/blob/main/text/midline">
+      <code>midline [PATH]</code>
+    </a>
+  </dt>
+  <dd>
+    prints the line in the middle of a file, e.g. if the file has 5 lines, it prints line 3.
+  </dd>
+
+  <dt>
+    <a href="https://github.com/alexwlchan/scripts/blob/main/text/randline">
+      <code>randline [NUMBER] &lt; [PATH]</code>
+    </a>
+  </dt>
+  <dd>
+    prints randomly selected lines from the given text.
+    If <code>NUMBER</code> is unspecified, it prints a single line.
+  </dd>
+
+  <dt>
+    <a href="https://github.com/alexwlchan/scripts/blob/main/text/tally">
+      <code>tally &lt; [PATH]</code>
+    </a>
+  </dt>
+  <dd>
+    prints a tally of the given text.
+  </dd>
+</dl>

midline (345) → text/midline (345)

diff --git a/midline b/text/midline
similarity index 100%
rename from midline
rename to text/midline

randline (254) → text/randline (254)

diff --git a/randline b/text/randline
similarity index 100%
rename from randline
rename to text/randline

tally (406) → text/tally (406)

diff --git a/tally b/text/tally
similarity index 100%
rename from tally
rename to text/tally