document my text utils; move them into a dedicated folder
- ID: 7c50422
- date: 2023-05-02 18:45:41+00:00
- author: Alex Chan <alex@alexwlchan.net>
- parent: fea2650
- message: document my text utils; move them into a dedicated folder
- changed files: 5 files, 35 additions, 62 deletions
Changed files
lineweights (1811) → lineweights (0)
diff --git a/lineweights b/lineweights
deleted file mode 100755
index 1642784..0000000
--- a/lineweights
+++ /dev/null
@@ -1,62 +0,0 @@
-#!/usr/bin/env python3
-"""
-This script analyses a file, and highlights the longest lines, e.g.
-
- 4 ▏ 1167 ▏
- 7 ▏ 26324 █████▏
- 8 ▏ 268 ▏
-
-I've been using this to reduce the page weight on the Wellcome Collection
-website; I download the HTML and use ``lineweights`` to find the longest
-lines (and so the places where to target reductions).
-
-See https://alexwlchan.net/2018/05/ascii-bar-charts/
-
-"""
-
-import sys
-
-
-def draw_chart(data, *, min_line_length):
- max_value = max(count for _, count in data)
- increment = max_value / 25
-
- longest_label_length = max(len(label) for label, _ in data)
-
- for label, count in data:
- if count <= min_line_length:
- continue
-
- # The ASCII block elements come in chunks of 8, so we work out how
- # many fractions of 8 we need.
- # https://en.wikipedia.org/wiki/Block_Elements
- bar_chunks, remainder = divmod(int(count * 8 / increment), 8)
-
- # First draw the full width chunks
- bar = '█' * bar_chunks
-
- # Then add the fractional part. The Unicode code points for
- # block elements are (8/8), (7/8), (6/8), ... , so we need to
- # work backwards.
- if remainder > 0:
- bar += chr(ord('█') + (8 - remainder))
-
- # If the bar is empty, add a left one-eighth block
- bar = bar or '▏'
-
- print(f'{label.rjust(longest_label_length)} ▏ {count:#6d} {bar}')
-
-
-if __name__ == '__main__':
- try:
- path = sys.argv[1]
- except IndexError:
- sys.exit(f"Usage: {__file__} <PATH>")
-
- with open(path, "rb") as infile:
- line_lengths = [
- (f"L{lineno}", len(line))
- for lineno, line in enumerate(infile, start=1)
- ]
-
- draw_chart(line_lengths, min_line_length=250)
text/README.md (0) → text/README.md (906)
diff --git a/text/README.md b/text/README.md
new file mode 100644
index 0000000..2a7670a
--- /dev/null
+++ b/text/README.md
@@ -0,0 +1,35 @@
+# text
+
+These are utilities for manipulating streams of text; I consider them in a similar category to Unix staples like <code>head</code> and <code>tail</code>.
+
+## The individual scripts
+
+<dl>
+ <dt>
+ <a href="https://github.com/alexwlchan/scripts/blob/main/text/midline">
+ <code>midline [PATH]</code>
+ </a>
+ </dt>
+ <dd>
+ prints the line in the middle of a file, e.g. if the file has 5 lines, it prints line 3.
+ </dd>
+
+ <dt>
+ <a href="https://github.com/alexwlchan/scripts/blob/main/text/randline">
+ <code>randline [NUMBER] &lt; [PATH]</code>
+ </a>
+ </dt>
+ <dd>
+ prints randomly selected lines from the given text.
+ If <code>NUMBER</code> is unspecified, it prints a single line.
+ </dd>
+
+ <dt>
+ <a href="https://github.com/alexwlchan/scripts/blob/main/text/tally">
+ <code>tally &lt; [PATH]</code>
+ </a>
+ </dt>
+ <dd>
+ prints a tally of the given text.
+ </dd>
+</dl>
midline (345) → text/midline (345)
diff --git a/midline b/text/midline
similarity index 100%
rename from midline
rename to text/midline
randline (254) → text/randline (254)
diff --git a/randline b/text/randline
similarity index 100%
rename from randline
rename to text/randline
tally (406) → text/tally (406)
diff --git a/tally b/text/tally
similarity index 100%
rename from tally
rename to text/tally