Add my ‘lineweights’ script
- ID: ddb90ab
- date: 2022-03-23 07:17:39+00:00
- author: Alex Chan <alex@alexwlchan.net>
- parent: 1ac7357
- message: Add my 'lineweights' script
- changed files: 1 file, 62 additions
Changed files
lineweights (0) → lineweights (1811)
diff --git a/lineweights b/lineweights
new file mode 100755
index 0000000..1642784
--- /dev/null
+++ b/lineweights
@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+"""
+This script analyses a file, and highlights the longest lines, e.g.
+
+ 4 ▏ 1167 ▏
+ 7 ▏ 26324 █████▏
+ 8 ▏ 268 ▏
+
+I've been using this to reduce the page weight on the Wellcome Collection
+website; I download the HTML and use ``lineweights`` to find the longest
+lines (and so the best places to target for reductions).
+
+See https://alexwlchan.net/2018/05/ascii-bar-charts/
+
+"""
+
+import sys
+
+
+def draw_chart(data, *, min_line_length):
+    """
+    Print a bar chart of ``data`` to stdout, one row per entry.
+
+    ``data`` is an iterable of ``(label, count)`` pairs.  A row is printed
+    only for pairs whose count is strictly greater than ``min_line_length``.
+    Bars are scaled so that the largest count in ``data`` is 25 blocks wide,
+    and labels are right-aligned to the longest label in ``data`` (including
+    the labels of rows that are not printed).
+
+    Nothing is printed if ``data`` is empty.
+    """
+    # ``data`` is walked three times below, so take a copy in case the
+    # caller passed a one-shot iterator.
+    data = list(data)
+
+    # max() raises ValueError on an empty sequence, e.g. for an empty file.
+    if not data:
+        return
+
+    max_value = max(count for _, count in data)
+    increment = max_value / 25
+
+    longest_label_length = max(len(label) for label, _ in data)
+
+    for label, count in data:
+        if count <= min_line_length:
+            continue
+
+        # The Unicode block elements come in chunks of 8, so we work out how
+        # many fractions of 8 we need.  If every count is zero there is
+        # nothing to scale against, so skip the division rather than
+        # dividing by zero.
+        # https://en.wikipedia.org/wiki/Block_Elements
+        eighths = int(count * 8 / increment) if increment else 0
+        bar_chunks, remainder = divmod(eighths, 8)
+
+        # First draw the full width chunks
+        bar = '█' * bar_chunks
+
+        # Then add the fractional part.  The Unicode code points for
+        # block elements are (8/8), (7/8), (6/8), ... , so we need to
+        # work backwards.
+        if remainder > 0:
+            bar += chr(ord('█') + (8 - remainder))
+
+        # If the bar is empty, add a left one-eighth block
+        bar = bar or '▏'
+
+        print(f'{label.rjust(longest_label_length)} ▏ {count:#6d} {bar}')
+
+
+if __name__ == '__main__':
+    # The file to analyse is the first command-line argument; without it
+    # there is nothing to do, so exit with a usage message.
+    if len(sys.argv) < 2:
+        sys.exit(f"Usage: {__file__} <PATH>")
+    path = sys.argv[1]
+
+    # The file is read as bytes, so each length is a byte count (including
+    # the line ending).  Rows are labelled with their 1-based line number.
+    line_lengths = []
+    with open(path, "rb") as infile:
+        for lineno, line in enumerate(infile, start=1):
+            line_lengths.append((f"L{lineno}", len(line)))
+
+    draw_chart(line_lengths, min_line_length=250)