Merge pull request #8 from alexwlchan/more-cog

ID

4b5d7e4

date

2023-12-26 22:41:26+00:00

author

Alex Chan <alex@alexwlchan.net>

parents

139561e, 69b4ce3

message

Merge pull request #8 from alexwlchan/more-cog

Continue converting my README files to use Cog

changed files

6 files, 158 additions, 38 deletions

aws/README.md
aws/login_ecr_public
cog_helpers.py
fs/README.md
recog
text/README.md

Changed files

aws/README.md (4275) → aws/README.md (7675)

diff --git a/aws/README.md b/aws/README.md
index b063774..3bab0ef 100644
--- a/aws/README.md
+++ b/aws/README.md
@@ -4,16 +4,96 @@ These are scripts to do stuff in AWS.
 
 ## The individual scripts
 
+<!-- [[[cog
+
+# This adds the current directory to sys.path; cog is expected to run from the repo root, which has cog_helpers.py
+from os.path import abspath, dirname
+import sys
+
+sys.path.append(abspath(dirname(dirname("."))))
+
+import cog_helpers
+
+folder_name = "aws"
+
+scripts = [
+    {
+        "name": "bulk_sns_publish.py",
+        "description": """
+        a tool for publishing lots of messages to SNS, using the <code>PublishBatch</code> API.
+        See <a href="https://alexwlchan.net/2023/my-sns-firehose/">Publishing lots and lots of messages to SNS</a>.
+        """
+    },
+    {
+        "name": "download_sqs_messages.py",
+        "description": """
+        a tool for downloading lots of messages from SQS, using the <code>ReceiveMessage</code> API.
+        See <a href="https://alexwlchan.net/2018/downloading-sqs-queues/">Getting every message in an SQS queue</a>.
+        """
+    },
+    {
+        "name": "dynamols.py",
+        "description": """
+        print the items in a DynamoDB table, one item per line
+        """
+    },
+    {
+        "name": "s3_unfreeze.py",
+        "description": """
+        takes a list of S3 URIs as input, and either restores those objects from Glacier or reports the status of an in-progress restoration
+        """
+    },
+    {
+        "usage": "s3hash.py <S3_URI> [--algorithm=<ALGO>]",
+        "description": """
+        get the checksum/hash of an object in S3
+        """
+    },
+    {
+        "name": "s3ls.py",
+        "description": """
+        list objects from an S3 prefix using the <code>ListObjectsV2</code> API, and print them as JSON to stdout.
+        <p><pre><code>$ s3ls s3://wellcomedigitalworkflow-workflow-data
+    {"Key": "10009/import/10009_db_export.xml", "LastModified": "2019-12-17T15:11:45+00:00", "ETag": "\"dd51824d2f7f434eba02b84a3ad2d2e0\"", "Size": 36883, "StorageClass": "STANDARD"}
+    {"Key": "10009/import/pp_cri_h_5_20_box_91_b18181272_mrc.xml", "LastModified": "2019-09-06T15:17:56+00:00", "ETag": "\"51899c7af2f78bee7a9ee79f358e5b67\"", "Size": 3462, "StorageClass": "STANDARD"}
+    {"Key": "10009/taskmanager/2013-09-17_15-54-03_1625/pp_cri_h_5_20_box_91_b18181272_jpg_1378258257168.xml", "LastModified": "2019-09-06T15:18:01+00:00", "ETag": "\"f71d4745ad32863008e158463cdc0bd3\"", "Size": 8279, "StorageClass": "STANDARD"}
+    …</code></pre></p>
+        I typically dump the results of this to a file before doing any processing – listing objects from S3 is moderately slow.
+        """
+    },
+    {
+        "name": "s3rm.py",
+        "description": """
+        delete objects from an S3 prefix.
+        To see a preview of what objects this will delete, use the <code>s3ls.py</code> script – they use the same code to list objects.
+        """
+    },
+    {
+        "name": "s3tree.py",
+        "description": """
+        show a tree-like view of objects and folders in an S3 prefix.
+        It includes clickable links to folders in the S3 console, so I can dig into the objects in more detail.
+        <img src="screenshots/s3tree.png">
+        """
+    },
+    {
+        "name": "sqs_stats.py",
+        "description": """
+        prints a summary of messages visible on our SQS queues.
+        The two columns (which are green/red) show messages visible on the main queue and dead-letter queue respectively.
+        <img src="screenshots/sqs_stats.png">
+        """
+    },
+]
+
+cog_helpers.create_description_table(folder_name=folder_name, scripts=scripts)
+
+]]]-->
 <dl>
   <dt>
-    <a href="https://github.com/alexwlchan/scripts/blob/main/aws/aws"><code>aws</code></a>
-  </dt>
-  <dd>
-    a wrapper around the AWS CLI that ensures I have fresh credentials (see below)
-  </dd>
-
-  <dt>
-    <a href="https://github.com/alexwlchan/scripts/blob/main/aws/bulk_sns_publish.py"><code>bulk_sns_publish.py</code></a>
+    <a href="https://github.com/alexwlchan/scripts/blob/main/aws/bulk_sns_publish.py">
+      <code>bulk_sns_publish.py</code>
+    </a>
   </dt>
   <dd>
     a tool for publishing lots of messages to SNS, using the <code>PublishBatch</code> API.
@@ -21,7 +101,9 @@ These are scripts to do stuff in AWS.
   </dd>
 
   <dt>
-    <a href="https://github.com/alexwlchan/scripts/blob/main/aws/download_sqs_messages.py"><code>download_sqs_messages.py</code></a>
+    <a href="https://github.com/alexwlchan/scripts/blob/main/aws/download_sqs_messages.py">
+      <code>download_sqs_messages.py</code>
+    </a>
   </dt>
   <dd>
     a tool for downloading lots of messages from SQS, using the <code>ReceiveMessage</code> API.
@@ -29,49 +111,61 @@ These are scripts to do stuff in AWS.
   </dd>
 
   <dt>
-    <a href="https://github.com/alexwlchan/scripts/blob/main/aws/dynamols.py"><code>dynamols.py</code></a>
+    <a href="https://github.com/alexwlchan/scripts/blob/main/aws/dynamols.py">
+      <code>dynamols.py</code>
+    </a>
   </dt>
   <dd>
     print the items in a DynamoDB table, one item per line
   </dd>
 
   <dt>
-    <a href="https://github.com/alexwlchan/scripts/blob/main/aws/s3_unfreeze.py"><code>s3_unfreeze.py</code></a>
+    <a href="https://github.com/alexwlchan/scripts/blob/main/aws/s3_unfreeze.py">
+      <code>s3_unfreeze.py</code>
+    </a>
   </dt>
   <dd>
     takes a list of S3 URIs as input, and either restores those objects from Glacier or reports the status of an in-progress restoration
   </dd>
 
   <dt>
-    <a href="https://github.com/alexwlchan/scripts/blob/main/aws/s3hash.py"><code>s3hash.py <S3_URI> [--algorithm=<ALGO>]</code></a>
+    <a href="https://github.com/alexwlchan/scripts/blob/main/aws/s3hash.py">
+      <code>s3hash.py <S3_URI> [--algorithm=<ALGO>]</code>
+    </a>
   </dt>
   <dd>
     get the checksum/hash of an object in S3
   </dd>
 
   <dt>
-    <a href="https://github.com/alexwlchan/scripts/blob/main/aws/s3ls.py"><code>s3ls.py</code></a>
+    <a href="https://github.com/alexwlchan/scripts/blob/main/aws/s3ls.py">
+      <code>s3ls.py</code>
+    </a>
   </dt>
   <dd>
     list objects from an S3 prefix using the <code>ListObjectsV2</code> API, and print them as JSON to stdout.
-    <p><pre><code>$ s3ls s3://wellcomedigitalworkflow-workflow-data
-{"Key": "10009/import/10009_db_export.xml", "LastModified": "2019-12-17T15:11:45+00:00", "ETag": "\"dd51824d2f7f434eba02b84a3ad2d2e0\"", "Size": 36883, "StorageClass": "STANDARD"}
-{"Key": "10009/import/pp_cri_h_5_20_box_91_b18181272_mrc.xml", "LastModified": "2019-09-06T15:17:56+00:00", "ETag": "\"51899c7af2f78bee7a9ee79f358e5b67\"", "Size": 3462, "StorageClass": "STANDARD"}
-{"Key": "10009/taskmanager/2013-09-17_15-54-03_1625/pp_cri_h_5_20_box_91_b18181272_jpg_1378258257168.xml", "LastModified": "2019-09-06T15:18:01+00:00", "ETag": "\"f71d4745ad32863008e158463cdc0bd3\"", "Size": 8279, "StorageClass": "STANDARD"}
-…</code></pre></p>
-    I typically dump the results of this to a file before doing any processing – listing objects from S3 is moderately slow.
+        <p><pre><code>$ s3ls s3://wellcomedigitalworkflow-workflow-data
+    {"Key": "10009/import/10009_db_export.xml", "LastModified": "2019-12-17T15:11:45+00:00", "ETag": ""dd51824d2f7f434eba02b84a3ad2d2e0"", "Size": 36883, "StorageClass": "STANDARD"}
+    {"Key": "10009/import/pp_cri_h_5_20_box_91_b18181272_mrc.xml", "LastModified": "2019-09-06T15:17:56+00:00", "ETag": ""51899c7af2f78bee7a9ee79f358e5b67"", "Size": 3462, "StorageClass": "STANDARD"}
+    {"Key": "10009/taskmanager/2013-09-17_15-54-03_1625/pp_cri_h_5_20_box_91_b18181272_jpg_1378258257168.xml", "LastModified": "2019-09-06T15:18:01+00:00", "ETag": ""f71d4745ad32863008e158463cdc0bd3"", "Size": 8279, "StorageClass": "STANDARD"}
+    …</code></pre></p>
+        I typically dump the results of this to a file before doing any processing – listing objects from S3 is moderately slow.
   </dd>
 
   <dt>
-    <a href="https://github.com/alexwlchan/scripts/blob/main/aws/s3rm.py"><code>s3rm.py</code></a>
+    <a href="https://github.com/alexwlchan/scripts/blob/main/aws/s3rm.py">
+      <code>s3rm.py</code>
+    </a>
   </dt>
   <dd>
     delete objects from an S3 prefix.
-    To see a preview of what objects this will delete, use the <code>s3ls</code> script – they use the same code to list objects.
+    To see a preview of what objects this will delete, use the <code>s3ls.py</code> script – they use the same code to list objects.
   </dd>
 
   <dt>
-    <a href="https://github.com/alexwlchan/scripts/blob/main/aws/s3tree.py"><code>s3tree.py</code></a>
+    <a href="https://github.com/alexwlchan/scripts/blob/main/aws/s3tree.py">
+      <code>s3tree.py</code>
+    </a>
   </dt>
   <dd>
     show a tree-like view of objects and folders in an S3 prefix.
@@ -80,7 +174,9 @@ …</code></pre></p>
   </dd>
 
   <dt>
-    <a href="https://github.com/alexwlchan/scripts/blob/main/aws/sqs_stats.py"><code>sqs_stats.py</code></a>
+    <a href="https://github.com/alexwlchan/scripts/blob/main/aws/sqs_stats.py">
+      <code>sqs_stats.py</code>
+    </a>
   </dt>
   <dd>
     prints a summary of messages visible on our SQS queues.
@@ -88,6 +184,7 @@ …</code></pre></p>
     <img src="screenshots/sqs_stats.png">
   </dd>
 </dl>
+<!-- [[[end]]] (checksum: d5f3c003ddf6645e4201be4fe39b23e0) -->
 
 ## Guessing the right account

aws/login_ecr_public (213) → aws/login_ecr_public (0)

diff --git a/aws/login_ecr_public b/aws/login_ecr_public
deleted file mode 100755
index 611f151..0000000
--- a/aws/login_ecr_public
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/usr/bin/env bash
-
-set -o errexit
-set -o nounset
-set -o verbose
-
-AWS_PROFILE=experience-dev aws ecr-public \
-  --region=us-east-1 get-login-password | docker login --username AWS --password-stdin public.ecr.aws

cog_helpers.py (1962) → cog_helpers.py (2768)

diff --git a/cog_helpers.py b/cog_helpers.py
index fc0c35e..f91534a 100644
--- a/cog_helpers.py
+++ b/cog_helpers.py
@@ -7,6 +7,7 @@ Here Cog is Ned Batchelder's file generation tool, described here:
 https://nedbatchelder.com/code/cog
 """
 
+import os
 import textwrap
 from typing import TypedDict
 
@@ -41,6 +42,8 @@ def create_description_table(
     repo_name: str = "alexwlchan/scripts",
     primary_branch: str = "main",
 ) -> None:
+    documented_files = set()
+
     outl("<dl>")
 
     for i, s in enumerate(scripts, start=1):
@@ -56,6 +59,11 @@ def create_description_table(
         for index, v in enumerate(variants, start=1):
             name = v.split()[0]
 
+            path = os.path.join(folder_name, name)
+            assert os.path.exists(path), os.path.join(path)
+
+            documented_files.add(name)
+
             outl(
                 f'<a href="https://github.com/{repo_name}/blob/{primary_branch}/{folder_name}/{name}">',
                 indent=4,
@@ -82,3 +90,25 @@ def create_description_table(
             outl("")
 
     outl("</dl>")
+
+    # Now check there isn't anything in the folder which should have
+    # been documented, but isn't.
+    undocumented_files = set()
+
+    for f in os.listdir(folder_name):
+        if os.path.isdir(os.path.join(folder_name, f)):
+            continue
+
+        if f in {"README.md", "utf8info.Dockerfile"}:
+            continue
+
+        if f.startswith(("test_", "_")):
+            continue
+
+        if f not in documented_files:
+            undocumented_files.add(f)
+
+    if undocumented_files:
+        raise ValueError(
+            f"Not all files in {folder_name} are documented: {undocumented_files}"
+        )

fs/README.md (4350) → fs/README.md (4359)

diff --git a/fs/README.md b/fs/README.md
index c68b3e8..68644b0 100644
--- a/fs/README.md
+++ b/fs/README.md
@@ -36,7 +36,7 @@ scripts = [
         "description": "prints the directory which is the deepest child of the given directory"
     },
     {
-        "name": "emptydir",
+        "name": "emptydir.py",
         "description": "removes any empty directories under the current one (including directories that are empty aside from files that can be safely deleted, e.g. <code>.DS_Store</code>)"
     },
     {
@@ -100,8 +100,8 @@ cog_helpers.create_description_table(folder_name=folder_name, scripts=scripts)
   </dd>
 
   <dt>
-    <a href="https://github.com/alexwlchan/scripts/blob/main/fs/emptydir">
-      <code>emptydir</code>
+    <a href="https://github.com/alexwlchan/scripts/blob/main/fs/emptydir.py">
+      <code>emptydir.py</code>
     </a>
   </dt>
   <dd>
@@ -156,4 +156,4 @@ cog_helpers.create_description_table(folder_name=folder_name, scripts=scripts)
       7.58M ~/repos/scripts</code></pre></p>
   </dd>
 </dl>
-<!-- [[[end]]] (checksum: f58d126f6b43914a355f95c7476ecd35) -->
+<!-- [[[end]]] (checksum: 76e15300c60dae8de5db3c785debc5cf) -->

recog (327) → recog (334)

diff --git a/recog b/recog
index 30783f1..65096e0 100755
--- a/recog
+++ b/recog
@@ -9,4 +9,4 @@
 set -o errexit
 set -o nounset
 
-find . -name README.md | xargs cog -c -r
+find . -name README.md | sort | xargs cog -c -r

text/README.md (6120) → text/README.md (6152)

diff --git a/text/README.md b/text/README.md
index 2a208f6..3af6d72 100644
--- a/text/README.md
+++ b/text/README.md
@@ -78,6 +78,7 @@ scripts = [
         "description": "prints a tally of lines in the given text.",
     },
     {
+        "name": "utf8info",
         "usage": "echo [STRING] | utf8info",
         "description": """
         read UTF-8 on stdin and print out the raw Unicode "
@@ -195,7 +196,7 @@ cog_helpers.create_description_table(folder_name=folder_name, scripts=scripts)
   </dd>
 
   <dt>
-    <a href="https://github.com/alexwlchan/scripts/blob/main/text/echo">
+    <a href="https://github.com/alexwlchan/scripts/blob/main/text/utf8info">
       <code>echo [STRING] | utf8info</code>
     </a>
   </dt>
@@ -204,4 +205,4 @@ cog_helpers.create_description_table(folder_name=folder_name, scripts=scripts)
     "codepoints. This is a Docker wrapper around <a href="https://github.com/lunasorcery/utf8info">a tool of the same name</a> by @lunasorcery.
   </dd>
 </dl>
-<!-- [[[end]]] (checksum: 6abfdf25169d3fba58601dec88f4bd9f) -->
\ No newline at end of file
+<!-- [[[end]]] (checksum: 679627866c7239d6d4d5db3a8f03ab91) -->
\ No newline at end of file