Skip to main content

Add a script for removing objects from S3

ID
7878985
date
2023-09-05 15:44:13+00:00
author
Alex Chan <alex@alexwlchan.net>
parent
bf8c6d0
message
Add a script for removing objects from S3
changed files
3 files, 93 additions

Changed files

aws/README.md (5121) → aws/README.md (5415)

diff --git a/aws/README.md b/aws/README.md
index 064b944..70e396f 100644
--- a/aws/README.md
+++ b/aws/README.md
@@ -63,6 +63,14 @@ …</code></pre></p>
   </dd>
 
   <dt>
+    <a href="https://github.com/alexwlchan/scripts/blob/main/aws/s3rm"><code>s3rm</code></a>
+  </dt>
+  <dd>
+    delete objects from an S3 prefix.
+    To preview which objects this will delete, run the <code>s3ls</code> script with the same arguments first – both scripts use the same code to list objects.
+  </dd>
+
+  <dt>
     <a href="https://github.com/alexwlchan/scripts/blob/main/aws/s3tree"><code>s3tree</code></a>
   </dt>
   <dd>

aws/s3rm (0) → aws/s3rm (99)

diff --git a/aws/s3rm b/aws/s3rm
new file mode 100755
index 0000000..69b9b43
--- /dev/null
+++ b/aws/s3rm
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+# Thin wrapper: run the credentials check, then pass every argument to s3rm.py.
+
+set -eu  # errexit + nounset: stop on the first failing command or unset variable
+
+_ensure_aws_credentials_are_fresh
+s3rm.py "$@"

aws/s3rm.py (0) → aws/s3rm.py (2186)

diff --git a/aws/s3rm.py b/aws/s3rm.py
new file mode 100755
index 0000000..1e08c30
--- /dev/null
+++ b/aws/s3rm.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+"""
+A script for deleting all the objects in an S3 prefix.
+"""
+
+import argparse
+import sys
+
+import humanize
+import more_itertools
+import tqdm
+
+from _common import create_s3_session, parse_s3_uri
+from s3ls import get_objects, get_object_versions
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        prog="s3rm", description="Delete all the objects in an S3 prefix"
+    )
+
+    parser.add_argument("S3_URI")
+    parser.add_argument(
+        "--with-versions",
+        action="store_true",
+        help="Delete every version of the objects in S3, not just the latest version",
+    )
+    parser.add_argument(
+        "--start-after", help="Start listing objects at the given key", default=""
+    )
+
+    return parser.parse_args()
+
+
+def delete_objects(sess, iterator):
+    total_deleted_count = 0
+    total_deleted_size = 0
+
+    def print_result():
+        print(f'{humanize.intcomma(total_deleted_count)} object{"s" if total_deleted_count != 1 else ""} deleted, total {humanize.naturalsize(total_deleted_size)}')
+
+    try:
+        for batch in more_itertools.chunked(iterator, 1000):
+            sess.client("s3").delete_objects(
+                Bucket=s3_location["Bucket"],
+                Delete={
+                    "Objects": [
+                        {k: v for (k, v) in s3_obj.items() if k in {"Key", "VersionId"}}
+                        for s3_obj in batch
+                    ],
+                },
+            )
+
+            total_deleted_count += len(batch)
+            total_deleted_size += sum(s3_obj['Size'] for s3_obj in batch)
+    except:
+        print_result()
+        raise
+    else:
+        print_result()
+
+
+if __name__ == "__main__":
+    args = parse_args()
+
+    s3_location = parse_s3_uri(args.S3_URI)
+    s3_list_args = {"Bucket": s3_location["Bucket"], "Prefix": s3_location["Path"]}
+
+    sess = create_s3_session(args.S3_URI)
+
+    if "--with-versions" in sys.argv:
+        iterator = get_object_versions
+        s3_list_args["KeyMarker"] = args.start_after
+    else:
+        iterator = get_objects
+        s3_list_args["StartAfter"] = args.start_after
+
+    delete_objects(sess, iterator=tqdm.tqdm(iterator(sess, **s3_list_args)))