Add a script for removing objects from S3
- ID: 7878985
- date: 2023-09-05 15:44:13+00:00
- author: Alex Chan <alex@alexwlchan.net>
- parent: bf8c6d0
- message: Add a script for removing objects from S3
- changed files: 3 files, 93 additions
Changed files
aws/README.md (5121) → aws/README.md (5415)
diff --git a/aws/README.md b/aws/README.md
index 064b944..70e396f 100644
--- a/aws/README.md
+++ b/aws/README.md
@@ -63,6 +63,14 @@ …</code></pre></p>
</dd>
<dt>
+ <a href="https://github.com/alexwlchan/scripts/blob/main/aws/s3rm"><code>s3rm</code></a>
+ </dt>
+ <dd>
+ delete objects from an S3 prefix.
+ To see a preview of what objects this will delete, use the <code>s3ls</code> script – they use the same code to list objects.
+ </dd>
+
+ <dt>
<a href="https://github.com/alexwlchan/scripts/blob/main/aws/s3tree"><code>s3tree</code></a>
</dt>
<dd>
aws/s3rm (0) → aws/s3rm (99)
diff --git a/aws/s3rm b/aws/s3rm
new file mode 100755
index 0000000..69b9b43
--- /dev/null
+++ b/aws/s3rm
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+# Thin wrapper around s3rm.py: runs _ensure_aws_credentials_are_fresh
+# (presumably a credential-refresh helper on PATH -- confirm), then
+# forwards every command-line argument unchanged to the Python script.
+
+# -e: abort on the first failing command; -u: unset variables are errors.
+set -eu
+
+_ensure_aws_credentials_are_fresh
+s3rm.py "$@"
aws/s3rm.py (0) → aws/s3rm.py (2186)
diff --git a/aws/s3rm.py b/aws/s3rm.py
new file mode 100755
index 0000000..1e08c30
--- /dev/null
+++ b/aws/s3rm.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+"""
+A script for deleting all the objects in an S3 prefix.
+"""
+
+import argparse
+import sys
+
+import humanize
+import more_itertools
+import tqdm
+
+from _common import create_s3_session, parse_s3_uri
+from s3ls import get_objects, get_object_versions
+
+
+def parse_args(argv=None):
+    """
+    Parse the command-line arguments for s3rm.
+
+    ``argv`` is the list of argument strings to parse.  When it is None
+    (the default, and what the script entry point uses), argparse reads
+    ``sys.argv[1:]``.
+
+    Returns an ``argparse.Namespace`` with the attributes ``S3_URI``,
+    ``with_versions`` (bool) and ``start_after`` (str, default "").
+    """
+    parser = argparse.ArgumentParser(
+        prog="s3rm", description="Delete all the objects in an S3 prefix"
+    )
+
+    parser.add_argument("S3_URI")
+    parser.add_argument(
+        "--with-versions",
+        action="store_true",
+        help="Delete every version of the objects in S3, not just the latest version",
+    )
+    parser.add_argument(
+        "--start-after", help="Start listing objects at the given key", default=""
+    )
+
+    return parser.parse_args(argv)
+
+
+def delete_objects(sess, iterator, *, bucket=None):
+    """
+    Delete every S3 object yielded by ``iterator`` and print a summary.
+
+    ``sess`` must provide ``client("s3")``.  ``iterator`` yields dicts that
+    contain a "Key" and optionally a "VersionId" and "Size"; only "Key" and
+    "VersionId" are sent to S3.  ``bucket`` is the bucket to delete from;
+    when it is None the module-level ``s3_location["Bucket"]`` set by the
+    script entry point is used.
+
+    Objects are deleted in batches of up to 1000.  Keys that S3 reports in
+    the "Errors" list of a response are printed to stderr and left out of
+    the totals.  The summary line is always printed, including when an
+    exception (or Ctrl-C) interrupts the run, and the exception then
+    propagates to the caller.
+    """
+    if bucket is None:
+        # Fall back to the global assigned in the ``__main__`` block.
+        bucket = s3_location["Bucket"]
+
+    # One client for the whole run instead of a new one per batch.
+    s3_client = sess.client("s3")
+
+    total_deleted_count = 0
+    total_deleted_size = 0
+
+    try:
+        # The DeleteObjects API accepts at most 1000 keys per request.
+        for batch in more_itertools.chunked(iterator, 1000):
+            resp = s3_client.delete_objects(
+                Bucket=bucket,
+                Delete={
+                    "Objects": [
+                        {k: v for (k, v) in s3_obj.items() if k in {"Key", "VersionId"}}
+                        for s3_obj in batch
+                    ],
+                },
+            )
+
+            # A successful response can still list individual keys that
+            # could not be deleted; don't report those as deleted.
+            errors = resp.get("Errors", [])
+            for err in errors:
+                print(
+                    f"Failed to delete {err.get('Key')}: "
+                    f"{err.get('Code')} {err.get('Message')}",
+                    file=sys.stderr,
+                )
+            failed = {(err.get("Key"), err.get("VersionId")) for err in errors}
+            deleted = [
+                s3_obj
+                for s3_obj in batch
+                if (s3_obj["Key"], s3_obj.get("VersionId")) not in failed
+            ]
+
+            total_deleted_count += len(deleted)
+            # Entries without a "Size" (e.g. delete markers, if the listing
+            # yields them) count as zero bytes.
+            total_deleted_size += sum(s3_obj.get("Size", 0) for s3_obj in deleted)
+    finally:
+        # Runs on success and on any exception, so a partial run still
+        # reports how much was deleted before the error is re-raised.
+        plural = "" if total_deleted_count == 1 else "s"
+        print(
+            f"{humanize.intcomma(total_deleted_count)} object{plural} deleted, "
+            f"total {humanize.naturalsize(total_deleted_size)}"
+        )
+
+
+if __name__ == "__main__":
+    args = parse_args()
+
+    # Kept as a module-level name: delete_objects() reads s3_location["Bucket"].
+    s3_location = parse_s3_uri(args.S3_URI)
+    s3_list_args = {"Bucket": s3_location["Bucket"], "Prefix": s3_location["Path"]}
+
+    sess = create_s3_session(args.S3_URI)
+
+    # Use the parsed flag rather than scanning sys.argv: argparse also accepts
+    # unambiguous abbreviations such as --with-v, which a literal string match
+    # would miss and silently fall back to deleting only the latest versions.
+    if args.with_versions:
+        iterator = get_object_versions
+        s3_list_args["KeyMarker"] = args.start_after
+    else:
+        iterator = get_objects
+        s3_list_args["StartAfter"] = args.start_after
+
+    # tqdm wraps the listing so progress is shown as objects are consumed.
+    delete_objects(sess, iterator=tqdm.tqdm(iterator(sess, **s3_list_args)))