Skip to main content

aws/s3rm.py

#!/usr/bin/env python3
"""
A script for deleting all the objects in an S3 prefix.
"""
6import argparse
7import os
8import sys
10import humanize
11import more_itertools
12import tqdm
14from _common import create_s3_session, parse_s3_uri
15from s3ls import get_objects, get_object_versions
17# https://github.com/alexwlchan/concurrently
18sys.path.append(os.path.join(os.environ["HOME"], "repos", "concurrently"))
20from concurrently import concurrently # noqa: E402
def parse_args():
    """
    Parse the command-line arguments for ``s3rm``.

    Arguments are read from ``sys.argv``.  The returned
    ``argparse.Namespace`` has three attributes:

    * ``S3_URI`` -- the positional argument naming what to delete
    * ``with_versions`` -- ``True`` if ``--with-versions`` was passed,
      ``False`` otherwise
    * ``start_after`` -- the value of ``--start-after``, or ``""`` if the
      flag was omitted
    """
    parser = argparse.ArgumentParser(
        prog="s3rm", description="Delete all the objects in an S3 prefix"
    )

    parser.add_argument("S3_URI")
    parser.add_argument(
        "--with-versions",
        action="store_true",
        help="Delete every version of the objects in S3, not just the latest version",
    )
    parser.add_argument(
        "--start-after", help="Start listing objects at the given key", default=""
    )

    return parser.parse_args()
def delete_objects(sess, iterator, *, bucket=None):
    """
    Delete every S3 object yielded by ``iterator``, then print a summary.

    The objects are sent to S3 in batches of up to 1000 keys per
    DeleteObjects request, with at most three requests in flight at once.
    A one-line summary of what was deleted is always printed, even if the
    run is interrupted or a request raises.

    :param sess: an object with a ``client("s3")`` method that returns an
        S3 client (in this script, whatever ``create_s3_session`` returns).
    :param iterator: an iterable of dicts describing the objects to delete.
        Each dict must have a ``Key``; ``VersionId`` is forwarded to S3 if
        present, and ``Size`` (if present) is added to the reported total.
    :param bucket: name of the bucket to delete from.  If omitted, falls
        back to the module-level ``s3_location["Bucket"]`` that the
        ``__main__`` block of this script sets up.
    :raises RuntimeError: if S3 accepted a request but reported that one or
        more individual keys could not be deleted.
    """
    if bucket is None:
        # Backwards-compatible fallback: this global only exists when the
        # file is run as a script.
        bucket = s3_location["Bucket"]

    # Build the client once and share it between batches, rather than
    # asking the session for a fresh client inside every concurrent handler.
    client = sess.client("s3")

    total_deleted_count = 0
    total_deleted_size = 0
    failure_count = 0
    first_failure = None

    def print_result():
        plural = "" if total_deleted_count == 1 else "s"
        print(
            f"{humanize.intcomma(total_deleted_count)} object{plural} deleted, "
            f"total {humanize.naturalsize(total_deleted_size)}"
        )

    def delete_batch(batch):
        # Only Key and VersionId are valid in a delete request; the listing
        # dicts carry extra fields (e.g. Size) that must be stripped out.
        resp = client.delete_objects(
            Bucket=bucket,
            Delete={
                "Objects": [
                    {k: v for (k, v) in s3_obj.items() if k in {"Key", "VersionId"}}
                    for s3_obj in batch
                ],
            },
        )

        # DeleteObjects can succeed as a request while still failing for
        # individual keys; those are reported here rather than raised.
        return resp.get("Errors", [])

    try:
        for batch, errors in concurrently(
            handler=delete_batch,
            inputs=more_itertools.chunked(iterator, 1000),
            # Note: if you go too fast, you get a SlowDown error from S3.
            max_concurrency=3,
        ):
            if errors:
                failure_count += len(errors)
                if first_failure is None:
                    first_failure = errors[0]

            failed_keys = {err.get("Key") for err in errors}
            failed_versions = {
                (err.get("Key"), err.get("VersionId")) for err in errors
            }

            for s3_obj in batch:
                if "VersionId" in s3_obj:
                    was_rejected = (
                        s3_obj["Key"],
                        s3_obj["VersionId"],
                    ) in failed_versions
                else:
                    was_rejected = s3_obj["Key"] in failed_keys

                if was_rejected:
                    continue

                total_deleted_count += 1
                # Not every listing entry is guaranteed to carry a Size
                # (presumably delete markers don't -- verify against s3ls).
                total_deleted_size += s3_obj.get("Size", 0)
    finally:
        # Runs on success, on errors and on Ctrl-C, so the user always
        # learns how far the deletion got.
        print_result()

    if failure_count:
        raise RuntimeError(
            f"S3 could not delete {humanize.intcomma(failure_count)} object(s); "
            f"first error: {first_failure.get('Code')} "
            f"for key {first_failure.get('Key')!r}: {first_failure.get('Message')}"
        )
if __name__ == "__main__":
    # Script entry point: work out which bucket/prefix to empty, choose how
    # to list it, then delete everything the listing yields.
    args = parse_args()

    # NOTE: s3_location is deliberately a module-level name; delete_objects
    # reads the bucket from it.
    s3_location = parse_s3_uri(args.S3_URI)
    s3_list_args = {"Bucket": s3_location["Bucket"], "Prefix": s3_location["Path"]}

    sess = create_s3_session(args.S3_URI)

    # Use the parsed flag rather than searching sys.argv for the literal
    # string: argparse also accepts unambiguous abbreviations such as
    # "--with-v", which a substring check on sys.argv would miss.
    if args.with_versions:
        iterator = get_object_versions
        # The two listing helpers take differently named "resume from this
        # key" arguments (presumably mirroring the underlying S3 list APIs).
        s3_list_args["KeyMarker"] = args.start_after
    else:
        iterator = get_objects
        s3_list_args["StartAfter"] = args.start_after

    # tqdm wraps the listing so progress is shown as objects are consumed.
    delete_objects(sess, iterator=tqdm.tqdm(iterator(sess, **s3_list_args)))