Use concurrently to make deletions go faster
- ID: 98db4e4
- date: 2023-09-06 06:23:15+00:00
- author: Alex Chan <alex@alexwlchan.net>
- parent: 7878985
- message: Use `concurrently` to make deletions go faster
- changed files: 1 file, 21 additions, 11 deletions
Changed files
aws/s3rm.py (2186) → aws/s3rm.py (2429)
diff --git a/aws/s3rm.py b/aws/s3rm.py
index 1e08c30..7272990 100755
--- a/aws/s3rm.py
+++ b/aws/s3rm.py
@@ -4,6 +4,7 @@ A script for deleting all the objects in an S3 prefix.
"""
import argparse
+import os
import sys
import humanize
@@ -13,6 +14,11 @@ import tqdm
from _common import create_s3_session, parse_s3_uri
from s3ls import get_objects, get_object_versions
+# https://github.com/alexwlchan/concurrently
+sys.path.append(os.path.join(os.environ["HOME"], "repos", "concurrently"))
+
+from concurrently import concurrently
+
def parse_args():
parser = argparse.ArgumentParser(
@@ -39,18 +45,22 @@ def delete_objects(sess, iterator):
def print_result():
print(f'{humanize.intcomma(total_deleted_count)} object{"s" if total_deleted_count != 1 else ""} deleted, total {humanize.naturalsize(total_deleted_size)}')
- try:
- for batch in more_itertools.chunked(iterator, 1000):
- sess.client("s3").delete_objects(
- Bucket=s3_location["Bucket"],
- Delete={
- "Objects": [
- {k: v for (k, v) in s3_obj.items() if k in {"Key", "VersionId"}}
- for s3_obj in batch
- ],
- },
- )
+ def delete_batch(batch):
+ sess.client("s3").delete_objects(
+ Bucket=s3_location["Bucket"],
+ Delete={
+ "Objects": [
+ {k: v for (k, v) in s3_obj.items() if k in {"Key", "VersionId"}}
+ for s3_obj in batch
+ ],
+ },
+ )
+ try:
+ for batch, _ in concurrently(
+ handler=delete_batch,
+ inputs=more_itertools.chunked(iterator, 1000)
+ ):
total_deleted_count += len(batch)
total_deleted_size += sum(s3_obj['Size'] for s3_obj in batch)
except: