Skip to main content

Use `concurrently` to make deletions go faster

ID
98db4e4
date
2023-09-06 06:23:15+00:00
author
Alex Chan <alex@alexwlchan.net>
parent
7878985
message
Use `concurrently` to make deletions go faster
changed files
1 file, 21 additions, 11 deletions

Changed files

aws/s3rm.py (2186) → aws/s3rm.py (2429)

diff --git a/aws/s3rm.py b/aws/s3rm.py
index 1e08c30..7272990 100755
--- a/aws/s3rm.py
+++ b/aws/s3rm.py
@@ -4,6 +4,7 @@ A script for deleting all the objects in an S3 prefix.
 """
 
 import argparse
+import os
 import sys
 
 import humanize
@@ -13,6 +14,11 @@ import tqdm
 from _common import create_s3_session, parse_s3_uri
 from s3ls import get_objects, get_object_versions
 
+# https://github.com/alexwlchan/concurrently
+sys.path.append(os.path.join(os.environ["HOME"], "repos", "concurrently"))
+
+from concurrently import concurrently
+
 
 def parse_args():
     parser = argparse.ArgumentParser(
@@ -39,18 +45,22 @@ def delete_objects(sess, iterator):
     def print_result():
         print(f'{humanize.intcomma(total_deleted_count)} object{"s" if total_deleted_count != 1 else ""} deleted, total {humanize.naturalsize(total_deleted_size)}')
 
-    try:
-        for batch in more_itertools.chunked(iterator, 1000):
-            sess.client("s3").delete_objects(
-                Bucket=s3_location["Bucket"],
-                Delete={
-                    "Objects": [
-                        {k: v for (k, v) in s3_obj.items() if k in {"Key", "VersionId"}}
-                        for s3_obj in batch
-                    ],
-                },
-            )
+    def delete_batch(batch):
+        sess.client("s3").delete_objects(
+            Bucket=s3_location["Bucket"],
+            Delete={
+                "Objects": [
+                    {k: v for (k, v) in s3_obj.items() if k in {"Key", "VersionId"}}
+                    for s3_obj in batch
+                ],
+            },
+        )
 
+    try:
+        for batch, _ in concurrently(
+            handler=delete_batch,
+            inputs=more_itertools.chunked(iterator, 1000)
+        ):
             total_deleted_count += len(batch)
             total_deleted_size += sum(s3_obj['Size'] for s3_obj in batch)
     except: