Skip to main content

Add a rudimentary s3_unfreeze script

ID
53e8065
date
2023-06-21 23:44:52+00:00
author
Alex Chan <alex@alexwlchan.net>
parent
bd2e37f
message
Add a rudimentary s3_unfreeze script
changed files
4 files, 103 additions, 4 deletions

Changed files

aws/README.md (4643) → aws/README.md (4918)

diff --git a/aws/README.md b/aws/README.md
index e78346d..7e1bda2 100644
--- a/aws/README.md
+++ b/aws/README.md
@@ -36,6 +36,13 @@ These are scripts to do stuff in AWS.
   </dd>
 
   <dt>
+    <a href="https://github.com/alexwlchan/scripts/blob/main/aws/s3_unfreeze"><code>s3_unfreeze</code></a>
+  </dt>
+  <dd>
+    takes a text file listing S3 URIs as input, and either restores those objects from Glacier or reports the status of an in-progress restoration
+  </dd>
+
+  <dt>
     <a href="https://github.com/alexwlchan/scripts/blob/main/aws/s3ls"><code>s3ls</code></a>
   </dt>
   <dd>

aws/_common.py (2310) → aws/_common.py (2466)

diff --git a/aws/_common.py b/aws/_common.py
index 160918d..1beb251 100755
--- a/aws/_common.py
+++ b/aws/_common.py
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
 
+import functools
+
 import boto3
 import hyperlink
 
@@ -11,6 +13,7 @@ ACCOUNT_NAMES = {
 }
 
 
+@functools.cache
 def get_aws_session(*, role_arn):
     sts_client = boto3.client("sts")
     assumed_role_object = sts_client.assume_role(
@@ -25,7 +28,7 @@ def get_aws_session(*, role_arn):
     )
 
 
-def guess_account(s3_identifier):
+def guess_account(s3_identifier, role_name):
     """
     Given the name of an S3 bucket, guess the account it belongs to.
 
@@ -47,6 +50,7 @@ def guess_account(s3_identifier):
     elif (
         "wellcomecollection-assets-workingstorage" in s3_identifier
         or "wellcomecollection-platform" in s3_identifier
+        or "wellcomecollection-editorial-photography" in s3_identifier
     ):
         account_id = "760097843905"
     else:
@@ -57,12 +61,12 @@ def guess_account(s3_identifier):
     return {
         "account_id": account_id,
         "name": account_name,
-        "role_arn": f"arn:aws:iam::{account_id}:role/{account_name}-read_only",
+        "role_arn": f"arn:aws:iam::{account_id}:role/{account_name}-{role_name}",
     }
 
 
-def create_s3_session(s3_identifier):
-    account = guess_account(s3_identifier)
+def create_s3_session(s3_identifier, *, role_name="read_only"):
+    account = guess_account(s3_identifier, role_name)
     if account:
         return get_aws_session(role_arn=account["role_arn"])
     else:

aws/s3_unfreeze (0) → aws/s3_unfreeze (106)

diff --git a/aws/s3_unfreeze b/aws/s3_unfreeze
new file mode 100755
index 0000000..1a49f0f
--- /dev/null
+++ b/aws/s3_unfreeze
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+
+set -o errexit  # exit as soon as any command fails
+set -o nounset  # treat use of an unset variable as an error
+
+_ensure_aws_credentials_are_fresh  # not defined in this script; presumably a helper found on PATH -- TODO confirm
+s3_unfreeze.py "$@"  # forward all arguments unchanged to the Python script

aws/s3_unfreeze.py (0) → aws/s3_unfreeze.py (2148)

diff --git a/aws/s3_unfreeze.py b/aws/s3_unfreeze.py
new file mode 100755
index 0000000..73a877c
--- /dev/null
+++ b/aws/s3_unfreeze.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+"""
+This is a rudimentary script for restoring S3 objects from Glacier.
+
+You pass it a text file with a list of S3 URIs to restore, and it will
+initiate a Glacier restore for each of them.
+
+You can also use it to track the progress of a restore operation -- it
+reports a count of how many objects are in-progress/already restored.
+"""
+
+import os
+import sys
+
+from botocore.exceptions import ClientError
+import hyperlink
+import tqdm
+
+from _common import create_s3_session
+
+sys.path.append(os.path.join(os.environ["HOME"], "repos", "concurrently"))
+from concurrently import concurrently
+
+
+def restore_object(s3_client, s3_uri):  # returns "RestoreInProgress" or "RestoredSuccessfully" for one S3 URI
+    uri = hyperlink.URL.from_text(s3_uri)
+
+    bucket = uri.host  # bucket name is taken from the URI host
+    key = "/".join(uri.path)  # object key is the URI path segments joined with "/"
+
+    head_resp = s3_client.head_object(Bucket=bucket, Key=key)  # inspect any existing restore state before requesting a new one
+
+    if head_resp.get('Restore') == 'ongoing-request="true"':  # a restore is already running for this object
+        return "RestoreInProgress"
+
+    if 'ongoing-request="false"' in head_resp.get('Restore', ''):  # substring match: presumably the header also carries an expiry date -- TODO confirm
+        return 'RestoredSuccessfully'
+
+    try:
+        resp = s3_client.restore_object(
+            Bucket=bucket,
+            Key=key,
+            RestoreRequest={"Days": 7, "GlacierJobParameters": {"Tier": "Standard"}},  # hard-coded: 7 days, Standard tier
+        )
+    except ClientError as err:
+        if err.response["Error"]["Code"] == "RestoreAlreadyInProgress":  # counted as in progress rather than as a failure
+            return "RestoreInProgress"
+        else:
+            raise  # any other client error propagates to the caller
+
+    if resp["ResponseMetadata"]["HTTPStatusCode"] == 200:  # NOTE(review): presumably 200 = already restored, other codes (e.g. 202) = newly accepted -- confirm against S3 docs
+        return "RestoredSuccessfully"
+    else:
+        return "RestoreInProgress"
+
+
+if __name__ == "__main__":
+    try:
+        path = sys.argv[1]  # path to a text file with one S3 URI per line
+    except IndexError:
+        sys.exit(f"Usage: {__file__} <LIST_OF_KEYS>")
+
+    results = {  # tally keyed by the status strings restore_object returns
+        "RestoredSuccessfully": 0,
+        "RestoreInProgress": 0,
+    }
+
+    with open(path) as infile:
+        s3_uris = [line.strip() for line in infile]  # NOTE(review): blank lines are kept as "" entries
+
+    s3 = create_s3_session(s3_uris[0], role_name="developer").client("s3")  # one client, chosen from the first URI only; IndexError if the file is empty
+
+    for _, output in tqdm.tqdm(
+        concurrently(inputs=s3_uris, handler=lambda s3_uri: restore_object(s3, s3_uri)),  # presumably yields (input, output) pairs -- confirm against the concurrently helper
+        total=len(s3_uris),
+    ):
+        results[output] += 1
+
+    from pprint import pprint
+
+    pprint(results)  # print the final counts