Add a rudimentary s3_unfreeze script
- ID: 53e8065
- date: 2023-06-21 23:44:52+00:00
- author: Alex Chan <alex@alexwlchan.net>
- parent: bd2e37f
- message: Add a rudimentary s3_unfreeze script
- changed files: 4 files, 103 additions, 4 deletions
Changed files
aws/README.md (4643) → aws/README.md (4918)
diff --git a/aws/README.md b/aws/README.md
index e78346d..7e1bda2 100644
--- a/aws/README.md
+++ b/aws/README.md
@@ -36,6 +36,13 @@ These are scripts to do stuff in AWS.
</dd>
<dt>
+ <a href="https://github.com/alexwlchan/scripts/blob/main/aws/s3_unfreeze"><code>s3_unfreeze</code></a>
+ </dt>
+ <dd>
+ takes a list of S3 URIs as input, and either restores those objects from Glacier or reports the status of an in-progress restoration
+ </dd>
+
+ <dt>
<a href="https://github.com/alexwlchan/scripts/blob/main/aws/s3ls"><code>s3ls</code></a>
</dt>
<dd>
aws/_common.py (2310) → aws/_common.py (2466)
diff --git a/aws/_common.py b/aws/_common.py
index 160918d..1beb251 100755
--- a/aws/_common.py
+++ b/aws/_common.py
@@ -1,5 +1,7 @@
#!/usr/bin/env python3
+import functools
+
import boto3
import hyperlink
@@ -11,6 +13,7 @@ ACCOUNT_NAMES = {
}
+@functools.cache
def get_aws_session(*, role_arn):
sts_client = boto3.client("sts")
assumed_role_object = sts_client.assume_role(
@@ -25,7 +28,7 @@ def get_aws_session(*, role_arn):
)
-def guess_account(s3_identifier):
+def guess_account(s3_identifier, role_name):
"""
Given the name of an S3 bucket, guess the account it belongs to.
@@ -47,6 +50,7 @@ def guess_account(s3_identifier):
elif (
"wellcomecollection-assets-workingstorage" in s3_identifier
or "wellcomecollection-platform" in s3_identifier
+ or "wellcomecollection-editorial-photography" in s3_identifier
):
account_id = "760097843905"
else:
@@ -57,12 +61,12 @@ def guess_account(s3_identifier):
return {
"account_id": account_id,
"name": account_name,
- "role_arn": f"arn:aws:iam::{account_id}:role/{account_name}-read_only",
+ "role_arn": f"arn:aws:iam::{account_id}:role/{account_name}-{role_name}",
}
-def create_s3_session(s3_identifier):
- account = guess_account(s3_identifier)
+def create_s3_session(s3_identifier, *, role_name="read_only"):
+ account = guess_account(s3_identifier, role_name)
if account:
return get_aws_session(role_arn=account["role_arn"])
else:
aws/s3_unfreeze (0) → aws/s3_unfreeze (106)
diff --git a/aws/s3_unfreeze b/aws/s3_unfreeze
new file mode 100755
index 0000000..1a49f0f
--- /dev/null
+++ b/aws/s3_unfreeze
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+# Thin wrapper around s3_unfreeze.py: run the credential helper first,
+# then hand every command-line argument through to the Python script.
+
+# Abort on the first failing command, and treat unset variables as errors.
+set -o errexit -o nounset
+
+# NOTE(review): helper defined outside this script -- presumably it renews
+# the local AWS credentials if they have expired; confirm against its source.
+_ensure_aws_credentials_are_fresh
+s3_unfreeze.py "$@"
aws/s3_unfreeze.py (0) → aws/s3_unfreeze.py (2148)
diff --git a/aws/s3_unfreeze.py b/aws/s3_unfreeze.py
new file mode 100755
index 0000000..73a877c
--- /dev/null
+++ b/aws/s3_unfreeze.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+"""
+This is a rudimentary script for restoring S3 objects from Glacier.
+
+You pass it a text file with a list of S3 URIs to restore, and it will
+initiate a Glacier restore for each of them.
+
+You can also use it to track the progress of a restore operation -- it
+reports a count of how many objects are in-progress/already restored.
+"""
+
+import os
+import sys
+
+from botocore.exceptions import ClientError
+import hyperlink
+import tqdm
+
+from _common import create_s3_session
+
+sys.path.append(os.path.join(os.environ["HOME"], "repos", "concurrently"))
+from concurrently import concurrently
+
+
+def restore_object(s3_client, s3_uri, *, days=7, tier="Standard"):
+    """
+    Make sure the S3 object at ``s3_uri`` is being restored from Glacier.
+
+    The object is inspected with HeadObject first; a new restore is only
+    requested if its ``Restore`` header doesn't already report one.
+
+    ``s3_client`` is a boto3 S3 client, and ``s3_uri`` is a URI of the
+    form ``s3://bucket/path/to/key``.
+
+    ``days`` is how long the restored copy should be kept, and ``tier``
+    is the Glacier retrieval tier; both are passed straight through to
+    the RestoreObject request.
+
+    Returns ``"RestoreInProgress"`` if a restore is under way (including
+    one started by this call), or ``"RestoredSuccessfully"`` if a restored
+    copy is already available.
+
+    Any ``ClientError`` other than ``RestoreAlreadyInProgress`` is re-raised.
+    """
+    uri = hyperlink.URL.from_text(s3_uri)
+
+    bucket = uri.host
+    key = "/".join(uri.path)
+
+    # The Restore header is absent if no restore has been requested.
+    restore_header = s3_client.head_object(Bucket=bucket, Key=key).get("Restore", "")
+
+    if restore_header == 'ongoing-request="true"':
+        return "RestoreInProgress"
+
+    # A finished restore also carries other fields in the header (such as
+    # an expiry date), so this is a substring match rather than equality.
+    if 'ongoing-request="false"' in restore_header:
+        return "RestoredSuccessfully"
+
+    try:
+        resp = s3_client.restore_object(
+            Bucket=bucket,
+            Key=key,
+            RestoreRequest={"Days": days, "GlacierJobParameters": {"Tier": tier}},
+        )
+    except ClientError as err:
+        # Somebody else asked for this restore between our HeadObject call
+        # and now; that's the outcome we wanted, so it isn't an error.
+        if err.response["Error"]["Code"] == "RestoreAlreadyInProgress":
+            return "RestoreInProgress"
+        raise
+
+    # Per the RestoreObject API docs, S3 answers 200 OK if the object was
+    # already restored and 202 Accepted if a new restore has been started.
+    if resp["ResponseMetadata"]["HTTPStatusCode"] == 200:
+        return "RestoredSuccessfully"
+
+    return "RestoreInProgress"
+
+
+if __name__ == "__main__":
+    try:
+        path = sys.argv[1]
+    except IndexError:
+        sys.exit(f"Usage: {__file__} <LIST_OF_KEYS>")
+
+    # Skip blank lines: they would otherwise be treated as URIs and sent
+    # to S3 with an empty bucket and key.
+    with open(path) as infile:
+        s3_uris = [s3_uri for line in infile if (s3_uri := line.strip())]
+
+    if not s3_uris:
+        sys.exit(f"No S3 URIs found in {path}")
+
+    # Tally of the statuses returned by restore_object().
+    results = {
+        "RestoredSuccessfully": 0,
+        "RestoreInProgress": 0,
+    }
+
+    # The session is created from the first URI only, so every URI in the
+    # file has to be reachable with that one account's "developer" role.
+    s3 = create_s3_session(s3_uris[0], role_name="developer").client("s3")
+
+    for _, output in tqdm.tqdm(
+        concurrently(inputs=s3_uris, handler=lambda s3_uri: restore_object(s3, s3_uri)),
+        total=len(s3_uris),
+    ):
+        results[output] += 1
+
+    from pprint import pprint
+
+    pprint(results)