add a script for getting a hash of objects in S3
- ID
  2739502
- date
  2023-08-21 11:53:30+00:00
- author
  Alex Chan <alex@alexwlchan.net>
- parent
  58e4514
- message
  add a script for getting a hash of objects in S3
- changed files
  6 files, 62 additions, 4 deletions
Changed files
aws/README.md (4918) → aws/README.md (5121)
diff --git a/aws/README.md b/aws/README.md
index 7e1bda2..064b944 100644
--- a/aws/README.md
+++ b/aws/README.md
@@ -43,6 +43,13 @@ These are scripts to do stuff in AWS.
</dd>
<dt>
+ <a href="https://github.com/alexwlchan/scripts/blob/main/aws/s3hash"><code>s3hash <S3_URI> [--algorithm=<ALGO>]</code></a>
+ </dt>
+ <dd>
+ get the checksum/hash of an object in S3
+ </dd>
+
+ <dt>
<a href="https://github.com/alexwlchan/scripts/blob/main/aws/s3ls"><code>s3ls</code></a>
</dt>
<dd>
aws/_common.py (2466) → aws/_common.py (2460)
diff --git a/aws/_common.py b/aws/_common.py
index 1beb251..cc9305d 100755
--- a/aws/_common.py
+++ b/aws/_common.py
@@ -80,9 +80,9 @@ def parse_s3_uri(s3_uri):
raise ValueError(f"Unrecognised scheme in {s3_uri!r}, expected s3://")
bucket = uri.host
- prefix = "/".join(uri.path)
+ path = "/".join(uri.path)
- return {"Bucket": bucket, "Prefix": prefix}
+ return {"Bucket": bucket, "Path": path}
def create_link_text(*, url, label):
aws/s3hash (0) → aws/s3hash (101)
diff --git a/aws/s3hash b/aws/s3hash
new file mode 100755
index 0000000..97bed45
--- /dev/null
+++ b/aws/s3hash
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+
+set -o errexit
+set -o nounset
+
+_ensure_aws_credentials_are_fresh
+s3hash.py "$@"
aws/s3hash.py (0) → aws/s3hash.py (910)
diff --git a/aws/s3hash.py b/aws/s3hash.py
new file mode 100755
index 0000000..8367cc6
--- /dev/null
+++ b/aws/s3hash.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+"""
+Get the checksum/hash of an object in S3.
+"""
+
+import argparse
+import hashlib
+import os
+
+from _common import create_link_text, create_s3_session, parse_s3_uri
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(
+ prog=os.path.basename(__file__), description="Get the hash of an object in S3"
+ )
+
+ parser.add_argument("S3_URI")
+ parser.add_argument(
+ "--algorithm", help="which checksum algorithm to use", default="sha256"
+ )
+
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+ args = parse_args()
+
+ s3_location = parse_s3_uri(args.S3_URI)
+
+ sess = create_s3_session(args.S3_URI)
+
+ s3_obj = sess.client("s3").get_object(
+ Bucket=s3_location["Bucket"], Key=s3_location["Path"]
+ )
+
+ h = hashlib.new(args.algorithm)
+
+ while chunk := s3_obj["Body"].read(8192):
+ h.update(chunk)
+
+ print(h.hexdigest(), end="")
aws/s3ls.py (2056) → aws/s3ls.py (2139)
diff --git a/aws/s3ls.py b/aws/s3ls.py
index 5ab9569..d0ea21f 100755
--- a/aws/s3ls.py
+++ b/aws/s3ls.py
@@ -67,7 +67,8 @@ def get_object_versions(sess, **kwargs):
if __name__ == "__main__":
args = parse_args()
- s3_list_args = parse_s3_uri(args.S3_URI)
+ s3_location = parse_s3_uri(args.S3_URI)
+ s3_list_args = {"Bucket": s3_location["Bucket"], "Prefix": s3_location["Path"]}
sess = create_s3_session(args.S3_URI)
aws/s3tree.py (6274) → aws/s3tree.py (6357)
diff --git a/aws/s3tree.py b/aws/s3tree.py
index bafd1e7..099f16b 100755
--- a/aws/s3tree.py
+++ b/aws/s3tree.py
@@ -175,7 +175,8 @@ def pprint_s3tree(*, bucket, tree):
if __name__ == "__main__":
args = parse_args()
- s3_prefix = parse_s3_uri(args.S3_URI)
+ s3_location = parse_s3_uri(args.S3_URI)
+ s3_prefix = {"Bucket": s3_location["Bucket"], "Prefix": s3_location["Path"]}
sess = create_s3_session(args.S3_URI)