Skip to main content

add a script for getting a hash of objects in S3

ID
2739502
date
2023-08-21 11:53:30+00:00
author
Alex Chan <alex@alexwlchan.net>
parent
58e4514
message
add a script for getting a hash of objects in S3
changed files
6 files, 62 additions, 4 deletions

Changed files

aws/README.md (4918) → aws/README.md (5121)

diff --git a/aws/README.md b/aws/README.md
index 7e1bda2..064b944 100644
--- a/aws/README.md
+++ b/aws/README.md
@@ -43,6 +43,13 @@ These are scripts to do stuff in AWS.
   </dd>
 
   <dt>
+    <a href="https://github.com/alexwlchan/scripts/blob/main/aws/s3hash"><code>s3hash <S3_URI> [--algorithm=<ALGO>]</code></a>
+  </dt>
+  <dd>
+    get the checksum/hash of an object in S3
+  </dd>
+
+  <dt>
     <a href="https://github.com/alexwlchan/scripts/blob/main/aws/s3ls"><code>s3ls</code></a>
   </dt>
   <dd>

aws/_common.py (2466) → aws/_common.py (2460)

diff --git a/aws/_common.py b/aws/_common.py
index 1beb251..cc9305d 100755
--- a/aws/_common.py
+++ b/aws/_common.py
@@ -80,9 +80,9 @@ def parse_s3_uri(s3_uri):
         raise ValueError(f"Unrecognised scheme in {s3_uri!r}, expected s3://")
 
     bucket = uri.host
-    prefix = "/".join(uri.path)
+    path = "/".join(uri.path)
 
-    return {"Bucket": bucket, "Prefix": prefix}
+    return {"Bucket": bucket, "Path": path}
 
 
 def create_link_text(*, url, label):

aws/s3hash (0) → aws/s3hash (101)

diff --git a/aws/s3hash b/aws/s3hash
new file mode 100755
index 0000000..97bed45
--- /dev/null
+++ b/aws/s3hash
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+
+set -o errexit
+set -o nounset
+
+_ensure_aws_credentials_are_fresh
+s3hash.py "$@"

aws/s3hash.py (0) → aws/s3hash.py (910)

diff --git a/aws/s3hash.py b/aws/s3hash.py
new file mode 100755
index 0000000..8367cc6
--- /dev/null
+++ b/aws/s3hash.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+"""
+Get the checksum/hash of an object in S3.
+"""
+
+import argparse
+import hashlib
+import os
+
+from _common import create_link_text, create_s3_session, parse_s3_uri
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        prog=os.path.basename(__file__), description="Get the hash of an object in S3"
+    )
+
+    parser.add_argument("S3_URI")
+    parser.add_argument(
+        "--algorithm", help="which checksum algorithm to use", default="sha256"
+    )
+
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    args = parse_args()
+
+    s3_location = parse_s3_uri(args.S3_URI)
+
+    sess = create_s3_session(args.S3_URI)
+
+    s3_obj = sess.client("s3").get_object(
+        Bucket=s3_location["Bucket"], Key=s3_location["Path"]
+    )
+
+    h = hashlib.new(args.algorithm)
+
+    while chunk := s3_obj["Body"].read(8192):
+        h.update(chunk)
+
+    print(h.hexdigest(), end="")

aws/s3ls.py (2056) → aws/s3ls.py (2139)

diff --git a/aws/s3ls.py b/aws/s3ls.py
index 5ab9569..d0ea21f 100755
--- a/aws/s3ls.py
+++ b/aws/s3ls.py
@@ -67,7 +67,8 @@ def get_object_versions(sess, **kwargs):
 if __name__ == "__main__":
     args = parse_args()
 
-    s3_list_args = parse_s3_uri(args.S3_URI)
+    s3_location = parse_s3_uri(args.S3_URI)
+    s3_list_args = {"Bucket": s3_location["Bucket"], "Prefix": s3_location["Path"]}
 
     sess = create_s3_session(args.S3_URI)
 

aws/s3tree.py (6274) → aws/s3tree.py (6357)

diff --git a/aws/s3tree.py b/aws/s3tree.py
index bafd1e7..099f16b 100755
--- a/aws/s3tree.py
+++ b/aws/s3tree.py
@@ -175,7 +175,8 @@ def pprint_s3tree(*, bucket, tree):
 if __name__ == "__main__":
     args = parse_args()
 
-    s3_prefix = parse_s3_uri(args.S3_URI)
+    s3_location = parse_s3_uri(args.S3_URI)
+    s3_prefix = {"Bucket": s3_location["Bucket"], "Prefix": s3_location["Path"]}
 
     sess = create_s3_session(args.S3_URI)