Skip to main content

aws/s3ls.py

1#!/usr/bin/env python3
2"""
3A script for listing all the objects in an S3 prefix.
5Objects are printed to stdout as JSON, one object per line.
6"""
8import argparse
9import datetime
10import json
11import sys
13import tqdm
15from _common import create_s3_session, parse_s3_uri
def parse_args(argv=None):
    """
    Parse the command-line arguments for s3ls.

    ``argv`` is an optional list of argument strings to parse.  When it is
    left as ``None``, argparse reads the arguments from ``sys.argv``, which
    is what the script does when run from the command line.

    Returns an ``argparse.Namespace`` with three attributes: ``S3_URI``,
    ``with_versions`` (a bool) and ``start_after`` (a string, empty when
    the flag is not supplied).
    """
    parser = argparse.ArgumentParser(
        prog="s3ls", description="List all the objects in an S3 prefix"
    )

    parser.add_argument("S3_URI")
    parser.add_argument(
        "--with-versions",
        action="store_true",
        help="List every version of the objects in S3, not just the latest version",
    )
    parser.add_argument(
        "--start-after", help="Start listing objects at the given key", default=""
    )

    return parser.parse_args(argv)
class DatetimeEncoder(json.JSONEncoder):
    """
    A JSON encoder that writes ``datetime.datetime`` values as ISO 8601
    strings.
    """

    def default(self, obj):
        """
        Return a JSON-serialisable replacement for ``obj``.

        ``datetime.datetime`` instances become the string returned by
        their ``isoformat()`` method.  Anything else is handed to the base
        class, which raises ``TypeError``.
        """
        if isinstance(obj, datetime.datetime):
            return obj.isoformat()

        # Without this fallback the method would return None for every
        # unsupported type, and json would silently emit `null` instead of
        # reporting that the value cannot be serialised.
        return super().default(obj)
def get_objects(sess, **kwargs):
    """
    Generates every object returned by the S3 ``list_objects_v2`` paginator.

    ``sess`` must provide ``client("s3")``; the keyword arguments are
    passed unchanged to the paginator's ``paginate()`` call.

    Pages that have no "Contents" key contribute nothing, so listing an
    empty prefix yields no objects instead of raising ``KeyError``.
    """
    paginator = sess.client("s3").get_paginator("list_objects_v2")

    for page in paginator.paginate(**kwargs):
        # A page with no matching keys has no "Contents" entry at all.
        yield from page.get("Contents", [])
def get_object_versions(sess, **kwargs):
    """
    Generates every entry returned by the S3 ``list_object_versions``
    paginator.

    For each page, the entries under "Versions" are yielded first, then
    the entries under "DeleteMarkers".  A page that lacks either key
    simply contributes nothing for that key.  The keyword arguments are
    passed unchanged to the paginator's ``paginate()`` call.
    """
    versions_paginator = sess.client("s3").get_paginator("list_object_versions")

    for response in versions_paginator.paginate(**kwargs):
        yield from response.get("Versions", ())
        yield from response.get("DeleteMarkers", ())
if __name__ == "__main__":
    # Script entry point: list the objects under the given S3 URI and print
    # each one to stdout as a single line of JSON.
    args = parse_args()

    s3_location = parse_s3_uri(args.S3_URI)
    s3_list_args = {"Bucket": s3_location["Bucket"], "Prefix": s3_location["Path"]}

    sess = create_s3_session(args.S3_URI)

    # Use the flag as parsed by argparse rather than searching sys.argv for
    # the literal string: argparse also accepts unambiguous abbreviations
    # (e.g. `--with-v`), which a raw string comparison would miss.
    if args.with_versions:
        iterator = get_object_versions
        # The two listing calls take their starting key under different
        # parameter names.
        s3_list_args["KeyMarker"] = args.start_after
    else:
        iterator = get_objects
        s3_list_args["StartAfter"] = args.start_after

    for s3_obj in tqdm.tqdm(iterator(sess, **s3_list_args)):
        print(json.dumps(s3_obj, cls=DatetimeEncoder))