add a script to find big commits
- ID: 9bee71f
- date: 2023-05-28 04:03:02+00:00
- author: Alex Chan <alex@alexwlchan.net>
- parent: 5eb4752
- message: add a script to find big commits
- changed files: 2 files, 83 additions
Changed files
git/README.md (3154) → git/README.md (3393)
diff --git a/git/README.md b/git/README.md
index b7c11fa..8f2f38c 100644
--- a/git/README.md
+++ b/git/README.md
@@ -28,6 +28,15 @@ These scripts are all shortcuts for using [Git], mostly designed to let me do my
</dd>
<dt>
+ <a href="https://github.com/alexwlchan/scripts/blob/main/git/find_big_commits">
+ <code>find_big_commits</code>
+ </a>
+ </dt>
+ <dd>
+ print some information about the biggest files/commits in the Git history.
+ </dd>
+
+ <dt>
<a href="https://github.com/alexwlchan/scripts/blob/main/git/gb">
<code>gb [name]</code>
</a>
git/find_big_commits (0) → git/find_big_commits (2072)
diff --git a/git/find_big_commits b/git/find_big_commits
new file mode 100755
index 0000000..17db086
--- /dev/null
+++ b/git/find_big_commits
@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+"""
+Give some information about the biggest files in the .git folder.
+
+This is based on a Stack Overflow answer by raphinesse [1], with a bunch of
+extra formatting and the total of the .git folder printed also.
+
+[1]: https://stackoverflow.com/a/42544963/1558022
+
+"""
+
+import os
+import subprocess
+
+import humanize # humanize==4.4.0
+import termcolor # termcolor==2.1.1
+
+
def get_blobs():
    """
    Generate a record for every blob reachable from any ref.

    Runs ``git rev-list --objects --all`` in the current working directory
    and pipes the result into ``git cat-file --batch-check``.

    Yields dicts with three keys:

    *   ``commit_id`` -- the object name reported by ``git cat-file``.
        Only blobs are yielded, so this is a blob ID rather than a commit
        ID; the key name is kept so existing callers keep working.
    *   ``size`` -- the object size reported by Git, as an ``int``.
    *   ``filename`` -- whatever followed the object name on the
        ``rev-list`` line, which for blobs is the path.

    Raises ``subprocess.CalledProcessError`` if either Git command exits
    with a non-zero status.
    """
    batch_format = "%(objecttype)\t%(objectname)\t%(objectsize)\t%(rest)"

    # Wire up ``rev-list | cat-file`` directly instead of going through a
    # shell: no dependence on POSIX quoting rules, and a failure in
    # ``rev-list`` is no longer hidden behind the exit status of the last
    # command in the pipe.
    rev_list_cmd = ["git", "rev-list", "--objects", "--all"]
    rev_list = subprocess.Popen(rev_list_cmd, stdout=subprocess.PIPE)
    try:
        output = subprocess.check_output(
            ["git", "cat-file", f"--batch-check={batch_format}"],
            stdin=rev_list.stdout,
        )
    finally:
        # Closing our copy of the pipe lets ``rev-list`` see SIGPIPE and
        # exit if ``cat-file`` stopped reading early.
        rev_list.stdout.close()
        rev_list_status = rev_list.wait()

    if rev_list_status != 0:
        raise subprocess.CalledProcessError(rev_list_status, rev_list_cmd)

    # errors="replace": one path that isn't valid UTF-8 shouldn't abort the
    # whole listing.
    #
    # split("\n") rather than splitlines(): the latter also breaks on
    # characters such as form feed or U+2028, which may appear in paths.
    for line in output.decode("utf8", errors="replace").split("\n"):
        if not line:
            continue

        # maxsplit=3 keeps a path that itself contains tabs in one piece.
        object_type, object_name, object_size, rest = line.split("\t", 3)

        if object_type == "blob":
            yield {
                "commit_id": object_name,
                "size": int(object_size),
                "filename": rest,
            }
+
+
def get_file_paths_under(root=".", *, suffix=""):
    """
    Generate the path to every file under ``root``.

    The directory tree is walked with ``os.walk`` and each yielded path is
    ``root`` joined with the file's location beneath it.

    ``suffix`` restricts the results to files whose name ends with it.
    The comparison ignores case on both sides, so ``".jpg"`` and ``".JPG"``
    select the same files.  A tuple of suffixes is accepted as well.  The
    default (empty string) matches every file.

    Raises ``ValueError`` if ``root`` is not an existing directory.  Because
    this is a generator, the error surfaces when iteration starts.
    """
    if not os.path.isdir(root):
        raise ValueError(f"Cannot find files under non-existent directory: {root!r}")

    # The filename is lowercased before comparing, so the suffix has to be
    # lowercased too -- otherwise a suffix with capital letters never matches.
    if isinstance(suffix, str):
        wanted = suffix.lower()
    else:
        wanted = tuple(s.lower() for s in suffix)

    for dirpath, _, filenames in os.walk(root):
        for name in filenames:
            path = os.path.join(dirpath, name)

            # Check the name first; it's cheaper than touching the disk.
            if name.lower().endswith(wanted) and os.path.isfile(path):
                yield path
+
+
def get_git_folder_size():
    """
    Return the total size in bytes of the files in the Git directory.

    The directory is whatever ``git rev-parse --absolute-git-dir`` reports
    for the current working directory, rather than a hard-coded
    ``<toplevel>/.git``.  That keeps this working in submodules (where
    ``.git`` is a file, not a directory) and in bare repositories (which
    have no top-level working tree).

    NOTE: in a linked worktree Git reports the worktree's private
    administrative directory, so the figure there does not include the
    shared object store.

    Raises ``subprocess.CalledProcessError`` if Git fails, e.g. when run
    outside a repository.
    """
    git_dir = (
        subprocess.check_output(["git", "rev-parse", "--absolute-git-dir"])
        .decode("utf8")
        .rstrip("\r\n")
    )

    return sum(os.path.getsize(path) for path in get_file_paths_under(git_dir))
+
+
if __name__ == "__main__":
    # Only list blobs of at least 1024 bytes, ordered smallest to largest
    # so the biggest ones are the last thing printed.
    big_blobs = sorted(
        (blob for blob in get_blobs() if blob["size"] >= 1024),
        key=lambda blob: blob["size"],
    )

    for blob in big_blobs:
        short_id = blob["commit_id"][:7]
        size_label = humanize.naturalsize(blob["size"]).rjust(10)
        print(short_id, size_label, " ", blob["filename"])

    # Finish with a summary row for the Git folder as a whole, highlighted
    # in blue.  The seven spaces stand in for the abbreviated ID column.
    total_label = humanize.naturalsize(get_git_folder_size()).rjust(10)
    id_padding = " " * 7
    print(
        id_padding,
        termcolor.colored(total_label, "blue"),
        " ",
        termcolor.colored(".git", "blue"),
    )