Add my ‘images_only_pdf’ script
- ID: 168410b
- date: 2022-09-26 17:49:17+00:00
- author: Alex Chan <alex@alexwlchan.net>
- parent: 55b2b9b
- message: Add my 'images_only_pdf' script
- changed files: 1 file, 42 additions
Changed files
images_only_pdf (0) → images_only_pdf (1339)
diff --git a/images_only_pdf b/images_only_pdf
new file mode 100755
index 0000000..6f662c3
--- /dev/null
+++ b/images_only_pdf
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+"""
+This script takes a PDF, and creates a new PDF with just the images
+filling the page.
+
+It's working around a behaviour of the "Scan Document" feature in the
+iOS Notes app – when you export the scan as PDF, it adds large white
+borders around the images which is precisely what I don't want.
+"""
+
+import os
+import sys
+
+import fitz # PyMuPDF
+
+
+if __name__ == "__main__":
+ try:
+ path = sys.argv[1]
+ except IndexError:
+ sys.exit(f"Usage: {__file__} <PATH>")
+
+ with fitz.open(path) as pdf_original, fitz.open() as pdf_new:
+ for page_number, page in enumerate(pdf_original, start=1):
+ for image_number, image in enumerate(page.getImageList(), start=1):
+ # Get the XREF of the image
+ xref = image[0]
+
+ # Extract the image bytes
+ base_image = pdf_original.extractImage(xref)
+
+ new_page = pdf_new.new_page(
+ width=base_image["width"], height=base_image["height"]
+ )
+ rect = fitz.Rect(0.0, 0.0, base_image["width"], base_image["height"])
+ new_page.insertImage(rect, stream=base_image["image"])
+
+ out_path = path.replace(".pdf", "-noimages.pdf")
+ assert path != out_path
+
+ pdf_new.save(out_path)
+ print(out_path)