Skip to main content

images/images_only_pdf.py

#!/usr/bin/env python3
"""
This script takes a PDF, and creates a new PDF with just the images
filling the page.

It's working around a behaviour of the "Scan Document" feature in the
iOS Notes app – when you export the scan as PDF, it adds large white
borders around the images, which is precisely what I don't want.
"""
import sys

from pypdf import PdfReader
def _collect_images(reader):
    """Return the ``.image`` of every entry in ``page.images``, in page order.

    ``reader`` only needs a ``pages`` iterable whose items expose ``images``.
    """
    return [
        embedded.image
        for page in reader.pages
        for embedded in page.images
    ]


def main(argv=None):
    """Replace the PDF at ``argv[1]`` with one built only from its images.

    ``argv`` defaults to ``sys.argv``; only ``argv[1]`` (the PDF path) is
    read, and any further arguments are ignored.

    The result is saved to the same path that was read, so the input file
    is overwritten.  Nothing is written unless the PDF yields at least one
    image and the total number of images equals the number of pages.

    Exits via ``sys.exit`` with a message when the path argument is
    missing or when either of those checks fails.
    """
    args = sys.argv if argv is None else argv

    try:
        path = args[1]
    except IndexError:
        sys.exit(f"Usage: {__file__} <PATH>")

    reader = PdfReader(path)
    images = _collect_images(reader)
    page_count = len(reader.pages)

    # Explicit checks rather than ``assert``: assertions are stripped under
    # ``python -O``, and a failed check here must stop us before the
    # original file is overwritten.
    if not images:
        sys.exit(f"No images found in {path}; leaving it unchanged")

    if len(images) != page_count:
        sys.exit(
            f"Expected one image per page in {path}, but found "
            f"{len(images)} image(s) across {page_count} page(s); "
            "leaving it unchanged"
        )

    # NOTE(review): the images are presumably PIL images, given the
    # ``save(..., "PDF", save_all=True, append_images=...)`` call - confirm.
    first, *rest = images
    first.save(
        path, "PDF", resolution=100.0, save_all=True, append_images=rest
    )


if __name__ == "__main__":
    main()