Skip to main content

images/pdfthumb.py

1#!/usr/bin/env python3
2"""
This script creates a thumbnail of a specific page of a PDF file
(400×400 pixels by default; pass --width to change the size).

It uses similar thumbnailing logic to docstore [1], but it allows me to
pick a particular page rather than the first page.

This is helpful because sometimes I download PDF cross-stitch patterns
where the first page contains text which isn't a good thumbnail, but
later pages do show the whole pattern.

[1]: https://github.com/alexwlchan/docstore/blob/main/src/docstore/thumbnails.py

14"""
16import argparse
17import os
18import sys
19import subprocess
20import tempfile
22from pypdf import PdfReader, PdfWriter
def parse_args(argv):
    """
    Parse the command-line arguments for this script.

    ``argv`` is the list of arguments without the program name,
    e.g. ``sys.argv[1:]``.

    Returns an ``argparse.Namespace`` with three attributes:

    *   ``PATH`` -- the path to the PDF file, as typed on the command line
    *   ``page`` -- the 1-indexed page number to thumbnail (required,
        always >= 1)
    *   ``width`` -- the pixel width of the thumbnail (defaults to 400)

    If the arguments are invalid, argparse prints a usage message to
    stderr and exits with status 2 (raising ``SystemExit``).
    """

    def page_number(value):
        # Keep the same wording argparse uses for a plain ``type=int``
        # so non-numeric input is reported exactly as before.
        try:
            number = int(value)
        except ValueError:
            raise argparse.ArgumentTypeError(f"invalid int value: {value!r}")

        # Page numbers are 1-indexed on the command line.  Reject zero and
        # negative numbers here: the caller subtracts 1 to get a list
        # index, and a negative index would silently pick a page counted
        # from the end of the PDF rather than reporting a mistake.
        if number < 1:
            raise argparse.ArgumentTypeError(
                f"page numbers start at 1, got {number}"
            )

        return number

    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description="Get thumbnails from a specific page of a PDF file.",
    )

    parser.add_argument("PATH")
    parser.add_argument(
        "--page",
        type=page_number,
        metavar="PAGE_NUMBER",
        help="which page of the PDF to get",
        required=True,
    )
    parser.add_argument(
        "--width", type=int, help="pixel width of the generated thumbnail", default=400
    )

    return parser.parse_args(argv)
if __name__ == "__main__":
    # Entry point: pull one page out of the PDF into a temporary
    # single-page PDF, then ask Quick Look to render a thumbnail of it.
    args = parse_args(sys.argv[1:])

    reader = PdfReader(args.PATH)
    page_count = len(reader.pages)

    # Page numbers on the command line are 1-indexed, but the list of
    # pages is 0-indexed.
    #
    # Check the range explicitly rather than relying on IndexError: a page
    # number of 0 or below turns into a negative index, which selects a
    # page counted from the end of the document instead of failing.
    if not 1 <= args.page <= page_count:
        sys.exit(f"Unrecognised page: {args.page}, expected 1...{page_count}")

    page = reader.pages[args.page - 1]

    writer = PdfWriter()
    writer.add_page(page)

    with tempfile.TemporaryDirectory(suffix=".pdf") as temp_dir:
        # Reuse the original filename for the single-page PDF.
        # NOTE(review): presumably qlmanage names the thumbnail after its
        # input file, which is why the name is kept -- confirm.
        out_path = os.path.join(temp_dir, os.path.basename(args.PATH))

        # Close the file before calling qlmanage, so the PDF is fully
        # written to disk by the time it gets read.
        with open(out_path, "wb") as out_file:
            writer.write(out_file)

        # This must run while the temporary directory still exists.
        # The thumbnail is written to the current working directory.
        subprocess.check_call(
            ["qlmanage", "-t", out_path, "-s", f"{args.width}x{args.width}", "-o", "."]
        )