Skip to main content

document the pdf thumbnail script

ID
7272e8d
date
2023-05-01 11:00:41+00:00
author
Alex Chan <alex@alexwlchan.net>
parent
42857dc
message
document the pdf thumbnail script
changed files
3 files, 81 additions, 45 deletions

Changed files

get_pdf_thumbnail (1274) → get_pdf_thumbnail (0)

diff --git a/get_pdf_thumbnail b/get_pdf_thumbnail
deleted file mode 100755
index fa918ed..0000000
--- a/get_pdf_thumbnail
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/usr/bin/env python3
-"""
-This script creates a 400×400 thumbnail of a specific page of a PDF file.
-
-It uses similar thumbnailing logic to docstore [1], but it allows me to
-pick a particular page rather than the first page.
-
-This is helpful because sometimes I download PDF cross-stitch patterns
-where the first page contains text which isn't a good thumbnail, but
-later pages do show the whole pattern.
-
-[1]: https://github.com/alexwlchan/docstore/blob/main/src/docstore/thumbnails.py
-
-"""
-
-import os
-import sys
-import subprocess
-import tempfile
-
-from PyPDF2 import PdfFileReader, PdfFileWriter
-
-
-if __name__ == "__main__":
-    try:
-        path = sys.argv[1]
-        page_number = int(sys.argv[2])
-    except (IndexError, ValueError):
-        sys.exit(f"Usage: {__file__} <PATH> <PAGE_NUMBER>")
-
-    reader = PdfFileReader(path)
-
-    # Remember that pages are 0-indexed
-    page = reader.pages[page_number - 1]
-
-    writer = PdfFileWriter()
-    writer.addPage(page)
-
-    with tempfile.TemporaryDirectory(suffix=".pdf") as temp_dir:
-        out_path = os.path.join(temp_dir, os.path.basename(path))
-
-        with open(out_path, "wb") as out_file:
-            writer.write(out_file)
-
-        subprocess.check_call(["qlmanage", "-t", out_path, "-s", "400x400", "-o", "."])

images/README.md (1065) → images/README.md (1404)

diff --git a/images/README.md b/images/README.md
index fe50b85..587cab8 100644
--- a/images/README.md
+++ b/images/README.md
@@ -34,4 +34,17 @@ These scripts are for working with images and other visual material.
     </p>
     I often use this when making images for my website, to create identical light mode and dark mode screenshots.
   </dd>
+  
+  <dt>
+    <a href="https://github.com/alexwlchan/scripts/blob/main/images/pdfthumb">
+      <code>pdfthumb</code>
+    </a>
+  </dt>
+  <dd>
+    get a PNG thumbnail of a specific page of a PDF, for example:
+    <p>
+      <pre><code>pdfthumb pattern.pdf --page=3</code></pre>
+    </p>
+    will create a thumbnail of the third page.
+  </dd>
 </dl>

images/pdfthumb (0) → images/pdfthumb (1884)

diff --git a/images/pdfthumb b/images/pdfthumb
new file mode 100755
index 0000000..202dbce
--- /dev/null
+++ b/images/pdfthumb
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+"""
+This script creates a square thumbnail of a specific page of a PDF file.
+The thumbnail is 400×400 by default; use --width to change the size.
+
+It uses similar thumbnailing logic to docstore [1], but it allows me to
+pick a particular page rather than the first page.
+
+This is helpful because sometimes I download PDF cross-stitch patterns
+where the first page contains text which isn't a good thumbnail, but
+later pages do show the whole pattern.
+
+[1]: https://github.com/alexwlchan/docstore/blob/main/src/docstore/thumbnails.py
+
+"""
+
+import argparse
+import os
+import sys
+import subprocess
+import tempfile
+
+from PyPDF2 import PdfReader, PdfWriter  # pip3 install --user PyPDF2==3.0.1
+
+
+def parse_args(argv):
+    parser = argparse.ArgumentParser(
+        prog=os.path.basename(__file__),
+        description="Get thumbnails from a specific page of a PDF file.",
+    )
+
+    parser.add_argument("PATH")
+    parser.add_argument(
+        "--page",
+        type=int,
+        metavar="PAGE_NUMBER",
+        help="which page of the PDF to get",
+        required=True,
+    )
+    parser.add_argument(
+        "--width", type=int, help="pixel width of the generated thumbnail", default=400
+    )
+
+    return parser.parse_args(argv)
+
+
+if __name__ == "__main__":
+    args = parse_args(sys.argv[1:])
+
+    reader = PdfReader(args.PATH)
+
+    # Remember that pages are 0-indexed
+    try:
+        page = reader.pages[args.page - 1]
+    except IndexError:
+        sys.exit(f"Unrecognised page: {args.page}, expected 1...{len(reader.pages)}")
+
+    writer = PdfWriter()
+    writer.add_page(page)
+
+    with tempfile.TemporaryDirectory(suffix=".pdf") as temp_dir:
+        out_path = os.path.join(temp_dir, os.path.basename(args.PATH))
+
+        with open(out_path, "wb") as out_file:
+            writer.write(out_file)
+
+        subprocess.check_call(
+            ["qlmanage", "-t", out_path, "-s", f"{args.width}x{args.width}", "-o", "."]
+        )