Skip to main content

Convert images_only_pdf to use pypdf

ID
0804d56
date
2023-12-10 15:29:17+00:00
author
Alex Chan <alex@alexwlchan.net>
parent
25dff6f
message
Convert images_only_pdf to use pypdf

I was struggling to install PyMuPDF on my MacBook Air, because I couldn't
install the wheels.  pypdf seems to work just as well and doesn't have
installation issues.
changed files
3 files, 13 additions, 17 deletions

Changed files

images/images_only_pdf (1323) → images/images_only_pdf (801)

diff --git a/images/images_only_pdf b/images/images_only_pdf
index 891c646..888c087 100755
--- a/images/images_only_pdf
+++ b/images/images_only_pdf
@@ -11,31 +11,24 @@ borders around the images which is precisely what I don't want.
 import os
 import sys
 
-import fitz  # PyMuPDF==1.21.0
+from pypdf import PdfReader
 
 
-if __name__ == "__main__":
+if __name__ == '__main__':
     try:
         path = sys.argv[1]
     except IndexError:
         sys.exit(f"Usage: {__file__} <PATH>")
 
-    with fitz.open(path) as pdf_original, fitz.open() as pdf_new:
-        for page_number, page in enumerate(pdf_original, start=1):
-            for image_number, image in enumerate(page.get_images(), start=1):
-                # Get the XREF of the image
-                xref = image[0]
+    reader = PdfReader(path)
 
-                # Extract the image bytes
-                base_image = pdf_original.extract_image(xref)
+    images = []
 
-                new_page = pdf_new.new_page(
-                    width=base_image["width"], height=base_image["height"]
-                )
-                rect = fitz.Rect(0.0, 0.0, base_image["width"], base_image["height"])
-                new_page.insert_image(rect, stream=base_image["image"])
+    for page in reader.pages:
+        images.extend([
+            im.image for im in page.images
+        ])
 
-        # out_path = path.replace(".pdf", "-noimages.pdf")
-        # assert path != out_path
+    assert len(images) == len(reader.pages)
 
-        pdf_new.save(path)
+    images[0].save(path, "PDF", resolution=100.0, save_all=True, append_images=images[1:])

requirements.in (130) → requirements.in (136)

diff --git a/requirements.in b/requirements.in
index 038293f..0a71a0c 100644
--- a/requirements.in
+++ b/requirements.in
@@ -10,6 +10,7 @@ naturalsort==1.5.1
 Pillow
 pillow_heif
 pip-tools
+pypdf
 pytest
 termcolor
 yt-dlp

requirements.txt (2350) → requirements.txt (2393)

diff --git a/requirements.txt b/requirements.txt
index 2c1b593..2e7ebd3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -95,6 +95,8 @@ pycryptodomex==3.19.0
     # via yt-dlp
 pyflakes==3.1.0
     # via flake8
+pypdf==3.17.2
+    # via -r requirements.in
 pyproject-hooks==1.0.0
     # via build
 pytest==7.4.3