Convert images_only_pdf to use pypdf
- ID: 0804d56
- date: 2023-12-10 15:29:17+00:00
- author: Alex Chan <alex@alexwlchan.net>
- parent: 25dff6f
- message: Convert images_only_pdf to use pypdf

  I was struggling to install PyMuPDF on my MacBook Air, because I couldn't install the wheels. This seems to work just as well and doesn't have installation issues.
- changed files: 3 files, 13 additions, 17 deletions
Changed files
images/images_only_pdf (1323) → images/images_only_pdf (801)
diff --git a/images/images_only_pdf b/images/images_only_pdf
index 891c646..888c087 100755
--- a/images/images_only_pdf
+++ b/images/images_only_pdf
@@ -11,31 +11,24 @@ borders around the images which is precisely what I don't want.
import os
import sys
-import fitz # PyMuPDF==1.21.0
+from pypdf import PdfReader
-if __name__ == "__main__":
+if __name__ == '__main__':
try:
path = sys.argv[1]
except IndexError:
sys.exit(f"Usage: {__file__} <PATH>")
- with fitz.open(path) as pdf_original, fitz.open() as pdf_new:
- for page_number, page in enumerate(pdf_original, start=1):
- for image_number, image in enumerate(page.get_images(), start=1):
- # Get the XREF of the image
- xref = image[0]
+ reader = PdfReader(path)
- # Extract the image bytes
- base_image = pdf_original.extract_image(xref)
+ images = []
- new_page = pdf_new.new_page(
- width=base_image["width"], height=base_image["height"]
- )
- rect = fitz.Rect(0.0, 0.0, base_image["width"], base_image["height"])
- new_page.insert_image(rect, stream=base_image["image"])
+ for page in reader.pages:
+ images.extend([
+ im.image for im in page.images
+ ])
- # out_path = path.replace(".pdf", "-noimages.pdf")
- # assert path != out_path
+ assert len(images) == len(reader.pages)
- pdf_new.save(path)
+ images[0].save(path, "PDF", resolution=100.0, save_all=True, append_images=images[1:])
requirements.in (130) → requirements.in (136)
diff --git a/requirements.in b/requirements.in
index 038293f..0a71a0c 100644
--- a/requirements.in
+++ b/requirements.in
@@ -10,6 +10,7 @@ naturalsort==1.5.1
Pillow
pillow_heif
pip-tools
+pypdf
pytest
termcolor
yt-dlp
requirements.txt (2350) → requirements.txt (2393)
diff --git a/requirements.txt b/requirements.txt
index 2c1b593..2e7ebd3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -95,6 +95,8 @@ pycryptodomex==3.19.0
# via yt-dlp
pyflakes==3.1.0
# via flake8
+pypdf==3.17.2
+ # via -r requirements.in
pyproject-hooks==1.0.0
# via build
pytest==7.4.3