Skip to main content

Add a script for saving a page as a Safari webarchive

ID
ffea0e9
date
2024-05-16 17:52:13+00:00
author
Alex Chan <alex@alexwlchan.net>
parent
74b4211
message
Add a script for saving a page as a Safari webarchive
changed files
3 files, 131 additions, 1 deletion

Changed files

web/.gitattributes (0) → web/.gitattributes (47)

diff --git a/web/.gitattributes b/web/.gitattributes
new file mode 100644
index 0000000..c226feb
--- /dev/null
+++ b/web/.gitattributes
@@ -0,0 +1 @@
+save_safari_webarchive linguist-language=Swift

web/README.md (5225) → web/README.md (5642)

diff --git a/web/README.md b/web/README.md
index ba7e757..a33c788 100644
--- a/web/README.md
+++ b/web/README.md
@@ -66,6 +66,12 @@ scripts = [
         """
     },
     {
+        "usage": "save_safari_webarchive [URL] [OUTPUT_PATH]",
+        "description": """
+        save a copy of a web page as a Safari webarchive
+        """
+    },
+    {
         "name": "scrape_really_useful_boxes.py",
         "description": """
         scrape the Really Useful Boxes product catalogue, so I can search for boxes in ways their website doesn't allow – in particular, by dimensions, so I can find boxes that fit into specific spaces.<br/><br/><img src="really_useful_boxes.png">
@@ -156,6 +162,15 @@ cog_helpers.create_description_table(folder_name=folder_name, scripts=scripts)
   </dd>
 
   <dt>
+    <a href="https://github.com/alexwlchan/scripts/blob/main/web/save_safari_webarchive">
+      <code>save_safari_webarchive [URL] [OUTPUT_PATH]</code>
+    </a>
+  </dt>
+  <dd>
+    save a copy of a web page as a Safari webarchive
+  </dd>
+
+  <dt>
     <a href="https://github.com/alexwlchan/scripts/blob/main/web/scrape_really_useful_boxes.py">
       <code>scrape_really_useful_boxes.py</code>
     </a>
@@ -173,4 +188,4 @@ cog_helpers.create_description_table(folder_name=folder_name, scripts=scripts)
     this is a wrapper around <a href="https://github.com/yt-dlp/yt-dlp">yt-dlp</a> that does parallel downloads of videos in playlists.
   </dd>
 </dl>
-<!-- [[[end]]] (checksum: 54f08d9f43084b85f2d56d5caced4e10) -->
+<!-- [[[end]]] (checksum: a31c60eca24c6488caaa93e8bb5f6b44) -->

web/save_safari_webarchive (0) → web/save_safari_webarchive (3217)

diff --git a/web/save_safari_webarchive b/web/save_safari_webarchive
new file mode 100755
index 0000000..5ca3556
--- /dev/null
+++ b/web/save_safari_webarchive
@@ -0,0 +1,114 @@
+#!/usr/bin/env swift
+/// Save a web page as a Safari webarchive.
+///
+/// Usage: save_safari_webarchive [URL] [OUTPUT_PATH]
+///
+/// This will save the page to the desired file, but may fail for
+/// several reasons:
+///
+///   - the web page can't be loaded
+///   - the web page returns a non-200 status code
+///   - there's already a file at that path (it won't overwrite an existing
+///     webarchive)
+///
+/// For a detailed explanation of the code in this script, see
+/// https://alexwlchan.net/2024/creating-a-safari-webarchive/
+
+import WebKit
+
+/// Navigation delegate that aborts the whole process as soon as a page
+/// load goes wrong.
+///
+/// It writes a message to stderr and exits with status 1 when WebKit
+/// reports a navigation error, or when the server answers with any HTTP
+/// status code other than 200.
+class ExitOnFailureDelegate: NSObject, WKNavigationDelegate {
+  /// Report a failed load on stderr, then terminate with exit code 1.
+  private func abortLoad(because error: Error) -> Never {
+    fputs("Failed to load web page: \(error.localizedDescription)\n", stderr)
+    exit(1)
+  }
+
+  /// WebKit callback for an error that occurs during navigation.
+  func webView(_: WKWebView, didFail: WKNavigation!, withError error: Error) {
+    abortLoad(because: error)
+  }
+
+  /// WebKit callback for an error that occurs during the early
+  /// (provisional) stage of navigation.
+  func webView(
+    _: WKWebView,
+    didFailProvisionalNavigation: WKNavigation!,
+    withError error: Error
+  ) {
+    abortLoad(because: error)
+  }
+
+  /// Inspect the response before the web view is allowed to use it.
+  ///
+  /// An HTTP response whose status code isn't exactly 200 terminates the
+  /// process.  Responses that aren't `HTTPURLResponse` carry no status
+  /// code to check, so they are always allowed through.
+  func webView(
+    _: WKWebView,
+    decidePolicyFor navigationResponse: WKNavigationResponse,
+    decisionHandler: (WKNavigationResponsePolicy) -> Void
+  ) {
+    if let response = navigationResponse.response as? HTTPURLResponse, response.statusCode != 200 {
+      fputs("Loading web page failed with status code \(response.statusCode)\n", stderr)
+      exit(1)
+    }
+
+    decisionHandler(.allow)
+  }
+}
+
+// The delegate lives in its own top-level constant so that something
+// holds a strong reference to it for the lifetime of the script
+// (WKWebView declares `navigationDelegate` as a weak property).
+let delegate = ExitOnFailureDelegate()
+
+let webView = WKWebView()
+webView.navigationDelegate = delegate
+
+extension WKWebView {
+
+  /// Start loading `urlString` and wait until the load has finished.
+  ///
+  /// The caller is blocked by spinning the main run loop in 0.1 second
+  /// slices for as long as the web view reports `isLoading`.
+  ///
+  /// If the string can't be turned into a `URL`, an error is printed to
+  /// stderr and the process exits with status 1.
+  func load(_ urlString: String) {
+    guard let pageURL = URL(string: urlString) else {
+      fputs("Unable to use \(urlString) as a URL\n", stderr)
+      exit(1)
+    }
+
+    self.load(URLRequest(url: pageURL))
+
+    while isLoading {
+      RunLoop.main.run(until: Date(timeIntervalSinceNow: 0.1))
+    }
+  }
+
+  /// Write the web view's current contents to `savePath` as a webarchive.
+  ///
+  /// The caller is blocked (by spinning the main run loop in 0.1 second
+  /// slices) until the archive has been written.  The file is written
+  /// with `.withoutOverwriting`, so an existing file at `savePath` is
+  /// treated as a failure.  Any failure, whether creating the archive
+  /// data or writing it to disk, prints an error to stderr and exits
+  /// with status 1.
+  func saveAsWebArchive(savePath: URL) {
+    // Flipped by the completion handler once the file is on disk.
+    var archiveWritten = false
+
+    createWebArchiveData { result in
+      do {
+        try result.get().write(to: savePath, options: .withoutOverwriting)
+        archiveWritten = true
+      } catch {
+        fputs("Unable to save webarchive file: \(error.localizedDescription)\n", stderr)
+        exit(1)
+      }
+    }
+
+    while !archiveWritten {
+      RunLoop.main.run(until: Date(timeIntervalSinceNow: 0.1))
+    }
+  }
+}
+
+// Exactly two arguments are expected after the script name.  Anything
+// else is a usage error, reported on stderr like every other failure
+// in this script so it never mixes with normal output on stdout.
+guard CommandLine.arguments.count == 3 else {
+  fputs("Usage: \(CommandLine.arguments[0]) <URL> <OUTPUT_PATH>\n", stderr)
+  exit(1)
+}
+
+let urlString = CommandLine.arguments[1]
+let savePath = URL(fileURLWithPath: CommandLine.arguments[2])
+
+// Both calls block until they finish, and terminate the process
+// themselves on failure, so reaching the final line means success.
+webView.load(urlString)
+webView.saveAsWebArchive(savePath: savePath)
+
+print("Saved webarchive to \(savePath)")