Add a script for saving a page as a Safari webarchive
- ID: ffea0e9
- date: 2024-05-16 17:52:13+00:00
- author: Alex Chan <alex@alexwlchan.net>
- parent: 74b4211
- message: Add a script for saving a page as a Safari webarchive
- changed files: 3 files, 131 additions, 1 deletion
Changed files
web/.gitattributes (0) → web/.gitattributes (47)
diff --git a/web/.gitattributes b/web/.gitattributes
new file mode 100644
index 0000000..c226feb
--- /dev/null
+++ b/web/.gitattributes
@@ -0,0 +1 @@
+save_safari_webarchive linguist-language=Swift
web/README.md (5225) → web/README.md (5642)
diff --git a/web/README.md b/web/README.md
index ba7e757..a33c788 100644
--- a/web/README.md
+++ b/web/README.md
@@ -66,6 +66,12 @@ scripts = [
"""
},
{
+ "usage": "save_safari_webarchive [URL] [OUTPUT_PATH]",
+ "description": """
+ save a copy of a web page as a Safari webarchive
+ """
+ },
+ {
"name": "scrape_really_useful_boxes.py",
"description": """
scrape the Really Useful Boxes product catalogue, so I can search for boxes in ways their website doesn't allow – in particular, by dimensions, so I can find boxes that fit into specific spaces.<br/><br/><img src="really_useful_boxes.png">
@@ -156,6 +162,15 @@ cog_helpers.create_description_table(folder_name=folder_name, scripts=scripts)
</dd>
<dt>
+ <a href="https://github.com/alexwlchan/scripts/blob/main/web/save_safari_webarchive">
+ <code>save_safari_webarchive [URL] [OUTPUT_PATH]</code>
+ </a>
+ </dt>
+ <dd>
+ save a copy of a web page as a Safari webarchive
+ </dd>
+
+ <dt>
<a href="https://github.com/alexwlchan/scripts/blob/main/web/scrape_really_useful_boxes.py">
<code>scrape_really_useful_boxes.py</code>
</a>
@@ -173,4 +188,4 @@ cog_helpers.create_description_table(folder_name=folder_name, scripts=scripts)
this is a wrapper around <a href="https://github.com/yt-dlp/yt-dlp">yt-dlp</a> that does parallel downloads of videos in playlists.
</dd>
</dl>
-<!-- [[[end]]] (checksum: 54f08d9f43084b85f2d56d5caced4e10) -->
+<!-- [[[end]]] (checksum: a31c60eca24c6488caaa93e8bb5f6b44) -->
web/save_safari_webarchive (0) → web/save_safari_webarchive (3217)
diff --git a/web/save_safari_webarchive b/web/save_safari_webarchive
new file mode 100755
index 0000000..5ca3556
--- /dev/null
+++ b/web/save_safari_webarchive
@@ -0,0 +1,114 @@
+#!/usr/bin/env swift
+/// Save a web page as a Safari webarchive.
+///
+/// Usage: save_safari_webarchive [URL] [OUTPUT_PATH]
+///
+/// This will save the page to the desired file, but may fail for
+/// several reasons:
+///
+/// - the web page can't be loaded
+/// - the web page returns a non-200 status code
+/// - there's already a file at that path (it won't overwrite an existing
+/// webarchive)
+///
+/// For a detailed explanation of the code in this script, see
+/// https://alexwlchan.net/2024/creating-a-safari-webarchive/
+
+import WebKit
+
+/// Navigation delegate that treats every problem loading a page as fatal.
+///
+/// A navigation error, or an HTTP response whose status code is not 200,
+/// is reported on stderr and the process then exits with status 1.
+class ExitOnFailureDelegate: NSObject, WKNavigationDelegate {
+  /// Write the navigation error to stderr and terminate the process.
+  private func abortLoad(because error: Error) -> Never {
+    fputs("Failed to load web page: \(error.localizedDescription)\n", stderr)
+    exit(1)
+  }
+
+  /// Called by WebKit when an error occurs during navigation.
+  func webView(_: WKWebView, didFail _: WKNavigation!, withError error: Error) {
+    abortLoad(because: error)
+  }
+
+  /// Called by WebKit when an error occurs during the early navigation process.
+  func webView(
+    _: WKWebView,
+    didFailProvisionalNavigation _: WKNavigation!,
+    withError error: Error
+  ) {
+    abortLoad(because: error)
+  }
+
+  /// Decide whether the navigation may continue once its response is known.
+  ///
+  /// An HTTP response with any status code other than 200 terminates the
+  /// process; every other response (including non-HTTP ones) is allowed.
+  func webView(
+    _: WKWebView,
+    decidePolicyFor navigationResponse: WKNavigationResponse,
+    decisionHandler: (WKNavigationResponsePolicy) -> Void
+  ) {
+    // `statusCode` is nil when the response is not an HTTP response.
+    let statusCode = (navigationResponse.response as? HTTPURLResponse)?.statusCode
+
+    if let code = statusCode, code != 200 {
+      fputs("Loading web page failed with status code \(code)\n", stderr)
+      exit(1)
+    }
+
+    decisionHandler(.allow)
+  }
+}
+
+// The single web view used to load the page and produce the archive.
+let webView = WKWebView()
+
+// WKWebView holds its navigation delegate weakly, so the delegate is kept
+// in a top-level constant to stay alive for the lifetime of the script.
+let delegate = ExitOnFailureDelegate()
+webView.navigationDelegate = delegate
+
+extension WKWebView {
+
+  /// Load the page at `urlString`, blocking until loading has finished.
+  ///
+  /// If the string cannot be turned into a `URL`, an error is written
+  /// to stderr and the process exits with status 1.
+  func load(_ urlString: String) {
+    guard let url = URL(string: urlString) else {
+      fputs("Unable to use \(urlString) as a URL\n", stderr)
+      exit(1)
+    }
+
+    load(URLRequest(url: url))
+
+    // Spin the main run loop in short slices so WebKit can make progress
+    // while this synchronous call waits for the load to complete.
+    while isLoading {
+      RunLoop.main.run(until: Date(timeIntervalSinceNow: 0.1))
+    }
+  }
+
+  /// Write the web view's current contents to `savePath` as a webarchive.
+  ///
+  /// Blocks until the file has been written. If the archive data cannot
+  /// be created, or the write fails (including when a file already exists
+  /// at `savePath`, which is never overwritten), an error is written to
+  /// stderr and the process exits with status 1.
+  func saveAsWebArchive(savePath: URL) {
+    var hasFinished = false
+
+    createWebArchiveData { result in
+      do {
+        let archive = try result.get()
+        try archive.write(to: savePath, options: [.withoutOverwriting])
+      } catch {
+        fputs("Unable to save webarchive file: \(error.localizedDescription)\n", stderr)
+        exit(1)
+      }
+
+      // Only reached on success; the failure path above never returns.
+      hasFinished = true
+    }
+
+    // Keep servicing the main run loop until the completion handler has run.
+    while !hasFinished {
+      RunLoop.main.run(until: Date(timeIntervalSinceNow: 0.1))
+    }
+  }
+}
+
+// Expect exactly two arguments after the script name: the URL to load
+// and the path where the webarchive should be written.
+guard CommandLine.arguments.count == 3 else {
+  // Usage errors go to stderr, like every other failure in this script,
+  // so they are never mixed into captured stdout.
+  fputs("Usage: \(CommandLine.arguments[0]) <URL> <OUTPUT_PATH>\n", stderr)
+  exit(1)
+}
+
+let urlString = CommandLine.arguments[1]
+let savePath = URL(fileURLWithPath: CommandLine.arguments[2])
+
+// Both calls block until they finish, and terminate the process themselves
+// on failure, so reaching the final line means the archive was written.
+webView.load(urlString)
+webView.saveAsWebArchive(savePath: savePath)
+
+print("Saved webarchive to \(savePath)")