Write a Node version of the script
- ID: 37532a9
- date: 2024-06-19 21:55:29+00:00
- author: Alex Chan <alex@alexwlchan.net>
- parent: 6b22e6b
- message: Write a Node version of the script
- changed files: 3 files, 135 additions, 1 deletion
Changed files
.github/workflows/test.yml (0) → .github/workflows/test.yml (386)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 0000000..feb5f3c
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,22 @@
+name: Test
+
+on:
+  push:
+    branches:
+    - main
+
+  pull_request:
+    branches:
+    - main
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v4
+    - uses: actions/setup-node@v4
+    - name: Run tests
+      run: |
+        node measure https://wellcomecollection.org/collections collections
+        node measure https://wellcomecollection.org/collections
.gitignore (5) → .gitignore (4)
diff --git a/.gitignore b/.gitignore
index c1d18d8..1fcb152 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1 @@
-_out
+out
measure.js (0) → measure.js (2922)
diff --git a/measure.js b/measure.js
new file mode 100644
index 0000000..df6abcb
--- /dev/null
+++ b/measure.js
@@ -0,0 +1,112 @@
+const fs = require('fs');
+const https = require('https');
+
+// Write text to a file in the `out` directory, creating the directory
+// first if it doesn't exist yet.
+//
+// This takes an `options` object with two parameters:
+//
+//  - `filename` -- the name of the file to write
+//  - `contents` -- the text to write to the file
+//
+// Both steps run synchronously: the directory must exist before the write
+// starts (two independent async calls could race and fail with ENOENT), and
+// the write has finished by the time this function returns, so callers can
+// safely report the file as saved.
+//
+// If either step fails, the error is logged and the process exits with
+// status 1.
+//
+function writeToFile(options) {
+  const filePath = `out/${options.filename}`;
+
+  try {
+    // `recursive: true` makes this a no-op when `out` already exists.
+    fs.mkdirSync('out', { recursive: true });
+  } catch (err) {
+    console.error('Error creating `out` directory:', err);
+    process.exit(1);
+  }
+
+  try {
+    fs.writeFileSync(filePath, options.contents);
+  } catch (err) {
+    console.error('Error writing file:', err);
+    process.exit(1);
+  }
+}
+
+// Render a byte count as a kilobyte figure with two decimal places.
+//
+// The count is divided by 1024 and suffixed with " kB".
+//
+// Example: naturalSize(1234) ~> "1.21 kB"
+function naturalSize(byteCount) {
+  const kilobytes = byteCount / 1024;
+  return kilobytes.toFixed(2) + ' kB';
+}
+
+// Left-pad a value with spaces so it is at least `length` characters wide.
+//
+//  - `str`    -- the value to pad; it is converted to a string first
+//  - `length` -- the minimum width of the result
+//
+// Returns the padded string.  Values that are already `length` characters
+// or longer are returned without padding.
+//
+// Example: leftPad("1.21 kB", 10) ~> "   1.21 kB"
+function leftPad(str, length) {
+  return String(str).padStart(length, ' ');
+}
+
+// Read the command-line arguments.
+//
+// Usage: measure.js URL [LABEL]
+//
+//  * URL   -- the URL to fetch (required)
+//  * LABEL -- a label for the downloaded files (optional; falls back to
+//             "export" when omitted)
+//
+// Any other number of arguments prints the usage message and exits with
+// status 1.
+//
+const args = process.argv.slice(2);
+
+if (args.length < 1 || args.length > 2) {
+  console.error("Usage: measure.js URL [LABEL]");
+  process.exit(1);
+}
+
+const [url, label = "export"] = args;
+
+// Actually fetch the URL, report the size of the HTML and of the embedded
+// `__NEXT_DATA__` JSON payload, and save both to the `out` directory.
+//
+// Note: I add a custom User-Agent because CloudFront seems to reject fetches that
+// come from Node's builtin HTTP library.
+const options = {
+  headers: {
+    'User-Agent': 'Mozilla/5.0 (Android 4.4; Mobile; rv:41.0) Gecko/41.0 Firefox/41.0',
+  }
+};
+
+https.get(url, options, (res) => {
+  // Anything other than a 2xx response (a redirect, an error page) isn't the
+  // page we were asked to measure, so stop rather than report its size.
+  if (res.statusCode < 200 || res.statusCode >= 300) {
+    console.error(`Error fetching the URL: unexpected HTTP status ${res.statusCode}`);
+    process.exit(1);
+  }
+
+  // Let the stream do the UTF-8 decoding.  Without this each chunk is a
+  // Buffer that gets decoded on its own, which corrupts any multi-byte
+  // character that happens to straddle a chunk boundary.
+  res.setEncoding('utf8');
+
+  let html = '';
+
+  res.on('data', (chunk) => {
+    html += chunk;
+  });
+
+  res.on('error', (err) => {
+    console.error('Error fetching the URL: ', err);
+    process.exit(1);
+  });
+
+  // We've got the whole HTML file. Parse it, and save the results.
+  res.on('end', () => {
+    const htmlByteCount = Buffer.byteLength(html, 'utf8');
+    console.log(`HTML = ${leftPad(naturalSize(htmlByteCount), 10)}`);
+
+    // The payload is whatever sits between the opening `__NEXT_DATA__`
+    // script tag and the first closing script tag after it.
+    const parts = html.split('<script id="__NEXT_DATA__" type="application/json">');
+
+    if (parts.length < 2) {
+      console.error('Error parsing the HTML: no __NEXT_DATA__ script tag found');
+      process.exit(1);
+    }
+
+    const nextData = parts[1].split("</script>")[0];
+
+    const nextDataByteCount = Buffer.byteLength(nextData, 'utf8');
+    console.log(`__NEXT_DATA__ = ${leftPad(naturalSize(nextDataByteCount), 10)} (${(nextDataByteCount / htmlByteCount * 100).toFixed(1)}%)`);
+
+    console.log();
+
+    writeToFile({ filename: `${label}.html`, contents: html });
+    console.log(`Saved HTML to out/${label}.html`);
+
+    writeToFile({ filename: `${label}.json`, contents: nextData });
+    console.log(`Saved __NEXT_DATA__ to out/${label}.json`);
+  });
+
+}).on('error', (err) => {
+  console.error('Error fetching the URL: ', err);
+  process.exit(1);
+});