Add a more compact and human-readable encoding

ID

50aba03

date

2025-01-10 11:44:54+00:00

author

Alex Chan <alex@alexwlchan.net>

parent

61dfab0

message

Add a more compact and human-readable encoding

changed files

6 files, 94 additions, 36 deletions

CHANGELOG.md
README.md
src/javascript_data_files/__init__.py
src/javascript_data_files/encoder.py
tests/test_encoder.py
tests/test_javascript_data_files.py

Changed files

CHANGELOG.md (1140) → CHANGELOG.md (1538)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3f57684..f8ab0f6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,28 @@
 # CHANGELOG
 
+## v1.1.1
+
+Tweak the way the JavaScript is encoded to make it slightly more compact and readable -- in particular, short lists will now be encoded as a single line, rather than split across multiple lines.
+
+Before:
+
+```json
+[
+  1,
+  2,
+  3
+]
+```
+
+After:
+
+```json
+[1, 2, 3]
+```
+
+The value is the same but should be more readable.
+This opens the door to more readability improvements in the future.
+
 ## v1.1.0 - 2025-01-10
 
 You can now call `write_js()` with a file-like object.

README.md (1573) → README.md (1641)

diff --git a/README.md b/README.md
index a03f351..dd2bbeb 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,8 @@ const shape = { "sides": 5, "colour": "red" };
 
 Think of this module as the JSON module, but for JavaScript files.
 
+These data files are meant to be both human- and machine-readable.
+
 ## Usage
 
 *   You can read a JavaScript file with `read_js(path, varname)`

src/javascript_data_files/__init__.py (6422) → src/javascript_data_files/__init__.py (6426)

diff --git a/src/javascript_data_files/__init__.py b/src/javascript_data_files/__init__.py
index 53cf442..7b0d13e 100644
--- a/src/javascript_data_files/__init__.py
+++ b/src/javascript_data_files/__init__.py
@@ -19,10 +19,10 @@ import textwrap
 import typing
 import uuid
 
-from .encoder import encode_as_js
+from .encoder import encode_as_js, encode_as_json
 
 
-__version__ = "1.1.0"
+__version__ = "1.1.1"
 
 
 def read_js(p: pathlib.Path | str, *, varname: str) -> typing.Any:
@@ -131,7 +131,7 @@ def append_to_js_array(p: pathlib.Path | str, *, value: typing.Any) -> None:
 
     json_to_append = (
         b",\n"
-        + textwrap.indent(json.dumps(value, indent=2), prefix="  ").encode("utf8")
+        + textwrap.indent(encode_as_json(value), prefix="  ").encode("utf8")
         + b"\n];\n"
     )
 
@@ -183,7 +183,7 @@ def append_to_js_object(p: pathlib.Path | str, *, key: str, value: typing.Any) -
     file_size = p.stat().st_size
 
     enc_key = json.dumps(key)
-    enc_value = textwrap.indent(json.dumps(value, indent=2), prefix="  ").lstrip()
+    enc_value = textwrap.indent(encode_as_json(value), prefix="  ").lstrip()
 
     json_to_append = f",\n  {enc_key}: {enc_value}\n}};\n".encode("utf8")

src/javascript_data_files/encoder.py (556) → src/javascript_data_files/encoder.py (1155)

diff --git a/src/javascript_data_files/encoder.py b/src/javascript_data_files/encoder.py
index 2dedbae..6720d82 100644
--- a/src/javascript_data_files/encoder.py
+++ b/src/javascript_data_files/encoder.py
@@ -2,18 +2,35 @@
 This file contains pure functions for converting Python values
 to JavaScript strings.
 
-It doesn't do any I/O.
+We prioritise human-readability over absolute efficiency.
+For example, JSON is indented to make it more readable, rather than
+written in a compact encoding that uses fewer bytes on disk.
 """
 
 import json
 import typing
 
 
+class HumanReadableEncoder(json.JSONEncoder):
+    """
+    A custom JSON encoder with a few niceties for human-readability.
+    """
+
+    def encode(self, o: typing.Any) -> str:
+        """
+        Return a JSON string representation of a Python data structure, o.
+        """
+        if isinstance(o, list) and len(o) < 7 and len(json.dumps(o)) < 60:
+            return json.dumps(o)
+
+        return super().encode(o)
+
+
 def encode_as_json(value: typing.Any) -> str:
     """
     Convert a Python value to a JSON-encoded string.
     """
-    return json.dumps(value, indent=2)
+    return json.dumps(value, indent=2, cls=HumanReadableEncoder)
 
 
 def encode_as_js(value: typing.Any, varname: str) -> str:

tests/test_encoder.py (0) → tests/test_encoder.py (1698)

diff --git a/tests/test_encoder.py b/tests/test_encoder.py
new file mode 100644
index 0000000..de2951b
--- /dev/null
+++ b/tests/test_encoder.py
@@ -0,0 +1,46 @@
+"""
+Tests for ``javascript_data_files.encoder``.
+"""
+
+from javascript_data_files.encoder import encode_as_json
+
+
+def test_it_pretty_prints_json() -> None:
+    """
+    JSON strings are pretty-printed with indentation.
+    """
+    assert (
+        encode_as_json({"sides": 5, "colour": "red"})
+        == '{\n  "sides": 5,\n  "colour": "red"\n}'
+    )
+
+
+def test_a_list_of_ints_is_not_split_over_multiple_lines() -> None:
+    """
+    If there's a short list of small integers, it's printed on one line
+    rather than across multiple lines.
+    """
+    assert encode_as_json([1, 2, 3]) == "[1, 2, 3]"
+
+
+def test_a_list_of_long_ints_is_indented_and_split() -> None:
+    """
+    If there's a list with more integers than fit on a line of sensible
+    length, they're split across multiple lines.
+    """
+    json_string = encode_as_json(list(range(100)))
+
+    assert json_string == (
+        "["
+        "\n  0,\n  1,\n  2,\n  3,\n  4,\n  5,\n  6,\n  7,\n  8,\n  9,"
+        "\n  10,\n  11,\n  12,\n  13,\n  14,\n  15,\n  16,\n  17,\n  18,\n  19,"
+        "\n  20,\n  21,\n  22,\n  23,\n  24,\n  25,\n  26,\n  27,\n  28,\n  29,"
+        "\n  30,\n  31,\n  32,\n  33,\n  34,\n  35,\n  36,\n  37,\n  38,\n  39,"
+        "\n  40,\n  41,\n  42,\n  43,\n  44,\n  45,\n  46,\n  47,\n  48,\n  49,"
+        "\n  50,\n  51,\n  52,\n  53,\n  54,\n  55,\n  56,\n  57,\n  58,\n  59,"
+        "\n  60,\n  61,\n  62,\n  63,\n  64,\n  65,\n  66,\n  67,\n  68,\n  69,"
+        "\n  70,\n  71,\n  72,\n  73,\n  74,\n  75,\n  76,\n  77,\n  78,\n  79,"
+        "\n  80,\n  81,\n  82,\n  83,\n  84,\n  85,\n  86,\n  87,\n  88,\n  89,"
+        "\n  90,\n  91,\n  92,\n  93,\n  94,\n  95,\n  96,\n  97,\n  98,\n  99"
+        "\n]"
+    )

tests/test_javascript_data_files.py (15597) → tests/test_javascript_data_files.py (14431)

diff --git a/tests/test_javascript_data_files.py b/tests/test_javascript_data_files.py
index 9a8b420..75e93f4 100644
--- a/tests/test_javascript_data_files.py
+++ b/tests/test_javascript_data_files.py
@@ -375,21 +375,6 @@ class TestAppendToObject:
             "sideLengths": [1, 2, 3, 4, 5],
         }
 
-    def test_indentation_is_consistent(self, tmp_path: pathlib.Path) -> None:
-        """
-        If you append to an object, the file looks as if you'd read and rewritten
-        the whole thing with ``write_js()``.
-        """
-        js_path1 = tmp_path / "data1.js"
-        js_path2 = tmp_path / "data2.js"
-
-        write_js(js_path1, varname="shape", value={"colour": "red"})
-        append_to_js_object(js_path1, key="sides", value=[1, 2, 3])
-
-        write_js(js_path2, varname="shape", value={"colour": "red", "sides": [1, 2, 3]})
-
-        assert js_path1.read_text() == js_path2.read_text()
-
     def test_error_if_file_doesnt_look_like_object(self, js_path: pathlib.Path) -> None:
         """
         Appending to a file which doesn't contain a JSON object throws
@@ -462,18 +447,3 @@ class TestRoundTrip:
             "coconut",
             "damson",
         ]
-
-    def test_indentation_is_consistent(self, tmp_path: pathlib.Path) -> None:
-        """
-        If you append to an array, the file looks as if you'd read and rewritten
-        the whole thing with ``write_js()``.
-        """
-        js_path1 = tmp_path / "data1.js"
-        js_path2 = tmp_path / "data2.js"
-
-        write_js(js_path1, varname="numbers", value=[1, 2, 3])
-        append_to_js_array(js_path1, value=[4, 5, 6])
-
-        write_js(js_path2, varname="numbers", value=[1, 2, 3, [4, 5, 6]])
-
-        assert js_path1.read_text() == js_path2.read_text()