Skip to main content

encoder: add an ensure_ascii parameter to mirror json.dumps()

ID
3250c22
date
2025-08-14 23:09:40+00:00
author
Alex Chan <alex@alexwlchan.net>
parent
c3ebe28
message
encoder: add an `ensure_ascii` parameter to mirror `json.dumps()`
changed files
5 files, 102 additions, 9 deletions

Changed files

CHANGELOG.md (4074) → CHANGELOG.md (4730)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c64ee04..7593d79 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,11 +1,36 @@
 # CHANGELOG
 
+## v1.4.0 - 2025-08-15
+
+Add an `ensure_ascii` parameter to `write_js`.
+If `True`, all incoming non-ASCII characters will be escaped; otherwise they will be left as-is.
+Default is `False`.
+
+This changes the default output of `write_js`.
+Before, it would escape any incoming non-ASCII characters; for example, `“hello world”` would be encoded as:
+
+```json
+"\u201chello world\u201d"
+```
+
+With the new behaviour, it will be encoded as:
+
+```json
+"“hello world”"
+```
+
+unless you explicitly pass `ensure_ascii=True`.
+
+This mirrors the parameter on the builtin `json.dumps()`, but with a different default.
+
 ## v1.3.0 - 2025-05-05
 
 Add a `sort_keys` parameter to `write_js`.
 If `True`, dictionaries will be serialised to JSON sorted by key.
 Default `False`.
 
+This mirrors the parameter on the builtin `json.dumps()`.
+
 ## v1.2.3 - 2025-05-04
 
 Tweak the error message introduced in v1.2.2 -- JSON objects are **name**/value pairs, not key/value pairs.

src/javascript_data_files/__init__.py (6833) → src/javascript_data_files/__init__.py (6906)

diff --git a/src/javascript_data_files/__init__.py b/src/javascript_data_files/__init__.py
index 0255f26..7820cee 100644
--- a/src/javascript_data_files/__init__.py
+++ b/src/javascript_data_files/__init__.py
@@ -22,7 +22,7 @@ from .decoder import decode_from_js
 from .encoder import encode_as_js, encode_as_json
 
 
-__version__ = "1.3.0"
+__version__ = "1.4.0"
 __all__ = [
     "read_js",
     "read_typed_js",
@@ -74,6 +74,7 @@ def write_js(
     *,
     value: typing.Any,
     varname: str,
+    ensure_ascii: bool = False,
     sort_keys: bool = False,
 ) -> None:
     """
@@ -89,7 +90,9 @@ def write_js(
         'const redPentagon = {\n  "sides": 5,\n  "colour": "red"\n};\n'
 
     """
-    js_string = encode_as_js(value, varname, sort_keys=sort_keys)
+    js_string = encode_as_js(
+        value, varname, ensure_ascii=ensure_ascii, sort_keys=sort_keys
+    )
 
     if isinstance(p, io.TextIOBase):
         p.write(js_string)

src/javascript_data_files/encoder.py (1253) → src/javascript_data_files/encoder.py (1439)

diff --git a/src/javascript_data_files/encoder.py b/src/javascript_data_files/encoder.py
index 3f5087d..4c90db6 100644
--- a/src/javascript_data_files/encoder.py
+++ b/src/javascript_data_files/encoder.py
@@ -26,18 +26,32 @@ class HumanReadableEncoder(json.JSONEncoder):
         return super().encode(o)
 
 
-def encode_as_json(value: typing.Any, *, sort_keys: bool = False) -> str:
+def encode_as_json(
+    value: typing.Any, *, ensure_ascii: bool = False, sort_keys: bool = False
+) -> str:
     """
     Convert a Python value to a JSON-encoded string.
     """
-    return json.dumps(value, indent=2, sort_keys=sort_keys, cls=HumanReadableEncoder)
-
-
-def encode_as_js(value: typing.Any, varname: str, *, sort_keys: bool = False) -> str:
+    return json.dumps(
+        value,
+        indent=2,
+        sort_keys=sort_keys,
+        ensure_ascii=ensure_ascii,
+        cls=HumanReadableEncoder,
+    )
+
+
+def encode_as_js(
+    value: typing.Any,
+    varname: str,
+    *,
+    ensure_ascii: bool = False,
+    sort_keys: bool = False,
+) -> str:
     """
     Convert a Python value to a JSON-encoded JavaScript value.
     """
-    json_string = encode_as_json(value, sort_keys=sort_keys)
+    json_string = encode_as_json(value, ensure_ascii=ensure_ascii, sort_keys=sort_keys)
     js_string = f"const {varname} = {json_string};\n"
 
     return js_string

tests/test_encoder.py (2946) → tests/test_encoder.py (3942)

diff --git a/tests/test_encoder.py b/tests/test_encoder.py
index e139915..a3e0ae6 100644
--- a/tests/test_encoder.py
+++ b/tests/test_encoder.py
@@ -4,7 +4,7 @@ Tests for ``javascript_data_files.encoder``.
 
 import string
 
-from javascript_data_files.encoder import encode_as_json
+from javascript_data_files.encoder import encode_as_json, encode_as_js
 
 
 def test_it_pretty_prints_json() -> None:
@@ -32,6 +32,38 @@ def test_it_sorts_keys() -> None:
     )
 
 
+class TestEnsureAscii:
+    """
+    Tests for the `ensure_ascii` parameter.
+    """
+
+    s = "“hello world”"
+    varname = "greeting"
+
+    js_as_utf8 = 'const greeting = "“hello world”";\n'
+    js_as_ascii = 'const greeting = "\\u201chello world\\u201d";\n'
+
+    def test_default_is_utf8(self) -> None:
+        """
+        If you don't pass a value for `ensure_ascii`, then we allow
+        UTF-8 in the output.
+        """
+        assert encode_as_js(self.s, self.varname) == self.js_as_utf8
+
+    def test_explicit_utf8(self) -> None:
+        """
+        If you pass `ensure_ascii=False`, then we allow UTF-8 in the output.
+        """
+        assert encode_as_js(self.s, self.varname, ensure_ascii=False) == self.js_as_utf8
+
+    def test_explicit_ascii(self) -> None:
+        """
+        If you pass `ensure_ascii=True`, then we only return ASCII
+        in the output.
+        """
+        assert encode_as_js(self.s, self.varname, ensure_ascii=True) == self.js_as_ascii
+
+
 def test_a_list_of_ints_is_not_split_over_multiple_lines() -> None:
     """
     If there's a list of small integers, they're printed on one line

tests/test_javascript_data_files.py (18531) → tests/test_javascript_data_files.py (19158)

diff --git a/tests/test_javascript_data_files.py b/tests/test_javascript_data_files.py
index 79553a0..a6a625b 100644
--- a/tests/test_javascript_data_files.py
+++ b/tests/test_javascript_data_files.py
@@ -284,6 +284,25 @@ class TestWriteJs:
             == 'const redPentagon = {\n  "colour": "red",\n  "sides": 5\n};\n'
         )
 
+    @pytest.mark.parametrize(
+        "ensure_ascii, expected_js",
+        [
+            (False, 'const greeting = "“hello world”";\n'),
+            (True, 'const greeting = "\\u201chello world\\u201d";\n'),
+        ],
+    )
+    def test_write_with_ensure_ascii(
+        self, tmp_path: pathlib.Path, ensure_ascii: bool, expected_js: str
+    ) -> None:
+        """
+        You can pass an `ensure_ascii`  parameter.
+        """
+        p = tmp_path / "ascii.js"
+        write_js(
+            p, value="“hello world”", varname="greeting", ensure_ascii=ensure_ascii
+        )
+        assert p.read_text() == expected_js
+
     def test_fails_if_file_is_read_only(self, tmp_path: pathlib.Path) -> None:
         """
         It cannot write to a file open in read-only mode.