Skip to main content

Merge pull request #31 from alexwlchan/handle-duplicate-keys

ID
f3435ff
date
2025-05-03 08:01:51+00:00
author
Alex Chan <alex@alexwlchan.net>
parents
cbd03c1, 89cde5f
message
Merge pull request #31 from alexwlchan/handle-duplicate-keys

Reject duplicate keys when reading a JSON object
changed files
7 files, 190 additions, 15 deletions

Changed files

CHANGELOG.md (3196) → CHANGELOG.md (3790)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d12433f..b9a9cc0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,20 @@
 # CHANGELOG
 
+## v1.2.2 - 2025-05-03
+
+Duplicate keys in JSON objects are now rejected as an error.
+
+For example, consider the following JavaScript:
+
+```javascript
+const shape = {"sides": "5", "colour": "red", "sides": 4};
+```
+
+These duplicate keys are technically allowed by the JSON specification, but are always a mistake when I encounter them.
+Many JSON parsers will silently drop the first instance of `sides`, including Python's `json` module and the parsers in web browsers.
+
+Previously `read_js` would read this file and silently drop the first key, but now it throws a `ValueError` and prompts you to de-duplicate the key.
+
 ## v1.2.1 - 2025-04-13
 
 Fix a bug in the validation of `typing.Union[A, B]` where both types are a `TypedDict`.

CONTRIBUTING.md (0) → CONTRIBUTING.md (1281)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..9f37a1b
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,56 @@
+# CONTRIBUTING
+
+You can set up a local development environment by cloning the repo and installing dependencies:
+
+```shell
+git clone https://github.com/alexwlchan/javascript-data-files.git
+cd javascript-data-files
+python3 -m venv .venv
+source .venv/bin/activate
+pip install -e .
+```
+
+If you want to run tests, install the dev dependencies and run the tests:
+
+```shell
+# Activate the virtualenv and install dev dependencies
+source .venv/bin/activate
+pip install -r dev_requirements.txt
+
+# Check formatting
+ruff check .
+ruff format --check .
+
+# Check docstrings
+interrogate -vv
+
+# Check types
+mypy src tests
+
+# Run tests
+coverage run -m pytest tests
+coverage report
+```
+
+To make changes:
+
+1.  Create a new branch
+2.  Push your changes to GitHub
+3.  Open a pull request
+4.  Fix any issues flagged by GitHub Actions (including tests, code linting, and type checking)
+5.  Merge it!
+
+To create a new version on PyPI:
+
+1.  Update the version in `src/javascript_data_files/__init__.py`
+2.  Add release notes in `CHANGELOG.md` and push a new tag to GitHub
+3.  Deploy the release using twine:
+
+    ```console
+    $ python3 -m build
+    $ python3 -m twine upload dist/* --username=__token__
+    ```
+
+    You will need [a PyPI API token](https://pypi.org/help/#apitoken) to publish packages.
+
+

README.md (1641) → README.md (1766)

diff --git a/README.md b/README.md
index dd2bbeb..032a0d9 100644
--- a/README.md
+++ b/README.md
@@ -45,6 +45,10 @@ This is the only way to load data from an external file from an HTML file you've
 I have a lot of HTML files and local sites I build with an HTML viewer and metadata in a JavaScript file.
 The convenience of this approach outweighs the mild annoyance of having to store data in JavaScript, not JSON.
 
+## Development
+
+If you want to make changes to the library, there are instructions in [CONTRIBUTING.md](./CONTRIBUTING.md).
+
 ## License
 
 MIT.

src/javascript_data_files/__init__.py (6907) → src/javascript_data_files/__init__.py (6783)

diff --git a/src/javascript_data_files/__init__.py b/src/javascript_data_files/__init__.py
index f4079f1..6e0d030 100644
--- a/src/javascript_data_files/__init__.py
+++ b/src/javascript_data_files/__init__.py
@@ -14,15 +14,22 @@ Think of this like the JSON module, but for JavaScript files.
 import io
 import json
 import pathlib
-import re
 import textwrap
 import typing
 import uuid
 
+from .decoder import decode_from_js
 from .encoder import encode_as_js, encode_as_json
 
 
-__version__ = "1.2.1"
+__version__ = "1.2.2"
+__all__ = [
+    "read_js",
+    "read_typed_js",
+    "write_js",
+    "append_to_js_array",
+    "append_to_js_object",
+]
 
 
 T = typing.TypeVar("T")
@@ -44,18 +51,7 @@ def read_js(p: pathlib.Path | str, *, varname: str) -> typing.Any:
     """
     p = pathlib.Path(p)
 
-    contents = p.read_text()
-
-    m = re.compile(r"^(?:const |var )?%s = " % varname)
-
-    if not m.match(contents):
-        raise ValueError(
-            f"File {p} does not start with JavaScript `const` declaration!"
-        )
-
-    json_string = m.sub(repl="", string=contents).rstrip().rstrip(";")
-
-    return json.loads(json_string)
+    return decode_from_js(js_string=p.read_text(), varname=varname)
 
 
 def read_typed_js[T](p: pathlib.Path | str, *, varname: str, model: type[T]) -> T:

src/javascript_data_files/decoder.py (0) → src/javascript_data_files/decoder.py (2194)

diff --git a/src/javascript_data_files/decoder.py b/src/javascript_data_files/decoder.py
new file mode 100644
index 0000000..70440a9
--- /dev/null
+++ b/src/javascript_data_files/decoder.py
@@ -0,0 +1,69 @@
+"""
+This file contains pure functions for converting JSON strings
+to Python values.
+
+Because I expect some of this JSON to be written by me, and I can
+make copy-paste mistakes, there are a couple of ways it tries
+to catch errors.
+"""
+
+import json
+import re
+import typing
+
+
+def decode_from_js(js_string: str, *, varname: str) -> typing.Any:
+    """
+    Parse a string as a JavaScript value.
+    """
+    # Matches 'const varname = ', 'var varname = ' or a bare
+    # 'varname = ' at the start of a string.
+    m = re.compile(r"^(?:const |var )?%s = " % varname)
+
+    if not m.match(js_string):
+        raise ValueError("Does not start with JavaScript `const` declaration!")
+
+    json_string = m.sub(repl="", string=js_string).rstrip().rstrip(";")
+
+    return decode_from_json(json_string)
+
+
+def _parse_object_pairs(pairs: list[tuple[str, typing.Any]]) -> dict[str, typing.Any]:
+    """
+    Convert any object literal into a dict.  This receives a list of
+    key-value pairs and returns a dict.
+
+    This is similar to the builtin parser, but it will look for
+    duplicate keys and throw a ValueError if they're found; this is
+    a protection against me making a copy/paste error in my JavaScript.
+    """
+    # First try to parse the object as a dictionary; if it's the same
+    # length as the pairs, then we know all the keys were unique and
+    # we can return.
+    pairs_as_dict = dict(pairs)
+
+    if len(pairs_as_dict) == len(pairs):
+        return pairs_as_dict
+
+    # Otherwise, let's work out what the duplicate key(s) were, so we
+    # can throw an appropriate error message for the user.
+    import collections
+
+    key_tally = collections.Counter(k for k, _ in pairs)
+
+    duplicate_keys = [k for k, count in key_tally.items() if count > 1]
+    assert len(duplicate_keys) > 0
+
+    if len(duplicate_keys) == 1:
+        raise ValueError(f"Found duplicate key in JSON object: {duplicate_keys[0]}")
+    else:
+        raise ValueError(
+            f"Found duplicate keys in JSON object: {', '.join(duplicate_keys)}"
+        )
+
+
+def decode_from_json(json_string: str) -> typing.Any:
+    """
+    Parse a string as a JSON value.
+    """
+    return json.loads(json_string, object_pairs_hook=_parse_object_pairs)

tests/test_decoder.py (0) → tests/test_decoder.py (1063)

diff --git a/tests/test_decoder.py b/tests/test_decoder.py
new file mode 100644
index 0000000..5676fb8
--- /dev/null
+++ b/tests/test_decoder.py
@@ -0,0 +1,35 @@
+"""
+Tests for `javascript_data_files.decoder`.
+"""
+
+import pytest
+
+from javascript_data_files.decoder import decode_from_json
+
+
+@pytest.mark.parametrize(
+    "json_string",
+    [
+        '{ "sides": 3, "sides": 4 }',
+        '{ "sides": 3, "colour": "blue", "sides": 4 }',
+        '[{ "nested": { "sides": 3, "sides": 4 } }]',
+    ],
+)
+def test_object_with_duplicate_keys_is_rejected(json_string: str) -> None:
+    """
+    Trying to decode a JSON string which includes an object
+    with duplicate keys throws a ValueError.
+    """
+    with pytest.raises(ValueError, match="Found duplicate key in JSON object: sides"):
+        decode_from_json(json_string)
+
+
+def test_object_with_multiple_duplicate_keys_is_rejected() -> None:
+    """
+    Trying to decode a JSON string which includes an object
+    with multiple duplicate keys throws a ValueError.
+    """
+    with pytest.raises(ValueError, match="Found duplicate keys in JSON object:"):
+        decode_from_json(
+            '{ "sides": 3, "colour": "blue", "sides": 4, "colour": "red" }'
+        )

tests/test_javascript_data_files.py (17712) → tests/test_javascript_data_files.py (17712)

diff --git a/tests/test_javascript_data_files.py b/tests/test_javascript_data_files.py
index 34eecd3..3446a02 100644
--- a/tests/test_javascript_data_files.py
+++ b/tests/test_javascript_data_files.py
@@ -92,7 +92,7 @@ class TestReadJs:
         )
 
         with pytest.raises(
-            ValueError, match="does not start with JavaScript `const` declaration"
+            ValueError, match="Does not start with JavaScript `const` declaration"
         ):
             read_js(js_path, varname="blueTriangle")