Reject duplicate keys when reading a JSON object
- ID
89cde5f
- date
2025-05-03 07:59:31+00:00
- author
Alex Chan <alex@alexwlchan.net>
- parent
283ea11
- message
Reject duplicate keys when reading a JSON object
- changed files
4 files, 100 additions, 2 deletions
Changed files
CHANGELOG.md (3196) → CHANGELOG.md (3790)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d12433f..b9a9cc0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,20 @@
# CHANGELOG
+## v1.2.2 - 2025-05-03
+
+Duplicate keys in JSON objects are now rejected as an error.
+
+For example, consider the following JavaScript:
+
+```javascript
+const shape = {"sides": "5", "colour": "red", "sides": 4};
+```
+
+These duplicate keys are technically allowed by the JSON specification, but are always a mistake when I encounter them.
+Many JSON parsers, including Python's `json` module and the ones in web browsers, will silently discard the first `sides` value and keep the last.
+
+Previously `read_js` would read a file like this and silently drop the first value, but now it raises a `ValueError` that names the duplicated key so you can remove it.
+
## v1.2.1 - 2025-04-13
Fix a bug in the validation of `typing.Union[A, B]` where both types are a `TypedDict`.
src/javascript_data_files/__init__.py (6664) → src/javascript_data_files/__init__.py (6783)
diff --git a/src/javascript_data_files/__init__.py b/src/javascript_data_files/__init__.py
index 96467dc..6e0d030 100644
--- a/src/javascript_data_files/__init__.py
+++ b/src/javascript_data_files/__init__.py
@@ -22,7 +22,14 @@ from .decoder import decode_from_js
from .encoder import encode_as_js, encode_as_json
-__version__ = "1.2.1"
+__version__ = "1.2.2"
+__all__ = [
+ "read_js",
+ "read_typed_js",
+ "write_js",
+ "append_to_js_array",
+ "append_to_js_object",
+]
T = typing.TypeVar("T")
src/javascript_data_files/decoder.py (760) → src/javascript_data_files/decoder.py (2194)
diff --git a/src/javascript_data_files/decoder.py b/src/javascript_data_files/decoder.py
index 5c9eb70..70440a9 100644
--- a/src/javascript_data_files/decoder.py
+++ b/src/javascript_data_files/decoder.py
@@ -25,4 +25,45 @@ def decode_from_js(js_string: str, *, varname: str) -> typing.Any:
json_string = m.sub(repl="", string=js_string).rstrip().rstrip(";")
- return json.loads(json_string)
+ return decode_from_json(json_string)
+
+
+def _parse_object_pairs(pairs: list[tuple[str, typing.Any]]) -> dict[str, typing.Any]:
+    """
+    Build a dict from the key-value pairs of one JSON object literal.
+
+    This is used as the ``object_pairs_hook`` for ``json.loads``.  It
+    returns ``dict(pairs)`` when every key is distinct, and otherwise
+    raises a ValueError naming each key that appears more than once;
+    repeated keys are treated as a copy/paste mistake in the source
+    JavaScript rather than something to be silently resolved.
+    """
+    result = dict(pairs)
+
+    # dict() collapses repeated keys into one entry, so a result that is
+    # shorter than the list of pairs means at least one key was repeated.
+    if len(result) != len(pairs):
+        # Only on this failure path do we pay for a tally of the keys,
+        # which tells us which ones to name in the error message.
+        import collections
+
+        occurrences = collections.Counter(key for key, _ in pairs)
+        repeated = [key for key, seen in occurrences.items() if seen > 1]
+        assert repeated
+
+        if len(repeated) > 1:
+            raise ValueError(
+                f"Found duplicate keys in JSON object: {', '.join(repeated)}"
+            )
+        raise ValueError(f"Found duplicate key in JSON object: {repeated[0]}")
+
+    return result
+
+
+def decode_from_json(json_string: str) -> typing.Any:
+    """
+    Decode a JSON string, raising a ValueError if any object in it
+    contains the same key more than once.
+    """
+    decoded = json.loads(json_string, object_pairs_hook=_parse_object_pairs)
+    return decoded
tests/test_decoder.py (0) → tests/test_decoder.py (1063)
diff --git a/tests/test_decoder.py b/tests/test_decoder.py
new file mode 100644
index 0000000..5676fb8
--- /dev/null
+++ b/tests/test_decoder.py
@@ -0,0 +1,35 @@
+"""
+Tests for `javascript_data_files.decoder`.
+"""
+
+import pytest
+
+from javascript_data_files.decoder import decode_from_json
+
+
+@pytest.mark.parametrize(
+    "json_string",
+    [
+        '{ "sides": 3, "sides": 4 }',
+        '{ "sides": 3, "colour": "blue", "sides": 4 }',
+        '[{ "nested": { "sides": 3, "sides": 4 } }]',
+    ],
+)
+def test_object_with_duplicate_keys_is_rejected(json_string: str) -> None:
+    """
+    A JSON object that repeats one key -- on its own, alongside other
+    keys, or nested inside another value -- is rejected with a ValueError.
+    """
+    expected_message = "Found duplicate key in JSON object: sides"
+    with pytest.raises(ValueError, match=expected_message):
+        decode_from_json(json_string)
+
+
+def test_object_with_multiple_duplicate_keys_is_rejected() -> None:
+    """
+    A JSON object that repeats several different keys is rejected with
+    a ValueError that uses the plural form of the error message.
+    """
+    json_string = '{ "sides": 3, "colour": "blue", "sides": 4, "colour": "red" }'
+    with pytest.raises(ValueError, match="Found duplicate keys in JSON object:"):
+        decode_from_json(json_string)