Merge pull request #31 from alexwlchan/handle-duplicate-keys
- ID: f3435ff
- date: 2025-05-03 08:01:51+00:00
- author: Alex Chan <alex@alexwlchan.net>
- parents: cbd03c1,89cde5f
- message: Merge pull request #31 from alexwlchan/handle-duplicate-keys Reject duplicate keys when reading a JSON object
- changed files:
Changed files
CHANGELOG.md (3196) → CHANGELOG.md (3790)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d12433f..b9a9cc0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,20 @@
# CHANGELOG
+## v1.2.2 - 2025-05-03
+
+Duplicate keys in JSON objects are now rejected as an error.
+
+For example, consider the following JavaScript:
+
+```javascript
+const shape = {"sides": "5", "colour": "red", "sides": 4};
+```
+
+These duplicate keys are technically allowed by the JSON specification, but are always a mistake when I encounter them.
+Many JSON parsers will silently drop the first instance of `sides`, including Python's `json` module and the parsers built into web browsers.
+
+Previously `read_js` would read this file and silently drop the first key, but now it throws a `ValueError` and prompts you to de-duplicate the key.
+
## v1.2.1 - 2025-04-13
Fix a bug in the validation of `typing.Union[A, B]` where both types are a `TypedDict`.
CONTRIBUTING.md (0) → CONTRIBUTING.md (1281)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..9f37a1b
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,56 @@
+# CONTRIBUTING
+
+You can set up a local development environment by cloning the repo and installing dependencies:
+
+```shell
+git clone https://github.com/alexwlchan/javascript-data-files.git
+cd javascript-data-files
+python3 -m venv .venv
+source .venv/bin/activate
+pip install -e .
+```
+
+If you want to run tests, install the dev dependencies and run the tests:
+
+```shell
+# Activate the virtualenv and install dev dependencies
+source .venv/bin/activate
+pip install -r dev_requirements.txt
+
+# Check formatting
+ruff check .
+ruff format --check .
+
+# Check docstrings
+interrogate -vv
+
+# Check types
+mypy src tests
+
+# Run tests
+coverage run -m pytest tests
+coverage report
+```
+
+To make changes:
+
+1. Create a new branch
+2. Push your changes to GitHub
+3. Open a pull request
+4. Fix any issues flagged by GitHub Actions (including tests, code linting, and type checking)
+5. Merge it!
+
+To create a new version on PyPI:
+
+1. Update the version in `src/javascript_data_files/__init__.py`
+2. Add release notes in `CHANGELOG.md` and push a new tag to GitHub
+3. Deploy the release using twine:
+
+ ```console
+ $ python3 -m build
+ $ python3 -m twine upload dist/* --username=__token__
+ ```
+
+ You will need [a PyPI API token](https://pypi.org/help/#apitoken) to publish packages.
+
+
README.md (1641) → README.md (1766)
diff --git a/README.md b/README.md
index dd2bbeb..032a0d9 100644
--- a/README.md
+++ b/README.md
@@ -45,6 +45,10 @@ This is the only way to load data from an external file from an HTML file you've
I have a lot of HTML files and local sites I build with an HTML viewer and metadata in a JavaScript file.
The convenience of this approach outweighs the mild annoyance of having to store data in JavaScript, not JSON.
+## Development
+
+If you want to make changes to the library, there are instructions in [CONTRIBUTING.md](./CONTRIBUTING.md).
+
## License
MIT.
src/javascript_data_files/__init__.py (6907) → src/javascript_data_files/__init__.py (6783)
diff --git a/src/javascript_data_files/__init__.py b/src/javascript_data_files/__init__.py
index f4079f1..6e0d030 100644
--- a/src/javascript_data_files/__init__.py
+++ b/src/javascript_data_files/__init__.py
@@ -14,15 +14,22 @@ Think of this like the JSON module, but for JavaScript files.
import io
import json
import pathlib
-import re
import textwrap
import typing
import uuid
+from .decoder import decode_from_js
from .encoder import encode_as_js, encode_as_json
-__version__ = "1.2.1"
+__version__ = "1.2.2"
+__all__ = [
+ "read_js",
+ "read_typed_js",
+ "write_js",
+ "append_to_js_array",
+ "append_to_js_object",
+]
T = typing.TypeVar("T")
@@ -44,18 +51,7 @@ def read_js(p: pathlib.Path | str, *, varname: str) -> typing.Any:
"""
p = pathlib.Path(p)
- contents = p.read_text()
-
- m = re.compile(r"^(?:const |var )?%s = " % varname)
-
- if not m.match(contents):
- raise ValueError(
- f"File {p} does not start with JavaScript `const` declaration!"
- )
-
- json_string = m.sub(repl="", string=contents).rstrip().rstrip(";")
-
- return json.loads(json_string)
+ return decode_from_js(js_string=p.read_text(), varname=varname)
def read_typed_js[T](p: pathlib.Path | str, *, varname: str, model: type[T]) -> T:
src/javascript_data_files/decoder.py (0) → src/javascript_data_files/decoder.py (2194)
diff --git a/src/javascript_data_files/decoder.py b/src/javascript_data_files/decoder.py
new file mode 100644
index 0000000..70440a9
--- /dev/null
+++ b/src/javascript_data_files/decoder.py
@@ -0,0 +1,69 @@
+"""
+This file contains pure functions for converting JSON strings
+to Python values.
+
+Because I expect some of this JSON to be written by me, and I can
+make copy-paste mistakes, there are a couple of ways it tries
+to catch errors.
+"""
+
+import json
+import re
+import typing
+
+
+def decode_from_js(js_string: str, *, varname: str) -> typing.Any:
+ """
+ Parse a string as a JavaScript value.
+ """
+ # Matches 'const varname = ', 'var varname = ', or a bare
+ # 'varname = ' (the keyword is optional) at the start of a string.
+ m = re.compile(r"^(?:const |var )?%s = " % varname)
+
+ if not m.match(js_string):
+ raise ValueError("Does not start with JavaScript `const` declaration!")
+
+ json_string = m.sub(repl="", string=js_string).rstrip().rstrip(";")
+
+ return decode_from_json(json_string)
+
+
+def _parse_object_pairs(pairs: list[tuple[str, typing.Any]]) -> dict[str, typing.Any]:
+ """
+ Convert any object literal into a dict. This receives a list of
+ key-value pairs and returns a dict.
+
+ This is similar to the builtin parser, but it will look for
+ duplicate keys and throw a ValueError if they're found; this is
+ a protection against me making a copy/paste error in my JavaScript.
+ """
+ # First try to parse the object as a dictionary; if it's the same
+ # length as the pairs, then we know all the keys were unique and
+ # we can return.
+ pairs_as_dict = dict(pairs)
+
+ if len(pairs_as_dict) == len(pairs):
+ return pairs_as_dict
+
+ # Otherwise, let's work out what the duplicate key(s) were, so we
+ # can throw an appropriate error message for the user.
+ import collections
+
+ key_tally = collections.Counter(k for k, _ in pairs)
+
+ duplicate_keys = [k for k, count in key_tally.items() if count > 1]
+ assert len(duplicate_keys) > 0
+
+ if len(duplicate_keys) == 1:
+ raise ValueError(f"Found duplicate key in JSON object: {duplicate_keys[0]}")
+ else:
+ raise ValueError(
+ f"Found duplicate keys in JSON object: {', '.join(duplicate_keys)}"
+ )
+
+
+def decode_from_json(json_string: str) -> typing.Any:
+ """
+ Parse a string as a JSON value.
+ """
+ return json.loads(json_string, object_pairs_hook=_parse_object_pairs)
tests/test_decoder.py (0) → tests/test_decoder.py (1063)
diff --git a/tests/test_decoder.py b/tests/test_decoder.py
new file mode 100644
index 0000000..5676fb8
--- /dev/null
+++ b/tests/test_decoder.py
@@ -0,0 +1,35 @@
+"""
+Tests for `javascript_data_files.decoder`.
+"""
+
+import pytest
+
+from javascript_data_files.decoder import decode_from_json
+
+
+@pytest.mark.parametrize(
+ "json_string",
+ [
+ '{ "sides": 3, "sides": 4 }',
+ '{ "sides": 3, "colour": "blue", "sides": 4 }',
+ '[{ "nested": { "sides": 3, "sides": 4 } }]',
+ ],
+)
+def test_object_with_duplicate_keys_is_rejected(json_string: str) -> None:
+ """
+ Trying to decode a JSON string which includes an object
+ with duplicate keys throws a ValueError.
+ """
+ with pytest.raises(ValueError, match="Found duplicate key in JSON object: sides"):
+ decode_from_json(json_string)
+
+
+def test_object_with_multiple_duplicate_keys_is_rejected() -> None:
+ """
+ Trying to decode a JSON string which includes an object
+ with multiple duplicate keys throws a ValueError.
+ """
+ with pytest.raises(ValueError, match="Found duplicate keys in JSON object:"):
+ decode_from_json(
+ '{ "sides": 3, "colour": "blue", "sides": 4, "colour": "red" }'
+ )
tests/test_javascript_data_files.py (17712) → tests/test_javascript_data_files.py (17712)
diff --git a/tests/test_javascript_data_files.py b/tests/test_javascript_data_files.py
index 34eecd3..3446a02 100644
--- a/tests/test_javascript_data_files.py
+++ b/tests/test_javascript_data_files.py
@@ -92,7 +92,7 @@ class TestReadJs:
)
with pytest.raises(
- ValueError, match="does not start with JavaScript `const` declaration"
+ ValueError, match="Does not start with JavaScript `const` declaration"
):
read_js(js_path, varname="blueTriangle")