Add a read_typed_js function for reading typed JSON – 5b77ed3 – javascript-data-files

Add a `read_typed_js` function for reading typed JSON

ID

5b77ed3

date

2025-03-04 21:18:15+00:00

author

Alex Chan <alex@alexwlchan.net>

parent

3b7f6cb

message

Add a `read_typed_js` function for reading typed JSON

changed files

6 files, 133 additions, 2 deletions

CHANGELOG.md
dev_requirements.in
pyproject.toml
src/javascript_data_files/__init__.py
src/javascript_data_files/validate_type.py
tests/test_javascript_data_files.py

Changed files

CHANGELOG.md (1538) → CHANGELOG.md (2182)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f8ab0f6..e7ad325 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,17 @@
 # CHANGELOG
 
+## v1.2.0
+
+This adds a new function `read_typed_js`, which is like `read_js` but will additionally validate the data against a type you specify.
+
+*   `read_js()` returns `typing.Any`, and will always return something if the file contains valid JSON
+*   `read_typed_js()` returns `T`, where `T` is the type you specify as `model`.
+    This will only return if the file contains JSON that matches the type, and otherwise it will throw a `pydantic.ValidationError`.
+
+This is useful if you want to check your data or you write typed Python.
+
+You need to install the typed extra to get this function, i.e. `pip install javascript-data-files[typed]`.
+
 ## v1.1.1
 
 Tweak the way the JavaScript is encoded to make it slightly more compact and readable -- in particular, short lists will now be encoded as a single line, rather than split across multiple lines.

dev_requirements.in (72) → dev_requirements.in (63)

diff --git a/dev_requirements.in b/dev_requirements.in
index 18f5bc5..7b0b1eb 100644
--- a/dev_requirements.in
+++ b/dev_requirements.in
@@ -1,4 +1,4 @@
--e file:.[validate_types]
+-e file:.[typed]
 
 build
 interrogate

pyproject.toml (1332) → pyproject.toml (1323)

diff --git a/pyproject.toml b/pyproject.toml
index 9afa125..4bb99b5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,7 +24,7 @@ dependencies = []
 dynamic = ["version"]
 
 [project.optional-dependencies]
-validate_types = ["pydantic"]
+typed = ["pydantic"]
 
 [project.urls]
 "Homepage" = "https://github.com/alexwlchan/python-js-files"

src/javascript_data_files/__init__.py (6426) → src/javascript_data_files/__init__.py (6907)

diff --git a/src/javascript_data_files/__init__.py b/src/javascript_data_files/__init__.py
index 7b0d13e..034b15b 100644
--- a/src/javascript_data_files/__init__.py
+++ b/src/javascript_data_files/__init__.py
@@ -25,6 +25,9 @@ from .encoder import encode_as_js, encode_as_json
 __version__ = "1.1.1"
 
 
+T = typing.TypeVar("T")
+
+
 def read_js(p: pathlib.Path | str, *, varname: str) -> typing.Any:
     """
     Read a JavaScript "data file".
@@ -55,6 +58,21 @@ def read_js(p: pathlib.Path | str, *, varname: str) -> typing.Any:
     return json.loads(json_string)
 
 
+def read_typed_js[T](p: pathlib.Path | str, *, varname: str, model: type[T]) -> T:
+    """
+    Read a JavaScript "data file".
+
+    This will validate the contents of the data file against the type
+    you provide, and will throw a ``pydantic.ValidationError`` if the
+    contents do not match the specified type.
+    """
+    from .validate_type import validate_type
+
+    data = read_js(p, varname=varname)
+
+    return validate_type(data, model=model)
+
+
 def write_js(
     p: pathlib.Path | str | io.TextIOBase | io.BufferedIOBase,
     *,

src/javascript_data_files/validate_type.py (0) → src/javascript_data_files/validate_type.py (1290)

diff --git a/src/javascript_data_files/validate_type.py b/src/javascript_data_files/validate_type.py
new file mode 100644
index 0000000..490d4d6
--- /dev/null
+++ b/src/javascript_data_files/validate_type.py
@@ -0,0 +1,48 @@
+"""
+Helper methods for validating that an arbitrary blob matches a given model.
+"""
+
+import functools
+import typing
+
+from pydantic import ConfigDict, TypeAdapter
+
+
+T = typing.TypeVar("T")
+
+
+@functools.cache
+def _get_validator(model: type[T]) -> TypeAdapter[T]:
+    """
+    Get the validator for a given type.  This is a moderately expensive
+    process, so we cache the result -- we only need to create the
+    validator once for each type.
+    """
+    try:
+        model.__pydantic_config__ = ConfigDict(extra="forbid")  # type: ignore
+    except (AttributeError, TypeError):
+        pass
+
+    return TypeAdapter(model)
+
+
+def validate_type(t: typing.Any, *, model: type[T]) -> T:
+    """
+    Check that some data matches a given type.
+
+    We use this to e.g. check that the structured data we receive from
+    Wikimedia matches our definitions, so we can use the data in our
+    type-checked Python.
+
+    See https://stackoverflow.com/a/77386216/1558022
+    """
+    # This is to fix an issue from the type checker:
+    #
+    #     Argument 1 to "__call__" of "_lru_cache_wrapper"
+    #     has incompatible type "type[T]"; expected "Hashable"
+    #
+    assert isinstance(model, typing.Hashable)
+
+    validator = _get_validator(model)
+
+    return validator.validate_python(t, strict=True)

tests/test_javascript_data_files.py (16002) → tests/test_javascript_data_files.py (17712)

diff --git a/tests/test_javascript_data_files.py b/tests/test_javascript_data_files.py
index 6130c33..34eecd3 100644
--- a/tests/test_javascript_data_files.py
+++ b/tests/test_javascript_data_files.py
@@ -6,12 +6,14 @@ import io
 import pathlib
 import typing
 
+import pydantic
 import pytest
 
 from javascript_data_files import (
     append_to_js_array,
     append_to_js_object,
     read_js,
+    read_typed_js,
     write_js,
 )
 
@@ -95,6 +97,57 @@ class TestReadJs:
             read_js(js_path, varname="blueTriangle")
 
 
+class TestReadTypedJs:
+    """
+    Tests for the ``read_typed_js()`` function.
+    """
+
+    def test_matches_model(self, js_path: pathlib.Path) -> None:
+        """
+        If the data matches the model, it's read correctly.
+        """
+        js_path.write_text(
+            'const redPentagon = {\n  "sides": 5,\n  "colour": "red"\n};\n'
+        )
+
+        Shape = typing.TypedDict("Shape", {"sides": int, "colour": str})
+
+        shape = read_typed_js(js_path, varname="redPentagon", model=Shape)
+
+        assert shape == {"sides": 5, "colour": "red"}
+
+    def test_does_not_match_model(self, js_path: pathlib.Path) -> None:
+        """
+        If the data does not match the model, it throws a ValidationError.
+        """
+        js_path.write_text(
+            'const redPentagon = {\n  "sides": 5,\n  "colour": "red"\n};\n'
+        )
+
+        Vehicle = typing.TypedDict("Vehicle", {"wheels": int, "colour": str})
+
+        with pytest.raises(pydantic.ValidationError):
+            read_typed_js(js_path, varname="redPentagon", model=Vehicle)
+
+    def test_can_read_int(self, js_path: pathlib.Path) -> None:
+        """
+        It can read typed data which is an int.
+        """
+        js_path.write_text("const theAnswer = 42;\n")
+
+        answer = read_typed_js(js_path, varname="theAnswer", model=int)
+        assert answer == 42
+
+    def test_can_read_list_int(self, js_path: pathlib.Path) -> None:
+        """
+        It can read typed data which is a list of ints.
+        """
+        js_path.write_text("const diceValues = [1,2,3,4,5,6];\n")
+
+        answer = read_typed_js(js_path, varname="diceValues", model=list[int])
+        assert answer == [1, 2, 3, 4, 5, 6]
+
+
 class TestWriteJs:
     """
     Tests for the ``write_js()`` function.