Skip to main content

Merge pull request #29 from alexwlchan/validate-type

ID
ee337c4
date
2025-03-07 07:08:43+00:00
author
Alex Chan <alex@alexwlchan.net>
parents
49b72b2, 5b77ed3
message
Merge pull request #29 from alexwlchan/validate-type

Add a `read_typed_js` function for reading typed JSON
changed files
7 files, 171 additions, 30 deletions

Changed files

CHANGELOG.md (1538) → CHANGELOG.md (2182)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f8ab0f6..e7ad325 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,17 @@
 # CHANGELOG
 
+## v1.2.0
+
+This adds a new function `read_typed_js`, which is like `read_js` but will additionally validate the data against a type you specify.
+
+*   `read_js()` returns `typing.Any`, and will always return something if the file contains valid JSON
+*   `read_typed_js` returns `T`, where `T` is the type you specify as `model`.
+    This will only return if the file contains JSON that matches the type, and otherwise it will throw a `pydantic.ValidationError`.
+
+This is useful if you want to check your data or you write typed Python.
+
+You need to install the `typed` extra to get this function, i.e. `pip install javascript-data-files[typed]`.
+
 ## v1.1.1
 
 Tweak the way the JavaScript is encoded to make it slightly more compact and readable -- in particular, short lists will now be encoded as a single line, rather than split across multiple lines.

dev_requirements.in (56) → dev_requirements.in (63)

diff --git a/dev_requirements.in b/dev_requirements.in
index 1d9fecc..7b0b1eb 100644
--- a/dev_requirements.in
+++ b/dev_requirements.in
@@ -1,4 +1,4 @@
--e file:.
+-e file:.[typed]
 
 build
 interrogate

dev_requirements.txt (1901) → dev_requirements.txt (2024)

diff --git a/dev_requirements.txt b/dev_requirements.txt
index c9563e3..da5405c 100644
--- a/dev_requirements.txt
+++ b/dev_requirements.txt
@@ -2,95 +2,102 @@
 #    uv pip compile dev_requirements.in --output-file dev_requirements.txt
 -e file:.
     # via -r dev_requirements.in
-attrs==24.2.0
+annotated-types==0.7.0
+    # via pydantic
+attrs==25.1.0
     # via interrogate
-build==1.2.1
+build==1.2.2.post1
     # via -r dev_requirements.in
-certifi==2024.7.4
+certifi==2025.1.31
     # via requests
-charset-normalizer==3.3.2
+charset-normalizer==3.4.1
     # via requests
-click==8.1.7
+click==8.1.8
     # via interrogate
 colorama==0.4.6
     # via interrogate
-coverage==7.6.1
+coverage==7.6.12
     # via pytest-cov
 docutils==0.21.2
     # via readme-renderer
-idna==3.7
-    # via requests
-importlib-metadata==8.2.0
+id==1.5.0
     # via twine
+idna==3.10
+    # via requests
 iniconfig==2.0.0
     # via pytest
 interrogate==1.7.0
     # via -r dev_requirements.in
 jaraco-classes==3.4.0
     # via keyring
-jaraco-context==5.3.0
+jaraco-context==6.0.1
     # via keyring
-jaraco-functools==4.0.2
+jaraco-functools==4.1.0
     # via keyring
-keyring==25.3.0
+keyring==25.6.0
     # via twine
 markdown-it-py==3.0.0
     # via rich
 mdurl==0.1.2
     # via markdown-it-py
-more-itertools==10.4.0
+more-itertools==10.6.0
     # via
     #   jaraco-classes
     #   jaraco-functools
-mypy==1.11.1
+mypy==1.15.0
     # via -r dev_requirements.in
 mypy-extensions==1.0.0
     # via mypy
-nh3==0.2.18
+nh3==0.2.21
     # via readme-renderer
-packaging==24.1
+packaging==24.2
     # via
     #   build
     #   pytest
-pkginfo==1.10.0
-    # via twine
+    #   twine
 pluggy==1.5.0
     # via pytest
 py==1.11.0
     # via interrogate
-pygments==2.18.0
+pydantic==2.10.6
+    # via javascript-data-files
+pydantic-core==2.27.2
+    # via pydantic
+pygments==2.19.1
     # via
     #   readme-renderer
     #   rich
-pyproject-hooks==1.1.0
+pyproject-hooks==1.2.0
     # via build
-pytest==8.3.2
+pytest==8.3.5
     # via pytest-cov
-pytest-cov==5.0.0
+pytest-cov==6.0.0
     # via -r dev_requirements.in
 readme-renderer==44.0
     # via twine
 requests==2.32.3
     # via
+    #   id
     #   requests-toolbelt
     #   twine
 requests-toolbelt==1.0.0
     # via twine
 rfc3986==2.0.0
     # via twine
-rich==13.7.1
+rich==13.9.4
     # via twine
-ruff==0.6.1
+ruff==0.9.9
     # via -r dev_requirements.in
 tabulate==0.9.0
     # via interrogate
-twine==5.1.1
+twine==6.1.0
     # via -r dev_requirements.in
 typing-extensions==4.12.2
-    # via mypy
-urllib3==2.2.2
+    # via
+    #   mypy
+    #   pydantic
+    #   pydantic-core
+urllib3==2.3.0
     # via
     #   requests
     #   twine
-zipp==3.20.0
-    # via importlib-metadata

pyproject.toml (1269) → pyproject.toml (1323)

diff --git a/pyproject.toml b/pyproject.toml
index c73b6b0..4bb99b5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,6 +23,9 @@ requires-python = ">=3.12"
 dependencies = []
 dynamic = ["version"]
 
+[project.optional-dependencies]
+typed = ["pydantic"]
+
 [project.urls]
 "Homepage" = "https://github.com/alexwlchan/python-js-files"
 "Changelog" = "https://github.com/alexwlchan/python-js-files/blob/main/CHANGELOG.md"

src/javascript_data_files/__init__.py (6426) → src/javascript_data_files/__init__.py (6907)

diff --git a/src/javascript_data_files/__init__.py b/src/javascript_data_files/__init__.py
index 7b0d13e..034b15b 100644
--- a/src/javascript_data_files/__init__.py
+++ b/src/javascript_data_files/__init__.py
@@ -25,6 +25,9 @@ from .encoder import encode_as_js, encode_as_json
 __version__ = "1.1.1"
 
 
+T = typing.TypeVar("T")
+
+
 def read_js(p: pathlib.Path | str, *, varname: str) -> typing.Any:
     """
     Read a JavaScript "data file".
@@ -55,6 +58,21 @@ def read_js(p: pathlib.Path | str, *, varname: str) -> typing.Any:
     return json.loads(json_string)
 
 
+def read_typed_js[T](p: pathlib.Path | str, *, varname: str, model: type[T]) -> T:
+    """
+    Read a JavaScript "data file".
+
+    This will validate the contents of the data file against the type
+    you provide, and will throw a ``pydantic.ValidationError`` if the
+    contents do not match the specified type.
+    """
+    from .validate_type import validate_type
+
+    data = read_js(p, varname=varname)
+
+    return validate_type(data, model=model)
+
+
 def write_js(
     p: pathlib.Path | str | io.TextIOBase | io.BufferedIOBase,
     *,

src/javascript_data_files/validate_type.py (0) → src/javascript_data_files/validate_type.py (1290)

diff --git a/src/javascript_data_files/validate_type.py b/src/javascript_data_files/validate_type.py
new file mode 100644
index 0000000..490d4d6
--- /dev/null
+++ b/src/javascript_data_files/validate_type.py
@@ -0,0 +1,48 @@
+"""
+Helper methods for validating that an arbitrary blob matches a given model.
+"""
+
+import functools
+import typing
+
+from pydantic import ConfigDict, TypeAdapter
+
+
+T = typing.TypeVar("T")
+
+
+@functools.cache
+def _get_validator(model: type[T]) -> TypeAdapter[T]:
+    """
+    Get the validator for a given type.  This is a moderately expensive
+    process, so we cache the result -- we only need to create the
+    validator once for each type.
+    """
+    try:
+        model.__pydantic_config__ = ConfigDict(extra="forbid")  # type: ignore
+    except (AttributeError, TypeError):
+        pass
+
+    return TypeAdapter(model)
+
+
+def validate_type(t: typing.Any, *, model: type[T]) -> T:
+    """
+    Check that some data matches a given type.
+
+    We use this to e.g. check that the structured data we read from
+    a JavaScript data file matches our definitions, so we can use the
+    data in our type-checked Python.
+
+    See https://stackoverflow.com/a/77386216/1558022
+    """
+    # This is to fix an issue from the type checker:
+    #
+    #     Argument 1 to "__call__" of "_lru_cache_wrapper"
+    #     has incompatible type "type[T]"; expected "Hashable"
+    #
+    assert isinstance(model, typing.Hashable)
+
+    validator = _get_validator(model)
+
+    return validator.validate_python(t, strict=True)

tests/test_javascript_data_files.py (16002) → tests/test_javascript_data_files.py (17712)

diff --git a/tests/test_javascript_data_files.py b/tests/test_javascript_data_files.py
index 6130c33..34eecd3 100644
--- a/tests/test_javascript_data_files.py
+++ b/tests/test_javascript_data_files.py
@@ -6,12 +6,14 @@ import io
 import pathlib
 import typing
 
+import pydantic
 import pytest
 
 from javascript_data_files import (
     append_to_js_array,
     append_to_js_object,
     read_js,
+    read_typed_js,
     write_js,
 )
 
@@ -95,6 +97,57 @@ class TestReadJs:
             read_js(js_path, varname="blueTriangle")
 
 
+class TestReadTypedJs:
+    """
+    Tests for the ``read_typed_js()`` function.
+    """
+
+    def test_matches_model(self, js_path: pathlib.Path) -> None:
+        """
+        If the data matches the model, it's read correctly.
+        """
+        js_path.write_text(
+            'const redPentagon = {\n  "sides": 5,\n  "colour": "red"\n};\n'
+        )
+
+        Shape = typing.TypedDict("Shape", {"sides": int, "colour": str})
+
+        shape = read_typed_js(js_path, varname="redPentagon", model=Shape)
+
+        assert shape == {"sides": 5, "colour": "red"}
+
+    def test_does_not_match_model(self, js_path: pathlib.Path) -> None:
+        """
+        If the data does not match the model, it throws a ValidationError.
+        """
+        js_path.write_text(
+            'const redPentagon = {\n  "sides": 5,\n  "colour": "red"\n};\n'
+        )
+
+        Vehicle = typing.TypedDict("Vehicle", {"wheels": int, "colour": str})
+
+        with pytest.raises(pydantic.ValidationError):
+            read_typed_js(js_path, varname="redPentagon", model=Vehicle)
+
+    def test_can_read_int(self, js_path: pathlib.Path) -> None:
+        """
+        It can read typed data which is an int.
+        """
+        js_path.write_text("const theAnswer = 42;\n")
+
+        answer = read_typed_js(js_path, varname="theAnswer", model=int)
+        assert answer == 42
+
+    def test_can_read_list_int(self, js_path: pathlib.Path) -> None:
+        """
+        It can read typed data which is a list of ints.
+        """
+        js_path.write_text("const diceValues = [1,2,3,4,5,6];\n")
+
+        answer = read_typed_js(js_path, varname="diceValues", model=list[int])
+        assert answer == [1, 2, 3, 4, 5, 6]
+
+
 class TestWriteJs:
     """
     Tests for the ``write_js()`` function.