From 34802fb377a21c1d361c447c92a8f14474a3ef37 Mon Sep 17 00:00:00 2001 From: hkxs Date: Mon, 30 Dec 2024 12:05:43 -0600 Subject: [PATCH 1/7] Use environment variables to use std::regex --- README.rst | 10 ++++++++++ setup.py | 15 ++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index e84d097..2dacff9 100644 --- a/README.rst +++ b/README.rst @@ -123,3 +123,13 @@ some aspects. See `this section`__ in the documentation for further details. __ https://python-rapidjson.readthedocs.io/en/latest/quickstart.html#incompatibilities .. _RapidJSON: http://rapidjson.org/ + +Regular Expression +------------------ + +By default RapidJson uses a simple NFA regular expression engine for it's schema +validation, see the section `RapidJson Regular Expression`__, it is possible +to use ``std::regex`` instead of the original implementation by setting the +environmental variable ``RAPIDJSON_SCHEMA_USE_STDREGEX``. + +__ http://rapidjson.org/md_doc_schema.html#Regex diff --git a/setup.py b/setup.py index 085fdcb..85371ce 100644 --- a/setup.py +++ b/setup.py @@ -8,6 +8,7 @@ import os.path import sys +from os import environ try: from setuptools import setup, Extension @@ -52,13 +53,25 @@ with open('CHANGES.rst', encoding='utf-8') as f: CHANGES = f.read() +if environ.get('RAPIDJSON_SCHEMA_USE_STDREGEX', '').strip() not in ('', '0'): + RAPIDJSON_SCHEMA_USE_STDREGEX = 1 + RAPIDJSON_SCHEMA_USE_INTERNALREGEX = 0 +else: + RAPIDJSON_SCHEMA_USE_STDREGEX = 0 + RAPIDJSON_SCHEMA_USE_INTERNALREGEX = 1 + extension_options = { 'sources': ['./rapidjson.cpp'], 'include_dirs': [rj_include_dir], - 'define_macros': [('PYTHON_RAPIDJSON_VERSION', VERSION)], + 'define_macros': [ + ('PYTHON_RAPIDJSON_VERSION', VERSION), + ('RAPIDJSON_SCHEMA_USE_INTERNALREGEX', RAPIDJSON_SCHEMA_USE_INTERNALREGEX), + ('RAPIDJSON_SCHEMA_USE_STDREGEX', RAPIDJSON_SCHEMA_USE_STDREGEX), + ], 'depends': ['./rapidjson_exact_version.txt'], } + if os.path.exists('rapidjson_exact_version.txt'): with
open('rapidjson_exact_version.txt', encoding='utf-8') as f: extension_options['define_macros'].append( From abf2d6b6809fa22c93a9c056adc2ae6503190813 Mon Sep 17 00:00:00 2001 From: hkxs Date: Thu, 2 Jan 2025 10:13:08 -0600 Subject: [PATCH 2/7] Expose RAPIDJSON_SCHEMA_USE_STDREGEX on the module --- rapidjson.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/rapidjson.cpp b/rapidjson.cpp index 40ce8fa..a858e75 100644 --- a/rapidjson.cpp +++ b/rapidjson.cpp @@ -4046,6 +4046,7 @@ module_exec(PyObject* m) MM_COERCE_KEYS_TO_STRINGS) || PyModule_AddIntConstant(m, "MM_SKIP_NON_STRING_KEYS", MM_SKIP_NON_STRING_KEYS) || PyModule_AddIntConstant(m, "MM_SORT_KEYS", MM_SORT_KEYS) + || PyModule_AddIntConstant(m, "RAPIDJSON_SCHEMA_USE_STDREGEX", RAPIDJSON_SCHEMA_USE_STDREGEX) || PyModule_AddStringConstant(m, "__version__", STRINGIFY(PYTHON_RAPIDJSON_VERSION)) From 933e0d0f0cb640a203b2971869cfeddc5bab90fe Mon Sep 17 00:00:00 2001 From: hkxs Date: Thu, 2 Jan 2025 10:40:32 -0600 Subject: [PATCH 3/7] Add test case that checks if RAPIDJSON_SCHEMA_USE_STDREGEX is set Update README to set environmental variable to 1 --- README.rst | 2 +- tests/test_validator.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 2dacff9..4921e0d 100644 --- a/README.rst +++ b/README.rst @@ -130,6 +130,6 @@ Regular Expression By default RapidJson uses a simple NFA regular expression engine for it's schema validation, see the section `RapidJson Regular Expression`__, it is possible to use ``std::regex`` instead of the original implementation by setting the -environmental variable ``RAPIDJSON_SCHEMA_USE_STDREGEX``. +environmental variable ``RAPIDJSON_SCHEMA_USE_STDREGEX=1``. 
__ http://rapidjson.org/md_doc_schema.html#Regex diff --git a/tests/test_validator.py b/tests/test_validator.py index 001cc1f..657d5b7 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -65,3 +65,21 @@ def test_invalid(schema, json, details): def test_additional_and_pattern_properties_valid(schema, json): validate = rj.Validator(schema) validate(json) + + +def test_std_regex_used(): + schema = rj.dumps({ + "type": "object", + "patternProperties": { + "^(?!a).+": { + "type": "string" + } + } + }) + validate = rj.Validator(schema) + validate('{"b": "A string"}') + if rj.RAPIDJSON_SCHEMA_USE_STDREGEX: + with pytest.raises(ValueError) as error: + validate('{"b": 1}') + else: + validate('{"b": 1}') From 97c3f8e7f14055b3ba64fd1b100b60ce22d4875e Mon Sep 17 00:00:00 2001 From: hkxs Date: Thu, 2 Jan 2025 10:45:39 -0600 Subject: [PATCH 4/7] Check for the actual error and not only for the exception --- tests/test_validator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_validator.py b/tests/test_validator.py index 657d5b7..3aeaf6e 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -81,5 +81,6 @@ def test_std_regex_used(): if rj.RAPIDJSON_SCHEMA_USE_STDREGEX: with pytest.raises(ValueError) as error: validate('{"b": 1}') + assert error.value.args == ('patternProperties', '#', '#/b') else: validate('{"b": 1}') From fba16a1aee992509f90f8bd2d03cfea80cc7d6d6 Mon Sep 17 00:00:00 2001 From: hkxs Date: Tue, 7 Jan 2025 17:21:25 -0600 Subject: [PATCH 5/7] Add missing typing --- typings/rapidjson/__init__.pyi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/typings/rapidjson/__init__.pyi b/typings/rapidjson/__init__.pyi index e4bab51..a8a5603 100644 --- a/typings/rapidjson/__init__.pyi +++ b/typings/rapidjson/__init__.pyi @@ -12,6 +12,8 @@ import typing as t __rapidjson_exact_version__: str __rapidjson_version__: str +RAPIDJSON_SCHEMA_USE_STDREGEX: int + _JSONType = t.Union[ str, From c1869cd91fe2efe3ff202b9d72adf2fd689c7b66 Mon
Sep 17 00:00:00 2001 From: hkxs Date: Tue, 7 Jan 2025 17:26:41 -0600 Subject: [PATCH 6/7] Update json schema url on README.rst --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 4921e0d..0d58475 100644 --- a/README.rst +++ b/README.rst @@ -32,7 +32,7 @@ capabilities. Latest version documentation is automatically rendered by `Read the Docs`__. -__ http://json-schema.org/documentation.html +__ http://json-schema.org/docs.html __ https://python-rapidjson.readthedocs.io/en/latest/ From 263c6c29abe291e2b18da61e7d685e4109c6df45 Mon Sep 17 00:00:00 2001 From: hkxs Date: Tue, 7 Jan 2025 17:40:09 -0600 Subject: [PATCH 7/7] Document regex engine on validator.rst --- README.rst | 18 ++++++++++++++++-- docs/validator.rst | 24 ++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 0d58475..e730f7a 100644 --- a/README.rst +++ b/README.rst @@ -124,12 +124,26 @@ __ https://python-rapidjson.readthedocs.io/en/latest/quickstart.html#incompatibi .. _RapidJSON: http://rapidjson.org/ -Regular Expression ------------------- +Regex Engine +------------ By default RapidJson uses a simple NFA regular expression engine for it's schema validation, see the section `RapidJson Regular Expression`__, it is possible to use ``std::regex`` instead of the original implementation by setting the environmental variable ``RAPIDJSON_SCHEMA_USE_STDREGEX=1``. +.. code-block:: bash + + $ export RAPIDJSON_SCHEMA_USE_STDREGEX=1 + $ pip install --no-binary python-rapidjson python-rapidjson + +After installation, you can verify that std::regex is being used by checking the +constant in the module: + +..
code-block:: python + + >>> import rapidjson + >>> rapidjson.RAPIDJSON_SCHEMA_USE_STDREGEX + 1 + __ http://rapidjson.org/md_doc_schema.html#Regex diff --git a/docs/validator.rst b/docs/validator.rst index 53b5936..7d719be 100644 --- a/docs/validator.rst +++ b/docs/validator.rst @@ -75,3 +75,27 @@ Traceback (most recent call last): File "<stdin>", line 1, in <module> rapidjson.JSONDecodeError: Invalid JSON + +============ +Regex Engine +============ +By default RapidJson uses a simple NFA regular expression engine for its schema +validation, see the section `RapidJson Regular Expression`__, it is possible +to use ``std::regex`` instead of the original implementation by setting the +environment variable ``RAPIDJSON_SCHEMA_USE_STDREGEX=1``. + +.. code-block:: bash + + $ export RAPIDJSON_SCHEMA_USE_STDREGEX=1 + $ pip install --no-binary python-rapidjson python-rapidjson + +After installation, you can verify that std::regex is being used by checking the +constant in the module: + +.. code-block:: python + + >>> import rapidjson + >>> rapidjson.RAPIDJSON_SCHEMA_USE_STDREGEX + 1 + +__ http://rapidjson.org/md_doc_schema.html#Regex