From 022ab80af02228ff72660258442fa28051730eaf Mon Sep 17 00:00:00 2001 From: masklinn Date: Sat, 5 Oct 2024 14:07:21 +0200 Subject: [PATCH 01/48] Add py.typed This marker file is necessary when *distributing* libraries, in order for the consumer side to know that the library is typed and for typecheckers to know to use it directly rather than look for a stub package (or give up on typing). Closes #218 --- MANIFEST.in | 15 +++++++++------ pyproject.toml | 6 ++++++ src/ua_parser/py.typed | 0 3 files changed, 15 insertions(+), 6 deletions(-) create mode 100644 src/ua_parser/py.typed diff --git a/MANIFEST.in b/MANIFEST.in index 9c004de5..d8aabc5b 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,9 +1,12 @@ -exclude .* -prune .github -global-exclude *~ - include README.rst include LICENSE + graft uap-core -exclude uap-core/.* -recursive-exclude uap-core *.js + +prune .github +prune uap-core/.github +global-exclude *~ +global-exclude .* +global-exclude *.js +global-exclude __pycache__ +global-exclude *.py[co] diff --git a/pyproject.toml b/pyproject.toml index 920fcd0f..1dae0e60 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,12 @@ classifiers = [ "Programming Language :: Python :: Implementation :: PyPy" ] +[tool.setuptools.packages.find] +where = ["src"] + +[tool.setuptools.package-data] +"ua_parser" = ["py.typed"] + [tool.ruff.lint] select = ["F", "E", "W", "I", "RET", "RUF", "PT"] ignore = [ diff --git a/src/ua_parser/py.typed b/src/ua_parser/py.typed new file mode 100644 index 00000000..e69de29b From 46778e79bcdc60dbb22f27da243bccc22272bb4e Mon Sep 17 00:00:00 2001 From: masklinn Date: Mon, 15 Jul 2024 21:20:24 +0200 Subject: [PATCH 02/48] regex-based POC Uses ua-parser/uap-rust#3 Fixes #166 --- .github/workflows/ci.yml | 53 ++++++++++----------------- doc/conf.py | 2 ++ doc/installation.rst | 18 ++++++---- pyproject.toml | 15 +++++--- setup.py | 14 +++++--- src/ua_parser/__main__.py | 17 +++++---- src/ua_parser/regex.py | 76 
+++++++++++++++++++++++++++++++++++++++ tests/test_core.py | 19 ++++++++-- tox.ini | 22 ++++++++---- 9 files changed, 169 insertions(+), 67 deletions(-) create mode 100644 src/ua_parser/regex.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c2a7957d..74ab4a5e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,15 +2,8 @@ name: CI on: push: - branches: [ '*' ] pull_request: - branches: [ '*' ] workflow_dispatch: - schedule: - # cron is kinda random, assumes 22:00 UTC is a low ebb, eastern - # countries are very early morning, and US are mid-day to - # mid-afternoon - - cron: '0 22 * * 2' jobs: checks: @@ -79,7 +72,6 @@ jobs: test: runs-on: ubuntu-latest needs: compile - continue-on-error: ${{ matrix.python-version == '3.13' || matrix.python-version == 'pypy-3.11' }} strategy: fail-fast: false matrix: @@ -88,19 +80,14 @@ jobs: - sdist - source python-version: - - "3.8" - "3.9" - "3.10" - "3.11" - "3.12" - "3.13" - - "pypy-3.8" - - "pypy-3.9" - "pypy-3.10" # - "pypy-3.11" - # don't enable graal because it's slower than even pypy and - # fails because oracle/graalpython#385 - # - "graalpy-23" + - "graalpy-24" include: - source: sdist artifact: dist/*.tar.gz @@ -116,26 +103,23 @@ jobs: with: python-version: ${{ matrix.python-version }} allow-prereleases: true - - name: Install test dependencies - run: | - python -mpip install --upgrade pip - # cyaml is outright broken on pypy - if ! ${{ startsWith(matrix.python-version, 'pypy-') }}; then - # if binary wheels are not available for the current - # package install libyaml-dev so we can install pyyaml - # from source - if ! pip download --only-binary pyyaml -rrequirements_dev.txt > /dev/null 2>&1; then - sudo apt install libyaml-dev - fi + - run: python -mpip install --upgrade pip + - run: | + # if binary wheels are not available for the current + # package install libyaml-dev so we can install pyyaml + # from source + if ! 
pip download --only-binary :all: pyyaml > /dev/null 2>&1; then + sudo apt install libyaml-dev fi - python -mpip install pytest pyyaml - - # re2 is basically impossible to install from source so don't - # bother, and suppress installation failure so the test does - # not fail (re2 tests will just be skipped for versions / - # implementations for which google does not provide a binary - # wheel) - python -mpip install --only-binary :all: google-re2 || true + - run: python -mpip install pytest pyyaml + # install rs accelerator if available, ignore if not + - run: python -mpip install ua-parser-rs || true + # re2 is basically impossible to install from source so don't + # bother, and suppress installation failure so the test does + # not fail (re2 tests will just be skipped for versions / + # implementations for which google does not provide a binary + # wheel) + - run: 'python -mpip install --only-binary :all: google-re2 || true' - name: download ${{ matrix.source }} artifact if: matrix.artifact uses: actions/download-artifact@v4 @@ -143,7 +127,6 @@ jobs: name: ${{ matrix.source }} path: dist/ - name: install package in environment - run: | - pip install ${{ matrix.artifact || '.' }} + run: pip install ${{ matrix.artifact || '.' }} - name: run tests run: pytest -v -Werror -Wignore::ImportWarning --doctest-glob="*.rst" -ra diff --git a/doc/conf.py b/doc/conf.py index f0d38386..cc076432 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -19,9 +19,11 @@ rst_epilog = """ .. |pyyaml| replace:: ``PyYaml`` .. |re2| replace:: ``google-re2`` +.. |regex| replace:: ``regex`` .. _pyyaml: https://pyyaml.org .. _re2: https://pypi.org/project/google-re2 +.. 
_regex: https://pypi.org/project/ua-parser-rs """ # -- General configuration --------------------------------------------------- diff --git a/doc/installation.rst b/doc/installation.rst index e8ca58d9..d4bf7ba4 100644 --- a/doc/installation.rst +++ b/doc/installation.rst @@ -5,11 +5,14 @@ Installation Python Version ============== -ua-parser currently supports Python 3.8 and newer, as well as recent -versions of PyPy supporting the same standards. +ua-parser currently supports CPython 3.9 and newer, recent Pypy +(supporting 3.10), and Graal 24. -.. note:: While PyPy is supported, it is not *fast*, and google-re2 is - not supported on it. +.. note:: + + While pypy and graal are supported, they are rather slow when using + pure python mode and ``[re2]`` is not supported, so using the + ``[regex]`` feature is very strongly recommended. Installation ============ @@ -21,13 +24,14 @@ Installation Optional Dependencies ===================== -ua-parser currently has two optional dependencies, |re2|_ and -|pyyaml|_. These dependencies will be detected and used automatically +ua-parser currently has three optional dependencies, |regex|_, |re2|_ and +|pyyaml|_. These dependencies will be detected and used automatically if installed, but can also be installed via and alongside ua-parser: .. 
code-block:: sh + $ pip install 'ua-parser[regex]' $ pip install 'ua-parser[re2]' $ pip install 'ua-parser[yaml]' - $ pip install 'ua-parser[re2,yaml]' + $ pip install 'ua-parser[regex,yaml]' diff --git a/pyproject.toml b/pyproject.toml index 1dae0e60..b7b0280a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,9 +7,8 @@ name = "ua-parser" description = "Python port of Browserscope's user agent parser" version = "1.0.0a1" readme = "README.rst" -requires-python = ">=3.8" +requires-python = ">=3.9" dependencies = [] -optional-dependencies = { yaml = ["PyYaml"], re2 = ["google-re2"] } license = {text = "Apache 2.0"} urls = {repository = "https://github.com/ua-parser/uap-python"} @@ -35,14 +34,20 @@ classifiers = [ "Topic :: Internet :: WWW/HTTP", "Topic :: Software Development :: Libraries :: Python Modules", "Programming Language :: Python", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Programming Language :: Python :: Implementation :: CPython", - "Programming Language :: Python :: Implementation :: PyPy" + "Programming Language :: Python :: Implementation :: PyPy", + "Programming Language :: Python :: Implementation :: GraalPy", ] +[project.optional-dependencies] +yaml = ["PyYaml"] +re2 = ["google-re2"] +regex = ["ua-parser-rs"] + [tool.setuptools.packages.find] where = ["src"] @@ -63,7 +68,7 @@ known-first-party = ["ua_parser"] combine-as-imports = true [tool.mypy] -python_version = "3.8" +python_version = "3.9" files = "src,tests" # can't use strict because it's only global diff --git a/setup.py b/setup.py index c6947780..f423348e 100644 --- a/setup.py +++ b/setup.py @@ -67,16 +67,20 @@ def run(self) -> None: dest_lazy = outdir / "_lazy.py" dest_legacy = outdir / "_regexes.py" - with dest.open("wb") as eager, dest_lazy.open("wb") as lazy, dest_legacy.open( - "wb" - ) as legacy: + with ( + 
dest.open("wb") as eager, + dest_lazy.open("wb") as lazy, + dest_legacy.open("wb") as legacy, + ): eager = EagerWriter(eager) lazy = LazyWriter(lazy) legacy = LegacyWriter(legacy) for section in ["user_agent_parsers", "os_parsers", "device_parsers"]: - with eager.section(section), lazy.section(section), legacy.section( - section + with ( + eager.section(section), + lazy.section(section), + legacy.section(section), ): extract = EXTRACTORS[section] for p in regexes[section]: diff --git a/src/ua_parser/__main__.py b/src/ua_parser/__main__.py index d4ff29b2..c461a288 100644 --- a/src/ua_parser/__main__.py +++ b/src/ua_parser/__main__.py @@ -39,11 +39,13 @@ from .caching import Cache, Local from .loaders import load_builtins, load_yaml from .re2 import Resolver as Re2Resolver +from .regex import Resolver as RegexResolver from .user_agent_parser import Parse CACHEABLE = { "basic": True, "re2": True, + "regex": True, "legacy": False, } @@ -178,6 +180,8 @@ def get_parser( r = BasicResolver(rules) elif parser == "re2": r = Re2Resolver(rules) + elif parser == "regex": + r = RegexResolver(rules) else: sys.exit(f"unknown parser {parser!r}") @@ -327,6 +331,7 @@ def run_threaded(args: argparse.Namespace) -> None: ("locking-lru", CachingResolver(basic, caching.Lru(CACHESIZE))), ("local-lru", CachingResolver(basic, Local(lambda: caching.Lru(CACHESIZE)))), ("re2", Re2Resolver(load_builtins())), + ("regex", RegexResolver(load_builtins())), ] for name, resolver in resolvers: print(f"{name:11}: ", end="", flush=True) @@ -436,14 +441,14 @@ def __call__( bench.add_argument( "--bases", nargs="+", - choices=["basic", "re2", "legacy"], - default=["basic", "re2", "legacy"], + choices=["basic", "re2", "regex", "legacy"], + default=["basic", "re2", "regex", "legacy"], help="""Base resolvers to benchmark. 
`basic` is a linear search through the regexes file, `re2` is a prefiltered regex set - implemented in C++, `legacy` is the legacy API (essentially a - basic resolver with a clearing cache of fixed 200 entries, but - less layered so usually slightly faster than an equivalent - basic-based resolver).""", + implemented in C++, `regex` is a prefiltered regex set implemented + in Rust, `legacy` is the legacy API (essentially a basic resolver + with a clearing cache of fixed 200 entries, but less layered so + usually slightly faster than an equivalent basic-based resolver).""", ) bench.add_argument( "--caches", diff --git a/src/ua_parser/regex.py b/src/ua_parser/regex.py new file mode 100644 index 00000000..704df160 --- /dev/null +++ b/src/ua_parser/regex.py @@ -0,0 +1,76 @@ +__all__ = ["Resolver"] + +from operator import attrgetter + +import ua_parser_rs # type: ignore + +from .core import ( + Device, + Domain, + Matchers, + OS, + PartialResult, + UserAgent, +) + + +class Resolver: + ua: ua_parser_rs.UserAgentExtractor + os: ua_parser_rs.OSExtractor + de: ua_parser_rs.DeviceExtractor + + def __init__(self, matchers: Matchers) -> None: + ua, os, de = matchers + self.ua = ua_parser_rs.UserAgentExtractor( + map( + attrgetter("regex", "family", "major", "minor", "patch", "patch_minor"), + ua, + ) + ) + self.os = ua_parser_rs.OSExtractor( + map( + attrgetter("regex", "family", "major", "minor", "patch", "patch_minor"), + os, + ) + ) + self.de = ua_parser_rs.DeviceExtractor( + map( + attrgetter("regex", "regex_flag", "family", "brand", "model"), + de, + ) + ) + + def __call__(self, ua: str, domains: Domain, /) -> PartialResult: + user_agent = os = device = None + if Domain.USER_AGENT in domains: + if m := self.ua.extract(ua): + user_agent = UserAgent( + m.family, + m.major, + m.minor, + m.patch, + m.patch_minor, + ) + if Domain.OS in domains: + if m := self.os.extract(ua): + os = OS( + m.family, + m.major, + m.minor, + m.patch, + m.patch_minor, + ) + if Domain.DEVICE in 
domains: + if m := self.de.extract(ua): + device = Device( + m.family, + m.brand, + m.model, + ) + return PartialResult( + domains=domains, + string=ua, + user_agent=user_agent, + os=os, + device=device, + ) diff --git a/tests/test_core.py b/tests/test_core.py index 4c801265..310ddec5 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -53,6 +53,19 @@ else: PARSERS.append(pytest.param(Parser(re2.Resolver(load_builtins())), id="re2")) +try: + from ua_parser import regex +except ImportError: + PARSERS.append( + pytest.param( + None, + id="regex", + marks=pytest.mark.skip(reason="regex parser not available"), + ) + ) +else: + PARSERS.append(pytest.param(Parser(regex.Resolver(load_builtins())), id="regex")) + UA_FIELDS = {f.name for f in dataclasses.fields(UserAgent)} @@ -64,7 +77,7 @@ CORE_DIR / "test_resources" / "firefox_user_agent_strings.yaml", CORE_DIR / "test_resources" / "pgts_browser_list.yaml", ], - ids=attrgetter("name"), + ids=attrgetter("stem"), ) def test_ua(parser, test_file): with test_file.open("rb") as f: @@ -90,7 +103,7 @@ def test_ua(parser, test_file): CORE_DIR / "tests" / "test_os.yaml", CORE_DIR / "test_resources" / "additional_os_tests.yaml", ], - ids=attrgetter("name"), + ids=attrgetter("stem"), ) def test_os(parser, test_file): with test_file.open("rb") as f: @@ -111,7 +124,7 @@ def test_os(parser, test_file): [ CORE_DIR / "tests" / "test_device.yaml", ], - ids=attrgetter("name"), + ids=attrgetter("stem"), ) def test_devices(parser, test_file): with test_file.open("rb") as f: diff --git a/tox.ini b/tox.ini index bb4af081..778055e7 100644 --- a/tox.ini +++ b/tox.ini @@ -1,12 +1,14 @@ [tox] min_version = 4.0 -env_list = py3{8,9,10,11,12} - pypy3.{8,9,10} +env_list = py3{9,10,11,12} + pypy3.10 + #graalpy-24 flake8, black, typecheck labels = - test = py3{8,9,10,11,12},pypy3.{8,9,10} - cpy = py3{8,9,10,11,12} - pypy = pypy3.{8,9,10} + test = py3{9,10,11,12},pypy3.10#,graalpy-24 + cpy = py3{9,10,11,12} + pypy = pypy3.10 + #graal = graalpy-24 
check = flake8, black, typecheck [testenv] @@ -20,13 +22,21 @@ deps = pytest pyyaml google-re2 + ua-parser-rs commands = pytest -Werror --doctest-glob="*.rst" {posargs} -[testenv:pypy3.{8,9,10}] +[testenv:pypy3.10] deps = pytest pyyaml + ua-parser-rs + +[testenv:graalpy-24] +deps = + pytest + pyyaml + ua-parser-rs [testenv:flake8] package = skip From 60e1c0f9f4e9bd29f9ac6e756f33c8c87da3a82d Mon Sep 17 00:00:00 2001 From: masklinn Date: Mon, 15 Jul 2024 21:20:24 +0200 Subject: [PATCH 03/48] Fix perf scripts - enable graal on tox (24.1 with master's virtualenv plugin seems to work) - make tracemalloc optional in CLI script (doesn't work in pypy) - add regex to CLI script - comment graalpy trove classifier (doesn't exist yet) fixes #206 --- pyproject.toml | 9 ++++++++- src/ua_parser/__main__.py | 24 +++++++++++++++++++++--- tox.ini | 6 +++--- 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b7b0280a..1979ebb7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,8 @@ classifiers = [ "Programming Language :: Python :: 3.12", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", - "Programming Language :: Python :: Implementation :: GraalPy", + # no graalpy classifier yet (pypa/trove-classifiers#188) + # "Programming Language :: Python :: Implementation :: GraalPy", ] [project.optional-dependencies] @@ -54,6 +55,12 @@ where = ["src"] [tool.setuptools.package-data] "ua_parser" = ["py.typed"] +[tool.ruff] +exclude = [ + "src/ua_parser/_lazy.py", + "src/ua_parser/_matchers.py", +] + [tool.ruff.lint] select = ["F", "E", "W", "I", "RET", "RUF", "PT"] ignore = [ diff --git a/src/ua_parser/__main__.py b/src/ua_parser/__main__.py index c461a288..0ed140f5 100644 --- a/src/ua_parser/__main__.py +++ b/src/ua_parser/__main__.py @@ -11,7 +11,7 @@ import sys import threading import time -import tracemalloc +import types from typing import ( Any, Callable, 
@@ -38,8 +38,15 @@ ) from .caching import Cache, Local from .loaders import load_builtins, load_yaml -from .re2 import Resolver as Re2Resolver -from .regex import Resolver as RegexResolver + +try: + from .re2 import Resolver as Re2Resolver +except ImportError: + pass +try: + from .regex import Resolver as RegexResolver +except ImportError: + pass from .user_agent_parser import Parse CACHEABLE = { @@ -60,6 +67,17 @@ ] ) +try: + import tracemalloc +except ImportError: + snapshot = types.SimpleNamespace( + compare_to=lambda _1, _2: [], + ) + tracemalloc = types.SimpleNamespace( # type: ignore + start=lambda: None, + take_snapshot=lambda: snapshot, + ) + def get_rules(parsers: List[str], regexes: Optional[io.IOBase]) -> Matchers: if regexes: diff --git a/tox.ini b/tox.ini index 778055e7..d8c7cf01 100644 --- a/tox.ini +++ b/tox.ini @@ -2,13 +2,13 @@ min_version = 4.0 env_list = py3{9,10,11,12} pypy3.10 - #graalpy-24 + graalpy-24 flake8, black, typecheck labels = - test = py3{9,10,11,12},pypy3.10#,graalpy-24 + test = py3{9,10,11,12},pypy3.10,graalpy-24 cpy = py3{9,10,11,12} pypy = pypy3.10 - #graal = graalpy-24 + graal = graalpy-24 check = flake8, black, typecheck [testenv] From d9efbfdc62b4e5429565910a61ada296dbc39b00 Mon Sep 17 00:00:00 2001 From: masklinn Date: Mon, 28 Oct 2024 21:30:42 +0100 Subject: [PATCH 04/48] Simplify tox specs We're not handling multiple versions of pypy (anymore) or graal so don't bother with version numbers. 
--- tox.ini | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/tox.ini b/tox.ini index d8c7cf01..de36509a 100644 --- a/tox.ini +++ b/tox.ini @@ -1,11 +1,11 @@ [tox] min_version = 4.0 env_list = py3{9,10,11,12} - pypy3.10 - graalpy-24 + pypy + graalpy flake8, black, typecheck labels = - test = py3{9,10,11,12},pypy3.10,graalpy-24 + test = py3{9,10,11,12},pypy,graalpy cpy = py3{9,10,11,12} pypy = pypy3.10 graal = graalpy-24 @@ -26,13 +26,7 @@ deps = commands = pytest -Werror --doctest-glob="*.rst" {posargs} -[testenv:pypy3.10] -deps = - pytest - pyyaml - ua-parser-rs - -[testenv:graalpy-24] +[testenv:{pypy,graalpy}] deps = pytest pyyaml From 1358e75775646da38e72d7f2b2b801f4cd19aaba Mon Sep 17 00:00:00 2001 From: masklinn Date: Mon, 28 Oct 2024 21:31:14 +0100 Subject: [PATCH 05/48] mypy's strict_concatenate is deprecated We're supposed to use `--extra-checks` instead but it doesn't have a documented config setting. Just remove it. Cf python/mypy#16189 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1979ebb7..3777c29b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,7 +86,7 @@ warn_redundant_casts = true # these can be overridden (maybe?) strict_equality = true -strict_concatenate = true +# strict_concatenate = true check_untyped_defs = true disallow_subclassing_any = true disallow_untyped_decorators = true From 6fb7b586479824bc435f2628d568458db1de879d Mon Sep 17 00:00:00 2001 From: masklinn Date: Mon, 28 Oct 2024 21:33:39 +0100 Subject: [PATCH 06/48] Add finite-automaton simplifier, for re2 and graal As I've discovered a while ago, finite automaton engines are not very fond of large bounded repetitions. In re2 and regex, that mostly translates to increased memory consumption (e.g. 
in their default modes, converting `.*` to `.{0,500}` increases the pattern's size by 115x in re2 and 84x in regex, if a capture is added on top then regex balloons to 219x), there is a performance impact but it's high single digit to low double, in regex at least (didn't test re2). However as it turns out Graal uses a JIT-ed DFA, and it *really* doesn't like these patterns, it spends a lot of time JIT-compiling (this is apparently the source of the extra 300% CPU use I could observe on what are purely single-threaded workloads, the JIT desperately trying to optimise regexes) them with no gain in performance: down-converting the regex back to the sensible increases performances by ~25%, though it doesn't seem to impact memory use... So... do that: `fa_simplifier` is the same idea as ua-parser/uap-rust@29b9195d886a5e1d13dc7109a002a7f8f12e5406 but from the Python side, and applied to graal and re2 (not regex because it does that internally as linked above). Also switch Graal over to the lazy builtins, it kinda spreads the cost but it seems stupid to compile the regexes only to immediately swap (fa_simplifier) and recompile them... so don't do that, especially as I couldn't be arsed to make the replacement conditional (so every eager regex is recompiled, even though only those which actually got modified by `fa_simplifier` need it...). 
Fixes #228 --- pyproject.toml | 1 + src/ua_parser/__init__.py | 10 ++++++---- src/ua_parser/basic.py | 23 ++++++++++++++++++++++- src/ua_parser/re2.py | 9 +++++---- src/ua_parser/utils.py | 33 +++++++++++++++++++++++++++++++++ tests/test_fa_simplifier.py | 15 +++++++++++++++ 6 files changed, 82 insertions(+), 9 deletions(-) create mode 100644 tests/test_fa_simplifier.py diff --git a/pyproject.toml b/pyproject.toml index 3777c29b..65271a4c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -110,6 +110,7 @@ module = [ "test_core", "test_caches", "test_parsers_basics", + "test_fa_simplifier", ] #check_untyped_defs = false diff --git a/src/ua_parser/__init__.py b/src/ua_parser/__init__.py index a9a09b47..85ac75a4 100644 --- a/src/ua_parser/__init__.py +++ b/src/ua_parser/__init__.py @@ -57,6 +57,7 @@ UserAgent, ) from .loaders import load_builtins, load_lazy_builtins +from .utils import IS_GRAAL Re2Resolver: Optional[Callable[[Matchers], Resolver]] = None if importlib.util.find_spec("re2"): @@ -132,10 +133,11 @@ def parse_device(self: Resolver, ua: str) -> Optional[Device]: def __getattr__(name: str) -> Parser: global parser if name == "parser": - parser = Parser.from_matchers( - load_builtins() if Re2Resolver is None else load_lazy_builtins() - ) - return parser + if Re2Resolver or IS_GRAAL: + matchers = load_lazy_builtins() + else: + matchers = load_builtins() + return Parser.from_matchers(matchers) raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/src/ua_parser/basic.py b/src/ua_parser/basic.py index bdc1e69e..00b49e15 100644 --- a/src/ua_parser/basic.py +++ b/src/ua_parser/basic.py @@ -1,7 +1,9 @@ __all__ = ["Resolver"] +import re +from itertools import chain from operator import methodcaller -from typing import List +from typing import Any, List from .core import ( Device, @@ -12,6 +14,7 @@ PartialResult, UserAgent, ) +from .utils import IS_GRAAL, fa_simplifier class Resolver: @@ -30,6 +33,24 @@ def __init__( matchers: Matchers, 
) -> None: self.user_agent_matchers, self.os_matchers, self.device_matchers = matchers + if IS_GRAAL: + matcher: Any + kind = next( + ( + "eager" if hasattr(type(m), "regex") else "lazy" + for m in chain.from_iterable(matchers) + ), + None, + ) + if kind == "eager": + for matcher in chain.from_iterable(matchers): + matcher.pattern = re.compile( + fa_simplifier(matcher.pattern.pattern), + flags=matcher.pattern.flags, + ) + elif kind == "lazy": + for matcher in chain.from_iterable(matchers): + matcher.regex = fa_simplifier(matcher.pattern.pattern) def __call__(self, ua: str, domains: Domain, /) -> PartialResult: parse = methodcaller("__call__", ua) diff --git a/src/ua_parser/re2.py b/src/ua_parser/re2.py index 83a4a148..1f17e225 100644 --- a/src/ua_parser/re2.py +++ b/src/ua_parser/re2.py @@ -14,6 +14,7 @@ PartialResult, UserAgent, ) +from .utils import fa_simplifier class DummyFilter: @@ -38,7 +39,7 @@ def __init__( if self.user_agent_matchers: self.ua = re2.Filter() for u in self.user_agent_matchers: - self.ua.Add(u.regex) + self.ua.Add(fa_simplifier(u.regex)) self.ua.Compile() else: self.ua = DummyFilter() @@ -46,7 +47,7 @@ def __init__( if self.os_matchers: self.os = re2.Filter() for o in self.os_matchers: - self.os.Add(o.regex) + self.os.Add(fa_simplifier(o.regex)) self.os.Compile() else: self.os = DummyFilter() @@ -58,9 +59,9 @@ def __init__( # no pattern uses global flags, but since they're not # supported in JS that seems safe. 
if d.flags & re.IGNORECASE: - self.devices.Add("(?i)" + d.regex) + self.devices.Add("(?i)" + fa_simplifier(d.regex)) else: - self.devices.Add(d.regex) + self.devices.Add(fa_simplifier(d.regex)) self.devices.Compile() else: self.devices = DummyFilter() diff --git a/src/ua_parser/utils.py b/src/ua_parser/utils.py index f3afa486..ac11c5a0 100644 --- a/src/ua_parser/utils.py +++ b/src/ua_parser/utils.py @@ -1,6 +1,9 @@ +import platform import re from typing import Match, Optional +IS_GRAAL: bool = platform.python_implementation() == "GraalVM" + def get(m: Match[str], idx: int) -> Optional[str]: return (m[idx] or None) if 0 < idx <= m.re.groups else None @@ -28,3 +31,33 @@ def replacer(repl: str, m: Match[str]) -> Optional[str]: return None return re.sub(r"\$(\d)", lambda n: get(m, int(n[1])) or "", repl).strip() or None + + +REPETITION_PATTERN = re.compile(r"\{(0|1)\s*,\s*\d{3,}\}") +CLASS_PATTERN = re.compile( + r""" +\[[^]]*\\(d|w)[^]]*\] +| +\\(d|w) +""", + re.VERBOSE, +) + + +def class_replacer(m: re.Match[str]) -> str: + d, w = ("0-9", "A-Za-z0-9_") if m[1] else ("[0-9]", "[A-Za-z0-9_]") + return m[0].replace(r"\d", d).replace(r"\w", w) + + +def fa_simplifier(pattern: str) -> str: + """uap-core makes significant use of large bounded repetitions, to + mitigate catastrophic backtracking. + + However this explodes the number of states (and thus graph size) + for finite automaton engines, which significantly increases their + memory use, and for those which use JITs it can exceed the JIT + threshold and force fallback to a slower engine (seems to be the + case for graal's TRegex). 
+ """ + pattern = REPETITION_PATTERN.sub(lambda m: "*" if m[1] == "0" else "+", pattern) + return CLASS_PATTERN.sub(class_replacer, pattern) diff --git a/tests/test_fa_simplifier.py b/tests/test_fa_simplifier.py new file mode 100644 index 00000000..1c660509 --- /dev/null +++ b/tests/test_fa_simplifier.py @@ -0,0 +1,15 @@ +import pytest # type: ignore + +from ua_parser.utils import fa_simplifier + + +@pytest.mark.parametrize( + ("from_", "to"), + [ + (r"\d", "[0-9]"), + (r"[\d]", "[0-9]"), + (r"[\d\.]", r"[0-9\.]"), + ], +) +def test_classes(from_, to): + assert fa_simplifier(from_) == to From de1c9c85291910307595e0210c4d2c1f4cf78d8c Mon Sep 17 00:00:00 2001 From: masklinn Date: Mon, 28 Oct 2024 22:24:00 +0100 Subject: [PATCH 07/48] Disable TRegex (Graal) JIT compilation TRegex JIT-ing really doesn't like the UAP workload one bit. It basically uses 3 cores worth of CPU to do nothing as the runtime is the same with and without compilation. Sadge. Might as well cut it off, no sense wasting CPU time on the runners. --- .github/workflows/ci.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 74ab4a5e..4f41e866 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -93,6 +93,9 @@ jobs: artifact: dist/*.tar.gz - source: wheel artifact: dist/*.whl + - opts: "" + - python-version: graalpy-24 + opts: "--experimental-options --engine.CompileOnly='~tregex re'" steps: - name: Checkout working copy uses: actions/checkout@v4 @@ -127,6 +130,6 @@ jobs: name: ${{ matrix.source }} path: dist/ - name: install package in environment - run: pip install ${{ matrix.artifact || '.' }} + run: python -m pip install ${{ matrix.artifact || '.' 
}} - name: run tests - run: pytest -v -Werror -Wignore::ImportWarning --doctest-glob="*.rst" -ra + run: python ${{ matrix.opts }} -m pytest -v -Werror -Wignore::ImportWarning --doctest-glob="*.rst" -ra From 4e07493716642ff84edc6d4b6d489a9c9b9166c9 Mon Sep 17 00:00:00 2001 From: masklinn Date: Tue, 29 Oct 2024 17:15:17 +0100 Subject: [PATCH 08/48] Add the regex resolver at the top of the queue Not sure why I didn't do that when I merged it, but I think it's the best default, if available, which is unlikely for the reason that it requires a completely bespoke dep. --- src/ua_parser/__init__.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/ua_parser/__init__.py b/src/ua_parser/__init__.py index 85ac75a4..f0340c61 100644 --- a/src/ua_parser/__init__.py +++ b/src/ua_parser/__init__.py @@ -59,9 +59,23 @@ from .loaders import load_builtins, load_lazy_builtins from .utils import IS_GRAAL -Re2Resolver: Optional[Callable[[Matchers], Resolver]] = None +_ResolverCtor = Callable[[Matchers], Resolver] +Re2Resolver: Optional[_ResolverCtor] = None if importlib.util.find_spec("re2"): from .re2 import Resolver as Re2Resolver +RegexResolver: Optional[_ResolverCtor] = None +if importlib.util.find_spec("ua_parser_rs"): + from .regex import Resolver as RegexResolver +BestAvailableResolver: _ResolverCtor = next( + filter( + None, + ( + RegexResolver, + Re2Resolver, + lambda m: CachingResolver(BasicResolver(m), Cache(200)), + ), + ) +) VERSION = (1, 0, 0) @@ -82,15 +96,7 @@ def from_matchers(cls, m: Matchers, /) -> Parser: stack. 
""" - if Re2Resolver is not None: - return cls(Re2Resolver(m)) - else: - return cls( - CachingResolver( - BasicResolver(m), - Cache(200), - ) - ) + return cls(BestAvailableResolver(m)) def __init__(self, resolver: Resolver) -> None: self.resolver = resolver @@ -133,7 +139,7 @@ def parse_device(self: Resolver, ua: str) -> Optional[Device]: def __getattr__(name: str) -> Parser: global parser if name == "parser": - if Re2Resolver or IS_GRAAL: + if RegexResolver or Re2Resolver or IS_GRAAL: matchers = load_lazy_builtins() else: matchers = load_builtins() From 4f3bcdeba1cf13658f7bbc4780aa48ccc0ad40f9 Mon Sep 17 00:00:00 2001 From: masklinn Date: Tue, 29 Oct 2024 20:33:33 +0100 Subject: [PATCH 09/48] Add doc on picking resolvers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also bump cache up: on `bench` the `basic` resolver high water marks as: - 40MB with no cache, averaging 455µs/line - 40.7MB with a 200 entries s3fifo, averaging 324µs/line - 42.4MB with a 2000 entries s3fifo, averaging 191µs/line - 44.2MB with a 5000 entries s3fifo, averaging 155µs/line - 47.2MB with a 10000 entries s3fifo, averaging 134µs/line - 53MB with a 2000 entries s3fifo, averaging 123µs/line Either 2000 or 5000 seem like pretty good defaults, the gains taper afterwards as memory use increases sharply. Bump to 2000 to stay on the conservative side. --- README.rst | 15 +++--- doc/api.rst | 13 ++++++ doc/guides.rst | 97 +++++++++++++++++++++++++++++++++++++++ doc/installation.rst | 6 +++ src/ua_parser/__init__.py | 2 +- 5 files changed, 126 insertions(+), 7 deletions(-) diff --git a/README.rst b/README.rst index 096a6471..d3805ea8 100644 --- a/README.rst +++ b/README.rst @@ -30,17 +30,20 @@ Just add ``ua-parser`` to your project's dependencies, or run to install in the current environment. -Installing `google-re2 `_ is -*strongly* recommended as it leads to *significantly* better -performances. 
This can be done directly via the ``re2`` optional -dependency: +Installing `ua-parser-rs <https://pypi.org/project/ua-parser-rs>`_ or +`google-re2 <https://pypi.org/project/google-re2>`_ is *strongly* +recommended as they yield *significantly* better performances. This +can be done directly via the ``regex`` and ``re2`` optional +dependencies respectively: .. code-block:: sh + $ pip install 'ua_parser[regex]' $ pip install 'ua_parser[re2]' -If ``re2`` is available, ``ua-parser`` will simply use it by default -instead of the pure-python resolver. +If either dependency is already available (e.g. because the software +makes use of re2 for other reasons) ``ua-parser`` will use the +corresponding resolver automatically. Quick Start ----------- diff --git a/doc/api.rst b/doc/api.rst index 18a7d484..6f984a4d 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -75,6 +75,19 @@ from user agent strings. .. warning:: Only available if |re2|_ is installed. +.. class:: ua_parser.regex.Resolver(Matchers) + + An advanced resolver based on |regex|_ and a bespoke implementation + of regex prefiltering, by the sibling project `uap-rust + <https://github.com/ua-parser/uap-rust>`_. + + .. warning:: Only available if |regex|_ is + installed. + Eager Matchers '''''''''''''' diff --git a/doc/guides.rst b/doc/guides.rst index b216d18a..039bd241 100644 --- a/doc/guides.rst +++ b/doc/guides.rst @@ -129,6 +129,103 @@ from here on:: :class:`~ua_parser.caching.Local`, which is also caching-related, and serves to use thread-local caches rather than a shared cache. +Builtin Resolvers +================= + +.. 
list-table:: + :header-rows: 1 + :stub-columns: 1 + + * - + - speed + - portability + - memory use + - safety + * - ``regex`` + - great + - good + - bad + - great + * - ``re2`` + - good + - bad + - good + - good + * - ``basic`` + - terrible + - great + - great + - great + +``regex`` +--------- + +The ``regex`` resolver is a bespoke effort as part of the `uap-rust +`_ sibling project, built on +`rust-regex `_ and `a bespoke +regex-prefiltering implementation +`_, +it: + +- Is the fastest available resolver, usually edging out ``re2`` by a + significant margin (when that is even available). +- Is fully controlled by the project, and thus can be built for all + interpreters and platforms supported by pyo3 (currently: cpython, + pypy, and graalpy, on linux, macos and linux, intel and arm). It is + also built as a cpython abi3 wheel and should thus suffer from no + compatibility issues with new release. +- Built entirely out of safe rust code, its safety risks are entirely + in ``regex`` and ``pyo3``. +- Its biggest drawback is that it is a lot more memory intensive than + the other resolvers, because ``regex`` tends to trade memory for + speed (~155MB high water mark on a real-world dataset). + +If available, it is the default resolver, without a cache. + +``re2`` +------- + +The ``re2`` resolver is built atop the widely used `google-re2 +`_ via its built-in Python bindings. +It: + +- Is extremely fast, though around 80% slower than ``regex`` on + real-world data. +- Is only compatible with CPython, and uses pure API wheels, so needs + a different release for each cpython version, for each OS, for each + architecture. +- Is built entirely in C++, but by experienced Google developers. +- Is more memory intensive than the pure-python ``basic`` resolver, + but quite slim all things considered (~55MB high water mark on a + real-world dataset). + +If available, it is the second-preferred resolver, without a cache. 
+ +``basic`` +--------- + +The ``basic`` resolver is a naive linear traversal of all rules, using +the standard library's ``re``. It: + +- Is *extremely* slow, about 10x slower than ``re2`` in cpython, and + pypy and graal's regex implementations do *not* like the workload + and behind cpython by a factor of 3~4. +- Has perfect compatibility, with the caveat above, by virtue of being + built entirely out of standard library code. +- Is basically as safe as Python software can be by virtue of being + just Python, with the native code being the standard library's. +- Is the slimmest resolver at about 40MB. + +This is caveated by a hard requirement to use caches which makes it +workably faster on real-world datasets (if still nowhere near +*uncached* ``re2`` or ``regex``) but increases its memory requirement +significantly e.g. using "sieve" and a cache size of 20000 on a +real-world dataset, it is about 4x slower than ``re2`` for about the +same memory requirements. + +It is the fallback and least preferred resolver, with a medium +(currently 2000 entries) cache by default. + Writing Custom Resolvers ======================== diff --git a/doc/installation.rst b/doc/installation.rst index d4bf7ba4..ac6b311b 100644 --- a/doc/installation.rst +++ b/doc/installation.rst @@ -35,3 +35,9 @@ if installed, but can also be installed via and alongside ua-parser: $ pip install 'ua-parser[yaml]' $ pip install 'ua-parser[regex,yaml]' +``yaml`` simply enables the ability to :func:`load yaml rulesets +`. + +The other two dependencies enable more efficient resolvers. By +default, ``ua-parser`` will select the fastest resolver it finds out +of the available set. For more, see :ref:`builtin resolvers`. 
diff --git a/src/ua_parser/__init__.py b/src/ua_parser/__init__.py index f0340c61..19b6faa2 100644 --- a/src/ua_parser/__init__.py +++ b/src/ua_parser/__init__.py @@ -72,7 +72,7 @@ ( RegexResolver, Re2Resolver, - lambda m: CachingResolver(BasicResolver(m), Cache(200)), + lambda m: CachingResolver(BasicResolver(m), Cache(2000)), ), ) ) From 2476d040ea8c44f07c046aadc8a8e2d53157e95e Mon Sep 17 00:00:00 2001 From: masklinn Date: Wed, 27 Mar 2024 22:11:29 +0100 Subject: [PATCH 10/48] Add test to ensure backfilling results does not lead to evictions Partial results are back-filled (new domains added) by re-setting them in the cache. With a sufficiently incorrect implementation, the cache can evict entries on that occasion even though it does not need to (because we're replacing an existing entry). Exactly that should have been fixed by #204, but was not tested at the time. Fixes #199 --- tests/test_caches.py | 74 +++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 38 deletions(-) diff --git a/tests/test_caches.py b/tests/test_caches.py index d612520a..c4ff990b 100644 --- a/tests/test_caches.py +++ b/tests/test_caches.py @@ -1,17 +1,14 @@ from collections import OrderedDict +import pytest # type: ignore + from ua_parser import ( - BasicResolver, CachingResolver, - Device, Domain, - OS, Parser, PartialResult, - UserAgent, ) -from ua_parser.caching import Lru -from ua_parser.matchers import DeviceMatcher, OSMatcher, UserAgentMatcher +from ua_parser.caching import Lru, S3Fifo, Sieve def test_lru(): @@ -19,7 +16,9 @@ def test_lru(): popped LRU-first. """ cache = Lru(2) - p = Parser(CachingResolver(BasicResolver(([], [], [])), cache)) + p = Parser( + CachingResolver(lambda s, d: PartialResult(d, None, None, None, s), cache) + ) p.parse("a") p.parse("b") @@ -41,37 +40,36 @@ def test_lru(): ) -def test_backfill(): - """Tests that caches handle partial parsing correctly, by updating the - existing entry when new parts get parsed. 
+@pytest.mark.parametrize("cache", [Lru, S3Fifo, Sieve]) +def test_backfill(cache): + """Tests that caches handle partial parsing correctly, by updating + the existing entry when new parts get parsed, without evicting + still-fitting entries. """ - cache = Lru(2) - p = Parser( - CachingResolver( - BasicResolver( - ( - [UserAgentMatcher("(a)")], - [OSMatcher("(a)")], - [DeviceMatcher("(a)")], - ) - ), - cache, - ) - ) + misses = 0 + + def resolver(ua: str, domains: Domain, /) -> PartialResult: + nonlocal misses + misses += 1 + return PartialResult(domains, None, None, None, ua) + + p = Parser(CachingResolver(resolver, cache(10))) + # fill the cache first, no need to hit the entries twice because + # S3 waits until it needs space in the main cache before demotes + # (or promotes) from the probationary cache. + for s in map(str, range(9)): + p.parse(s) + assert misses == 9 + # add a partial entry p.parse_user_agent("a") - assert cache.cache == { - "a": PartialResult(Domain.USER_AGENT, UserAgent("a"), None, None, "a"), - } - p("a", Domain.OS) - assert cache.cache == { - "a": PartialResult( - Domain.USER_AGENT | Domain.OS, UserAgent("a"), OS("a"), None, "a" - ), - } - p.parse("a") - assert cache.cache == { - "a": PartialResult( - Domain.ALL, UserAgent("a"), OS("a"), Device("a", None, "a"), "a" - ), - } + # fill the partial entry, counts as a miss since it needs to + # resolve the new bit + p.parse_os("a") + assert misses == 11 + + misses = 0 + # check that the original entries are all hits + for s in map(str, range(9)): + p.parse(s) + assert misses == 0 From 34c307bbae24fd16a9668902de3d11f9bc3d4f90 Mon Sep 17 00:00:00 2001 From: masklinn Date: Sat, 9 Nov 2024 09:36:49 +0100 Subject: [PATCH 11/48] Split precompiled data into a sub-project (and wheel) The goal of this is the ability to generate wheels for precompiled instances of uap-core, at whatever version we want. 1. 
It resolves #146 by splitting the versioning of the API and that of the (pre-compiled) data; this is an issue for 1.0 as that detaches uap-python's versioning from uap-core's. 2. It allows users to update the API and the precompiled dataset separately, something they would otherwise need to do via yaml. 3. It fixes #221 by allowing the regular release of "preview" precompiled regexes from uap-core snapshots e.g. we could release 0.19.dev202412 at the start of December with whatever uap-core merged between the previous prerelease and then. This should not be picked up by pip by default, but would allow users to access those prereleases via `pip install --pre`. 4. If done well enough, it might allow users to build bespoke precompiled datasets so they don't have to pick between custom rules and precompiled (not sure there's any demand for this but it seems like it might be useful). 5. If it works well enough it might actually be possible to have 0.x use the legacy codegen package meaning it should not need to be updated anymore. This is implemented via hatch build hooks (which seem simpler than doing it via setuptools in the end). Adding `regexes.yaml` to the sdist via artifacts is a bit strange but necessary in order to generate a complete sdist which a wheel can be built from (even though the release script will likely only push the wheel). 
--- .github/workflows/ci.yml | 8 +- pyproject.toml | 5 +- setup.cfg | 8 - setup.py | 221 ------------------ src/ua_parser/__init__.py | 8 +- src/ua_parser/_lazy.pyi | 11 - src/ua_parser/_matchers.pyi | 11 - src/ua_parser/_regexes.pyi | 7 - src/ua_parser/caching.py | 4 +- src/ua_parser/core.py | 8 +- src/ua_parser/lazy.py | 2 +- src/ua_parser/loaders.py | 4 +- src/ua_parser/matchers.py | 2 +- src/ua_parser/user_agent_parser.py | 6 +- tests/test_core.py | 38 +-- tox.ini | 3 + ua-parser-builtins/README.md | 7 + ua-parser-builtins/hatch_build.py | 206 ++++++++++++++++ ua-parser-builtins/pyproject.toml | 44 ++++ .../ua_parser_builtins/__init__.py | 0 .../ua_parser_builtins/py.typed | 0 ua-parser-builtins/uap-core | 1 + 22 files changed, 309 insertions(+), 295 deletions(-) delete mode 100644 setup.cfg delete mode 100644 setup.py delete mode 100644 src/ua_parser/_lazy.pyi delete mode 100644 src/ua_parser/_matchers.pyi delete mode 100644 src/ua_parser/_regexes.pyi create mode 100644 ua-parser-builtins/README.md create mode 100644 ua-parser-builtins/hatch_build.py create mode 100644 ua-parser-builtins/pyproject.toml create mode 100644 ua-parser-builtins/ua_parser_builtins/__init__.py create mode 100644 ua-parser-builtins/ua_parser_builtins/py.typed create mode 120000 ua-parser-builtins/uap-core diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4f41e866..df1cfb3e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,7 +3,6 @@ name: CI on: push: pull_request: - workflow_dispatch: jobs: checks: @@ -11,6 +10,9 @@ jobs: steps: - name: Checkout working copy uses: actions/checkout@v4 + with: + submodules: true + fetch-depth: 0 - name: ruff check uses: chartboost/ruff-action@v1 - name: ruff format @@ -29,7 +31,7 @@ jobs: if: ${{ always() && steps.setup_python.conclusion == 'success' }} run: | python -mpip install --upgrade pip - python -mpip install mypy types-PyYaml + python -mpip install mypy types-PyYaml ./ua-parser-builtins - name: mypy 
if: ${{ always() && steps.install_mypy.conclusion == 'success' }} run: mypy @@ -101,6 +103,7 @@ jobs: uses: actions/checkout@v4 with: submodules: true + fetch-depth: 0 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: @@ -115,6 +118,7 @@ jobs: sudo apt install libyaml-dev fi - run: python -mpip install pytest pyyaml + - run: python -mpip install ./ua-parser-builtins # install rs accelerator if available, ignore if not - run: python -mpip install ua-parser-rs || true # re2 is basically impossible to install from source so don't diff --git a/pyproject.toml b/pyproject.toml index 65271a4c..c0d4192c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ description = "Python port of Browserscope's user agent parser" version = "1.0.0a1" readme = "README.rst" requires-python = ">=3.9" -dependencies = [] +dependencies = ["ua-parser-builtins"] license = {text = "Apache 2.0"} urls = {repository = "https://github.com/ua-parser/uap-python"} @@ -57,8 +57,7 @@ where = ["src"] [tool.ruff] exclude = [ - "src/ua_parser/_lazy.py", - "src/ua_parser/_matchers.py", + "src/ua_parser/generate_builtins.py", ] [tool.ruff.lint] diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 9b07aee0..00000000 --- a/setup.cfg +++ /dev/null @@ -1,8 +0,0 @@ -[options] -packages = find: -package_dir = - =src -setup_requires = pyyaml - -[options.packages.find] -where = src diff --git a/setup.py b/setup.py deleted file mode 100644 index f423348e..00000000 --- a/setup.py +++ /dev/null @@ -1,221 +0,0 @@ -#!/usr/bin/env python -# flake8: noqa -import io -from contextlib import suppress, contextmanager -from os import fspath -from pathlib import Path -from typing import Optional, List, Dict - -from setuptools import setup, Command, find_namespace_packages -from setuptools.command.build import build, SubCommand -from setuptools.command.editable_wheel import editable_wheel - -import yaml - - -build.sub_commands.insert(0, ("compile-regexes", None)) - - 
-class CompileRegexes(Command, SubCommand): - def initialize_options(self) -> None: - self.pkg_name: Optional[str] = None - - def finalize_options(self) -> None: - self.pkg_name = self.distribution.get_name().replace("-", "_") - - def get_source_files(self) -> List[str]: - return ["uap-core/regexes.yaml"] - - def get_outputs(self) -> List[str]: - return [f"{self.pkg_name}/_regexes.py"] - - def get_output_mapping(self) -> Dict[str, str]: - return dict(zip(self.get_source_files(), self.get_outputs())) - - def run(self) -> None: - # FIXME: check git / submodules? - """ - work_path = self.work_path - if not os.path.exists(os.path.join(work_path, ".git")): - return - - log.info("initializing git submodules") - check_output(["git", "submodule", "init"], cwd=work_path) - check_output(["git", "submodule", "update"], cwd=work_path) - """ - if not self.pkg_name: - return # or error? - - yaml_src = Path("uap-core", "regexes.yaml") - if not yaml_src.is_file(): - raise RuntimeError( - f"Unable to find regexes.yaml, should be at {yaml_src!r}" - ) - - with yaml_src.open("rb") as f: - regexes = yaml.safe_load(f) - - if self.editable_mode: - dist_dir = Path("src") - else: - dist_dir = Path(self.get_finalized_command("bdist_wheel").bdist_dir) - - outdir = dist_dir / self.pkg_name - outdir.mkdir(parents=True, exist_ok=True) - - dest = outdir / "_matchers.py" - dest_lazy = outdir / "_lazy.py" - dest_legacy = outdir / "_regexes.py" - - with ( - dest.open("wb") as eager, - dest_lazy.open("wb") as lazy, - dest_legacy.open("wb") as legacy, - ): - eager = EagerWriter(eager) - lazy = LazyWriter(lazy) - legacy = LegacyWriter(legacy) - - for section in ["user_agent_parsers", "os_parsers", "device_parsers"]: - with ( - eager.section(section), - lazy.section(section), - legacy.section(section), - ): - extract = EXTRACTORS[section] - for p in regexes[section]: - el = trim(extract(p)) - eager.item(el) - lazy.item(el) - legacy.item(el) - eager.end() - lazy.end() - legacy.end() - - -def trim(l): - 
while len(l) > 1 and l[-1] is None: - l.pop() - return l - - -EXTRACTORS = { - "user_agent_parsers": lambda p: [ - p["regex"], - p.get("family_replacement"), - p.get("v1_replacement"), - p.get("v2_replacement"), - ], - "os_parsers": lambda p: [ - p["regex"], - p.get("os_replacement"), - p.get("os_v1_replacement"), - p.get("os_v2_replacement"), - p.get("os_v3_replacement"), - p.get("os_v4_replacement"), - ], - "device_parsers": lambda p: [ - p["regex"], - p.get("regex_flag"), - p.get("device_replacement"), - p.get("brand_replacement"), - p.get("model_replacement"), - ], -} - - -class Writer: - section_end = b"" - - def __init__(self, fp): - self.fp = fp - self.fp.write( - b"""\ -######################################################## -# NOTICE: this file is autogenerated from regexes.yaml # -######################################################## -""" - ) - self.fp.write(self.prefix) - self._section = None - - @contextmanager - def section(self, id): - self._section = id - self.fp.write(self.sections[id]) - yield - self.fp.write(self.section_end) - - def item(self, elements): - # DeviceMatcher(re, flag, repl1), - self.fp.write(self.items[self._section]) - self.fp.write(", ".join(map(repr, elements)).encode()) - self.fp.write(b"),\n") - - def end(self): - self.fp.write(self.suffix) - - -class LegacyWriter(Writer): - prefix = b"""\ -__all__ = [ - "USER_AGENT_PARSERS", - "DEVICE_PARSERS", - "OS_PARSERS", -] - -from .user_agent_parser import UserAgentParser, DeviceParser, OSParser - -""" - sections = { - "user_agent_parsers": b"USER_AGENT_PARSERS = [\n", - "os_parsers": b"\n\nOS_PARSERS = [\n", - "device_parsers": b"\n\nDEVICE_PARSERS = [\n", - } - section_end = b"]" - items = { - "user_agent_parsers": b" UserAgentParser(", - "os_parsers": b" OSParser(", - "device_parsers": b" DeviceParser(", - } - suffix = b"\n" - - -class EagerWriter(Writer): - prefix = b"""\ -__all__ = ["MATCHERS"] - -from typing import Tuple, List -from .matchers import UserAgentMatcher, 
OSMatcher, DeviceMatcher - -MATCHERS: Tuple[List[UserAgentMatcher], List[OSMatcher], List[DeviceMatcher]] = ([ -""" - sections = { - "user_agent_parsers": b"", - "os_parsers": b"], [\n", - "device_parsers": b"], [\n", - } - items = { - "user_agent_parsers": b" UserAgentMatcher(", - "os_parsers": b" OSMatcher(", - "device_parsers": b" DeviceMatcher(", - } - suffix = b"])\n" - - -class LazyWriter(EagerWriter): - prefix = b"""\ -__all__ = ["MATCHERS"] - -from typing import Tuple, List -from .lazy import UserAgentMatcher, OSMatcher, DeviceMatcher - -MATCHERS: Tuple[List[UserAgentMatcher], List[OSMatcher], List[DeviceMatcher]] = ([ -""" - - -setup( - cmdclass={ - "compile-regexes": CompileRegexes, - } -) diff --git a/src/ua_parser/__init__.py b/src/ua_parser/__init__.py index 19b6faa2..040dda3e 100644 --- a/src/ua_parser/__init__.py +++ b/src/ua_parser/__init__.py @@ -20,17 +20,17 @@ from __future__ import annotations __all__ = [ + "OS", "BasicResolver", - "CachingResolver", "Cache", + "CachingResolver", "DefaultedResult", "Device", "Domain", "Matchers", - "OS", - "Result", - "Resolver", "PartialResult", + "Resolver", + "Result", "UserAgent", "load_builtins", "load_lazy_builtins", diff --git a/src/ua_parser/_lazy.pyi b/src/ua_parser/_lazy.pyi deleted file mode 100644 index 741db1af..00000000 --- a/src/ua_parser/_lazy.pyi +++ /dev/null @@ -1,11 +0,0 @@ -__all__ = ["MATCHERS"] - -from typing import List, Tuple - -from .lazy import DeviceMatcher, OSMatcher, UserAgentMatcher - -MATCHERS: Tuple[ - List[UserAgentMatcher], - List[OSMatcher], - List[DeviceMatcher], -] diff --git a/src/ua_parser/_matchers.pyi b/src/ua_parser/_matchers.pyi deleted file mode 100644 index 2269fb43..00000000 --- a/src/ua_parser/_matchers.pyi +++ /dev/null @@ -1,11 +0,0 @@ -__all__ = ["MATCHERS"] - -from typing import List, Tuple - -from .matchers import DeviceMatcher, OSMatcher, UserAgentMatcher - -MATCHERS: Tuple[ - List[UserAgentMatcher], - List[OSMatcher], - List[DeviceMatcher], -] diff --git 
a/src/ua_parser/_regexes.pyi b/src/ua_parser/_regexes.pyi deleted file mode 100644 index 10bc2ef4..00000000 --- a/src/ua_parser/_regexes.pyi +++ /dev/null @@ -1,7 +0,0 @@ -from typing import List - -from .user_agent_parser import DeviceParser, OSParser, UserAgentParser - -USER_AGENT_PARSERS: List[UserAgentParser] -OS_PARSERS: List[OSParser] -DEVICE_PARSERS: List[DeviceParser] diff --git a/src/ua_parser/caching.py b/src/ua_parser/caching.py index 706ad4b3..998c4b36 100644 --- a/src/ua_parser/caching.py +++ b/src/ua_parser/caching.py @@ -78,7 +78,7 @@ def __setitem__(self, key: str, value: PartialResult) -> None: @dataclasses.dataclass class CacheEntry: - __slots__ = ["key", "value", "freq"] + __slots__ = ["freq", "key", "value"] key: str value: PartialResult freq: int @@ -161,7 +161,7 @@ def _evict_small(self) -> None: @dataclasses.dataclass class SieveNode: - __slots__ = ("key", "value", "visited", "next") + __slots__ = ("key", "next", "value", "visited") key: str value: PartialResult visited: bool diff --git a/src/ua_parser/core.py b/src/ua_parser/core.py index 8ea880d6..b7133d4c 100644 --- a/src/ua_parser/core.py +++ b/src/ua_parser/core.py @@ -4,14 +4,14 @@ from typing import Generic, List, Optional, Protocol, Tuple, TypeVar __all__ = [ + "OS", "DefaultedResult", "Device", "Domain", "Matchers", - "OS", - "Result", "PartialResult", "Resolver", + "Result", "UserAgent", ] @@ -74,7 +74,7 @@ def __init__( class Device: """Device information parsed from the user agent string.""" - __slots__ = ("family", "brand", "model") + __slots__ = ("brand", "family", "model") family: str brand: Optional[str] model: Optional[str] @@ -172,7 +172,7 @@ class PartialResult: """ - __slots__ = ("domains", "user_agent", "os", "device", "string") + __slots__ = ("device", "domains", "os", "string", "user_agent") domains: Domain user_agent: Optional[UserAgent] os: Optional[OS] diff --git a/src/ua_parser/lazy.py b/src/ua_parser/lazy.py index c5aa5e23..4f0abedf 100644 --- 
a/src/ua_parser/lazy.py +++ b/src/ua_parser/lazy.py @@ -1,4 +1,4 @@ -__all__ = ["UserAgentMatcher", "OSMatcher", "DeviceMatcher"] +__all__ = ["DeviceMatcher", "OSMatcher", "UserAgentMatcher"] import re from functools import cached_property diff --git a/src/ua_parser/loaders.py b/src/ua_parser/loaders.py index 18fc3d25..55774eaf 100644 --- a/src/ua_parser/loaders.py +++ b/src/ua_parser/loaders.py @@ -52,7 +52,7 @@ def load_builtins() -> Matchers: further imports simply reference the existing datas. """ - from ._matchers import MATCHERS + from ua_parser_builtins.matchers import MATCHERS # typing and mypy don't have safe upcast (#5756) and mypy is # unhappy about returning concrete matchers for a mixed type @@ -66,7 +66,7 @@ def load_lazy_builtins() -> Matchers: further imports simply reference the existing datas. """ - from ._lazy import MATCHERS + from ua_parser_builtins.lazy import MATCHERS return cast(Matchers, MATCHERS) diff --git a/src/ua_parser/matchers.py b/src/ua_parser/matchers.py index 3956b3b5..35200b0b 100644 --- a/src/ua_parser/matchers.py +++ b/src/ua_parser/matchers.py @@ -1,4 +1,4 @@ -__all__ = ["UserAgentMatcher", "OSMatcher", "DeviceMatcher"] +__all__ = ["DeviceMatcher", "OSMatcher", "UserAgentMatcher"] import re from typing import Literal, Optional, Pattern diff --git a/src/ua_parser/user_agent_parser.py b/src/ua_parser/user_agent_parser.py index 5cb1c744..e6e4bb3e 100644 --- a/src/ua_parser/user_agent_parser.py +++ b/src/ua_parser/user_agent_parser.py @@ -521,4 +521,8 @@ def GetFilters( del SafeLoader else: # Just load our pre-compiled versions - from ._regexes import DEVICE_PARSERS, OS_PARSERS, USER_AGENT_PARSERS + from ua_parser_builtins.regexes import ( + DEVICE_PARSERS, + OS_PARSERS, + USER_AGENT_PARSERS, + ) diff --git a/tests/test_core.py b/tests/test_core.py index 310ddec5..1a87702f 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -3,25 +3,22 @@ import dataclasses import logging import pathlib -import platform from operator 
import attrgetter +from typing import cast import pytest # type: ignore -if platform.python_implementation() == "PyPy": - from yaml import SafeLoader, load -else: - try: - from yaml import ( # type: ignore - CSafeLoader as SafeLoader, - load, - ) - except ImportError: - logging.getLogger(__name__).warning( - "PyYaml C extension not available to run tests, this will result " - "in dramatic tests slowdown." - ) - from yaml import SafeLoader, load +try: + from yaml import ( + CSafeLoader as SafeLoader, + load, + ) +except ImportError: + logging.getLogger(__name__).warning( + "PyYaml C extension not available to run tests, this will result " + "in tests slowdown." + ) + from yaml import SafeLoader, load # type: ignore from ua_parser import ( BasicResolver, @@ -32,15 +29,22 @@ UserAgent, load_builtins, load_lazy_builtins, + loaders, ) from ua_parser.matchers import UserAgentMatcher CORE_DIR = (pathlib.Path(__name__).parent.parent / "uap-core").resolve() +data = cast(loaders.FileLoader, loaders.load_yaml)(CORE_DIR / "regexes.yaml") +data_lazy = cast(loaders.FileLoader, loaders.load_yaml)( + CORE_DIR / "regexes.yaml", loader=loaders.load_lazy +) PARSERS = [ pytest.param(Parser(BasicResolver(load_builtins())), id="basic"), pytest.param(Parser(BasicResolver(load_lazy_builtins())), id="lazy"), + pytest.param(Parser(BasicResolver(data)), id="basic-yaml"), + pytest.param(Parser(BasicResolver(data_lazy)), id="lazy-yaml"), ] try: from ua_parser import re2 @@ -51,7 +55,7 @@ ) ) else: - PARSERS.append(pytest.param(Parser(re2.Resolver(load_builtins())), id="re2")) + PARSERS.append(pytest.param(Parser(re2.Resolver(data)), id="re2")) try: from ua_parser import regex @@ -64,7 +68,7 @@ ) ) else: - PARSERS.append(pytest.param(Parser(regex.Resolver(load_builtins())), id="regex")) + PARSERS.append(pytest.param(Parser(regex.Resolver(data)), id="regex")) UA_FIELDS = {f.name for f in dataclasses.fields(UserAgent)} diff --git a/tox.ini b/tox.ini index de36509a..0f2edd4c 100644 --- a/tox.ini 
+++ b/tox.ini @@ -23,6 +23,7 @@ deps = pyyaml google-re2 ua-parser-rs + ./ua-parser-builtins commands = pytest -Werror --doctest-glob="*.rst" {posargs} @@ -31,6 +32,7 @@ deps = pytest pyyaml ua-parser-rs + ./ua-parser-builtins [testenv:flake8] package = skip @@ -47,4 +49,5 @@ package = skip deps = mypy types-PyYaml + ./ua-parser-builtins commands = mypy {posargs:} diff --git a/ua-parser-builtins/README.md b/ua-parser-builtins/README.md new file mode 100644 index 00000000..8a568237 --- /dev/null +++ b/ua-parser-builtins/README.md @@ -0,0 +1,7 @@ +# Precompiled ruleset for [ua-parser](https://pypi.org/project/ua-parser/) + +This project does not do anything on its own, nor does it have any +actual API: it contains the dataset of +[uap-core](https://github.com/ua-parser/uap-core) pre-compiled for use +by [ua-parser](https://pypi.org/project/ua-parser/) to decrease +initialisation times. diff --git a/ua-parser-builtins/hatch_build.py b/ua-parser-builtins/hatch_build.py new file mode 100644 index 00000000..e92e9730 --- /dev/null +++ b/ua-parser-builtins/hatch_build.py @@ -0,0 +1,206 @@ +from __future__ import annotations + +import io +import os +import os.path +import tempfile +from contextlib import contextmanager +from typing import Any, Callable, ClassVar, Iterator, cast + +import yaml +from hatchling.builders.hooks.plugin.interface import BuildHookInterface +from hatchling.metadata.plugin.interface import MetadataHookInterface +from versioningit import get_version + + +class MetadataHook(MetadataHookInterface): + def update(self, metadata: dict[str, Any]) -> None: + v = get_version( + os.path.join(self.root, "uap-core"), + config={ + "format": { + "distance": "{next_version}.dev{distance}", + } + }, + ) + metadata["version"] = v + + +class CompilerHook(BuildHookInterface): + def initialize( + self, + version: str, + build_data: dict[str, Any], + ) -> None: + with open(os.path.join(self.root, "uap-core/regexes.yaml"), "rb") as f: + data = yaml.safe_load(f) + + with ( 
+ tempfile.NamedTemporaryFile(delete=False) as matchers, + tempfile.NamedTemporaryFile(delete=False) as lazy, + tempfile.NamedTemporaryFile(delete=False) as regexes, + ): + matchers_w = EagerWriter(cast(io.RawIOBase, matchers)) + lazy_w = LazyWriter(cast(io.RawIOBase, lazy)) + legacy_w = LegacyWriter(cast(io.RawIOBase, regexes)) + + for section, specs in data.items(): + with ( + matchers_w.section(section), + lazy_w.section(section), + legacy_w.section(section), + ): + extract = EXTRACTORS[section] + for s in specs: + el = trim(extract(s)) + matchers_w.item(el) + lazy_w.item(el) + legacy_w.item(el) + + matchers_w.end() + lazy_w.end() + legacy_w.end() + + build_data["force_include"][matchers.name] = "ua_parser_builtins/matchers.py" + build_data["force_include"][lazy.name] = "ua_parser_builtins/lazy.py" + build_data["force_include"][regexes.name] = "ua_parser_builtins/regexes.py" + + def finalize( + self, + version: str, + build_data: dict[str, Any], + artifact_path: str, + ): + tempdir = tempfile.gettempdir() + for k in build_data["force_include"]: + if k.startswith(tempdir): + os.remove(k) + + +def trim(items: list[str | None]) -> list[str | None]: + """Removes trailing `None` from the extraction""" + while len(items) > 1 and items[-1] is None: + items.pop() + return items + + +EXTRACTORS: dict[str, Callable[[dict[str, str]], list[str | None]]] = { + "user_agent_parsers": lambda p: [ + p["regex"], + p.get("family_replacement"), + p.get("v1_replacement"), + p.get("v2_replacement"), + p.get("v3_replacement"), + p.get("v4_replacement"), + ], + "os_parsers": lambda p: [ + p["regex"], + p.get("os_replacement"), + p.get("os_v1_replacement"), + p.get("os_v2_replacement"), + p.get("os_v3_replacement"), + p.get("os_v4_replacement"), + ], + "device_parsers": lambda p: [ + p["regex"], + p.get("regex_flag"), + p.get("device_replacement"), + p.get("brand_replacement"), + p.get("model_replacement"), + ], +} + + +class Writer: + items: ClassVar[dict[str, bytes]] + sections: 
ClassVar[dict[str, bytes]] + prefix: bytes + suffix = b"" + section_end = b"" + + def __init__(self, fp: io.RawIOBase) -> None: + self.fp = fp + self.fp.write( + b"""\ +######################################################## +# NOTICE: this file is autogenerated from regexes.yaml # +######################################################## +""" + ) + self.fp.write(self.prefix) + self._section: str | None = None + + @contextmanager + def section(self, id: str) -> Iterator[None]: + self._section = id + self.fp.write(self.sections[id]) + yield + self.fp.write(self.section_end) + + def item(self, elements: list[str | None]) -> None: + # DeviceMatcher(re, flag, repl1), + # assume we're in a section + self.fp.write(self.items[cast(str, self._section)]) + self.fp.write(", ".join(map(repr, elements)).encode()) + self.fp.write(b"),\n") + + def end(self) -> None: + self.fp.write(self.suffix) + + +class LegacyWriter(Writer): + prefix = b"""\ +__all__ = [ + "USER_AGENT_PARSERS", + "DEVICE_PARSERS", + "OS_PARSERS", +] + +from ua_parser.user_agent_parser import UserAgentParser, DeviceParser, OSParser + +""" + sections: ClassVar[dict[str, bytes]] = { + "user_agent_parsers": b"USER_AGENT_PARSERS = [\n", + "os_parsers": b"\n\nOS_PARSERS = [\n", + "device_parsers": b"\n\nDEVICE_PARSERS = [\n", + } + section_end = b"]" + items: ClassVar[dict[str, bytes]] = { + "user_agent_parsers": b" UserAgentParser(", + "os_parsers": b" OSParser(", + "device_parsers": b" DeviceParser(", + } + suffix = b"\n" + + +class EagerWriter(Writer): + prefix = b"""\ +__all__ = ["MATCHERS"] + +from typing import Tuple, List +from ua_parser.matchers import UserAgentMatcher, OSMatcher, DeviceMatcher + +MATCHERS: Tuple[List[UserAgentMatcher], List[OSMatcher], List[DeviceMatcher]] = ([ +""" + sections: ClassVar[dict[str, bytes]] = { + "user_agent_parsers": b"", + "os_parsers": b"], [\n", + "device_parsers": b"], [\n", + } + items: ClassVar[dict[str, bytes]] = { + "user_agent_parsers": b" UserAgentMatcher(", + 
"os_parsers": b" OSMatcher(", + "device_parsers": b" DeviceMatcher(", + } + suffix = b"])\n" + + +class LazyWriter(EagerWriter): + prefix = b"""\ +__all__ = ["MATCHERS"] + +from typing import Tuple, List +from ua_parser.lazy import UserAgentMatcher, OSMatcher, DeviceMatcher + +MATCHERS: Tuple[List[UserAgentMatcher], List[OSMatcher], List[DeviceMatcher]] = ([ +""" diff --git a/ua-parser-builtins/pyproject.toml b/ua-parser-builtins/pyproject.toml new file mode 100644 index 00000000..db0da38b --- /dev/null +++ b/ua-parser-builtins/pyproject.toml @@ -0,0 +1,44 @@ +[build-system] +requires = ["hatchling", "versioningit", "pyyaml"] +build-backend = "hatchling.build" + +[project] +name = "ua-parser-builtins" +description = "Precompiled rules for User Agent Parser" +readme = "README.md" +dependencies = ["ua-parser"] +requires-python = ">=3.9" +license = {text = "Apache 2.0"} +urls = {repository = "https://github.com/ua-parser/uap-python"} +dynamic = ["version"] +maintainers = [ + { name = "masklinn", email = "uap@masklinn.net" } +] + +classifiers = [ + "Development Status :: 4 - Beta", + "Environment :: Web Environment", + "Intended Audience :: Developers", + "Operating System :: OS Independent", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python", + "Topic :: Internet :: WWW/HTTP", + "Topic :: Software Development :: Libraries :: Python Modules", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", + # "Programming Language :: Python :: Implementation :: GraalPy", +] + +[tool.hatch.build.hooks.custom] + +[tool.hatch.metadata.hooks.custom] + +[tool.hatch.build.targets.sdist] +artifacts = [ + "uap-core/regexes.yaml", +] diff --git 
a/ua-parser-builtins/ua_parser_builtins/__init__.py b/ua-parser-builtins/ua_parser_builtins/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ua-parser-builtins/ua_parser_builtins/py.typed b/ua-parser-builtins/ua_parser_builtins/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/ua-parser-builtins/uap-core b/ua-parser-builtins/uap-core new file mode 120000 index 00000000..fbefe368 --- /dev/null +++ b/ua-parser-builtins/uap-core @@ -0,0 +1 @@ +../uap-core \ No newline at end of file From 3d5afdb163ee2946beaa7ec50ef7d1edf99638bc Mon Sep 17 00:00:00 2001 From: masklinn Date: Tue, 19 Nov 2024 19:39:31 +0100 Subject: [PATCH 12/48] Add release workflow - The workflow does no release if a valid environment is not selected. - By default, the workflow will create a .devN release using the current uap-core master. --- .github/workflows/release.yml | 90 +++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 00000000..df318125 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,90 @@ +name: Publish Python distribution to PyPI and TestPyPI + +on: + schedule: + # schedule a dev release on every 1st of the month, at 2034 UTC + - cron: "34 20 1 * *" + workflow_dispatch: + inputs: + tag: + description: "uap-core ref to release" + type: string + environment: + description: "environment to release for (testpypy or pypy)" + type: environment + +jobs: + build: + name: Build distribution + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + submodules: true + fetch-depth: 0 + - name: update core + # needs to detach because we can update to a tag + run: git -C uap-core switch --detach ${{ inputs.tag || 'master' }} + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.x" + + - name: Install pypa/build + run: python3 -m pip 
install build --user + - name: Build wheel + run: | + python3 -m build -w ua-parser-builtins + mv ua-parser-builtins/dist . + - name: Store the distribution packages + uses: actions/upload-artifact@v4 + with: + name: python-package-distributions + path: dist/ + + publish-to-testpypi: + name: Publish to TestPyPI + if: ${{ github.event.inputs.environment == 'testpypi' }} + needs: + - build + runs-on: ubuntu-latest + + environment: + name: testpypi + url: https://test.pypi.org/p/ua-parser-builtins + + permissions: + id-token: write + + steps: + - name: Download all the dists + uses: actions/download-artifact@v4 + with: + name: python-package-distributions + path: dist/ + - name: Publish + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: https://test.pypi.org/legacy/ + + publish-to-pypi: + name: publish + if: ${{ github.event_name == 'schedule' || github.event.inputs.environment == 'pypi' }} + needs: + - build + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/ua-parser-builtins + permissions: + id-token: write + + steps: + - name: Download all the dists + uses: actions/download-artifact@v4 + with: + name: python-package-distributions + path: dist/ + - name: Publish + uses: pypa/gh-action-pypi-publish@release/v1 From f2434ba9aa7c2bb389d6cb6a69fda712af448f00 Mon Sep 17 00:00:00 2001 From: masklinn Date: Sun, 24 Nov 2024 20:36:58 +0100 Subject: [PATCH 13/48] doc update, fixups --- README.rst | 45 +++++++++++++++++++------------------- doc/advanced/migration.rst | 30 +++++++++++++------------ doc/guides.rst | 25 ++++++++++++++------- doc/installation.rst | 22 +++++-------------- doc/quickstart.rst | 2 +- 5 files changed, 62 insertions(+), 62 deletions(-) diff --git a/README.rst b/README.rst index d3805ea8..512d0d25 100644 --- a/README.rst +++ b/README.rst @@ -10,40 +10,34 @@ Build Status .. 
image:: https://github.com/ua-parser/uap-python/actions/workflows/ci.yml/badge.svg :alt: CI on the master branch -⚠️ THIS IS NOT THE DOCUMENTATION YOU ARE LOOKING FOR (probably) ⚠️ ------------------------------------------------------------------- - -This is the readme for the `future 1.0 `_. - -For the current releases, see `the 0.x branch -`_. - Installing ---------- -Just add ``ua-parser`` to your project's dependencies, or run +Add ``ua-parser[regex]`` to your project's dependencies, or run .. code-block:: sh - $ pip install ua-parser + $ pip install 'ua-parser[regex]' to install in the current environment. -Installing `ua-parser-rs `_ or -`google-re2 `_ is *strongly* -recommended as they yield *significantly* better performances. This -can be done directly via the ``regex`` and ``re2`` optional -dependencies respectively: +ua-parser supports CPython 3.9 and newer, recent pypy (supporting +3.10), and GraalPy 24. -.. code-block:: sh +.. note:: + + The ``[regex]`` feature is *strongly* recommended: - $ pip install 'ua_parser[regex]' - $ pip install 'ua_parser[re2]' + - ``[re2]`` is slightly slower and only works with cpython, though + it is still a great option then (and is more memory-efficient). + - Pure python (no feature) is *significantly* slower, especially on + non-cpython runtimes, but it is the most memory efficient even + with caches. -If either dependency is already available (e.g. because the software -makes use of re2 for other reasons) ``ua-parser`` will use the -corresponding resolver automatically. + See `builtin resolvers`_ for more explanation of the tradeoffs + between the different options. + +.. 
_builtin resolvers: https://readthedocs.org/ua-parser/uap-python/guides#builtin-resolvers Quick Start ----------- @@ -109,3 +103,10 @@ Extract device information from user-agent string >>> ua_string = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.104 Safari/537.36' >>> parse_device(ua_string) Device(family='Mac', brand='Apple', model='Mac') + +Upgrading +--------- + +Upgrading from 0.x? See `the upgrade guide`_. + +.. _the upgrade guide: https://readthedocs.org/ua-parser/uap-python/advanced/migration diff --git a/doc/advanced/migration.rst b/doc/advanced/migration.rst index c77b5d89..a75d85e5 100644 --- a/doc/advanced/migration.rst +++ b/doc/advanced/migration.rst @@ -5,11 +5,13 @@ From 0.x to 1.0 Don't Touch A Thing =================== -The first and simplest way to transition is to not transition: the 0.x -API won't be removed for a long time, possibly ever. While it is -unlikely to get updated any further and will eventually (hopefully?) -fall behind, if you can't be arsed you probably don't have to until an -unlikely 2.0. +The first and simplest way to upgrade is to not do anything: the 0.x +API is still present in 1.x and won't be removed for a long time, +possibly ever. + +While it is unlikely to get updated any further and will eventually +(hopefully?) fall behind, if you can't be arsed you probably don't +have to do anything for now, or just now. Unavoidable Divergences ======================= @@ -29,20 +31,20 @@ special attention: # force initialisation of global parser ua_parser.parser -- The 1.0 API defaults to an :class:`re2-based parser - ` if |re2|_ is installed, although it seems - unlikely you may wish to consider replacing it with configuring a - :class:`~ua_parser.Parser` with a :class:`ua_parser.basic.Resolver` - if |re2|_ is one of your dependencies. 
+- The 1.0 API defaults to powerful native parsers (based on |regex|_ + or |re2|_) if available, although it seems unlikely you may wish to + consider replacing it with configuring a :class:`~ua_parser.Parser` + with a :class:`ua_parser.basic.Resolver`, especially if for some + reason |re2| is already one of your dependencies but you want to + *avoid* the |re2|-based resolver. Default Ruleset =============== While the 1.0 API was designed to better respect :pep:`8` and support -:mod:`typing`, it was also designed to easily be transitioned from. +:mod:`typing`, it was also designed to easily be transitioned to. -Given a 0.x API not using YAML, the conversion should be very easy and -consists of: +Given a 0.x API not using YAML, the conversion consists of: - updating the import from ``ua_parser.user_agent_parser`` to just ``ua_parser`` @@ -116,7 +118,7 @@ Legacy YAML support can be added via a pretty small shim:: import ua_parser from ua_parser.loaders import load_yaml - if yaml_path = os.environ.get("UA_PARSER_YAML"): + if yaml_path := os.environ.get("UA_PARSER_YAML"): ua_parser.parser = ua_parser.Parser.from_matchers( load_yaml(yaml_path)) diff --git a/doc/guides.rst b/doc/guides.rst index 039bd241..39b43e4b 100644 --- a/doc/guides.rst +++ b/doc/guides.rst @@ -7,10 +7,8 @@ Guides Custom Rulesets =============== -ua-parser defaults to the version of `ua-core -`_ -current when it was packaged, using a precompiled version of -``regexes.yaml``. +ua-parser defaults to the latest stable release of `ua-core`_ via +`precompiled regexes.yaml`__. That is a suitable defaut, but there are plenty of reasons to use custom rulesets: @@ -18,10 +16,13 @@ custom rulesets: - trim down the default ruleset to only the most current or relevant rules for efficiency e.g. 
you might not care about CalDav or podcast applications -- add new rules relevant to your own traffic but which don't (possibly - can't) be in the main project +- add new rules relevant to your own traffic but which aren't (possibly + can't be) in the main project - experiment with the creation of new rules - use a completely bespoke ruleset to track UA-identified API clients +- use "experimental" rules which haven't been released yet (although + `ua-parser-builtins`_ provides regular prerelease versions which may + be suitable for this) ua-parser provides easy ways to load custom rolesets: @@ -38,6 +39,12 @@ ua-parser provides easy ways to load custom rolesets: parser = Parser.from_matchers(load_yaml("regexes.yaml")) parser.parse(some_ua) +.. _ua-parser-builtins: https://pypi.org/project/ua-parser-builtins + +__ ua-parser-builtins_ + +.. _ua-core: https://github.com/ua-parser/uap-core/blob/master/regexes.yaml + .. _guide-custom-global-parser: Custom Global Parser @@ -129,6 +136,8 @@ from here on:: :class:`~ua_parser.caching.Local`, which is also caching-related, and serves to use thread-local caches rather than a shared cache. +.. _builtin-resolvers: + Builtin Resolvers ================= @@ -207,9 +216,9 @@ If available, it is the second-preferred resolver, without a cache. The ``basic`` resolver is a naive linear traversal of all rules, using the standard library's ``re``. It: -- Is *extremely* slow, about 10x slower than ``re2`` in cpython, and +- Is *extremely* slow: about 10x slower than ``re2`` on cpython, and pypy and graal's regex implementations do *not* like the workload - and behind cpython by a factor of 3~4. + and are 3x-4x slower than *cpython*. - Has perfect compatibility, with the caveat above, by virtue of being built entirely out of standard library code. 
- Is basically as safe as Python software can be by virtue of being diff --git a/doc/installation.rst b/doc/installation.rst index ac6b311b..09ba9e83 100644 --- a/doc/installation.rst +++ b/doc/installation.rst @@ -2,23 +2,11 @@ Installation ============ -Python Version -============== - -ua-parser currently supports CPython 3.9 and newer, recent Pypy -(supporting 3.10), and Graal 24. - -.. note:: - - While pypy and graal are supported, they are rather slow when using - pure python mode and ``[re2]`` is not supported, so using the - ``[regex]`` feature is very strongly recommended. - Installation ============ .. include:: ../README.rst - :start-line: 23 + :start-line: 14 :end-before: Quick Start Optional Dependencies @@ -35,9 +23,9 @@ if installed, but can also be installed via and alongside ua-parser: $ pip install 'ua-parser[yaml]' $ pip install 'ua-parser[regex,yaml]' -``yaml`` simply enables the ability to :func:`load yaml rulesets +``yaml`` enables the ability to :func:`load rulesets from yaml `. -The other two dependencies enable more efficient resolvers. By -default, ``ua-parser`` will select the fastest resolver it finds out -of the available set. For more, see :ref:`builtin resolvers`. +The other two features enable more efficient resolvers. By default, +``ua-parser`` will select the fastest resolver it finds out of the +available set (regex > re2 > python). diff --git a/doc/quickstart.rst b/doc/quickstart.rst index 4267ffe0..c6cfe9cf 100644 --- a/doc/quickstart.rst +++ b/doc/quickstart.rst @@ -3,4 +3,4 @@ Quick Start =========== .. 
include:: ../README.rst - :start-line: 47 + :start-line: 44 From c727d9b915b3c69a72f6962e7e0e0c252007bd11 Mon Sep 17 00:00:00 2001 From: masklinn Date: Tue, 26 Nov 2024 17:51:38 +0100 Subject: [PATCH 14/48] Add release workflow for ua-parser Also update the release workflow for the builtins: - configure `run-name` to print the inputs (otherwise they're not printed anywhere and it's hard to check them) - make the publish action `verbose` - don't fail testpypi on duplicates (easier to test workflow files) --- .../{release.yml => release-builtins.yml} | 8 ++- .github/workflows/release-main.yml | 49 +++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) rename .github/workflows/{release.yml => release-builtins.yml} (91%) create mode 100644 .github/workflows/release-main.yml diff --git a/.github/workflows/release.yml b/.github/workflows/release-builtins.yml similarity index 91% rename from .github/workflows/release.yml rename to .github/workflows/release-builtins.yml index df318125..73b0f366 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release-builtins.yml @@ -1,4 +1,6 @@ -name: Publish Python distribution to PyPI and TestPyPI +name: Publish ua-parser builtins + +run-name: Publish ${{ inputs.tag || 'master' }} to ${{ inputs.environment || 'dummy' }} on: schedule: @@ -67,6 +69,8 @@ jobs: uses: pypa/gh-action-pypi-publish@release/v1 with: repository-url: https://test.pypi.org/legacy/ + skip-existing: true + verbose: true publish-to-pypi: name: publish @@ -88,3 +92,5 @@ jobs: path: dist/ - name: Publish uses: pypa/gh-action-pypi-publish@release/v1 + with: + verbose: true diff --git a/.github/workflows/release-main.yml b/.github/workflows/release-main.yml new file mode 100644 index 00000000..742e6078 --- /dev/null +++ b/.github/workflows/release-main.yml @@ -0,0 +1,49 @@ +name: Publish ua-parser + +on: + workflow_dispatch: + release: + types: [created] + +env: + ENVNAME: ${{ github.event_name == 'release' && 'pypi' || 'testpypi' }} + 
ENVURL: https://${{ github.event_name != 'release' && 'test.' || '' }}pypy.org/p/ua-parser + +jobs: + release: + runs-on: ubuntu-latest + + environment: + name: ${{ github.event_name == 'release' && 'pypi' || 'testpypi' }} + url: https://${{ github.event_name != 'release' && 'test.' || '' }}pypy.org/p/ua-parser + + permissions: + id-token: write + + steps: + - name: Checkout working copy + uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.x" + - name: Install dependency + run: | + python -mpip install --upgrade pip + python -mpip install build + - name: Build sdist and wheel + run: python -mbuild + - name: Publish to testpypi + if: ${{ env.ENVNAME == 'testpypi' }} + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: https://test.pypi.org/legacy/ + skip-existing: true + verbose: true + password: ${{ secrets.PUBLISH_TOKEN }} + - name: Publish to pypi + if: ${{ env.ENVNAME == 'pypi' }} + uses: pypa/gh-action-pypi-publish@release/v1 + with: + verbose: true + password: ${{ secrets.PUBLISH_TOKEN }} From 26ed6f85fe877416473091fb104b6b0777cc5398 Mon Sep 17 00:00:00 2001 From: masklinn Date: Mon, 25 Nov 2024 17:23:30 +0100 Subject: [PATCH 15/48] Add configuration file for RTFD --- .readthedocs.yaml | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 .readthedocs.yaml diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 00000000..1eec0722 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,32 @@ +# Read the Docs configuration file for Sphinx projects +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +version: 2 + +# needed for uap-core to get initialised properly +submodules: + include: all + +build: + os: ubuntu-24.04 + tools: + python: "3.12" + jobs: + post_checkout: + # rtfd doesn't retrieve tags by default, but we need them for `git + # describe` in order to build the parser builtins + # FIXME: remove 
once upb is published and can be installed from pypi) + - git fetch --unshallow --tags || true + +python: + install: + - method: pip + path: . + extra_requirements: + - yaml + - regex + - re2 + +sphinx: + configuration: doc/conf.py + fail_on_warning: true From 17875497555a370cd4115c72149bb4ababc534c4 Mon Sep 17 00:00:00 2001 From: masklinn Date: Mon, 25 Nov 2024 17:56:31 +0100 Subject: [PATCH 16/48] Fix typo in directive (forgot space) which prevented the regex resolver from being documented --- doc/api.rst | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 6f984a4d..c77eee47 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -75,11 +75,12 @@ from user agent strings. .. warning:: Only available if |re2|_ is installed. -.. class::ua_parser.regex.Resolver(Matchers) +.. class:: ua_parser.regex.Resolver(Matchers) - An advanced resolver based on |regex|_ and a bespoke implementation - of regex prefiltering, by the sibling project `ua-rust - `_ and a bespoke implementation + of regex prefiltering, by the sibling project `uap-rust + `_. Sufficiently fast that a cache may not be necessary, and may even be detrimental at smaller cache sizes From 4fc1d04ed0dde973c7ae6002aa5e5adb76bd2cce Mon Sep 17 00:00:00 2001 From: masklinn Date: Mon, 25 Nov 2024 18:46:39 +0100 Subject: [PATCH 17/48] Fix typos in the cache doc --- doc/advanced/caches.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/advanced/caches.rst b/doc/advanced/caches.rst index c804d7c2..9f09701a 100644 --- a/doc/advanced/caches.rst +++ b/doc/advanced/caches.rst @@ -45,14 +45,14 @@ exercises the caches themselves and barely looks at the data. ---------------------------- ``bench`` is much more expensive in both CPU and wallclock as it -actually runs the base resolvers, combined with various caches of -various sizes. 
For usability, it can report its data (the average -parse time per input entry) in both human-readable text with one -result per line and CSV with resolver configurations as the columns -and cache sizes as the rows. +actually runs the resolvers on the sample file, combined with various +caches of various sizes. For usability, it can report its data (the +average parse time per input entry) in both human-readable text with +one result per line and CSV with resolver configurations as the +columns and cache sizes as the rows. ``hitrates`` is generally sufficient as generally speaking for the -same base resolver performances tend to more or less follo hit rates: +same base resolver performances tend to more or less follow hit rates: a cache hit is close to free compared to a cache miss. Although this is truer for the basic resolver (for which misses tend to be very expensive). ``bench`` is mostly useful to validate or tie-break From 7e040fd8b71177838e7e98176d8e1d8020447250 Mon Sep 17 00:00:00 2001 From: masklinn Date: Mon, 25 Nov 2024 19:31:09 +0100 Subject: [PATCH 18/48] Add doc badges, link action badge --- README.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.rst b/README.rst index 512d0d25..0cb76ddd 100644 --- a/README.rst +++ b/README.rst @@ -8,8 +8,13 @@ Build Status ------------ .. image:: https://github.com/ua-parser/uap-python/actions/workflows/ci.yml/badge.svg + :target: https://github.com/ua-parser/uap-python/actions/workflows/ci.yml?query=branch%3Amaster :alt: CI on the master branch +.. 
image:: https://readthedocs.org/projects/uap-python/badge/?version=latest + :target: https://readthedocs.org/project/uap-python/en/latest/ + :alt: Documentation Status + Installing ---------- From b9adc087a44bfd85d1c370776b09b047483e4253 Mon Sep 17 00:00:00 2001 From: masklinn Date: Tue, 26 Nov 2024 19:54:22 +0100 Subject: [PATCH 19/48] Fix a bunch of URLs - it's pypi.org not pypy.org - rtfd doesn't serve docs out of its main domain (which makes sense since docs can contain arbitrary JS) - add the doc and issues to the project URLs --- .github/workflows/release-main.yml | 3 +-- README.rst | 6 +++--- pyproject.toml | 8 ++++++-- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/.github/workflows/release-main.yml b/.github/workflows/release-main.yml index 742e6078..17b2c95d 100644 --- a/.github/workflows/release-main.yml +++ b/.github/workflows/release-main.yml @@ -7,7 +7,6 @@ on: env: ENVNAME: ${{ github.event_name == 'release' && 'pypi' || 'testpypi' }} - ENVURL: https://${{ github.event_name != 'release' && 'test.' || '' }}pypy.org/p/ua-parser jobs: release: @@ -15,7 +14,7 @@ jobs: environment: name: ${{ github.event_name == 'release' && 'pypi' || 'testpypi' }} - url: https://${{ github.event_name != 'release' && 'test.' || '' }}pypy.org/p/ua-parser + url: https://${{ github.event_name != 'release' && 'test.' || '' }}pypi.org/p/ua-parser permissions: id-token: write diff --git a/README.rst b/README.rst index 0cb76ddd..e5baf40f 100644 --- a/README.rst +++ b/README.rst @@ -12,7 +12,7 @@ Build Status :alt: CI on the master branch .. image:: https://readthedocs.org/projects/uap-python/badge/?version=latest - :target: https://readthedocs.org/project/uap-python/en/latest/ + :target: https://uap-python.readthedocs.io/en/latest/ :alt: Documentation Status Installing @@ -42,7 +42,7 @@ ua-parser supports CPython 3.9 and newer, recent pypy (supporting See `builtin resolvers`_ for more explanation of the tradeoffs between the different options. -.. 
_builtin resolvers: https://readthedocs.org/ua-parser/uap-python/guides#builtin-resolvers +.. _builtin resolvers: https://uap-python.readthedocs.io/en/latest/guides.html#builtin-resolvers Quick Start ----------- @@ -114,4 +114,4 @@ Upgrading Upgrading from 0.x? See `the upgrade guide`_. -.. _the upgrade guide: https://readthedocs.org/ua-parser/uap-python/advanced/migration +.. _the upgrade guide: https://uap-python.readthedocs.io/en/latest/advanced/migration.html diff --git a/pyproject.toml b/pyproject.toml index c0d4192c..28e1bb78 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,13 +5,12 @@ build-backend = "setuptools.build_meta" [project] name = "ua-parser" description = "Python port of Browserscope's user agent parser" -version = "1.0.0a1" +version = "1.0.0a2" readme = "README.rst" requires-python = ">=3.9" dependencies = ["ua-parser-builtins"] license = {text = "Apache 2.0"} -urls = {repository = "https://github.com/ua-parser/uap-python"} authors = [ { name = "Stephen Lamm", email = "slamm@google.com"}, @@ -44,6 +43,11 @@ classifiers = [ # "Programming Language :: Python :: Implementation :: GraalPy", ] +[project.urls] +documentation = "https://uap-python.readthedocs.io" +repository = "https://github.com/ua-parser/uap-python" +issues = "https://github.com/ua-parser/uap-python/issues" + [project.optional-dependencies] yaml = ["PyYaml"] re2 = ["google-re2"] From 95947761aed896974c55857834ed0456f90bd7b6 Mon Sep 17 00:00:00 2001 From: masklinn Date: Thu, 28 Nov 2024 17:52:36 +0100 Subject: [PATCH 20/48] Update version to 1.0 Hopefully I've not missed one hidden somewhere. Also update the development status because y not. 
Closes #234 --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 28e1bb78..161c98b1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [project] name = "ua-parser" description = "Python port of Browserscope's user agent parser" -version = "1.0.0a2" +version = "1.0.0" readme = "README.rst" requires-python = ">=3.9" dependencies = ["ua-parser-builtins"] @@ -24,7 +24,7 @@ maintainers = [ ] classifiers = [ - "Development Status :: 4 - Beta", + "Development Status :: 5 - Production/Stable", "Environment :: Web Environment", "Intended Audience :: Developers", "Operating System :: OS Independent", From 3bd09b751a0a622b7f3ef8b977f10640d3d912ca Mon Sep 17 00:00:00 2001 From: masklinn Date: Thu, 28 Nov 2024 20:45:16 +0100 Subject: [PATCH 21/48] Link readme to the default / stable documentation not latest It doesn't seem like you can deep link into the default? Not entirely clear, I'll probably have to read up on rtfd redirections but that's probably fine for now. --- README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index e5baf40f..091fddae 100644 --- a/README.rst +++ b/README.rst @@ -12,7 +12,7 @@ Build Status :alt: CI on the master branch .. image:: https://readthedocs.org/projects/uap-python/badge/?version=latest - :target: https://uap-python.readthedocs.io/en/latest/ + :target: https://uap-python.readthedocs.io/ :alt: Documentation Status Installing @@ -42,7 +42,7 @@ ua-parser supports CPython 3.9 and newer, recent pypy (supporting See `builtin resolvers`_ for more explanation of the tradeoffs between the different options. -.. _builtin resolvers: https://uap-python.readthedocs.io/en/latest/guides.html#builtin-resolvers +.. _builtin resolvers: https://uap-python.readthedocs.io/stable/guides.html#builtin-resolvers Quick Start ----------- @@ -114,4 +114,4 @@ Upgrading Upgrading from 0.x? 
See `the upgrade guide`_. -.. _the upgrade guide: https://uap-python.readthedocs.io/en/latest/advanced/migration.html +.. _the upgrade guide: https://uap-python.readthedocs.io/stable/advanced/migration.html From e5bbe5aa221118e53e371432bbd5777be388ef3f Mon Sep 17 00:00:00 2001 From: masklinn Date: Thu, 5 Dec 2024 18:55:43 +0100 Subject: [PATCH 22/48] Add hook to do postN releases Turns out I did a booboo which was predictable, and if we want to track the versioning of uap-core we can't just start diverging. post-releases provide a path forwards, but was not initially predicted. Special case the existing stable releases in the script, might do something smarter eventually if necessary but... --- ua-parser-builtins/hatch_build.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ua-parser-builtins/hatch_build.py b/ua-parser-builtins/hatch_build.py index e92e9730..1eac7764 100644 --- a/ua-parser-builtins/hatch_build.py +++ b/ua-parser-builtins/hatch_build.py @@ -23,6 +23,9 @@ def update(self, metadata: dict[str, Any]) -> None: } }, ) + if v in ("0.15.0", "0.16.0", "0.18.0"): + v = f"{v}.post1" + metadata["version"] = v From ca65e02248700c6bae6529f34af8e23479e53880 Mon Sep 17 00:00:00 2001 From: masklinn Date: Wed, 4 Dec 2024 20:47:41 +0100 Subject: [PATCH 23/48] Remove dependency from ua-parser-builtins It's not super useful, and apparently it causes issues for some tools / workflows (TBF I was surprised pip was fine with it). 
Fixes #246 --- ua-parser-builtins/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ua-parser-builtins/pyproject.toml b/ua-parser-builtins/pyproject.toml index db0da38b..a9c6d3ea 100644 --- a/ua-parser-builtins/pyproject.toml +++ b/ua-parser-builtins/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "hatchling.build" name = "ua-parser-builtins" description = "Precompiled rules for User Agent Parser" readme = "README.md" -dependencies = ["ua-parser"] +dependencies = [] requires-python = ">=3.9" license = {text = "Apache 2.0"} urls = {repository = "https://github.com/ua-parser/uap-python"} From e5a13482f334f14d954c55a03f8205501dc15348 Mon Sep 17 00:00:00 2001 From: Kurt McKee Date: Wed, 4 Dec 2024 16:11:48 -0600 Subject: [PATCH 24/48] Specify the PyPy version to target in tox This helps ensure that PyPy 3.10 is actually getting tested locally. --- tox.ini | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tox.ini b/tox.ini index 0f2edd4c..5bd97e9e 100644 --- a/tox.ini +++ b/tox.ini @@ -1,11 +1,11 @@ [tox] min_version = 4.0 env_list = py3{9,10,11,12} - pypy + pypy310 graalpy flake8, black, typecheck labels = - test = py3{9,10,11,12},pypy,graalpy + test = py3{9,10,11,12},pypy310,graalpy cpy = py3{9,10,11,12} pypy = pypy3.10 graal = graalpy-24 @@ -27,7 +27,7 @@ deps = commands = pytest -Werror --doctest-glob="*.rst" {posargs} -[testenv:{pypy,graalpy}] +[testenv:{pypy310,graalpy}] deps = pytest pyyaml From 9f170aabf6d8a28ca5723d99b660c334a50da85e Mon Sep 17 00:00:00 2001 From: masklinn Date: Sun, 22 Dec 2024 14:02:51 +0100 Subject: [PATCH 25/48] Add zizmor to CI - Can't switch release actions to trusted publishing, see #224. - Remove git credentials persistence everywhere. - Fix "unsafe" template expansion in release-builtins. It should not be accessible to any untrusted third party as it's only on `workflow_dispatch` and `schedule`, but it can't hurt. 
Fixes #249 --- .github/workflows/ci.yml | 3 +++ .github/workflows/release-builtins.yml | 5 +++- .github/workflows/release-main.yml | 6 +++-- .github/workflows/zizmor.yml | 32 ++++++++++++++++++++++++++ 4 files changed, 43 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/zizmor.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index df1cfb3e..4cc5f9f2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,6 +13,7 @@ jobs: with: submodules: true fetch-depth: 0 + persist-credentials: false - name: ruff check uses: chartboost/ruff-action@v1 - name: ruff format @@ -46,6 +47,7 @@ jobs: uses: actions/checkout@v4 with: submodules: true + persist-credentials: false - name: Set up Python uses: actions/setup-python@v5 with: @@ -104,6 +106,7 @@ jobs: with: submodules: true fetch-depth: 0 + persist-credentials: false - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: diff --git a/.github/workflows/release-builtins.yml b/.github/workflows/release-builtins.yml index 73b0f366..6f41709b 100644 --- a/.github/workflows/release-builtins.yml +++ b/.github/workflows/release-builtins.yml @@ -25,9 +25,12 @@ jobs: with: submodules: true fetch-depth: 0 + persist-credentials: false - name: update core + env: + TAG: ${{ inputs.tag || 'master '}} # needs to detach because we can update to a tag - run: git -C uap-core switch --detach ${{ inputs.tag || 'master' }} + run: git -C uap-core switch --detach "$TAG" - name: Set up Python uses: actions/setup-python@v5 with: diff --git a/.github/workflows/release-main.yml b/.github/workflows/release-main.yml index 17b2c95d..bc6fca96 100644 --- a/.github/workflows/release-main.yml +++ b/.github/workflows/release-main.yml @@ -22,6 +22,8 @@ jobs: steps: - name: Checkout working copy uses: actions/checkout@v4 + with: + persist-credentials: false - name: Set up Python uses: actions/setup-python@v5 with: @@ -34,7 +36,7 @@ jobs: run: python -mbuild - name: Publish to 
testpypi if: ${{ env.ENVNAME == 'testpypi' }} - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@release/v1 # zizmor: ignore[use-trusted-publishing] with: repository-url: https://test.pypi.org/legacy/ skip-existing: true @@ -42,7 +44,7 @@ jobs: password: ${{ secrets.PUBLISH_TOKEN }} - name: Publish to pypi if: ${{ env.ENVNAME == 'pypi' }} - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@release/v1 # zizmor: ignore[use-trusted-publishing] with: verbose: true password: ${{ secrets.PUBLISH_TOKEN }} diff --git a/.github/workflows/zizmor.yml b/.github/workflows/zizmor.yml new file mode 100644 index 00000000..5bf4f98f --- /dev/null +++ b/.github/workflows/zizmor.yml @@ -0,0 +1,32 @@ +name: Zizmor + +on: + push: + pull_request: + +jobs: + zizmor: + runs-on: ubuntu-latest + permissions: + security-events: write + contents: read + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + persist-credentials: false + + - name: Install the latest version of uv + uses: astral-sh/setup-uv@v5 + + - name: Run zizmor + run: uvx zizmor --format sarif . > results.sarif + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Upload SARIF file + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: results.sarif + category: zizmor From 5f5f3387a5b19908ed015d36ccfd16d716db07fd Mon Sep 17 00:00:00 2001 From: masklinn Date: Sat, 1 Feb 2025 14:16:09 +0100 Subject: [PATCH 26/48] formatting fixes I assume ruff's updated a few things since last time it was run, as it now fails. 
--- src/ua_parser/__main__.py | 6 ++-- tests/test_legacy.py | 72 +++++++++++++++++++-------------------- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/src/ua_parser/__main__.py b/src/ua_parser/__main__.py index 0ed140f5..047efaaa 100644 --- a/src/ua_parser/__main__.py +++ b/src/ua_parser/__main__.py @@ -101,7 +101,7 @@ def run_stdout(args: argparse.Namespace) -> None: lines = list(args.file) count = len(lines) uniques = len(set(lines)) - print(f"{args.file.name}: {count} lines, {uniques} unique ({uniques/count:.0%})") + print(f"{args.file.name}: {count} lines, {uniques} unique ({uniques / count:.0%})") rules = get_rules(args.bases, args.regexes) @@ -320,7 +320,7 @@ def belady(maxsize: int) -> Cache: overhead / cache_size, ) print( - f"{cache.__name__.lower():8}({cache_size:{w}}): {(total - misses.count)/total*100:2.0f}% hit rate {diff}" + f"{cache.__name__.lower():8}({cache_size:{w}}): {(total - misses.count) / total * 100:2.0f}% hit rate {diff}" ) del misses, parser @@ -378,7 +378,7 @@ def run_threaded(args: argparse.Namespace) -> None: totlines = len(lines) * args.threads # runtime in us t = (time.perf_counter_ns() - st) / 1000 - print(f"{t/totlines:>4.0f}us/line", flush=True) + print(f"{t / totlines:>4.0f}us/line", flush=True) EPILOG = """For good results the sample `file` should be an actual diff --git a/tests/test_legacy.py b/tests/test_legacy.py index 7ada17c5..8fafbee6 100644 --- a/tests/test_legacy.py +++ b/tests/test_legacy.py @@ -107,18 +107,18 @@ def runUserAgentTestsFromYAML(self, file_name): result = {} result = user_agent_parser.ParseUserAgent(user_agent_string) - assert ( - result == expected - ), "UA: {0}\n expected<{1}, {2}, {3}, {4}> != actual<{5}, {6}, {7}, {8}>".format( - user_agent_string, - expected["family"], - expected["major"], - expected["minor"], - expected["patch"], - result["family"], - result["major"], - result["minor"], - result["patch"], + assert result == expected, ( + "UA: {0}\n expected<{1}, {2}, {3}, {4}> != 
actual<{5}, {6}, {7}, {8}>".format( + user_agent_string, + expected["family"], + expected["major"], + expected["minor"], + expected["patch"], + result["family"], + result["major"], + result["minor"], + result["patch"], + ) ) assert ( len(user_agent_parser._PARSE_CACHE) <= user_agent_parser.MAX_CACHE_SIZE @@ -143,20 +143,20 @@ def runOSTestsFromYAML(self, file_name): } result = user_agent_parser.ParseOS(user_agent_string) - assert ( - result == expected - ), "UA: {0}\n expected<{1} {2} {3} {4} {5}> != actual<{6} {7} {8} {9} {10}>".format( - user_agent_string, - expected["family"], - expected["major"], - expected["minor"], - expected["patch"], - expected["patch_minor"], - result["family"], - result["major"], - result["minor"], - result["patch"], - result["patch_minor"], + assert result == expected, ( + "UA: {0}\n expected<{1} {2} {3} {4} {5}> != actual<{6} {7} {8} {9} {10}>".format( + user_agent_string, + expected["family"], + expected["major"], + expected["minor"], + expected["patch"], + expected["patch_minor"], + result["family"], + result["major"], + result["minor"], + result["patch"], + result["patch_minor"], + ) ) def runDeviceTestsFromYAML(self, file_name): @@ -176,16 +176,16 @@ def runDeviceTestsFromYAML(self, file_name): } result = user_agent_parser.ParseDevice(user_agent_string) - assert ( - result == expected - ), "UA: {0}\n expected<{1} {2} {3}> != actual<{4} {5} {6}>".format( - user_agent_string, - expected["family"], - expected["brand"], - expected["model"], - result["family"], - result["brand"], - result["model"], + assert result == expected, ( + "UA: {0}\n expected<{1} {2} {3}> != actual<{4} {5} {6}>".format( + user_agent_string, + expected["family"], + expected["brand"], + expected["model"], + result["family"], + result["brand"], + result["model"], + ) ) From ce129055a661eb3be0ab0edaa7669efec0cf8d7d Mon Sep 17 00:00:00 2001 From: masklinn Date: Sat, 1 Feb 2025 14:17:40 +0100 Subject: [PATCH 27/48] Fix memoisation of lazy parser & bump version Reported 
by @Rafiot: the lazy parser is not memoised, this has limited effect on the basic / pure Python parser as its initialisation is trivial, but it *significantly* impacts the re2 and regex parsers as they need to process regexes into a filter tree. The memoization was mistakenly removed in #230: while refactoring initialisation I removed the setting of the `parser` global. - add a test to ensure the parser is correctly memoized, not re-instantiated every time - reinstate setting the global - add a mutex on `__getattr__`, it should only be used on first access and avoids two threads creating an expensive parser at the same time (which is a waste of CPU) Fixes #253 --- pyproject.toml | 2 +- src/ua_parser/__init__.py | 29 +++++++++++++++++++++-------- tests/test_convenience_parser.py | 17 +++++++++++++++++ 3 files changed, 39 insertions(+), 9 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 161c98b1..11425fca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [project] name = "ua-parser" description = "Python port of Browserscope's user agent parser" -version = "1.0.0" +version = "1.0.1" readme = "README.rst" requires-python = ">=3.9" dependencies = ["ua-parser-builtins"] diff --git a/src/ua_parser/__init__.py b/src/ua_parser/__init__.py index 040dda3e..5b5ba71b 100644 --- a/src/ua_parser/__init__.py +++ b/src/ua_parser/__init__.py @@ -41,7 +41,8 @@ ] import importlib.util -from typing import Callable, Optional +import threading +from typing import Callable, Optional, cast from .basic import Resolver as BasicResolver from .caching import CachingResolver, S3Fifo as Cache @@ -78,7 +79,7 @@ ) -VERSION = (1, 0, 0) +VERSION = (1, 0, 1) class Parser: @@ -135,15 +136,27 @@ def parse_device(self: Resolver, ua: str) -> Optional[Device]: initialisation, rather than pay for it at first call.
""" +_lazy_globals_lock = threading.Lock() + def __getattr__(name: str) -> Parser: global parser - if name == "parser": - if RegexResolver or Re2Resolver or IS_GRAAL: - matchers = load_lazy_builtins() - else: - matchers = load_builtins() - return Parser.from_matchers(matchers) + with _lazy_globals_lock: + if name == "parser": + # if two threads access `ua_parser.parser` before it's + # initialised, the second one will wait until the first + # one's finished by which time the parser global should be + # set and can be returned with no extra work + if p := globals().get("parser"): + return cast(Parser, p) + + if RegexResolver or Re2Resolver or IS_GRAAL: + matchers = load_lazy_builtins() + else: + matchers = load_builtins() + parser = Parser.from_matchers(matchers) + return parser + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/tests/test_convenience_parser.py b/tests/test_convenience_parser.py index cf1d3609..86240613 100644 --- a/tests/test_convenience_parser.py +++ b/tests/test_convenience_parser.py @@ -1,6 +1,23 @@ +import ua_parser from ua_parser import Domain, Parser, PartialResult, Result +def test_parser_memoized() -> None: + """The global parser should be lazily instantiated but memoized""" + # ensure there is no global parser + vars(ua_parser).pop("parser", None) + + p1 = ua_parser.parser + p2 = ua_parser.parser + + assert p1 is p2 + + # force the creation of a clean parser + del ua_parser.parser + p3 = ua_parser.parser + assert p3 is not p1 + + def resolver(s: str, d: Domain) -> PartialResult: return PartialResult(d, None, None, None, s) From 1b64406fce241dec909b03a05383f3b31a073e7e Mon Sep 17 00:00:00 2001 From: William Douglas Date: Sat, 15 Feb 2025 04:24:29 -0800 Subject: [PATCH 28/48] builtins: fallback to package.json for uap-core version In case where uap-core isn't a git repo (e.g. git archive), use uap-core's `package.json` as a fallback for getting a version. 
--- ua-parser-builtins/hatch_build.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/ua-parser-builtins/hatch_build.py b/ua-parser-builtins/hatch_build.py index 1eac7764..9bbe23f0 100644 --- a/ua-parser-builtins/hatch_build.py +++ b/ua-parser-builtins/hatch_build.py @@ -1,6 +1,7 @@ from __future__ import annotations import io +import json import os import os.path import tempfile @@ -10,19 +11,24 @@ import yaml from hatchling.builders.hooks.plugin.interface import BuildHookInterface from hatchling.metadata.plugin.interface import MetadataHookInterface -from versioningit import get_version +from versioningit import errors, get_version class MetadataHook(MetadataHookInterface): def update(self, metadata: dict[str, Any]) -> None: - v = get_version( - os.path.join(self.root, "uap-core"), - config={ - "format": { - "distance": "{next_version}.dev{distance}", - } - }, - ) + try: + v = get_version( + os.path.join(self.root, "uap-core"), + config={ + "format": { + "distance": "{next_version}.dev{distance}", + } + }, + ) + except errors.NotSdistError: + with open(os.path.join(self.root, "uap-core", "package.json")) as ufile: + ujson = json.load(ufile) + v = ujson["version"] if v in ("0.15.0", "0.16.0", "0.18.0"): v = f"{v}.post1" From 2ca789ea504afd885a9ee3d33ef64db6d34e8908 Mon Sep 17 00:00:00 2001 From: masklinn Date: Sat, 15 Feb 2025 13:58:37 +0100 Subject: [PATCH 29/48] Fix fallback input for release action Apparently the way submodules repos are configured leads to the branches not being mirrored locally (?) As such, the release job's fallback of checking out `'master'` fails whether triggered[^1] or scheduled[^2]. 
[^1]: https://github.com/ua-parser/uap-python/actions/runs/13090871627 [^2]: https://github.com/ua-parser/uap-python/actions/runs/13092233962 --- .github/workflows/release-builtins.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release-builtins.yml b/.github/workflows/release-builtins.yml index 6f41709b..f2ad7b82 100644 --- a/.github/workflows/release-builtins.yml +++ b/.github/workflows/release-builtins.yml @@ -28,7 +28,7 @@ jobs: persist-credentials: false - name: update core env: - TAG: ${{ inputs.tag || 'master '}} + TAG: ${{ inputs.tag || 'origin/master '}} # needs to detach because we can update to a tag run: git -C uap-core switch --detach "$TAG" - name: Set up Python From ea7a5ae639150589729946746009d86aa4f8c0c5 Mon Sep 17 00:00:00 2001 From: masklinn Date: Sat, 15 Feb 2025 14:01:06 +0100 Subject: [PATCH 30/48] builtins release: make inputs required for manual triggers --- .github/workflows/release-builtins.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/release-builtins.yml b/.github/workflows/release-builtins.yml index f2ad7b82..fad58c0c 100644 --- a/.github/workflows/release-builtins.yml +++ b/.github/workflows/release-builtins.yml @@ -11,9 +11,11 @@ on: tag: description: "uap-core ref to release" type: string + required: true environment: description: "environment to release for (testpypy or pypy)" type: environment + required: true jobs: build: From 60b35ec1fb358005f9e732831600b10e8533c080 Mon Sep 17 00:00:00 2001 From: masklinn Date: Sat, 15 Feb 2025 14:02:10 +0100 Subject: [PATCH 31/48] Clarify environment fallback Since the environment is required via `workflow_dispatch`, the only fallback is scheduled release in which case we're publishing to pypi.
--- .github/workflows/release-builtins.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release-builtins.yml b/.github/workflows/release-builtins.yml index fad58c0c..917dd3de 100644 --- a/.github/workflows/release-builtins.yml +++ b/.github/workflows/release-builtins.yml @@ -1,6 +1,6 @@ name: Publish ua-parser builtins -run-name: Publish ${{ inputs.tag || 'master' }} to ${{ inputs.environment || 'dummy' }} +run-name: Publish ${{ inputs.tag || 'master' }} to ${{ inputs.environment || 'pypy (scheduled)' }} on: schedule: From 997990f47f97c64db8d69d650c3b7f7c87e402a7 Mon Sep 17 00:00:00 2001 From: masklinn Date: Mon, 3 Mar 2025 18:53:09 +0100 Subject: [PATCH 32/48] Fix fallback input for release action for real Turns out `'master'` probably worked all along as a fallback, the problem is that I was using `'master '`, with a trailing space, which was not a branch git managed to find for obvious reasons, and since I carried the error into the fully qualified reference... it still didn't work. And manual triggers didn't have the issue because the tag was `required`, so I'd have to input the tag by hand every time, and the fallback value would be bypassed.
- fix the fallback value - remove the requirement on `tag`, such that it's possible to manually trigger the action in a default state --- .github/workflows/release-builtins.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/release-builtins.yml b/.github/workflows/release-builtins.yml index 917dd3de..408f9c74 100644 --- a/.github/workflows/release-builtins.yml +++ b/.github/workflows/release-builtins.yml @@ -11,7 +11,6 @@ on: tag: description: "uap-core ref to release" type: string - required: true environment: description: "environment to release for (testpypy or pypy)" type: environment @@ -30,7 +29,7 @@ jobs: persist-credentials: false - name: update core env: - TAG: ${{ inputs.tag || 'origin/master '}} + TAG: ${{ inputs.tag || 'origin/master' }} # needs to detach because we can update to a tag run: git -C uap-core switch --detach "$TAG" - name: Set up Python From 911b7a313e3b0ee2c419d1ac66f2b5331a6d2143 Mon Sep 17 00:00:00 2001 From: masklinn Date: Mon, 9 Jun 2025 16:23:47 +0200 Subject: [PATCH 33/48] Update classifiers and version bounds - add classifier for cpython 3.13 - add classifier for graal (now that it's been merged) - add pypy 3.11 to tox - re2 still hasn't published for CPython 3.13 so exclude from tox Fixes #257, fixes #265 --- .github/workflows/ci.yml | 2 +- pyproject.toml | 4 ++-- tox.ini | 15 +++++++-------- ua-parser-builtins/pyproject.toml | 3 ++- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4cc5f9f2..633c55ec 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -90,7 +90,7 @@ jobs: - "3.12" - "3.13" - "pypy-3.10" - # - "pypy-3.11" + - "pypy-3.11" - "graalpy-24" include: - source: sdist diff --git a/pyproject.toml b/pyproject.toml index 11425fca..33fc8255 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,10 +37,10 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python 
:: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", - # no graalpy classifier yet (pypa/trove-classifiers#188) - # "Programming Language :: Python :: Implementation :: GraalPy", + "Programming Language :: Python :: Implementation :: GraalPy", ] [project.urls] diff --git a/tox.ini b/tox.ini index 5bd97e9e..9b3f1541 100644 --- a/tox.ini +++ b/tox.ini @@ -1,14 +1,13 @@ [tox] min_version = 4.0 -env_list = py3{9,10,11,12} - pypy310 +env_list = py3{9,10,11,12,13} + pypy{310,311} graalpy flake8, black, typecheck labels = - test = py3{9,10,11,12},pypy310,graalpy - cpy = py3{9,10,11,12} - pypy = pypy3.10 - graal = graalpy-24 + test = py3{9,10,11,12,13},pypy{310,311},graalpy + cpy = py3{9,10,11,12,13} + pypy = pypy{310,311} check = flake8, black, typecheck [testenv] @@ -21,16 +20,16 @@ wheel_build_env = .pkg deps = pytest pyyaml - google-re2 ua-parser-rs ./ua-parser-builtins commands = pytest -Werror --doctest-glob="*.rst" {posargs} -[testenv:{pypy310,graalpy}] +[testenv:py3{9,10,11,12}] deps = pytest pyyaml + google-re2 ua-parser-rs ./ua-parser-builtins diff --git a/ua-parser-builtins/pyproject.toml b/ua-parser-builtins/pyproject.toml index a9c6d3ea..0086e853 100644 --- a/ua-parser-builtins/pyproject.toml +++ b/ua-parser-builtins/pyproject.toml @@ -29,9 +29,10 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", - # "Programming Language :: Python :: Implementation :: GraalPy", + "Programming Language :: Python :: Implementation :: GraalPy", ] [tool.hatch.build.hooks.custom] From bd607ffc0b14190c8a8a20bf06383c43e4115f81 Mon Sep 17 00:00:00 2001 From: 
masklinn Date: Mon, 9 Jun 2025 16:41:16 +0200 Subject: [PATCH 34/48] Update wording of resolvers guide Given ua-parser/uap-rust#29 and ua-parser/uap-rust#31, the wording of the comparison needs to be updated to account for: - The `regex` memory use being much improved. - The `regex` runtime on devices being slightly improved, with the Python interface to `re2` not supporting custom atom lengths. Closes #264 --- doc/guides.rst | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/doc/guides.rst b/doc/guides.rst index 39b43e4b..c16e601e 100644 --- a/doc/guides.rst +++ b/doc/guides.rst @@ -153,7 +153,7 @@ Builtin Resolvers * - ``regex`` - great - good - - bad + - fine - great * - ``re2`` - good @@ -182,12 +182,11 @@ it: interpreters and platforms supported by pyo3 (currently: cpython, pypy, and graalpy, on linux, macos and linux, intel and arm). It is also built as a cpython abi3 wheel and should thus suffer from no - compatibility issues with new release. + compatibility issues with new releases of cpython at least. - Built entirely out of safe rust code, its safety risks are entirely in ``regex`` and ``pyo3``. -- Its biggest drawback is that it is a lot more memory intensive than - the other resolvers, because ``regex`` tends to trade memory for - speed (~155MB high water mark on a real-world dataset). +- Uses somewhat more memory than the other resolvers (~85MB high water + mark on a real-world dataset). If available, it is the default resolver, without a cache. @@ -198,7 +197,7 @@ The ``re2`` resolver is built atop the widely used `google-re2 `_ via its built-in Python bindings. It: -- Is extremely fast, though around 80% slower than ``regex`` on +- Is quite fast, though only about half the speed of ``regex`` on real-world data. 
- Is only compatible with CPython, and uses pure API wheels, so needs a different release for each cpython version, for each OS, for each @@ -210,6 +209,9 @@ It: If available, it is the second-preferred resolver, without a cache. +At the end of the day, it is really only useful if the codebase +already uses ``re2``. + ``basic`` --------- From 5cd8d515a6e99036220931570af5fd9fa4781dd7 Mon Sep 17 00:00:00 2001 From: masklinn Date: Mon, 9 Jun 2025 17:30:53 +0200 Subject: [PATCH 35/48] Remove unnecessary mentions of re2 Don't remove the feature, don't remove the resolver, and keep the resolver itself documented, but significantly de-emphasize `re2` by removing it from the README and from examples: users should not be encouraged to use it when they could use `regex`. --- README.rst | 10 +++------- doc/guides.rst | 14 +++++++------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/README.rst b/README.rst index 091fddae..17b405c7 100644 --- a/README.rst +++ b/README.rst @@ -31,13 +31,9 @@ ua-parser supports CPython 3.9 and newer, recent pypy (supporting .. note:: - The ``[regex]`` feature is *strongly* recommended: - - - ``[re2]`` is slightly slower and only works with cpython, though - it is still a great option then (and is more memory-efficient). - - Pure python (no feature) is *significantly* slower, especially on - non-cpython runtimes, but it is the most memory efficient even - with caches. + The ``[regex]`` feature is *strongly* recommended, the Pure python + (no feature) is *significantly* slower, especially on non-cpython + runtimes, though it is the most memory efficient. See `builtin resolvers`_ for more explanation of the tradeoffs between the different options. diff --git a/doc/guides.rst b/doc/guides.rst index c16e601e..9ea323e2 100644 --- a/doc/guides.rst +++ b/doc/guides.rst @@ -93,10 +93,10 @@ composing :class:`~ua_parser.Resolver` objects. The most basic such customisation is simply configuring caching away from the default setup. 
-As an example, in the default configuration if |re2|_ is available the -RE2-based resolver is not cached, a user might consider the memory -investment worth it and want to reconfigure the stack for a cached -base. +As an example, in the default configuration if |regex|_ is available +the regex-based resolver is not cached, a user might consider the +memory investment worth it and want to reconfigure the stack for a +cached base. The process is uncomplicated as the APIs are designed to compose together. @@ -105,8 +105,8 @@ The first step is to instantiate a base resolver, instantiated with the relevant :class:`Matchers` data:: import ua_parser.loaders - import ua_parser.re2 - base = ua_parser.re2.Resolver( + import ua_parser.regex + base = ua_parser.regex.Resolver( ua_parser.loaders.load_lazy_builtins()) The next step is to instantiate the cache [#cache]_ suitably @@ -295,7 +295,7 @@ could then use something like:: Parser(FallbackResolver([ foo_resolver, - re2.Resolver(load_lazy_builtins()), + regex.Resolver(load_lazy_builtins()), ])) to prioritise cheap resolving of our application while still resolving From a187a29d926573d45225b60f675424723dc6e7a6 Mon Sep 17 00:00:00 2001 From: masklinn Date: Wed, 24 Dec 2025 20:20:29 +0100 Subject: [PATCH 36/48] Disable PT030 for legacy tests This complains that `DeprecationWarning` is too broad, and I really don't care. 
Disable via per-file-ignore setting, because ruff conflicts with itself when trying to disable via magic stanza in the file (apparently `ruff: noqa` doesn't work if there's a leading space, but then ruff format is amgy) --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 33fc8255..fb94bbf8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,6 +77,9 @@ classes = ["OS"] known-first-party = ["ua_parser"] combine-as-imports = true +[tool.ruff.lint.per-file-ignores] +"tests/test_legacy.py" = ["PT030"] + [tool.mypy] python_version = "3.9" files = "src,tests" From 0f2c13f5ae9827c0cc0f1d1ed19054f6aaed234d Mon Sep 17 00:00:00 2001 From: masklinn Date: Wed, 24 Dec 2025 20:21:50 +0100 Subject: [PATCH 37/48] Improve tox file - add an entry for the docs (and remove the docs makefile, given sphinx's make-mode) - remove the old automation makefile - move the labels into the individual testenvs, easier to maintain - also apparently I forgot to add cpython 3.13 to the cpython testenv? --- Makefile | 20 -------------------- doc/Makefile | 20 -------------------- tox.ini | 31 +++++++++++++++++++++---------- 3 files changed, 21 insertions(+), 50 deletions(-) delete mode 100644 Makefile delete mode 100644 doc/Makefile diff --git a/Makefile b/Makefile deleted file mode 100644 index 0604a25c..00000000 --- a/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -all: test - -test: - tox - -clean: - @find . -name '*.pyc' -delete - @rm -rf tmp \ - src/ua_parser.egg-info \ - dist \ - build \ - src/ua_parser/_regexes.py -format: - @black . - -release: clean - pyproject-build - twine upload -s dist/* - -.PHONY: all test clean format release diff --git a/doc/Makefile b/doc/Makefile deleted file mode 100644 index d4bb2cbb..00000000 --- a/doc/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line, and also -# from the environment for the first two. 
-SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SOURCEDIR = . -BUILDDIR = _build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/tox.ini b/tox.ini index 9b3f1541..da50c20d 100644 --- a/tox.ini +++ b/tox.ini @@ -3,14 +3,10 @@ min_version = 4.0 env_list = py3{9,10,11,12,13} pypy{310,311} graalpy - flake8, black, typecheck -labels = - test = py3{9,10,11,12,13},pypy{310,311},graalpy - cpy = py3{9,10,11,12,13} - pypy = pypy{310,311} - check = flake8, black, typecheck + check, format, typecheck [testenv] +labels = test # wheel install package = wheel # wheel is universal so can use the same wheel for all envs @@ -25,7 +21,8 @@ deps = commands = pytest -Werror --doctest-glob="*.rst" {posargs} -[testenv:py3{9,10,11,12}] +[testenv:py3{9,10,11,12,13}] +labels = test, cpy deps = pytest pyyaml @@ -33,20 +30,34 @@ deps = ua-parser-rs ./ua-parser-builtins -[testenv:flake8] +[testenv:pypy{310,311}] +labels = test, pypy + +[testenv:check] +labels = check package = skip deps = ruff commands = ruff check {posargs} -[testenv:black] +[testenv:format] +description = Runs the formatter (just showing errors by default) +labels = check package = skip deps = ruff commands = ruff format {posargs:--diff} [testenv:typecheck] +labels = check package = skip deps = mypy types-PyYaml ./ua-parser-builtins -commands = mypy {posargs:} +commands = mypy {posargs} + +[testenv:docs] +description = Builds the documentation +labels = +package = skip +deps = sphinx +commands = sphinx-build -M {posargs:html} doc docs/_build From fdcddf15b788d237dff6b72415d780a805da76c4 Mon Sep 17 00:00:00 2001 From: masklinn Date: Wed, 24 Dec 2025 19:56:01 +0100 
Subject: [PATCH 38/48] Calver release of builtins The updated workflow relies on the two new scripts: - `relrev` retrieves the REVISION of the latest published package - `tagcore` updates uap-core to the specified revision and writes out the commit hash to REVISION, printing it out to stdout The workflow calls those two scripts and check if they differ, in which case it cuts a new release. If the two revisions match the release is skipped. Fixes #277 --- .github/workflows/release-builtins.yml | 48 ++++++++++++----- scripts/relrev.py | 67 ++++++++++++++++++++++++ scripts/tagcore.py | 72 ++++++++++++++++++++++++++ ua-parser-builtins/hatch_build.py | 4 ++ 4 files changed, 179 insertions(+), 12 deletions(-) create mode 100644 scripts/relrev.py create mode 100644 scripts/tagcore.py diff --git a/.github/workflows/release-builtins.yml b/.github/workflows/release-builtins.yml index 408f9c74..538ef905 100644 --- a/.github/workflows/release-builtins.yml +++ b/.github/workflows/release-builtins.yml @@ -20,30 +20,56 @@ jobs: build: name: Build distribution runs-on: ubuntu-latest - + outputs: + release: ${{ steps.check.outputs.release }} steps: - uses: actions/checkout@v4 with: submodules: true fetch-depth: 0 persist-credentials: false - - name: update core - env: - TAG: ${{ inputs.tag || 'origin/master' }} - # needs to detach because we can update to a tag - run: git -C uap-core switch --detach "$TAG" - name: Set up Python uses: actions/setup-python@v5 with: python-version: "3.x" + - name: Check necessity of release + id: check + env: + PYPI: ${{ github.event.inputs.environment }} + REF: ${{ inputs.tag || 'HEAD' }} + run: | + case $PYPI in + pypi) + DOMAIN=pypi.org + ;; + testpypi) + DOMAIN=test.pypi.org + ;; + *) + exit 1 + esac + + RELREV=$(python scripts/relrev.py --domain "$DOMAIN") + VERSION=$(date +%Y%m) + CURREV=$(python scripts/tagcore.py --ref $REF --version $VERSION) + + if [ -n "$CURREV" -a "$RELREV" = "$CURREV" ] + then + echo "current rev matches latest release, 
skip new release" + else + echo release=true >> $GITHUB_OUTPUT + fi - name: Install pypa/build + if: ${{ steps.check.outputs.release == 'true' }} run: python3 -m pip install build --user - name: Build wheel + if: ${{ steps.check.outputs.release == 'true' }} run: | python3 -m build -w ua-parser-builtins mv ua-parser-builtins/dist . - name: Store the distribution packages + if: ${{ steps.check.outputs.release == 'true' }} uses: actions/upload-artifact@v4 with: name: python-package-distributions @@ -51,9 +77,8 @@ jobs: publish-to-testpypi: name: Publish to TestPyPI - if: ${{ github.event.inputs.environment == 'testpypi' }} - needs: - - build + needs: build + if: ${{ github.event.inputs.environment == 'testpypi' && needs.build.outputs.release == 'true' }} runs-on: ubuntu-latest environment: @@ -78,9 +103,8 @@ jobs: publish-to-pypi: name: publish - if: ${{ github.event_name == 'schedule' || github.event.inputs.environment == 'pypi' }} - needs: - - build + needs: build + if: ${{ (github.event_name == 'schedule' || github.event.inputs.environment == 'pypi') && needs.build.outputs.release == 'true' }} runs-on: ubuntu-latest environment: name: pypi diff --git a/scripts/relrev.py b/scripts/relrev.py new file mode 100644 index 00000000..0979a407 --- /dev/null +++ b/scripts/relrev.py @@ -0,0 +1,67 @@ +import argparse +import contextlib +import hashlib +import json +import re +import shutil +import sys +import tempfile +import zipfile +from urllib import parse, request + +parser = argparse.ArgumentParser( + description="Retrieves the revision for the latest release of ua-parser-builtins", +) +parser.add_argument( + "--domain", + default="pypi.org", +) +args = parser.parse_args() + +url = parse.urlunsplit(("https", args.domain, "simple/ua-parser-builtins", "", "")) + +print("checking", url, file=sys.stderr) +res = request.urlopen( + request.Request( + url, + headers={ + "Accept": "application/vnd.pypi.simple.v1+json", + }, + ) +) +if res.status != 200: + exit(f"Failed to 
retrieve project distributions: {res.status}") + +distributions = json.load(res) +version, distribution = next( + (v, d) + for v, d in zip( + reversed(distributions["versions"]), reversed(distributions["files"]) + ) + if not d["yanked"] + if re.fullmatch( + r"(\d+!)?\d+(\.\d+)*(\.post\d+)?", + v, + flags=re.ASCII, + ) +) +print("latest version:", version, file=sys.stderr) + +res = request.urlopen(distribution["url"]) +if res.status != 200: + exit(f"Failed to retrieve wheel: {res.status}") + +with tempfile.SpooledTemporaryFile(256 * 1024) as tf: + shutil.copyfileobj(res, tf) + for name, val in distribution["hashes"].items(): + tf.seek(0) + d = hashlib.file_digest(tf, name).hexdigest() + if d != val: + exit(f"{name} mismatch: expected {val!r} got {d!r}") + tf.seek(0) + with zipfile.ZipFile(tf) as z: + # if the REVISION file is not found then it's fine it's a + # pre-calver release (hopefully) and that means we should cut + # a calver one + with contextlib.suppress(KeyError): + print(z.read("REVISION").decode()) diff --git a/scripts/tagcore.py b/scripts/tagcore.py new file mode 100644 index 00000000..a5ef7f88 --- /dev/null +++ b/scripts/tagcore.py @@ -0,0 +1,72 @@ +import argparse +import datetime +import pathlib +import shutil +import subprocess + +CORE_REMOTE = "https://github.com/ua-parser/uap-core" + + +parser = argparse.ArgumentParser( + description="""Updates `uap-core` to `ref` and tags it with `version` + +If successful, writes the commit to `REVISION` and prints it to stdout. 
+""" +) +parser.add_argument( + "--ref", + default="HEAD", + help="uap-core ref to build, defaults to HEAD (the head of the default branch)", +) +parser.add_argument( + "--version", + help="version to tag the package as, defaults to an YMD calendar version matching the ref's commit date", +) +args = parser.parse_args() + + +if not shutil.which("git"): + exit("git required") + +r = subprocess.run( + ["git", "ls-remote", CORE_REMOTE, args.ref], + encoding="utf-8", + stdout=subprocess.PIPE, +) +if r.returncode: + exit("Unable to query uap-core repo") + +if r.stdout: + if r.stdout.count("\n") > 1: + exit(f"Found multiple matching refs for {args.ref}:\n{r.stdout}") + commit, _rest = r.stdout.split("\t", 1) +else: + try: + int(args.ref, 16) + commit = args.ref + except ValueError: + exit(f"Unknown or invalid ref {args.ref!r}") + +CORE_PATH = pathlib.Path(__file__).resolve().parent.parent / "uap-core" + +r = subprocess.run(["git", "-C", CORE_PATH, "fetch", CORE_REMOTE, commit]) +if r.returncode: + exit(f"Unable to retrieve commit {commit!r}") + +if args.version: + tagname = args.version +else: + r = subprocess.run( + ["git", "-C", CORE_PATH, "show", "-s", "--format=%cs", commit], + encoding="utf-8", + stdout=subprocess.PIPE, + ) + if r.returncode or not r.stdout: + exit(f"Unable to retrieve commit date from commit {commit!r}") + + tagname = datetime.date.fromisoformat(r.stdout.rstrip()).strftime("%Y%m%d") + +subprocess.run(["git", "-C", CORE_PATH, "switch", "-d", commit]) +subprocess.run(["git", "-C", CORE_PATH, "tag", tagname, commit]) +CORE_PATH.joinpath("REVISION").write_text(commit) +print(commit) diff --git a/ua-parser-builtins/hatch_build.py b/ua-parser-builtins/hatch_build.py index 9bbe23f0..2a86012e 100644 --- a/ua-parser-builtins/hatch_build.py +++ b/ua-parser-builtins/hatch_build.py @@ -41,6 +41,10 @@ def initialize( version: str, build_data: dict[str, Any], ) -> None: + rev = os.path.join(self.root, "uap-core/REVISION") + if os.path.exists(rev): + 
build_data["force_include"][rev] = "REVISION" + with open(os.path.join(self.root, "uap-core/regexes.yaml"), "rb") as f: data = yaml.safe_load(f) From c069278a1f3249327cef20310e0826cf720f84fe Mon Sep 17 00:00:00 2001 From: masklinn Date: Sat, 27 Dec 2025 11:29:16 +0100 Subject: [PATCH 39/48] Remove support for Python 3.9, bump minver to 3.10 3.9 reached end of life in november 2025. Fixes #283, fixes #284 --- .github/workflows/ci.yml | 1 - pyproject.toml | 5 ++--- tox.ini | 4 ++-- ua-parser-builtins/pyproject.toml | 3 +-- 4 files changed, 5 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 633c55ec..acfa5b39 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -84,7 +84,6 @@ jobs: - sdist - source python-version: - - "3.9" - "3.10" - "3.11" - "3.12" diff --git a/pyproject.toml b/pyproject.toml index fb94bbf8..14bf83cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ name = "ua-parser" description = "Python port of Browserscope's user agent parser" version = "1.0.1" readme = "README.rst" -requires-python = ">=3.9" +requires-python = ">=3.10" dependencies = ["ua-parser-builtins"] license = {text = "Apache 2.0"} @@ -33,7 +33,6 @@ classifiers = [ "Topic :: Internet :: WWW/HTTP", "Topic :: Software Development :: Libraries :: Python Modules", "Programming Language :: Python", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", @@ -81,7 +80,7 @@ combine-as-imports = true "tests/test_legacy.py" = ["PT030"] [tool.mypy] -python_version = "3.9" +python_version = "3.10" files = "src,tests" # can't use strict because it's only global diff --git a/tox.ini b/tox.ini index da50c20d..d5572913 100644 --- a/tox.ini +++ b/tox.ini @@ -1,6 +1,6 @@ [tox] min_version = 4.0 -env_list = py3{9,10,11,12,13} +env_list = py3{10,11,12,13} pypy{310,311} graalpy check, format, typecheck @@ -21,7 
+21,7 @@ deps = commands = pytest -Werror --doctest-glob="*.rst" {posargs} -[testenv:py3{9,10,11,12,13}] +[testenv:py3{10,11,12,13}] labels = test, cpy deps = pytest diff --git a/ua-parser-builtins/pyproject.toml b/ua-parser-builtins/pyproject.toml index 0086e853..ba885009 100644 --- a/ua-parser-builtins/pyproject.toml +++ b/ua-parser-builtins/pyproject.toml @@ -7,7 +7,7 @@ name = "ua-parser-builtins" description = "Precompiled rules for User Agent Parser" readme = "README.md" dependencies = [] -requires-python = ">=3.9" +requires-python = ">=3.10" license = {text = "Apache 2.0"} urls = {repository = "https://github.com/ua-parser/uap-python"} dynamic = ["version"] @@ -25,7 +25,6 @@ classifiers = [ "Topic :: Internet :: WWW/HTTP", "Topic :: Software Development :: Libraries :: Python Modules", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", From 96e3ad62d3064f2b8df422b7403b5c0517b9c639 Mon Sep 17 00:00:00 2001 From: masklinn Date: Sat, 27 Dec 2025 11:33:04 +0100 Subject: [PATCH 40/48] Remove support for pypy 3.10 Starting with 7.3.20 released July 2025, pypy stops supporting python 3.10 --- .github/workflows/ci.yml | 1 - tox.ini | 5 +---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index acfa5b39..bce0c92b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -88,7 +88,6 @@ jobs: - "3.11" - "3.12" - "3.13" - - "pypy-3.10" - "pypy-3.11" - "graalpy-24" include: diff --git a/tox.ini b/tox.ini index d5572913..fb42e87d 100644 --- a/tox.ini +++ b/tox.ini @@ -1,7 +1,7 @@ [tox] min_version = 4.0 env_list = py3{10,11,12,13} - pypy{310,311} + pypy{311} graalpy check, format, typecheck @@ -30,9 +30,6 @@ deps = ua-parser-rs ./ua-parser-builtins -[testenv:pypy{310,311}] -labels = test, pypy - [testenv:check] labels = check package = 
skip From 5b6b0af1a8037274f8b89effd3fcab9700828244 Mon Sep 17 00:00:00 2001 From: masklinn Date: Sat, 27 Dec 2025 11:39:50 +0100 Subject: [PATCH 41/48] Add support for cpython 3.14 Fixes #274 Also add google re2 to the 3.14 test matrix, kinda (3.13 was already added a few commits back "by mistake"), which fixes #276 --- .github/workflows/ci.yml | 1 + pyproject.toml | 1 + tox.ini | 4 ++-- ua-parser-builtins/pyproject.toml | 1 + 4 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bce0c92b..79a46c3d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -88,6 +88,7 @@ jobs: - "3.11" - "3.12" - "3.13" + - "3.14" - "pypy-3.11" - "graalpy-24" include: diff --git a/pyproject.toml b/pyproject.toml index 14bf83cb..d2421d62 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,7 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", "Programming Language :: Python :: Implementation :: GraalPy", diff --git a/tox.ini b/tox.ini index fb42e87d..d35e475a 100644 --- a/tox.ini +++ b/tox.ini @@ -1,6 +1,6 @@ [tox] min_version = 4.0 -env_list = py3{10,11,12,13} +env_list = py3{10,11,12,13,14} pypy{311} graalpy check, format, typecheck @@ -21,7 +21,7 @@ deps = commands = pytest -Werror --doctest-glob="*.rst" {posargs} -[testenv:py3{10,11,12,13}] +[testenv:py3{10,11,12,13,14}] labels = test, cpy deps = pytest diff --git a/ua-parser-builtins/pyproject.toml b/ua-parser-builtins/pyproject.toml index ba885009..6fc800dc 100644 --- a/ua-parser-builtins/pyproject.toml +++ b/ua-parser-builtins/pyproject.toml @@ -29,6 +29,7 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: 
Python :: 3.13", + "Programming Language :: Python :: 3.14", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", "Programming Language :: Python :: Implementation :: GraalPy", From 7638ca335d3e170ed72f8c8e0589204c1a134c7a Mon Sep 17 00:00:00 2001 From: masklinn Date: Sat, 27 Dec 2025 12:51:23 +0100 Subject: [PATCH 42/48] Add support for graal 25 Switch to tox-uv and its versioning, as I couldn't get tox and pyenv to collaborate on different graal versions. This seems to work. Fixes #281 --- .github/workflows/ci.yml | 4 ++-- tox.ini | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 79a46c3d..485e1702 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -90,14 +90,14 @@ jobs: - "3.13" - "3.14" - "pypy-3.11" - - "graalpy-24" + - "graalpy-25" include: - source: sdist artifact: dist/*.tar.gz - source: wheel artifact: dist/*.whl - opts: "" - - python-version: graalpy-24 + - python-version: graalpy-25 opts: "--experimental-options --engine.CompileOnly='~tregex re'" steps: - name: Checkout working copy diff --git a/tox.ini b/tox.ini index d35e475a..31cbec7a 100644 --- a/tox.ini +++ b/tox.ini @@ -2,7 +2,7 @@ min_version = 4.0 env_list = py3{10,11,12,13,14} pypy{311} - graalpy + graalpy3{11,12} check, format, typecheck [testenv] From 5be313168e8c034167491cb92fbfb05b6f0ac74b Mon Sep 17 00:00:00 2001 From: masklinn Date: Sat, 27 Dec 2025 13:07:52 +0100 Subject: [PATCH 43/48] Add support for free-threaded python Fixes #275 --- .github/workflows/ci.yml | 1 + tox.ini | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 485e1702..ac6765ea 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -89,6 +89,7 @@ jobs: - "3.12" - "3.13" - "3.14" + - "3.14t" - "pypy-3.11" - "graalpy-25" include: diff --git a/tox.ini b/tox.ini index 
31cbec7a..63cddcd9 100644 --- a/tox.ini +++ b/tox.ini @@ -1,6 +1,6 @@ [tox] min_version = 4.0 -env_list = py3{10,11,12,13,14} +env_list = py3{10,11,12,13,14,14t} pypy{311} graalpy3{11,12} check, format, typecheck From a9bfdcd9d4e773d3d360759d86e33d8a0c36dd65 Mon Sep 17 00:00:00 2001 From: masklinn Date: Sat, 27 Dec 2025 13:15:22 +0100 Subject: [PATCH 44/48] Unify CI check Having to update the required statuses in the branch protection rules every time the test matrix is updated is a pain in the ass. --- .github/workflows/ci.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ac6765ea..f88cf2a4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -140,3 +140,12 @@ jobs: run: python -m pip install ${{ matrix.artifact || '.' }} - name: run tests run: python ${{ matrix.opts }} -m pytest -v -Werror -Wignore::ImportWarning --doctest-glob="*.rst" -ra + # necessary to create a unified CI result and not have to update + # branch protection rules every time the matrix gets updated + results: + name: "CI Results" + needs: ["checks", "test"] + runs-on: ubuntu-latest + permissions: {} + steps: + - run: exit 0 From 0c54802d503acc74f2ac1442eddb55a1552a4809 Mon Sep 17 00:00:00 2001 From: masklinn Date: Sat, 27 Dec 2025 13:42:55 +0100 Subject: [PATCH 45/48] Update and pin all actions --- .github/workflows/ci.yml | 29 +++++++++++++++----------- .github/workflows/release-builtins.yml | 16 +++++++------- .github/workflows/release-main.yml | 10 +++++---- .github/workflows/zizmor.yml | 8 ++++--- .github/zizmor.yml | 4 ++++ 5 files changed, 41 insertions(+), 26 deletions(-) create mode 100644 .github/zizmor.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f88cf2a4..f82d5534 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,27 +4,32 @@ on: push: pull_request: +permissions: {} + jobs: checks: runs-on: ubuntu-latest steps: - name: Checkout working copy - 
uses: actions/checkout@v4 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # 6.0.1 with: submodules: true fetch-depth: 0 persist-credentials: false - name: ruff check - uses: chartboost/ruff-action@v1 + uses: astral-sh/ruff-action@57714a7c8a2e59f32539362ba31877a1957dded1 # 3.5.1 + with: + version: "latest" - name: ruff format if: always() - uses: chartboost/ruff-action@v1 + uses: astral-sh/ruff-action@57714a7c8a2e59f32539362ba31877a1957dded1 # 3.5.1 with: - args: format --diff + version: "latest" + args: format --check --diff - name: Set up Python id: setup_python if: always() - uses: actions/setup-python@v5 + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # 6.1.0 with: python-version: "3.x" - name: Install mypy @@ -44,12 +49,12 @@ jobs: steps: - name: Checkout working copy - uses: actions/checkout@v4 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # 6.0.1 with: submodules: true persist-credentials: false - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # 6.1.0 with: python-version: "3.x" - name: Install dependency @@ -60,14 +65,14 @@ jobs: run: | python -mbuild - name: Upload sdist - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # 6.0.0 with: name: sdist path: dist/*.tar.gz retention-days: 1 - name: Upload wheel - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # 6.0.0 with: name: wheel path: dist/*.whl @@ -102,13 +107,13 @@ jobs: opts: "--experimental-options --engine.CompileOnly='~tregex re'" steps: - name: Checkout working copy - uses: actions/checkout@v4 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # 6.0.1 with: submodules: true fetch-depth: 0 persist-credentials: false - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: 
actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # 6.1.0 with: python-version: ${{ matrix.python-version }} allow-prereleases: true @@ -132,7 +137,7 @@ jobs: - run: 'python -mpip install --only-binary :all: google-re2 || true' - name: download ${{ matrix.source }} artifact if: matrix.artifact - uses: actions/download-artifact@v4 + uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # 7.0.0 with: name: ${{ matrix.source }} path: dist/ diff --git a/.github/workflows/release-builtins.yml b/.github/workflows/release-builtins.yml index 538ef905..db013a2d 100644 --- a/.github/workflows/release-builtins.yml +++ b/.github/workflows/release-builtins.yml @@ -2,6 +2,8 @@ name: Publish ua-parser builtins run-name: Publish ${{ inputs.tag || 'master' }} to ${{ inputs.environment || 'pypy (scheduled)' }} +permissions: {} + on: schedule: # schedule a dev release on every 1st of the month, at 2034 UTC @@ -23,13 +25,13 @@ jobs: outputs: release: ${{ steps.check.outputs.release }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # 6.0.1 with: submodules: true fetch-depth: 0 persist-credentials: false - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # 6.1.0 with: python-version: "3.x" @@ -70,7 +72,7 @@ jobs: mv ua-parser-builtins/dist . 
- name: Store the distribution packages if: ${{ steps.check.outputs.release == 'true' }} - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # 6.0.0 with: name: python-package-distributions path: dist/ @@ -90,12 +92,12 @@ jobs: steps: - name: Download all the dists - uses: actions/download-artifact@v4 + uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # 7.0.0 with: name: python-package-distributions path: dist/ - name: Publish - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # 1.13.0 with: repository-url: https://test.pypi.org/legacy/ skip-existing: true @@ -114,11 +116,11 @@ jobs: steps: - name: Download all the dists - uses: actions/download-artifact@v4 + uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # 7.0.0 with: name: python-package-distributions path: dist/ - name: Publish - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # 1.13.0 with: verbose: true diff --git a/.github/workflows/release-main.yml b/.github/workflows/release-main.yml index bc6fca96..0e42d7ec 100644 --- a/.github/workflows/release-main.yml +++ b/.github/workflows/release-main.yml @@ -5,6 +5,8 @@ on: release: types: [created] +permissions: {} + env: ENVNAME: ${{ github.event_name == 'release' && 'pypi' || 'testpypi' }} @@ -21,11 +23,11 @@ jobs: steps: - name: Checkout working copy - uses: actions/checkout@v4 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # 6.0.1 with: persist-credentials: false - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # 6.1.0 with: python-version: "3.x" - name: Install dependency @@ -36,7 +38,7 @@ jobs: run: python -mbuild - name: Publish to testpypi if: ${{ env.ENVNAME == 'testpypi' }} - uses: 
pypa/gh-action-pypi-publish@release/v1 # zizmor: ignore[use-trusted-publishing] + uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # 1.13.0 with: repository-url: https://test.pypi.org/legacy/ skip-existing: true @@ -44,7 +46,7 @@ jobs: password: ${{ secrets.PUBLISH_TOKEN }} - name: Publish to pypi if: ${{ env.ENVNAME == 'pypi' }} - uses: pypa/gh-action-pypi-publish@release/v1 # zizmor: ignore[use-trusted-publishing] + uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # 1.13.0 with: verbose: true password: ${{ secrets.PUBLISH_TOKEN }} diff --git a/.github/workflows/zizmor.yml b/.github/workflows/zizmor.yml index 5bf4f98f..f94fa7be 100644 --- a/.github/workflows/zizmor.yml +++ b/.github/workflows/zizmor.yml @@ -4,6 +4,8 @@ on: push: pull_request: +permissions: {} + jobs: zizmor: runs-on: ubuntu-latest @@ -13,12 +15,12 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # 6.0.1 with: persist-credentials: false - name: Install the latest version of uv - uses: astral-sh/setup-uv@v5 + uses: astral-sh/setup-uv@681c641aba71e4a1c380be3ab5e12ad51f415867 # 7.1.6 - name: Run zizmor run: uvx zizmor --format sarif . 
> results.sarif @@ -26,7 +28,7 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Upload SARIF file - uses: github/codeql-action/upload-sarif@v3 + uses: github/codeql-action/upload-sarif@5d4e8d1aca955e8d8589aabd499c5cae939e33c7 # 4.31.9 with: sarif_file: results.sarif category: zizmor diff --git a/.github/zizmor.yml b/.github/zizmor.yml new file mode 100644 index 00000000..62ab71e7 --- /dev/null +++ b/.github/zizmor.yml @@ -0,0 +1,4 @@ +rules: + use-trusted-publishing: + ignore: + - release-main.yml # can't do that until pypi/support#6661 From 72ec405a1bfd4d18262623edb9a85354ff44f339 Mon Sep 17 00:00:00 2001 From: masklinn Date: Sun, 4 Jan 2026 17:51:47 +0100 Subject: [PATCH 46/48] Fix scheduled publication of -builtins There is no `environment` set when running scheduled. Fixes #286 --- .github/workflows/release-builtins.yml | 2 +- ua-parser-builtins/README.md | 15 +++++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/.github/workflows/release-builtins.yml b/.github/workflows/release-builtins.yml index db013a2d..6597070b 100644 --- a/.github/workflows/release-builtins.yml +++ b/.github/workflows/release-builtins.yml @@ -38,7 +38,7 @@ jobs: - name: Check necessity of release id: check env: - PYPI: ${{ github.event.inputs.environment }} + PYPI: ${{ github.event.inputs.environment || 'pypy' }} REF: ${{ inputs.tag || 'HEAD' }} run: | case $PYPI in diff --git a/ua-parser-builtins/README.md b/ua-parser-builtins/README.md index 8a568237..061f1c76 100644 --- a/ua-parser-builtins/README.md +++ b/ua-parser-builtins/README.md @@ -1,7 +1,14 @@ -# Precompiled ruleset for [ua-parser](https://pypi.org/project/ua-parser/) +# Precompiled ruleset for [ua-parser] This project does not do anything on its own, nor does it have any -actual API: it contains the dataset of -[uap-core](https://github.com/ua-parser/uap-core) pre-compiled for use -by [ua-parser](https://pypi.org/project/ua-parser/) to decrease +actual API: it contains the dataset of [uap-core] 
pre-compiled for use +by [ua-parser] to decrease initialisation times. + +The precompiled ruleset is released monthly based on whatever +[uap-core]'s default branch is at that moment. The [uap-core] commit +used for creating the compiled ruleset is stored in the `REVISION` +file at the root of the wheel. + +[ua-parser]: https://pypi.org/project/ua-parser/ +[uap-core]: https://github.com/ua-parser/uap-core From d2782a687a481d34e6ea3b32f779f7facce14e5b Mon Sep 17 00:00:00 2001 From: masklinn Date: Mon, 2 Feb 2026 17:30:11 +0100 Subject: [PATCH 47/48] Surprise, the builtins release is broken again! it's pypi not pypy, it was written correctly just 4 lines below but apparently I still could not get it right in #288. --- .github/workflows/release-builtins.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/release-builtins.yml b/.github/workflows/release-builtins.yml index 6597070b..fa54a251 100644 --- a/.github/workflows/release-builtins.yml +++ b/.github/workflows/release-builtins.yml @@ -1,6 +1,6 @@ name: Publish ua-parser builtins -run-name: Publish ${{ inputs.tag || 'master' }} to ${{ inputs.environment || 'pypy (scheduled)' }} +run-name: Publish ${{ inputs.tag || 'master' }} to ${{ inputs.environment || 'pypi (scheduled)' }} permissions: {} @@ -14,7 +14,7 @@ on: description: "uap-core ref to release" type: string environment: - description: "environment to release for (testpypy or pypy)" + description: "environment to release for (testpypi or pypi)" type: environment required: true @@ -38,7 +38,7 @@ jobs: - name: Check necessity of release id: check env: - PYPI: ${{ github.event.inputs.environment || 'pypy' }} + PYPI: ${{ github.event.inputs.environment || 'pypi' }} REF: ${{ inputs.tag || 'HEAD' }} run: | case $PYPI in From f225dd1f41094cfbabd568855d9b5113971220c4 Mon Sep 17 00:00:00 2001 From: masklinn Date: Sat, 14 Feb 2026 13:09:57 +0100 Subject: [PATCH 48/48] Switch main publish workflow to trusted publishing I'll 
probably discover this is broken the first time I try it, but this at least allows removing the token from github, and invalidating it in my account. --- .github/workflows/release-main.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/release-main.yml b/.github/workflows/release-main.yml index 0e42d7ec..98c17306 100644 --- a/.github/workflows/release-main.yml +++ b/.github/workflows/release-main.yml @@ -43,10 +43,8 @@ jobs: repository-url: https://test.pypi.org/legacy/ skip-existing: true verbose: true - password: ${{ secrets.PUBLISH_TOKEN }} - name: Publish to pypi if: ${{ env.ENVNAME == 'pypi' }} uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # 1.13.0 with: verbose: true - password: ${{ secrets.PUBLISH_TOKEN }}