From 04d7df774f13e5e36a50b72f895828c3c43498de Mon Sep 17 00:00:00 2001 From: Fantix King Date: Fri, 23 Apr 2021 16:40:19 -0400 Subject: [PATCH] Local draft --- .gitignore | 6 +- MANIFEST.in | 2 + httptools/parser/url_cparser.pxd | 31 ---- httptools/parser/url_parser.pyx | 108 -------------- httptools/parser/url_parser/Cargo.lock | 187 +++++++++++++++++++++++++ httptools/parser/url_parser/Cargo.toml | 18 +++ httptools/parser/url_parser/src/lib.rs | 49 +++++++ pyproject.toml | 2 + setup.py | 40 ++---- 9 files changed, 276 insertions(+), 167 deletions(-) delete mode 100644 httptools/parser/url_cparser.pxd delete mode 100644 httptools/parser/url_parser.pyx create mode 100644 httptools/parser/url_parser/Cargo.lock create mode 100644 httptools/parser/url_parser/Cargo.toml create mode 100644 httptools/parser/url_parser/src/lib.rs create mode 100644 pyproject.toml diff --git a/.gitignore b/.gitignore index 284ec93..45f6882 100644 --- a/.gitignore +++ b/.gitignore @@ -29,5 +29,7 @@ __pycache__/ /.pytest_cache /.mypy_cache /.vscode -.eggs -.venv +/.eggs +/.venv +/.python-version +/httptools/parser/url_parser/target/ diff --git a/MANIFEST.in b/MANIFEST.in index 6b2e857..fc03be7 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,4 @@ recursive-include vendor *.c *.h LICENSE* README* include MANIFEST.in LICENSE +include httptools/parser/url_parser/Cargo.toml +recursive-include httptools/parser/url_parser/src * diff --git a/httptools/parser/url_cparser.pxd b/httptools/parser/url_cparser.pxd deleted file mode 100644 index ab9265a..0000000 --- a/httptools/parser/url_cparser.pxd +++ /dev/null @@ -1,31 +0,0 @@ -from libc.stdint cimport uint16_t - - -cdef extern from "http_parser.h": - # URL Parser - - enum http_parser_url_fields: - UF_SCHEMA = 0, - UF_HOST = 1, - UF_PORT = 2, - UF_PATH = 3, - UF_QUERY = 4, - UF_FRAGMENT = 5, - UF_USERINFO = 6, - UF_MAX = 7 - - struct http_parser_url_field_data: - uint16_t off - uint16_t len - - struct http_parser_url: - uint16_t field_set - uint16_t port - http_parser_url_field_data[UF_MAX] field_data - - void http_parser_url_init(http_parser_url *u) - - int http_parser_parse_url(const char *buf, - size_t buflen, - int is_connect, - http_parser_url *u) diff --git a/httptools/parser/url_parser.pyx b/httptools/parser/url_parser.pyx deleted file mode 100644 index 49908f3..0000000 --- a/httptools/parser/url_parser.pyx +++ /dev/null @@ -1,108 +0,0 @@ -#cython: language_level=3 - -from __future__ import print_function -from cpython.mem cimport PyMem_Malloc, PyMem_Free -from cpython cimport PyObject_GetBuffer, PyBuffer_Release, PyBUF_SIMPLE, \ - Py_buffer - -from .errors import HttpParserInvalidURLError - -cimport cython -from . cimport url_cparser as uparser - -__all__ = ('parse_url',) - -@cython.freelist(250) -cdef class URL: - cdef readonly bytes schema - cdef readonly bytes host - cdef readonly object port - cdef readonly bytes path - cdef readonly bytes query - cdef readonly bytes fragment - cdef readonly bytes userinfo - - def __cinit__(self, bytes schema, bytes host, object port, bytes path, - bytes query, bytes fragment, bytes userinfo): - - self.schema = schema - self.host = host - self.port = port - self.path = path - self.query = query - self.fragment = fragment - self.userinfo = userinfo - - def __repr__(self): - return ('' - .format(self.schema, self.host, self.port, self.path, - self.query, self.fragment, self.userinfo)) - - -def parse_url(url): - cdef: - Py_buffer py_buf - char* buf_data - uparser.http_parser_url* parsed - int res - bytes schema = None - bytes host = None - object port = None - bytes path = None - bytes query = None - bytes fragment = None - bytes userinfo = None - object result = None - int off - int ln - - parsed = \ - PyMem_Malloc(sizeof(uparser.http_parser_url)) - uparser.http_parser_url_init(parsed) - - PyObject_GetBuffer(url, &py_buf, PyBUF_SIMPLE) - try: - buf_data = py_buf.buf - res = uparser.http_parser_parse_url(buf_data, py_buf.len, 0, parsed) - - if res == 0: - if parsed.field_set & (1 << uparser.UF_SCHEMA): - off = parsed.field_data[uparser.UF_SCHEMA].off - ln = parsed.field_data[uparser.UF_SCHEMA].len - schema = buf_data[off:off+ln] - - if parsed.field_set & (1 << uparser.UF_HOST): - off = parsed.field_data[uparser.UF_HOST].off - ln = parsed.field_data[uparser.UF_HOST].len - host = buf_data[off:off+ln] - - if parsed.field_set & (1 << uparser.UF_PORT): - port = parsed.port - - if parsed.field_set & (1 << uparser.UF_PATH): - off = parsed.field_data[uparser.UF_PATH].off - ln = parsed.field_data[uparser.UF_PATH].len - path = buf_data[off:off+ln] - - if parsed.field_set & (1 << uparser.UF_QUERY): - off = parsed.field_data[uparser.UF_QUERY].off - ln = parsed.field_data[uparser.UF_QUERY].len - query = buf_data[off:off+ln] - - if parsed.field_set & (1 << uparser.UF_FRAGMENT): - off = parsed.field_data[uparser.UF_FRAGMENT].off - ln = parsed.field_data[uparser.UF_FRAGMENT].len - fragment = buf_data[off:off+ln] - - if parsed.field_set & (1 << uparser.UF_USERINFO): - off = parsed.field_data[uparser.UF_USERINFO].off - ln = parsed.field_data[uparser.UF_USERINFO].len - userinfo = buf_data[off:off+ln] - - return URL(schema, host, port, path, query, fragment, userinfo) - else: - raise HttpParserInvalidURLError("invalid url {!r}".format(url)) - finally: - PyBuffer_Release(&py_buf) - PyMem_Free(parsed) diff --git a/httptools/parser/url_parser/Cargo.lock b/httptools/parser/url_parser/Cargo.lock new file mode 100644 index 0000000..7e20153 --- /dev/null +++ b/httptools/parser/url_parser/Cargo.lock @@ -0,0 +1,187 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "aho-corasick" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5" +dependencies = [ + "memchr", +] + +[[package]] +name = "autocfg" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" + +[[package]] +name = "cpython" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f11357af68648b6a227e7e2384d439cec8595de65970f45e3f7f4b2600be472" +dependencies = [ + "libc", + "num-traits", + "paste", + "python3-sys", +] + +[[package]] +name = "form_urlencoded" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fc25a87fa4fd2094bffb06925852034d90a17f0d1e05197d4956d3555752191" +dependencies = [ + "matches", + "percent-encoding", +] + +[[package]] +name = "idna" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89829a5d69c23d348314a7ac337fe39173b61149a9864deabd260983aed48c21" +dependencies = [ + "matches", + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "libc" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56d855069fafbb9b344c0f962150cd2c1187975cb1c22c1522c240d8c4986714" + +[[package]] +name = "matches" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" + +[[package]] +name = "memchr" +version = "2.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" + +[[package]] +name = "num-traits" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" +dependencies = [ + "autocfg", +] + +[[package]] +name = "paste" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45ca20c77d80be666aef2b45486da86238fabe33e38306bd3118fe4af33fa880" +dependencies = [ + "paste-impl", + "proc-macro-hack", +] + +[[package]] +name = "paste-impl" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d95a7db200b97ef370c8e6de0088252f7e0dfff7d047a28528e47456c0fc98b6" +dependencies = [ + "proc-macro-hack", +] + +[[package]] +name = "percent-encoding" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" + +[[package]] +name = "proc-macro-hack" +version = "0.5.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" + +[[package]] +name = "python3-sys" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b29b99c6868eb02beb3bf6ed025c8bcdf02efc149b8e80347d3e5d059a806db" +dependencies = [ + "libc", + "regex", +] + +[[package]] +name = "regex" +version = "1.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "957056ecddbeba1b26965114e191d2e8589ce74db242b6ea25fc4062427a5c19" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5f089152e60f62d28b835fbff2cd2e8dc0baf1ac13343bef92ab7eed84548" + +[[package]] +name = "tinyvec" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "317cca572a0e89c3ce0ca1f1bdc9369547fe318a683418e42ac8f59d14701023" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" + +[[package]] +name = "unicode-bidi" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5" +dependencies = [ + "matches", +] + +[[package]] +name = "unicode-normalization" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07fbfce1c8a97d547e8b5334978438d9d6ec8c20e38f56d4a4374d181493eaef" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "url" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ccd964113622c8e9322cfac19eb1004a07e636c545f325da085d5cdde6f1f8b" +dependencies = [ + "form_urlencoded", + "idna", + "matches", + "percent-encoding", +] + +[[package]] +name = "url_parser" +version = "0.1.0" +dependencies = [ + "cpython", + "url", +] diff --git a/httptools/parser/url_parser/Cargo.toml b/httptools/parser/url_parser/Cargo.toml new file mode 100644 index 0000000..9df1ec0 --- /dev/null +++ b/httptools/parser/url_parser/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "url_parser" +license = "MIT" +version = "0.1.0" +authors = ["MagicStack Inc. "] +edition = "2018" + +[dependencies] +url = "2.2.1" + +[dependencies.cpython] +version = "0.5.2" +features = ["extension-module"] + +[lib] +crate-type = ["lib", "cdylib"] +name = "url_parser" +path = "src/lib.rs" diff --git a/httptools/parser/url_parser/src/lib.rs b/httptools/parser/url_parser/src/lib.rs new file mode 100644 index 0000000..8c2cb5e --- /dev/null +++ b/httptools/parser/url_parser/src/lib.rs @@ -0,0 +1,49 @@ +use cpython::{py_class, py_fn, py_module_initializer, PythonObject}; +use cpython::{PyErr, PyResult, Python, ToPyObject, PyBytes, PyObject}; +use url::Url; +use std::str::from_utf8; + +py_module_initializer!(url_parser, |py, m| { + m.add(py, "__doc__", "This module is implemented in Rust.")?; + m.add(py, "__all__", ("parse_url",).to_py_object(py))?; + m.add(py, "URL", py.get_type::())?; + m.add(py, "parse_url", py_fn!(py, parse_url(url: &[u8])))?; + Ok(()) +}); + +py_class!(pub class URL |py| { + data _url: Url; + + @property def schema(&self) -> PyResult { + Ok(PyBytes::new(py, self._url(py).scheme().as_bytes())) + } + + @property def host(&self) -> PyResult { + match self._url(py).host_str() { + Some(host_str) => Ok(PyBytes::new(py, host_str.as_bytes()).into_object()), + None => Ok(py.None()), + } + } +}); + +fn get_invalid_url_error(py: Python, message: String) -> PyResult { + let errors = py.import("httptools.parser.errors")?; + let url_error = errors.get(py, "HttpParserInvalidURLError")?.extract(py)?; + Ok(PyErr::new_lazy_init( + url_error, + Some(message.to_py_object(py).into_object()) + )) +} + +fn parse_url(py: Python, url: &[u8]) -> PyResult { + match from_utf8(url) { + Ok(url_str) => match Url::parse(url_str) { + Ok(parsed_url) => URL::create_instance( + py, + parsed_url, + ), + Err(e) => Err(get_invalid_url_error(py, e.to_string())?) + }, + Err(e) => Err(get_invalid_url_error(py, e.to_string())?) + } +} diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..923975f --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,2 @@ +[build-system] +requires = ["setuptools", "setuptools-rust", "wheel"] diff --git a/setup.py b/setup.py index 9752a54..34ee707 100644 --- a/setup.py +++ b/setup.py @@ -16,6 +16,7 @@ ROOT = pathlib.Path(__file__).parent CYTHON_DEPENDENCY = 'Cython==0.29.22' +RUST_DEPENDENCY = 'setuptools-rust~=0.12' class httptools_build_ext(build_ext): @@ -28,15 +29,12 @@ class httptools_build_ext(build_ext): 'Cythion compiler directives'), ('use-system-llhttp', None, 'Use the system provided llhttp, instead of the bundled one'), - ('use-system-http-parser', None, - 'Use the system provided http-parser, instead of the bundled one'), ] boolean_options = build_ext.boolean_options + [ 'cython-always', 'cython-annotate', 'use-system-llhttp', - 'use-system-http-parser', ] def initialize_options(self): @@ -48,7 +46,6 @@ def initialize_options(self): super().initialize_options() self.use_system_llhttp = False - self.use_system_http_parser = False self.cython_always = False self.cython_annotate = None self.cython_directives = None @@ -112,7 +109,7 @@ def finalize_options(self): self._initialized = True def build_extensions(self): - mod_parser, mod_url_parser = self.distribution.ext_modules + mod_parser = self.distribution.ext_modules[0] if self.use_system_llhttp: mod_parser.libraries.append('llhttp') @@ -129,19 +126,6 @@ def build_extensions(self): mod_parser.sources.append('vendor/llhttp/src/http.c') mod_parser.sources.append('vendor/llhttp/src/llhttp.c') - if self.use_system_http_parser: - mod_url_parser.libraries.append('http_parser') - - if sys.platform == 'darwin' and \ - os.path.exists('/opt/local/include'): - # Support macports on Mac OS X. - mod_url_parser.include_dirs.append('/opt/local/include') - else: - mod_url_parser.include_dirs.append( - str(ROOT / 'vendor' / 'http-parser')) - mod_url_parser.sources.append( - 'vendor/http-parser/http_parser.c') - super().build_extensions() @@ -160,7 +144,7 @@ def build_extensions(self): 'unable to read the version from httptools/_version.py') -setup_requires = [] +setup_requires = [RUST_DEPENDENCY] if (not (ROOT / 'httptools' / 'parser' / 'parser.c').exists() or '--cython-always' in sys.argv): @@ -168,6 +152,16 @@ def build_extensions(self): setup_requires.append(CYTHON_DEPENDENCY) +def get_rust_extensions(): + import setuptools_rust + + yield setuptools_rust.RustExtension( + "httptools.parser.url_parser", + path="httptools/parser/url_parser/Cargo.toml", + binding=setuptools_rust.Binding.RustCPython, + ) + + setup( name='httptools', version=VERSION, @@ -201,14 +195,8 @@ def build_extensions(self): ], extra_compile_args=CFLAGS, ), - Extension( - "httptools.parser.url_parser", - sources=[ - "httptools/parser/url_parser.pyx", - ], - extra_compile_args=CFLAGS, - ), ], + rust_extensions=get_rust_extensions(), include_package_data=True, test_suite='tests.suite', setup_requires=setup_requires,